This repository has been archived by the owner on Jun 12, 2018. It is now read-only.

Commit

Merge pull request #29 from closeio/python-style
Style improvements
Travis Redman committed Nov 10, 2015
2 parents 9a211bd + b6c7f41 commit 1e61454
Showing 4 changed files with 238 additions and 106 deletions.
16 changes: 14 additions & 2 deletions record/config.py.example
@@ -3,26 +3,38 @@
import logging

DB_CONFIG = {

# Indicates which database(s) to record.
"target_databases": ["test"],

# Indicates which collections to record. If you want to capture all
# collections' activity, leave this field as `None` (but we'll always
# skip the collection `system.profile`, even if it has been explicitly
# specified).
"target_collections": [],

"oplog_servers": [
{ "mongodb_uri": "mongodb://localhost:27017" },
],

# In most cases you will record from the profile DB on the primary.
# If you are also sending queries to secondaries, you may want to specify
# a list of secondary servers in addition to the primary.
"profiler_servers": [
{ "mongodb_uri": "mongodb://localhost:27017" }
],

"oplog_output_file": "./OPLOG_OUTPUT",
"output_file": "./OUTPUT",
# the length for the recording
"duration_secs": 10

# If overwrite_output_file is True, the same output file will be
# overwritten in between consecutive calls of the recorder. If it's
# False, the recorder will append a unique number to the end of the
# output_file if the original one already exists.
"overwrite_output_file": True,

# The length for the recording
"duration_secs": 10,
}

APP_CONFIG = {
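For orientation, a hypothetical fully filled-in DB_CONFIG based on the example above might look like the following; the hostnames, database name, and duration are illustrative placeholders, not values from this commit.

DB_CONFIG = {
    # Record activity for a single database.
    "target_databases": ["orders"],

    # An empty list (or None) records every collection except system.profile.
    "target_collections": [],

    # Tail the oplog on the primary.
    "oplog_servers": [
        {"mongodb_uri": "mongodb://primary.example.com:27017"},
    ],

    # Read system.profile on the primary and on one query-serving secondary.
    "profiler_servers": [
        {"mongodb_uri": "mongodb://primary.example.com:27017"},
        {"mongodb_uri": "mongodb://secondary1.example.com:27017"},
    ],

    "oplog_output_file": "./OPLOG_OUTPUT",
    "output_file": "./OUTPUT",

    # Reuse the same output file on every run instead of appending a unique
    # suffix to it.
    "overwrite_output_file": True,

    # Record for ten minutes.
    "duration_secs": 600,
}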
40 changes: 28 additions & 12 deletions record/merge.py
@@ -1,5 +1,6 @@
"""This script allows us to manually merge the results from oplog and profiling
results."""
import os
import utils
import config
import calendar
@@ -39,18 +40,21 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
on-time merge since you cannot determine if some "old" entries will come
later."""
oplog = open(oplog_output_file, "rb")

# create a map of profiler file names to files
profiler_files = {}
for profiler_file in profiler_output_files:
profiler_files[profiler_file] = open(profiler_file, "rb")

output = open(output_file, "wb")
logger = utils.LOG

logger.info("Starts completing the insert options")
oplog_doc = utils.unpickle(oplog)
# create a map of (profiler file names, doc ts) to doc

# Create a map of tuple(doc's timestamp, profiler file name) to doc for
# each profiler. This makes it easy to fetch the earliest doc in the group
# on each iteration.
profiler_docs = {}
for file_name in profiler_files:
doc = utils.unpickle(profiler_files[file_name])
@@ -68,7 +72,7 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
while oplog_doc and len(profiler_docs) > 0:
if (noninserts + inserts) % 2500 == 0:
logger.info("processed %d items", noninserts + inserts)

# get the earliest profile doc out of all profiler_docs
key = min(profiler_docs.keys())
profiler_doc = profiler_docs[key]
@@ -79,16 +83,20 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
if doc:
profiler_docs[(doc["ts"], key[1])] = doc

# If the retrieved operation is not an insert, we can simply dump it
# to the output file. Otherwise, we need to cross-reference the
# profiler's insert operation with an oplog entry (because the
# profiler doesn't contain the inserted object's details).
if profiler_doc["op"] != "insert":
dump_op(output, profiler_doc)
noninserts += 1
else:
# Replace the profiler's insert operation doc with the oplog's,
# but keep the canonical form of "ts".
# Compare the profile doc's ts with the oplog doc's ts. In the
# ideal scenario, every insert we capture via the profile
# collection should match a consecutive oplog entry (the oplog
# tailer only looks at insert ops).
profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
oplog_ts = oplog_doc["ts"].time
# Only care about second-level precision.
# This is a lame enforcement of consistency
delta = abs(profiler_ts - oplog_ts)
if delta > 3:
# TODO strictly speaking, this ain't good since the files are
@@ -104,11 +112,12 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
" profiler %d", oplog_ts, profiler_ts)
mild_inconsistencies += 1

oplog_doc["ts"] = profiler_doc["ts"]
# make sure "op" is "insert" instead of "i".
oplog_doc["op"] = profiler_doc["op"]
oplog_doc["ts"] = profiler_doc["ts"] # we still want to keep the canonical form of the ts
oplog_doc["op"] = profiler_doc["op"] # make sure "op" is "insert" instead of "i"
dump_op(output, oplog_doc)
inserts += 1

# Get the next doc from the oplog
oplog_doc = utils.unpickle(oplog)

# finish up any remaining non-insert ops
@@ -121,7 +130,7 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
doc = utils.unpickle(profiler_files[key[1]])
if doc:
profiler_docs[(doc["ts"], key[1])] = doc

if profiler_doc["op"] == "insert":
break
dump_op(output, profiler_doc)
@@ -132,11 +141,18 @@ def merge_to_final_output(oplog_output_file, profiler_output_files, output_file)
" severe ts incosistencies: %d\n"
" mild ts incosistencies: %d\n", inserts, noninserts,
severe_inconsistencies, mild_inconsistencies)

for f in [oplog, output]:
f.close()
for f in profiler_files.values():
f.close()

# Clean up temporary files (oplog + profiler files), since everything is
# already in the main output file
for f in profiler_output_files:
os.remove(f)
os.remove(oplog_output_file)

return True


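The merge loop above leans on two small patterns: profiler docs are kept in a dict keyed by (timestamp, file name), so min() over the keys always yields the chronologically earliest pending entry across all profiler files, and each profiler insert is only matched against the next oplog entry if their timestamps agree to within a few seconds. Below is a minimal sketch of those two pieces; the helper names (next_earliest_profiler_doc, ts_delta_seconds) are hypothetical and not part of this commit.

import calendar

def next_earliest_profiler_doc(profiler_docs, profiler_files, unpickle):
    # profiler_docs maps (doc timestamp, file name) -> doc, so min() over its
    # keys returns the earliest pending entry across all profiler files.
    key = min(profiler_docs.keys())
    doc = profiler_docs.pop(key)
    # Refill the slot from the file this doc came from; key[1] is the file name.
    next_doc = unpickle(profiler_files[key[1]])
    if next_doc:
        profiler_docs[(next_doc["ts"], key[1])] = next_doc
    return doc

def ts_delta_seconds(profiler_doc, oplog_doc):
    # The profiler stores a datetime, while the oplog stores a BSON Timestamp
    # whose .time attribute is already seconds since the epoch, so the
    # comparison only has second-level precision.
    profiler_ts = calendar.timegm(profiler_doc["ts"].timetuple())
    oplog_ts = oplog_doc["ts"].time
    return abs(profiler_ts - oplog_ts)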
