-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cms-derived-datasets: added external scripts to generate NanoAODRun1 …
…and PFNano docs
- Loading branch information
1 parent
9699a44
commit ebbb275
Showing
2 changed files
with
818 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,347 @@ | ||
#!/usr/bin/env python | ||
|
||
# code from https://github.com/cms-opendata-analyses/NanoAODRun1ProducerTool/blob/main/tools/documentation.py | ||
|
||
####################################################################################################################### | ||
#This file is from the one, named as 'inspectNanoFile.py' in cms software github(https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/test) | ||
#only the lines here given as 300-301, are replaced by the line, 302, because of 'dict_values' issue | ||
#usage: e.g., python3 documentation.py --doc=ANYNAME test.root | ||
#usage: e.g., python3 documentation.py --size=ANYNAME test.root | ||
#usage: e.g., python3 documentation.py --json=ANYNAME test.root | ||
####################################################################################################################### | ||
|
||
from builtins import range | ||
import sys, os.path, json | ||
from collections import defaultdict | ||
import six | ||
import ROOT | ||
ROOT.PyConfig.IgnoreCommandLineOptions = True | ||
ROOT.gROOT.SetBatch(True) | ||
|
||
|
||
class FileData: | ||
def __init__(self,data): | ||
self._json = data | ||
for k,v in six.iteritems(data): | ||
setattr(self,k,v) | ||
self.Events = self.trees["Events"] | ||
self.nevents = self.Events["entries"] | ||
|
||
class Branch: | ||
def __init__(self, tree, branch): | ||
self.tree = tree | ||
self.branch = branch | ||
self.name = branch.GetName() | ||
self.doc = branch.GetTitle() | ||
self.tot = branch.GetZipBytes()/1024.0 | ||
self.entries = None; | ||
self.single = True | ||
self.kind = "Unknown" | ||
if branch.GetNleaves() != 1: | ||
sys.stderr.write("Cannot parse branch '%s' in tree %s (%d leaves)\n"%(branch.GetName(), tree.GetName(), branch.GetNleaves())) | ||
return | ||
self.leaf = branch.FindLeaf(branch.GetName()) | ||
if not self.leaf: | ||
sys.stderr.write("Cannot parse branch '%s' in tree %s (no leaf)\n"%(branch.GetName(), tree.GetName())) | ||
return | ||
self.kind = self.leaf.GetTypeName() | ||
if "Idx" in self.name: | ||
self.kind+="(index to %s)"%((self.name[self.name.find("_")+1:self.name.find("Idx")]).title()) | ||
if self.leaf.GetLen() == 0 and self.leaf.GetLeafCount() != None: | ||
self.single = False | ||
self.counter = self.leaf.GetLeafCount().GetName() | ||
def toJSON(self): | ||
return ( self.name, dict(name = self.name, doc = self.doc, tot=self.tot, entries=self.entries, single=self.single, kind=self.kind, counter = getattr(self,'counter','')) ) | ||
|
||
class BranchGroup: | ||
def __init__(self, name): | ||
self.name = name | ||
self.tot = 0 | ||
self.entries = None; | ||
self.subs = [] | ||
self.kind = None | ||
self.doc = '' | ||
def append(self, sub): | ||
self.subs.append(sub) | ||
self.tot += sub.tot | ||
if not self.doc: self.doc = sub.doc | ||
def getKind(self): | ||
if self.kind: return self.kind | ||
if len(self.subs) == 1: | ||
if self.subs[0].single: self.kind = "Variable" | ||
else: | ||
self.kind = "Vector" | ||
self.counter = self.subs[0].counter | ||
else: | ||
allsingles, commonCounter = True, True | ||
counter = None | ||
for s in self.subs: | ||
if not s.single: | ||
allsingles = False | ||
if counter == None: counter = s.counter | ||
elif counter != s.counter: | ||
commonCounter = False | ||
if allsingles: | ||
self.kind = "Singleton" | ||
elif commonCounter: | ||
self.kind = "Collection" | ||
self.counter = counter | ||
else: | ||
self.kind = "ItsComplicated" | ||
return self.kind | ||
def toJSON(self): | ||
return (self.name, dict(name = self.name, doc = self.doc, kind = self.kind, tot = self.tot, entries = self.entries, subs = [s.name for s in self.subs])) | ||
|
||
|
||
def inspectRootFile(infile): | ||
if not os.path.isfile(infile): raise RuntimeError | ||
filesize = os.path.getsize(infile)/1024.0 | ||
tfile = ROOT.TFile.Open(infile) | ||
trees = {} | ||
for treeName in "Events", "Runs", "Lumis": | ||
toplevelDoc={} | ||
tree = tfile.Get(treeName) | ||
entries = tree.GetEntries() | ||
trees[treeName] = tree | ||
branchList = tree.GetListOfBranches() | ||
allbranches = [ Branch(tree,branchList.At(i)) for i in range(branchList.GetSize()) ] | ||
branchmap = dict((b.name,b) for b in allbranches) | ||
branchgroups = {} | ||
# make list of counters and countees | ||
counters = defaultdict(list) | ||
for b in allbranches: | ||
if not b.single: | ||
counters[b.counter].append(b.name) | ||
else: | ||
b.entries = entries | ||
c1 = ROOT.TCanvas("c1","c1") | ||
for counter,countees in six.iteritems(counters): | ||
n = tree.Draw(counter+">>htemp") | ||
if n != 0: | ||
htemp = ROOT.gROOT.FindObject("htemp") | ||
n = htemp.GetEntries() * htemp.GetMean() | ||
htemp.Delete() | ||
branchmap[counter].entries = entries | ||
for c in countees: | ||
br = branchmap[c] | ||
br.entries = n | ||
# now we start to create branch groups | ||
for b in allbranches: | ||
if b.name in counters: | ||
if len(b.doc) > 0: | ||
toplevelDoc[b.name[1:]]=b.doc | ||
continue # skip counters | ||
if "_" in b.name: | ||
head, tail = b.name.split("_",1) | ||
else: | ||
head = b.name | ||
toplevelDoc[b.name]=b.doc | ||
if head not in branchgroups: | ||
branchgroups[head] = BranchGroup(head) | ||
branchgroups[head].append(b) | ||
for bg in six.itervalues(branchgroups): | ||
if bg.name in toplevelDoc: | ||
bg.doc = toplevelDoc[bg.name] | ||
kind = bg.getKind() | ||
bg.entries = bg.subs[0].entries | ||
if kind == "Vector" or kind == "Collection": | ||
bg.append(branchmap[bg.counter]) | ||
elif kind == "ItsComplicated": | ||
for counter in set(s.counter for s in bg.subs if not s.single): | ||
bg.append(branchmap[counter]) | ||
allsize_c = sum(b.tot for b in allbranches) | ||
allsize = sum(b.tot for b in six.itervalues(branchgroups)) | ||
if abs(allsize_c - allsize) > 1e-6*(allsize_c+allsize): | ||
sys.stderr.write("Total size mismatch for tree %s: %10.4f kb vs %10.4f kb\n" % (treeName, allsize, allsize_c)) | ||
trees[treeName] = dict( | ||
entries = entries, | ||
allsize = allsize, | ||
branches = dict(b.toJSON() for b in allbranches), | ||
branchgroups = dict(bg.toJSON() for bg in six.itervalues(branchgroups)), | ||
) | ||
c1.Close() | ||
break # only Event tree for now | ||
tfile.Close() | ||
return dict(filename = os.path.basename(infile), filesize = filesize, trees = trees) | ||
|
||
def makeSurvey(treeName, treeData): | ||
allsize = treeData['allsize'] | ||
entries = treeData['entries'] | ||
survey = list(six.itervalues(treeData['branchgroups'])) | ||
survey.sort(key = lambda bg : - bg['tot']) | ||
scriptdata = [] | ||
runningtotal = 0 | ||
unit = treeName[:-1].lower() | ||
for s in survey: | ||
if s['tot'] < 0.01*allsize: | ||
tag = "<b>Others</b><br/>Size: %.0f b/%s (%.1f%%)" % ((allsize - runningtotal)/entries*1024, unit, (allsize-runningtotal)/allsize*100) | ||
scriptdata.append( "{ 'label':'others', 'tag':'top', 'size':%s, 'tip':'%s' }" % ((allsize-runningtotal)/entries, tag) ) | ||
break | ||
else: | ||
tag = "<b><a href=\"#%s\">%s</a></b><br/>" % (s['name'],s['name']); | ||
tag += "Size: %.0f b/%s (%.1f%%)" % (s['tot']/entries*1024, unit, s['tot']/allsize*100); | ||
if (s['kind'] in ("Vector","Collection")) and s['entries'] > 0: | ||
tag += "<br/>Items/%s: %.1f, %.0f b/item" %(unit, float(s['entries'])/entries, s['tot']/s['entries']*1024); | ||
scriptdata.append( "{ 'label':'%s', 'tag':'%s', 'size':%s, 'tip':'%s' }" % ( s['name'], s['name'], s['tot']/entries, tag) ) | ||
runningtotal += s['tot'] | ||
return (survey, "\n,\t".join(scriptdata)) | ||
|
||
def writeSizeReport(fileData, stream): | ||
filename = fileData.filename | ||
filesize = fileData.filesize | ||
events = fileData.nevents | ||
survey, scriptdata = makeSurvey("Events", filedata.Events) | ||
title = "%s (%.3f Mb, %d events, %.2f kb/event)" % (filename, filesize/1024.0, events, filesize/events) | ||
stream.write(""" | ||
<html> | ||
<head> | ||
<title>{title}</title> | ||
<link rel="stylesheet" type="text/css" href="https://gpetrucc.web.cern.ch/gpetrucc/micro/patsize.css" /> | ||
<script type="text/javascript" src="https://gpetrucc.web.cern.ch/gpetrucc/rgraph/RGraph.common.core.js"></script> | ||
<script type="text/javascript" src="https://gpetrucc.web.cern.ch/gpetrucc/rgraph/RGraph.pie.js"></script> | ||
<script type="text/javascript" src="https://gpetrucc.web.cern.ch/gpetrucc/rgraph/RGraph.common.dynamic.js"></script> | ||
<script type="text/javascript" src="https://gpetrucc.web.cern.ch/gpetrucc/rgraph/RGraph.common.tooltips.js"></script> | ||
<script type="text/javascript" src="https://gpetrucc.web.cern.ch/gpetrucc/rgraph/RGraph.common.key.js"></script> | ||
</head> | ||
<body> | ||
<a name="top" id="top"><h1>{title}</h1></a> | ||
<canvas id="mainCanvas" width="800" height="300">[No canvas support]</canvas> | ||
<script type="text/javascript"> | ||
var data = [ {scriptdata} ]; | ||
""".format(title = title, scriptdata = scriptdata)); ## must split into two write calls, since the javascript confounds str.format | ||
stream.write(""" | ||
window.onload = function() { | ||
values = []; | ||
labels = []; | ||
keys = []; | ||
tips = []; | ||
for (var i = 0; i < data.length; i++) { | ||
values.push( data[i].size ); | ||
labels.push( data[i].label ); | ||
keys.push( data[i].label ); | ||
tips.push( data[i].tip ); | ||
} | ||
var chart = new RGraph.Pie("mainCanvas", values) | ||
.Set('exploded', 7) | ||
.Set('tooltips', tips) | ||
.Set('tooltips.event', 'onmousemove') | ||
.Set('key', labels) | ||
.Set('key.position.graph.boxed', false) | ||
.Draw(); | ||
} | ||
</script> | ||
<h1>Event data</h1> | ||
<table> | ||
"""); | ||
stream.write("<tr class='header'><th>" + "</th><th>".join([ "collection", "kind", "vars", "items/evt", "kb/evt", "b/item", "plot", "%" ]) + "</th><th colspan=\"2\">cumulative %</th></tr>\n"); | ||
grandtotal = filedata.Events['allsize']; runningtotal = 0 | ||
for s in survey: | ||
stream.write("<th title=\"%s\"><a href='#%s'>%s</a></th><td style='text-align : left;'>%s</td><td>%d</td>" % (s['doc'],s['name'],s['name'],s['kind'].lower(),len(s['subs']))) | ||
stream.write("<td>%.2f</td><td>%.3f</td><td>%.1f</td>" % (s['entries']/events, s['tot']/events, s['tot']/s['entries']*1024 if s['entries'] else 0)) | ||
stream.write("<td class=\"img\"><img src='https://gpetrucc.web.cern.ch/gpetrucc/micro/blue-dot.gif' width='%d' height='%d' /></td>" % (s['tot']/grandtotal*200,10)) | ||
stream.write("<td>%.1f%%</td>" % ( s['tot']/grandtotal * 100.0)) | ||
stream.write("<td>%.1f%%</td>" % ( (runningtotal+s['tot'])/grandtotal * 100.0)) | ||
stream.write("<td>%.1f%%</td>" % ( (grandtotal-runningtotal)/grandtotal * 100.0)) | ||
stream.write("</tr>\n") | ||
runningtotal += s['tot']; | ||
|
||
# all known data | ||
stream.write("<th>All Event data</th>") | ||
stream.write("<td> </td><td> </td><td> </td><td><b>%.2f</b></td><td> </td>" % (grandtotal/events)) | ||
stream.write("<td class=\"img\"><img src=\"https://gpetrucc.web.cern.ch/gpetrucc/micro/green-dot.gif\" width='%d' height='10' />" % ( grandtotal/filesize*100.0)) | ||
stream.write("</td><td>%.1f%%<sup>a</sup></td>" % (grandtotal/filesize*100.0)) | ||
stream.write("</tr>\n") | ||
|
||
# other, unknown overhead | ||
stream.write("<th>Non per-event data or overhead</th>") | ||
stream.write("<td> </td><td> </td><td> </td><td>%.2f</td><td> </td>" % ( (filesize-grandtotal)/events)) | ||
stream.write("<td class=\"img\"><img src='https://gpetrucc.web.cern.ch/gpetrucc/micro/red-dot.gif' width='%d' height='%d' /></td>" % ( (filesize-grandtotal)/filesize * 100, 10 )) | ||
stream.write("<td>%.1f%%<sup>a</sup></td>" % ( (filesize-grandtotal)/filesize * 100.0 )) | ||
stream.write("</tr>\n") | ||
|
||
# all file | ||
stream.write("<th>File size</th>") | ||
stream.write("<td> </td><td> </td><td> </td><td><b>%.2f</b></td><td> </td>" % (filesize/events)) | ||
stream.write("<td> </td><td> </td></tr>\n") | ||
|
||
stream.write(""" | ||
</table> | ||
Note: size percentages of individual event products are relative to the total size of Event data only.<br /> | ||
Percentages with <sup>a</sup> are instead relative to the full file size. | ||
<h1>Events detail</h1> | ||
""") | ||
for s in sorted(survey, key = lambda s : s['name']): | ||
stream.write("<h2><a name='%s' id='%s'>%s</a> (%.1f items/evt, %.3f kb/evt) <sup><a href=\"#top\">[back to top]</a></sup></h2>" % (s['name'], s['name'], s['name'], s['entries']/events, s['tot']/events)) | ||
stream.write("<table>\n") | ||
stream.write("<tr class='header'><th>" + "</th><th>".join( [ "branch", "kind", "b/event", "b/item", "plot", "%" ]) + "</th></tr>\n") | ||
subs = [ fileData.Events['branches'][b] for b in s['subs'] ] | ||
for b in sorted(subs, key = lambda s : - s['tot']): | ||
stream.write("<th title=\"%s\">%s</th><td style='text-align : left;'>%s</td><td>%.1f</td><td>%.1f</td>" % (b['doc'],b['name'], b['kind'], b['tot']/events*1024, b['tot']/s['entries']*1024 if s['entries'] else 0)) | ||
stream.write("<td class=\"img\"><img src='https://gpetrucc.web.cern.ch/gpetrucc/micro/blue-dot.gif' width='%d' height='%d' /></td>" % ( b['tot']/s['tot']*200, 10 )) | ||
stream.write("<td>%.1f%%</td>" % (b['tot']/s['tot'] * 100.0)) | ||
stream.write("</tr>\n") | ||
stream.write("</table>\n") | ||
stream.write(""" | ||
</body></html> | ||
""") | ||
|
||
def writeDocReport(fileData, stream): | ||
stream.write( """ | ||
<html> | ||
<head> | ||
<title>Documentation for {filename} </title> | ||
<link rel="stylesheet" type="text/css" href="https://gpetrucc.web.cern.ch/gpetrucc/micro/patsize.css" /> | ||
</head> | ||
<body> | ||
<h1>Content</h1> | ||
<table> | ||
""".format(filename=fileData.filename)) | ||
stream.write( "<tr class='header'><th>Collection</th><th>Description</th></tr>\n" ) | ||
groups = fileData.Events['branchgroups'].values() | ||
|
||
# groups.sort(key = lambda s : s['name']) | ||
# for s in groups: | ||
for s in sorted(groups, key = lambda s : s['name']): | ||
|
||
stream.write( "<th><a href='#%s'>%s</a></th><td style='text-align : left;'>%s</td></tr>\n" % (s['name'],s['name'],s['doc']) ) | ||
stream.write( "</table>\n\n<h1>Events detail</h1>\n" ) | ||
for s in groups: | ||
stream.write( "<h2><a name='%s' id='%s'>%s</a> <sup><a href=\"#top\">[back to top]</a></sup></h2>\n" % (s['name'], s['name'], s['name']) ) | ||
stream.write( "<table>\n" ) | ||
stream.write( "<tr class='header'><th>Object property</th><th>Type</th><th>Description</th></tr>\n" ) | ||
subs = [ fileData.Events['branches'][b] for b in s['subs'] ] | ||
for b in sorted(subs, key = lambda s : s['name']): | ||
stream.write( "<th>%s</th><td style='text-align : left;'>%s</td><td style='text-align : left;'>%s</td>" % (b['name'], b['kind'], b['doc']) ) | ||
stream.write( "</tr>\n" ) | ||
stream.write( "</table>\n" ) | ||
stream.write( """ | ||
</body></html> | ||
""" ) | ||
|
||
def _maybeOpen(filename): | ||
return open(filename, 'w') if filename != "-" else sys.stdout | ||
|
||
if __name__ == '__main__': | ||
from optparse import OptionParser | ||
parser = OptionParser(usage="%prog [options] inputFile") | ||
parser.add_option("-j", "--json", dest="json", type="string", default=None, help="Write out json file") | ||
parser.add_option("-d", "--doc", dest="doc", type="string", default=None, help="Write out html doc") | ||
parser.add_option("-s", "--size", dest="size", type="string", default=None, help="Write out html size report") | ||
(options, args) = parser.parse_args() | ||
if len(args) != 1: raise RuntimeError("Please specify one input file") | ||
|
||
if args[0].endswith(".root"): | ||
filedata = FileData(inspectRootFile(args[0])) | ||
elif args[0].endswith(".json"): | ||
filedata = FileData(json.load(open(args[0],'r'))) | ||
else: raise RuntimeError("Input file %s is not a root or json file" % args[0]) | ||
|
||
if options.json: | ||
json.dump(filedata._json, _maybeOpen(options.json), indent=4) | ||
sys.stderr.write("JSON output saved to %s\n" % options.json) | ||
if options.doc: | ||
writeDocReport(filedata, _maybeOpen(options.doc)) | ||
sys.stderr.write("HTML documentation saved to %s\n" % options.doc) | ||
if options.size: | ||
writeSizeReport(filedata, _maybeOpen(options.size)) | ||
sys.stderr.write("HTML size report saved to %s\n" % options.size) |
Oops, something went wrong.