Skip to content

Commit

Permalink
With function that plots ROC curves
Browse files Browse the repository at this point in the history
  • Loading branch information
Gigi Stark authored and Gigi Stark committed Jul 13, 2016
1 parent 41a43d7 commit 7efc1a8
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 31 deletions.
12 changes: 5 additions & 7 deletions nn_combined.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import datetime
import time

def NN_train(data):
def NN_train(data, model_name):
'''
Args:
data: dictionary containing relevant data
Expand Down Expand Up @@ -41,6 +41,8 @@ def NN_train(data):
#combining the jet and photon classes to make a combined recurrent neural network
combined_rnn = Sequential()
combined_rnn.add(Merge([jet_channel, photon_channel], mode='concat'))
combined_rnn.add(Dense(72, activation='relu'))
combined_rnn.add(Dropout(0.3))
combined_rnn.add(Dense(36, activation='relu'))
combined_rnn.add(Dropout(0.3))
combined_rnn.add(Dense(24, activation='relu'))
Expand All @@ -54,7 +56,7 @@ def NN_train(data):
print 'Training:'
try:
combined_rnn.fit([X_jets_train, X_photons_train],
y_train, batch_size=16, class_weight={
y_train, batch_size=100, class_weight={
k : (float(len(y_train)) / float(len(np.unique(y_train)) * (len(y_train[y_train == k])))) for k in np.unique(y_train)
},
callbacks = [
Expand All @@ -68,8 +70,7 @@ def NN_train(data):
print 'Training ended early.'

#saving the combined recurrent neural network
setType=raw_input("What set is this?")
combined_rnn.save_weights('TestModel'+setType+'.H5')
combined_rnn.save_weights('TestModel_'+model_name+'.H5')
combined_rnn_json=combined_rnn.to_json()
open('TestModel.json','w').write(combined_rnn_json)

Expand All @@ -89,9 +90,6 @@ def NN_test(net, data):
y_test=data['y_test']
w_test=data['w_test']

print y_test.shape
print w_test.shape

yhat_rnn = net.predict([X_jets_test, X_photons_test], verbose = True, batch_size = 512)

return yhat_rnn
18 changes: 9 additions & 9 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import logging
from nn_combined import NN_train, NN_test
import deepdish.io as io
from plotting import plot_NN
from plotting import plot_NN, plot_roc_Curve
#from plotting import plot_inputs, plot_performance

#from nn_model import train, test

def main(json_config, tree_name):
def main(json_config, model_name, tree_name):
'''
Args:
-----
Expand Down Expand Up @@ -59,7 +59,7 @@ def sha(s):
try:
logger.info('Attempting to read from {}'.format(pickle_name))
data = cPickle.load(open(pickle_name, 'rb'))
logger.info('Pre-processed data found and loaded from pickle')
logger.info('Pre-processed data found and loaded from pickle')
# -- otherwise, process the new data
except IOError:
logger.info('Pre-processed data not found in {}'.format(pickle_name))
Expand Down Expand Up @@ -101,22 +101,21 @@ def sha(s):

# # -- train
# # design a Keras NN with three RNN streams (jets, photons, muons)
# # -- train
# # design a Keras NN with three RNN streams (jets, photons, muons)

le=data['LabelEncoder']
# # combine the outputs and process them through a bunch of FF layers
# # use a validation split of 20%
# # save out the weights to hdf5 and the model to yaml
net=NN_train(data)
net=NN_train(data, model_name)

# # -- test
# # evaluate performance on the test set
yhat=NN_test(net, data)

# # -- plot performance
plot_NN(yhat, data)
#plot_NN(yhat, data)

# # produce ROC curves to evaluate performance
plot_roc_Curve(yhat, data, le, model_name)
# # save them out to pdf
# plot_performance(yhat, data['y_test'], data['w_test'])

Expand All @@ -130,8 +129,9 @@ def sha(s):
# -- read in arguments
parser = argparse.ArgumentParser()
parser.add_argument('config', help="path to JSON file that specifies classes and corresponding ROOT files' paths")
parser.add_argument('model_name', help="name of the set from particular network")
parser.add_argument('--tree', help="name of the tree to open in the ntuples", default='mini')
args = parser.parse_args()

# -- pass arguments to main
sys.exit(main(args.config, args.tree))
sys.exit(main(args.config, args.model_name, args.tree))
67 changes: 52 additions & 15 deletions plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pandautils as pup
import os
from sklearn.preprocessing import LabelEncoder
from viz import calculate_roc, ROC_plotter, add_curve
import cPickle

def _plot_X(train, test, y_train, y_test, w_train, w_test, varlist, le, feature):
'''
Expand Down Expand Up @@ -120,19 +122,54 @@ def plot_NN(yhat, data):
matplotlib.rcParams.update({'font.size': 16})
fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
bins = np.linspace(0,1,30)
#find probability of each class
for k in range(len(np.unique(y_test))):
color = iter(cm.rainbow(np.linspace(0, 1, len(np.unique(y_test)))))
for j in range (len(np.unique(y_test))):
c = next(color)
_ = plt.hist(yhat[:,k][y_test==j],
bins=bins,
histtype='step',
normed=True,
label='Y=' + str(j),
weights=w_test[y_test == j],
color=c,
linewidth=1)
plt.xlabel('Probabilty of Y=' +str(k))
plt.ylabel('Weighted Normalized Number of Events')
plt.legend()
plt.show()
print k
color = iter(cm.rainbow(np.linspace(0, 1, len(np.unique(y_test)))))
#find the truth label for each class
for j in range (len(np.unique(y_test))):
c = next(color)
_ = plt.hist(yhat[:,k][y_test==j],
bins=bins,
histtype='step',
normed=True,
label='Y=' + str(j),
weights=w_test[y_test == j],
color=c,
linewidth=1)
plt.xlabel('Probabilty of Y=' +str(k))
plt.ylabel('Weighted Normalized Number of Events')
plt.legend()
plt.savefig('/Users/gigifstark/CERN_Work/HH2YBB')

def plot_roc_Curve(yhat, data, le, model_name):
    '''
    Plot one ROC curve per signal class against the background class and
    pickle the collected curves.

    Args:
        yhat: an ndarray of the probability of each event for each class,
            indexed as yhat[event, class]
        data: dictionary containing relevant data; the 'y_test' (class labels)
            and 'w_test' (event weights) entries are read
        le: label encoder; le.inverse_transform(k) supplies the title of
            the plot for class k
        model_name: name forwarded to ROC_plotter and used as the prefix of
            the output pickle file
    Returns:
        None. As side effects, one 'roc<k>.pdf' figure is saved per signal
        class and the curves of all classes are dumped to '<model_name>_pkl'.
    '''
    # NOTE(review): class index 5 is hard-coded as the background class, so
    # this presumably expects exactly six classes -- confirm before reusing
    # with a different label set.
    background = 5

    y_test = data['y_test']
    w_test = data['w_test']
    pkl_dict = {}

    # The last class index is skipped: with six classes it is the background.
    for k in range(0, len(np.unique(y_test)) - 1):
        # keep only the events of signal class k and of the background class
        sig_back = (y_test == k) | (y_test == background)
        yhat_sel = yhat[sig_back]

        # log-likelihood ratio of signal vs. background; zero probabilities
        # give inf/nan, which are dropped through the `finite` mask
        discriminant = np.log(yhat_sel[:, k] / yhat_sel[:, background])
        finite = np.isfinite(discriminant)

        curves_dictionary = add_curve(
            "Y=" + str(k), 'blue',
            calculate_roc(
                y_test[sig_back][finite],
                discriminant[finite],
                pos_label=k,
                weights=w_test[sig_back][finite]
            )
        )
        # assumes add_curve returns a dict-like object -- TODO confirm in viz
        pkl_dict.update(curves_dictionary)

        print('Plotting')
        fig = ROC_plotter(curves_dictionary, model_name,
                          title=le.inverse_transform(k),
                          min_eff=0.1, max_eff=1.0, logscale=True)
        plt.ylim([0, 100])
        fig.savefig('/Users/gigifstark/CERN_Work/HH2YBB/roc' + str(k) + '.pdf')

    # The original referenced an undefined name `trial` here; model_name is
    # the identifier this function receives for naming its outputs.
    with open(model_name + "_pkl", 'wb') as pkl_file:
        cPickle.dump(pkl_dict, pkl_file)

0 comments on commit 7efc1a8

Please sign in to comment.