Commit 274864c: Clean before merging

mickypaganini committed Jul 22, 2016
2 parents: aa050f3 + 0b67352
Showing 10 changed files with 311 additions and 221 deletions.
80 changes: 0 additions & 80 deletions config_hh.json

This file was deleted.

10 changes: 6 additions & 4 deletions data_processing.py
@@ -8,6 +8,7 @@
import logging
from collections import OrderedDict
from itertools import izip
import tqdm

logger = logging.getLogger('data_processing')

@@ -84,10 +85,10 @@ def _make_df(val, key, branches):
if mode == 'classification':
df['y'] = key
elif mode == 'regression':
try:
df['y'] = int(key[1:])
except ValueError:
if key == 'bkg':
df['y'] = 0
else:
df['y'] = int(key[1:])
return df

all_events = pd.concat([_make_df(val, key, branches) for key, val in class_files_dict.iteritems()], ignore_index=True)
@@ -107,7 +108,7 @@ def _make_df(val, key, branches):

#w = all_events['HGamEventInfoAuxDyn.yybb_weight'].values
w = np.ones(len(y))

return X, y, w, le


@@ -168,6 +169,7 @@ def shuffle_split_scale(X, y, w):

data = OrderedDict()
for particle, (train, test) in zip(X.keys(), _pairwise(data_tuple[:(2 * len(X))])):
print particle
data['X_' + particle + '_train'], data['X_' + particle+ '_test'] = _scale(train, test)

data['y_train'], data['y_test'], data['w_train'], data['w_test'] = data_tuple[-4:]
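A note on the _make_df change above: the regression target is now assigned with an explicit check for the background key rather than by catching a ValueError. A minimal sketch of the new behavior, assuming signal keys encode a numeric value after a one-character prefix (the 'X350' key below is hypothetical; only 'bkg' appears in the diff):

    def _regression_target(key):
        # Background samples get target 0; any other key is assumed to
        # encode a numeric value after a one-character prefix.
        if key == 'bkg':
            return 0
        return int(key[1:])

    assert _regression_target('bkg') == 0
    assert _regression_target('X350') == 350  # hypothetical signal key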
4 changes: 4 additions & 0 deletions nets/__init__.py
@@ -0,0 +1,4 @@
import nn
import nn_with_modes
import nn_combined
import functional_nn
File renamed without changes.
File renamed without changes.
109 changes: 109 additions & 0 deletions nets/nn_combined.py
@@ -0,0 +1,109 @@
from keras.models import Sequential
from keras.layers.core import Activation, Dense, Dropout
from keras.layers import Masking, GRU, Merge, Input, merge, Lambda
from keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
import time
import logging
import os

def NN_train(data, model_name, mode):
'''
Args:
data: dictionary containing relevant data
Returns:
recurrent neural network: A combined recurrent neural network trained on the different classes of the data
'''

#defines training sets of different classes
X_jets_train = data['X_jet_train']
X_photons_train = data['X_photon_train']
X_event_train = data['X_event_train']
y_train = data['y_train']
X_muons_train=data['X_muon_train']
X_electrons_train=data['X_electron_train']

#set up sequential neural networks for the jet and photon classes
jet_channel = Sequential()
photon_channel = Sequential()
event_level = Sequential()
muon_channel=Sequential()
electron_channel=Sequential()

#declaring the shape of the first row of each class matrix
JET_SHAPE = X_jets_train.shape[1:]
PHOTON_SHAPE = X_photons_train.shape[1:]
EVENT_SHAPE = X_event_train.shape[1]
MUON_SHAPE = X_muons_train.shape[1:]
ELECTRON_SHAPE = X_electrons_train.shape[1:]

#adding layers to the jet and photon class neural networks
jet_channel.add(Masking(mask_value=-999, input_shape=JET_SHAPE, name='jet_masking'))
jet_channel.add(GRU(25, name='jet_gru'))
jet_channel.add(Dropout(0.3, name='jet_dropout'))

photon_channel.add(Masking(mask_value=-999, input_shape=PHOTON_SHAPE, name='photon_masking'))
photon_channel.add(GRU(10, name='photon_gru'))
photon_channel.add(Dropout(0.3, name='photon_dropout'))

event_level.add(Lambda(lambda x: x, input_shape=(EVENT_SHAPE, )))

muon_channel.add(Masking(mask_value=-999, input_shape=MUON_SHAPE, name='muon_masking'))
muon_channel.add(GRU(10, name='muon_gru'))
muon_channel.add(Dropout(0.3, name='muon_dropout'))

electron_channel.add(Masking(mask_value=-999, input_shape=ELECTRON_SHAPE, name='electron_masking'))
electron_channel.add(GRU(10, name='electron_gru'))
electron_channel.add(Dropout(0.3, name='electron_dropout'))


#combining the jet and photon classes to make a combined recurrent neural network
combined_rnn = Sequential()
combined_rnn.add(Merge([jet_channel, photon_channel, event_level, muon_channel, electron_channel], mode='concat'))
combined_rnn.add(Dense(36, activation='relu'))
combined_rnn.add(Dropout(0.3))
combined_rnn.add(Dense(24, activation='relu'))
combined_rnn.add(Dropout(0.3))
combined_rnn.add(Dense(12, activation='relu'))
combined_rnn.add(Dropout(0.3))
if mode == 'classification':
combined_rnn.add(Dense(6, activation='softmax'))
combined_rnn.compile('adam', 'sparse_categorical_crossentropy')

elif mode == 'regression':
combined_rnn.add(Dense(1))
combined_rnn.compile('adam', 'mae')

try:
weights_path = os.path.join('weights', 'combinedrnn-progress'+model_name+mode+'.h5')
combined_rnn.load_weights(weights_path)
print "Loaded Pre-trained Weights"
except IOError:
print 'Pre-trained weights not found'

logger = logging.getLogger('Train')
logger.info('Compiling the net')
try:
combined_rnn.fit([X_jets_train, X_photons_train, X_event_train, X_muons_train, X_electrons_train],
y_train, batch_size=16, class_weight={
k : (float(len(y_train)) / float(len(np.unique(y_train)) * (len(y_train[y_train == k])))) for k in np.unique(y_train)
},
callbacks = [
EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
ModelCheckpoint(weights_path,
monitor='val_loss', verbose=True, save_best_only=True)
],
nb_epoch=1, validation_split = 0.2)

except KeyboardInterrupt:
print 'Training ended early.'

#saving the combined recurrent neural network
combined_rnn.load_weights(weights_path)
combined_rnn_json=combined_rnn.to_json()
open('TestModel'+model_name+'.json','w').write(combined_rnn_json)

return combined_rnn
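The fit call above balances the loss across classes with per-class weights w_k = N_total / (n_classes * N_k), written inline as a dict comprehension. A standalone sketch of the same formula with a tiny worked example (the toy labels are illustrative only):

    import numpy as np

    def balanced_class_weights(y_train):
        # w_k = N_total / (n_classes * N_k): rarer classes get larger
        # weights, so every class contributes equally to the loss.
        classes = np.unique(y_train)
        n_total = float(len(y_train))
        return {
            k: n_total / (len(classes) * float((y_train == k).sum()))
            for k in classes
        }

    y = np.array([0, 0, 0, 1])
    weights = balanced_class_weights(y)
    # {0: 4/(2*3) = 0.667, 1: 4/(2*1) = 2.0}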
14 changes: 6 additions & 8 deletions nn_with_modes.py → nets/nn_with_modes.py
@@ -7,9 +7,7 @@
import matplotlib.pyplot as plt
import os

MODEL_NAME = 'jennymodes_nobtag'

def train(data, mode):
def train(data, model_name, mode):
'''
Args:
data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
@@ -23,6 +21,7 @@ def train(data, mode):
"w_train" : w_train,
"w_test" : w_test
}
model_name: string, nn identifier
mode: a string specifying the type of task, either 'regression' or 'classification'
Returns:
combine_rnn: a Sequential trained on data
@@ -81,7 +80,7 @@ def train(data, mode):
combined_rnn.summary()

try:
weights_path = os.path.join('weights', MODEL_NAME + '-progress.h5')
weights_path = os.path.join('weights', model_name + '-progress.h5')
combined_rnn.load_weights(weights_path)
except IOError:
print 'Pre-trained weights not found'
@@ -106,9 +105,9 @@ def train(data, mode):
# -- load best weights back into the net
combined_rnn.load_weights(weights_path)

return combined_rnn, MODEL_NAME
return combined_rnn

def test(net, data):
def test(net, data, model_name):
'''
Args:
net: a Sequential instance trained on data
@@ -153,9 +152,8 @@ def test(net, data):
X_photon_test = data['X_photon_test']
X_muon_test = data['X_muon_test']
X_event_test = data['X_event_test']
y_test= data ['y_test']

yhat = net.predict([X_jet_test, X_photon_test, X_muon_test, X_event_test], verbose=True, batch_size=1024)
np.save('yhat_' + MODEL_NAME + '.npy', yhat)
np.save('yhat_' + model_name + '.npy', yhat)

return yhat
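With this change the hard-coded MODEL_NAME constant is gone: train and test both take a model_name argument, which also sets the checkpoint path ('weights/' + model_name + '-progress.h5') and the prediction file ('yhat_' + model_name + '.npy'). A usage sketch, assuming data is the OrderedDict produced by shuffle_split_scale (the model name 'my_model' is a placeholder):

    from nets import nn_with_modes

    net = nn_with_modes.train(data, 'my_model', 'classification')
    yhat = nn_with_modes.test(net, data, 'my_model')
    # checkpoints land in weights/my_model-progress.h5,
    # predictions in yhat_my_model.npy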
51 changes: 25 additions & 26 deletions pipeline.py
@@ -1,14 +1,16 @@
import json
from data_processing import read_in, shuffle_split_scale, padding
import numpy as np
import pandautils as pup
import cPickle
import utils
import logging
from plotting import plot_inputs, plot_confusion, plot_regression, save_roc_curves
from nn_with_modes import train, test
import deepdish.io as io

from data_processing import read_in, shuffle_split_scale, padding
from plotting import plot_performance
from nets import nn_with_modes
import utils

def main(json_config, mode, tree_name):
def main(json_config, tree_name, model_name, mode):
'''
Args:
-----
@@ -55,7 +57,7 @@ def sha(s):
try:
logger.info('Attempting to read from {}'.format(pickle_name))
data = cPickle.load(open(pickle_name, 'rb'))
logger.info('Pre-processed data found and loaded from pickle')
logger.info('Pre-processed data found and loaded from pickle')
# -- otherwise, process the new data
except IOError:
logger.info('Pre-processed data not found in {}'.format(pickle_name))
@@ -64,8 +66,8 @@ def sha(s):
X, y, w, le = read_in(class_files_dict, tree_name, particles_dict, mode)

# -- shuffle, split samples into train and test set, scale features
data = shuffle_split_scale(X, y, w)
data = shuffle_split_scale(X, y, w)

data.update({
'varlist' : [
branch
@@ -74,7 +76,6 @@ def sha(s):
],
'LabelEncoder' : le
})

# -- plot distributions:
'''
This should produce normed, weighted histograms of the input distributions for all variables
@@ -96,23 +97,20 @@ def sha(s):
open(pickle_name, 'wb'),
protocol=cPickle.HIGHEST_PROTOCOL)

# # -- train
# # design a Keras NN with three RNN streams (jets, photons, muons)
# # combine the outputs and process them through a bunch of FF layers
# # use a validation split of 20%
# # save out the weights to hdf5 and the model to yaml
net, model_name = train(data, mode)
# -- plot distributions:

# # -- test
# # evaluate performance on the test set
yhat = test(net, data)
# -- train
# design a Keras NN with three RNN streams (jets, photons, muons)
# combine the outputs and process them through a bunch of FF layers
# use a validation split of 20%
# save out the weights to hdf5 and the model to json
net = nn_with_modes.train(data, model_name, mode)
yhat = nn_with_modes.test(net, data, model_name)

# # -- plot performance by mode
if mode == 'regression':
plot_regression(yhat, data)
if mode == 'classification':
plot_confusion(yhat, data)
save_roc_curves(yhat, data, model_name)
# -- plot performance by mode
plot_performance(yhat, data, model_name, mode)

# --------------------------------------------------------------

if __name__ == '__main__':

@@ -124,12 +122,13 @@ def sha(s):
# -- read in arguments
parser = argparse.ArgumentParser()
parser.add_argument('config', help="path to JSON file that specifies classes and corresponding ROOT files' paths")
parser.add_argument('model_name', help="name of the set from particular network")
parser.add_argument('mode', help="classification or regression")
parser.add_argument('--tree', help="name of the tree to open in the ntuples", default='mini')
parser.add_argument('--tree', help="name of the tree to open in the ntuples", default='CollectionTree')
args = parser.parse_args()

if args.mode != 'classification' and args.mode != 'regression':
raise ValueError('Mode must be classification or regression')

# -- pass arguments to main
sys.exit(main(args.config, args.mode, args.tree))
sys.exit(main(args.config, args.tree, args.model_name, args.mode))
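The CLI now takes model_name as a second positional argument, and the default --tree changed from 'mini' to 'CollectionTree'. An example invocation (the config path and model name are placeholders):

    python pipeline.py my_config.json my_model classification
    python pipeline.py my_config.json my_model regression --tree CollectionTree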