diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1f5127d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,68 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# ROOT files
+*.root*
+
+# OSX files
+.DS_Store
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Ipython Notebook
+.ipynb_checkpoints
\ No newline at end of file
diff --git a/README.md b/README.md
index 8582fb6..014477e 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,22 @@
 # hh2yybb Event Classifier
 Event level classifier for the hh-->yybb analysis using multi-stream RNNs
-## Project Title: Develop a ML algorithm to distinguish SM hh (gamma-gamma bb) events from background
+## RNN-based Event Level Classifier for ATLAS Analysis (hh-->yybb)
 ### Purpose:
-ATLAS has a ‘cut based’ hh data analysis. To improve on this, a ML algoorithm is being trained to select the correct “second b jet” in single-tagged events ([bbyy jet classifier](https://github.com/jemrobinson/bbyy_jet_classifier)). The project proposed here will take this a step further and develop an algorithm for all events passing some minimum selection criteria, not just single-tagged events.
+‘Cut-based’ methods are the prevalent data analysis strategy in ATLAS, and they are also the strategy currently used in the hh-->yybb analysis group. To improve on this, a ML algorithm is being trained to select the correct “second b jet” in single-tagged events ([bbyy jet classifier](https://github.com/jemrobinson/bbyy_jet_classifier)). The project in this repo takes this a step further and develops an event-level classifier.
+### Current Status:
+![Pipeline](images/UGsWorkflow.png)
+
+### Deep Learning Module:
+![Net](images/MultiStreamRNN.png)
+Our goal is to train an event-level classifier that can distinguish resonant di-Higgs decays from photons+jets backgrounds. To describe an event, we combine the pre-calculated event-level information with a series of RNN streams that extract information about the event from the properties of the different types of particles in it (jets, electrons, photons, muons, ...).
+
+We phrase our project both as a classification and as a regression task. In the classification task, we consider every mass hypothesis for the di-Higgs parent particle as a separate class, and we attempt to solve a multi-class classification problem. In the regression task, we use the nominal mass of the parent particle as the continuous variable to predict.
+
 ### To-do list:
-
-* Produce the ntuples using `HGamAnalysisFramework`:
- Decide what info to include (jets and photons, but also leptons, pileup info?)
- Apply the pre-selection
- Assign truth labels using b-quark-from-Higgs labeling scheme
- Actually make the ntuples on grid – run on signal and bkg events
-
-* Analysis Coding Tasks -- Modules needed:
- 1. Read-in module that knows about the ntuple format
- 2. Data processing module that uses `scikit-learn` to:
- scale all variables to have mean zero and sd of 1
- shuffle events
- split data into training and testing samples
- 3. Plotting module to check all variables before training, both scaled and pre-scaled variables to make sure things look reasonable and there are no bugs
- 4. Training module that uses `Keras` (design RNN, test different NN architectures, etc.)
- 5. Testing module to check performance and produce ROC curves. Plot ROC curve as a function of mu (pile-up), pt of largest jet, Njets, etc.
-
-* Write presentations
+ 1. Testing module to check performance and produce ROC curves. Plot ROC curve as a function of mu (pile-up), pt of largest jet, Njets, etc.
 ---
 This project has been assigned to [@gstark12](https://github.com/gstark12) and [@jennyailin](https://github.com/jennyailin) as part of their Summer 2016 internship at CERN. They will work under the supervision of [@mickypaganini](https://github.com/mickypaganini) and Prof. Paul Tipton.
diff --git a/config_hh.json b/config_hh.json
index e0783c3..1b5ca14 100644
--- a/config_hh.json
+++ b/config_hh.json
@@ -62,6 +62,19 @@
             "photon_topoEtcone40"
         ],
         "max_length" : 3
+    },
+
+    "event":
+    {
+        "branches" :
+        [
+            "jet_n",
+            "met",
+            "met_phi",
+            "met_sumet",
+            "photon_n"
+        ],
+        "max_length" : 1
     }
   }
 }
\ No newline at end of file
diff --git a/functional_nn.py b/functional_nn.py
new file mode 100644
index 0000000..97d738a
--- /dev/null
+++ b/functional_nn.py
@@ -0,0 +1,137 @@
+from keras.layers import Masking, GRU, Input, merge, Activation, Dense, Dropout
+from keras.models import Model
+from keras.callbacks import EarlyStopping, ModelCheckpoint
+import logging
+import numpy as np
+import deepdish.io as io
+import os
+
+def train(data):
+    '''Train the multi-stream (jet, photon, event) functional model on `data`;
+    return the net with the best weights loaded.'''
+    logger = logging.getLogger('Train')
+
+    # -- extract training matrices from `data`
+    X_jets_train = data['X_jet_train']
+    X_photons_train = data['X_photon_train']
+    X_event_train = data['X_event_train']
+    y_train = data['y_train']
+
+    # -- class weight needed if we want equal class representation
+    #class_weight = {k : (1./len(np.unique(y_train))) / (float(len(y_train[y_train == k])) / float(len(y_train))) for k in np.unique(y_train)}
+    class_weight = {k : (float(len(y_train)) / float(len(np.unique(y_train)) * (len(y_train[y_train == k])))) for k in np.unique(y_train)}
+
+
+    # -- placeholders for matrix shapes
+    JET_SHAPE = X_jets_train.shape[1:]
+    PHOTON_SHAPE = X_photons_train.shape[1:]
+    EVENT_SHAPE = (X_event_train.shape[1], )
+
+    # -- input layers (3 streams)
+    jet_inputs = Input(shape=JET_SHAPE)
+    photon_inputs = Input(shape=PHOTON_SHAPE)
+    event_inputs = Input(shape=EVENT_SHAPE)
+
+    # -- jet RNN
+    jet_outputs = GRU(25, name='jet_gru')(
+        Masking(mask_value=-999, name='jet_masking')(
+            jet_inputs
+        )
+    )
+
+    # -- photon RNN
+    photon_outputs = GRU(10, name='photon_gru')(
+        Masking(mask_value=-999, name='photon_masking')(
+            photon_inputs
+        )
+    )
+
+    # -- merge (jet, photon) RNNs
+    rnn_outputs = merge([jet_outputs, photon_outputs], mode='concat')
+    rnn_outputs = Dense(16, activation='relu')(
+        #Dropout(0.3)(
+        #Dense(32, activation='relu')(
+        #Dropout(0.3)(
+        rnn_outputs
+        #)
+        #)
+        #)
+    )
+
+    # -- merge event level info as well
+    merged_streams = merge([rnn_outputs, event_inputs], mode='concat')
+
+    # -- final feed-forward processing of info up to output layer with softmax
+    yhat = Dense(len(np.unique(y_train)), activation='softmax')(
+        Dropout(0.2)(
+            #Dense(8, activation='relu')(
+            #Dropout(0.2)(
+            Dense(8, activation='relu')(
+                Dropout(0.2)(
+                    merged_streams
+                )
+            )
+            #)
+            #)
+        )
+    )
+
+    # -- build Model from symbolic structure above
+    net = Model(input=[jet_inputs, photon_inputs, event_inputs], output=yhat)
+    net.summary()
+    net.compile('adam', 'sparse_categorical_crossentropy', metrics=['accuracy'])
+
+    try:
+        weights_path = os.path.join('weights', 'functional_combined_rnn-progress.h5')
+        logger.info('Trying to load weights from ' + weights_path)
+        net.load_weights(weights_path)
+        logger.info('Weights found and loaded from ' + weights_path)
+    except IOError:
+        logger.info('Pre-trained weights not found in ' + weights_path)
+
+    logger.info('Training the net')
+    # -- train!
+    try:
+        net.fit([X_jets_train, X_photons_train, X_event_train],
+            y_train, batch_size=16,
+            class_weight=class_weight,
+            callbacks = [
+                EarlyStopping(verbose=True, patience=20, monitor='val_loss'),
+                ModelCheckpoint(weights_path,
+                    monitor='val_loss', verbose=True, save_best_only=True)
+            ],
+            nb_epoch=100, validation_split=0.2)
+
+    except KeyboardInterrupt:
+        print 'Training ended early.'
+
+    # -- load best weights back into the net
+    net.load_weights(weights_path)
+    return net
+
+def test(net, data):
+    '''
+    Args:
+    -----
+        net: a trained keras Model instance
+        data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
+              data = {
+                "X_jet_train" : X_jet_train,
+                "X_jet_test" : X_jet_test,
+                "X_photon_train" : X_photon_train,
+                "X_photon_test" : X_photon_test,
+                "y_train" : y_train,
+                "y_test" : y_test,
+                "w_train" : w_train,
+                "w_test" : w_test
+              }
+    Returns:
+    --------
+        yhat: numpy array of dim [n_ev, n_classes] with the net predictions on the test data
+    '''
+    yhat = net.predict([data['X_jet_test'], data['X_photon_test'], data['X_event_test']],
+        verbose = True, batch_size = 512)
+    io.save(os.path.join('output', 'yhat.h5'), yhat)
+    # -- example of other quantities that can be evaluated from yhat
+    #class_predictions = [np.argmax(ev) for ev in yhat]
+    return yhat
\ No newline at end of file
diff --git a/images/MultiStreamRNN.png b/images/MultiStreamRNN.png
new file mode 100644
index 0000000..a28b04e
Binary files /dev/null and b/images/MultiStreamRNN.png differ
diff --git a/images/UGsWorkflow.png b/images/UGsWorkflow.png
new file mode 100644
index 0000000..5cff6dc
Binary files /dev/null and b/images/UGsWorkflow.png differ
diff --git a/images/UGsWorkflow_v1.png b/images/UGsWorkflow_v1.png
new file mode 100644
index 0000000..e942e71
Binary files /dev/null and b/images/UGsWorkflow_v1.png differ
diff --git a/nn.py b/nn.py
new file mode 100644
index 0000000..ddccb25
--- /dev/null
+++ b/nn.py
@@ -0,0 +1,66 @@
+from keras.models import Sequential
+from keras.layers.core import Activation, Dense, Dropout, Lambda
+from keras.layers import Masking, GRU, Merge, Input, merge
+from keras.callbacks import EarlyStopping, ModelCheckpoint
+import logging
+import numpy as np
+
+def train(data):
+
+    # -- extract training matrices from `data`
+    X_jets_train = data['X_jet_train']
+    X_photons_train = data['X_photon_train']
+    X_event_train = data['X_event_train']
+    y_train = data['y_train']
+
+    # -- build net
+    jet_channel = Sequential()
+    photon_channel = Sequential()
+    event_level = Sequential()
+
+    JET_SHAPE = X_jets_train.shape[1:]
+    PHOTON_SHAPE = X_photons_train.shape[1:]
+    EVENT_SHAPE = X_event_train.shape[1]
+
+    jet_channel.add(Masking(mask_value=-999, input_shape=JET_SHAPE, name='jet_masking'))
+    jet_channel.add(GRU(25, name='jet_gru'))
+    jet_channel.add(Dropout(0.3, name='jet_dropout'))
+
+    photon_channel.add(Masking(mask_value=-999, input_shape=PHOTON_SHAPE, name='photon_masking'))
+    photon_channel.add(GRU(10, name='photon_gru'))
+    photon_channel.add(Dropout(0.3, name='photon_dropout'))
+
+    event_level.add(Lambda(lambda x: x, input_shape=(EVENT_SHAPE, )))
+
+    combined_rnn = Sequential()
+    combined_rnn.add(Merge([jet_channel, photon_channel, event_level], mode='concat'))
+    combined_rnn.add(Dense(32, activation='relu'))
+    # combined_rnn.add(Dropout(0.3))
+    # combined_rnn.add(Dense(32, activation='relu'))
+    # combined_rnn.add(Dropout(0.3))
+    # combined_rnn.add(Dense(16, activation='relu'))
+    combined_rnn.add(Dropout(0.3))
+    combined_rnn.add(Dense(8, activation='relu'))
+    combined_rnn.add(Dropout(0.3))
+    combined_rnn.add(Dense(len(np.unique(y_train)), activation='softmax'))
+
+    combined_rnn.compile('adam', 'sparse_categorical_crossentropy')
+
+    logger = logging.getLogger('Train')
+    logger.info('Compiling the net')
+    try:
+        combined_rnn.fit([X_jets_train, X_photons_train, X_event_train],
+            y_train, batch_size=16,
+            callbacks = [
+                EarlyStopping(verbose=True, patience=10, monitor='val_loss'),
+                ModelCheckpoint('./combinedrnn-progress',
+                    monitor='val_loss', verbose=True, save_best_only=True)
+            ],
+            nb_epoch=30, validation_split = 0.2)
+
+    except KeyboardInterrupt:
+        print 'Training ended early.'
+
+    return combined_rnn
\ No newline at end of file
diff --git a/pipeline.py b/pipeline.py
index 49bbdb0..b272634 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -11,6 +11,8 @@
 #from plotting import plot_inputs, plot_performance
 #from nn_model import train, test
+#from nn_model import train, test
+#from functional_nn import train, test
 
 def main(json_config, model_name, tree_name):
     '''
@@ -85,11 +87,11 @@ def sha(s):
     Plots should be saved out a pdf with informative names
     '''
     logger.info('Saving input distributions in ./plots/')
-    plot_inputs(data, particles_dict.keys())
+    plot_inputs(data, particles_dict)
 
     logger.info('Padding')
     for key in data:
-        if key.startswith('X_'):
+        if ((key.startswith('X_')) and ('event' not in key)): # no padding for `event` matrix
             data[key] = padding(data[key], particles_dict[key.split('_')[1]]['max_length'])
             # ^ assuming naming convention: X__train, X__test
@@ -110,6 +112,7 @@ def sha(s):
     # # -- test
     # # evaluate performance on the test set
     yhat=NN_test(net, data)
+
     # # -- plot performance
     #plot_NN(yhat, data)
diff --git a/plotting.py b/plotting.py
index 37a98a9..ed03fcd 100644
--- a/plotting.py
+++ b/plotting.py
@@ -7,8 +7,9 @@
 from sklearn.preprocessing import LabelEncoder
 from viz import calculate_roc, ROC_plotter, add_curve
 import cPickle
+from sklearn.metrics import confusion_matrix
 
-def _plot_X(train, test, y_train, y_test, w_train, w_test, varlist, le, feature):
+def _plot_X(train, test, y_train, y_test, w_train, w_test, le, particle, particles_dict):
     '''
     Args:
         train: ndarray [n_ev_train, n_muon_feat] containing the events allocated for training
@@ -19,65 +20,69 @@ def _plot_X(train, test, y_train, y_test, w_train, w_test, varlist, le, feature)
         w_test: ndarray [n_ev_test, 1] containing the shuffled EventWeights allocated for testing
         varlist: list of names of branches like 'jet_px', 'photon_E', 'muon_Iso'
         le: LabelEncoder to transform numerical y back to its string values
-        feature: a string like 'Jet', 'Muon', 'Photon'
+        particle: a string like 'jet', 'muon', 'photon', ...
+        particles_dict: dict of particle streams from the json config, mapping each stream name (e.g. 'jet', 'photon', 'event') to its 'branches' list and 'max_length'
     Returns:
         Saves .pdf histograms for each feature-related branch plotting the training and test sets for each class
     '''
     # -- extend w and y arrays to match the total number of particles per event
-    w_train_ext = np.array(pup.flatten([[w] * (len(train[i, 0])) for i, w in enumerate(w_train)]))
-    w_test_ext = np.array(pup.flatten([[w] * (len(test[i, 0])) for i, w in enumerate(w_test)]))
-    y_train_ext = np.array(pup.flatten([[y] * (len(train[i, 0])) for i, y in enumerate(y_train)]))
-    y_test_ext = np.array(pup.flatten([[y] * (len(test[i, 0])) for i, y in enumerate(y_test)]))
+    try:
+        w_train = np.array(pup.flatten([[w] * (len(train[i, 0])) for i, w in enumerate(w_train)]))
+        w_test = np.array(pup.flatten([[w] * (len(test[i, 0])) for i, w in enumerate(w_test)]))
+        y_train = np.array(pup.flatten([[y] * (len(train[i, 0])) for i, y in enumerate(y_train)]))
+        y_test = np.array(pup.flatten([[y] * (len(test[i, 0])) for i, y in enumerate(y_test)]))
+    except TypeError: # `event` has a different structure that does not require all this
+        pass
+
+    varlist = particles_dict[particle]['branches']
-    # -- we keep a column counter because `varlist` contains all variables for all particles,
-    # but each X matrix only contains as many columns as the number of variables
-    # related to that specific paritcle type
-    column_counter = 0
     # -- loop through the variables
-    for key in varlist:
-        if key.startswith(feature):
-            flat_train = pup.flatten(train[:, column_counter])
-            flat_test = pup.flatten(test[:, column_counter])
-            matplotlib.rcParams.update({'font.size': 16})
-            fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
+    for column_counter, key in enumerate(varlist):
+
+        flat_train = pup.flatten(train[:, column_counter])
+        flat_test = pup.flatten(test[:, column_counter])
+
+        matplotlib.rcParams.update({'font.size': 16})
+        fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
+
+        bins = np.linspace(
+            min(min(flat_train), min(flat_test)),
+            max(max(flat_train), max(flat_test)),
+            30)
+        color = iter(cm.rainbow(np.linspace(0, 1, len(np.unique(y_train)))))
+
+        # -- loop through the classes
+        for k in range(len(np.unique(y_train))):
+            c = next(color)
+            _ = plt.hist(flat_train[y_train == k],
+                bins=bins,
+                histtype='step',
+                normed=True,
+                label='Train - ' + le.inverse_transform(k),
+                weights=w_train[y_train == k],
+                color=c,
+                linewidth=1)
+            _ = plt.hist(flat_test[y_test == k],
+                bins=bins,
+                histtype='step',
+                normed=True,
+                label='Test - ' + le.inverse_transform(k),
+                weights=w_test[y_test == k],
+                color=c,
+                linewidth=2,
+                linestyle='dashed')
-            bins = np.linspace(
-                min(min(flat_train), min(flat_test)),
-                max(max(flat_train), max(flat_test)),
-                30)
-            color = iter(cm.rainbow(np.linspace(0, 1, len(np.unique(y_train)))))
-            # -- loop through the classes
-            for k in range(len(np.unique(y_train))):
-                c = next(color)
-                _ = plt.hist(flat_train[y_train_ext == k],
-                    bins=bins,
-                    histtype='step',
-                    normed=True,
-                    label='Train - ' + le.inverse_transform(k),
-                    weights=w_train_ext[y_train_ext == k],
-                    color=c,
-                    linewidth=1)
-                _ = plt.hist(flat_test[y_test_ext == k],
-                    bins=bins,
-                    histtype='step',
-                    normed=True,
-                    label='Test - ' + le.inverse_transform(k),
-                    weights=w_test_ext[y_test_ext == k],
-                    color=c,
-                    linewidth=2,
-                    linestyle='dashed')
-            plt.title(key)
-            plt.yscale('log')
-            plt.ylabel('Weighted Events')
-            plt.legend(prop={'size': 10}, fancybox=True, framealpha=0.5)
-            try:
-                plt.savefig(os.path.join('plots', key + '.pdf'))
-            except IOError:
-                os.makedirs('plots')
-                plt.savefig(os.path.join('plots', key + '.pdf'))
-            column_counter += 1
+        plt.title(key)
+        plt.yscale('log')
+        plt.ylabel('Weighted Events')
+        plt.legend(prop={'size': 10}, fancybox=True, framealpha=0.5)
+        try:
+            plt.savefig(os.path.join('plots', key + '.pdf'))
+        except IOError:
+            os.makedirs('plots')
+            plt.savefig(os.path.join('plots', key + '.pdf'))
-def plot_inputs(data, particle_names):
+def plot_inputs(data, particles_dict):
     '''
     Args:
         data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
@@ -91,13 +96,14 @@
             "w_train" : w_train,
             "w_test" : w_test
           }
-        particle_names: list of strings, names of particle streams
+        #particle_names: list of strings, names of particle streams
+        particles_dict: dict of particle streams from the json config (same object passed to _plot_X)
     Returns:
         Saves .pdf histograms plotting the training and test sets of each class for each feature
     '''
-    for particle in particle_names:
+    for particle in particles_dict.keys():
         _plot_X(
             data['X_' + particle + '_train'],
             data['X_' + particle + '_test'],
@@ -105,10 +111,11 @@
             data['y_test'],
             data['w_train'],
             data['w_test'],
-            data['varlist'],
             data['LabelEncoder'],
-            particle
+            particle,
+            particles_dict
         )
+
 def plot_NN(yhat, data):
     '''
     Args:
@@ -173,3 +180,41 @@
         fig.savefig('/Users/gigifstark/CERN_Work/HH2YYBB/roc'+ str(k)+'.pdf')
     cPickle.dump(pkl_dict, open(model_name+"_pkl", 'wb'))
 
+def plot_confusion(yhat, data):
+    '''
+    Args:
+        yhat: numpy array of dim [n_ev, n_classes] with the net predictions on the test data
+        data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
+          data = {
+            "X_jet_train" : X_jet_train,
+            "X_jet_test" : X_jet_test,
+            "X_photon_train" : X_photon_train,
+            "X_photon_test" : X_photon_test,
+            "y_train" : y_train,
+            "y_test" : y_test,
+            "w_train" : w_train,
+            "w_test" : w_test
+          }
+    Returns:
+        Saves the row-normalized confusion matrix to confusion.pdf
+    '''
+
+    y_test = data['y_test']
+
+    def _plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
+        plt.imshow(cm, interpolation='nearest', cmap=cmap)
+        plt.title(title)
+        plt.colorbar()
+        tick_marks = np.arange(len(np.unique(y_test)))
+        plt.xticks(tick_marks, sorted(np.unique(y_test)))
+        plt.yticks(tick_marks, sorted(np.unique(y_test)))
+        plt.tight_layout()
+        plt.ylabel('True label')
+        plt.xlabel('Predicted label')
+
+    cm = confusion_matrix(y_test, np.argmax(yhat, axis=1))
+    # Normalize the confusion matrix by row (i.e by the number of samples
+    # in each class)
+    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+    _plot_confusion_matrix(cm_normalized, title='Normalized confusion matrix')
+    plt.savefig('confusion.pdf')
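
Usage sketch (not part of the diff above; the exact wiring inside pipeline.py is an assumption): once the `data` dictionary has been built in the format documented in `functional_nn.test`, the functional net and the new confusion-matrix plot could be chained roughly as follows, reusing only functions defined in this diff.

    # sketch only -- assumes `data` already holds the X_jet_*, X_photon_*, X_event_*, y_* and w_* arrays
    from functional_nn import train as NN_train, test as NN_test
    from plotting import plot_confusion

    net = NN_train(data)        # multi-stream RNN; best weights reloaded from weights/functional_combined_rnn-progress.h5
    yhat = NN_test(net, data)   # [n_ev, n_classes] softmax scores, also saved to output/yhat.h5
    plot_confusion(yhat, data)  # row-normalized confusion matrix saved to confusion.pdf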