diff --git a/config_hh.json b/config_hh.json
new file mode 100644
index 0000000..74b9eb6
--- /dev/null
+++ b/config_hh.json
@@ -0,0 +1,67 @@
+{
+    "classes" :
+    {
+        "X400" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_X400tohh_yybb.root"
+        ],
+        "X350" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_X350tohh_yybb.root"
+        ],
+        "X325" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_X325tohh_yybb.root"
+        ],
+        "H300" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_H300_Xtohh_yybb.root"
+        ],
+        "X275" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_X275tohh_yybb.root"
+        ],
+        "bkg" :
+        [
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_ybbj.root",
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_ybjj.root",
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_yjjj.root",
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_yybb.root",
+            "/Users/mp744/Documents/CERN/hh2yybb/hh2yybbEventClassifier/ntuples/MGPy8_yybj.root"
+        ]
+    },
+
+    "particles" :
+    {
+        "jet" :
+        {
+            "branches" :
+            [
+                "jet_pt",
+                "jet_eta",
+                "jet_phi",
+                "jet_m",
+                "jet_Jvt",
+                "jet_MV2c10_FixedCutBEff_60",
+                "jet_MV2c10_FixedCutBEff_70",
+                "jet_MV2c10_FixedCutBEff_77",
+                "jet_MV2c10_FixedCutBEff_85"
+            ],
+            "max_length" : 5
+        },
+
+        "photon":
+        {
+            "branches" :
+            [
+                "photon_pt",
+                "photon_eta",
+                "photon_phi",
+                "photon_isTight",
+                "photon_ptcone20",
+                "photon_topoEtcone40"
+            ],
+            "max_length" : 3
+        }
+    }
+}
\ No newline at end of file
diff --git a/data_processing.py b/data_processing.py
index e803038..8275b03 100644
--- a/data_processing.py
+++ b/data_processing.py
@@ -5,21 +5,19 @@
 from sklearn.cross_validation import train_test_split
 import pandautils as pup
 import warnings
+import logging
+from collections import OrderedDict
+from itertools import izip
 
-def _build_X(events, phrase, exclude_vars):
-    '''slices related branches into a numpy array
-    Args:
-        events: a pandas DataFrame containing the complete data by event
-        phrase: a string like 'Jet' corresponding to the related branches wanted
-    Returns:
-        output_array: a numpy array containing data only pertaining to the related branches
-    '''
-    branch_names = [key for key in events.keys() if (key.startswith(phrase) and (key not in exclude_vars))]
-    sliced_events = events[branch_names].as_matrix()
-    return sliced_events, branch_names
+logger = logging.getLogger('data_processing')
 
+def _pairwise(iterable):
+    '''s -> (s0, s1), (s2, s3), (s4, s5), ...'''
+    a = iter(iterable)
+    return izip(a, a)
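For reference, the new `_pairwise` helper groups a flat sequence into consecutive pairs; `shuffle_split_scale` below uses it to re-associate the flat output of `train_test_split` with each particle stream. A minimal standalone sketch (Python 2; the values are illustrative, this is not part of the diff):

```python
from itertools import izip

def _pairwise(iterable):
    '''s -> (s0, s1), (s2, s3), (s4, s5), ...'''
    a = iter(iterable)
    return izip(a, a)

# Pairs up a flat [train, test, train, test, ...] sequence per stream.
print list(_pairwise(['jet_train', 'jet_test', 'photon_train', 'photon_test']))
# [('jet_train', 'jet_test'), ('photon_train', 'photon_test')]
```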
 
-def read_in(class_files_dict, exclude_vars):
+
+def read_in(class_files_dict, tree_name, particles):
     '''
     takes in dict mapping class names to list of root files, loads them and slices them into ML format
     Args:
         class_files_dict: dictionary that links the names of the different classes
                           in the classification problem to the paths of the ROOT files
                           associated with each class; for example:
@@ -40,43 +38,67 @@
         ],
         ...
     }
-        exclude_vars: list of strings of names of branches not to be used for training
+        tree_name: string, name of the tree to open in the ntuples
+        particles: dictionary that provides information about the different particle streams
+                   in the events; for example:
+            {
+                "jet" :
+                {
+                    "branches" :
+                    [
+                        "jet_pt",
+                        "jet_eta"
+                    ],
+                    "max_length" : 5
+                },
+                "photon" :
+                {
+                    "branches" :
+                    [
+                        "photon_pt",
+                        "photon_eta"
+                    ],
+                    "max_length" : 3
+                }
+            }
     Returns:
-        X_jets: ndarray [n_ev, n_jet_feat] containing jet related branches
-        X_photons: ndarray [n_ev, n_photon_feat] containing photon related branches
-        X_muons: ndarray [n_ev, n_muon_feat] containing muon related branches
+        X: an OrderedDict containing the feature matrices for the different particle types, e.g.:
+           X = {
+               "jet" : X_jet,
+               "photon" : X_photon,
+               "muon" : X_muon
+           }
+           where each X_<particle> is an ndarray of dimensions [n_ev, n_features]
         y: ndarray [n_ev, 1] containing the truth labels
-        w: ndarray [n_ev, 1] containing EventWeights
-        jet_branches + photon_branches + muon_branches = list of strings that concatenates the individual
-        lists of variables for each particle type, e.g.:
-        ['Jet_Px', 'Jet_E', 'Muon_ID', 'Photon_Px']
+        w: ndarray [n_ev, 1] containing the event weights
+        le: LabelEncoder to transform numerical y back to its string values
     '''
 
     #convert files to pd data frames, assign key to y, concat all files
     def _make_df(val, key):
-        df = pup.root2panda(val, 'events')
+        df = pup.root2panda(val, tree_name)
         df['y'] = key
         return df
     all_events = pd.concat([_make_df(val, key) for key, val in class_files_dict.iteritems()], ignore_index=True)
-
-    #slice related branches
-    X_jets, jet_branches = _build_X(all_events, 'Jet', exclude_vars)
-    X_photons, photon_branches = _build_X(all_events, 'Photon', exclude_vars)
-    X_muons, muon_branches = _build_X(all_events, 'Muon', exclude_vars)
+    X = OrderedDict()
+    for particle_name, particle_info in particles.iteritems():
+        logger.info('Building X_{}'.format(particle_name))
+        X[particle_name] = all_events[particle_info["branches"]].values
 
     #transform string labels to integer classes
     le = LabelEncoder()
     y = le.fit_transform(all_events['y'].values)
 
-    w = all_events['EventWeight'].values
+    w = all_events['yybb_weight'].values
 
-    return X_jets, X_photons, X_muons, y, w, jet_branches + photon_branches + muon_branches
+    return X, y, w, le
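The per-particle slicing above reduces to taking a column subset per stream. A toy illustration with made-up stand-in data (the DataFrame below is hypothetical, not a real ntuple):

```python
import pandas as pd
from collections import OrderedDict

# Each entry of X is just the subset of columns listed under that
# particle's "branches", exactly as in the read_in loop above.
particles = {
    'jet': {'branches': ['jet_pt', 'jet_eta'], 'max_length': 5},
    'photon': {'branches': ['photon_pt'], 'max_length': 3},
}
all_events = pd.DataFrame({
    'jet_pt': [40.0, 55.0], 'jet_eta': [0.1, -1.2],
    'photon_pt': [40.0, 40.0], 'y': ['bkg', 'X400'],
})
X = OrderedDict()
for particle_name, particle_info in particles.iteritems():
    X[particle_name] = all_events[particle_info['branches']].values
print X['jet'].shape  # (2, 2)
```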
 
 
 def _scale(matrix_train, matrix_test):
     '''
-    Use scikit learn to sclae features to 0 mean, 1 std.
+    Use scikit learn to scale features to 0 mean, 1 std.
     Because of event-level structure, we need to flatten X, scale, and then reshape back into event format.
     Args:
         matrix_train: X_train [n_ev_train, n_particle_features], numpy ndarray of unscaled features of events allocated for training
@@ -99,47 +121,62 @@
     return matrix_train, matrix_test
 
 
-def shuffle_split_scale(X_jets, X_photons, X_muons, y, w):
+def shuffle_split_scale(X, y, w):
     '''
-    takes in X_jets, X_photons, X_Muons, y and w nd arrays, shuffles them, splits them into test (40%) and training (60%) sets
+    Shuffle data, split it into test (40%) and training (60%) sets, scale X
     Args:
-        X_jets: ndarray [n_ev, n_jet_feat] containing jet related branches
-        X_photons: ndarray [n_ev, n_photon_feat] containing photon related branches
-        X_muons: ndarray [n_ev, n_muon_feat] containing muon related branches
+        X: an OrderedDict containing the feature matrices for the different particle types, e.g.:
+           X = {
+               "jet" : X_jet,
+               "photon" : X_photon,
+               "muon" : X_muon
+           }
+           where each X_<particle> is an ndarray of dimensions [n_ev, n_features]
         y: ndarray [n_ev, 1] containing the truth labels
-        w: ndarray [n_ev, 1] containing EventWeights
+        w: ndarray [n_ev, 1] containing the event weights
    Returns:
-        X_jets_train: ndarray [n_ev_train, n_jet_feat] containing the events of jet related branches allocated for training
-        X_jets_test: ndarray [n_ev_test, n_jet_feat] containing the events of jet related branches allocated for testing
-        X_photons_train: ndarray [n_ev_train, n_photon_feat] containing the events of photon related branches allocated for training
-        X_photons_test: ndarray [n_ev_test, n_photon_feat] containing the events of photon related branches allocated for testing
-        X_muons_train: ndarray [n_ev_train, n_muon_feat] containing the events of muon related branches allocated for training
-        X_muons_test: ndarray [n_ev_test, n_muon_feat] containing the events of muon related branches allocated for testing
-        Y_train: ndarray [n_ev_train, 1] containing the shuffled truth labels for training
-        Y_test: ndarray [n_ev_test, 1] containing the shuffled truth labels allocated for testing
-        W_train: ndarray [n_ev_train, 1] containing the shuffled EventWeights allocated for training
-        W_test: ndarray [n_ev_test, 1] containing the shuffled EventWeights allocated for testing
+        data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
+           data = {
+               "X_jet_train" : X_jet_train,
+               "X_jet_test" : X_jet_test,
+               "X_photon_train" : X_photon_train,
+               "X_photon_test" : X_photon_test,
+               "y_train" : y_train,
+               "y_test" : y_test,
+               "w_train" : w_train,
+               "w_test" : w_test
+           }
     '''
-    #shuffle events & split into testing and training sets
-    X_jets_train, X_jets_test, \
-    X_photons_train, X_photons_test, \
-    X_muons_train, X_muons_test, \
-    Y_train, Y_test, \
-    W_train, W_test = train_test_split(X_jets, X_photons, X_muons, y, w, test_size=0.4)
+    logger.info('Shuffling, splitting and scaling')
 
-    X_jets_train, X_jets_test = _scale(X_jets_train, X_jets_test)
-    X_photons_train, X_photons_test = _scale(X_photons_train, X_photons_test)
-    X_muons_train, X_muons_test = _scale(X_muons_train, X_muons_test)
+    data_tuple = train_test_split(*(X.values() + [y, w]), test_size=0.4)
 
-    return X_jets_train, X_jets_test, X_photons_train, X_photons_test, X_muons_train, X_muons_test, Y_train, Y_test, W_train, W_test
+    data = OrderedDict()
+    for particle, (train, test) in zip(X.keys(), _pairwise(data_tuple[:(2 * len(X))])):
+        data['X_' + particle + '_train'], data['X_' + particle + '_test'] = _scale(train, test)
+    data['y_train'], data['y_test'], data['w_train'], data['w_test'] = data_tuple[-4:]
+
+    return data
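To make the `data_tuple` bookkeeping above concrete: with k particle streams plus y and w, `train_test_split` returns 2*(k + 2) arrays, alternating train/test in input order, and `_pairwise` walks the first 2*k of them two at a time. A small sketch with toy arrays (names and shapes are illustrative):

```python
import numpy as np
from sklearn.cross_validation import train_test_split

# Two toy "streams" plus labels and weights for 10 events.
X_jet = np.arange(20).reshape(10, 2)
X_photon = np.arange(10).reshape(10, 1)
y = np.arange(10)
w = np.ones(10)

out = train_test_split(X_jet, X_photon, y, w, test_size=0.4)
# out = [X_jet_train, X_jet_test, X_photon_train, X_photon_test,
#        y_train, y_test, w_train, w_test]
print len(out), out[0].shape, out[1].shape  # 8 (6, 2) (4, 2)
```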
 
 
-def zero_padding(X, max_length):
-    '''
+def padding(X, max_length, value=-999):
+    '''
+    Transforms X into a 3D array where the dimensions correspond to [n_ev, n_particles, n_features].
+    n_particles is now fixed and equal to max_length.
+    If the number of particles in an event was < max_length, the missing particles are filled with default values.
+    If the number of particles in an event was > max_length, the excess particles are removed.
+    Args:
+        X: ndarray [n_ev, n_features] with an arbitrary number of particles per event
+        max_length: int, the number of particles to keep per event
+        value (optional): the value to fill in when an event has too few particles, default=-999
+    Returns:
+        X_pad: ndarray [n_ev, n_particles, n_features], padded version of X with a fixed number of particles
+    Note:
+        Use Masking downstream to skip the particles whose entries are the artificial value -999
     '''
-    data = -999 * np.ones((X.shape[0], max_length, X.shape[1]), dtype='float32')
+    X_pad = value * np.ones((X.shape[0], max_length, X.shape[1]), dtype='float32')
     for i, row in enumerate(X):
-        data[i, :min(len(row[0]), max_length), :] = np.array(row.tolist()).T[:min(len(row[0]), max_length), :]
+        X_pad[i, :min(len(row[0]), max_length), :] = np.array(row.tolist()).T[:min(len(row[0]), max_length), :]
 
-    return data
+    return X_pad
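A toy demonstration of `padding`, assuming the jagged event layout used throughout the pipeline, where X[i, j] holds the array of feature-j values for all particles in event i (the numbers are made up):

```python
import numpy as np

# One event with 2 jets and 2 features, padded out to 5 jets.
X = np.empty((1, 2), dtype=object)
X[0, 0] = np.array([40.0, 25.0])   # jet_pt of the 2 jets
X[0, 1] = np.array([0.1, -1.2])    # jet_eta of the 2 jets

X_pad = -999 * np.ones((X.shape[0], 5, X.shape[1]), dtype='float32')
for i, row in enumerate(X):
    # row.tolist() -> [features][particles]; transpose to [particles][features]
    X_pad[i, :min(len(row[0]), 5), :] = np.array(row.tolist()).T[:min(len(row[0]), 5), :]

print X_pad.shape      # (1, 5, 2)
print X_pad[0, :3, 0]  # [  40.   25. -999.]
```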
diff --git a/pipeline.py b/pipeline.py
index 1ff81cd..5d7844d 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,5 +1,5 @@
 import json
-from data_processing import read_in, shuffle_split_scale, zero_padding
+from data_processing import read_in, shuffle_split_scale, padding
 import pandautils as pup
 import cPickle
 from plotting import plot_inputs, plot_NN
@@ -10,11 +10,11 @@
 #from plotting import plot_inputs, plot_performance
 #from nn_model import train, test
 
-def main(json_config, exclude_vars):
+def main(json_config, tree_name):
     '''
     Args:
     -----
-        json_config: a JSON file, containing a dictionary that links the names of the different
+        json_config: path to a JSON file, containing a dictionary that links the names of the different
                      classes in the classification problem to the paths of the ROOT files
                      associated with each class; for example:
@@ -31,16 +31,18 @@
         ],
         ...
     }
-        exclude_vars: list of strings of names of branches not to be used for training
+        tree_name: string, name of the tree that contains the correct branches
 
     Saves:
     ------
-        'processed_data.h5': dictionary with processed ndarrays (X, y, w) for all particles for training and testing
+        'processed_data_<hash>.pkl': dictionary with processed ndarrays (X, y, w) for all particles for training and testing
     '''
     logger = logging.getLogger('Main')
 
     # -- load in the JSON file
-    logger.info('Loading JSON config')
-    class_files_dict = json.load(open(json_config))
+    logger.info('Loading information from ' + json_config)
+    config = utils.load_config(json_config)
+    class_files_dict = config['classes']
+    particles_dict = config['particles']
 
     # -- hash the config dictionary to check if the pickled data exists
     from hashlib import md5
@@ -50,52 +52,53 @@ def sha(s):
         m.update(s.__repr__())
         return m.hexdigest()[:5]
 
     # -- if the pickle exists, use it
+    pickle_name = 'processed_data_' + sha(config) + '.pkl'
     try:
-        data = cPickle.load(open('processed_data_' + sha(class_files_dict) + '.pkl', 'rb'))
-        logger.info('Preprocessed data found in pickle')
-        X_jets_train = data['X_jets_train']
-        X_jets_test = data['X_jets_test']
-        X_photons_train = data['X_photons_train']
-        X_photons_test = data['X_photons_test']
-        X_muons_train = data['X_muons_train']
-        X_muons_test = data['X_muons_test']
-        y_train = data['y_train']
-        y_test = data['y_test']
-        w_train = data['w_train']
-        w_test = data['w_test']
-        varlist = data['varlist']
-
-    # -- otherwise, process the new data
+        logger.info('Attempting to read from {}'.format(pickle_name))
+        data = cPickle.load(open(pickle_name, 'rb'))
+        logger.info('Pre-processed data found and loaded from pickle')
+    # -- otherwise, process the new data
     except IOError:
-        logger.info('Preprocessed data not found')
+        logger.info('Pre-processed data not found in {}'.format(pickle_name))
         logger.info('Processing data')
         # -- transform ROOT files into standard ML format (ndarrays)
-        X_jets, X_photons, X_muons, y, w, varlist = read_in(class_files_dict, exclude_vars)
+        X, y, w, le = read_in(class_files_dict, tree_name, particles_dict)
 
         # -- shuffle, split samples into train and test set, scale features
-        X_jets_train, X_jets_test, \
-        X_photons_train, X_photons_test, \
-        X_muons_train, X_muons_test, \
-        y_train, y_test, \
-        w_train, w_test = shuffle_split_scale(X_jets, X_photons, X_muons, y, w)
+        data = shuffle_split_scale(X, y, w)
+
+        data.update({
+            'varlist' : [
+                branch
+                for particle_info in particles_dict.values()
+                for branch in particle_info['branches']
+            ],
+            'LabelEncoder' : le
+        })
+
+        # -- plot distributions:
+        '''
+        This should produce normed, weighted histograms of the input distributions for all variables.
+        The train and test distributions should be shown for every class.
+        Plots should be saved out to pdf with informative names.
+        '''
+        logger.info('Saving input distributions in ./plots/')
+        plot_inputs(data, particles_dict.keys())
+
+        logger.info('Padding')
+        for key in data:
+            if key.startswith('X_'):
+                data[key] = padding(data[key], particles_dict[key.split('_')[1]]['max_length'])
+                # ^ assuming the naming convention X_<particle>_train, X_<particle>_test
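The padding loop above leans on the `X_<particle>_<split>` key convention, which is also why `utils.load_config` rejects particle names containing an underscore. An illustrative sketch of the lookup (the keys and max_lengths are examples, not the real run):

```python
# The particle name must come back out of the key intact via split('_')[1].
data_keys = ['X_jet_train', 'X_jet_test', 'X_photon_train', 'X_photon_test']
particles_dict = {'jet': {'max_length': 5}, 'photon': {'max_length': 3}}
for key in data_keys:
    particle = key.split('_')[1]
    print key, '->', particles_dict[particle]['max_length']
# A particle named 'fat_jet' would give the key 'X_fat_jet_train', whose
# split yields 'fat' and fails the lookup; hence the validation in utils.py.
```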
 
     # -- save out to pickle
-    logger.info('Saving processed data to pickle')
-    cPickle.dump({
-        'X_jets_train' : X_jets_train,
-        'X_jets_test' : X_jets_test,
-        'X_photons_train' : X_photons_train,
-        'X_photons_test' : X_photons_test,
-        'X_muons_train' : X_muons_train,
-        'X_muons_test' : X_muons_test,
-        'y_train' : y_train,
-        'y_test' : y_test,
-        'w_train' : w_train,
-        'w_test' : w_test,
-        'varlist' : varlist
-        },
-        open('processed_data_' + sha(class_files_dict) + '.pkl', 'wb'),
+    logger.info('Saving processed data to {}'.format(pickle_name))
+    cPickle.dump(data,
+        open(pickle_name, 'wb'),
         protocol=cPickle.HIGHEST_PROTOCOL)
-    # -- plot distributions:
-    '''
-    This should produce normed, weighted histograms of the input distributions for all variables
-    The train and test distributions should be shown for every class
-    Plots should be saved out a pdf with informative names
-    '''
-    logger.info('Plotting input distributions')
-    plot_inputs(
-        X_jets_train, X_jets_test,
-        X_photons_train, X_photons_test,
-        X_muons_train, X_muons_test,
-        y_train, y_test,
-        w_train, w_test,
-        varlist
-    )
-
-    X_jets_train, X_jets_test, \
-    X_photons_train, X_photons_test, \
-    X_muons_train, X_muons_test = map(zero_padding,
-        [
-            X_jets_train, X_jets_test,
-            X_photons_train, X_photons_test,
-            X_muons_train, X_muons_test
-        ],
-        [5, 5, 3, 3, 2, 2]
-    )
 
     # # -- train
     # # design a Keras NN with three RNN streams (jets, photons, muons)
     io.save(('X_jets_NN.h5'), NN(X_jets_train, X_jets_test, y_train))
@@ -139,16 +174,16 @@
     # # combine the outputs and process them through a bunch of FF layers
     # # use a validation split of 20%
     # # save out the weights to hdf5 and the model to yaml
-    # net = train(X_jets_train, X_photons_train, X_muons_train, y_train, w_train)
+    # net = train(data)
 
     # # -- test
     # # evaluate performance on the test set
-    # yhat = test(net, X_jets_test, X_photons_test, X_muons_test, y_test, w_test)
+    # yhat = test(net, data)
 
     # # -- plot performance
     # # produce ROC curves to evaluate performance
     # # save them out to pdf
-    # plot_performance(yhat, y_test, w_test)
+    # plot_performance(yhat, data['y_test'], data['w_test'])
 
 
 if __name__ == '__main__':
@@ -159,9 +194,9 @@
 
     # -- read in arguments
     parser = argparse.ArgumentParser()
-    parser.add_argument('config', help="JSON file that specifies classes and corresponding ROOT files' paths")
-    parser.add_argument('--exclude', help="names of branches to exclude from training", nargs="*", default=[])
+    parser.add_argument('config', help="path to JSON file that specifies classes and corresponding ROOT files' paths")
+    parser.add_argument('--tree', help="name of the tree to open in the ntuples", default='mini')
     args = parser.parse_args()
 
     # -- pass arguments to main
-    sys.exit(main(args.config, args.exclude))
+    sys.exit(main(args.config, args.tree))
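For context on the `LabelEncoder` that `read_in` returns and `_plot_X` consumes below, a minimal round-trip sketch (the class names are examples taken from the config):

```python
from sklearn.preprocessing import LabelEncoder

# fit_transform maps class-name strings to integers 0..k-1 (sorted order);
# inverse_transform recovers the strings for the plot legends.
le = LabelEncoder()
y = le.fit_transform(['bkg', 'X400', 'bkg', 'X275'])
print y                                # [2 1 2 0]
print le.inverse_transform([0, 1, 2])  # ['X275' 'X400' 'bkg']
```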
diff --git a/plotting.py b/plotting.py
index 92452e5..505ff3a 100644
--- a/plotting.py
+++ b/plotting.py
@@ -4,17 +4,19 @@
 from matplotlib.pyplot import cm
 import pandautils as pup
 import os
+from sklearn.preprocessing import LabelEncoder
 
-def _plot_X(train, test, y_train, y_test, w_train, w_test, varlist, feature):
+def _plot_X(train, test, y_train, y_test, w_train, w_test, varlist, le, feature):
     '''
     Args:
         train: ndarray [n_ev_train, n_muon_feat] containing the events allocated for training
         test: ndarray [n_ev_test, n_muon_feat] containing the events allocated for testing
-        y_train: ndarray [n_ev_train, 1] containing the shuffled truth labels for training
-        y_test: ndarray [n_ev_test, 1] containing the shuffled truth labels allocated for testing
+        y_train: ndarray [n_ev_train, 1] containing the shuffled truth labels for training in numerical format
+        y_test: ndarray [n_ev_test, 1] containing the shuffled truth labels allocated for testing in numerical format
         w_train: ndarray [n_ev_train, 1] containing the shuffled EventWeights allocated for training
         w_test: ndarray [n_ev_test, 1] containing the shuffled EventWeights allocated for testing
-        varlist: list of names of branches like 'Jet_px', 'Photon_E', 'Muon_Iso'
+        varlist: list of names of branches like 'jet_px', 'photon_E', 'muon_Iso'
+        le: LabelEncoder to transform numerical y back to its string values
         feature: a string like 'Jet', 'Muon', 'Photon'
     Returns:
         Saves .pdf histograms for each feature-related branch plotting the training and test sets for each class
@@ -36,11 +38,12 @@
         flat_test = pup.flatten(test[:, column_counter])
         matplotlib.rcParams.update({'font.size': 16})
         fig = plt.figure(figsize=(11.69, 8.27), dpi=100)
+
         bins = np.linspace(
             min(min(flat_train), min(flat_test)),
             max(max(flat_train), max(flat_test)),
             30)
-        color = iter(cm.rainbow(np.linspace(0, 1, 2)))
+        color = iter(cm.rainbow(np.linspace(0, 1, len(np.unique(y_train)))))
         # -- loop through the classes
         for k in range(len(np.unique(y_train))):
             c = next(color)
@@ -48,7 +51,7 @@
                 bins=bins,
                 histtype='step',
                 normed=True,
-                label='Train - class: '+str(k),
+                label='Train - ' + le.inverse_transform(k),
                 weights=w_train_ext[y_train_ext == k],
                 color=c,
                 linewidth=1)
@@ -56,55 +59,43 @@
                 bins=bins,
                 histtype='step',
                 normed=True,
-                label='Test - class: ' + str(k),
+                label='Test - ' + le.inverse_transform(k),
                 weights=w_test_ext[y_test_ext == k],
                 color=c,
                 linewidth=2,
                 linestyle='dashed')
-        plt.xlabel(key)
+        plt.title(key)
         plt.yscale('log')
         plt.ylabel('Weighted Events')
-        plt.legend()
+        plt.legend(prop={'size': 10}, fancybox=True, framealpha=0.5)
         try:
             plt.savefig(os.path.join('plots', key + '.pdf'))
         except IOError:
             os.makedirs('plots')
             plt.savefig(os.path.join('plots', key + '.pdf'))
-        #plt.show()
         column_counter += 1
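A self-contained sketch of the overlay pattern `_plot_X` implements (normed, weighted, per-class train-solid versus test-dashed histograms on shared bins), using toy data rather than the pipeline's arrays:

```python
import numpy as np
import matplotlib.pyplot as plt

# Toy stand-ins for one class's flattened train/test values and unit weights.
train_vals = np.random.normal(0.0, 1.0, 1000)
test_vals = np.random.normal(0.1, 1.0, 500)

# Shared bins spanning both samples, as in _plot_X.
bins = np.linspace(min(train_vals.min(), test_vals.min()),
                   max(train_vals.max(), test_vals.max()), 30)
plt.hist(train_vals, bins=bins, histtype='step', normed=True,
         weights=np.ones_like(train_vals), label='Train', linewidth=1)
plt.hist(test_vals, bins=bins, histtype='step', normed=True,
         weights=np.ones_like(test_vals), label='Test', linewidth=2,
         linestyle='dashed')
plt.yscale('log')
plt.legend()
plt.savefig('toy_overlay.pdf')
```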
 
 
-def plot_inputs(X_jets_train, X_jets_test, X_photons_train, X_photons_test,
-    X_muons_train, X_muons_test, y_train, y_test, w_train, w_test, varlist):
+def plot_inputs(data, particle_names):
     '''
     Args:
-        X_jets_train: ndarray [n_ev_train, n_jet_feat] containing the
-            events of jet related branches allocated for training
-        X_jets_test: ndarray [n_ev_test, n_jet_feat] containing the
-            events of jet related branches allocated for testing
-        X_photons_train: ndarray [n_ev_train, n_photon_feat] containing
-            the events of photon related branches allocated for training
-        X_photons_test: ndarray [n_ev_test, n_photon_feat] containing
-            the events of photon related branches allocated for testing
-        X_muons_train: ndarray [n_ev_train, n_muon_feat] containing the
-            events of muon related branches allocated for training
-        X_muons_test: ndarray [n_ev_test, n_muon_feat] containing the
-            events of muon related branches allocated for testing
-        Y_train: ndarray [n_ev_train, 1] containing the shuffled truth
-            labels for training
-        Y_test: ndarray [n_ev_test, 1] containing the shuffled truth labels
-            allocated for testing
-        W_train: ndarray [n_ev_train, 1] containing the shuffled EventWeights
-            allocated for training
-        W_test: ndarray [n_ev_test, 1] containing the shuffled EventWeights
-            allocated for testing
-        varlist: list of strings that concatenates the individual
-            lists of variables for each particle type, e.g.:
-            ['Jet_Px', 'Jet_E', 'Muon_ID', 'Photon_Px']
+        data: an OrderedDict containing all X, y, w ndarrays for all particles (both train and test), e.g.:
+            data = {
+                "X_jet_train" : X_jet_train,
+                "X_jet_test" : X_jet_test,
+                "X_photon_train" : X_photon_train,
+                "X_photon_test" : X_photon_test,
+                "y_train" : y_train,
+                "y_test" : y_test,
+                "w_train" : w_train,
+                "w_test" : w_test
+            }
+        particle_names: list of strings, names of particle streams
     Returns:
         Saves .pdf histograms plotting the training and test sets of each class for each feature
     '''
-    _plot_X(X_jets_train, X_jets_test, y_train, y_test, w_train, w_test, varlist, 'Jet')
-    _plot_X(X_photons_train, X_photons_test, y_train, y_test, w_train, w_test, varlist, 'Photon')
-    _plot_X(X_muons_train, X_muons_test, y_train, y_test, w_train, w_test, varlist, 'Muon')
+    for particle in particle_names:
+        _plot_X(
+            data['X_' + particle + '_train'],
+            data['X_' + particle + '_test'],
+            data['y_train'],
+            data['y_test'],
+            data['w_train'],
+            data['w_test'],
+            data['varlist'],
+            data['LabelEncoder'],
+            particle
+        )
diff --git a/utils.py b/utils.py
index a61fc72..e62f570 100644
--- a/utils.py
+++ b/utils.py
@@ -1,4 +1,5 @@
 import logging
+import json
 
 def configure_logging():
     rlogger = logging.getLogger()
@@ -7,4 +8,23 @@
     logging.addLevelName(logging.WARNING, "\033[1;31m{:8}\033[1;0m".format(logging.getLevelName(logging.WARNING)))
     logging.addLevelName(logging.ERROR, "\033[1;35m{:8}\033[1;0m".format(logging.getLevelName(logging.ERROR)))
     logging.addLevelName(logging.INFO, "\033[1;32m{:8}\033[1;0m".format(logging.getLevelName(logging.INFO)))
-    logging.addLevelName(logging.DEBUG, "\033[1;34m{:8}\033[1;0m".format(logging.getLevelName(logging.DEBUG)))
\ No newline at end of file
+    logging.addLevelName(logging.DEBUG, "\033[1;34m{:8}\033[1;0m".format(logging.getLevelName(logging.DEBUG)))
+
+
+def load_config(config_file):
+    # TO DO: validate types of entries in the config
+    config = json.load(open(config_file, 'r'))
+    required_keys = ['classes', 'particles']
+    required_particle_keys = ['branches', 'max_length']
+
+    for k in required_keys:
+        if k not in config.keys():
+            raise KeyError('Pipeline configuration requires key: {}'.format(k))
+
+    for particle_name, particle_info in config['particles'].iteritems():
+        if '_' in particle_name:
+            raise ValueError('Particle names cannot have _ in them')
+        for k in required_particle_keys:
+            if k not in particle_info.keys():
+                raise KeyError('Particle configuration requires key: {}'.format(k))
+
+    return config
\ No newline at end of file
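A hedged usage sketch of the new `load_config` entry point; it assumes `config_hh.json` and `utils.py` sit in the working directory, and shows the validation failures one should expect:

```python
import utils

# Load and validate the pipeline configuration shown at the top of this diff.
config = utils.load_config('config_hh.json')
print config['classes'].keys()                  # ['X400', 'X350', ...]
print config['particles']['jet']['max_length']  # 5

# A malformed config fails loudly: a missing 'particles' section raises
# KeyError, and a particle named e.g. 'fat_jet' raises
# ValueError('Particle names cannot have _ in them').
```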