diff --git a/falcon_challenge/evaluator.py b/falcon_challenge/evaluator.py
index d2d78d3..ec48c13 100644
--- a/falcon_challenge/evaluator.py
+++ b/falcon_challenge/evaluator.py
@@ -241,7 +241,7 @@ def evaluate(
             dataset_pred = dataset_pred[:dataset_mask.shape[0]] # In case excess timesteps are predicted due to batching, reduce
             if dataset in DATASET_HELDINOUT_MAP[datasplit]['held_in']:
-                if 'h2' not in datasplit:
+                if 'h2' not in datasplit and 'b1' not in datasplit:
                     # For splits with multiple datasets per session (H1 and M2), we need to map predictions, targets, and masks for each dataset to the session ID
                     session_id = reduce_key(dataset)
                     dset_len_dict['held_in'][session_id].append(dataset_mask.shape[0])
@@ -249,7 +249,7 @@ def evaluate(
                 tgt_dict['held_in'].append(dataset_tgt)
                 mask_dict['held_in'].append(dataset_mask)
             elif dataset in DATASET_HELDINOUT_MAP[datasplit]['held_out']:
-                if not 'h2' in datasplit:
+                if not 'h2' in datasplit and 'b1' not in datasplit:
                     # For splits with multiple datasets per session (H1 and M2), we need to map predictions, targets, and masks for each dataset to the session ID
                     session_id = reduce_key(dataset)
                     dset_len_dict['held_out'][session_id].append(dataset_mask.shape[0])
diff --git a/setup.py b/setup.py
index f77981b..f92a46a 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='falcon_challenge',
-    version='0.4.0',
+    version='0.4.1',
     url='https://github.com/snel-repo/stability-benchmark',
     author='Joel Ye',