From 712712b604b10f61f51ce9755a3c67144d9bd7da Mon Sep 17 00:00:00 2001 From: James Moore Date: Tue, 10 Oct 2023 20:39:22 +0100 Subject: [PATCH] Contamination now working :) --- libnnpdf/src/NNPDF/dataset.h | 4 ++- libnnpdf/src/dataset.cc | 8 +++-- libnnpdf/src/experiments.cc | 8 +++-- libnnpdf/wrapper/src/nnpdf.i | 1 + .../example_contamination.yaml | 12 +++---- nnpdfcpp/src/common/src/loadutils.cc | 6 +++- validphys2/src/validphys/config.py | 15 +++++++- validphys2/src/validphys/core.py | 34 +++++++++++++++++-- validphys2/src/validphys/loader.py | 4 +++ 9 files changed, 74 insertions(+), 18 deletions(-) diff --git a/libnnpdf/src/NNPDF/dataset.h b/libnnpdf/src/NNPDF/dataset.h index a5e31a417..24a716cc1 100644 --- a/libnnpdf/src/NNPDF/dataset.h +++ b/libnnpdf/src/NNPDF/dataset.h @@ -39,6 +39,7 @@ namespace NNPDF bool UseFixedPredictions; //!< Flag to indicate using fixed predictions from simu_factor file int SpecialTheoryID; //!< The special hacky theory ID + std::vector ContaminationFactor; //!< The factor for contamination in closure tests // private methods for constructor void GenCovMat() const; //!< Generate covariance matrix @@ -48,7 +49,7 @@ namespace NNPDF DataSet(); //!< Disable default constructor public: - DataSet(CommonData const&, FKSet const&, double weight=1., bool use_fixed_predictions=false, int special_theory_id=0); //!< Constructor + DataSet(CommonData const&, FKSet const&, std::vector contamination_factor, double weight=1., bool use_fixed_predictions=false, int special_theory_id=0); //!< Constructor DataSet(const DataSet&, std::vector const&); //!< Masked Copy constructor DataSet(const DataSet&) = default; //!< Masked Copy constructor virtual ~DataSet(); //!< The destructor. @@ -63,6 +64,7 @@ namespace NNPDF bool GetUseFixedPredictions() const {return UseFixedPredictions; } //!< Getter for UseFixedPredictions int GetSpecialTheoryID() const {return SpecialTheoryID; } //!< The special hacky theory ID + std::vector GetContaminationFactor() const {return ContaminationFactor; } //!< The contamination factor double const& GetT0Pred(int i) const { return fT0Pred[i];} //!< Return t0 prediction double GetWeight() const {return fWeight;} diff --git a/libnnpdf/src/dataset.cc b/libnnpdf/src/dataset.cc index c043898f3..0d1b66a47 100644 --- a/libnnpdf/src/dataset.cc +++ b/libnnpdf/src/dataset.cc @@ -35,14 +35,15 @@ using namespace NNPDF; * \param weight the factor by which the importance of the dataset in the fit chiĀ² * is increased. */ -DataSet::DataSet(CommonData const& data, FKSet const& set, double weight, bool use_fixed_predictions, int special_theory_id): +DataSet::DataSet(CommonData const& data, FKSet const& set, std::vector contamination_factor, double weight, bool use_fixed_predictions, int special_theory_id): CommonData(data), FKSet(set), fIsArtificial(false), fIsT0(false), fWeight(weight), UseFixedPredictions(use_fixed_predictions), - SpecialTheoryID(special_theory_id) + SpecialTheoryID(special_theory_id), + ContaminationFactor(contamination_factor) { fT0Pred.reserve(fNData); @@ -58,7 +59,8 @@ DataSet::DataSet(const DataSet& set, std::vector const& mask): fIsT0(set.fIsT0), fWeight(set.fWeight), UseFixedPredictions(set.UseFixedPredictions), - SpecialTheoryID(set.SpecialTheoryID) + SpecialTheoryID(set.SpecialTheoryID), + ContaminationFactor(set.ContaminationFactor) { fT0Pred.reserve(fNData); diff --git a/libnnpdf/src/experiments.cc b/libnnpdf/src/experiments.cc index 05fa61c55..dccbe27df 100644 --- a/libnnpdf/src/experiments.cc +++ b/libnnpdf/src/experiments.cc @@ -407,16 +407,18 @@ void Experiment::MakeClosure(const vector& predictions, bool cons for (int i = 0; i < set.GetNData(); i++) { newdata[i] = theory.GetObsCV(i); - // This is our opportunity to CONTAMINATE things, and sort out the fixed observables + + // Sort out fixed observables first, if necessary if (set.GetUseFixedPredictions()) { string fixed_prediction_path = get_data_path() + "theory_" + std::to_string(set.GetSpecialTheoryID()) + "/simu_factors/SIMU_" + set.GetSetName() + ".yaml"; - cout << fixed_prediction_path << endl; YAML::Node read_file = YAML::LoadFile(fixed_prediction_path); - std::vector fixed_predictions = read_file["SM_fixed"].as>(); newdata[i] = fixed_predictions[i]; } + + // Finally, add the contamination + newdata[i] = newdata[i]*set.GetContaminationFactor()[i]; } set.UpdateData(newdata.data()); // MakeClosure treated as shifts rather than normalisations diff --git a/libnnpdf/wrapper/src/nnpdf.i b/libnnpdf/wrapper/src/nnpdf.i index 86875de31..e7a4fe3ea 100644 --- a/libnnpdf/wrapper/src/nnpdf.i +++ b/libnnpdf/wrapper/src/nnpdf.i @@ -37,6 +37,7 @@ %template(vector_str) std::vector; %template(vector_double) std::vector; %template(vector_int) std::vector; +%template(vector_float) std::vector; %typemap(out) NNPDF::matrix { auto size = $1.size(0)*$1.size(1); diff --git a/n3fit/runcards/examples/simunet_examples/example_contamination.yaml b/n3fit/runcards/examples/simunet_examples/example_contamination.yaml index fb342f2b8..e04d6e839 100644 --- a/n3fit/runcards/examples/simunet_examples/example_contamination.yaml +++ b/n3fit/runcards/examples/simunet_examples/example_contamination.yaml @@ -4,10 +4,10 @@ description: "Runcard template for the new (more flexible) contaminated fits. Th ############################################################ dataset_inputs: - {dataset: NMC, frac: 0.75} -- {dataset: ATLASTTBARTOT7TEV, cfac: [QCD], contamination: 'EFT-LO'} -- {dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTMNORM, cfac: [QCD], contamination: 'EFT-LO'} -- {dataset: ATLAS_TTBAR_8TEV_ASY, cfac: [QCD], contamination: 'EFT-LO'} -- {dataset: ATLAS_SINGLETOPW_8TEV_TOTAL, use_fixed_predictions: True, contamination: 'EFT-LO'} +- {dataset: ATLASTTBARTOT7TEV, cfac: [QCD], contamination: 'EFT_LO'} +- {dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTMNORM, cfac: [QCD], contamination: 'EFT_LO'} +- {dataset: ATLAS_TTBAR_8TEV_ASY, cfac: [QCD], contamination: 'EFT_LO'} +- {dataset: ATLAS_SINGLETOPW_8TEV_TOTAL, use_fixed_predictions: True, contamination: 'EFT_LO'} fixed_pdf_fit: False # load_weights_from_fit: 221103-jmm-no_top_1000_iterated # If this is uncommented, training starts here. @@ -27,8 +27,8 @@ closuretest: rancuttrnval: false # 0(1) to output training(valiation) chi2 in report printpdf4gen: false # To print info on PDFs during minimization contamination_parameters: - - {name: 'OtG', value: 0.01} - - {name: 'Opt', value: 0.02} + - {name: 'OtG', value: 10} + - {name: 'Opt', value: 1} seed: 0 rngalgo: 0 diff --git a/nnpdfcpp/src/common/src/loadutils.cc b/nnpdfcpp/src/common/src/loadutils.cc index 50908682b..f4d78ed50 100644 --- a/nnpdfcpp/src/common/src/loadutils.cc +++ b/nnpdfcpp/src/common/src/loadutils.cc @@ -33,7 +33,11 @@ DataSet LoadDataSet(NNPDFSettings const &settings, std::string const &setname, if (mask.size() > 0) { fk = FKSet(fk, mask); } - return DataSet(cd, fk, weight); + + // Since this is outside the contamination pipeline, set the contamination factor to be 1. + std::vector contamination_factor(cd.GetNData(), 1.0); + + return DataSet(cd, fk, contamination_factor, weight); } /** diff --git a/validphys2/src/validphys/config.py b/validphys2/src/validphys/config.py index 78fd31cd9..f5f43a2eb 100644 --- a/validphys2/src/validphys/config.py +++ b/validphys2/src/validphys/config.py @@ -493,7 +493,7 @@ def produce_simu_parameters_scales(self, simu_parameters=None): def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_parameters_scales, n_simu_parameters, simu_parameters=None): """The mapping that corresponds to the dataset specifications in the fit files""" - known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions"} + known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination"} try: name = dataset["dataset"] if not isinstance(name, str): @@ -507,6 +507,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_para cfac = dataset.get("cfac", tuple()) frac = dataset.get("frac", 1) use_fixed_predictions = dataset.get("use_fixed_predictions", False) + contamination = dataset.get("contamination", None) if not isinstance(frac, numbers.Real): raise ConfigError(f"'frac' must be a number, not '{frac}'") if frac < 0 or frac > 1: @@ -542,6 +543,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_para weight=weight, custom_group=custom_group, use_fixed_predictions=use_fixed_predictions, + contamination=contamination, **bsm_data ) @@ -698,6 +700,12 @@ def produce_cuts(self, *, commondata, use_cuts): return self._produce_similarity_cuts(commondata) raise TypeError("Wrong use_cuts") + def produce_contamination_data(self, closuretest): + if "contamination_parameters" in closuretest.keys(): + return closuretest["contamination_parameters"] + else: + return None + def produce_dataset( self, *, @@ -707,6 +715,7 @@ def produce_dataset( use_fitcommondata=False, fit=None, check_plotting: bool = False, + contamination_data = None, ): """Dataset specification from the theory and CommonData. Use the cuts from the fit, if provided. If check_plotting is set to @@ -719,6 +728,8 @@ def produce_dataset( weight = dataset_input.weight simu_parameters_names = dataset_input.simu_parameters_names use_fixed_predictions = dataset_input.use_fixed_predictions + contamination = dataset_input.contamination + contamination_data = contamination_data try: ds = self.loader.check_dataset( @@ -733,6 +744,8 @@ def produce_dataset( weight=weight, simu_parameters_names=simu_parameters_names, use_fixed_predictions=use_fixed_predictions, + contamination=contamination, + contamination_data=contamination_data, ) except DataNotFoundError as e: raise ConfigError(str(e), name, self.loader.available_datasets) diff --git a/validphys2/src/validphys/core.py b/validphys2/src/validphys/core.py index 3d21905b4..230b45825 100644 --- a/validphys2/src/validphys/core.py +++ b/validphys2/src/validphys/core.py @@ -324,7 +324,7 @@ def plot_kinlabels(self): class DataSetInput(TupleComp): """Represents whatever the user enters in the YAML to specify a dataset.""" - def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, use_fixed_predictions): + def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, use_fixed_predictions, contamination): self.name=name self.sys=sys self.cfac = cfac @@ -333,6 +333,7 @@ def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_paramete self.custom_group = custom_group self.simu_parameters_names = simu_parameters_names self.use_fixed_predictions = use_fixed_predictions + self.contamination = contamination super().__init__(name, sys, cfac, frac, weight, custom_group) def __str__(self): @@ -461,10 +462,12 @@ def cut_mask(cuts): class DataSetSpec(TupleComp): def __init__(self, *, name, commondata, fkspecs, thspec, cuts, - frac=1, op=None, weight=1, simu_parameters_names_CF=None, simu_parameters_names=None, use_fixed_predictions=False): + frac=1, op=None, weight=1, simu_parameters_names_CF=None, simu_parameters_names=None, use_fixed_predictions=False, contamination=None, contamination_data=None): self.name = name self.commondata = commondata self.use_fixed_predictions = use_fixed_predictions + self.contamination = contamination + self.contamination_data = contamination_data if isinstance(fkspecs, FKTableSpec): fkspecs = (fkspecs,) @@ -503,7 +506,32 @@ def load(self): fkset = FKSet(FKSet.parseOperator(self.op), fktables) - data = DataSet(cd, fkset, self.weight, self.use_fixed_predictions, int(self.thspec.id)) + if self.contamination: + # Determine the multiplicative factors that we have to contaminate the closure test + # pseudodata with. + simu_fac_path = str(self.fkspecs[0].fkpath).split('fastkernel')[0] + "simu_factors/SIMU_" + cd.GetSetName() + ".yaml" + with open(simu_fac_path, 'rb') as file: + simu_file = yaml.safe_load(file) + contamination_values = np.array([1.0]*cd.GetNData()) + for parameter in self.contamination_data: + if parameter['name'] in simu_file[self.contamination].keys(): + op_prediction = parameter['value']*np.array(simu_file[self.contamination][parameter['name']]) + sm_prediction = np.array(simu_file[self.contamination]['SM']) + percentages = op_prediction / sm_prediction + contamination_values += percentages + contamination_values = contamination_values.tolist() + else: + # The contamination is just an array of 1s + contamination_values = [1.0]*cd.GetNData() + + data = DataSet( + cd, + fkset, + contamination_values, + self.weight, + self.use_fixed_predictions, + int(self.thspec.id), + ) if self.cuts is not None: #ugly need to convert from numpy.int64 to int, so we can pass diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py index d5625c7ea..0ddbd6164 100644 --- a/validphys2/src/validphys/loader.py +++ b/validphys2/src/validphys/loader.py @@ -546,6 +546,8 @@ def check_dataset( weight=1, simu_parameters_names=None, use_fixed_predictions=False, + contamination=None, + contamination_data=None, ): if not isinstance(theoryid, TheoryIDSpec): @@ -595,6 +597,8 @@ def check_dataset( simu_parameters_names_CF=simu_parameters_names_CF, simu_parameters_names=simu_parameters_names, use_fixed_predictions=use_fixed_predictions, + contamination=contamination, + contamination_data=contamination_data, ) def check_experiment(self, name: str, datasets: List[DataSetSpec]) -> DataGroupSpec: