Added filter cuts to cut high Q data #71

Open · wants to merge 35 commits into base: main
Commits (35)
992abff
added new pineparser from nnpdf and legacy option to load_fktable fun…
comane Apr 14, 2024
c33b941
added get_np_fkatable method to FKTableData
comane Apr 14, 2024
ccd7f14
added legacy and metadata args to FKTableSpec
comane Apr 14, 2024
2e641b0
first version of new fktable parser
comane Apr 14, 2024
e27f7ee
added luminosity mapping method to FKTableData
comane Apr 14, 2024
145d528
xgrid reshape
comane Apr 14, 2024
89fd055
include cuts when loading fk table, seems to work for DIS
comane Apr 14, 2024
ff86914
works with theory 270
comane Apr 14, 2024
8552a1e
tmp modifications
comane Apr 27, 2024
32a2891
added load_commondata method to CommonDataSpec
comane Apr 28, 2024
dfa6617
added commondatawriter module
comane Apr 28, 2024
9701a59
added export method to CommonData -> allows to use _filter_real_data …
comane Apr 28, 2024
e69ac93
common_data_reader_dataset and experiment in n3fit_data_utils now onl…
comane Apr 28, 2024
8fe5bea
when new_commondata: True -> legacy: False -> pass a TheoryMeta to FK…
comane Apr 28, 2024
a5878fa
added test for metadata existence, array append, compatibility with 2…
FrancescoMerlotti Jul 10, 2024
82245d8
fix typo
FrancescoMerlotti May 8, 2024
247ec4d
added load_commondata to core, level0_commondata_wc and make_level1_d…
FrancescoMerlotti Jul 15, 2024
39659d9
added parse_fakepdf to config.py
FrancescoMerlotti May 16, 2024
ff762f9
add chi2 provider functions
FrancescoMerlotti May 20, 2024
5366d70
added usage write_chi2
FrancescoMerlotti May 20, 2024
9a88500
fixed repo
FrancescoMerlotti May 20, 2024
bbf01ca
moved function in simunet_analysis & changed their name
FrancescoMerlotti May 26, 2024
7099803
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
a596c62
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
5dbdb45
added rules classes, static KIN_LABEL dict, and replaced cpp Export m…
FrancescoMerlotti Jul 15, 2024
ea24cba
added commondatawriter.py & export method for CommonData python objects
FrancescoMerlotti Jul 15, 2024
9e13d11
added xq2 map for hadronic MQQ processes ref. [2303.06159]
FrancescoMerlotti Jun 12, 2024
c8514a9
add .pdf PBSP logos
FrancescoMerlotti May 13, 2024
9b3aa02
Revert "added xq2 map for hadronic MQQ processes ref. [2303.06159]"
FrancescoMerlotti Jun 18, 2024
201234c
Revert "added commondatawriter.py & export method for CommonData pyth…
FrancescoMerlotti Jul 15, 2024
2f30c4a
Revert "added rules classes, static KIN_LABEL dict, and replaced cpp …
FrancescoMerlotti Jun 18, 2024
c416434
Revert "Revert "added rules classes, static KIN_LABEL dict, and repla…
FrancescoMerlotti Jun 18, 2024
fa1766e
Revert "Revert "added commondatawriter.py & export method for CommonD…
FrancescoMerlotti Jul 15, 2024
44b97d9
Revert "Revert "added xq2 map for hadronic MQQ processes ref. [2303.0…
FrancescoMerlotti Jun 18, 2024
957a992
added Q2min, Q2max to plot kincov with fixed y_lim
FrancescoMerlotti Jun 19, 2024
Binary file added PBSP_logos/PBSP_black.pdf
Binary file not shown.
Binary file added PBSP_logos/PBSP_dark.pdf
Binary file not shown.
Binary file added PBSP_logos/PBSP_light.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions validphys2/src/validphys/commondataparser.py
@@ -13,6 +13,8 @@
from validphys.core import peek_commondata_metadata
from validphys.coredata import CommonData

EXT = "pineappl.lz4"

def load_commondata(spec):
"""
Load the data corresponding to a CommonDataSpec object.
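The new module-level EXT constant records the file extension used by pineappl fktables. A minimal sketch of the path handling it enables; the folder layout and grid name here are illustrative assumptions, not part of this diff:

>>> from pathlib import Path
>>> from validphys.commondataparser import EXT  # EXT = "pineappl.lz4"
>>> grids = Path("fastkernel")                  # hypothetical theory subfolder
>>> (grids / f"FK_EXAMPLE.{EXT}").name          # hypothetical grid name
'FK_EXAMPLE.pineappl.lz4'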
84 changes: 84 additions & 0 deletions validphys2/src/validphys/commondatawriter.py
@@ -0,0 +1,84 @@
"""
This module contains functions to write commondata and systype
tables to files.
"""


def write_commondata_data(commondata, buffer):
"""
Write the commondata table to a buffer; this can be a memory map, a
compressed archive or a string buffer (for instance a StringIO object).


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or string buffer
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_commondata_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n"
buffer.write(header)
commondata.commondata_table.to_csv(buffer, sep="\t", header=None)


def write_commondata_to_file(commondata, path):
"""
Write the commondata table to a file.
"""
with open(path, "w") as file:
write_commondata_data(commondata, file)


def write_systype_data(commondata, buffer):
"""
Write the systype table to a buffer; this can be a memory map, a
compressed archive or a string buffer (for instance a StringIO object).


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or string buffer
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_systype_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.nsys}\n"
buffer.write(header)
commondata.systype_table.to_csv(buffer, sep="\t", header=None)


def write_systype_to_file(commondata, path):
"""
Write the systype table to a file.
"""
with open(path, "w") as file:
write_systype_data(commondata, file)
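Taken together, the two *_to_file wrappers let a CommonData object that was loaded in memory be written back out as plain-text commondata and systype tables. A minimal round-trip sketch, reusing the NMC example from the docstrings above (the output file names are illustrative, not a convention enforced by this module):

>>> from validphys.loader import Loader
>>> from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file
>>> cd = Loader().check_commondata("NMC").load_commondata_instance()
>>> write_commondata_to_file(cd, "DATA_NMC.dat")          # header + data table
>>> write_systype_to_file(cd, "SYSTYPE_NMC_DEFAULT.dat")  # systematics type table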
78 changes: 55 additions & 23 deletions validphys2/src/validphys/config.py
@@ -30,6 +30,14 @@
from reportengine import report
from reportengine.compat import yaml

from validphys.filters import (
Rule,
FilterRule,
AddedFilterRule,
RuleProcessingError,
default_filter_rules_input,
)

from validphys.core import (
DataGroupSpec,
DataSetInput,
@@ -38,6 +46,7 @@
MatchedCuts,
SimilarCuts,
ThCovMatSpec,
PDF,
)
from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
from validphys.loader import (
@@ -171,6 +180,10 @@ def parse_pdf(self, name: str):
except NotImplementedError as e:
raise ConfigError(str(e))
return pdf

def parse_fakepdf(self, name: str) -> PDF:
"""PDF set used to generate the fake data in a closure test."""
return self.parse_pdf(name)

def parse_load_weights_from_fit(self, name: str):
"""A fit in the results folder, containing at least a valid filter result."""
@@ -181,7 +194,7 @@ def parse_load_weights_from_fit(self, name: str):

@element_of("theoryids")
@_id_with_label
def parse_theoryid(self, theoryID: (str, int)):
def parse_theoryid(self, theoryID: (str, int)): # type: ignore
"""A number corresponding to the database theory ID where the
corresponding theory folder is installed in the data directory."""
try:
@@ -194,7 +207,7 @@ def parse_theoryid(self, theoryID: (str, int)):
display_alternatives="all",
)

def parse_use_cuts(self, use_cuts: (bool, str)):
def parse_use_cuts(self, use_cuts: (bool, str)): # type: ignore
"""Whether to filter the points based on the cuts applied in the fit,
or the whole data in the dataset. The possible options are:

@@ -232,7 +245,7 @@ def produce_replicas(self, nreplica: int):
return NSList(range(1, nreplica+1), nskey="replica")

def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False,
point_prescription: (str, None) = None):
point_prescription: (str, None) = None): # type: ignore
"""Whether to use a scale variation uncertainty theory covmat.
Checks whether a point prescription is included in the runcard and if so
assumes scale uncertainties are to be used."""
@@ -507,7 +520,7 @@ def produce_simu_parameters_linear_combinations(self, simu_parameters=None):
def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_parameters_scales, n_simu_parameters, simu_parameters_linear_combinations, simu_parameters=None):
"""The mapping that corresponds to the dataset specifications in the
fit files"""
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination"}
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination", "new_commondata"}
try:
name = dataset["dataset"]
if not isinstance(name, str):
@@ -517,6 +530,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
"'dataset' must be a mapping with " "'dataset' and 'sysnum'"
)

new_commondata = dataset.get("new_commondata", False)
sysnum = dataset.get("sys")
cfac = dataset.get("cfac", tuple())
frac = dataset.get("frac", 1)
@@ -559,7 +573,8 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
custom_group=custom_group,
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
**bsm_data
**bsm_data,
new_commondata=new_commondata,
)

def parse_use_fitcommondata(self, do_use: bool):
@@ -746,6 +761,7 @@ def produce_dataset(
use_fixed_predictions = dataset_input.use_fixed_predictions
contamination = dataset_input.contamination
contamination_data = contamination_data
new_commondata = dataset_input.new_commondata

try:
ds = self.loader.check_dataset(
@@ -763,6 +779,7 @@
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
contamination_data=contamination_data,
new_commondata=new_commondata,
)
except DataNotFoundError as e:
raise ConfigError(str(e), name, self.loader.available_datasets)
@@ -1199,7 +1216,7 @@ def res(*args, **kwargs):
return res

def produce_fitthcovmat(
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None # type: ignore
):
"""If a `fit` is specified and `use_thcovmat_if_present` is `True` then returns the
corresponding covariance matrix for the given fit if it exists. If the fit doesn't have a
@@ -1253,7 +1270,7 @@ def produce_fitthcovmat(
fit_theory_covmat = None
return fit_theory_covmat

def parse_speclabel(self, label: (str, type(None))):
def parse_speclabel(self, label: (str, type(None))): # type: ignore
"""A label for a dataspec. To be used in some plots"""
return label

@@ -1287,7 +1304,7 @@ def parse_groupby(self, grouping: str):
)
return grouping

def parse_norm_threshold(self, val: (numbers.Number, type(None))):
def parse_norm_threshold(self, val: (numbers.Number, type(None))): # type: ignore
"""The threshold to use for covariance matrix normalisation, sets
the maximum l2 norm of the inverse covariance matrix, by clipping
smallest eigenvalues
@@ -1310,7 +1327,7 @@ def produce_no_covmat_reg(self):
return {"norm_threshold": None}

@configparser.record_from_defaults
def parse_default_filter_rules(self, spec: (str, type(None))):
def parse_default_filter_rules(self, spec: (str, type(None))): # type: ignore
return spec

def load_default_default_filter_rules(self, spec):
@@ -1334,7 +1351,7 @@ def load_default_default_filter_rules(self, spec):
display_alternatives="all",
)

def parse_filter_rules(self, filter_rules: (list, type(None))):
def parse_filter_rules(self, filter_rules: (list, type(None))): # type: ignore
"""A list of filter rules. See https://docs.nnpdf.science/vp/filters.html
for details on the syntax"""
log.warning("Overwriting filter rules")
Expand All @@ -1346,6 +1363,13 @@ def parse_default_filter_rules_recorded_spec_(self, spec):
it reportengine detects a conflict in the `dataset` key.
"""
return spec

def parse_added_filter_rules(self, rules: (list, type(None)) = None): # type: ignore
"""
Returns a tuple of AddedFilterRule objects. Rules are immutable after parsing.
AddedFilterRule objects inherit from FilterRule objects.
"""
return tuple(AddedFilterRule(**rule) for rule in rules) if rules else None

def produce_rules(
self,
@@ -1355,15 +1379,9 @@
default_filter_rules=None,
filter_rules=None,
default_filter_rules_recorded_spec_=None,
added_filter_rules=None,
):

"""Produce filter rules based on the user defined input and defaults."""
from validphys.filters import (
Rule,
RuleProcessingError,
default_filter_rules_input,
)

theory_parameters = theoryid.get_description()

if filter_rules is None:
Expand All @@ -1387,11 +1405,25 @@ def produce_rules(
]
except RuleProcessingError as e:
raise ConfigError(f"Error Processing filter rules: {e}") from e

return rule_list

if added_filter_rules:
for i, rule in enumerate(added_filter_rules):
try:
rule_list.append(
Rule(
initial_data=rule,
defaults=defaults,
theory_parameters=theory_parameters,
loader=self.loader,
)
)
except RuleProcessingError as e:
raise ConfigError(f"Error processing added rule {i+1}: {e}") from e

return tuple(rule_list)

@configparser.record_from_defaults
def parse_default_filter_settings(self, spec: (str, type(None))):
def parse_default_filter_settings(self, spec: (str, type(None))): # type: ignore
return spec

def load_default_default_filter_settings(self, spec):
Expand All @@ -1415,7 +1447,7 @@ def load_default_default_filter_settings(self, spec):
display_alternatives="all",
)

def parse_filter_defaults(self, filter_defaults: (dict, type(None))):
def parse_filter_defaults(self, filter_defaults: (dict, type(None))): # type: ignore
"""A mapping containing the default kinematic limits to be used when
filtering data (when using internal cuts).
Currently these limits are ``q2min`` and ``w2min``.
@@ -1495,8 +1527,8 @@ def produce_data(

def _parse_data_input_from_(
self,
parse_from_value: (str, type(None)),
additional_context: (dict, type(None)) = None,
parse_from_value: (str, type(None)), # type: ignore
additional_context: (dict, type(None)) = None, # type: ignore
):
"""Function which parses the ``data_input`` from a namespace. Usage
is similar to :py:meth:`self.parse_from_` except this function bridges
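The net effect of the config.py changes is twofold: dataset_input mappings now accept a new_commondata flag that is threaded through to the loader, and runcards can extend rather than overwrite the default kinematic cuts, since added_filter_rules entries are parsed into immutable AddedFilterRule objects and appended to the default rule list in produce_rules. A hedged sketch of how such a rule is built; the field names follow the standard filter-rule syntax, and the dataset name and cut shown are illustrative only:

>>> from validphys.filters import AddedFilterRule
>>> # illustrative rule in the spirit of this PR: drop high-Q2 points
>>> rule_input = {
...     "dataset": "EXAMPLE_DIS_SET",   # hypothetical dataset name
...     "rule": "Q2 < 1000",            # keep only points below 1000 GeV^2
...     "reason": "cut high-Q data",
... }
>>> added = tuple(AddedFilterRule(**rule) for rule in [rule_input])  # as in parse_added_filter_rules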