Added filter cuts to cut high Q data #71

Open · wants to merge 35 commits into base: main
Commits (35)
992abff
added new pineparser from nnpdf and legacy option to load_fktable fun…
comane Apr 14, 2024
c33b941
added get_np_fkatable method to FKTableData
comane Apr 14, 2024
ccd7f14
added legacy and metadata args to FKTableSpec
comane Apr 14, 2024
2e641b0
first version of new fktable parser
comane Apr 14, 2024
e27f7ee
added luminosity mapping method to FKTableData
comane Apr 14, 2024
145d528
xgrid reshape
comane Apr 14, 2024
89fd055
include cuts when loading fk table, seems to work for DIS
comane Apr 14, 2024
ff86914
works with theory 270
comane Apr 14, 2024
8552a1e
tmp modifications
comane Apr 27, 2024
32a2891
added load_commondata method to CommonDataSpec
comane Apr 28, 2024
dfa6617
added commondatawriter module
comane Apr 28, 2024
9701a59
added export method to CommonData -> allows to use _filter_real_data …
comane Apr 28, 2024
e69ac93
common_data_reader_dataset and experiment in n3fit_data_utils now onl…
comane Apr 28, 2024
8fe5bea
when new_commondata: True -> legacy: False -> pass a TheoryMeta to FK…
comane Apr 28, 2024
a5878fa
added test for metadata existence, array append, compatibility with 2…
FrancescoMerlotti Jul 10, 2024
82245d8
fix typo
FrancescoMerlotti May 8, 2024
247ec4d
added load_commondata to core, level0_commondata_wc and make_level1_d…
FrancescoMerlotti Jul 15, 2024
39659d9
added parse_fakepdf to config.py
FrancescoMerlotti May 16, 2024
ff762f9
add chi2 provider functions
FrancescoMerlotti May 20, 2024
5366d70
added usage write_chi2
FrancescoMerlotti May 20, 2024
9a88500
fixed repo
FrancescoMerlotti May 20, 2024
bbf01ca
moved function in simunet_analysis & changed their name
FrancescoMerlotti May 26, 2024
7099803
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
a596c62
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
5dbdb45
added rules classes, static KIN_LABEL dict, and replaced cpp Export m…
FrancescoMerlotti Jul 15, 2024
ea24cba
added commondatawriter.py & export method for CommonData python objects
FrancescoMerlotti Jul 15, 2024
9e13d11
added xq2 map for hadronic MQQ processes ref. [2303.06159]
FrancescoMerlotti Jun 12, 2024
c8514a9
add .pdf PBSP logos
FrancescoMerlotti May 13, 2024
9b3aa02
Revert "added xq2 map for hadronic MQQ processes ref. [2303.06159]"
FrancescoMerlotti Jun 18, 2024
201234c
Revert "added commondatawriter.py & export method for CommonData pyth…
FrancescoMerlotti Jul 15, 2024
2f30c4a
Revert "added rules classes, static KIN_LABEL dict, and replaced cpp …
FrancescoMerlotti Jun 18, 2024
c416434
Revert "Revert "added rules classes, static KIN_LABEL dict, and repla…
FrancescoMerlotti Jun 18, 2024
fa1766e
Revert "Revert "added commondatawriter.py & export method for CommonD…
FrancescoMerlotti Jul 15, 2024
44b97d9
Revert "Revert "added xq2 map for hadronic MQQ processes ref. [2303.0…
FrancescoMerlotti Jun 18, 2024
957a992
added Q2min, Q2max to plot kincov with fixed y_lim
FrancescoMerlotti Jun 19, 2024
Binary file added PBSP_logos/PBSP_black.pdf
Binary file not shown.
Binary file added PBSP_logos/PBSP_dark.pdf
Binary file not shown.
Binary file added PBSP_logos/PBSP_light.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions validphys2/src/validphys/commondataparser.py
@@ -13,6 +13,8 @@
from validphys.core import peek_commondata_metadata
from validphys.coredata import CommonData

EXT = "pineappl.lz4"

def load_commondata(spec):
"""
Load the data corresponding to a CommonDataSpec object.
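The new module-level EXT constant records the file extension used by pineappl fktables. A minimal sketch of the path handling it enables; the folder layout and grid name here are illustrative assumptions, not part of this diff:

>>> from pathlib import Path
>>> from validphys.commondataparser import EXT  # EXT = "pineappl.lz4"
>>> grids = Path("fastkernel")                  # hypothetical theory subfolder
>>> (grids / f"FK_EXAMPLE.{EXT}").name          # hypothetical grid name
'FK_EXAMPLE.pineappl.lz4'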
84 changes: 84 additions & 0 deletions validphys2/src/validphys/commondatawriter.py
@@ -0,0 +1,84 @@
"""
This module contains functions to write commondata and systype
tables to files.
"""


def write_commondata_data(commondata, buffer):
"""
Write the commondata table to a buffer; this can be a memory map, a
compressed archive or a string buffer (for instance a StringIO object).


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or string buffer
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_commondata_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n"
buffer.write(header)
commondata.commondata_table.to_csv(buffer, sep="\t", header=None)


def write_commondata_to_file(commondata, path):
"""
Write the commondata table to a file.
"""
with open(path, "w") as file:
write_commondata_data(commondata, file)


def write_systype_data(commondata, buffer):
"""
Write the systype table to a buffer; this can be a memory map, a
compressed archive or a string buffer (for instance a StringIO object).


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or string buffer
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_systype_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.nsys}\n"
buffer.write(header)
commondata.systype_table.to_csv(buffer, sep="\t", header=None)


def write_systype_to_file(commondata, path):
"""
Write the systype table to a file.
"""
with open(path, "w") as file:
write_systype_data(commondata, file)
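Taken together, the two *_to_file wrappers let a CommonData object that was loaded in memory be written back out as plain-text commondata and systype tables. A minimal round-trip sketch, reusing the NMC example from the docstrings above (the output file names are illustrative, not a convention enforced by this module):

>>> from validphys.loader import Loader
>>> from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file
>>> cd = Loader().check_commondata("NMC").load_commondata_instance()
>>> write_commondata_to_file(cd, "DATA_NMC.dat")          # header + data table
>>> write_systype_to_file(cd, "SYSTYPE_NMC_DEFAULT.dat")  # systematics type table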
78 changes: 55 additions & 23 deletions validphys2/src/validphys/config.py
@@ -30,6 +30,14 @@
from reportengine import report
from reportengine.compat import yaml

from validphys.filters import (
Rule,
FilterRule,
AddedFilterRule,
RuleProcessingError,
default_filter_rules_input,
)

from validphys.core import (
DataGroupSpec,
DataSetInput,
@@ -38,6 +46,7 @@
MatchedCuts,
SimilarCuts,
ThCovMatSpec,
PDF,
)
from validphys.fitdata import fitted_replica_indexes, num_fitted_replicas
from validphys.loader import (
@@ -171,6 +180,10 @@ def parse_pdf(self, name: str):
except NotImplementedError as e:
raise ConfigError(str(e))
return pdf

def parse_fakepdf(self, name: str) -> PDF:
"""PDF set used to generate the fake data in a closure test."""
return self.parse_pdf(name)

def parse_load_weights_from_fit(self, name: str):
"""A fit in the results folder, containing at least a valid filter result."""
@@ -181,7 +194,7 @@ def parse_load_weights_from_fit(self, name: str):

@element_of("theoryids")
@_id_with_label
def parse_theoryid(self, theoryID: (str, int)):
def parse_theoryid(self, theoryID: (str, int)): # type: ignore
"""A number corresponding to the database theory ID where the
corresponding theory folder is installed in the data directory."""
try:
@@ -194,7 +207,7 @@ def parse_theoryid(self, theoryID: (str, int)):
display_alternatives="all",
)

def parse_use_cuts(self, use_cuts: (bool, str)):
def parse_use_cuts(self, use_cuts: (bool, str)): # type: ignore
"""Whether to filter the points based on the cuts applied in the fit,
or the whole data in the dataset. The possible options are:

@@ -232,7 +245,7 @@ def produce_replicas(self, nreplica: int):
return NSList(range(1, nreplica+1), nskey="replica")

def produce_inclusive_use_scalevar_uncertainties(self, use_scalevar_uncertainties: bool = False,
point_prescription: (str, None) = None):
point_prescription: (str, None) = None): # type: ignore
"""Whether to use a scale variation uncertainty theory covmat.
Checks whether a point prescription is included in the runcard and if so
assumes scale uncertainties are to be used."""
@@ -507,7 +520,7 @@ def produce_simu_parameters_linear_combinations(self, simu_parameters=None):
def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_parameters_scales, n_simu_parameters, simu_parameters_linear_combinations, simu_parameters=None):
"""The mapping that corresponds to the dataset specifications in the
fit files"""
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination"}
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination", "new_commondata"}
try:
name = dataset["dataset"]
if not isinstance(name, str):
@@ -517,6 +530,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
"'dataset' must be a mapping with " "'dataset' and 'sysnum'"
)

new_commondata = dataset.get("new_commondata", False)
sysnum = dataset.get("sys")
cfac = dataset.get("cfac", tuple())
frac = dataset.get("frac", 1)
@@ -559,7 +573,8 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
custom_group=custom_group,
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
**bsm_data
**bsm_data,
new_commondata=new_commondata,
)

def parse_use_fitcommondata(self, do_use: bool):
@@ -746,6 +761,7 @@ def produce_dataset(
use_fixed_predictions = dataset_input.use_fixed_predictions
contamination = dataset_input.contamination
contamination_data = contamination_data
new_commondata = dataset_input.new_commondata

try:
ds = self.loader.check_dataset(
@@ -763,6 +779,7 @@
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
contamination_data=contamination_data,
new_commondata=new_commondata,
)
except DataNotFoundError as e:
raise ConfigError(str(e), name, self.loader.available_datasets)
@@ -1199,7 +1216,7 @@ def res(*args, **kwargs):
return res

def produce_fitthcovmat(
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None # type: ignore
):
"""If a `fit` is specified and `use_thcovmat_if_present` is `True` then returns the
corresponding covariance matrix for the given fit if it exists. If the fit doesn't have a
@@ -1253,7 +1270,7 @@ def produce_fitthcovmat(
fit_theory_covmat = None
return fit_theory_covmat

def parse_speclabel(self, label: (str, type(None))):
def parse_speclabel(self, label: (str, type(None))): # type: ignore
"""A label for a dataspec. To be used in some plots"""
return label

@@ -1287,7 +1304,7 @@ def parse_groupby(self, grouping: str):
)
return grouping

def parse_norm_threshold(self, val: (numbers.Number, type(None))):
def parse_norm_threshold(self, val: (numbers.Number, type(None))): # type: ignore
"""The threshold to use for covariance matrix normalisation, sets
the maximum l2 norm of the inverse covariance matrix, by clipping
smallest eigenvalues
@@ -1310,7 +1327,7 @@ def produce_no_covmat_reg(self):
return {"norm_threshold": None}

@configparser.record_from_defaults
def parse_default_filter_rules(self, spec: (str, type(None))):
def parse_default_filter_rules(self, spec: (str, type(None))): # type: ignore
return spec

def load_default_default_filter_rules(self, spec):
@@ -1334,7 +1351,7 @@ def load_default_default_filter_rules(self, spec):
display_alternatives="all",
)

def parse_filter_rules(self, filter_rules: (list, type(None))):
def parse_filter_rules(self, filter_rules: (list, type(None))): # type: ignore
"""A list of filter rules. See https://docs.nnpdf.science/vp/filters.html
for details on the syntax"""
log.warning("Overwriting filter rules")
Expand All @@ -1346,6 +1363,13 @@ def parse_default_filter_rules_recorded_spec_(self, spec):
it reportengine detects a conflict in the `dataset` key.
"""
return spec

def parse_added_filter_rules(self, rules: (list, type(None)) = None): # type: ignore
"""
Returns a tuple of AddedFilterRule objects. Rules are immutable after parsing.
AddedFilterRule objects inherit from FilterRule objects.
"""
return tuple(AddedFilterRule(**rule) for rule in rules) if rules else None

def produce_rules(
self,
@@ -1355,15 +1379,9 @@
default_filter_rules=None,
filter_rules=None,
default_filter_rules_recorded_spec_=None,
added_filter_rules=None,
):

"""Produce filter rules based on the user defined input and defaults."""
from validphys.filters import (
Rule,
RuleProcessingError,
default_filter_rules_input,
)

theory_parameters = theoryid.get_description()

if filter_rules is None:
Expand All @@ -1387,11 +1405,25 @@ def produce_rules(
]
except RuleProcessingError as e:
raise ConfigError(f"Error Processing filter rules: {e}") from e

return rule_list

if added_filter_rules:
for i, rule in enumerate(added_filter_rules):
try:
rule_list.append(
Rule(
initial_data=rule,
defaults=defaults,
theory_parameters=theory_parameters,
loader=self.loader,
)
)
except RuleProcessingError as e:
raise ConfigError(f"Error processing added rule {i+1}: {e}") from e

return tuple(rule_list)

@configparser.record_from_defaults
def parse_default_filter_settings(self, spec: (str, type(None))):
def parse_default_filter_settings(self, spec: (str, type(None))): # type: ignore
return spec

def load_default_default_filter_settings(self, spec):
Expand All @@ -1415,7 +1447,7 @@ def load_default_default_filter_settings(self, spec):
display_alternatives="all",
)

def parse_filter_defaults(self, filter_defaults: (dict, type(None))):
def parse_filter_defaults(self, filter_defaults: (dict, type(None))): # type: ignore
"""A mapping containing the default kinematic limits to be used when
filtering data (when using internal cuts).
Currently these limits are ``q2min`` and ``w2min``.
@@ -1495,8 +1527,8 @@ def produce_data(

def _parse_data_input_from_(
self,
parse_from_value: (str, type(None)),
additional_context: (dict, type(None)) = None,
parse_from_value: (str, type(None)), # type: ignore
additional_context: (dict, type(None)) = None, # type: ignore
):
"""Function which parses the ``data_input`` from a namespace. Usage
is similar to :py:meth:`self.parse_from_` except this function bridges
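The net effect of the config.py changes is twofold: dataset_input mappings now accept a new_commondata flag that is threaded through to the loader, and runcards can extend rather than overwrite the default kinematic cuts, since added_filter_rules entries are parsed into immutable AddedFilterRule objects and appended to the default rule list in produce_rules. A hedged sketch of how such a rule is built; the field names follow the standard filter-rule syntax, and the dataset name and cut shown are illustrative only:

>>> from validphys.filters import AddedFilterRule
>>> # illustrative rule in the spirit of this PR: drop high-Q2 points
>>> rule_input = {
...     "dataset": "EXAMPLE_DIS_SET",   # hypothetical dataset name
...     "rule": "Q2 < 1000",            # keep only points below 1000 GeV^2
...     "reason": "cut high-Q data",
... }
>>> added = tuple(AddedFilterRule(**rule) for rule in [rule_input])  # as in parse_added_filter_rules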