Pineparser for compatibility with new theories #63

Open · wants to merge 28 commits into main from new_pineparser

Commits (28)
992abff
added new pineparser from nnpdf and legacy option to load_fktable fun…
comane Apr 14, 2024
c33b941
added get_np_fktable method to FKTableData
comane Apr 14, 2024
ccd7f14
added legacy and metadata args to FKTableSpec
comane Apr 14, 2024
2e641b0
first version of new fktable parser
comane Apr 14, 2024
e27f7ee
added luminosity mapping method to FKTableData
comane Apr 14, 2024
145d528
xgrid reshape
comane Apr 14, 2024
89fd055
include cuts when loading fk table, seems to work for DIS
comane Apr 14, 2024
ff86914
works with theory 270
comane Apr 14, 2024
8552a1e
tmp modifications
comane Apr 27, 2024
32a2891
added load_commondata method to CommonDataSpec
comane Apr 28, 2024
dfa6617
added commondatawriter module
comane Apr 28, 2024
9701a59
added export method to CommonData -> allows to use _filter_real_data …
comane Apr 28, 2024
e69ac93
common_data_reader_dataset and experiment in n3fit_data_utils now onl…
comane Apr 28, 2024
8fe5bea
when new_commondata: True -> legacy: False -> pass a TheoryMeta to FK…
comane Apr 28, 2024
a5878fa
added test for metadata existence, array append, compatibility with 2…
FrancescoMerlotti Jul 10, 2024
003f065
fix typo
FrancescoMerlotti May 8, 2024
a57a3b5
add .pdf PBSP logos
FrancescoMerlotti May 13, 2024
af2dc71
changed cuts to commondata_table_indices
FrancescoMerlotti May 30, 2024
848e9d1
added xq2 map for hadronic MQQ processes ref. [2303.06159]
FrancescoMerlotti Jun 12, 2024
ff92599
Revert "added xq2 map for hadronic MQQ processes ref. [2303.06159]"
FrancescoMerlotti Jun 18, 2024
5848365
Merge branch 'main' into new_pineparser
FrancescoMerlotti Jul 24, 2024
433cfd5
updated to support new theory format
FrancescoMerlotti Jul 26, 2024
1ad0780
added operation in new format framework
FrancescoMerlotti Nov 23, 2024
1ecbc0b
added new_commondata to check_compound in loader
FrancescoMerlotti Nov 24, 2024
2331eed
right formatting filtered artificial data
FrancescoMerlotti Nov 24, 2024
188a57d
loading cfac for new theory fktables
FrancescoMerlotti Nov 26, 2024
18665c1
fixed cfac array for new format
Nov 26, 2024
9eb043b
contamination working in python
FrancescoMerlotti Nov 28, 2024
4 changes: 3 additions & 1 deletion validphys2/src/validphys/commondataparser.py
@@ -13,6 +13,8 @@
from validphys.core import peek_commondata_metadata
from validphys.coredata import CommonData

EXT = "pineappl.lz4"

def load_commondata(spec):
"""
Load the data corresponding to a CommonDataSpec object.
@@ -53,7 +55,7 @@ def parse_commondata(commondatafile, systypefile, setname):
commondatatable.columns = commondataheader
commondatatable.set_index("entry", inplace=True)
ndata = len(commondatatable)
commondataproc = commondatatable["process"][1]
commondataproc = commondatatable["process"].iloc[0]
# Check for consistency with commondata metadata
cdmetadata = peek_commondata_metadata(commondatafile)
if (setname, nsys, ndata) != attrgetter('name', 'nsys', 'ndata')(cdmetadata):
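The one functional change in this file swaps label-based indexing for positional indexing. A minimal pandas sketch (hypothetical values) of why that matters: the commondata table carries a 1-based "entry" index, so table["process"][1] only works while the label 1 happens to exist, whereas .iloc[0] always returns the first remaining row.

import pandas as pd

# Toy two-point table mimicking the commondata layout (hypothetical values)
table = pd.DataFrame({"process": ["DIS_NCE", "DIS_NCE"]},
                     index=pd.Index([1, 2], name="entry"))
cut = table.drop(1)               # e.g. entry 1 removed upstream
print(cut["process"].iloc[0])     # "DIS_NCE": first remaining row, always valid
# cut["process"][1] would raise KeyError here: label 1 no longer exists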
86 changes: 86 additions & 0 deletions validphys2/src/validphys/commondatawriter.py
@@ -0,0 +1,86 @@
"""
This module contains functions to write commondata and systypes
tables to files.
"""
import numpy as np


def write_commondata_data(commondata, buffer):
"""
write the commondata table to a buffer, which can be a memory map,
a compressed archive or a string buffer (for instance a StringIO object)


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or strings
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_commondata_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.setname} {commondata.nsys} {commondata.ndata}\n"
buffer.write(header)
commondata.commondata_table.index = np.arange(1, len(commondata.commondata_table)+1)
commondata.commondata_table.to_csv(buffer, float_format="%20.12e", sep="\t", header=None)


def write_commondata_to_file(commondata, path):
"""
write commondata table to file
"""
with open(path, "w") as file:
write_commondata_data(commondata, file)


def write_systype_data(commondata, buffer):
"""
write the systype table to a buffer, which can be a memory map,
a compressed archive or a string buffer (for instance a StringIO object)


Parameters
----------

commondata : validphys.coredata.CommonData

buffer : memory map, compressed archive or strings
example: StringIO object


Example
-------
>>> from validphys.loader import Loader
>>> from io import StringIO

>>> l = Loader()
>>> cd = l.check_commondata("NMC").load_commondata_instance()
>>> sio = StringIO()
>>> write_systype_data(cd,sio)
>>> print(sio.getvalue())

"""
header = f"{commondata.nsys}\n"
buffer.write(header)
commondata.systype_table.to_csv(buffer, sep="\t", header=None)


def write_systype_to_file(commondata, path):
"""
write systype table to file
"""
with open(path, "w") as file:
write_systype_data(commondata, file)
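For reference, a self-contained sketch (toy values, no validphys imports) of the layout write_commondata_data emits: a "<setname> <nsys> <ndata>" header line, followed by the table as tab-separated values with a 1-based row index, as libNNPDF expects.

from io import StringIO
import numpy as np
import pandas as pd

# Toy stand-in for CommonData.commondata_table (hypothetical numbers)
table = pd.DataFrame({"process": ["DIS", "DIS"], "data": [1.0, 1.1]})
buf = StringIO()
buf.write("TOYSET 0 2\n")                   # "<setname> <nsys> <ndata>" header
table.index = np.arange(1, len(table) + 1)  # rows are numbered from 1
table.to_csv(buf, float_format="%20.12e", sep="\t", header=None)
print(buf.getvalue())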
8 changes: 6 additions & 2 deletions validphys2/src/validphys/config.py
@@ -512,7 +512,7 @@ def produce_simu_parameters_linear_combinations(self, simu_parameters=None):
def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_parameters_scales, n_simu_parameters, simu_parameters_linear_combinations, simu_parameters=None):
"""The mapping that corresponds to the dataset specifications in the
fit files"""
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination"}
known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination", "new_commondata"}
try:
name = dataset["dataset"]
if not isinstance(name, str):
@@ -522,6 +522,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
"'dataset' must be a mapping with " "'dataset' and 'sysnum'"
)

new_commondata = dataset.get("new_commondata", False)
sysnum = dataset.get("sys")
cfac = dataset.get("cfac", tuple())
frac = dataset.get("frac", 1)
@@ -564,7 +565,8 @@ def parse_dataset_input(self, dataset: Mapping, simu_para
custom_group=custom_group,
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
**bsm_data
**bsm_data,
new_commondata=new_commondata,
)

def parse_use_fitcommondata(self, do_use: bool):
@@ -751,6 +753,7 @@ def produce_dataset(
use_fixed_predictions = dataset_input.use_fixed_predictions
contamination = dataset_input.contamination
contamination_data = contamination_data
new_commondata = dataset_input.new_commondata

try:
ds = self.loader.check_dataset(
@@ -768,6 +771,7 @@ def produce_dataset(
use_fixed_predictions=use_fixed_predictions,
contamination=contamination,
contamination_data=contamination_data,
new_commondata=new_commondata,
)
except DataNotFoundError as e:
raise ConfigError(str(e), name, self.loader.available_datasets)
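A sketch of the mapping parse_dataset_input now accepts (dataset name hypothetical); new_commondata defaults to False, so existing runcards keep working unchanged:

# Hypothetical runcard entry as received by parse_dataset_input;
# only "dataset" is required, every other key is optional
dataset = {"dataset": "SOME_NEW_DATASET", "frac": 0.75, "new_commondata": True}

# Mirrors the parsing above: the flag is read with a False default
new_commondata = dataset.get("new_commondata", False)
assert new_commondata is True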
24 changes: 12 additions & 12 deletions validphys2/src/validphys/convolution.py
@@ -105,7 +105,7 @@ def _predictions(dataset, pdf, fkfunc):
all replicas, central, etc) according to the provided ``fkfunc``, which
should have the same interface as e.g. ``fk_predictions``.
"""
opfunc = OP[dataset.op]
opfunc = OP[dataset.op.upper()]
if dataset.cuts is None:
raise PredictionsRequireCutsError(
"FKTables do not always generate predictions for some datapoints "
@@ -119,17 +119,17 @@
# predictions instead.
all_predictions = []
for fk in dataset.fkspecs:
if not fk.use_fixed_predictions:
all_predictions.append(fkfunc(load_fktable(fk).with_cuts(cuts), pdf))
else:
with open(fk.fixed_predictions_path, 'rb') as f:
fixed_predictions = np.array(yaml.safe_load(f)['SM_fixed'])
# Now need to reshape it according to the expected number of predictions
if fkfunc == central_fk_predictions:
all_predictions.append(pd.DataFrame(fixed_predictions, columns=['data']))
elif fkfunc == fk_predictions:
fixed_predictions = np.tile(fixed_predictions, (pdf.get_members(), 1))
all_predictions.append(pd.DataFrame(fixed_predictions.T, columns=[i for i in range(pdf.get_members())]))
if not fk.use_fixed_predictions:
all_predictions.append(fkfunc(load_fktable(fk).with_cuts(cuts), pdf))
else:
with open(fk.fixed_predictions_path, 'rb') as f:
fixed_predictions = np.array(yaml.safe_load(f)['SM_fixed'])
# Now need to reshape it according to the expected number of predictions
if fkfunc == central_fk_predictions:
all_predictions.append(pd.DataFrame(fixed_predictions, columns=['data']))
elif fkfunc == fk_predictions:
fixed_predictions = np.tile(fixed_predictions, (pdf.get_members(), 1))
all_predictions.append(pd.DataFrame(fixed_predictions.T, columns=[i for i in range(pdf.get_members())]))

return opfunc(*all_predictions)

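The .upper() guard matters because the new-format metadata can record the operation name in lowercase while the OP table is keyed in uppercase; an abridged sketch (toy OP table, not the full validphys mapping):

import operator

# Abridged stand-in for validphys.convolution.OP (the real table has more entries)
OP = {"NULL": lambda x: x, "RATIO": operator.truediv}

op_name = "ratio"             # hypothetical value read from new-format metadata
opfunc = OP[op_name.upper()]  # without .upper() this lookup would raise KeyError
assert opfunc(3.0, 2.0) == 1.5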
41 changes: 37 additions & 4 deletions validphys2/src/validphys/core.py
@@ -327,7 +327,7 @@ def load_commondata(self, cuts=None):
if cuts is not None:
cd = cd.with_cuts(cuts)
return cd

@property
def plot_kinlabels(self):
return get_plot_kinlabels(self)
@@ -336,7 +336,7 @@ def plot_kinlabels(self):
class DataSetInput(TupleComp):
"""Represents whatever the user enters in the YAML to specify a
dataset."""
def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, simu_parameters_linear_combinations, use_fixed_predictions, contamination):
def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, simu_parameters_linear_combinations, use_fixed_predictions, contamination, new_commondata):
self.name=name
self.sys=sys
self.cfac = cfac
@@ -347,6 +347,7 @@ def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_paramete
self.simu_parameters_linear_combinations = simu_parameters_linear_combinations
self.use_fixed_predictions = use_fixed_predictions
self.contamination = contamination
self.new_commondata = new_commondata
super().__init__(name, sys, cfac, frac, weight, custom_group)

def __str__(self):
@@ -584,19 +585,51 @@ def __str__(self):
return self.name

class FKTableSpec(TupleComp):
def __init__(self, fkpath, cfactors, use_fixed_predictions=False, fixed_predictions_path=None):
def __init__(self, fkpath, cfactors, use_fixed_predictions=False, fixed_predictions_path=None, theory_meta=None, legacy=True):
self.fkpath = fkpath
self.cfactors = cfactors
self.cfactors = cfactors if cfactors is not None else []
self.legacy = legacy
self.use_fixed_predictions = use_fixed_predictions
self.fixed_predictions_path = fixed_predictions_path

# if not isinstance(fkpath, (tuple, list)):
# self.legacy = True
# else:
# fkpath = tuple(fkpath)

if not self.legacy:
fkpath = tuple([fkpath])
self.theory_meta = theory_meta

# For non-legacy theory, add the metadata since it defines how the theory is to be loaded
# and thus, it should also define the hash of the class
# if not self.legacy:
# super().__init__(fkpath, cfactors, self.metadata)
# else:
super().__init__(fkpath, cfactors)


#NOTE: We cannot do this because Fkset owns the fktable, and trying
#to reuse the loaded one fails after it gets deleted.
#@functools.lru_cache()
def load(self):
return FKTable(str(self.fkpath), [str(factor) for factor in self.cfactors])


def load_cfactors(self):
"""Each of the sub-fktables that form the complete FKTable can have several cfactors
applied to it. This function uses ``parse_cfactor`` to make them into CFactorData
"""
from validphys.fkparser import parse_cfactor
if self.legacy:
raise NotImplementedError("cfactor loading from spec not implemented for old theories")
cfacs = []
for c in self.cfactors:
with open(c, "rb") as f:
cfacs.append(parse_cfactor(f))
return [cfacs]

class PositivitySetSpec(DataSetSpec):
"""Extends DataSetSpec to work around the particularities of the positivity datasets"""

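A minimal construction sketch under the new flag (paths hypothetical, assuming validphys is importable): legacy specs keep a single path, while non-legacy specs carry the theory metadata and wrap the path into a 1-tuple inside __init__:

from validphys.core import FKTableSpec

# Legacy table: a single .dat path, legacy defaults to True
legacy_spec = FKTableSpec("theories/FK_NMC.dat", cfactors=None)

# New-format table: a pineappl grid plus its TheoryMeta (None here for brevity)
new_spec = FKTableSpec("theories/NMC.pineappl.lz4", cfactors=[],
                       theory_meta=None, legacy=False)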
81 changes: 79 additions & 2 deletions validphys2/src/validphys/coredata.py
@@ -5,11 +5,11 @@

"""
import dataclasses
from typing import Dict
import yaml

import numpy as np
import pandas as pd

from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file

@dataclasses.dataclass(eq=False)
class FKTableData:
@@ -97,6 +97,68 @@ def with_cuts(self, cuts):
newsigma = self.sigma.loc[cuts]
return dataclasses.replace(self, ndata=newndata, sigma=newsigma)

def get_np_fktable(self):
"""Returns the fktable as a dense numpy array that can be directly
manipulated with numpy

The return shape is:
(ndata, nbasis, nx) for DIS
(ndata, nbasis, nx, nx) for hadronic
where nx is the length of the xgrid
and nbasis the number of flavour combinations that contribute
"""
# Read up the shape of the output table
ndata = self.ndata
nx = len(self.xgrid)
nbasis = self.sigma.shape[1]

if ndata == 0:
if self.hadronic:
return np.zeros((ndata, nbasis, nx, nx))
return np.zeros((ndata, nbasis, nx))

# Make the dataframe into a dense numpy array

# First get the data index out of the way
# this is necessary because of cuts/shifts and for performance reasons
# otherwise we will be putting things in a numpy array in very awkward orders
ns = self.sigma.unstack(level=("data",), fill_value=0)
x1 = ns.index.get_level_values(0)

if self.hadronic:
x2 = ns.index.get_level_values(1)
fk_raw = np.zeros((nx, nx, ns.shape[1]))
fk_raw[x2, x1, :] = ns.values

# The output is (ndata, basis, x1, x2)
fktable = fk_raw.reshape((nx, nx, nbasis, ndata)).T
else:
fk_raw = np.zeros((nx, ns.shape[1]))
fk_raw[x1, :] = ns.values

# The output is (ndata, basis, x1)
fktable = fk_raw.reshape((nx, nbasis, ndata)).T

return fktable


@property
def luminosity_mapping(self):
"""Return the flavour combinations that contribute to the fktable
in the form of a single array

The return shape is:
(nbasis,) for DIS
(nbasis*2,) for hadronic
"""
basis = self.sigma.columns.to_numpy()
if self.hadronic:
ret = np.zeros(14 * 14, dtype=bool)
ret[basis] = True
basis = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
return basis



@dataclasses.dataclass(eq=False)
class CFactorData:
@@ -302,3 +364,18 @@ def with_central_value(self, cv):
tb = self.commondata_table.copy()
tb["data"] = cv
return dataclasses.replace(self, commondata_table=tb)

def export(self, path):
"""Export the data, and error types
Use the same format as libNNPDF:

- A DATA_<dataset>.dat file with the dataframe of accepted points
- A systypes/STYPES_<dataset>.dat file with the error types
"""

dat_path = path / f"DATA_{self.setname}.dat"
sys_path = path / "systypes" / f"SYSTYPE_{self.setname}_DEFAULT.dat"
sys_path.parent.mkdir(exist_ok=True)

write_systype_to_file(self, sys_path)
write_commondata_to_file(self, dat_path)
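To make the dense conversion in get_np_fktable concrete, a toy DIS-like example (hypothetical numbers) reproducing its core steps:

import numpy as np
import pandas as pd

# sigma mirrors FKTableData.sigma for DIS: MultiIndex (data, x) rows, one
# column per flavour-basis entry; absent points are implicit zeros
idx = pd.MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1)], names=["data", "x"])
sigma = pd.DataFrame([[1.0], [2.0], [3.0]], index=idx, columns=[5])
ndata, nx, nbasis = 2, 3, 1

ns = sigma.unstack(level=("data",), fill_value=0)  # columns become (basis, data)
x1 = ns.index.get_level_values(0)
fk_raw = np.zeros((nx, ns.shape[1]))
fk_raw[x1, :] = ns.values
fktable = fk_raw.reshape((nx, nbasis, ndata)).T    # -> (ndata, nbasis, nx)
assert fktable.shape == (2, 1, 3)
assert fktable[0, 0, 0] == 1.0 and fktable[0, 0, 2] == 2.0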
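And the hadronic branch of luminosity_mapping decomposes each flattened 14x14 luminosity index k into the flavour pair (k // 14, k % 14), interleaving the pairs into a single array; a one-channel sketch (hypothetical channel index):

import numpy as np

basis = np.array([5 * 14 + 3])  # hypothetical single channel: flavour pair (5, 3)
ret = np.zeros(14 * 14, dtype=bool)
ret[basis] = True
pairs = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
assert (pairs == [5, 3]).all()  # (first flavour, second flavour) interleaved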