Comparing changes

base repository: HEP-PBSP/SIMUnet
base: 09759a211269f4300b83b4c97a9214d34226f910
head repository: HEP-PBSP/SIMUnet
compare: 957a9923b73f01a76c79c1126d9024ba805bf536
2 changes: 2 additions & 0 deletions validphys2/src/validphys/commondataparser.py
@@ -13,6 +13,8 @@
 from validphys.core import peek_commondata_metadata
 from validphys.coredata import CommonData
 
+EXT = "pineappl.lz4"
+
 def load_commondata(spec):
     """
     Load the data corresponding to a CommonDataSpec object.
8 changes: 6 additions & 2 deletions validphys2/src/validphys/config.py
@@ -520,7 +520,7 @@ def produce_simu_parameters_linear_combinations(self, simu_parameters=None):
     def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_parameters_scales, n_simu_parameters, simu_parameters_linear_combinations, simu_parameters=None):
         """The mapping that corresponds to the dataset specifications in the
         fit files"""
-        known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination"}
+        known_keys = {"dataset", "sys", "cfac", "frac", "weight", "custom_group", "simu_fac", "use_fixed_predictions", "contamination", "new_commondata"}
         try:
             name = dataset["dataset"]
             if not isinstance(name, str):
@@ -530,6 +530,7 @@ def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_para
                 "'dataset' must be a mapping with " "'dataset' and 'sysnum'"
             )
 
+        new_commondata = dataset.get("new_commondata", False)
         sysnum = dataset.get("sys")
         cfac = dataset.get("cfac", tuple())
         frac = dataset.get("frac", 1)
@@ -572,7 +573,8 @@ def parse_dataset_input(self, dataset: Mapping, simu_parameters_names, simu_para
             custom_group=custom_group,
             use_fixed_predictions=use_fixed_predictions,
             contamination=contamination,
-            **bsm_data
+            **bsm_data,
+            new_commondata=new_commondata,
         )
 
     def parse_use_fitcommondata(self, do_use: bool):
@@ -759,6 +761,7 @@ def produce_dataset(
         use_fixed_predictions = dataset_input.use_fixed_predictions
         contamination = dataset_input.contamination
         contamination_data = contamination_data
+        new_commondata = dataset_input.new_commondata
 
         try:
             ds = self.loader.check_dataset(
@@ -776,6 +779,7 @@ def produce_dataset(
                 use_fixed_predictions=use_fixed_predictions,
                 contamination=contamination,
                 contamination_data=contamination_data,
+                new_commondata=new_commondata,
             )
         except DataNotFoundError as e:
             raise ConfigError(str(e), name, self.loader.available_datasets)
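
For reference, a minimal sketch of the dataset mapping that parse_dataset_input now accepts, written as the Python mapping the parser receives from the runcard (the set name and values are hypothetical):

dataset = {
    "dataset": "EXAMPLE_SET",   # placeholder name
    "frac": 0.75,
    "new_commondata": True,     # opt in to the new commondata/pineappl path
}
new_commondata = dataset.get("new_commondata", False)  # True here; False when the key is omitted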
35 changes: 32 additions & 3 deletions validphys2/src/validphys/core.py
@@ -336,7 +336,7 @@ def plot_kinlabels(self):
 class DataSetInput(TupleComp):
     """Represents whatever the user enters in the YAML to specify a
     dataset."""
-    def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, simu_parameters_linear_combinations, use_fixed_predictions, contamination):
+    def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_parameters_names, simu_parameters_linear_combinations, use_fixed_predictions, contamination, new_commondata):
         self.name=name
         self.sys=sys
         self.cfac = cfac
@@ -347,6 +347,7 @@ def __init__(self, *, name, sys, cfac, frac, weight, custom_group, simu_paramete
         self.simu_parameters_linear_combinations = simu_parameters_linear_combinations
         self.use_fixed_predictions = use_fixed_predictions
         self.contamination = contamination
+        self.new_commondata = new_commondata
         super().__init__(name, sys, cfac, frac, weight, custom_group)
 
     def __str__(self):
@@ -584,19 +585,47 @@ def __str__(self):
         return self.name
 
 class FKTableSpec(TupleComp):
-    def __init__(self, fkpath, cfactors, use_fixed_predictions=False, fixed_predictions_path=None):
+    def __init__(self, fkpath, cfactors, use_fixed_predictions=False, fixed_predictions_path=None, theory_meta=None, legacy=True):
         self.fkpath = fkpath
-        self.cfactors = cfactors
+        self.cfactors = cfactors if cfactors is not None else []
+        self.legacy = legacy
         self.use_fixed_predictions = use_fixed_predictions
         self.fixed_predictions_path = fixed_predictions_path
 
+        # if not isinstance(fkpath, (tuple, list)):
+        #     self.legacy = True
+        # else:
+        #     fkpath = tuple(fkpath)
+
+        if not self.legacy:
+            fkpath = tuple([fkpath])
+        self.theory_meta = theory_meta
+
+        # For non-legacy theory, add the metadata since it defines how the theory is to be loaded
+        # and thus, it should also define the hash of the class
+        # if not self.legacy:
+        #     super().__init__(fkpath, cfactors, self.metadata)
+        # else:
         super().__init__(fkpath, cfactors)
 
     #NOTE: We cannot do this because Fkset owns the fktable, and trying
     #to reuse the loaded one fails after it gets deleted.
     #@functools.lru_cache()
     def load(self):
         return FKTable(str(self.fkpath), [str(factor) for factor in self.cfactors])
 
+    def load_cfactors(self):
+        """Each of the sub-fktables that form the complete FKTable can have several cfactors
+        applied to it. This function uses ``parse_cfactor`` to make them into CFactorData
+        """
+        from validphys.fkparser import parse_cfactor
+        if self.legacy:
+            raise NotImplementedError("cfactor loading from spec not implemented for old theories")
+
+        return [[parse_cfactor(c.open("rb")) for c in cfacs] for cfacs in self.cfactors]
+
 class PositivitySetSpec(DataSetSpec):
     """Extends DataSetSpec to work around the particularities of the positivity datasets"""
 
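A hedged sketch of how the revised FKTableSpec behaves (paths are placeholders, not real grids): cfactors=None is normalised to an empty list, a non-legacy fkpath is wrapped in a tuple before hashing, and load_cfactors refuses to run on legacy specs.

from validphys.core import FKTableSpec

new_spec = FKTableSpec("EXAMPLE.pineappl.lz4", cfactors=None, legacy=False)
assert new_spec.cfactors == []       # None normalised to an empty list
old_spec = FKTableSpec("FK_EXAMPLE.dat", cfactors=[])  # legacy=True by default
# old_spec.load_cfactors()           # would raise NotImplementedError for legacy specs
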
66 changes: 64 additions & 2 deletions validphys2/src/validphys/coredata.py
@@ -5,13 +5,13 @@
 """
 import dataclasses
 from typing import Dict
+import yaml
 
+from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file
 
 import numpy as np
 import pandas as pd
 
-from validphys.commondatawriter import write_commondata_to_file, write_systype_to_file
-
 @dataclasses.dataclass(eq=False)
 class FKTableData:
@@ -99,6 +99,68 @@ def with_cuts(self, cuts):
         newsigma = self.sigma.loc[cuts]
         return dataclasses.replace(self, ndata=newndata, sigma=newsigma)
 
+    def get_np_fktable(self):
+        """Returns the fktable as a dense numpy array that can be directly
+        manipulated with numpy
+
+        The return shape is:
+            (ndata, nbasis, nx) for DIS
+            (ndata, nbasis, nx, nx) for hadronic
+        where nx is the length of the xgrid
+        and nbasis the number of flavour contributions that contribute
+        """
+        # Read up the shape of the output table
+        ndata = self.ndata
+        nx = len(self.xgrid)
+        nbasis = self.sigma.shape[1]
+
+        if ndata == 0:
+            if self.hadronic:
+                return np.zeros((ndata, nbasis, nx, nx))
+            return np.zeros((ndata, nbasis, nx))
+
+        # Make the dataframe into a dense numpy array
+
+        # First get the data index out of the way
+        # this is necessary because of cuts/shifts and for performance reasons
+        # otherwise we will be putting things in a numpy array in very awkward orders
+        ns = self.sigma.unstack(level=("data",), fill_value=0)
+        x1 = ns.index.get_level_values(0)
+
+        if self.hadronic:
+            x2 = ns.index.get_level_values(1)
+            fk_raw = np.zeros((nx, nx, ns.shape[1]))
+            fk_raw[x2, x1, :] = ns.values
+
+            # The output is (ndata, basis, x1, x2)
+            fktable = fk_raw.reshape((nx, nx, nbasis, ndata)).T
+        else:
+            fk_raw = np.zeros((nx, ns.shape[1]))
+            fk_raw[x1, :] = ns.values
+
+            # The output is (ndata, basis, x1)
+            fktable = fk_raw.reshape((nx, nbasis, ndata)).T
+
+        return fktable
+
+    @property
+    def luminosity_mapping(self):
+        """Return the flavour combinations that contribute to the fktable
+        in the form of a single array
+
+        The return shape is:
+            (nbasis,) for DIS
+            (nbasis*2,) for hadronic
+        """
+        basis = self.sigma.columns.to_numpy()
+        if self.hadronic:
+            ret = np.zeros(14 * 14, dtype=bool)
+            ret[basis] = True
+            basis = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
+        return basis
 
 
 @dataclasses.dataclass(eq=False)
 class CFactorData:
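
As a sanity check on the hadronic branch of luminosity_mapping above, a small standalone sketch (the flat indices are made up): column indices into the flattened 14x14 flavour grid are expanded into interleaved (i, j) pairs.

import numpy as np

basis = np.array([15, 30])          # hypothetical flat indices: 15 -> (1, 1), 30 -> (2, 2)
ret = np.zeros(14 * 14, dtype=bool)
ret[basis] = True
pairs = np.array(np.where(ret.reshape(14, 14))).T.reshape(-1)
print(pairs)                        # [1 1 2 2], i.e. the pairs (1, 1) and (2, 2) flattened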
10 changes: 8 additions & 2 deletions validphys2/src/validphys/fkparser.py
@@ -29,6 +29,7 @@
 import pandas as pd
 
 from validphys.coredata import FKTableData, CFactorData
+from validphys.pineparser import pineappl_reader
 
 
@@ -53,8 +54,13 @@ class GridInfo:
 def load_fktable(spec):
     """Load the data corresponding to a FKSpec object. The cfactors
     will be applied to the grid."""
-    with open_fkpath(spec.fkpath) as handle:
-        tabledata = parse_fktable(handle)
+    if spec.legacy:
+        with open_fkpath(spec.fkpath) as handle:
+            tabledata = parse_fktable(handle)
+
+    else:
+        tabledata = pineappl_reader(spec)
 
     if not spec.cfactors:
         return tabledata
 
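A short usage sketch of the dispatch above (paths are placeholders; with real files, load_fktable routes on spec.legacy — the specs below are only constructed, not loaded):

from validphys.core import FKTableSpec
from validphys.fkparser import load_fktable

legacy_spec = FKTableSpec("FK_EXAMPLE.dat", cfactors=[])                    # legacy=True -> parse_fktable
pine_spec = FKTableSpec("EXAMPLE.pineappl.lz4", cfactors=[], legacy=False)  # -> pineappl_reader
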
47 changes: 40 additions & 7 deletions validphys2/src/validphys/loader.py
@@ -32,6 +32,7 @@
                              InternalCutsWrapper, HyperscanSpec)
 from validphys.utils import tempfile_cleaner
 from validphys import lhaindex
+from validphys.pineparser import parse_theory_meta
 
 DEFAULT_NNPDF_PROFILE_PATH = f"{sys.prefix}/share/NNPDF/nnprofile.yaml"
 
@@ -351,7 +352,7 @@ def get_commondata(self, setname, sysnum):
         return cd.load()
 
     # @functools.lru_cache()
-    def check_fktable(self, theoryID, setname, cfac, use_fixed_predictions=False):
+    def check_fktable(self, theoryID, setname, cfac, use_fixed_predictions=False, new_commondata=False):
         _, theopath = self.check_theoryID(theoryID)
 
         if use_fixed_predictions:
@@ -362,14 +363,45 @@ def check_fktable(self, theoryID, setname, cfac, use_fixed_predictions=False):
             fixed_predictions_path = theopath / 'simu_factors' / ('SIMU_%s.yaml' % setname)
             cfactors = self.check_cfactor(theoryID, setname, cfac)
             return FKTableSpec(fkpath, cfactors, use_fixed_predictions=True, fixed_predictions_path=fixed_predictions_path)
 
-        fkpath = theopath / 'fastkernel' / ('FK_%s.dat' % setname)
-        if not fkpath.exists():
-            raise FKTableNotFound(("Could not find FKTable for set '%s'. "
-                                   "File '%s' not found") % (setname, fkpath))
+        # use a different file name for the FK table if the commondata is new
+        if new_commondata:
+            # Need to pass a TheoryMeta object to FKTableSpec
+            path_metadata = theopath / 'fastkernel' / f'{setname}_metadata.yaml'
+            if not path_metadata.exists():
+                raise InconsistentMetaDataError(f"Could not find '_metadata.yaml' file for set {setname}. "
+                                                f"File '{path_metadata}' not found.")
+            # get the observable name from the setname
+            with open(path_metadata, 'r') as f:
+                metadata = yaml.safe_load(f)
+            # NOTE: write a "_metadata.yaml" file for each observable (then `metadata["implemented_observables"][0]` makes sense)
+            fktables = metadata["implemented_observables"][0]["theory"]["FK_tables"][0]
+            fkpath = tuple([theopath / 'fastkernel' / f'{fktable}.pineappl.lz4' for fktable in fktables])
+            for path in fkpath:
+                if not path.exists():
+                    raise FKTableNotFound(("Could not find FKTable for set '%s'. "
+                                           "File '%s' not found") % (setname, path))
+        else:
+            fkpath = theopath / 'fastkernel' / ('FK_%s.dat' % setname)
+            if not fkpath.exists():
+                raise FKTableNotFound(("Could not find FKTable for set '%s'. "
+                                       "File '%s' not found") % (setname, fkpath))
 
         cfactors = self.check_cfactor(theoryID, setname, cfac)
-        return FKTableSpec(fkpath, cfactors)
+        if new_commondata:
+            common_prefix = os.path.commonprefix([metadata['setname'], setname])
+
+            observable_name = setname[len(common_prefix):]
+            if observable_name.startswith('_'):
+                observable_name = observable_name[1:]
+
+            theory_meta = parse_theory_meta(path_metadata, observable_name=observable_name)
+
+            return FKTableSpec(fkpath, cfactors, theory_meta=theory_meta, legacy=False)
+        else:
+            return FKTableSpec(fkpath, cfactors)
 
     def check_compound(self, theoryID, setname, cfac):
         thid, theopath = self.check_theoryID(theoryID)
@@ -549,6 +581,7 @@ def check_dataset(
         use_fixed_predictions=False,
         contamination=None,
         contamination_data=None,
+        new_commondata=False,
     ):
 
         if not isinstance(theoryid, TheoryIDSpec):
@@ -561,7 +594,7 @@ def check_dataset(
         try:
             fkspec, op = self.check_compound(theoryno, name, cfac)
         except CompoundNotFound:
-            fkspec = self.check_fktable(theoryno, name, cfac, use_fixed_predictions=use_fixed_predictions)
+            fkspec = self.check_fktable(theoryno, name, cfac, use_fixed_predictions=use_fixed_predictions, new_commondata=new_commondata)
             op = None
 
         #Note this is simply for convenience when scripting. The config will
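
A hedged sketch of the minimal `_metadata.yaml` structure that check_fktable relies on, together with the observable-name extraction (all names are hypothetical; real files carry more fields):

import os

metadata = {
    "setname": "EXAMPLE_SET",                               # hypothetical
    "implemented_observables": [
        {"theory": {"FK_tables": [["GRID_A", "GRID_B"]]}},  # hypothetical grid names
    ],
}
fktables = metadata["implemented_observables"][0]["theory"]["FK_tables"][0]
# -> ["GRID_A", "GRID_B"]; each becomes <theopath>/fastkernel/<name>.pineappl.lz4

setname = "EXAMPLE_SET_OBS"
common_prefix = os.path.commonprefix([metadata["setname"], setname])
observable_name = setname[len(common_prefix):]
if observable_name.startswith("_"):
    observable_name = observable_name[1:]
# observable_name == "OBS"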
31 changes: 20 additions & 11 deletions validphys2/src/validphys/n3fit_data.py
@@ -180,11 +180,20 @@ def _mask_fk_tables(dataset_dicts, tr_masks):
         vl_fks = []
         ex_fks = []
         vl_mask = ~tr_mask
 
         for fktable_dict in dataset_dict["fktables"]:
-            tr_fks.append(fktable_dict["fktable"][tr_mask])
-            vl_fks.append(fktable_dict["fktable"][vl_mask])
-            ex_fks.append(fktable_dict.get("fktable"))
-        dataset_dict['ds_tr_mask'] = tr_mask
+            if not dataset_dict["use_fixed_predictions"]:
+                tr_fks.append(fktable_dict["fktable"][tr_mask])
+                vl_fks.append(fktable_dict["fktable"][vl_mask])
+                ex_fks.append(fktable_dict.get("fktable"))
+                dataset_dict['ds_tr_mask'] = tr_mask
+            # note: fixed observables have a fake fktable
+            else:
+                tr_fks.append(fktable_dict["fktable"])
+                vl_fks.append([])
+                ex_fks.append(fktable_dict.get("fktable"))
+                dataset_dict['ds_tr_mask'] = tr_mask
 
         dataset_dict["tr_fktables"] = tr_fks
         dataset_dict["vl_fktables"] = vl_fks
         dataset_dict["ex_fktables"] = ex_fks
@@ -243,13 +252,13 @@ def fitting_data_dict(
     # TODO: Plug in the python data loading when available. Including but not
     # limited to: central values, ndata, replica generation, covmat construction
     if data.datasets:
-        try:
-            spec_c = data.load()
-        except:
-            breakpoint()
-        ndata = spec_c.GetNData()
-        expdata_true = spec_c.get_cv().reshape(1, ndata)
-        datasets = common_data_reader_experiment(spec_c, data)
+        ndata = sum([ds.commondata.load_commondata(cuts=ds.cuts).ndata for ds in data.datasets])
+        expdata_true = np.array([])
+        for ds in data.datasets:
+            expdata_true = np.append(expdata_true, ds.commondata.load_commondata(cuts=ds.cuts).central_values)
+        expdata_true = expdata_true.reshape(1, ndata)
+        # expdata_true = np.array([ds.commondata.load_commondata(cuts=ds.cuts).central_values for ds in data.datasets]).reshape(1,ndata)
+        datasets = common_data_reader_experiment(data)
         for i in range(len(data.datasets)):
             if data.datasets[i].use_fixed_predictions:
                 datasets[i]['use_fixed_predictions'] = True
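
A toy illustration (shapes are hypothetical) of the training/validation split performed in _mask_fk_tables above: a boolean mask selects the training rows of each fktable, and its complement selects the validation rows.

import numpy as np

fktable = np.arange(12).reshape(4, 3)            # 4 data points, 3 columns
tr_mask = np.array([True, False, True, False])   # hypothetical training mask
vl_mask = ~tr_mask
tr_fk, vl_fk = fktable[tr_mask], fktable[vl_mask]
print(tr_fk.shape, vl_fk.shape)                  # (2, 3) (2, 3)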