Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Theory uncertainties implemented #35

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion n3fit/src/n3fit/model_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
from importlib.util import spec_from_loader
import numpy as np
import tensorflow as tf
import yaml

import os

import scipy as sp

from validphys import bsmnames

Expand All @@ -25,6 +30,8 @@
from n3fit.backends import base_layer_selector, regularizer_selector
from n3fit.layers.CombineCfac import CombineCfacLayer

from validphys.loader import _get_nnpdf_profile

import logging
log = logging.getLogger(__name__)

Expand Down Expand Up @@ -148,7 +155,12 @@ def __call__(self, pdf_layer, mask=None):


def observable_generator(
spec_dict, positivity_initial=1.0, integrability=False, post_observable=None
spec_dict,
positivity_initial=1.0,
integrability=False,
post_observable=None,
use_th_covmat=False,
theoryid=None,
): # pylint: disable=too-many-locals
"""
This function generates the observable model for each experiment.
Expand Down Expand Up @@ -318,6 +330,29 @@ def observable_generator(
obsrot_tr = None
obsrot_vl = None

if use_th_covmat:
data_path = str(_get_nnpdf_profile()['data_path']) + 'theory_' + theoryid.id + '/simu_factors'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably need to add a check here to ensure that only the SIMUnet theory is used, since no other theory provides `simu_factors`.

# If using the theory covariance matrix, we must build it here.
th_covmats = []
for ds in spec_dict.get("datasets"):
simu_fac_path = data_path + '/SIMU_' + ds['name'] + '.yaml'
if os.path.exists(simu_fac_path):
with open(simu_fac_path, 'rb') as file:
simu_file = yaml.safe_load(file)
if 'theory_cov' in simu_file.keys():
th_covmats += [simu_file['theory_cov']]
else:
th_covmats += [np.zeros((ds['ndata'],ds['ndata']))]
else:
th_covmats += [np.zeros((ds['ndata'],ds['ndata']))]

th_covmat = sp.linalg.block_diag(*th_covmats)

if not np.all((th_covmat == 0.0)):
covmat = th_covmat + spec_dict["covmat"]
invcovmat = np.linalg.inv(covmat)
spec_dict["invcovmat"] = invcovmat

Comment on lines +354 to +355
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @J-M-Moore, here you are changing the invcovmat entry of spec_dict.
What about the covmat one, is that not needed?

I am asking because `out_exp` at line 378 is given the covmat that has not been augmented with the theory covariance matrix.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm yeah you're probably right that this is a mistake

out_tr = ObservableWrapper(
spec_name,
model_obs_tr,
Expand Down
6 changes: 5 additions & 1 deletion n3fit/src/n3fit/model_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ def __init__(
model_file=None,
sum_rules=None,
parallel_models=1,
use_th_covmat=False,
theoryid=None,
):
"""
Parameters
Expand Down Expand Up @@ -190,6 +192,8 @@ def __init__(
self.bsm_initialisation_seed = bsm_initialisation_seed
self.fixed_pdf = fixed_pdf
self.replicas = replicas
self.use_th_covmat = use_th_covmat
self.theoryid = theoryid

# Initialise internal variables which define behaviour
if debug:
Expand Down Expand Up @@ -522,7 +526,7 @@ def _generate_observables(
if not self.mode_hyperopt:
log.info("Generating layers for experiment %s", exp_dict["name"])

exp_layer = model_gen.observable_generator(exp_dict, post_observable=combiner)
exp_layer = model_gen.observable_generator(exp_dict, post_observable=combiner, use_th_covmat=self.use_th_covmat, theoryid=self.theoryid)

# Save the input(s) corresponding to this experiment
self.input_list += exp_layer["inputs"]
Expand Down
3 changes: 3 additions & 0 deletions n3fit/src/n3fit/performfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def performfit(
parallel_models=False,
simu_parameters_names=None,
bsm_initialisation_seed=0,
use_th_covmat=False,
):
"""
This action will (upon having read a validcard) process a full PDF fit
Expand Down Expand Up @@ -208,6 +209,8 @@ def performfit(
simu_parameters_scales=simu_parameters_scales,
bsm_fac_initialisations=bsm_fac_initialisations,
bsm_initialisation_seed=bsm_initialisation_seed,
use_th_covmat=use_th_covmat,
theoryid=theoryid,
)

# This is just to give a descriptive name to the fit function
Expand Down
1 change: 0 additions & 1 deletion validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ def _plot_fancy_impl(results, commondata, cutlist,
table[('err', i)] = np.abs(err/norm_cv)
cvcols.append(cvcol)


figby = sane_groupby_iter(table, info.figure_by)


Expand Down
68 changes: 56 additions & 12 deletions validphys2/src/validphys/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
import numpy as np
import pandas as pd
import scipy.linalg as la
import os
import yaml
import scipy as sp

from NNPDF import CommonData
from reportengine.checks import require_one, remove_outer, check_not_empty
Expand Down Expand Up @@ -39,7 +42,11 @@
)

from validphys.n3fit_data_utils import parse_simu_parameters_names_CF
from validphys.loader import _get_nnpdf_profile

from NNPDF import DataSet, Experiment

import validphys.covmats as vp_covmats

log = logging.getLogger(__name__)

Expand All @@ -49,8 +56,9 @@ class Result:


class StatsResult(Result):
def __init__(self, stats, th_covmat=None):
    """Hold a statistics object and an optional theory covariance matrix.

    Parameters
    ----------
    stats :
        Statistics object exposing ``central_value()``/``std_error()``.
    th_covmat : numpy.ndarray or None
        Optional theory covariance matrix; when present, ``std_error``
        adds its diagonal in quadrature to the statistical error.
    """
    self.stats = stats
    self.th_covmat = th_covmat

@property
def rawdata(self):
Expand All @@ -68,7 +76,10 @@ def central_value(self):

@property
def std_error(self):
    """Per-point standard error; when a theory covmat is set, the theory
    uncertainty (its diagonal) is added in quadrature."""
    stat_err = self.stats.std_error()
    if self.th_covmat is None:
        return stat_err
    # Combine statistical and theory errors point by point in quadrature.
    # NOTE(review): assumes th_covmat supports ndarray-style [i, i]
    # indexing (it is built with scipy block_diag upstream) — confirm.
    return [
        np.sqrt(s**2 + self.th_covmat[i, i]) for i, s in enumerate(stat_err)
    ]

def __len__(self):
"""Returns the number of data points in the result"""
Expand Down Expand Up @@ -110,12 +121,12 @@ def sqrtcovmat(self):
class ThPredictionsResult(StatsResult):
"""Class holding theory prediction, inherits from StatsResult"""

def __init__(self, dataobj, stats_class, bsm_factor, label=None, th_covmat=None):
    """Build a stats result from raw theory predictions.

    The predictions in ``dataobj`` are rescaled element-wise by
    ``bsm_factor`` before being wrapped in ``stats_class``; the optional
    ``th_covmat`` is forwarded to the parent so ``std_error`` can include
    theory uncertainties.
    """
    self.stats_class = stats_class
    self.label = label
    rescaled = pd.DataFrame(dataobj.values * bsm_factor)
    super().__init__(stats_class(rescaled.T), th_covmat)

@staticmethod
def make_label(pdf, dataset):
Expand All @@ -133,7 +144,7 @@ def make_label(pdf, dataset):
return label

@classmethod
def from_convolution(cls, pdf, dataset, bsm_factor):
def from_convolution(cls, pdf, dataset, bsm_factor, th_covmat):
# This should work for both single dataset and whole groups
try:
datasets = dataset.datasets
Expand All @@ -150,7 +161,7 @@ def from_convolution(cls, pdf, dataset, bsm_factor):

label = cls.make_label(pdf, dataset)

return cls(th_predictions, pdf.stats_class, bsm_factor, label)
return cls(th_predictions, pdf.stats_class, bsm_factor, label, th_covmat)


class PositivityResult(StatsResult):
Expand Down Expand Up @@ -502,7 +513,7 @@ def procs_corrmat(procs_covmat):
return groups_corrmat(procs_covmat)


def results(dataset: (DataSetSpec), pdf: PDF, covariance_matrix, sqrt_covmat, dataset_bsm_factor, use_th_covmat, theoryid):
    """Tuple of data and theory results for a single pdf.

    The data result carries ``covariance_matrix`` (and its Cholesky
    decomposition ``sqrt_covmat``). When ``use_th_covmat`` is True, a
    block-diagonal theory covariance matrix is assembled from the
    ``simu_factors`` files of ``theoryid`` and added to the experimental
    covariance before the results are built. A group of datasets is also
    allowed (as a result of the C++ code layout).
    """
    data = dataset.load()

    if isinstance(data, DataSet):
        dataset_list = [data]
    elif isinstance(data, Experiment):
        dataset_list = list(data.DataSets())
    else:
        # Previously `dataset_list` was left unbound here, producing a
        # confusing NameError later; fail loudly instead.
        raise TypeError(f"Unsupported loaded data type: {type(data)}")

    th_covmat = None
    if use_th_covmat:
        # If using the theory covariance matrix, we must build it here.
        # NOTE(review): only the SIMUnet theory ships a simu_factors
        # folder — consider validating theoryid before relying on it.
        data_path = os.path.join(
            str(_get_nnpdf_profile()['data_path']),
            'theory_' + theoryid.id,
            'simu_factors',
        )
        th_covmats = []
        for ds in dataset_list:
            # `ds` is an NNPDF DataSet object, so use its accessors;
            # the previous ds['ndata'] dict-indexing raised TypeError
            # whenever a SIMU file was missing or lacked 'theory_cov'.
            ndata = ds.GetNData()
            simu_fac_path = os.path.join(
                data_path, 'SIMU_' + ds.GetSetName() + '.yaml'
            )
            block = None
            if os.path.exists(simu_fac_path):
                with open(simu_fac_path, 'rb') as file:
                    simu_file = yaml.safe_load(file)
                block = simu_file.get('theory_cov')
            # Datasets without a SIMU file (or without a 'theory_cov'
            # entry) contribute a zero block of the right size.
            th_covmats.append(np.zeros((ndata, ndata)) if block is None else block)

        th_covmat = sp.linalg.block_diag(*th_covmats)

        if not np.all(th_covmat == 0.0):
            covariance_matrix = th_covmat + covariance_matrix
            sqrt_covmat = vp_covmats.sqrt_covmat(covariance_matrix)
        else:
            # All blocks vanished: drop the matrix so downstream code
            # behaves exactly as if no theory covmat were requested
            # (previously a zero matrix was forwarded, silently changing
            # the type returned by StatsResult.std_error).
            th_covmat = None

    return (
        DataResult(data, covariance_matrix, sqrt_covmat),
        ThPredictionsResult.from_convolution(
            pdf, dataset, bsm_factor=dataset_bsm_factor, th_covmat=th_covmat
        ),
    )



def dataset_inputs_results(
    data, pdf: PDF, dataset_inputs_covariance_matrix, dataset_inputs_sqrt_covmat, dataset_inputs_bsm_factor, use_th_covmat, theoryid
):
    """Like `results` but for a group of datasets"""
    return results(
        data,
        pdf,
        dataset_inputs_covariance_matrix,
        dataset_inputs_sqrt_covmat,
        dataset_inputs_bsm_factor,
        use_th_covmat,
        theoryid,
    )


Expand Down Expand Up @@ -552,17 +594,19 @@ def one_or_more_results(
covariance_matrix,
sqrt_covmat,
dataset_bsm_factor,
theoryid,
pdfs: (type(None), Sequence) = None,
pdf: (type(None), PDF) = None,
use_th_covmat=False,
):
"""Generate a list of results, where the first element is the data values,
and the next is either the prediction for pdf or for each of the pdfs.
Which of the two is selected intelligently depending on the namespace,
when executing as an action."""
if pdf:
return results(dataset, pdf, covariance_matrix, sqrt_covmat, dataset_bsm_factor)
return results(dataset, pdf, covariance_matrix, sqrt_covmat, dataset_bsm_factor, use_th_covmat, theoryid)
else:
return pdf_results(dataset, pdfs, covariance_matrix, sqrt_covmat, dataset_bsm_factor)
return pdf_results(dataset, pdfs, covariance_matrix, sqrt_covmat, dataset_bsm_factor, use_th_covmat, theoryid)
raise ValueError("Either 'pdf' or 'pdfs' is required")


Expand Down
6 changes: 6 additions & 0 deletions validphys2/src/validphys/scripts/vp_comparefits.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ def complete_mapping(self):
},
'bsm_sector_data': {
'from_': 'fit'
},
'use_th_covmat': {
'from_': 'fit'
}
}
refmap = {'id': args['reference_fit'], 'label': args['reference_fit_label']}
Expand All @@ -250,6 +253,9 @@ def complete_mapping(self):
},
'bsm_sector_data': {
'from_': 'fit'
},
'use_th_covmat': {
'from_': 'fit'
}
}
autosettings['use_thcovmat_if_present'] = args['thcovmat_if_present']
Expand Down