Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

k-factor loading for plot_fancy of contaminated fits #70

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions validphys2/examples/plot_data_theory_comparison_contaminated.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Runcard: data/theory comparison for a contaminated ttbar fit.
# Produces plot_fancy comparisons where the theory predictions are rescaled
# by EFT k-factors read from the theory's SIMU_ files (see the
# `contamination` keys below).
meta:
  title: TTBAR_13TEV_TTMNORM data/theory comparison baseline
  keywords: [contamination, ttbar, zhat]
  author: Francesco Merlotti

pdfs:
  - {id: 240516_fm_cont_ttbar_zhat_nojets_08, label: Contaminated fit (Z=8e-4)}

pdf: 240516_fm_cont_ttbar_zhat_nojets_08

fit: 240516_fm_base_zhat_nojets_lv0

use_fitcommondata: True

theoryid: 270

use_cuts: internal

dataset_inputs:
  # `contamination: EFT_LO` selects the EFT_LO entry of each dataset's
  # SIMU_ k-factor file in the theory.
  - {dataset: ATLAS_TTBAR_13TEV_TTMNORM, cfac: [QCD], contamination: EFT_LO}
  - {dataset: CMS_TTBAR_13TEV_TTMNORM, cfac: [QCD], contamination: EFT_LO}

contamination_parameters:
  name: Z
  value: 0.0008
  # Wilson-coefficient weights combined linearly with the SIMU_ tables;
  # operators set to 0. do not contribute.
  linear_combination:
    OtG: 0.
    O8qd: -173.42
    O1qd: 0.
    O1qu: 0.
    O8qu: -173.42
    O1dt: 0.
    O8dt: -173.42
    O1qt: 0.
    O8qt: -173.42
    O1ut: 0.
    O8ut: -173.42
    O11qq: 0.
    O13qq: 0.
    O81qq: -173.42
    O83qq: 0.

template_text: |
  {@ dataset_inputs plot_fancy@}
  {@ dataset_inputs::pdfs plot_fancy(normalize_to=data)@}

actions_:
  - report(main=true)
8 changes: 4 additions & 4 deletions validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def check_normalize_to(ns, **kwargs):
#TODO: This interface is horrible. We need to think how to adapt libnnpdf
#to make this use case easier
def _plot_fancy_impl(results, commondata, cutlist,
normalize_to:(int,type(None)) = None, labellist=None):
normalize_to:(int,type(None)) = None, labellist=None): # type: ignore

"""Implementation of the data-theory comparison plots. Providers are
supposed to call (yield from) this.
Expand Down Expand Up @@ -383,7 +383,7 @@ def _plot_fancy_impl(results, commondata, cutlist,
@check_normalize_to
@figuregen
def plot_fancy(one_or_more_results, commondata, cuts,
normalize_to: (int, str, type(None)) = None):
normalize_to: (int, str, type(None)) = None): # type: ignore
"""
Read the PLOTTING configuration for the dataset and generate the
corresponding data-theory plot.
Expand Down Expand Up @@ -439,7 +439,7 @@ def plot_fancy_dataspecs(
dataspecs_commondata,
dataspecs_cuts,
dataspecs_speclabel,
normalize_to: (str, int, type(None)) = None,
normalize_to: (str, int, type(None)) = None, # type: ignore
):
"""
General interface for data-theory comparison plots.
Expand Down Expand Up @@ -1069,7 +1069,7 @@ def plot_xq2(
display_cuts:bool=True,
marker_by:str='process type',
highlight_label:str='highlight',
highlight_datasets:(Sequence,type(None))=None,
highlight_datasets:(Sequence,type(None))=None, # type: ignore
aspect:str='landscape',
):
"""Plot the (x,Q²) coverage based of the data based on some LO
Expand Down
66 changes: 55 additions & 11 deletions validphys2/src/validphys/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import numpy as np
import pandas as pd
import scipy.linalg as la
import os
import yaml

from NNPDF import CommonData
from reportengine.checks import require_one, remove_outer, check_not_empty
Expand All @@ -38,11 +40,13 @@
PredictionsRequireCutsError,
)

from validphys.loader import Loader

from validphys.n3fit_data_utils import parse_simu_parameters_names_CF


log = logging.getLogger(__name__)

l = Loader()

class Result:
pass
Expand Down Expand Up @@ -308,16 +312,16 @@ def group_result_table_68cl(
return res


def dataset_inputs_bsm_factor(data, pdf, read_bsm_facs, contamination_parameters=None, theoryid=None):
    """Same as :py:func:`validphys.results.dataset_bsm_factor`
    but for a list of dataset inputs.

    ``contamination_parameters`` and ``theoryid`` are optional and are
    forwarded unchanged to :py:func:`dataset_bsm_factor` for each dataset,
    so existing callers that omit them keep the previous behaviour.
    """
    # Stack the per-dataset factor arrays along the data-point axis, in the
    # same order as ``data.datasets``.
    res = np.concatenate(
        [
            dataset_bsm_factor(
                dataset, pdf, read_bsm_facs, contamination_parameters, theoryid
            )
            for dataset in data.datasets
        ]
    )
    return res

def dataset_bsm_factor(dataset, pdf, read_bsm_facs):
def dataset_bsm_factor(dataset, pdf, read_bsm_facs, contamination_parameters=None, theoryid=None):
"""For each replica of ``pdf``, scale the fitted BSM-factors by
the best fit value.
Returns
Expand All @@ -330,13 +334,53 @@ def dataset_bsm_factor(dataset, pdf, read_bsm_facs):
dataset.simu_parameters_linear_combinations,
dataset.cuts
)

ndata = len(dataset.load().get_cv())
nrep = len(pdf)

if parsed_bsm_facs is None:
if parsed_bsm_facs is None and dataset.contamination is None:
# We want an array of ones that ndata x nrep
# where ndata is the number of post cut datapoints
ndata = len(dataset.load().get_cv())
nrep = len(pdf)
return np.ones((ndata, nrep))

# After running a contamination fit we do not obtain fitted BSM factors in a
# CSV file. Hence, to compare the data used in the fit with the theory
# predictions of that fit, the k-factors must be loaded differently: the
# contamination name, value (e.g. zhat) and linear combination are written in
# the runcard, and if the `contamination` keyword is present in the
# dataset_input we read the dataset's `SIMU_` file and load the k-factors.
if dataset.contamination:

cont_order = dataset.contamination
bsm_path = l.datapath / f"theory_{theoryid.id}" / "simu_factors"

if contamination_parameters is None:
log.warning("No Contamination parameters provided")
return np.ones((ndata, nrep))

else:
bsm_file = bsm_path / f"SIMU_{dataset.name}.yaml"
if not os.path.exists(bsm_file):
log.error(f"Could not find a BSM-factor for {dataset.name}. Are you sure they exist in the given theory?")
return np.ones((ndata, nrep))

log.info(f"Loading {dataset.name}")

cont_name = contamination_parameters["name"]
cont_value = contamination_parameters["value"]
cont_lin_comb = contamination_parameters["linear_combination"]

with open(bsm_file, "r+") as stream:
simu_card = yaml.safe_load(stream)
stream.close()

k_factors = np.zeros(len(simu_card["SM_fixed"]))
for op in cont_lin_comb:
k_factors += cont_lin_comb[op] * np.array(simu_card[cont_order][op])
k_factors = 1. + k_factors * cont_value / np.array(simu_card[cont_order]["SM"])

cuts = dataset.cuts.load()
return np.array([k_factors[cuts]] * nrep).T

fit_bsm_fac_df = pd.DataFrame(
{k: v.central_value for k, v in parsed_bsm_facs.items()}
Expand Down Expand Up @@ -541,7 +585,7 @@ def dataset_inputs_results(
# ``results`` to support this.
# TODO: The above comment doesn't make sense after adding T0. Deprecate this
def pdf_results(
dataset: (DataSetSpec, DataGroupSpec),
dataset: (DataSetSpec, DataGroupSpec), # type: ignore
pdfs: Sequence,
covariance_matrix,
sqrt_covmat,
Expand All @@ -557,12 +601,12 @@ def pdf_results(
@require_one("pdfs", "pdf")
@remove_outer("pdfs", "pdf")
def one_or_more_results(
dataset: (DataSetSpec, DataGroupSpec),
dataset: (DataSetSpec, DataGroupSpec), # type: ignore
covariance_matrix,
sqrt_covmat,
dataset_bsm_factor,
pdfs: (type(None), Sequence) = None,
pdf: (type(None), PDF) = None,
pdfs: (type(None), Sequence) = None, # type: ignore
pdf: (type(None), PDF) = None, # type: ignore
):
"""Generate a list of results, where the first element is the data values,
and the next is either the prediction for pdf or for each of the pdfs.
Expand Down