Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Multi closure internal loader data class #2111

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 57 additions & 37 deletions validphys2/src/validphys/closuretest/multiclosure.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
``multiclosure_output.py``

"""

import numpy as np
import scipy.linalg as la
import scipy.special as special
import dataclasses

from reportengine import collect
from validphys.calcutils import calc_chi2
Expand All @@ -30,6 +32,31 @@
SAMPLING_INTERVAL = 5


@dataclasses.dataclass(frozen=True)
class MulticlosureInternalLoader:
    """Immutable container bundling the inputs shared by the multiclosure
    estimator actions (theory predictions for each closure fit, the
    underlying-law predictions and the t0 covariance matrix).

    Parameters
    ----------
    closures_th: list
        list containing validphys.results.ThPredictionsResult objects
        for each fit

    law_th: ThPredictionsResult object
        underlying law theory predictions

    covmat: np.ndarray
        experimental t0 covariance matrix

    sqrt_covmat: np.ndarray
        cholesky decomposed covariance matrix
    """

    closures_th: list
    law_th: ThPredictionsResult
    # np.ndarray is the array *type*; np.array is just the factory function
    # and is not a valid type annotation.
    covmat: np.ndarray
    sqrt_covmat: np.ndarray


# TODO: deprecate this at some point
@check_fits_underlying_law_match
@check_fits_areclosures
Expand Down Expand Up @@ -89,12 +116,11 @@ def internal_multiclosure_dataset_loader(

sqrt_covmat = la.cholesky(t0_covmat_from_systematics, lower=True)
# TODO: support covmat reg and theory covariance matrix
# possibly make this a named tuple
return (
fits_dataset_predictions,
fits_underlying_predictions,
t0_covmat_from_systematics,
sqrt_covmat,
return MulticlosureInternalLoader(
closures_th=fits_dataset_predictions,
law_th=fits_underlying_predictions,
covmat=t0_covmat_from_systematics,
sqrt_covmat=sqrt_covmat,
)


Expand All @@ -114,9 +140,7 @@ def internal_multiclosure_data_loader(

@check_multifit_replicas
def fits_dataset_bias_variance(
internal_multiclosure_dataset_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this on a single line now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

black

):
"""For a single dataset, calculate the bias and variance for each fit
and return tuple (bias, variance, n_data), where bias and variance are
Expand All @@ -132,7 +156,10 @@ def fits_dataset_bias_variance(
``_internal_max_reps``.

"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

# The dimensions here are (fit, data point, replica)
reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
Expand All @@ -159,9 +186,7 @@ def expected_dataset_bias_variance(fits_dataset_bias_variance):

@check_multifit_replicas
def fits_data_bias_variance(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

):
"""Like `fits_dataset_bias_variance` but for all data"""
return fits_dataset_bias_variance(
Expand Down Expand Up @@ -212,7 +237,10 @@ def dataset_replica_and_central_diff(internal_multiclosure_dataset_loader, diago
default behaviour.

"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
covmat = internal_multiclosure_dataset_loader.covmat

replicas = np.asarray([th.error_members for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value
Expand Down Expand Up @@ -309,11 +337,7 @@ def n_replica_samples(fits_pdf, _internal_max_reps=None, _internal_min_reps=20):
_internal_max_reps.
"""
return list(
range(
_internal_min_reps,
_internal_max_reps + SAMPLING_INTERVAL,
SAMPLING_INTERVAL,
)
range(_internal_min_reps, _internal_max_reps + SAMPLING_INTERVAL, SAMPLING_INTERVAL)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is black doing this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes exactly

)


Expand All @@ -329,13 +353,7 @@ def __init__(self, data):


def _bootstrap_multiclosure_fits(
internal_multiclosure_dataset_loader,
rng,
n_fit_max,
n_fit,
n_rep_max,
n_rep,
use_repeats,
internal_multiclosure_dataset_loader, rng, n_fit_max, n_fit, n_rep_max, n_rep, use_repeats
):
"""Perform a single bootstrap resample of the multiclosure fits and return
a proxy of the base internal object used by relevant estimator actions
Expand All @@ -361,7 +379,12 @@ def _bootstrap_multiclosure_fits(
np.random.choice

"""
closure_th, *input_tuple = internal_multiclosure_dataset_loader
closure_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
covmat = internal_multiclosure_dataset_loader.covmat
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat
input_tuple = law_th, covmat, sqrtcov

fit_boot_index = rng.choice(n_fit_max, size=n_fit, replace=use_repeats)
fit_boot_th = [closure_th[i] for i in fit_boot_index]
boot_ths = []
Expand Down Expand Up @@ -774,9 +797,7 @@ def total_bootstrap_xi(experiments_bootstrap_xi):


def dataset_fits_bias_replicas_variance_samples(
internal_multiclosure_dataset_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
):
"""For a single dataset, calculate the samples of chi2-quantities which
are used to calculate the bias and variance for each fit. The output of this
Expand All @@ -800,7 +821,10 @@ def dataset_fits_bias_replicas_variance_samples(
``_internal_max_reps``.

"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

# The dimensions here are (fit, data point, replica)
reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
Expand All @@ -817,14 +841,10 @@ def dataset_fits_bias_replicas_variance_samples(


def dataset_inputs_fits_bias_replicas_variance_samples(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
):
return dataset_fits_bias_replicas_variance_samples(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
)


Expand Down
120 changes: 92 additions & 28 deletions validphys2/src/validphys/tests/test_closuretest.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,129 @@
"""
test_closuretest.py

contains some unit tests for closure test estimators
contains some unit tests for closure test module

"""

import numpy as np
from unittest.mock import Mock

from validphys.api import API
from validphys.tests.conftest import SINGLE_DATASET, THEORYID, PDF
from validphys.closuretest import bias_dataset, variance_dataset
from validphys.closuretest.multiclosure import (
internal_multiclosure_dataset_loader,
internal_multiclosure_data_loader,
)
from validphys.results import ThPredictionsResult


class TestResult:
    """class for testing base level estimators which expect a results object"""

    def __init__(self, central_value, rawdata=None):
        # central prediction for each data point
        self.central_value = central_value
        self.rawdata = rawdata
        # estimators read replica predictions through ``error_members``
        self.error_members = rawdata
        self.ndata = len(central_value)
        # identity sqrt-covmat so chi2-like quantities reduce to plain sums
        self.sqrtcovmat = np.identity(self.ndata)

    def __len__(self):
        # the stray duplicate ``def __len__(self,):`` line (a diff artifact)
        # has been removed; a single definition remains
        return self.ndata


N_DATA = 5
N_REPLICAS = 10

#TODO: make these fixtures?
# TODO: make these fixtures?
# these are proxies for results tuples of data and theory
ones_results = 2*[TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
twos_results = 2*[TestResult(2*np.ones(N_DATA), 2*np.ones((N_DATA, N_REPLICAS)))]
ones_results = 2 * [TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
twos_results = 2 * [TestResult(2 * np.ones(N_DATA), 2 * np.ones((N_DATA, N_REPLICAS)))]

replicas = np.arange(N_REPLICAS)[np.newaxis, :] * np.ones((N_DATA, 1))
replicas_result = 2 * [TestResult(replicas.mean(axis=1), replicas)]

DATASET_INPUT = {"dataset_input": SINGLE_DATASET, "theoryid": THEORYID, "use_cuts": "internal"}

DATASET_INPUTS = {"dataset_inputs": [SINGLE_DATASET], "theoryid": THEORYID, "use_cuts": "internal"}

MOCK_FIT = Mock()
MOCK_FIT.name = "Mock fit spec"
MOCK_FIT.path = "mock/fit/path"
MOCK_FIT.label = MOCK_FIT.name

MOCK_FITS = [MOCK_FIT]


def test_internal_multiclosure_dataset_loader():
    """
    Test that ``internal_multiclosure_dataset_loader`` returns a loader
    object whose fields are populated and correctly typed.
    """
    dataset = API.dataset(**DATASET_INPUT)

    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
    )

    fits_pdf = [API.pdf(pdf=PDF)]
    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
    fits = MOCK_FITS

    loader = internal_multiclosure_dataset_loader(
        dataset, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
    )

    # isinstance is the idiomatic type check (and tolerates subclasses),
    # unlike comparing type() with ==
    assert loader.closures_th is not None
    assert isinstance(loader.closures_th, list)
    assert loader.law_th is not None
    assert isinstance(loader.law_th, ThPredictionsResult)
    assert loader.covmat is not None
    assert isinstance(loader.covmat, np.ndarray)
    assert loader.sqrt_covmat is not None
    assert isinstance(loader.sqrt_covmat, np.ndarray)


def test_internal_multiclosure_data_loader():
    """
    Test that ``internal_multiclosure_data_loader`` returns a loader
    object whose fields are populated and correctly typed.
    """
    data = API.data(**DATASET_INPUTS)

    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
    )

    fits_pdf = [API.pdf(pdf=PDF)]
    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
    fits = MOCK_FITS

    loader = internal_multiclosure_data_loader(
        data, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
    )

    # isinstance is the idiomatic type check (and tolerates subclasses),
    # unlike comparing type() with ==
    assert loader.closures_th is not None
    assert isinstance(loader.closures_th, list)
    assert loader.law_th is not None
    assert isinstance(loader.law_th, ThPredictionsResult)
    assert loader.covmat is not None
    assert isinstance(loader.covmat, np.ndarray)
    assert loader.sqrt_covmat is not None
    assert isinstance(loader.sqrt_covmat, np.ndarray)

replicas = np.arange(N_REPLICAS)[np.newaxis, :]*np.ones((N_DATA, 1))
replicas_result = 2*[TestResult(replicas.mean(axis=1), replicas)]

def test_bias_function():
    """Check the bias estimator on hand-constructed results objects."""
    # identical data and theory predictions -> zero bias
    bias_ones = bias_dataset(
        ones_results, [ones_results], None, None  # need list of length one to emulate collect
    )
    assert np.allclose(0, bias_ones.bias)
    # predictions offset from data by 1 at every point -> bias of N_DATA
    bias_one_two = bias_dataset(ones_results, [twos_results], None, None)
    assert np.allclose(N_DATA, bias_one_two.bias)


def test_variance_function():
    """Check the variance estimator on hand-constructed results objects."""
    # no spread across replicas -> zero variance
    vardata = variance_dataset(ones_results, None, None)
    assert np.allclose(0, vardata.variance)
    var_reps = variance_dataset(replicas_result, None, None)
    # calc explicitly what variance should be
    expected = np.sum(((np.arange(N_REPLICAS) - 4.5) ** 2) * N_DATA / N_REPLICAS)
    assert np.allclose(expected, var_reps.variance)