Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Multi closure internal loader data class #2111

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 57 additions & 37 deletions validphys2/src/validphys/closuretest/multiclosure.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
``multiclosure_output.py``

"""

import numpy as np
import scipy.linalg as la
import scipy.special as special
import dataclasses

from reportengine import collect
from validphys.calcutils import calc_chi2
Expand All @@ -30,6 +32,31 @@
SAMPLING_INTERVAL = 5


@dataclasses.dataclass(frozen=True)
class MulticlosureInternalLoader:
    """Immutable container bundling the inputs shared by the multiclosure
    estimator actions (theory predictions for each closure fit, the
    underlying-law predictions and the t0 covariance matrix).

    Parameters
    ----------
    closures_th: list
        list containing validphys.results.ThPredictionsResult objects
        for each fit

    law_th: ThPredictionsResult object
        underlying law theory predictions

    covmat: np.ndarray
        experimental t0 covariance matrix

    sqrt_covmat: np.ndarray
        cholesky decomposed covariance matrix
    """

    closures_th: list
    law_th: ThPredictionsResult
    # np.ndarray is the array *type*; np.array is just the factory function
    # and is not a valid type annotation.
    covmat: np.ndarray
    sqrt_covmat: np.ndarray


# TODO: deprecate this at some point
@check_fits_underlying_law_match
@check_fits_areclosures
Expand Down Expand Up @@ -89,12 +116,11 @@ def internal_multiclosure_dataset_loader(

sqrt_covmat = la.cholesky(t0_covmat_from_systematics, lower=True)
# TODO: support covmat reg and theory covariance matrix
# possibly make this a named tuple
return (
fits_dataset_predictions,
fits_underlying_predictions,
t0_covmat_from_systematics,
sqrt_covmat,
return MulticlosureInternalLoader(
closures_th=fits_dataset_predictions,
law_th=fits_underlying_predictions,
covmat=t0_covmat_from_systematics,
sqrt_covmat=sqrt_covmat,
)


Expand All @@ -114,9 +140,7 @@ def internal_multiclosure_data_loader(

@check_multifit_replicas
def fits_dataset_bias_variance(
internal_multiclosure_dataset_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this on a single line now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

black

):
"""For a single dataset, calculate the bias and variance for each fit
and return tuple (bias, variance, n_data), where bias and variance are
Expand All @@ -132,7 +156,10 @@ def fits_dataset_bias_variance(
``_internal_max_reps``.

"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

# The dimensions here are (fit, data point, replica)
reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
Expand All @@ -159,9 +186,7 @@ def expected_dataset_bias_variance(fits_dataset_bias_variance):

@check_multifit_replicas
def fits_data_bias_variance(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

):
"""Like `fits_dataset_bias_variance` but for all data"""
return fits_dataset_bias_variance(
Expand Down Expand Up @@ -212,7 +237,10 @@ def dataset_replica_and_central_diff(internal_multiclosure_dataset_loader, diago
default behaviour.

"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
covmat = internal_multiclosure_dataset_loader.covmat

replicas = np.asarray([th.error_members for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value
Expand Down Expand Up @@ -309,11 +337,7 @@ def n_replica_samples(fits_pdf, _internal_max_reps=None, _internal_min_reps=20):
_internal_max_reps.
"""
return list(
range(
_internal_min_reps,
_internal_max_reps + SAMPLING_INTERVAL,
SAMPLING_INTERVAL,
)
range(_internal_min_reps, _internal_max_reps + SAMPLING_INTERVAL, SAMPLING_INTERVAL)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is black doing this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes exactly

)


Expand All @@ -329,13 +353,7 @@ def __init__(self, data):


def _bootstrap_multiclosure_fits(
internal_multiclosure_dataset_loader,
rng,
n_fit_max,
n_fit,
n_rep_max,
n_rep,
use_repeats,
internal_multiclosure_dataset_loader, rng, n_fit_max, n_fit, n_rep_max, n_rep, use_repeats
):
"""Perform a single bootstrap resample of the multiclosure fits and return
a proxy of the base internal object used by relevant estimator actions
Expand All @@ -361,7 +379,12 @@ def _bootstrap_multiclosure_fits(
np.random.choice

"""
closure_th, *input_tuple = internal_multiclosure_dataset_loader
closure_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
covmat = internal_multiclosure_dataset_loader.covmat
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat
input_tuple = law_th, covmat, sqrtcov

fit_boot_index = rng.choice(n_fit_max, size=n_fit, replace=use_repeats)
fit_boot_th = [closure_th[i] for i in fit_boot_index]
boot_ths = []
Expand Down Expand Up @@ -774,9 +797,7 @@ def total_bootstrap_xi(experiments_bootstrap_xi):


def dataset_fits_bias_replicas_variance_samples(
internal_multiclosure_dataset_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_dataset_loader, _internal_max_reps=None, _internal_min_reps=20
):
"""For a single dataset, calculate the samples of chi2-quantities which
are used to calculate the bias and variance for each fit. The output of this
Expand All @@ -800,7 +821,10 @@ def dataset_fits_bias_replicas_variance_samples(
``_internal_max_reps``.

"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
closures_th = internal_multiclosure_dataset_loader.closures_th
law_th = internal_multiclosure_dataset_loader.law_th
sqrtcov = internal_multiclosure_dataset_loader.sqrt_covmat

# The dimensions here are (fit, data point, replica)
reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
Expand All @@ -817,14 +841,10 @@ def dataset_fits_bias_replicas_variance_samples(


def dataset_inputs_fits_bias_replicas_variance_samples(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
):
return dataset_fits_bias_replicas_variance_samples(
internal_multiclosure_data_loader,
_internal_max_reps=None,
_internal_min_reps=20,
internal_multiclosure_data_loader, _internal_max_reps=None, _internal_min_reps=20
)


Expand Down
120 changes: 92 additions & 28 deletions validphys2/src/validphys/tests/test_closuretest.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,129 @@
"""
test_closuretest.py

contains some unit tests for closure test estimators
contains some unit tests for closure test module

"""

import numpy as np
from unittest.mock import Mock

from validphys.api import API
from validphys.tests.conftest import SINGLE_DATASET, THEORYID, PDF
from validphys.closuretest import bias_dataset, variance_dataset
from validphys.closuretest.multiclosure import (
internal_multiclosure_dataset_loader,
internal_multiclosure_data_loader,
)
from validphys.results import ThPredictionsResult


class TestResult:
    """class for testing base level estimators which expect a results object"""

    def __init__(self, central_value, rawdata=None):
        # central prediction for each data point
        self.central_value = central_value
        self.rawdata = rawdata
        # estimators read replica predictions through ``error_members``
        self.error_members = rawdata
        self.ndata = len(central_value)
        # identity sqrt-covmat so chi2-like quantities reduce to plain sums
        self.sqrtcovmat = np.identity(self.ndata)

    def __len__(self):
        # the stray duplicate ``def __len__(self,):`` line (a diff artifact)
        # has been removed; a single definition remains
        return self.ndata


N_DATA = 5
N_REPLICAS = 10

#TODO: make these fixtures?
# TODO: make these fixtures?
# these are proxies for results tuples of data and theory
ones_results = 2*[TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
twos_results = 2*[TestResult(2*np.ones(N_DATA), 2*np.ones((N_DATA, N_REPLICAS)))]
ones_results = 2 * [TestResult(np.ones(N_DATA), np.ones((N_DATA, N_REPLICAS)))]
twos_results = 2 * [TestResult(2 * np.ones(N_DATA), 2 * np.ones((N_DATA, N_REPLICAS)))]

replicas = np.arange(N_REPLICAS)[np.newaxis, :] * np.ones((N_DATA, 1))
replicas_result = 2 * [TestResult(replicas.mean(axis=1), replicas)]

DATASET_INPUT = {"dataset_input": SINGLE_DATASET, "theoryid": THEORYID, "use_cuts": "internal"}

DATASET_INPUTS = {"dataset_inputs": [SINGLE_DATASET], "theoryid": THEORYID, "use_cuts": "internal"}

MOCK_FIT = Mock()
MOCK_FIT.name = "Mock fit spec"
MOCK_FIT.path = "mock/fit/path"
MOCK_FIT.label = MOCK_FIT.name

MOCK_FITS = [MOCK_FIT]


def test_internal_multiclosure_dataset_loader():
    """
    Test that ``internal_multiclosure_dataset_loader`` returns a loader
    object whose fields are populated and correctly typed.
    """
    dataset = API.dataset(**DATASET_INPUT)

    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
    )

    fits_pdf = [API.pdf(pdf=PDF)]
    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
    fits = MOCK_FITS

    loader = internal_multiclosure_dataset_loader(
        dataset, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
    )

    # isinstance is the idiomatic type check (and tolerates subclasses),
    # unlike comparing type() with ==
    assert loader.closures_th is not None
    assert isinstance(loader.closures_th, list)
    assert loader.law_th is not None
    assert isinstance(loader.law_th, ThPredictionsResult)
    assert loader.covmat is not None
    assert isinstance(loader.covmat, np.ndarray)
    assert loader.sqrt_covmat is not None
    assert isinstance(loader.sqrt_covmat, np.ndarray)


def test_internal_multiclosure_data_loader():
    """
    Test that ``internal_multiclosure_data_loader`` returns a loader
    object whose fields are populated and correctly typed.
    """
    data = API.data(**DATASET_INPUTS)

    t0_covmat_from_systematics = API.t0_covmat_from_systematics(
        **{**DATASET_INPUT, "use_t0": True, "t0pdfset": PDF}
    )

    fits_pdf = [API.pdf(pdf=PDF)]
    multiclosure_underlyinglaw = API.pdf(pdf=PDF)
    fits = MOCK_FITS

    loader = internal_multiclosure_data_loader(
        data, fits_pdf, multiclosure_underlyinglaw, fits, t0_covmat_from_systematics
    )

    # isinstance is the idiomatic type check (and tolerates subclasses),
    # unlike comparing type() with ==
    assert loader.closures_th is not None
    assert isinstance(loader.closures_th, list)
    assert loader.law_th is not None
    assert isinstance(loader.law_th, ThPredictionsResult)
    assert loader.covmat is not None
    assert isinstance(loader.covmat, np.ndarray)
    assert loader.sqrt_covmat is not None
    assert isinstance(loader.sqrt_covmat, np.ndarray)

replicas = np.arange(N_REPLICAS)[np.newaxis, :]*np.ones((N_DATA, 1))
replicas_result = 2*[TestResult(replicas.mean(axis=1), replicas)]

def test_bias_function():
    """Check the bias estimator on hand-constructed results objects."""
    # identical data and theory predictions -> zero bias
    bias_ones = bias_dataset(
        ones_results, [ones_results], None, None  # need list of length one to emulate collect
    )
    assert np.allclose(0, bias_ones.bias)
    # predictions offset from data by 1 at every point -> bias of N_DATA
    bias_one_two = bias_dataset(ones_results, [twos_results], None, None)
    assert np.allclose(N_DATA, bias_one_two.bias)


def test_variance_function():
    """Check the variance estimator on hand-constructed results objects."""
    # no spread across replicas -> zero variance
    vardata = variance_dataset(ones_results, None, None)
    assert np.allclose(0, vardata.variance)
    var_reps = variance_dataset(replicas_result, None, None)
    # calc explicitly what variance should be
    expected = np.sum(((np.arange(N_REPLICAS) - 4.5) ** 2) * N_DATA / N_REPLICAS)
    assert np.allclose(expected, var_reps.variance)