Joseba/scores predsets #37

Open · wants to merge 3 commits into base: main
1 change: 1 addition & 0 deletions .gitignore
@@ -13,6 +13,7 @@
# Virtual environment
puncc-dev-env/
puncc-user-env/
.venv/

# Local demo files
**demo/
42 changes: 35 additions & 7 deletions deel/puncc/api/nonconformity_scores.py
@@ -142,8 +142,8 @@ def _raps_score_function(Y_pred: Iterable, y_true: Iterable) -> np.ndarray:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Regression ~~~~~~~~~~~~~~~~~~~~~~~~~~~~


def mad(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Mean Absolute Deviation (MAD).
def absolute_difference(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Absolute Deviation.

.. math::

@@ -169,19 +169,20 @@ def mad(y_pred: Iterable, y_true: Iterable) -> Iterable:
return abs(y_pred - y_true)


def scaled_mad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Scaled Mean Absolute Deviation (MAD). Considering
def scaled_ad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Scaled Absolute Deviation, normalized by an estimation of the conditional
mean absolute deviation (conditional MAD). Considering
:math:`Y_{\\text{pred}} = (\mu_{\\text{pred}}, \sigma_{\\text{pred}})`:

.. math::

R = \\frac{|y_{\\text{true}}-\mu_{\\text{pred}}|}{\sigma_{\\text{pred}}}

:param Iterable Y_pred:
:param Iterable Y_pred: point and conditional MAD predictions.
:math:`Y_{\\text{pred}}=(y_{\\text{pred}}, \sigma_{\\text{pred}})`
:param Iterable y_true: true labels.

:returns: scaled mean absolute deviation.
:returns: scaled absolute deviation.
:rtype: Iterable

:raises TypeError: unsupported data types.
@@ -205,7 +206,7 @@ def scaled_mad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
y_pred, sigma_pred = Y_pred[:, 0], Y_pred[:, 1]

# MAD then Scaled MAD and computed
mean_absolute_deviation = mad(y_pred, y_true)
mean_absolute_deviation = absolute_difference(y_pred, y_true)
if np.any(sigma_pred < 0):
raise RuntimeError("All MAD predictions should be positive.")
return mean_absolute_deviation / (sigma_pred + EPSILON)
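As a quick sanity check of the renamed scores, here is a minimal sketch (the toy arrays and values are made up for illustration; the functions are the ones added/renamed in this PR):

import numpy as np
from deel.puncc.api.nonconformity_scores import absolute_difference, scaled_ad

# Toy data: column 0 of Y_pred is the point prediction, column 1 the conditional MAD estimate
Y_pred = np.array([[10.0, 2.0], [12.0, 1.5]])
y_true = np.array([11.0, 15.0])

absolute_difference(Y_pred[:, 0], y_true)  # -> array([1., 3.])
scaled_ad(Y_pred, y_true)                  # -> approximately array([0.5, 2.])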
@@ -271,3 +272,30 @@ def cqr_score(Y_pred: Iterable, y_true: Iterable) -> Iterable:
# return torch.maximum(diff_lo, diff_hi)

raise RuntimeError("Fatal Error. Type check failed !")


def difference(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Coordinatewise difference.

.. math::

R = y_{\\text{pred}}-y_{\\text{true}}

:param Iterable y_pred: predictions.
:param Iterable y_true: true labels.

:returns: coordinatewise difference.
:rtype: Iterable

:raises TypeError: unsupported data types.
"""
supported_types_check(y_pred, y_true)

if pkgutil.find_loader("torch") is not None and isinstance(
y_pred, torch.Tensor
):
y_pred = y_pred.cpu().detach().numpy()
y_true = y_true.cpu().detach().numpy()
return np.squeeze(y_pred) - np.squeeze(y_true)

return y_pred - y_true
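For the new signed score, a small sketch of the expected behaviour (illustrative values only):

import numpy as np
from deel.puncc.api.nonconformity_scores import difference

y_pred = np.array([1.0, 2.0, 3.0])
y_true = np.array([1.5, 1.5, 3.0])

# Signed, coordinatewise residuals (no absolute value)
difference(y_pred, y_true)  # -> array([-0.5,  0.5,  0. ])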
2 changes: 1 addition & 1 deletion deel/puncc/api/prediction.py
@@ -461,5 +461,5 @@ def fit(
"""
self.models[0].fit(X, y, **dictargs[0])
mu_pred = self.models[0].predict(X)
mads = nonconformity_scores.mad(mu_pred, y)
mads = nonconformity_scores.absolute_difference(mu_pred, y)
self.models[1].fit(X, mads, **dictargs[1])
104 changes: 104 additions & 0 deletions deel/puncc/api/prediction_sets.py
Collaborator:
I see that you changed the names of the functions from xxx_interval to xxx_set. Can you please motivate your choice?

I specifically used "interval" for regression and "set" for classification. An interval is basically a set, but the naming was deliberately different to distinguish between the two tasks (regression and classification).

Anyway, this exposes us to issues if we forget to echo the changes in other modules or documentation files. For example, the call to cqr_interval wasn't updated in the deel.puncc.regression module (line #429). By the way, how did the code pass the test_regression_seed test for CQR?

@@ -280,3 +280,107 @@ def cqr_interval(
y_lo = q_lo - scores_quantile
y_hi = q_hi + scores_quantile
return y_lo, y_hi


def constant_set(
y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""Constant prediction set centered on `y_pred`. The size of the
margin is `scores_quantile` (noted :math:`\gamma_{\\alpha}`).

.. math::

I = \\Pi_{k=1}^m[y^k_{\\text{pred}} - \gamma^k_{\\alpha},
y^k_{\\text{pred}} + \gamma^k_{\\alpha}]

:param Iterable y_pred: predictions.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(y_pred)


y_lo = y_pred - scores_quantile
y_hi = y_pred + scores_quantile
return y_lo, y_hi
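A hedged usage sketch for constant_set, assuming the _set naming lands as proposed (the margin values below are arbitrary; in practice scores_quantile comes from the calibrated nonconformity scores):

import numpy as np
from deel.puncc.api.prediction_sets import constant_set

# Two samples with a two-dimensional output; one margin per output coordinate
y_pred = np.array([[1.0, 10.0], [2.0, 20.0]])
scores_quantile = np.array([0.5, 1.0])

y_lo, y_hi = constant_set(y_pred, scores_quantile)
# y_lo -> [[0.5, 9.0], [1.5, 19.0]];  y_hi -> [[1.5, 11.0], [2.5, 21.0]]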


def scaled_set(
Y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""Scaled prediction set centered on `y_pred`. Considering
:math:`Y_{\\text{pred}} = (\mu_{\\text{pred}}, \sigma_{\\text{pred}})`,
the size of the margin is proportional to `scores_quantile`
:math:`\gamma_{\\alpha}`.

.. math::

I = \\Pi_{k=1}^m[\mu^k_{\\text{pred}} - \gamma^k_{\\alpha} \cdot \sigma^k_{\\text{pred}},
\mu^k_{\\text{pred}} + \gamma^k_{\\alpha} \cdot \sigma^k_{\\text{pred}}]

:param Iterable Y_pred: point and dispersion predictions
:math:`Y_{\\text{pred}}=(\mu_{\\text{pred}}, \sigma_{\\text{pred}})`.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: scaled prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(Y_pred)

if Y_pred.shape[1] != 2: # check Y_pred contains two predictions
raise RuntimeError(
"Each Y_pred must contain a point prediction and a dispersion estimation."
)

if pkgutil.find_loader("pandas") is not None and isinstance(
Y_pred, pd.DataFrame
):
y_pred, sigma_pred = Y_pred.iloc[:, 0], Y_pred.iloc[:, 1]
else:
y_pred, sigma_pred = Y_pred[:, 0], Y_pred[:, 1]

y_lo = y_pred - scores_quantile * sigma_pred
y_hi = y_pred + scores_quantile * sigma_pred
return y_lo, y_hi
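Likewise for scaled_set, a small sketch with made-up numbers (column 0 is the point prediction, column 1 the dispersion estimate):

import numpy as np
from deel.puncc.api.prediction_sets import scaled_set

Y_pred = np.array([[10.0, 2.0], [12.0, 1.5]])
scores_quantile = 1.2  # quantile of the scaled nonconformity scores

y_lo, y_hi = scaled_set(Y_pred, scores_quantile)
# y_lo -> [7.6, 10.2];  y_hi -> [12.4, 13.8]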


def cqr_set(
Y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""CQR prediction set. Considering
:math:`Y_{\\text{pred}} = (q_{\\text{lo}}, q_{\\text{hi}})`, the prediction
interval is built from the lower and upper quantile predictions and
`scores_quantile` :math:`\gamma_{\\alpha}`.

.. math::

I = \Pi_{k=1}^m[q^k_{\\text{lo}} - \gamma^k_{\\alpha},
q^k_{\\text{hi}} + \gamma^k_{\\alpha}]

:param Iterable Y_pred: lower and upper quantile predictions
:math:`Y_{\\text{pred}}=(q_{\\text{lo}}, q_{\\text{hi}})`.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: CQR prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(Y_pred)

if Y_pred.shape[1] != 2: # check Y_pred contains two predictions
raise RuntimeError(
"Each Y_pred must contain lower and higher quantiles predictions, respectively."
)

if pkgutil.find_loader("pandas") is not None and isinstance(
Y_pred, pd.DataFrame
):
q_lo, q_hi = Y_pred.iloc[:, 0], Y_pred.iloc[:, 1]
else:
q_lo, q_hi = Y_pred[:, 0], Y_pred[:, 1]

y_lo = q_lo - scores_quantile
y_hi = q_hi + scores_quantile
return y_lo, y_hi
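And for cqr_set, with illustrative lower/upper quantile predictions:

import numpy as np
from deel.puncc.api.prediction_sets import cqr_set

Y_pred = np.array([[0.9, 1.4], [2.0, 2.8]])  # (lower quantile, upper quantile) per sample
scores_quantile = 0.1

y_lo, y_hi = cqr_set(Y_pred, scores_quantile)
# y_lo -> [0.8, 1.9];  y_hi -> [1.5, 2.9]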
12 changes: 6 additions & 6 deletions deel/puncc/regression.py
@@ -115,7 +115,7 @@
):
self.predictor = predictor
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
weight_func=weight_func,
)
@@ -336,7 +336,7 @@ def __init__(
weight_func=weight_func,
)
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.scaled_mad,
nonconf_score_func=nonconformity_scores.scaled_ad,
pred_set_func=prediction_sets.scaled_interval,
weight_func=weight_func,
)
@@ -495,7 +495,7 @@ class CVPlus:
def __init__(self, predictor, *, K: int, random_state=None):
self.predictor = predictor
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
weight_func=None,
)
@@ -673,7 +673,7 @@ def _compute_residuals(self, y_pred, y_true):
:returns: residuals.
:rtype: ndarray
"""
return nonconformity_scores.mad(y_pred, y_true)
return nonconformity_scores.absolute_difference(y_pred, y_true)

def _compute_pi(self, y_pred, w):
"""Compute prediction intervals.
@@ -702,7 +702,7 @@ def _compute_boot_residuals(self, boot_pred, y_true):
# For each training sample X_i, the LOO estimate is built from
# averaging the predictions of bootstrap models whose OOB include X_i
loo_pred = (self._oob_matrix * boot_pred.T).sum(-1)
residuals = nonconformity_scores.mad(y_pred=loo_pred, y_true=y_true)
residuals = nonconformity_scores.absolute_difference(y_pred=loo_pred, y_true=y_true)
return list(residuals)

def _compute_loo_predictions(self, boot_pred):
@@ -997,7 +997,7 @@ def _compute_residuals(self, y_pred, y_true):
:rtype: ndarray

"""
return nonconformity_scores.scaled_mad(y_pred, y_true)
return nonconformity_scores.scaled_ad(y_pred, y_true)

def _compute_boot_residuals(self, boot_pred, y_true):
loo_pred = (self._oob_matrix * boot_pred[:, :, 0].T).sum(-1)
4 changes: 2 additions & 2 deletions docs/api_intro.ipynb
@@ -208,7 +208,7 @@
"source": [
"### Calibrator <a class=\"anchor\" id=\"calibrator\"></a> \n",
"\n",
"The calibrator instance provides a way of estimating the nonconformity scores on the calibration set and how to compute the prediction sets. For the split conformal prediction procedure, the `BaseCalibrator` uses the mean absolute deviation as nonconformity score and and prediction set are built as constant intervals. These two functions are already provided in `deel.puncc.api.nonconformity_scores.mad` and `deel.puncc.api.prediction_sets.constant_interval`, respectively. Alternatively, one can define custom functions and pass them as arguments to the calibrator. "
"The calibrator instance provides a way of estimating the nonconformity scores on the calibration set and how to compute the prediction sets. For the split conformal prediction procedure, the `BaseCalibrator` uses the mean absolute deviation as nonconformity score and and prediction set are built as constant intervals. These two functions are already provided in `deel.puncc.api.nonconformity_scores.absolute_difference` and `deel.puncc.api.prediction_sets.constant_interval`, respectively. Alternatively, one can define custom functions and pass them as arguments to the calibrator. "
]
},
{
@@ -223,7 +223,7 @@
"from deel.puncc.api import prediction_sets\n",
"\n",
"## Calibrator construction\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.mad,\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.absolute_difference,\n",
" pred_set_func=prediction_sets.constant_interval)"
]
},
8 changes: 4 additions & 4 deletions docs/puncc_architecture.ipynb
@@ -284,14 +284,14 @@
"The calibrator provides a structure to estimate the nonconformity scores\n",
"on the calibration set and to compute the prediction sets. At the constructor `deel.puncc.api.calibration.BaseCalibrator`,\n",
"one decides which nonconformity score and prediction set functions to use.\n",
"Then, the calibrator instance computes **nonconformity scores** (e.g., mean absolute deviation) by calling\n",
"Then, the calibrator instance computes **nonconformity scores** (e.g., absolute difference) by calling\n",
"`deel.puncc.api.calibration.Calibrator.fit` on the calibration dataset. Based on the estimated quantiles of nonconformity scores,\n",
"the method `deel.puncc.api.calibration.BaseCalibrator.calibrate` enables to **construct** and/or **calibrate** prediction sets.\n",
"\n",
"For example, the `BaseCalibrator` in the split conformal prediction procedure\n",
"uses the mean absolute deviation as nonconformity score and and prediction set\n",
"uses the absolute difference as nonconformity score and prediction sets\n",
"are built as constant intervals. These two functions are already provided in\n",
"`deel.puncc.api.nonconformity_scores.mad` and `deel.puncc.api.prediction_sets.constant_interval`, respectively:"
"`deel.puncc.api.nonconformity_scores.absolute_difference` and `deel.puncc.api.prediction_sets.constant_interval`, respectively:"
]
},
{
@@ -306,7 +306,7 @@
"from deel.puncc.api import prediction_sets\n",
"\n",
"## Calibrator construction\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.mad,\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.absolute_difference,\n",
" pred_set_func=prediction_sets.constant_interval)"
]
},
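To make the workflow described in the notebook concrete, here is a hedged end-to-end sketch of the calibrator. The fit/calibrate keyword names and the model, X_calib, y_calib, and X_test variables are assumptions, not taken from this diff; check the BaseCalibrator API reference before relying on them:

from deel.puncc.api.calibration import BaseCalibrator
from deel.puncc.api import nonconformity_scores, prediction_sets

calibrator = BaseCalibrator(
    nonconf_score_func=nonconformity_scores.absolute_difference,
    pred_set_func=prediction_sets.constant_interval,
)

# Assumed usage: fit() estimates nonconformity scores on the calibration split,
# calibrate() turns their quantile into prediction intervals on new points.
calibrator.fit(y_true=y_calib, y_pred=model.predict(X_calib))
y_lo, y_hi = calibrator.calibrate(alpha=0.1, y_pred=model.predict(X_test))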
2 changes: 1 addition & 1 deletion tests/api/test_conformalization.py
@@ -63,7 +63,7 @@ def setUp(self):

# Definition of a calibrator
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
)

2 changes: 1 addition & 1 deletion tests/examples/test_examples_conformalization.py
@@ -57,7 +57,7 @@ def test_conformalpredictor():
# Definition of a calibrator, built for a given nonconformity scores
# and a procedure to build the prediction sets
calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
)
