Joseba/scores predsets #37

Open · wants to merge 3 commits into base: main
1 change: 1 addition & 0 deletions .gitignore
@@ -13,6 +13,7 @@
# Virtual environment
puncc-dev-env/
puncc-user-env/
.venv/

# Local demo files
**demo/
42 changes: 35 additions & 7 deletions deel/puncc/api/nonconformity_scores.py
@@ -142,8 +142,8 @@ def _raps_score_function(Y_pred: Iterable, y_true: Iterable) -> np.ndarray:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Regression ~~~~~~~~~~~~~~~~~~~~~~~~~~~~


def mad(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Mean Absolute Deviation (MAD).
def absolute_difference(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Absolute Deviation.

.. math::

@@ -169,19 +169,20 @@ def mad(y_pred: Iterable, y_true: Iterable) -> Iterable:
return abs(y_pred - y_true)


def scaled_mad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Scaled Mean Absolute Deviation (MAD). Considering
def scaled_ad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Scaled Absolute Deviation, normalized by an estimation of the conditional
mean absolute deviation (conditional MAD). Considering
:math:`Y_{\\text{pred}} = (\mu_{\\text{pred}}, \sigma_{\\text{pred}})`:

.. math::

R = \\frac{|y_{\\text{true}}-\mu_{\\text{pred}}|}{\sigma_{\\text{pred}}}

:param Iterable Y_pred:
:param Iterable Y_pred: point and conditional MAD predictions.
:math:`Y_{\\text{pred}}=(y_{\\text{pred}}, \sigma_{\\text{pred}})`
:param Iterable y_true: true labels.

:returns: scaled mean absolute deviation.
:returns: scaled absolute deviation.
:rtype: Iterable

:raises TypeError: unsupported data types.
@@ -205,7 +206,7 @@ def scaled_mad(Y_pred: Iterable, y_true: Iterable) -> Iterable:
y_pred, sigma_pred = Y_pred[:, 0], Y_pred[:, 1]

# MAD then Scaled MAD and computed
mean_absolute_deviation = mad(y_pred, y_true)
mean_absolute_deviation = absolute_difference(y_pred, y_true)
if np.any(sigma_pred < 0):
raise RuntimeError("All MAD predictions should be positive.")
return mean_absolute_deviation / (sigma_pred + EPSILON)
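As a quick sanity check of the renamed scores, here is a minimal sketch (the toy arrays and values are made up for illustration; the functions are the ones added/renamed in this PR):

import numpy as np
from deel.puncc.api.nonconformity_scores import absolute_difference, scaled_ad

# Toy data: column 0 of Y_pred is the point prediction, column 1 the conditional MAD estimate
Y_pred = np.array([[10.0, 2.0], [12.0, 1.5]])
y_true = np.array([11.0, 15.0])

absolute_difference(Y_pred[:, 0], y_true)  # -> array([1., 3.])
scaled_ad(Y_pred, y_true)                  # -> approximately array([0.5, 2.])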
@@ -271,3 +272,30 @@ def cqr_score(Y_pred: Iterable, y_true: Iterable) -> Iterable:
# return torch.maximum(diff_lo, diff_hi)

raise RuntimeError("Fatal Error. Type check failed !")


def difference(y_pred: Iterable, y_true: Iterable) -> Iterable:
"""Coordinatewise difference.

.. math::

R = y_{\\text{pred}}-y_{\\text{true}}

:param Iterable y_pred: predictions.
:param Iterable y_true: true labels.

:returns: coordinatewise difference.
:rtype: Iterable

:raises TypeError: unsupported data types.
"""
supported_types_check(y_pred, y_true)

if pkgutil.find_loader("torch") is not None and isinstance(
y_pred, torch.Tensor
):
y_pred = y_pred.cpu().detach().numpy()
y_true = y_true.cpu().detach().numpy()
return np.squeeze(y_pred) - np.squeeze(y_true)

return y_pred - y_true
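For the new signed score, a small sketch of the expected behaviour (illustrative values only):

import numpy as np
from deel.puncc.api.nonconformity_scores import difference

y_pred = np.array([1.0, 2.0, 3.0])
y_true = np.array([1.5, 1.5, 3.0])

# Signed, coordinatewise residuals (no absolute value)
difference(y_pred, y_true)  # -> array([-0.5,  0.5,  0. ])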
2 changes: 1 addition & 1 deletion deel/puncc/api/prediction.py
@@ -461,5 +461,5 @@ def fit(
"""
self.models[0].fit(X, y, **dictargs[0])
mu_pred = self.models[0].predict(X)
mads = nonconformity_scores.mad(mu_pred, y)
mads = nonconformity_scores.absolute_difference(mu_pred, y)
self.models[1].fit(X, mads, **dictargs[1])
104 changes: 104 additions & 0 deletions deel/puncc/api/prediction_sets.py
Collaborator:
I see that you changed the names of the functions from xxx_interval to xxx_set. Can you please motivate your choice?

I specifically used "interval" for regression and "set" for classification. An interval is basically a set, but the naming was deliberately different to distinguish between the two tasks (regression and classification).

Anyway, this exposes us to issues if we forget to echo the changes in other modules or documentation files. For example, the call to cqr_interval wasn't updated in the deel.puncc.regression module (line #429). By the way, how did the code pass the test_regression_seed test for CQR?

@@ -280,3 +280,107 @@ def cqr_interval(
y_lo = q_lo - scores_quantile
y_hi = q_hi + scores_quantile
return y_lo, y_hi


def constant_set(
y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""Constant prediction set centered on `y_pred`. The size of the
margin is `scores_quantile` (noted :math:`\gamma_{\\alpha}`).

.. math::

I = \\Pi_{k=1}^m[y^k_{\\text{pred}} - \gamma^k_{\\alpha},
y^k_{\\text{pred}} + \gamma^k_{\\alpha}]

:param Iterable y_pred: predictions.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(y_pred)


y_lo = y_pred - scores_quantile
y_hi = y_pred + scores_quantile
return y_lo, y_hi
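A hedged usage sketch for constant_set, assuming the _set naming lands as proposed (the margin values below are arbitrary; in practice scores_quantile comes from the calibrated nonconformity scores):

import numpy as np
from deel.puncc.api.prediction_sets import constant_set

# Two samples with a two-dimensional output; one margin per output coordinate
y_pred = np.array([[1.0, 10.0], [2.0, 20.0]])
scores_quantile = np.array([0.5, 1.0])

y_lo, y_hi = constant_set(y_pred, scores_quantile)
# y_lo -> [[0.5, 9.0], [1.5, 19.0]];  y_hi -> [[1.5, 11.0], [2.5, 21.0]]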


def scaled_set(
Y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""Scaled prediction set centered on `y_pred`. Considering
:math:`Y_{\\text{pred}} = (\mu_{\\text{pred}}, \sigma_{\\text{pred}})`,
the size of the margin is proportional to `scores_quantile`
:math:`\gamma_{\\alpha}`.

.. math::

I = \\Pi_{k=1}^m[\mu^k_{\\text{pred}} - \gamma^k_{\\alpha} \cdot \sigma^k_{\\text{pred}},
\mu^k_{\\text{pred}} + \gamma^k_{\\alpha} \cdot \sigma^k_{\\text{pred}}]

:param Iterable Y_pred: point and dispersion predictions
:math:`Y_{\\text{pred}}=(\mu_{\\text{pred}}, \sigma_{\\text{pred}})`.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: scaled prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(Y_pred)

if Y_pred.shape[1] != 2: # check Y_pred contains two predictions
raise RuntimeError(
"Each Y_pred must contain a point prediction and a dispersion estimation."
)

if pkgutil.find_loader("pandas") is not None and isinstance(
Y_pred, pd.DataFrame
):
y_pred, sigma_pred = Y_pred.iloc[:, 0], Y_pred.iloc[:, 1]
else:
y_pred, sigma_pred = Y_pred[:, 0], Y_pred[:, 1]

y_lo = y_pred - scores_quantile * sigma_pred
y_hi = y_pred + scores_quantile * sigma_pred
return y_lo, y_hi
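Likewise for scaled_set, a small sketch with made-up numbers (column 0 is the point prediction, column 1 the dispersion estimate):

import numpy as np
from deel.puncc.api.prediction_sets import scaled_set

Y_pred = np.array([[10.0, 2.0], [12.0, 1.5]])
scores_quantile = 1.2  # quantile of the scaled nonconformity scores

y_lo, y_hi = scaled_set(Y_pred, scores_quantile)
# y_lo -> [7.6, 10.2];  y_hi -> [12.4, 13.8]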


def cqr_set(
Y_pred: Iterable, scores_quantile: np.ndarray
) -> Tuple[np.ndarray]:
"""CQR prediction set. Considering
:math:`Y_{\\text{pred}} = (q_{\\text{lo}}, q_{\\text{hi}})`, the prediction
interval is built from the lower and upper quantile predictions and
`scores_quantile` :math:`\gamma_{\\alpha}`.

.. math::

I = \Pi_{k=1}^m[q^k_{\\text{lo}} - \gamma^k_{\\alpha},
q^k_{\\text{hi}} + \gamma^k_{\\alpha}]

:param Iterable Y_pred: lower and upper quantile predictions
:math:`Y_{\\text{pred}}=(q_{\\text{lo}}, q_{\\text{hi}})`.
:param ndarray scores_quantile: multivariate quantile of nonconformity scores computed
on a calibration set for a given :math:`\\alpha`.

:returns: CQR prediction sets :math:`I`.
:rtype: Tuple[ndarray]
"""
supported_types_check(Y_pred)

if Y_pred.shape[1] != 2: # check Y_pred contains two predictions
raise RuntimeError(
"Each Y_pred must contain lower and higher quantiles predictions, respectively."
)

if pkgutil.find_loader("pandas") is not None and isinstance(
Y_pred, pd.DataFrame
):
q_lo, q_hi = Y_pred.iloc[:, 0], Y_pred.iloc[:, 1]
else:
q_lo, q_hi = Y_pred[:, 0], Y_pred[:, 1]

y_lo = q_lo - scores_quantile
y_hi = q_hi + scores_quantile
return y_lo, y_hi
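And for cqr_set, with illustrative lower/upper quantile predictions:

import numpy as np
from deel.puncc.api.prediction_sets import cqr_set

Y_pred = np.array([[0.9, 1.4], [2.0, 2.8]])  # (lower quantile, upper quantile) per sample
scores_quantile = 0.1

y_lo, y_hi = cqr_set(Y_pred, scores_quantile)
# y_lo -> [0.8, 1.9];  y_hi -> [1.5, 2.9]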
12 changes: 6 additions & 6 deletions deel/puncc/regression.py
@@ -115,7 +115,7 @@
):
self.predictor = predictor
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
weight_func=weight_func,
)
@@ -336,7 +336,7 @@ def __init__(
weight_func=weight_func,
)
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.scaled_mad,
nonconf_score_func=nonconformity_scores.scaled_ad,
pred_set_func=prediction_sets.scaled_interval,
weight_func=weight_func,
)
@@ -495,7 +495,7 @@ class CVPlus:
def __init__(self, predictor, *, K: int, random_state=None):
self.predictor = predictor
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
weight_func=None,
)
@@ -673,7 +673,7 @@ def _compute_residuals(self, y_pred, y_true):
:returns: residuals.
:rtype: ndarray
"""
return nonconformity_scores.mad(y_pred, y_true)
return nonconformity_scores.absolute_difference(y_pred, y_true)

def _compute_pi(self, y_pred, w):
"""Compute prediction intervals.
@@ -702,7 +702,7 @@ def _compute_boot_residuals(self, boot_pred, y_true):
# For each training sample X_i, the LOO estimate is built from
# averaging the predictions of bootstrap models whose OOB include X_i
loo_pred = (self._oob_matrix * boot_pred.T).sum(-1)
residuals = nonconformity_scores.mad(y_pred=loo_pred, y_true=y_true)
residuals = nonconformity_scores.absolute_difference(y_pred=loo_pred, y_true=y_true)
return list(residuals)

def _compute_loo_predictions(self, boot_pred):
@@ -997,7 +997,7 @@ def _compute_residuals(self, y_pred, y_true):
:rtype: ndarray

"""
return nonconformity_scores.scaled_mad(y_pred, y_true)
return nonconformity_scores.scaled_ad(y_pred, y_true)

def _compute_boot_residuals(self, boot_pred, y_true):
loo_pred = (self._oob_matrix * boot_pred[:, :, 0].T).sum(-1)
4 changes: 2 additions & 2 deletions docs/api_intro.ipynb
@@ -208,7 +208,7 @@
"source": [
"### Calibrator <a class=\"anchor\" id=\"calibrator\"></a> \n",
"\n",
"The calibrator instance provides a way of estimating the nonconformity scores on the calibration set and how to compute the prediction sets. For the split conformal prediction procedure, the `BaseCalibrator` uses the mean absolute deviation as nonconformity score and and prediction set are built as constant intervals. These two functions are already provided in `deel.puncc.api.nonconformity_scores.mad` and `deel.puncc.api.prediction_sets.constant_interval`, respectively. Alternatively, one can define custom functions and pass them as arguments to the calibrator. "
"The calibrator instance provides a way of estimating the nonconformity scores on the calibration set and how to compute the prediction sets. For the split conformal prediction procedure, the `BaseCalibrator` uses the mean absolute deviation as nonconformity score and and prediction set are built as constant intervals. These two functions are already provided in `deel.puncc.api.nonconformity_scores.absolute_difference` and `deel.puncc.api.prediction_sets.constant_interval`, respectively. Alternatively, one can define custom functions and pass them as arguments to the calibrator. "
]
},
{
@@ -223,7 +223,7 @@
"from deel.puncc.api import prediction_sets\n",
"\n",
"## Calibrator construction\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.mad,\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.absolute_difference,\n",
" pred_set_func=prediction_sets.constant_interval)"
]
},
8 changes: 4 additions & 4 deletions docs/puncc_architecture.ipynb
@@ -284,14 +284,14 @@
"The calibrator provides a structure to estimate the nonconformity scores\n",
"on the calibration set and to compute the prediction sets. At the constructor `deel.puncc.api.calibration.BaseCalibrator`,\n",
"one decides which nonconformity score and prediction set functions to use.\n",
"Then, the calibrator instance computes **nonconformity scores** (e.g., mean absolute deviation) by calling\n",
"Then, the calibrator instance computes **nonconformity scores** (e.g., absolute difference) by calling\n",
"`deel.puncc.api.calibration.Calibrator.fit` on the calibration dataset. Based on the estimated quantiles of nonconformity scores,\n",
"the method `deel.puncc.api.calibration.BaseCalibrator.calibrate` enables to **construct** and/or **calibrate** prediction sets.\n",
"\n",
"For example, the `BaseCalibrator` in the split conformal prediction procedure\n",
"uses the mean absolute deviation as nonconformity score and and prediction set\n",
"uses the absolute difference as nonconformity score and prediction sets\n",
"are built as constant intervals. These two functions are already provided in\n",
"`deel.puncc.api.nonconformity_scores.mad` and `deel.puncc.api.prediction_sets.constant_interval`, respectively:"
"`deel.puncc.api.nonconformity_scores.absolute_difference` and `deel.puncc.api.prediction_sets.constant_interval`, respectively:"
]
},
{
@@ -306,7 +306,7 @@
"from deel.puncc.api import prediction_sets\n",
"\n",
"## Calibrator construction\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.mad,\n",
"my_calibrator = BaseCalibrator(nonconf_score_func=nonconformity_scores.absolute_difference,\n",
" pred_set_func=prediction_sets.constant_interval)"
]
},
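To make the workflow described in the notebook concrete, here is a hedged end-to-end sketch of the calibrator. The fit/calibrate keyword names and the model, X_calib, y_calib, and X_test variables are assumptions, not taken from this diff; check the BaseCalibrator API reference before relying on them:

from deel.puncc.api.calibration import BaseCalibrator
from deel.puncc.api import nonconformity_scores, prediction_sets

calibrator = BaseCalibrator(
    nonconf_score_func=nonconformity_scores.absolute_difference,
    pred_set_func=prediction_sets.constant_interval,
)

# Assumed usage: fit() estimates nonconformity scores on the calibration split,
# calibrate() turns their quantile into prediction intervals on new points.
calibrator.fit(y_true=y_calib, y_pred=model.predict(X_calib))
y_lo, y_hi = calibrator.calibrate(alpha=0.1, y_pred=model.predict(X_test))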
2 changes: 1 addition & 1 deletion tests/api/test_conformalization.py
@@ -63,7 +63,7 @@ def setUp(self):

# Definition of a calibrator
self.calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
)

2 changes: 1 addition & 1 deletion tests/examples/test_examples_conformalization.py
@@ -57,7 +57,7 @@ def test_conformalpredictor():
# Definition of a calibrator, built for a given nonconformity scores
# and a procedure to build the prediction sets
calibrator = BaseCalibrator(
nonconf_score_func=nonconformity_scores.mad,
nonconf_score_func=nonconformity_scores.absolute_difference,
pred_set_func=prediction_sets.constant_interval,
)
