Skip to content

Commit

Permalink
Merge pull request #2604 from cta-observatory/outlier_detector
Browse files Browse the repository at this point in the history
OutlierDetector components
  • Loading branch information
kosack authored Aug 7, 2024
2 parents 447f44c + 0aff4d7 commit 6758c1b
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 1 deletion.
3 changes: 2 additions & 1 deletion docs/api-reference/monitoring/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Monitoring data are time-series used to monitor the status or quality of hardwar

This module provides some code to help to generate monitoring data from processed event data, particularly for the purposes of calibration and data quality assessment.

Currently, only code related to :ref:`stats_aggregator` is implemented here.
Currently, only code related to :ref:`stats_aggregator` and :ref:`outlier_detector` is implemented here.


Submodules
Expand All @@ -21,6 +21,7 @@ Submodules
:glob:

aggregator
outlier


Reference/API
Expand Down
11 changes: 11 additions & 0 deletions docs/api-reference/monitoring/outlier.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.. _outlier_detector:

****************
Outlier Detector
****************


Reference/API
=============

.. automodapi:: ctapipe.monitoring.outlier
1 change: 1 addition & 0 deletions docs/changes/2604.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add outlier detection components to identify faulty pixels.
138 changes: 138 additions & 0 deletions src/ctapipe/monitoring/outlier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Outlier detection algorithms to identify faulty pixels
"""

__all__ = [
"OutlierDetector",
"RangeOutlierDetector",
"MedianOutlierDetector",
"StdOutlierDetector",
]

from abc import abstractmethod

import numpy as np

from ctapipe.core import TelescopeComponent
from ctapipe.core.traits import Float, List


class OutlierDetector(TelescopeComponent):
"""
Base class for outlier detection algorithms.
"""

@abstractmethod
def __call__(self, column) -> bool:
"""
Detect outliers in the provided column.
This function should be implemented by subclasses to define the specific
outlier detection approach. The function examines the statistics in the
given column of the table and returns a boolean mask indicating which
entries are considered as outliers.
Parameters
----------
column : astropy.table.Column
column with chunk-wise aggregated statistic values (mean, median, or std)
of shape (n_entries, n_channels, n_pix)
Returns
-------
boolean mask
mask of outliers of shape (n_entries, n_channels, n_pix)
"""
pass


class RangeOutlierDetector(OutlierDetector):
"""
Detect outliers based on a valid range.
The clipping interval to set the thresholds for detecting outliers corresponds to
a configurable range of valid statistic values.
"""

validity_range = List(
trait=Float(),
default_value=[1.0, 2.0],
help=(
"Defines the range of acceptable values (lower, upper) in units of image values. "
"Values outside this range will be flagged as outliers."
),
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Remove outliers is statistical values out a given range
outliers = np.logical_or(
column < self.validity_range[0],
column > self.validity_range[1],
)
return outliers


class MedianOutlierDetector(OutlierDetector):
"""
Detect outliers based on the deviation from the camera median.
The clipping interval to set the thresholds for detecting outliers is computed by multiplying
the configurable factors and the camera median of the statistic values.
"""

median_range_factors = List(
trait=Float(),
default_value=[-1.0, 1.0],
help=(
"Defines the range of acceptable values (lower, upper) as fractional deviations from the mean. "
"Values outside of this range will be flagged as outliers."
),
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Camera median
camera_median = np.ma.median(column, axis=2)
# Detect outliers based on the deviation of the median distribution
deviation = column - camera_median[:, :, np.newaxis]
outliers = np.logical_or(
deviation < self.median_range_factors[0] * camera_median[:, :, np.newaxis],
deviation > self.median_range_factors[1] * camera_median[:, :, np.newaxis],
)
return outliers


class StdOutlierDetector(OutlierDetector):
"""
Detect outliers based on the deviation from the camera standard deviation.
The clipping interval to set the thresholds for detecting outliers is computed by multiplying
the configurable factors and the camera standard deviation of the statistic values.
"""

std_range_factors = List(
trait=Float(),
default_value=[-1.0, 1.0],
help=(
"Defines the range of acceptable values (lower, upper) in units of standard deviations. "
"Values outside of this range will be flagged as outliers."
),
minlen=2,
maxlen=2,
).tag(config=True)

def __call__(self, column):
# Camera median
camera_median = np.ma.median(column, axis=2)
# Camera std
camera_std = np.ma.std(column, axis=2)
# Detect outliers based on the deviation of the standard deviation distribution
deviation = column - camera_median[:, :, np.newaxis]
outliers = np.logical_or(
deviation < self.std_range_factors[0] * camera_std[:, :, np.newaxis],
deviation > self.std_range_factors[1] * camera_std[:, :, np.newaxis],
)
return outliers
104 changes: 104 additions & 0 deletions src/ctapipe/monitoring/tests/test_outlier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
Tests for OutlierDetector and related functions
"""

import numpy as np
from astropy.table import Table

from ctapipe.monitoring.outlier import (
MedianOutlierDetector,
RangeOutlierDetector,
StdOutlierDetector,
)


def test_range_detection(example_subarray):
"""test the RangeOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the mean values of peak times of flat-field events
mean = rng.normal(18.0, 0.4, size=(50, 2, 1855))
# Insert outliers
mean[12, 0, 120] = 3.0
mean[42, 1, 67] = 35.0
# Create astropy table
table = Table([mean], names=("mean",))
# Initialize the outlier detector based on the range of valid values
# In this test, the interval [15, 25] corresponds to the range (in waveform samples)
# of accepted mean (or median) values of peak times of flat-field events
detector = RangeOutlierDetector(
subarray=example_subarray, validity_range=[15.0, 25.0]
)
# Detect outliers
outliers = detector(table["mean"])
# Construct the expected outliers
expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
expected_outliers[12, 0, 120] = True
expected_outliers[42, 1, 67] = True
# Check if outliers where detected correctly
np.testing.assert_array_equal(outliers, expected_outliers)


def test_median_detection(example_subarray):
"""test the MedianOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the median values of charge images of flat-field events
median = rng.normal(77.0, 0.6, size=(50, 2, 1855))
# Insert outliers
median[12, 0, 120] = 1.2
median[42, 1, 67] = 21045.1
# Create astropy table
table = Table([median], names=("median",))
# Initialize the outlier detector based on the deviation from the camera median
# In this test, the interval [-0.9, 8] corresponds to multiplication factors
# typical used for the median values of charge images of flat-field events
detector = MedianOutlierDetector(
subarray=example_subarray, median_range_factors=[-0.9, 8.0]
)
# Detect outliers
outliers = detector(table["median"])
# Construct the expected outliers
expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
expected_outliers[12, 0, 120] = True
expected_outliers[42, 1, 67] = True
# Check if outliers where detected correctly
np.testing.assert_array_equal(outliers, expected_outliers)


def test_std_detection(example_subarray):
"""test the StdOutlierDetector"""

# Create dummy data for testing
rng = np.random.default_rng(0)
# Distribution mimics the std values of charge images of flat-field events
ff_std = rng.normal(10.0, 2.0, size=(50, 2, 1855))
# Distribution mimics the median values of charge images of pedestal events
ped_median = rng.normal(2.0, 1.5, size=(50, 2, 1855))
# Insert outliers
ff_std[12, 0, 120] = 45.5
ped_median[42, 1, 67] = 77.2
# Create astropy table
ff_table = Table([ff_std], names=("std",))
ped_table = Table([ped_median], names=("median",))

# Initialize the outlier detector based on the deviation from the camera standard deviation
# In this test, the interval [-15, 15] corresponds to multiplication factors
# typical used for the std values of charge images of flat-field events
# and median (and std) values of charge images of pedestal events
detector = StdOutlierDetector(
subarray=example_subarray, std_range_factors=[-15.0, 15.0]
)
ff_outliers = detector(ff_table["std"])
ped_outliers = detector(ped_table["median"])
# Construct the expected outliers
ff_expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
ff_expected_outliers[12, 0, 120] = True
ped_expected_outliers = np.zeros((50, 2, 1855), dtype=bool)
ped_expected_outliers[42, 1, 67] = True

# Check if outliers where detected correctly
np.testing.assert_array_equal(ff_outliers, ff_expected_outliers)
np.testing.assert_array_equal(ped_outliers, ped_expected_outliers)

0 comments on commit 6758c1b

Please sign in to comment.