Commit

Merge pull request #244 from matchms/update_plotting_functions
Update plotting functions
niekdejonge authored Oct 28, 2024
2 parents a820e2c + 9816ef0 commit 052bab6
Showing 45 changed files with 963 additions and 1,795 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- New plotting functions for benchmarking [#244](https://github.com/matchms/ms2deepscore/pull/244)

### Changed
- Integrated new plotting functions in automated training pipeline [#244](https://github.com/matchms/ms2deepscore/pull/244)
- Removed automatic storing of benchmarking scores [#244](https://github.com/matchms/ms2deepscore/pull/244)
- Integrated loss calculation for validation loss and plots [#244](https://github.com/matchms/ms2deepscore/pull/244)
- Validation loss uses all spectrum pairs instead of only 1 spectrum per inchikey [#244](https://github.com/matchms/ms2deepscore/pull/244)

### Removed
- Removed version warning

46 changes: 7 additions & 39 deletions ms2deepscore/SettingsMS2Deepscore.py
@@ -5,6 +5,7 @@
from typing import Optional
import numpy as np
from ms2deepscore.models.loss_functions import LOSS_FUNCTIONS
from ms2deepscore.utils import validate_bin_order


class SettingsMS2Deepscore:
@@ -176,13 +177,14 @@ def __init__(self, validate_settings=True, **settings):
np.random.seed(self.random_seed)

def validate_settings(self):
assert self.ionisation_mode in ("positive", "negative", "both")
assert 0.0 <= self.augment_removal_max <= 1.0, "Expected value within [0,1]"
assert 0.0 <= self.augment_removal_intensity <= 1.0, "Expected value within [0,1]"
if self.ionisation_mode not in ("positive", "negative", "both"):
raise ValueError("Expected ionisation mode to be 'positive' , 'negative', or 'both'.")
if not (0.0 <= self.augment_removal_max <= 1.0) or (not 0.0 <= self.augment_removal_intensity <= 1.0):
raise ValueError("Expected value within [0,1]")
if self.use_fixed_set and self.shuffle:
warnings.warn('When using a fixed set, data will not be shuffled')
if self.random_seed is not None:
assert isinstance(self.random_seed, int), "Random seed must be integer number."
if (self.random_seed is not None) and not isinstance(self.random_seed, int):
raise ValueError("Random seed must be integer number.")
if self.loss_function.lower() not in LOSS_FUNCTIONS:
raise ValueError(f"Unknown loss function. Must be one of: {LOSS_FUNCTIONS.keys()}")
validate_bin_order(self.same_prob_bins)
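
The move from assert statements to explicit ValueError raises means invalid settings still fail when Python runs with assertions disabled (python -O). A minimal sketch of the new behaviour, assuming the remaining settings all have usable defaults (settings are passed as keyword arguments, per the **settings signature above):

from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore

SettingsMS2Deepscore(ionisation_mode="positive")   # passes validation
try:
    SettingsMS2Deepscore(ionisation_mode="neutral")  # hypothetical invalid value
except ValueError as error:
    print(error)  # Expected ionisation mode to be 'positive', 'negative', or 'both'.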
Expand All @@ -204,40 +206,6 @@ def default(self, o):
json.dump(self.__dict__, file, indent=4, cls=NumpyArrayEncoder)


def validate_bin_order(score_bins):
"""
Checks that the given bins are of the correct format:
- Each bin is a tuple/list of two numbers [low, high], with low <= high
- Bins cover the entire interval from 0 to 1, with no gaps or overlaps
- The lowest bin starts below 0 (since pairs >=0 are selected and we want to include zero)
"""

# Sort bins by their lower bound
sorted_bins = sorted(score_bins, key=lambda b: b[0])

# Check upper and lower bound
if sorted_bins[0][0] >= 0:
raise ValueError(f"The first bin should start below 0, but starts at {sorted_bins[0][0]}")

if sorted_bins[-1][1] != 1:
raise ValueError(f"The last bin should end at 1, but ends at {sorted_bins[-1][1]}")

# Check order, format, and overlaps
previous_high = None
for score_bin in sorted_bins:
if len(score_bin) != 2:
raise ValueError("Each bin should have exactly two elements")
low, high = score_bin
if low > high:
raise ValueError("The first number in the bin should be smaller than or equal to the second")
if high < 0:
raise ValueError("No bin should be entirely below 0.")
if previous_high is not None:
if low != previous_high:
raise ValueError("There is a gap or overlap between bins; The bins should cover everything between 0 and 1.")
previous_high = high
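
validate_bin_order itself has moved to ms2deepscore.utils (hence the new import above). A minimal sketch of the contract the deleted copy documents, with hypothetical bin values:

from ms2deepscore.utils import validate_bin_order

# Contiguous bins starting below 0 and ending exactly at 1 pass silently.
validate_bin_order([(-0.01, 0.25), (0.25, 0.5), (0.5, 0.75), (0.75, 1.0)])

# The uncovered stretch between 0.4 and 0.5 raises a ValueError.
validate_bin_order([(-0.01, 0.4), (0.5, 1.0)])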


class SettingsEmbeddingEvaluator:
"""Contains all the settings used for training a EmbeddingEvaluator model.
44 changes: 44 additions & 0 deletions ms2deepscore/benchmarking/CalculateScoresBetweenAllIonmodes.py
@@ -0,0 +1,44 @@
import torch

from ms2deepscore import MS2DeepScore
from ms2deepscore.validation_loss_calculation.PredictionsAndTanimotoScores import PredictionsAndTanimotoScores
from ms2deepscore.validation_loss_calculation.calculate_scores_for_validation import create_embedding_matrix_symmetric, \
create_embedding_matrix_not_symmetric, calculate_tanimoto_scores_unique_inchikey
from ms2deepscore.models.load_model import load_model


class CalculateScoresBetweenAllIonmodes:
"""Calculates the true tanimoto scores and scores between all ionmodes"""
def __init__(self,
model_file_name, positive_validation_spectra, negative_validation_spectra):
self.model_file_name = model_file_name
self.positive_validation_spectra = positive_validation_spectra
self.negative_validation_spectra = negative_validation_spectra
self.model = MS2DeepScore(load_model(model_file_name))

self.pos_vs_neg_scores = self.get_tanimoto_and_prediction_pairs(
positive_validation_spectra, negative_validation_spectra, label="positive vs negative")
self.pos_vs_pos_scores = self.get_tanimoto_and_prediction_pairs(
positive_validation_spectra, label="positive vs positive")
self.neg_vs_neg_scores = self.get_tanimoto_and_prediction_pairs(
negative_validation_spectra, label="negative vs negative")
# Avoid memory leakage
torch.cuda.empty_cache()
del self.model

def get_tanimoto_and_prediction_pairs(self, spectra_1, spectra_2=None, label="") -> PredictionsAndTanimotoScores:
symmetric = False
if spectra_2 is None:
spectra_2 = spectra_1
symmetric = True
if symmetric:
predictions_df = create_embedding_matrix_symmetric(self.model, spectra_1)
else:
predictions_df = create_embedding_matrix_not_symmetric(self.model, spectra_1, spectra_2)
tanimoto_scores_df = calculate_tanimoto_scores_unique_inchikey(spectra_1, spectra_2)
return PredictionsAndTanimotoScores(predictions_df, tanimoto_scores_df, symmetric, label)

def list_of_predictions_and_tanimoto_scores(self):
return [self.pos_vs_pos_scores,
self.pos_vs_neg_scores,
self.neg_vs_neg_scores]
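
A minimal usage sketch for the new class; the model file name and spectra variables are placeholders for a trained MS2DeepScore model and lists of matchms spectra:

from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes

scores_between_ionmodes = CalculateScoresBetweenAllIonmodes(
    "ms2deepscore_model.pt",      # placeholder path to a trained model
    positive_validation_spectra,  # placeholder: positive-mode matchms spectra
    negative_validation_spectra)  # placeholder: negative-mode matchms spectra

for pairs in scores_between_ionmodes.list_of_predictions_and_tanimoto_scores():
    print(pairs.label)  # positive vs positive, positive vs negative, negative vs negative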
117 changes: 0 additions & 117 deletions ms2deepscore/benchmarking/calculate_scores_for_validation.py

This file was deleted.

22 changes: 22 additions & 0 deletions ms2deepscore/benchmarking/plot_average_per_bin.py
@@ -0,0 +1,22 @@
from matplotlib import pyplot as plt

from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes
from ms2deepscore.utils import create_evenly_spaced_bins


def plot_average_per_bin(scores_between_ionmodes: CalculateScoresBetweenAllIonmodes, nr_of_bins):
bins = create_evenly_spaced_bins(nr_of_bins)
bin_centers = [(bin_borders[0] + bin_borders[1])/2 for bin_borders in bins]
fig, ax = plt.subplots()

for predictions_and_tanimoto_scores in scores_between_ionmodes.list_of_predictions_and_tanimoto_scores():
average_predictions = predictions_and_tanimoto_scores.get_average_prediction_per_inchikey_pair()
_, average_per_bin = predictions_and_tanimoto_scores.get_average_per_bin(average_predictions, bins)
ax.plot(bin_centers, average_per_bin, label=predictions_and_tanimoto_scores.label)

ax.set_xlabel("True chemical similarity")
ax.set_ylabel("Average predicted chemical similarity")
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.legend()
return fig
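
A short usage sketch, reusing the scores_between_ionmodes object from the sketch above; one line is drawn per ionmode pairing, showing the average predicted score within each true-similarity bin:

fig = plot_average_per_bin(scores_between_ionmodes, nr_of_bins=10)
fig.savefig("average_per_bin.png")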
82 changes: 82 additions & 0 deletions ms2deepscore/benchmarking/plot_heatmaps.py
@@ -0,0 +1,82 @@
from typing import Tuple, List

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import CalculateScoresBetweenAllIonmodes
from ms2deepscore.validation_loss_calculation.PredictionsAndTanimotoScores import PredictionsAndTanimotoScores


def create_3_heatmaps(pairs: CalculateScoresBetweenAllIonmodes, nr_of_bins):
minimum_y_axis = 0
maximum_y_axis = 1
for predictions_and_tanimoto_score in pairs.list_of_predictions_and_tanimoto_scores():
average_pred_per_inchikey_pair = predictions_and_tanimoto_score.get_average_prediction_per_inchikey_pair()
minimum = average_pred_per_inchikey_pair.min().min()
maximum = average_pred_per_inchikey_pair.max().max()
if minimum < minimum_y_axis:
minimum_y_axis = minimum
if maximum > maximum_y_axis:
maximum_y_axis = maximum

x_bins = np.linspace(0, 1, nr_of_bins + 1)
y_bins = np.linspace(minimum_y_axis, maximum_y_axis + 0.00001, nr_of_bins + 1)

# Take the average per bin
pos_pos_normalized_heatmap = create_normalized_heatmap_data(pairs.pos_vs_pos_scores, x_bins, y_bins)
neg_neg_normalized_heatmap = create_normalized_heatmap_data(pairs.neg_vs_neg_scores, x_bins, y_bins)
pos_neg_normalized_heatmap = create_normalized_heatmap_data(pairs.pos_vs_neg_scores, x_bins, y_bins)

maximum_heatmap_intensity = max(pos_pos_normalized_heatmap.max(), neg_neg_normalized_heatmap.max(),
pos_neg_normalized_heatmap.max())

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
axes[0].imshow(neg_neg_normalized_heatmap.T, origin='lower', interpolation='nearest',
cmap="inferno", vmax=maximum_heatmap_intensity, extent=[0, 1, minimum_y_axis, maximum_y_axis])
axes[0].set_title("Negative vs negative")
axes[1].imshow(pos_pos_normalized_heatmap.T, origin='lower', interpolation='nearest',
cmap="inferno", vmax=maximum_heatmap_intensity, extent=[0, 1, minimum_y_axis, maximum_y_axis])
axes[1].set_title("Positive vs positive")
im2 = axes[2].imshow(pos_neg_normalized_heatmap.T, origin='lower', interpolation='nearest',
cmap="inferno", vmax=maximum_heatmap_intensity, extent=[0, 1, minimum_y_axis, maximum_y_axis])
axes[2].set_title("Positive vs negative")
for ax in axes:
ax.set_xlabel("True chemical similarity")
ax.set_ylabel("Predicted chemical similarity")
ax.set_xlim(0, 1)
ax.set_ylim(minimum_y_axis, maximum_y_axis)

cbar = fig.colorbar(im2, ax=axes, orientation='vertical', fraction=0.02, pad=0.04)
cbar.set_label('Density') # Label for the colorbar
return fig


def create_normalized_heatmap_data(prediction_and_tanimoto_scores: PredictionsAndTanimotoScores,
x_bins, y_bins):
average_prediction = \
prediction_and_tanimoto_scores.get_average_prediction_per_inchikey_pair()
list_of_tanimoto_scores, list_of_average_predictions = convert_dataframes_to_lists_with_matching_pairs(
prediction_and_tanimoto_scores.tanimoto_df,
average_prediction)
heatmap = np.histogram2d(list_of_tanimoto_scores,
list_of_average_predictions,
bins=(x_bins, y_bins))[0]
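# Normalize each true-similarity (x) bin across the predicted-score (y) bins, giving a conditional density; x bins without any pairs yield NaN rows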
normalized_heatmap = heatmap / heatmap.sum(axis=1, keepdims=True)
return normalized_heatmap


def convert_dataframes_to_lists_with_matching_pairs(tanimoto_df: pd.DataFrame,
average_predictions_per_inchikey_pair: pd.DataFrame
) -> Tuple[List[float], List[float]]:
"""Takes in two dataframes with inchikeys as index and returns two lists with scores, which correspond to pairs"""
predictions = []
tanimoto_scores = []
for inchikey_1 in average_predictions_per_inchikey_pair.index:
for inchikey_2 in average_predictions_per_inchikey_pair.columns:
prediction = average_predictions_per_inchikey_pair[inchikey_2][inchikey_1]
# don't include pairs where the prediction is NaN (this is the case when only a pair against itself is available)
if not np.isnan(prediction):
predictions.append(prediction)
tanimoto_scores.append(tanimoto_df[inchikey_2][inchikey_1])
return tanimoto_scores, predictions
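
A short usage sketch for the heatmap figure, again reusing the scores_between_ionmodes object from the earlier sketch; all three panels share one colour scale, so the ionmode pairings are directly comparable:

fig = create_3_heatmaps(scores_between_ionmodes, nr_of_bins=50)
fig.savefig("heatmaps_per_ionmode.png")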