Remove get_tanimoto_score_between_spectra since it is no longer needed

matchms · Oct 23, 2024 · ab1e5f9 · ab1e5f9
1 parent f041d38
commit ab1e5f9
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 81 deletions.
diff --git a/ms2deepscore/benchmarking/calculate_scores_for_validation.py b/ms2deepscore/benchmarking/calculate_scores_for_validation.py
@@ -1,22 +0,0 @@
-from typing import List
-from matchms.Spectrum import Spectrum
-from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import calculate_tanimoto_scores_unique_inchikey
-
-
-def get_tanimoto_score_between_spectra(spectra_1: List[Spectrum],
-                                       spectra_2: List[Spectrum],
-                                       fingerprint_type="daylight",
-                                       nbits=2048):
-    """Gets the tanimoto scores between two list of spectra
-
-    It is optimized by calculating the tanimoto scores only between unique fingerprints/smiles.
-    The tanimoto scores are derived after.
-
-    """
-    tanimoto_df = calculate_tanimoto_scores_unique_inchikey(spectra_1, spectra_2,
-                                                            fingerprint_type,
-                                                            nbits)
-    inchikeys_1 = [spectrum.get("inchikey")[:14] for spectrum in spectra_1]
-    inchikeys_2 = [spectrum.get("inchikey")[:14] for spectrum in spectra_2]
-    tanimoto_scores = tanimoto_df.loc[inchikeys_1, inchikeys_2].values
-    return tanimoto_scores

diff --git a/tests/test_calculate_tanimoto_scores_for_plotting.py b/tests/test_calculate_tanimoto_scores_for_plotting.py
@@ -2,8 +2,6 @@
 from pathlib import Path
 import numpy as np
 from matchms import Spectrum
-from ms2deepscore.benchmarking.calculate_scores_for_validation import (
-    get_tanimoto_score_between_spectra)
 from ms2deepscore.benchmarking.CalculateScoresBetweenAllIonmodes import calculate_tanimoto_scores_unique_inchikey
 
 TEST_RESOURCES_PATH = Path(__file__).parent / 'resources'
@@ -24,37 +22,8 @@ def create_dummy_data(nr_of_spectra):
     return spectrums
 
 
-def test_get_tanimoto_score_between_spectra_duplicated_inchikeys():
-    nr_of_test_spectra = 3
-    spectrums = create_dummy_data(nr_of_test_spectra)
-    # We duplicate the spectra, since we want to test if it works with duplicated inchikeys
-    tanimoto_scores = get_tanimoto_score_between_spectra(spectrums+spectrums,
-                                                         spectrums+spectrums)
-    assert tanimoto_scores.shape == (nr_of_test_spectra*2, nr_of_test_spectra*2)
-    expected_values = np.array([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
-                                [0.0, 1.0, 0.5, 0.0, 1.0, 0.5],
-                                [0.0, 0.5, 1.0, 0.0, 0.5, 1.0],
-                                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
-                                [0.0, 1.0, 0.5, 0.0, 1.0, 0.5],
-                                [0.0, 0.5, 1.0, 0.0, 0.5, 1.0],
-                                ])
-    assert np.array_equal(tanimoto_scores, expected_values)
-
-
-def test_get_tanimoto_score_between_spectra_not_symmetric():
-    dummy_spectra = create_dummy_data(5)
-    tanimoto_scores = get_tanimoto_score_between_spectra(dummy_spectra[:3] + dummy_spectra[2:3],
-                                                         dummy_spectra[2:])
-    assert tanimoto_scores.shape == (4, 3)
-    expected_values = np.array([[0.0, 0.0, 0.0],
-                                [0.5, 0.333333, 0.25],
-                                [1.0, 0.666667, 0.5],
-                                [1.0, 0.666667, 0.5],
-                                ])
-    assert np.allclose(tanimoto_scores, expected_values, atol=1e-04)
-
-
 def test_calculate_tanimoto_scores_unique_inchikey():
+    """Tests that only scores are calculated between unique inchikeys"""
     nr_of_test_spectra = 4
     spectrums = create_dummy_data(nr_of_test_spectra)
     tanimoto_scores = calculate_tanimoto_scores_unique_inchikey(

diff --git a/tests/test_validation_loss_calculator.py b/tests/test_validation_loss_calculator.py
@@ -6,9 +6,9 @@
 from ms2deepscore.models.loss_functions import LOSS_FUNCTIONS
 from ms2deepscore.SettingsMS2Deepscore import SettingsMS2Deepscore
 from ms2deepscore.train_new_model.ValidationLossCalculator import (
-    ValidationLossCalculator, select_spectra_per_inchikey)
+    ValidationLossCalculator)
 from tests.create_test_spectra import (pesticides_test_spectra,
-                                       siamese_spectral_model, create_test_spectra)
+                                       siamese_spectral_model)
 
 
 @pytest.fixture()
@@ -37,31 +37,6 @@ def simple_test_spectra():
     return spectra
 
 
-@pytest.mark.parametrize("nr_of_inchikeys,nr_of_spectra_per_inchikey,nr_of_sampled_spectra_per_inchikey",
-                         [[2, 2, 1],
-                          [2, 2, 5],
-                          [1, 2, 1],
-                          [2, 30, 100],])
-def test_select_one_spectrum_per_inchikey(nr_of_inchikeys, nr_of_spectra_per_inchikey,
-                                          nr_of_sampled_spectra_per_inchikey):
-    test_spectra = create_test_spectra(nr_of_inchikeys, nr_of_spectra_per_inchikey)
-    selected_spectra = select_spectra_per_inchikey(test_spectra, 42, nr_of_sampled_spectra_per_inchikey)
-    assert len(selected_spectra) == nr_of_inchikeys*nr_of_sampled_spectra_per_inchikey
-
-    # Check if the spectra only are unique inchikeys
-    inchikeys_list = [s.get("inchikey") for s in selected_spectra]
-    assert set(inchikeys_list) == set([s.get("inchikey") for s in test_spectra]), "not all inchikeys are selected"
-
-    for inchikey_count in Counter(inchikeys_list).values():
-        assert inchikey_count == nr_of_sampled_spectra_per_inchikey
-
-    hashed_spectra = [spectrum.set("fingerprint", None).__hash__() for spectrum in selected_spectra]
-    for spectrum_count in Counter(hashed_spectra).values():
-        minimum_spectrum_count = nr_of_sampled_spectra_per_inchikey // nr_of_spectra_per_inchikey
-        assert minimum_spectrum_count <= spectrum_count <= minimum_spectrum_count + 1, \
-            "The spectra are not sampled equally"
-
-
 def test_validation_loss_calculator():
     model = siamese_spectral_model()
     test_spectra = pesticides_test_spectra()