From 62572cf9d735adad9645ef8cd73a5d2aaa6a6a30 Mon Sep 17 00:00:00 2001 From: musasizivictoria Date: Sun, 27 Oct 2024 23:57:57 +0300 Subject: [PATCH] added tests for is_smile, is_inchikey and guess_type --- ersilia/utils/identifiers/compound.py | 12 +-- test/test_compound_identifier.py | 110 ++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 test/test_compound_identifier.py diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py index 8834906dc..1217aea2e 100644 --- a/ersilia/utils/identifiers/compound.py +++ b/ersilia/utils/identifiers/compound.py @@ -39,17 +39,13 @@ def is_key_header(self, h): return False def _is_smiles(self, text): + if not isinstance(text, str) or not text.strip(): + return False if self.Chem is None: - if self._pubchem_smiles_to_inchikey(text) is not None: - return True - else: - return False + return self._pubchem_smiles_to_inchikey(text) is not None else: mol = self.Chem.MolFromSmiles(text) - if mol is None: - return False - else: - return True + return mol is not None @staticmethod def _is_inchikey(text): diff --git a/test/test_compound_identifier.py b/test/test_compound_identifier.py new file mode 100644 index 000000000..aa7edfea6 --- /dev/null +++ b/test/test_compound_identifier.py @@ -0,0 +1,110 @@ +from ersilia.default import UNPROCESSABLE_INPUT +import pytest +from ersilia.utils.identifiers.compound import CompoundIdentifier + +@pytest.fixture +def compound_identifier(): + return CompoundIdentifier() + +@pytest.mark.parametrize("header", ["smiles", "input", "SMILES", "INPUT"]) +def test_is_input_header_positive(compound_identifier, header): + """Test that valid input headers return True.""" + assert compound_identifier.is_input_header(header) is True +@pytest.mark.parametrize("header", ["output", "invalid", "InChI", "inchikey"]) +def test_is_input_header_negative(compound_identifier, header): + """Test that invalid input headers return False.""" + assert compound_identifier.is_input_header(header) is False + +@pytest.mark.parametrize("header", ["key", "inchiKey", "KEY", "INCHIKEY"]) +def test_is_key_header_positive(compound_identifier, header): + """Test that valid key headers return True.""" + assert compound_identifier.is_key_header(header) is True + +@pytest.mark.parametrize("header", ["id","smiles","inchi","input", "some_header", "random", "header", ""]) +def test_is_key_header_negative(compound_identifier, header): + assert not compound_identifier.is_key_header(header) + +@pytest.fixture(params=[True, False], ids=["Chem_None", "Chem_Not_None"]) +def compound_identifier(request): + """Fixture that initializes CompoundIdentifier with or without RDKit.""" + return CompoundIdentifier(local=request.param) + +@pytest.mark.parametrize("smiles, expected", [ + ("C", True), + ("CCO", True) +]) +def test_is_smiles_positive(compound_identifier, smiles, expected): + """Test _is_smiles returns True for valid SMILES strings.""" + if compound_identifier.Chem is None: + assert compound_identifier._is_smiles(smiles) == expected + +@pytest.mark.parametrize("smiles, expected", [ + ("invalid_smiles", False), + ("", False) +]) +def test_is_smiles_negative(compound_identifier, smiles, expected): + """Test _is_smiles returns False for invalid or empty SMILES strings.""" + assert compound_identifier._is_smiles(smiles) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("BQJCRHHNABKAKU-KBQPJGBKSA-N", True), +]) +def test_is_inchikey_positive(inchikey, expected): + """Test _is_inchikey returns True for valid InChIKey.""" + assert CompoundIdentifier._is_inchikey(inchikey) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("invalid_inchikey", False), + ("BQJCRHHNABKAKU-KBQPJGBKSA", False) +]) +def test_is_inchikey_negative(inchikey, expected): + """Test _is_inchikey returns False for invalid InChIKeys.""" + assert CompoundIdentifier._is_inchikey(inchikey) == expected + +@pytest.mark.parametrize("inchikey, expected", [ + ("BQJCRHHNABKAKU-KBQPJGBKSA-N", "inchikey"), + ("ABCDEFGHIJKLMN-OPQRSTUVWX-Y", "inchikey"), +]) +def test_guess_type_inchikey(compound_identifier, inchikey, expected): + """Ensure guess_type correctly identifies valid InChIKeys.""" + result = compound_identifier.guess_type(inchikey) + assert result == expected, f"Expected 'inchikey', but got '{result}' for input '{inchikey}'" + +@pytest.mark.parametrize("smiles, expected", [ + ("C", "smiles"), + ("CCO", "smiles"), +]) +def test_guess_type_smiles(compound_identifier, smiles, expected): + """Ensure guess_type correctly identifies valid SMILES strings.""" + result = compound_identifier.guess_type(smiles) + assert result == expected, f"Expected 'smiles', but got '{result}' for input '{smiles}'" + +@pytest.mark.parametrize("input_data, expected", [ + (None, UNPROCESSABLE_INPUT), + (UNPROCESSABLE_INPUT, UNPROCESSABLE_INPUT), +]) +def test_guess_type_unprocessable(compound_identifier, input_data, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for None or unprocessable inputs.""" + result = compound_identifier.guess_type(input_data) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}', but got '{result}'" + +@pytest.mark.parametrize("whitespace_input, expected", [ + ("\n", UNPROCESSABLE_INPUT), + ("\t", UNPROCESSABLE_INPUT), + (" ", UNPROCESSABLE_INPUT), +]) +def test_guess_type_whitespace(compound_identifier, whitespace_input, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for whitespace-only input.""" + result = compound_identifier.guess_type(whitespace_input) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{whitespace_input}'" + +@pytest.mark.parametrize("non_char_input, expected", [ + (12345, UNPROCESSABLE_INPUT), + (3.14, UNPROCESSABLE_INPUT), + ("𠜎𠜱𡿺𠬠", UNPROCESSABLE_INPUT), +]) +def test_guess_type_non_character(compound_identifier, non_char_input, expected): + """Ensure guess_type returns UNPROCESSABLE_INPUT for non-character input.""" + result = compound_identifier.guess_type(non_char_input) + assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{non_char_input}'" +