# Patch summary (free-form preamble; skipped by `git apply` / `patch`):
#   * compound.py: `_is_smiles` returns False early for non-string or
#     blank/whitespace-only input before any lookup is attempted.
#   * test_compound_identifier.py: adds parametrized tests for `_is_smiles`,
#     `_is_inchikey` and `guess_type`.
# Review fixes folded into this patch:
#   * The added InChIKey positive test is named `test_is_inchikey_valid` so it
#     does not rebind (and thereby hide) the existing
#     `test_is_inchikey_positive` that stays in the file above this hunk.
#   * `test_is_smiles_positive` asserts unconditionally; previously it only
#     asserted when `Chem is None`, so the other fixture parameter passed
#     without checking anything.
# NOTE(review): fixture ids ("Chem_None", "Chem_Not_None") are paired with
#   local=True/False in that order - confirm that local=True really yields
#   Chem is None, otherwise the ids are swapped.
# NOTE(review): the trailing context lines ("# Test with a valid SMILES
#   input", "smiles_string = 'CCO'") were the body of the removed
#   `test_is_smiles_positive_chem_none`; after this patch they appear to be
#   absorbed into `test_guess_type_non_character` - confirm and clean up.
diff --git a/ersilia/utils/identifiers/compound.py b/ersilia/utils/identifiers/compound.py
index 18c98667b..bbe1c44fb 100644
--- a/ersilia/utils/identifiers/compound.py
+++ b/ersilia/utils/identifiers/compound.py
@@ -56,6 +56,8 @@ def is_key_header(self, h):
         return h.lower() in self.key_header_synonyms
 
     def _is_smiles(self, text):
+        if not isinstance(text, str) or not text.strip():
+            return False
         if self.Chem is None:
             return asyncio.run(self._process_pubchem_inchikey(text)) is not None
         else:
diff --git a/test/test_compound_identifier.py b/test/test_compound_identifier.py
index 7247a4789..228fa35db 100644
--- a/test/test_compound_identifier.py
+++ b/test/test_compound_identifier.py
@@ -1,3 +1,5 @@
+
+from ersilia.default import UNPROCESSABLE_INPUT
 import pytest
 from ersilia.utils.identifiers.compound import CompoundIdentifier
 from unittest.mock import patch
@@ -38,29 +40,88 @@ def test_is_inchikey_positive(compound_identifier, inchikey):
     """Test that valid InChIKeys return True."""
     assert compound_identifier._is_inchikey(inchikey) is True
 
+@pytest.fixture(params=[True, False], ids=["Chem_None", "Chem_Not_None"])
+def compound_identifier(request):
+    """Fixture that initializes CompoundIdentifier with or without RDKit."""
+    return CompoundIdentifier(local=request.param)
 
-@pytest.mark.parametrize("inchikey", [
-    "BSYNRYMUTXBXSQUHFFFAOYSA",
-    "BSYNRYMUTXBXSQ-UHFFFAOYSA-XY",
-    "12345678901234-1234567890-X",
-    "BSYNRYMUTXBXSQ_UHFFFAOYSA-N",
-    "BSYNRYMUTXBXSQ-UHFFFAOYSA"
+@pytest.mark.parametrize("smiles, expected", [
+    ("C", True),
+    ("CCO", True)
 ])
-def test_is_inchikey_negative(compound_identifier, inchikey):
-    """Test that invalid InChIKeys return False."""
-    assert not compound_identifier._is_inchikey(inchikey)
+def test_is_smiles_positive(compound_identifier, smiles, expected):
+    """Test _is_smiles returns True for valid SMILES strings."""
+    assert compound_identifier._is_smiles(smiles) == expected
 
-
-def test_guess_type_with_inchikey(compound_identifier):
-    inchikey = "LFQSCWFLJHTTHZ-UHFFFAOYSA-N"
+@pytest.mark.parametrize("smiles, expected", [
+    ("invalid_smiles", False),
+    ("", False)
+])
+def test_is_smiles_negative(compound_identifier, smiles, expected):
+    """Test _is_smiles returns False for invalid or empty SMILES strings."""
+    assert compound_identifier._is_smiles(smiles) == expected
+
+@pytest.mark.parametrize("inchikey, expected", [
+    ("BQJCRHHNABKAKU-KBQPJGBKSA-N", True),
+])
+def test_is_inchikey_valid(inchikey, expected):
+    """Test _is_inchikey returns True for valid InChIKey."""
+    assert CompoundIdentifier._is_inchikey(inchikey) == expected
+
+@pytest.mark.parametrize("inchikey, expected", [
+    ("invalid_inchikey", False),
+    ("BQJCRHHNABKAKU-KBQPJGBKSA", False)
+])
+def test_is_inchikey_negative(inchikey, expected):
+    """Test _is_inchikey returns False for invalid InChIKeys."""
+    assert CompoundIdentifier._is_inchikey(inchikey) == expected
+
+@pytest.mark.parametrize("inchikey, expected", [
+    ("BQJCRHHNABKAKU-KBQPJGBKSA-N", "inchikey"),
+    ("ABCDEFGHIJKLMN-OPQRSTUVWX-Y", "inchikey"),
+])
+def test_guess_type_inchikey(compound_identifier, inchikey, expected):
+    """Ensure guess_type correctly identifies valid InChIKeys."""
     result = compound_identifier.guess_type(inchikey)
-    assert result == "inchikey"
-
-
-@patch('ersilia.utils.identifiers.compound.CompoundIdentifier._pubchem_smiles_to_inchikey')
-def test_is_smiles_positive_chem_none(mock_pubchem, compound_identifier):
-    compound_identifier.Chem = None
-    mock_pubchem.return_value = "InChIKey"
+    assert result == expected, f"Expected 'inchikey', but got '{result}' for input '{inchikey}'"
+
+@pytest.mark.parametrize("smiles, expected", [
+    ("C", "smiles"),
+    ("CCO", "smiles"),
+])
+def test_guess_type_smiles(compound_identifier, smiles, expected):
+    """Ensure guess_type correctly identifies valid SMILES strings."""
+    result = compound_identifier.guess_type(smiles)
+    assert result == expected, f"Expected 'smiles', but got '{result}' for input '{smiles}'"
+
+@pytest.mark.parametrize("input_data, expected", [
+    (None, UNPROCESSABLE_INPUT),
+    (UNPROCESSABLE_INPUT, UNPROCESSABLE_INPUT),
+])
+def test_guess_type_unprocessable(compound_identifier, input_data, expected):
+    """Ensure guess_type returns UNPROCESSABLE_INPUT for None or unprocessable inputs."""
+    result = compound_identifier.guess_type(input_data)
+    assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}', but got '{result}'"
+
+@pytest.mark.parametrize("whitespace_input, expected", [
+    ("\n", UNPROCESSABLE_INPUT),
+    ("\t", UNPROCESSABLE_INPUT),
+    (" ", UNPROCESSABLE_INPUT),
+])
+def test_guess_type_whitespace(compound_identifier, whitespace_input, expected):
+    """Ensure guess_type returns UNPROCESSABLE_INPUT for whitespace-only input."""
+    result = compound_identifier.guess_type(whitespace_input)
+    assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{whitespace_input}'"
+
+@pytest.mark.parametrize("non_char_input, expected", [
+    (12345, UNPROCESSABLE_INPUT),
+    (3.14, UNPROCESSABLE_INPUT),
+    ("𠜎𠜱𡿺𠬠", UNPROCESSABLE_INPUT),
+])
+def test_guess_type_non_character(compound_identifier, non_char_input, expected):
+    """Ensure guess_type returns UNPROCESSABLE_INPUT for non-character input."""
+    result = compound_identifier.guess_type(non_char_input)
+    assert result == expected, f"Expected '{UNPROCESSABLE_INPUT}' for input '{non_char_input}'"
 
     # Test with a valid SMILES input
     smiles_string = 'CCO' #Ethanol SMILES