Skip to content

Commit

Permalink
added tests for is_smiles, is_inchikey and guess_type (#1358)
Browse files Browse the repository at this point in the history
Co-authored-by: Dhanshree Arora <[email protected]>
  • Loading branch information
musasizivictoria and DhanshreeA authored Nov 21, 2024
1 parent ebc1066 commit 0d8c248
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 19 deletions.
2 changes: 2 additions & 0 deletions ersilia/utils/identifiers/compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def is_key_header(self, h):
return h.lower() in self.key_header_synonyms

def _is_smiles(self, text):
if not isinstance(text, str) or not text.strip():
return False
if self.Chem is None:
return asyncio.run(self._process_pubchem_inchikey(text)) is not None
else:
Expand Down
100 changes: 81 additions & 19 deletions test/test_compound_identifier.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

from ersilia.default import UNPROCESSABLE_INPUT
import pytest
from ersilia.utils.identifiers.compound import CompoundIdentifier
from unittest.mock import patch
Expand Down Expand Up @@ -38,29 +40,89 @@ def test_is_inchikey_positive(compound_identifier, inchikey):
"""Test that valid InChIKeys return True."""
assert compound_identifier._is_inchikey(inchikey) is True

@pytest.fixture(params=[True, False], ids=["Chem_None", "Chem_Not_None"])
def compound_identifier(request):
    """Provide a CompoundIdentifier built with each parametrized ``local`` flag.

    The fixture is parametrized, so every test that requests it runs once
    with ``local=True`` and once with ``local=False``.
    """
    # NOTE(review): the ids pair True with "Chem_None" and False with
    # "Chem_Not_None". Presumably local=True is the mode that loads RDKit,
    # which would make these labels inverted — confirm against
    # CompoundIdentifier.__init__.
    use_local = request.param
    identifier = CompoundIdentifier(local=use_local)
    return identifier

@pytest.mark.parametrize("inchikey", [
"BSYNRYMUTXBXSQUHFFFAOYSA",
"BSYNRYMUTXBXSQ-UHFFFAOYSA-XY",
"12345678901234-1234567890-X",
"BSYNRYMUTXBXSQ_UHFFFAOYSA-N",
"BSYNRYMUTXBXSQ-UHFFFAOYSA"
@pytest.mark.parametrize("smiles, expected", [
("C", True),
("CCO", True)
])
def test_is_inchikey_negative(compound_identifier, inchikey):
"""Test that invalid InChIKeys return False."""
assert not compound_identifier._is_inchikey(inchikey)
def test_is_smiles_positive(compound_identifier, smiles, expected):
    """Check that _is_smiles gives the expected result for valid SMILES.

    The assertion is only evaluated when the identifier's ``Chem`` attribute
    is None; otherwise the test returns without checking anything.
    """
    # NOTE(review): with Chem set, this test passes vacuously. If that is
    # not intentional, the guard should be dropped — confirm with the author.
    if compound_identifier.Chem is not None:
        return
    outcome = compound_identifier._is_smiles(smiles)
    assert outcome == expected


def test_guess_type_with_inchikey(compound_identifier):
inchikey = "LFQSCWFLJHTTHZ-UHFFFAOYSA-N"
@pytest.mark.parametrize(
    "smiles, expected",
    [
        ("invalid_smiles", False),
        ("", False),
    ],
)
def test_is_smiles_negative(compound_identifier, smiles, expected):
    """Check that _is_smiles rejects a malformed string and the empty string."""
    outcome = compound_identifier._is_smiles(smiles)
    assert outcome == expected

@pytest.mark.parametrize(
    "inchikey, expected",
    [("BQJCRHHNABKAKU-KBQPJGBKSA-N", True)],
)
def test_is_inchikey_positive(inchikey, expected):
    """Check that _is_inchikey accepts a well-formed InChIKey.

    The method is called on the class itself rather than on an instance.
    """
    # NOTE(review): a test with this same name is also visible earlier in
    # this file view. If both live in one module, this later definition
    # replaces the earlier one and only one of them is collected — confirm.
    verdict = CompoundIdentifier._is_inchikey(inchikey)
    assert verdict == expected

@pytest.mark.parametrize(
    "inchikey, expected",
    [
        ("invalid_inchikey", False),
        ("BQJCRHHNABKAKU-KBQPJGBKSA", False),
    ],
)
def test_is_inchikey_negative(inchikey, expected):
    """Check that _is_inchikey rejects malformed or truncated keys.

    The method is called on the class itself rather than on an instance.
    """
    verdict = CompoundIdentifier._is_inchikey(inchikey)
    assert verdict == expected

@pytest.mark.parametrize("inchikey, expected", [
    ("BQJCRHHNABKAKU-KBQPJGBKSA-N", "inchikey"),
    ("ABCDEFGHIJKLMN-OPQRSTUVWX-Y", "inchikey"),
])
def test_guess_type_inchikey(compound_identifier, inchikey, expected):
    """Ensure guess_type correctly identifies valid InChIKeys.

    Each case supplies an input string and the type label guess_type is
    expected to return for it.
    """
    result = compound_identifier.guess_type(inchikey)
    # Compare against the parametrized value instead of a hard-coded literal,
    # so the `expected` column of the case table is what is actually checked,
    # and report the offending input on failure.
    assert result == expected, (
        f"Expected '{expected}', but got '{result}' for input '{inchikey}'"
    )


# NOTE(review): as shown here, this test never calls the code under test, and
# the final assertion reads `result`, `expected` and `inchikey`, none of which
# are defined in this function. It looks like removed and added diff lines
# were interleaved in this span — confirm against the real test file before
# relying on it.
@patch('ersilia.utils.identifiers.compound.CompoundIdentifier._pubchem_smiles_to_inchikey')
def test_is_smiles_positive_chem_none(mock_pubchem, compound_identifier):
compound_identifier.Chem = None
mock_pubchem.return_value = "InChIKey"
assert result == expected, f"Expected 'inchikey', but got '{result}' for input '{inchikey}'"

@pytest.mark.parametrize(
    "smiles, expected",
    [
        ("C", "smiles"),
        ("CCO", "smiles"),
    ],
)
def test_guess_type_smiles(compound_identifier, smiles, expected):
    """Verify that guess_type returns the expected label for valid SMILES."""
    result = compound_identifier.guess_type(smiles)
    assert result == expected, (
        f"Expected 'smiles', but got '{result}' for input '{smiles}'"
    )

@pytest.mark.parametrize(
    "input_data, expected",
    [
        (None, UNPROCESSABLE_INPUT),
        (UNPROCESSABLE_INPUT, UNPROCESSABLE_INPUT),
    ],
)
def test_guess_type_unprocessable(compound_identifier, input_data, expected):
    """Verify that guess_type maps None and the sentinel itself to UNPROCESSABLE_INPUT."""
    result = compound_identifier.guess_type(input_data)
    assert result == expected, (
        f"Expected '{UNPROCESSABLE_INPUT}', but got '{result}'"
    )

@pytest.mark.parametrize(
    "whitespace_input, expected",
    [
        ("\n", UNPROCESSABLE_INPUT),
        ("\t", UNPROCESSABLE_INPUT),
        (" ", UNPROCESSABLE_INPUT),
    ],
)
def test_guess_type_whitespace(compound_identifier, whitespace_input, expected):
    """Verify that guess_type treats newline, tab and space inputs as unprocessable."""
    result = compound_identifier.guess_type(whitespace_input)
    assert result == expected, (
        f"Expected '{UNPROCESSABLE_INPUT}' for input '{whitespace_input}'"
    )

@pytest.mark.parametrize(
    "non_char_input, expected",
    [
        (12345, UNPROCESSABLE_INPUT),
        (3.14, UNPROCESSABLE_INPUT),
        ("𠜎𠜱𡿺𠬠", UNPROCESSABLE_INPUT),
    ],
)
def test_guess_type_non_character(compound_identifier, non_char_input, expected):
    """Verify that guess_type flags an int, a float and a CJK-only string as unprocessable."""
    result = compound_identifier.guess_type(non_char_input)
    assert result == expected, (
        f"Expected '{UNPROCESSABLE_INPUT}' for input '{non_char_input}'"
    )

# Test with a valid SMILES input
smiles_string = 'CCO' #Ethanol SMILES
Expand Down

0 comments on commit 0d8c248

Please sign in to comment.