diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 000000000..8dc493be3 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,6 @@ +{ + "name": "ersilia", + "lockfileVersion": 3, + "requires": true, + "packages": {} +} diff --git a/test/inputs/README.md b/test/inputs/README.md index 55cd1c1ba..3ee65be0c 100644 --- a/test/inputs/README.md +++ b/test/inputs/README.md @@ -1,8 +1,19 @@ -# Inputs for testing +# Inputs for Testing -In this folder we outline different kinds of inputs for testing. +This folder contains various input files used for testing different components of the Ersilia project. Each test may require specific inputs in different formats, and these are organized to maintain clarity and separation. -## Input formats +## Folder Structure + +- **Test-Specific Folders**: Each test has its own dedicated folder, named after the test. Inside these folders, you'll find input files specific to that test. + - For example, the folder `test_inputs/` contains inputs for the `test_inputs.py` test. + +## Adding New Test Inputs + +When adding new tests, create a folder named after the test and place all relevant input files inside. Make sure to follow the format conventions described in the Input Formats section below. + +## Input Formats + +The input files are available in different formats to accommodate various needs during testing: * `.csv`: Input in tabular format. * `.json`: Input in JSON format. @@ -10,7 +21,7 @@ In this folder we outline different kinds of inputs for testing. 
## Chemistry -The molecules (drugs) considered in this files are the following (in SMILES format): +Some tests, such as `test_inputs`, require chemical structures of molecules (drugs), which are the following (in SMILES format): ``` CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O # artemisin diff --git a/test/inputs/catalog_samples.json b/test/inputs/catalog_samples.json new file mode 100644 index 000000000..b18257db5 --- /dev/null +++ b/test/inputs/catalog_samples.json @@ -0,0 +1,102 @@ +[ + { + "Identifier": "eos1579", + "Slug": "metabokiller", + "Title": "Carcinogenic potential of metabolites and small molecules" + }, + { + "Identifier": "eos157v", + "Slug": "grover-freesolv", + "Title": "Hydration free energy of small molecules in water" + }, + { + "Identifier": "eos18ie", + "Slug": "antibiotics-ai", + "Title": "Substructure-based search of novel antibiotics" + }, + { + "Identifier": "eos1af5", + "Slug": "molgrad-caco2", + "Title": "Coloring molecules for Caco-2 cell permeability" + }, + { + "Identifier": "eos1amn", + "Slug": null, + "Title": "3D pharmacophore descriptor" + }, + { + "Identifier": "eos1amr", + "Slug": "grover-bbbp", + "Title": "Blood-brain barrier penetration" + }, + { + "Identifier": "eos1bba", + "Slug": null, + "Title": "GeoGNN Molecular Representation Prediction" + }, + { + "Identifier": "eos1d7r", + "Slug": "small-world-zinc", + "Title": "Small World Zinc search" + }, + { + "Identifier": "eos1mxi", + "Slug": "smiles-pe", + "Title": "SmilesPE: tokenizer algorithm for SMILES, DeepSMILES, and SELFIES" + }, + { + "Identifier": "eos1n4b", + "Slug": "hdac3-inh", + "Title": "Identifying HDAC3 inhibitors" + }, + { + "Identifier": "eos1noy", + "Slug": "chembl-sampler", + "Title": "ChEMBL Molecular Sampler" + }, + { + "Identifier": "eos1pu1", + "Slug": "cardiotox-dictrank", + "Title": "Cardiotoxicity Classifier" + }, + { + "Identifier": "eos1ut3", + "Slug": "molfeat-usrcat", + "Title": "USR descriptors with pharmacophoric constraints" + }, + { + "Identifier": 
"eos1vms", + "Slug": "chembl-multitask-descriptor", + "Title": "Multi-target prediction based on ChEMBL data" + }, + { + "Identifier": "eos1xje", + "Slug": "biogpt-embeddings", + "Title": "BioGPT embeddings" + }, + { + "Identifier": "eos21q7", + "Slug": "inter_dili", + "Title": "InterDILI: drug-induced injury prediction" + }, + { + "Identifier": "eos22io", + "Slug": "idl-ppbopt", + "Title": "Human Plasma Protein Binding (PPB) of Compounds" + }, + { + "Identifier": "eos238c", + "Slug": "mesh-therapeutic-use", + "Title": "MeSH therapeutic use based on chemical structure" + }, + { + "Identifier": "eos2401", + "Slug": "scaffold-decoration", + "Title": "Scaffold decoration" + }, + { + "Identifier": "eos24ci", + "Slug": "drugtax", + "Title": "DrugTax: Drug taxonomy" + } +] \ No newline at end of file diff --git a/test/inputs/compound_list.csv b/test/inputs/compound_list.csv deleted file mode 100644 index e95c0ad9e..000000000 --- a/test/inputs/compound_list.csv +++ /dev/null @@ -1,8 +0,0 @@ -smiles -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O -C1=CN=CC=C1C(=O)NN -CC(CN1C=NC2=C(N=CN=C21)N)OCP(=O)(O)O -CC(=O)OC1=CC=CC=C1C(=O)O -CC(C)CC1=CC=C(C=C1)C(C)C(=O)O -CC1(OC2C(OC(C2O1)(C#N)C3=CC=C4N3N=CN=C4N)CO)C -COC1=CC23CCCN2CCC4=CC5=C(C=C4C3C1O)OCO5 diff --git a/test/inputs/compound_lists.csv b/test/inputs/compound_lists.csv deleted file mode 100644 index 492e958e6..000000000 --- a/test/inputs/compound_lists.csv +++ /dev/null @@ -1,3 +0,0 @@ -smiles -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O.C1=CN=CC=C1C(=O)NN.CC(CN1C=NC2=C(N=CN=C21)N)OCP(=O)(O)O -CC(=O)OC1=CC=CC=C1C(=O)O.CC(C)CC1=CC=C(C=C1)C(C)C(=O)O.CC1(OC2C(OC(C2O1)(C#N)C3=CC=C4N3N=CN=C4N)CO)C.COC1=CC23CCCN2CCC4=CC5=C(C=C4C3C1O)OCO5 diff --git a/test/inputs/compound_pair_of_lists.csv b/test/inputs/compound_pair_of_lists.csv deleted file mode 100644 index 436299e6c..000000000 --- a/test/inputs/compound_pair_of_lists.csv +++ /dev/null @@ -1,5 +0,0 @@ -smiles_1,smiles_2 -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O,CC(=O)OC1=CC=CC=C1C(=O)O 
-C1=CN=CC=C1C(=O)NN,CC(C)CC1=CC=C(C=C1)C(C)C(=O)O -CC(CN1C=NC2=C(N=CN=C21)N)OCP(=O)(O)O,CC1(OC2C(OC(C2O1)(C#N)C3=CC=C4N3N=CN=C4N)CO)C -,COC1=CC23CCCN2CCC4=CC5=C(C=C4C3C1O)OCO5 diff --git a/test/inputs/compound_pairs_of_lists.csv b/test/inputs/compound_pairs_of_lists.csv deleted file mode 100644 index 11f906f78..000000000 --- a/test/inputs/compound_pairs_of_lists.csv +++ /dev/null @@ -1,3 +0,0 @@ -smiles_1,smiles_2 -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O.C1=CN=CC=C1C(=O)NN,CC(CN1C=NC2=C(N=CN=C21)N)OCP(=O)(O)O -CC(=O)OC1=CC=CC=C1C(=O)O.CC(C)CC1=CC=C(C=C1)C(C)C(=O)O,CC1(OC2C(OC(C2O1)(C#N)C3=CC=C4N3N=CN=C4N)CO)C.COC1=CC23CCCN2CCC4=CC5=C(C=C4C3C1O)OCO5 diff --git a/test/inputs/compound_single.csv b/test/inputs/compound_single.csv deleted file mode 100644 index a773e9603..000000000 --- a/test/inputs/compound_single.csv +++ /dev/null @@ -1,2 +0,0 @@ -smiles -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O diff --git a/test/inputs/compound_singles.csv b/test/inputs/compound_singles.csv deleted file mode 100644 index e95c0ad9e..000000000 --- a/test/inputs/compound_singles.csv +++ /dev/null @@ -1,8 +0,0 @@ -smiles -CC1C2C(CC3(C=CC(=O)C(=C3C2OC1=O)C)C)O -C1=CN=CC=C1C(=O)NN -CC(CN1C=NC2=C(N=CN=C21)N)OCP(=O)(O)O -CC(=O)OC1=CC=CC=C1C(=O)O -CC(C)CC1=CC=C(C=C1)C(C)C(=O)O -CC1(OC2C(OC(C2O1)(C#N)C3=CC=C4N3N=CN=C4N)CO)C -COC1=CC23CCCN2CCC4=CC5=C(C=C4C3C1O)OCO5 diff --git a/test/test_catalog.py b/test/test_catalog.py new file mode 100644 index 000000000..2e0abd1fe --- /dev/null +++ b/test/test_catalog.py @@ -0,0 +1,24 @@ +import json +import os +import pytest +from ersilia.hub.content.catalog import CatalogTable + +@pytest.fixture +def catalog_samples(): + file_path = os.path.join(os.path.dirname(__file__), 'inputs', 'catalog_samples.json') + with open(file_path, 'r') as f: + samples = json.load(f) + return samples + +def test_as_list_of_dicts(catalog_samples): + columns = ['Identifier', 'Slug', 'Title'] + + # Test with standard catalog samples + catalog_table = 
CatalogTable(data=[list(item.values()) for item in catalog_samples], columns=columns) + result = catalog_table.as_list_of_dicts() + assert result == catalog_samples, "The result does not match the expected catalog samples" + + # Test with empty catalog data + catalog_table_empty = CatalogTable(data=[], columns=columns) + result_empty = catalog_table_empty.as_list_of_dicts() + assert result_empty == [], "The result should be an empty list for empty input data"