Skip to content

Commit

Permalink
Merge pull request #119 from cdanielmachado/development
Browse files Browse the repository at this point in the history
merge development branch
  • Loading branch information
cdanielmachado authored Apr 19, 2021
2 parents 1c5b972 + e83f187 commit 0f2f48d
Show file tree
Hide file tree
Showing 51 changed files with 769,646 additions and 614,193 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ Dockerfile
.vscode/*
carveme/universe/download bigg fasta.ipynb
carveme/data/generated/bigg_proteins.dmnd
venv/*
2 changes: 1 addition & 1 deletion carveme/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from reframed import set_default_solver
from reframed.solvers.solver import default_parameters, Parameter

__version__ = '1.4.1'
__version__ = '1.5.0'

project_dir = os.path.abspath(os.path.dirname(__file__)) + '/'

Expand Down
92 changes: 78 additions & 14 deletions carveme/cli/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@
'sone': 'Soneidensis_MR1.faa'
}

gram_status = {
'bsub': 'grampos',
'ecol': 'gramneg',
'mgen': 'gramneg',
'paer': 'gramneg',
'rsol': 'gramneg',
'sone': 'gramneg',
}


data_path = project_dir + '/data/benchmark'

biolog_media = {
Expand Down Expand Up @@ -67,11 +77,20 @@
}


def build_models():
for org_id, genome in genomes.items():
def build_models(extra_args=None, species=None):

if extra_args is None:
extra_args = ''

if species is None:
species = sorted(organisms.keys())
else:
species = [species]

for org_id in species:
print(f'Carving model for {organisms[org_id]}')

fasta_file = f"{data_path}/fasta/{genome}"
fasta_file = f"{data_path}/fasta/{genomes[org_id]}"
model_file = f"{data_path}/models/{org_id}.xml"
mediadb = f"{data_path}/media_db.tsv"

Expand All @@ -84,7 +103,7 @@ def build_models():

gapfill = f'-g "{media}" --mediadb {mediadb}' if media else ''

call(f'carve {fasta_file} -o {model_file} {gapfill} --fbc2', shell=True)
call(f'carve {fasta_file} -u {gram_status[org_id]} -o {model_file} {gapfill} --fbc2 {extra_args}', shell=True)


def load_models():
Expand Down Expand Up @@ -117,31 +136,60 @@ def load_essentiality_data():
return essential, non_essential


def run_biolog_benchmark(models, biolog_data, media_db):
def run_biolog_benchmark(models, biolog_data, media_db, species=None):

biolog_results = []

for org_id, medium in biolog_media.items():
if species is None:
species = sorted(organisms.keys())
else:
species = [species]

for org_id in species:
if org_id not in biolog_media:
continue

print(f'Running biolog benchmark for {organisms[org_id]}')
model = models[org_id]
medium = biolog_media[org_id]

tp, fp, fn, tn = 0, 0, 0, 0

for source in biolog_sources[org_id]:
compounds = set(media_db[medium]) - biolog_compounds[source]
data = biolog_data[org_id][source]
result = benchmark_biolog(model, compounds, data)
result = [(org_id, source, met, res) for met, res in result.items()]
biolog_results.extend(result)
tp += len([x for x in result if x[3] == 'TP'])
fp += len([x for x in result if x[3] == 'FP'])
fn += len([x for x in result if x[3] == 'FN'])
tn += len([x for x in result if x[3] == 'TN'])

print(f"TP: {tp} FP: {fp} FN: {fn} TN: {tn}")

return pd.DataFrame(biolog_results, columns=['org', 'source', 'met', 'value'])


def run_essentiality_benchmark(models, essential, non_essential, media_db):
def run_essentiality_benchmark(models, essential, non_essential, media_db, species=None):
essentiality_results = []

for org_id, medium in essentiality_media.items():
if species is None:
species = sorted(organisms.keys())
else:
species = [species]

for org_id in species:

if org_id not in essentiality_media:
continue

print(f'Running essentiality benchmark for {organisms[org_id]}')
model = models[org_id]
medium = essentiality_media[org_id]

in_vivo = {x: True for x in essential[org_id] & set(model.genes)}

if non_essential[org_id]:
in_vivo.update({x: False for x in non_essential[org_id] & set(model.genes)})
else:
Expand All @@ -152,27 +200,36 @@ def run_essentiality_benchmark(models, essential, non_essential, media_db):
result = [(org_id, gene, res) for gene, res in result.items()]
essentiality_results.extend(result)

tp = len([x for x in result if x[2] == 'TP'])
fp = len([x for x in result if x[2] == 'FP'])
fn = len([x for x in result if x[2] == 'FN'])
tn = len([x for x in result if x[2] == 'TN'])
print(f"TP: {tp} FP: {fp} FN: {fn} TN: {tn}")

return pd.DataFrame(essentiality_results, columns=['org', 'gene', 'value'])


def benchmark(rebuild=True, biolog=True, essentiality=True):
def benchmark(rebuild=True, biolog=True, essentiality=True, extra_args=None, species=None):

if species is not None:
if species not in organisms:
print(f"No such species available: {species}")
if rebuild:
build_models()
build_models(extra_args, species)

models = load_models()
media_db = load_media_db(f'{data_path}/media_db.tsv')

if biolog:
biolog_data = load_biolog_data()
df_biolog = run_biolog_benchmark(models, biolog_data, media_db)
df_biolog = run_biolog_benchmark(models, biolog_data, media_db, species)
df_biolog.to_csv(f'{data_path}/results/biolog.tsv', sep='\t', index=False)
value = mcc(df_biolog)
print(f'Biolog final MCC value: {value:.3f}')

if essentiality:
essential, non_essential = load_essentiality_data()
df_essentiality = run_essentiality_benchmark(models, essential, non_essential, media_db)
df_essentiality = run_essentiality_benchmark(models, essential, non_essential, media_db, species)
df_essentiality.to_csv(f'{data_path}/results/essentiality.tsv', sep='\t', index=False)
value = mcc(df_essentiality)
print(f'Essentiality final MCC value: {value:.3f}')
Expand All @@ -186,13 +243,20 @@ def main():
parser.add_argument('--skip-biolog', action='store_true', dest='no_biolog',
help="Skip biolog benchmark.")
parser.add_argument('--skip-essentiality', action='store_true', dest='no_essentiality',
help="Skip biolog benchmark.")
help="Skip essentiality benchmark.")

parser.add_argument('--carve-args', dest='carve_args', help="Additional arguments for carving.")

parser.add_argument('--species', dest='species', help="Benchmark only one species.")

args = parser.parse_args()

benchmark(rebuild=(not args.no_rebuild),
biolog=(not args.no_biolog),
essentiality=(not args.no_essentiality))
essentiality=(not args.no_essentiality),
extra_args=args.carve_args,
species=args.species
)


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 0f2f48d

Please sign in to comment.