From edc05baa7f970651153170980a3f068c6bfcdf4c Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 17 Jan 2023 00:45:22 -0600 Subject: [PATCH 01/57] Minor fixes and adding some utility functionality --- modelseedpy/core/msmodelutl.py | 26 ++++++++++++++++++++++++ modelseedpy/core/mstemplate.py | 37 +++++++++++++++------------------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index d4494938..af499773 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -4,6 +4,7 @@ import time import json import sys +import pandas as pd from cobra import Model, Reaction, Metabolite from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem @@ -306,6 +307,31 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output + + ################################################################################# + # Functions related to utility functions + ################################################################################# + def build_model_data_hash(self): + data = { + "Model":self.id, + "Genome":self.genome.info.metadata["Name"], + "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], + + } + return data + + def compare_reactions(self, reaction_list,filename): + data = {} + for rxn in reaction_list: + for met in rxn.metabolites: + if met.id not in data: + data[met.id] = {} + for other_rxn in reaction_list: + data[met.id][other_rxn.id] = 0 + data[met.id][rxn.id] = rxn.metabolites[met] + df = pd.DataFrame(data) + df = df.transpose() + df.to_csv(filename) ################################################################################# # Functions related to managing biomass reactions diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 51dc2e38..1814e774 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -640,7 +640,7 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): return model.metabolites.get_by_id(fullid) if tempid in self._template.compcompounds: met = self._template.compcompounds.get_by_id(tempid).to_metabolite(index) - model.metabolites.add(met) + model.add_metabolites([met]) return met logger.error( "Could not find biomass metabolite [%s] in model or template!", @@ -658,13 +658,13 @@ def get_or_create_reaction(self, model, baseid, compartment=None, index=None): return model.reactions.get_by_id(fullid) if tempid in self._template.reactions: rxn = self._template.reactions.get_by_id(tempid).to_reaction(model, index) - model.reactions.add(rxn) + model.add_reactions([rxn]) return rxn newrxn = Reaction(fullid, fullid, "biomasses", 0, 1000) - model.reactions.add(newrxn) + model.add_reactions(newrxn) return newrxn - def build_biomass(self, model, index="0", classic=False, GC=0.5): + def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=True): types = [ "cofactor", "lipid", @@ -700,7 +700,8 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) - specific_reactions["dna"].metabolites[met] = 1 + specific_reactions["dna"].add_metabolites({met:1}) + metabolites[met] = 1 metabolites[met] = -1 * self.dna if not classic and self.protein > 0: met = self.get_or_create_metabolite(model, "cpd11463", "c", index) @@ -710,7 +711,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) - specific_reactions["protein"].metabolites[met] = 1 + specific_reactions["protein"].add_metabolites({met:1}) metabolites[met] = -1 * self.protein if not classic and self.rna > 0: met = self.get_or_create_metabolite(model, "cpd11462", "c", index) @@ -720,7 +721,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) - specific_reactions["rna"].metabolites[met] = 1 + specific_reactions["rna"].add_metabolites({met:1}) metabolites[met] = -1 * self.rna bio_type_hash = {} for type in types: @@ -752,13 +753,13 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): coef = comp.coefficient elif comp.coefficient_type == "AT": coef = ( - comp.coefficient + 2 * comp.coefficient * (1 - GC) * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) elif comp.coefficient_type == "GC": coef = ( - comp.coefficient + 2 * comp.coefficient * GC * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) @@ -771,10 +772,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): metabolites[met] = coef elif not classic: coef = coef / type_abundances[type] - if met in metabolites: - specific_reactions[type].metabolites[met] += coef - else: - specific_reactions[type].metabolites[met] = coef + specific_reactions[type].add_metabolites({met:coef}) for l_met in comp.linked_metabolites: met = self.get_or_create_metabolite( model, l_met.id, None, index @@ -787,16 +785,13 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5): else: metabolites[met] = coef * comp.linked_metabolites[l_met] elif not classic: - if met in metabolites: - specific_reactions[type].metabolites[met] += ( - coef * comp.linked_metabolites[l_met] - ) - else: - specific_reactions[type].metabolites[met] = ( - coef * comp.linked_metabolites[l_met] - ) + specific_reactions[type].add_metabolites({met:coef * comp.linked_metabolites[l_met]}) biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" biorxn.add_metabolites(metabolites) + if add_to_model: + if biorxn.id in model.reactions: + model.remove_reactions([biorxn.id]) + model.add_reactions([biorxn]) return biorxn def get_data(self): From e0a7f4fb882fe321901e13a62c3ce529edf1abb2 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 18 Jan 2023 04:06:01 -0600 Subject: [PATCH 02/57] index fix --- modelseedpy/core/msbuilder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index c8763c2d..e8b21f0b 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -896,6 +896,7 @@ def build( @param annotate_with_rast: @return: """ + self.index = index if annotate_with_rast: rast = RastClient() From a27257ca9321b8a8e5cefc576456dfcd206940e0 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 31 Jan 2023 08:57:33 -0600 Subject: [PATCH 03/57] missing import --- modelseedpy/biochem/modelseed_biochem.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index ccdd8d76..80594e0e 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -7,6 +7,7 @@ from modelseedpy.biochem.modelseed_compound import ModelSEEDCompound, ModelSEEDCompound2 from modelseedpy.biochem.modelseed_reaction import ModelSEEDReaction, ModelSEEDReaction2 from modelseedpy.helpers import config +from modelseedpy.core.msmodel import get_reaction_constraints_from_direction logger = logging.getLogger(__name__) From f2eb10e3fdeae8d0f2c3b62edf8e277a8031b59a Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Feb 2023 10:25:06 -0600 Subject: [PATCH 04/57] fixed builder to detect biomass added compounds --- examples/Model Reconstruction/Biomass.ipynb | 162 +++++++++- .../build_metabolic_model.ipynb | 283 ++++++++++++++++++ modelseedpy/core/msbuilder.py | 29 +- modelseedpy/core/mstemplate.py | 1 + 4 files changed, 468 insertions(+), 7 deletions(-) diff --git a/examples/Model Reconstruction/Biomass.ipynb b/examples/Model Reconstruction/Biomass.ipynb index e4a2c901..3726f959 100644 --- a/examples/Model Reconstruction/Biomass.ipynb +++ b/examples/Model Reconstruction/Biomass.ipynb @@ -2,18 +2,17 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "5434992c-fc67-40f5-ae08-82f44790666c", "metadata": {}, "outputs": [], "source": [ - "from modelseedpy.helpers import get_template\n", - "from modelseedpy.core.mstemplate import MSTemplateBuilder" + "import modelseedpy" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 2, "id": "b243e00a-4a8b-489d-a778-61844a439e63", "metadata": {}, "outputs": [ @@ -21,7 +20,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "cobrakbase 0.2.8\n" + "cobrakbase 0.3.1\n" ] } ], @@ -30,6 +29,157 @@ "kbase = cobrakbase.KBaseAPI()" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3a177c16-ecb0-4050-bbf5-47aad10f2af9", + "metadata": {}, + "outputs": [], + "source": [ + "template = kbase.get_from_ws('GramNegModelTemplateV3', 'NewKBaseModelTemplates')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4ce52552-dce2-4c44-9884-cf00d15e76ab", + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import MSBuilder" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6f216f6a-5e25-4697-bf6b-9ae63475b5c7", + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Model\n", + "model = Model('test')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d9763d58-daba-4751-811f-23581b390025", + "metadata": {}, + "outputs": [], + "source": [ + "biomass = template.biomasses[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d3e884ac-2568-445a-ac04-1508b536c88a", + "metadata": {}, + "outputs": [], + "source": [ + "reaction = biomass.build_biomass(model, '0', True)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f5140ac5-273f-4eb5-b806-ddd9178b252e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 {'modelseed_template_id': 'cpd00010_c'}\n", + "cpd11493_c0 {'modelseed_template_id': 'cpd11493_c'}\n", + "cpd12370_c0 {'modelseed_template_id': 'cpd12370_c'}\n", + "cpd00003_c0 {'modelseed_template_id': 'cpd00003_c'}\n", + "cpd00006_c0 {'modelseed_template_id': 'cpd00006_c'}\n", + "cpd00205_c0 {'modelseed_template_id': 'cpd00205_c'}\n", + "cpd00254_c0 {'modelseed_template_id': 'cpd00254_c'}\n", + "cpd10516_c0 {'modelseed_template_id': 'cpd10516_c'}\n", + "cpd00063_c0 {'modelseed_template_id': 'cpd00063_c'}\n", + "cpd00009_c0 {'modelseed_template_id': 'cpd00009_c'}\n", + "cpd00099_c0 {'modelseed_template_id': 'cpd00099_c'}\n", + "cpd00149_c0 {'modelseed_template_id': 'cpd00149_c'}\n", + "cpd00058_c0 {'modelseed_template_id': 'cpd00058_c'}\n", + "cpd00015_c0 {'modelseed_template_id': 'cpd00015_c'}\n", + "cpd10515_c0 {'modelseed_template_id': 'cpd10515_c'}\n", + "cpd00030_c0 {'modelseed_template_id': 'cpd00030_c'}\n", + "cpd00048_c0 {'modelseed_template_id': 'cpd00048_c'}\n", + "cpd00034_c0 {'modelseed_template_id': 'cpd00034_c'}\n", + "cpd00016_c0 {'modelseed_template_id': 'cpd00016_c'}\n", + "cpd00220_c0 {'modelseed_template_id': 'cpd00220_c'}\n", + "cpd00017_c0 {'modelseed_template_id': 'cpd00017_c'}\n", + "cpd00201_c0 {'modelseed_template_id': 'cpd00201_c'}\n", + "cpd00087_c0 {'modelseed_template_id': 'cpd00087_c'}\n", + "cpd00345_c0 {'modelseed_template_id': 'cpd00345_c'}\n", + "cpd00042_c0 {'modelseed_template_id': 'cpd00042_c'}\n", + "cpd00028_c0 {'modelseed_template_id': 'cpd00028_c'}\n", + "cpd00557_c0 {'modelseed_template_id': 'cpd00557_c'}\n", + "cpd00264_c0 {'modelseed_template_id': 'cpd00264_c'}\n", + "cpd00118_c0 {'modelseed_template_id': 'cpd00118_c'}\n", + "cpd00056_c0 {'modelseed_template_id': 'cpd00056_c'}\n", + "cpd15560_c0 {'modelseed_template_id': 'cpd15560_c'}\n", + "cpd15352_c0 {'modelseed_template_id': 'cpd15352_c'}\n", + "cpd15500_c0 {'modelseed_template_id': 'cpd15500_c'}\n", + "cpd00166_c0 {'modelseed_template_id': 'cpd00166_c'}\n", + "cpd01997_c0 {'modelseed_template_id': 'cpd01997_c'}\n", + "cpd03422_c0 {'modelseed_template_id': 'cpd03422_c'}\n", + "cpd00104_c0 {'modelseed_template_id': 'cpd00104_c'}\n", + "cpd00037_c0 {'modelseed_template_id': 'cpd00037_c'}\n", + "cpd00050_c0 {'modelseed_template_id': 'cpd00050_c'}\n", + "cpd15793_c0 {'modelseed_template_id': 'cpd15793_c'}\n", + "cpd15540_c0 {'modelseed_template_id': 'cpd15540_c'}\n", + "cpd15533_c0 {'modelseed_template_id': 'cpd15533_c'}\n", + "cpd15432_c0 {'modelseed_template_id': 'cpd15432_c'}\n", + "cpd02229_c0 {'modelseed_template_id': 'cpd02229_c'}\n", + "cpd15665_c0 {'modelseed_template_id': 'cpd15665_c'}\n", + "cpd15666_c0 {'modelseed_template_id': 'cpd15666_c'}\n", + "cpd00023_c0 {'modelseed_template_id': 'cpd00023_c'}\n", + "cpd00001_c0 {'modelseed_template_id': 'cpd00001_c'}\n", + "cpd00033_c0 {'modelseed_template_id': 'cpd00033_c'}\n", + "cpd00035_c0 {'modelseed_template_id': 'cpd00035_c'}\n", + "cpd00039_c0 {'modelseed_template_id': 'cpd00039_c'}\n", + "cpd00041_c0 {'modelseed_template_id': 'cpd00041_c'}\n", + "cpd00051_c0 {'modelseed_template_id': 'cpd00051_c'}\n", + "cpd00053_c0 {'modelseed_template_id': 'cpd00053_c'}\n", + "cpd00054_c0 {'modelseed_template_id': 'cpd00054_c'}\n", + "cpd00060_c0 {'modelseed_template_id': 'cpd00060_c'}\n", + "cpd00065_c0 {'modelseed_template_id': 'cpd00065_c'}\n", + "cpd00066_c0 {'modelseed_template_id': 'cpd00066_c'}\n", + "cpd00069_c0 {'modelseed_template_id': 'cpd00069_c'}\n", + "cpd00084_c0 {'modelseed_template_id': 'cpd00084_c'}\n", + "cpd00107_c0 {'modelseed_template_id': 'cpd00107_c'}\n", + "cpd00119_c0 {'modelseed_template_id': 'cpd00119_c'}\n", + "cpd00129_c0 {'modelseed_template_id': 'cpd00129_c'}\n", + "cpd00132_c0 {'modelseed_template_id': 'cpd00132_c'}\n", + "cpd00156_c0 {'modelseed_template_id': 'cpd00156_c'}\n", + "cpd00161_c0 {'modelseed_template_id': 'cpd00161_c'}\n", + "cpd00322_c0 {'modelseed_template_id': 'cpd00322_c'}\n", + "cpd00115_c0 {'modelseed_template_id': 'cpd00115_c'}\n", + "cpd00012_c0 {'modelseed_template_id': 'cpd00012_c'}\n", + "cpd00241_c0 {'modelseed_template_id': 'cpd00241_c'}\n", + "cpd00356_c0 {'modelseed_template_id': 'cpd00356_c'}\n", + "cpd00357_c0 {'modelseed_template_id': 'cpd00357_c'}\n", + "cpd00002_c0 {'modelseed_template_id': 'cpd00002_c'}\n", + "cpd00038_c0 {'modelseed_template_id': 'cpd00038_c'}\n", + "cpd00052_c0 {'modelseed_template_id': 'cpd00052_c'}\n", + "cpd00062_c0 {'modelseed_template_id': 'cpd00062_c'}\n", + "cpd00008_c0 {'modelseed_template_id': 'cpd00008_c'}\n", + "cpd00067_c0 {'modelseed_template_id': 'cpd00067_c'}\n", + "cpd11416_c0 {'modelseed_template_id': 'cpd11416_c'}\n", + "cpd17041_c0 {'modelseed_template_id': 'cpd17041_c'}\n", + "cpd17042_c0 {'modelseed_template_id': 'cpd17042_c'}\n", + "cpd17043_c0 {'modelseed_template_id': 'cpd17043_c'}\n" + ] + } + ], + "source": [ + "for m in reaction.metabolites:\n", + " print(m, m.notes)" + ] + }, { "cell_type": "code", "execution_count": 42, @@ -551,7 +701,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 2f1e8d3f..6a817c0f 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -19,6 +19,24 @@ "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = MSBuilder.build_metabolic_model('ecoli', genome, classic_biomass=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.summary()" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -36,6 +54,271 @@ "print('Number of features:', len(genome.features))" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "builder = MSBuilder(genome)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "The genomes or genomeSet that you have submitted wasn’t annotated using the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. (", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 93\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 94\u001b[0;31m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: arrays used as indices must be of integer (or boolean) type", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3016957/3197840996.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_select_template\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msbuilder.py\u001b[0m in \u001b[0;36mauto_select_template\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 665\u001b[0m \u001b[0mgenome_classifier\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"knn_ACNP_RAST_filter_01_17_2023\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 666\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome_class\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenome_classifier\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclassify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 667\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 668\u001b[0m \u001b[0;31m# TODO: update with enum MSGenomeClass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/msgenomeclassifier.py\u001b[0m in \u001b[0;36mclassify\u001b[0;34m(self, genome_or_roles, ontology_term)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0montology_term\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 32\u001b[0m )\n\u001b[0;32m---> 33\u001b[0;31m indicator_df, master_role_list = create_indicator_matrix(\n\u001b[0m\u001b[1;32m 34\u001b[0m \u001b[0mgenome_or_roles\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 35\u001b[0m )\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/ml/predict_phenotype.py\u001b[0m in \u001b[0;36mcreate_indicator_matrix\u001b[0;34m(ref_to_role, master_role_list)\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0mindicators\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatching_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m raise IndexError(\n\u001b[0m\u001b[1;32m 97\u001b[0m \u001b[0;31m\"\u001b[0m\u001b[0mThe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mgenomeSet\u001b[0m \u001b[0mthat\u001b[0m \u001b[0myou\u001b[0m \u001b[0mhave\u001b[0m \u001b[0msubmitted\u001b[0m \u001b[0mwasn\u001b[0m\u001b[0;31m’\u001b[0m\u001b[0mt\u001b[0m \u001b[0mannotated\u001b[0m \u001b[0musing\u001b[0m \u001b[0mthe\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0mRAST\u001b[0m \u001b[0mannotation\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mPlease\u001b[0m \u001b[0mannotate\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mgenomes\u001b[0m \u001b[0mvia\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m‘\u001b[0m\u001b[0mAnnotate\u001b[0m \u001b[0mMicrobial\u001b[0m \u001b[0mGenome\u001b[0m\u001b[0;31m’\u001b[0m \u001b[0mapp\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mIndexError\u001b[0m: The genomes or genomeSet that you have submitted wasn’t annotated using the RAST annotation pipeline. Please annotate the genomes via ‘Annotate Microbial Genome’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genome/release)or genomeSets via Annotate Multiple Microbial Genomes’ app (https://narrative.kbase.us/#appcatalog/app/RAST_SDK/reannotate_microbial_genomes/release) and resubmit the RAST annotated genome/genomeSets into the Predict Phenotype app. (" + ] + } + ], + "source": [ + "builder.auto_select_template()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from cobra.core import Reaction" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "rxn = Reaction('SK_cpd11416_c0', 'SK_cpd11416_c0', '', 0, 1000)\n", + "rxn.add_metabolites({model.metabolites.cpd11416_c0: -1})\n", + "model.add_reactions([rxn])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Nameecoli
Memory address7f3dd51e8400
Number of metabolites1458
Number of reactions1772
Number of genes1295
Number of groups1323
Objective expression1.0*bio1 - 1.0*bio1_reverse_b18f7
CompartmentsCytosol, Extracellular
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MSBuilder.gapfill_model(model, \"bio1\", builder.template, None)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 0.0

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cpd00010_c0 CoA [c0] 80\n", + "cpd11493_c0 ACP [c0] 39\n", + "cpd12370_c0 apo-ACP [c0] 3\n", + "cpd00003_c0 NAD [c0] 127\n", + "cpd00006_c0 NADP [c0] 89\n", + "cpd00205_c0 K+ [c0] 5\n", + "cpd00254_c0 Mg [c0] 3\n", + "cpd10516_c0 fe3 [c0] 5\n", + "cpd00063_c0 Ca2+ [c0] 2\n", + "cpd00009_c0 Phosphate [c0] 210\n", + "cpd00099_c0 Cl- [c0] 3\n", + "cpd00149_c0 Co2+ [c0] 2\n", + "cpd00058_c0 Cu2+ [c0] 3\n", + "cpd00015_c0 FAD [c0] 13\n", + "cpd10515_c0 Fe2+ [c0] 5\n", + "cpd00030_c0 Mn2+ [c0] 2\n", + "cpd00048_c0 Sulfate [c0] 4\n", + "cpd00034_c0 Zn2+ [c0] 2\n", + "cpd00016_c0 Pyridoxal phosphate [c0] 5\n", + "cpd00220_c0 Riboflavin [c0] 5\n", + "cpd00017_c0 S-Adenosyl-L-methionine [c0] 21\n", + "cpd00201_c0 10-Formyltetrahydrofolate [c0] 7\n", + "cpd00087_c0 Tetrahydrofolate [c0] 12\n", + "cpd00345_c0 5-Methyltetrahydrofolate [c0] 3\n", + "cpd00042_c0 GSH [c0] 13\n", + "cpd00028_c0 Heme [c0] 4\n", + "cpd00557_c0 Siroheme [c0] 2\n", + "cpd00264_c0 Spermidine [c0] 8\n", + "cpd00118_c0 Putrescine [c0] 9\n", + "cpd00056_c0 TPP [c0] 7\n", + "cpd15560_c0 Ubiquinone-8 [c0] 18\n", + "cpd15352_c0 2-Demethylmenaquinone 8 [c0] 7\n", + "cpd15500_c0 Menaquinone 8 [c0] 12\n", + "cpd00166_c0 Calomide [c0] 4\n", + "cpd01997_c0 Dimethylbenzimidazole [c0] 2\n", + "cpd03422_c0 Cobinamide [c0] 2\n", + "cpd00104_c0 BIOT [c0] 5\n", + "cpd00037_c0 UDP-N-acetylglucosamine [c0] 16\n", + "cpd00050_c0 FMN [c0] 11\n", + "cpd15793_c0 Stearoylcardiolipin (B. subtilis) [c0] 1\n", + "cpd15540_c0 Phosphatidylglycerol dioctadecanoyl [c0] 3\n", + "cpd15533_c0 phosphatidylethanolamine dioctadecanoyl [c0] 3\n", + "cpd15432_c0 core oligosaccharide lipid A [c0] 2\n", + "cpd02229_c0 Bactoprenyl diphosphate [c0] 5\n", + "cpd15665_c0 Peptidoglycan polymer (n subunits) [c0] 2\n", + "cpd15666_c0 Peptidoglycan polymer (n-1 subunits) [c0] 2\n", + "cpd00023_c0 L-Glutamate [c0] 57\n", + "cpd00001_c0 H2O [c0] 556\n", + "cpd00033_c0 Glycine [c0] 21\n", + "cpd00035_c0 L-Alanine [c0] 17\n", + "cpd00039_c0 L-Lysine [c0] 8\n", + "cpd00041_c0 L-Aspartate [c0] 19\n", + "cpd00051_c0 L-Arginine [c0] 6\n", + "cpd00053_c0 L-Glutamine [c0] 17\n", + "cpd00054_c0 L-Serine [c0] 23\n", + "cpd00060_c0 L-Methionine [c0] 19\n", + "cpd00065_c0 L-Tryptophan [c0] 5\n", + "cpd00066_c0 L-Phenylalanine [c0] 4\n", + "cpd00069_c0 L-Tyrosine [c0] 6\n", + "cpd00084_c0 L-Cysteine [c0] 14\n", + "cpd00107_c0 L-Leucine [c0] 6\n", + "cpd00119_c0 L-Histidine [c0] 4\n", + "cpd00129_c0 L-Proline [c0] 11\n", + "cpd00132_c0 L-Asparagine [c0] 6\n", + "cpd00156_c0 L-Valine [c0] 5\n", + "cpd00161_c0 L-Threonine [c0] 7\n", + "cpd00322_c0 L-Isoleucine [c0] 4\n", + "cpd00115_c0 dATP [c0] 7\n", + "cpd00012_c0 PPi [c0] 134\n", + "cpd00241_c0 dGTP [c0] 8\n", + "cpd00356_c0 dCTP [c0] 6\n", + "cpd00357_c0 TTP [c0] 7\n", + "cpd00002_c0 ATP [c0] 276\n", + "cpd00038_c0 GTP [c0] 20\n", + "cpd00052_c0 CTP [c0] 25\n", + "cpd00062_c0 UTP [c0] 13\n", + "cpd00008_c0 ADP [c0] 214\n", + "cpd00067_c0 H+ [c0] 896\n", + "cpd11416_c0 Biomass [c0] 2\n", + "cpd17041_c0 Protein biosynthesis [c0] 2\n", + "cpd17042_c0 DNA replication [c0] 2\n", + "cpd17043_c0 RNA transcription [c0] 2\n" + ] + } + ], + "source": [ + "for m in model.reactions.bio1.metabolites:\n", + " print(m, m.name, len(m.reactions))" + ] + }, { "cell_type": "code", "execution_count": 4, diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e8b21f0b..1c456d19 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -886,6 +886,8 @@ def build( index="0", allow_all_non_grp_reactions=False, annotate_with_rast=True, + biomass_classic=False, + biomass_gc=0.5, ): """ @@ -894,6 +896,8 @@ def build( @param index: @param allow_all_non_grp_reactions: @param annotate_with_rast: + @param biomass_classic: + @param biomass_gc: @return: """ self.index = index @@ -931,6 +935,23 @@ def build( cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) + biomass_reactions = [] + for rxn_biomass in self.template.biomasses: + reaction = rxn_biomass.build_biomass( + cobra_model, "0", biomass_classic, biomass_gc + ) + for m in reaction.metabolites: + if "modelseed_template_id" in m.notes: + self.template_species_to_model_species[ + m.notes["modelseed_template_id"] + ] = m + biomass_reactions.append(reaction) + + if len(biomass_reactions) > 0: + cobra_model.add_reactions(biomass_reactions) + cobra_model.objective = biomass_reactions[0].id + + """ if ( self.template.name.startswith("CoreModel") or self.template.name.startswith("GramNeg") @@ -940,6 +961,7 @@ def build( self.build_static_biomasses(cobra_model, self.template) ) cobra_model.objective = "bio1" + """ reactions_sinks = self.build_drains() cobra_model.add_reactions(reactions_sinks) @@ -1027,10 +1049,15 @@ def build_metabolic_model( allow_all_non_grp_reactions=False, annotate_with_rast=True, gapfill_model=True, + classic_biomass=False, ): builder = MSBuilder(genome, template) model = builder.build( - model_id, index, allow_all_non_grp_reactions, annotate_with_rast + model_id, + index, + allow_all_non_grp_reactions, + annotate_with_rast, + classic_biomass, ) # Gapfilling model if gapfill_model: diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index b0d384eb..d33846f3 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -174,6 +174,7 @@ def to_metabolite(self, index="0"): if len(str(index)) > 0: name = f"{self.name} [{compartment}]" metabolite = Metabolite(cpd_id, self.formula, name, self.charge, compartment) + metabolite.notes["modelseed_template_id"] = self.id return metabolite @property From a8583236fdfdd9c02e948b9573ff89a34a226c82 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Fri, 3 Feb 2023 16:54:03 -0600 Subject: [PATCH 05/57] notebook update --- .../build_metabolic_model.ipynb | 1140 ++++++++++++++++- 1 file changed, 1135 insertions(+), 5 deletions(-) diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 6a817c0f..8cdd7a12 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -1,12 +1,26 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build Metabolic Model from Genome .faa file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* MSGenome: to read a faa file\n", + "* MSBuilder: to build metabolic model from the genome" + ] + }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "import modelseedpy\n", "from modelseedpy import MSBuilder, MSGenome" ] }, @@ -19,20 +33,1136 @@ "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`MSBuilder.build_metabolic_model` default parameters runs RAST, ML prediction to select template (gram neg, gram pos, cyano [not implemented], archaea [not implemented]), builds draft model and gapfills with complete media" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:89: DeprecationWarning: `np.float` is a deprecated alias for the builtin `float`. To silence this warning, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.float):\n", + "/home/fliu/.local/lib/python3.8/site-packages/cobra/io/dict.py:91: DeprecationWarning: `np.bool` is a deprecated alias for the builtin `bool`. To silence this warning, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " if isinstance(value, np.bool):\n" + ] + } + ], "source": [ "model = MSBuilder.build_metabolic_model('ecoli', genome, classic_biomass=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ignore this below ..." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Objective

1.0 bio1 = 141.02637369025626

Uptake

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00007_e0EX_cpd00007_e0244.300.00%
cpd00024_e0EX_cpd00024_e083.0752.58%
cpd00028_e0EX_cpd00028_e00.3955340.08%
cpd00030_e0EX_cpd00030_e00.395500.00%
cpd00033_e0EX_cpd00033_e079.8120.99%
cpd00034_e0EX_cpd00034_e00.395500.00%
cpd00039_e0EX_cpd00039_e031.4261.17%
cpd00051_e0EX_cpd00051_e034.7461.29%
cpd00054_e0EX_cpd00054_e034.3530.64%
cpd00058_e0EX_cpd00058_e00.395500.00%
cpd00060_e0EX_cpd00060_e031.0950.96%
cpd00063_e0EX_cpd00063_e00.395500.00%
cpd00065_e0EX_cpd00065_e06.647110.45%
cpd00066_e0EX_cpd00066_e021.7691.21%
cpd00069_e0EX_cpd00069_e016.9990.95%
cpd00079_e0EX_cpd00079_e0499.9618.61%
cpd00080_e0EX_cpd00080_e0609.4311.34%
cpd00099_e0EX_cpd00099_e00.395500.00%
cpd00106_e0EX_cpd00106_e0401.249.96%
cpd00107_e0EX_cpd00107_e052.8661.97%
cpd00118_e0EX_cpd00118_e00.395540.01%
cpd00119_e0EX_cpd00119_e011.1660.42%
cpd00129_e0EX_cpd00129_e025.9650.81%
cpd00130_e0EX_cpd00130_e0199.144.94%
cpd00132_e0EX_cpd00132_e028.2840.70%
cpd00136_e0EX_cpd00136_e00.395570.02%
cpd00149_e0EX_cpd00149_e00.395500.00%
cpd00156_e0EX_cpd00156_e049.651.54%
cpd00161_e0EX_cpd00161_e029.7240.74%
cpd00184_e0EX_cpd00184_e0221.11013.71%
cpd00205_e0EX_cpd00205_e00.395500.00%
cpd00208_e0EX_cpd00208_e03.526120.26%
cpd00209_e0EX_cpd00209_e019000.00%
cpd00249_e0EX_cpd00249_e011.5690.65%
cpd00254_e0EX_cpd00254_e00.395500.00%
cpd00264_e0EX_cpd00264_e00.395570.02%
cpd00268_e0EX_cpd00268_e00.197800.00%
cpd00277_e0EX_cpd00277_e022.59101.40%
cpd00305_e0EX_cpd00305_e00.3955120.03%
cpd00322_e0EX_cpd00322_e034.0561.27%
cpd00355_e0EX_cpd00355_e00.791110.05%
cpd00367_e0EX_cpd00367_e012.9990.73%
cpd00383_e0EX_cpd00383_e01.97870.09%
cpd00412_e0EX_cpd00412_e02.76990.15%
cpd00438_e0EX_cpd00438_e02411014.95%
cpd00644_e0EX_cpd00644_e00.79190.04%
cpd00794_e0EX_cpd00794_e014.1121.05%
cpd01080_e0EX_cpd01080_e035.09183.92%
cpd03847_e0EX_cpd03847_e03.526140.31%
cpd10515_e0EX_cpd10515_e00.79100.00%
cpd10516_e0EX_cpd10516_e00.395500.00%
cpd17041_c0rxn13782_c014100.00%
cpd17042_c0rxn13783_c014100.00%
cpd17043_c0rxn13784_c014100.00%

Secretion

\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MetaboliteReactionFluxC-NumberC-Flux
cpd00009_e0EX_cpd00009_e0-100000.00%
cpd00011_e0EX_cpd00011_e0-796.817.50%
cpd00020_e0EX_cpd00020_e0-282.137.97%
cpd00027_e0EX_cpd00027_e0-445.8625.18%
cpd00029_e0EX_cpd00029_e0-49029.22%
cpd00035_e0EX_cpd00035_e0-185.235.23%
cpd00047_e0EX_cpd00047_e0-2.37310.02%
cpd00100_e0EX_cpd00100_e0-4.38630.12%
cpd00108_e0EX_cpd00108_e0-3.52660.20%
cpd00116_e0EX_cpd00116_e0-0.395510.00%
cpd00139_e0EX_cpd00139_e0-1.18720.02%
cpd00151_e0EX_cpd00151_e0-221.1510.40%
cpd00159_e0EX_cpd00159_e0-835.5323.60%
cpd00226_e0EX_cpd00226_e0-220.8510.39%
cpd02701_c0SK_cpd02701_c0-0.3955150.06%
cpd03091_c0SK_cpd03091_c0-0.791100.07%
cpd11416_c0SK_cpd11416_c0-14100.00%
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.summary()" ] From f00dbc1b429410443878f5f1f2b8b4611f83da89 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 9 Feb 2023 13:47:14 -0600 Subject: [PATCH 06/57] Fixes for new biomass code --- modelseedpy/core/fbahelper.py | 16 +++-- modelseedpy/core/msbuilder.py | 100 +++------------------------- modelseedpy/core/msgapfill.py | 2 +- modelseedpy/core/mstemplate.py | 23 +++++-- modelseedpy/fbapkg/gapfillingpkg.py | 43 ++++++++---- 5 files changed, 72 insertions(+), 112 deletions(-) diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py index 6c44108f..8605fef3 100644 --- a/modelseedpy/core/fbahelper.py +++ b/modelseedpy/core/fbahelper.py @@ -115,18 +115,24 @@ def modelseed_id_from_cobra_reaction(reaction): @staticmethod def metabolite_mw(metabolite): + fixed_masses = {"cpd11416":1,"cpd17041":0,"cpd17042":0,"cpd17043":0} + msid = FBAHelper.modelseed_id_from_cobra_metabolite(metabolite) + if msid in fixed_masses: + return fixed_masses[msid] + if not metabolite.formula: + return 0 + formula = re.sub("R\d*", "", metabolite.formula) try: - if not metabolite.formula: - return 0 - formula = re.sub("R\d*", "", metabolite.formula) chem_mw = ChemMW(printing=False) chem_mw.mass(formula) return chem_mw.raw_mw except: - warn( + logger.warn( "The compound " + metabolite.id - + " possesses an unconventional formula {metabolite.formula}; hence, the MW cannot be computed." + + " possesses an unconventional formula " + + metabolite.formula + + "; hence, the MW cannot be computed." ) return 0 diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 06869289..2a2415cb 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -555,85 +555,6 @@ def get_or_create_metabolite( pass return model.metabolites.get_by_id(full_id) - @staticmethod - def build_biomass_new(model, template, index): - biomasses = [] - types = ["cofactor", "lipid", "cellwall"] - for bio in template.biomasses: - # Creating biomass reaction object - metabolites = {} - biorxn = Reaction(bio.id, bio.name, "biomasses", 0, 1000) - # Adding standard compounds for DNA, RNA, protein, and biomass - if bio["type"] == "growth": - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11416", "c", index - ) - metabolites[met] = 1 - if "dna" in bio and bio["dna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11461", "c", index - ) - metabolites[met] = -1 * bio["dna"] - if "protein" in bio and bio["protein"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11463", "c", index - ) - metabolites[met] = -1 * bio["protein"] - if "rna" in bio and bio["rna"] > 0: - met = MSBuilder.get_or_create_metabolite( - model, template, "cpd11462", "c", index - ) - metabolites[met] = -1 * bio["rna"] - bio_type_hash = {} - for type in types: - for comp in bio["templateBiomassComponents"]: - fullid = FBAHelper.id_from_ref(comp["templatecompcompound_ref"]) - (baseid, compartment, ignore_index) = FBAHelper.parse_id(fullid) - comp["met"] = MSBuilder.get_or_create_metabolite( - model, template, baseid, compartment, index - ) - if type not in bio_type_hash: - bio_type_hash[type] = {"items": [], "total_mw": 0} - if FBAHelper.metabolite_mw(comp["met"]): - types[type] += FBAHelper.metabolite_mw(comp["met"]) / 1000 - bio_type_hash[type].append(comp) - for type in bio_type_hash: - compmass = bio[type] - for comp in bio_type_hash[type]: - coef = None - if comp["coefficient_type"] == "MOLFRACTION": - coef = compmass / types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MOLSPLIT": - coef = compmass / types[type] * comp["coefficient"] - elif comp["coefficient_type"] == "MULTIPLIER": - coef = biorxn[type] * comp["coefficient"] - elif comp["coefficient_type"] == "EXACT": - coef = comp["coefficient"] - if coef: - met = model.metabolites.get_by_id("cpd11416_c0") - if met in metabolites: - metabolites[met] += coef - else: - metabolites[met] = coef - metabolites[met] = coef - for count, value in enumerate(comp["linked_compound_refs"]): - met = model.metabolites.get_by_id( - FBAHelper.id_from_ref(value) - ) - if met in metabolites: - metabolites[met] += ( - coef * comp["link_coefficients"][count] - ) - else: - metabolites[met] = ( - coef * comp["link_coefficients"][count] - ) - - biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" - biorxn.add_metabolites(metabolites) - biomasses.append(biorxn) - return biomasses - def build_static_biomasses(self, model, template): res = [] if template.name.startswith("CoreModel"): @@ -737,7 +658,7 @@ def build_complex_groups(self, complex_sets): group_complexes = {} for complex_set in complex_sets: for complex_id in complex_set: - if complex_id not in group_complexes: + if complex_id not in group_complexes and complex_id in self.template.complexes: cpx = self.template.complexes.get_by_id(complex_id) g = Group(complex_id) g.notes["complex_source"] = cpx.source @@ -924,9 +845,12 @@ def build( or self.template.name.startswith("GramNeg") or self.template.name.startswith("GramPos") ): - cobra_model.add_reactions( - self.build_static_biomasses(cobra_model, self.template) - ) + gc = 0.5 + if hasattr(self.genome,"info"): + gc = float(self.genome.info.metadata["GC content"]) + print("Genome custom GC:",gc) + for bio in self.template.biomasses: + bio.build_biomass(cobra_model, index, classic=False, GC=gc,add_to_model=True) cobra_model.objective = "bio1" reactions_sinks = self.build_drains() @@ -983,13 +907,9 @@ def build_full_template_model(template, model_id=None, index="0"): bio_rxn2 = build_biomass("bio2", model, template, core_atp, index) model.add_reactions([bio_rxn1, bio_rxn2]) model.objective = "bio1" - if template.name.startswith("GramNeg"): - bio_rxn1 = build_biomass("bio1", model, template, gramneg, index) - model.add_reactions([bio_rxn1]) - model.objective = "bio1" - if template.name.startswith("GramPos"): - bio_rxn1 = build_biomass("bio1", model, template, grampos, index) - model.add_reactions([bio_rxn1]) + else: + for bio in template.biomasses: + bio.build_biomass(self, model, index, classic=False, GC=0.5,add_to_model=True) model.objective = "bio1" reactions_sinks = [] diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 9b42e17d..c48cf94b 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -147,7 +147,7 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): ---------- solution : dict Specifies the reactions to be added to the model to implement the gapfilling solution - cumulation_solution : list + cumulative_solution : list Optional array to cumulatively track all reactions added to the model when integrating multiple solutions """ for rxn_id in solution["reversed"]: diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 4335ef45..ed44ce0c 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -693,15 +693,18 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru metabolites = {} biorxn = Reaction(self.id, self.name, "biomasses", 0, 1000) # Adding standard compounds for DNA, RNA, protein, and biomass - if not classic and self.type == "growth": - met = self.get_or_create_metabolite(model, "cpd11416", "c", index) - metabolites[met] = 1 specific_reactions = {"dna": None, "rna": None, "protein": None} + exclusions = {"cpd17041_c":1,"cpd17042_c":1,"cpd17043_c":1} if not classic and self.dna > 0: met = self.get_or_create_metabolite(model, "cpd11461", "c", index) specific_reactions["dna"] = self.get_or_create_reaction( model, "rxn05294", "c", index ) + specific_reactions["dna"].name = "DNA synthesis" + if "rxn13783_c" + index in model.reactions: + specific_reactions["dna"].gene_reaction_rule = model.reactions.get_by_id("rxn13783_c" + index).gene_reaction_rule + specific_reactions["dna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13783_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13783_c" + index)]) specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) @@ -713,6 +716,11 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru specific_reactions["protein"] = self.get_or_create_reaction( model, "rxn05296", "c", index ) + specific_reactions["protein"].name = "Protein synthesis" + if "rxn13782_c" + index in model.reactions: + specific_reactions["protein"].gene_reaction_rule = model.reactions.get_by_id("rxn13782_c" + index).gene_reaction_rule + specific_reactions["protein"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13782_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13782_c" + index)]) specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) @@ -723,6 +731,11 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru specific_reactions["rna"] = self.get_or_create_reaction( model, "rxn05295", "c", index ) + specific_reactions["rna"].name = "mRNA synthesis" + if "rxn13784_c" + index in model.reactions: + specific_reactions["rna"].gene_reaction_rule = model.reactions.get_by_id("rxn13784_c" + index).gene_reaction_rule + specific_reactions["rna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13784_c" + index).notes['modelseed_complex'] + model.remove_reactions([model.reactions.get_by_id("rxn13784_c" + index)]) specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) @@ -731,7 +744,9 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru bio_type_hash = {} for type in types: for comp in self.templateBiomassComponents: - if type == comp.comp_class: + if comp.metabolite.id in exclusions and not classic: + pass + elif type == comp.comp_class: met = self.get_or_create_metabolite( model, comp.metabolite.id, None, index ) diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 58140418..880aeabf 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -7,6 +7,7 @@ import json from optlang.symbolics import Zero, add from cobra import Model, Reaction, Metabolite +from cobra.io import load_json_model, save_json_model, load_matlab_model, save_matlab_model, read_sbml_model, write_sbml_model from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg from modelseedpy.core.fbahelper import FBAHelper @@ -899,6 +900,26 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return solution def filter_database_based_on_tests(self, test_conditions): + #Preserving the gapfilling objective function + gfobj = self.model.objective + #Setting the minimal growth constraint to zero + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + #Setting the objective to the original default objective for the model + self.model.objective = self.parameters["origobj"] + #Testing if the minimal objective can be achieved before filtering + solution = self.model.optimize() + print( + "Objective before filtering:", + solution.objective_value, + "; min objective:", + self.parameters["minimum_obj"], + ) + with open("debuggf.lp", "w") as out: + out.write(str(self.model.solver)) + if solution.objective_value < self.parameters["minimum_obj"]: + save_json_model(self.model, "gfdebugmdl.json") + logger.critical("Model cannot achieve the minimum objective even before filtering!") + #Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: rxnlist = [] @@ -908,7 +929,7 @@ def filter_database_based_on_tests(self, test_conditions): rxnlist.append([reaction, "<"]) if "forward" in self.gapfilling_penalties[reaction.id]: rxnlist.append([reaction, ">"]) - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 + filtered_list = self.modelutl.reaction_expansion_test( rxnlist, test_conditions ) @@ -920,21 +941,19 @@ def filter_database_based_on_tests(self, test_conditions): else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 # Now testing if the gapfilling minimum objective can still be achieved - gfobj = self.model.objective - self.model.objective = self.parameters["origobj"] solution = self.model.optimize() - # Restoring the minimum objective constraint - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ - "minimum_obj" - ] print( "Objective after filtering:", solution.objective_value, "; min objective:", self.parameters["minimum_obj"], ) + # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached if solution.objective_value < self.parameters["minimum_obj"]: - # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached + # Restoring the minimum objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] new_objective = self.model.problem.Objective(Zero, direction="min") filterobjcoef = dict() for item in filtered_list: @@ -945,7 +964,6 @@ def filter_database_based_on_tests(self, test_conditions): else: filterobjcoef[rxn.reverse_variable] = item[3] rxn.lower_bound = item[2] - self.model.objective = new_objective new_objective.set_linear_coefficients(filterobjcoef) solution = self.model.optimize() @@ -979,9 +997,10 @@ def filter_database_based_on_tests(self, test_conditions): self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ - "1" - ].lb = self.parameters["minimum_obj"] + #Restoring gapfilling objective function and minimal objective constraint + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] self.model.objective = gfobj def compute_gapfilled_solution(self, flux_values=None): From c22a8d6940ca173f934e21a3d1d6e39b02239cca Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 9 Feb 2023 13:59:47 -0600 Subject: [PATCH 07/57] Formatting with black --- modelseedpy/core/fbahelper.py | 2 +- modelseedpy/core/msbuilder.py | 9 +++- modelseedpy/core/mstemplate.py | 70 +++++++++++++++++++++-------- modelseedpy/fbapkg/gapfillingpkg.py | 33 +++++++++----- 4 files changed, 81 insertions(+), 33 deletions(-) diff --git a/modelseedpy/core/fbahelper.py b/modelseedpy/core/fbahelper.py index 8605fef3..502611d9 100644 --- a/modelseedpy/core/fbahelper.py +++ b/modelseedpy/core/fbahelper.py @@ -115,7 +115,7 @@ def modelseed_id_from_cobra_reaction(reaction): @staticmethod def metabolite_mw(metabolite): - fixed_masses = {"cpd11416":1,"cpd17041":0,"cpd17042":0,"cpd17043":0} + fixed_masses = {"cpd11416": 1, "cpd17041": 0, "cpd17042": 0, "cpd17043": 0} msid = FBAHelper.modelseed_id_from_cobra_metabolite(metabolite) if msid in fixed_masses: return fixed_masses[msid] diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 3c54986b..e53a28ac 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -668,7 +668,10 @@ def build_complex_groups(self, complex_sets): group_complexes = {} for complex_set in complex_sets: for complex_id in complex_set: - if complex_id not in group_complexes and complex_id in self.template.complexes: + if ( + complex_id not in group_complexes + and complex_id in self.template.complexes + ): cpx = self.template.complexes.get_by_id(complex_id) g = Group(complex_id) g.notes["complex_source"] = cpx.source @@ -943,7 +946,9 @@ def build_full_template_model(template, model_id=None, index="0"): model.objective = "bio1" else: for bio in template.biomasses: - bio.build_biomass(self, model, index, classic=False, GC=0.5,add_to_model=True) + bio.build_biomass( + self, model, index, classic=False, GC=0.5, add_to_model=True + ) model.objective = "bio1" reactions_sinks = [] diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 8f2a8560..6a6d5b6f 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -670,7 +670,7 @@ def get_or_create_reaction(self, model, baseid, compartment=None, index=None): model.add_reactions(newrxn) return newrxn - def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=True): + def build_biomass(self, model, index="0", classic=False, GC=0.5, add_to_model=True): types = [ "cofactor", "lipid", @@ -695,7 +695,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru biorxn = Reaction(self.id, self.name, "biomasses", 0, 1000) # Adding standard compounds for DNA, RNA, protein, and biomass specific_reactions = {"dna": None, "rna": None, "protein": None} - exclusions = {"cpd17041_c":1,"cpd17042_c":1,"cpd17043_c":1} + exclusions = {"cpd17041_c": 1, "cpd17042_c": 1, "cpd17043_c": 1} if not classic and self.dna > 0: met = self.get_or_create_metabolite(model, "cpd11461", "c", index) specific_reactions["dna"] = self.get_or_create_reaction( @@ -703,13 +703,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["dna"].name = "DNA synthesis" if "rxn13783_c" + index in model.reactions: - specific_reactions["dna"].gene_reaction_rule = model.reactions.get_by_id("rxn13783_c" + index).gene_reaction_rule - specific_reactions["dna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13783_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13783_c" + index)]) + specific_reactions[ + "dna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13783_c" + index + ).gene_reaction_rule + specific_reactions["dna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13783_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13783_c" + index)] + ) specific_reactions["dna"].subtract_metabolites( specific_reactions["dna"].metabolites ) - specific_reactions["dna"].add_metabolites({met:1}) + specific_reactions["dna"].add_metabolites({met: 1}) metabolites[met] = 1 metabolites[met] = -1 * self.dna if not classic and self.protein > 0: @@ -719,13 +729,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["protein"].name = "Protein synthesis" if "rxn13782_c" + index in model.reactions: - specific_reactions["protein"].gene_reaction_rule = model.reactions.get_by_id("rxn13782_c" + index).gene_reaction_rule - specific_reactions["protein"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13782_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13782_c" + index)]) + specific_reactions[ + "protein" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13782_c" + index + ).gene_reaction_rule + specific_reactions["protein"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13782_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13782_c" + index)] + ) specific_reactions["protein"].subtract_metabolites( specific_reactions["protein"].metabolites ) - specific_reactions["protein"].add_metabolites({met:1}) + specific_reactions["protein"].add_metabolites({met: 1}) metabolites[met] = -1 * self.protein if not classic and self.rna > 0: met = self.get_or_create_metabolite(model, "cpd11462", "c", index) @@ -734,13 +754,23 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru ) specific_reactions["rna"].name = "mRNA synthesis" if "rxn13784_c" + index in model.reactions: - specific_reactions["rna"].gene_reaction_rule = model.reactions.get_by_id("rxn13784_c" + index).gene_reaction_rule - specific_reactions["rna"].notes['modelseed_complex'] = model.reactions.get_by_id("rxn13784_c" + index).notes['modelseed_complex'] - model.remove_reactions([model.reactions.get_by_id("rxn13784_c" + index)]) + specific_reactions[ + "rna" + ].gene_reaction_rule = model.reactions.get_by_id( + "rxn13784_c" + index + ).gene_reaction_rule + specific_reactions["rna"].notes[ + "modelseed_complex" + ] = model.reactions.get_by_id("rxn13784_c" + index).notes[ + "modelseed_complex" + ] + model.remove_reactions( + [model.reactions.get_by_id("rxn13784_c" + index)] + ) specific_reactions["rna"].subtract_metabolites( specific_reactions["rna"].metabolites ) - specific_reactions["rna"].add_metabolites({met:1}) + specific_reactions["rna"].add_metabolites({met: 1}) metabolites[met] = -1 * self.rna bio_type_hash = {} for type in types: @@ -774,13 +804,15 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru coef = comp.coefficient elif comp.coefficient_type == "AT": coef = ( - 2 * comp.coefficient + 2 + * comp.coefficient * (1 - GC) * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) elif comp.coefficient_type == "GC": coef = ( - 2 * comp.coefficient + 2 + * comp.coefficient * GC * (type_abundances[type] / bio_type_hash[type]["total_mw"]) ) @@ -793,7 +825,7 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru metabolites[met] = coef elif not classic: coef = coef / type_abundances[type] - specific_reactions[type].add_metabolites({met:coef}) + specific_reactions[type].add_metabolites({met: coef}) for l_met in comp.linked_metabolites: met = self.get_or_create_metabolite( model, l_met.id, None, index @@ -806,7 +838,9 @@ def build_biomass(self, model, index="0", classic=False, GC=0.5,add_to_model=Tru else: metabolites[met] = coef * comp.linked_metabolites[l_met] elif not classic: - specific_reactions[type].add_metabolites({met:coef * comp.linked_metabolites[l_met]}) + specific_reactions[type].add_metabolites( + {met: coef * comp.linked_metabolites[l_met]} + ) biorxn.annotation[SBO_ANNOTATION] = "SBO:0000629" biorxn.add_metabolites(metabolites) if add_to_model: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 880aeabf..ebbebe72 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -7,7 +7,14 @@ import json from optlang.symbolics import Zero, add from cobra import Model, Reaction, Metabolite -from cobra.io import load_json_model, save_json_model, load_matlab_model, save_matlab_model, read_sbml_model, write_sbml_model +from cobra.io import ( + load_json_model, + save_json_model, + load_matlab_model, + save_matlab_model, + read_sbml_model, + write_sbml_model, +) from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg from modelseedpy.core.fbahelper import FBAHelper @@ -900,13 +907,13 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return solution def filter_database_based_on_tests(self, test_conditions): - #Preserving the gapfilling objective function + # Preserving the gapfilling objective function gfobj = self.model.objective - #Setting the minimal growth constraint to zero + # Setting the minimal growth constraint to zero self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 - #Setting the objective to the original default objective for the model + # Setting the objective to the original default objective for the model self.model.objective = self.parameters["origobj"] - #Testing if the minimal objective can be achieved before filtering + # Testing if the minimal objective can be achieved before filtering solution = self.model.optimize() print( "Objective before filtering:", @@ -918,8 +925,10 @@ def filter_database_based_on_tests(self, test_conditions): out.write(str(self.model.solver)) if solution.objective_value < self.parameters["minimum_obj"]: save_json_model(self.model, "gfdebugmdl.json") - logger.critical("Model cannot achieve the minimum objective even before filtering!") - #Filtering the database of any reactions that violate the specified tests + logger.critical( + "Model cannot achieve the minimum objective even before filtering!" + ) + # Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: rxnlist = [] @@ -929,7 +938,7 @@ def filter_database_based_on_tests(self, test_conditions): rxnlist.append([reaction, "<"]) if "forward" in self.gapfilling_penalties[reaction.id]: rxnlist.append([reaction, ">"]) - + filtered_list = self.modelutl.reaction_expansion_test( rxnlist, test_conditions ) @@ -951,9 +960,9 @@ def filter_database_based_on_tests(self, test_conditions): # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached if solution.objective_value < self.parameters["minimum_obj"]: # Restoring the minimum objective constraint - self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ - "minimum_obj" - ] + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ + "1" + ].lb = self.parameters["minimum_obj"] new_objective = self.model.problem.Objective(Zero, direction="min") filterobjcoef = dict() for item in filtered_list: @@ -997,7 +1006,7 @@ def filter_database_based_on_tests(self, test_conditions): self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 - #Restoring gapfilling objective function and minimal objective constraint + # Restoring gapfilling objective function and minimal objective constraint self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ "minimum_obj" ] From c61394c564810914e39da3ee8698fd3c60db7ced Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 13 Feb 2023 23:26:21 -0600 Subject: [PATCH 08/57] biomass fix --- modelseedpy/core/mstemplate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index d33846f3..b9475dc4 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -823,6 +823,8 @@ def get_data(self): for comp in self.templateBiomassComponents: data["templateBiomassComponents"].append(comp.get_data()) + return data + class NewModelTemplateRole: def __init__(self, role_id, name, features=None, source="", aliases=None): From 2915d7743f8cd6f1b335e2a86d44eb55e0f84d0b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 14 Feb 2023 14:45:37 -0600 Subject: [PATCH 09/57] bug fix --- .../build_metabolic_model.ipynb | 26 +++++++++++++++++++ modelseedpy/core/mstemplate.py | 3 ++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/examples/Model Reconstruction/build_metabolic_model.ipynb b/examples/Model Reconstruction/build_metabolic_model.ipynb index 8cdd7a12..ea2e8d41 100644 --- a/examples/Model Reconstruction/build_metabolic_model.ipynb +++ b/examples/Model Reconstruction/build_metabolic_model.ipynb @@ -40,6 +40,32 @@ "`MSBuilder.build_metabolic_model` default parameters runs RAST, ML prediction to select template (gram neg, gram pos, cyano [not implemented], archaea [not implemented]), builds draft model and gapfills with complete media" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "type object argument after ** must be a mapping, not str", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3118582/859642788.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mmodelseedpy\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mrast\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRastClient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mrast\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mannotate_genome\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgenome\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mannotate_genome\u001b[0;34m(self, genome)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"id\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"protein_translation\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mseq\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mo\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rast_client.py\u001b[0m in \u001b[0;36mf\u001b[0;34m(self, p_features)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0mparams\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"features\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mp_features\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"stages\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstages\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrpc_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"GenomeAnnotation.run_pipeline\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/modelseedpy/core/rpcclient.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, method, params, token)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0merr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"error\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 75\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"error\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 76\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mServerError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Unknown\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: type object argument after ** must be a mapping, not str" + ] + } + ], + "source": [ + "from modelseedpy import RastClient\n", + "rast = RastClient()\n", + "rast.annotate_genome(genome)" + ] + }, { "cell_type": "code", "execution_count": 3, diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index b9475dc4..7e992d52 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -654,13 +654,14 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): ) def get_or_create_reaction(self, model, baseid, compartment=None, index=None): + logger.debug(f'{baseid}, {compartment}, {index}') fullid = baseid if compartment: fullid += "_" + compartment tempid = fullid if index: fullid += index - if fullid in model.metabolites: + if fullid in model.reactions: return model.reactions.get_by_id(fullid) if tempid in self._template.reactions: rxn = self._template.reactions.get_by_id(tempid).to_reaction(model, index) From b42a04f7b7dcfaa6b6cf1eea84c8e8519cbb0b23 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 14 Feb 2023 14:48:19 -0600 Subject: [PATCH 10/57] black --- modelseedpy/core/mstemplate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 7e992d52..36a49698 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -654,7 +654,7 @@ def get_or_create_metabolite(self, model, baseid, compartment=None, index=None): ) def get_or_create_reaction(self, model, baseid, compartment=None, index=None): - logger.debug(f'{baseid}, {compartment}, {index}') + logger.debug(f"{baseid}, {compartment}, {index}") fullid = baseid if compartment: fullid += "_" + compartment From d33bd9a23e720a5799853e485491d2b7f3086ded Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 15 Feb 2023 14:07:37 -0600 Subject: [PATCH 11/57] Updates to fix gapfilling and string concatenation --- modelseedpy/core/msgapfill.py | 3 +- modelseedpy/core/msmodelutl.py | 28 +- modelseedpy/core/mstemplate.py | 4 +- modelseedpy/fbapkg/gapfillingpkg.py | 777 ++++++++++++++++------------ 4 files changed, 456 insertions(+), 356 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index c48cf94b..ad430ef2 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -10,7 +10,6 @@ from modelseedpy.core import FBAHelper # !!! the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.fbapkg.gapfillingpkg import default_blacklist from modelseedpy.core.exceptions import GapfillingError @@ -57,7 +56,7 @@ def __init__( self.gapfill_templates_by_index, self.gapfill_models_by_index = {}, {} self.gapfill_all_indecies_with_default_templates = True self.gapfill_all_indecies_with_default_models = True - self.blacklist = list(set(default_blacklist + blacklist)) + self.blacklist = list(set(blacklist)) self.test_condition_iteration_limit = 10 self.test_conditions = test_conditions self.reaction_scores = reaction_scores diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index af499773..bb147f89 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -307,27 +307,26 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output - + ################################################################################# # Functions related to utility functions ################################################################################# def build_model_data_hash(self): data = { - "Model":self.id, - "Genome":self.genome.info.metadata["Name"], - "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], - + "Model": self.id, + "Genome": self.genome.info.metadata["Name"], + "Genes": self.genome.info.metadata["Number of Protein Encoding Genes"], } return data - - def compare_reactions(self, reaction_list,filename): + + def compare_reactions(self, reaction_list, filename): data = {} for rxn in reaction_list: for met in rxn.metabolites: if met.id not in data: data[met.id] = {} for other_rxn in reaction_list: - data[met.id][other_rxn.id] = 0 + data[met.id][other_rxn.id] = 0 data[met.id][rxn.id] = rxn.metabolites[met] df = pd.DataFrame(data) df = df.transpose() @@ -508,6 +507,7 @@ def convert_cobra_reaction_to_kbreaction( def test_solution(self, solution, keep_changes=False): unneeded = [] + removed_rxns = [] tempmodel = self.model if not keep_changes: tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) @@ -535,6 +535,7 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.upper_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) logger.debug( rxn_id @@ -557,6 +558,7 @@ def test_solution(self, solution, keep_changes=False): ) rxnobj.lower_bound = original_bound else: + removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) logger.debug( rxn_id @@ -565,6 +567,7 @@ def test_solution(self, solution, keep_changes=False): + str(objective) ) if keep_changes: + tempmodel.remove_reactions(removed_rxns) for items in unneeded: del solution[items[2]][items[0]] return unneeded @@ -682,6 +685,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model is None: model = self.model if apply_condition: + print("applying - bad") self.apply_test_condition(condition, model) new_objective = model.slim_optimize() value = new_objective @@ -882,12 +886,10 @@ def reaction_expansion_test( Raises ------ """ - logger.debug("Expansion started!") + logger.debug(f"Expansion started! Binary = {binary_search}") filtered_list = [] for condition in condition_list: - logger.debug(f"testing condition {condition}") - currmodel = self.model tic = time.perf_counter() new_filtered = [] @@ -921,6 +923,10 @@ def reaction_expansion_test( + " out of " + str(len(reaction_list)) ) + filterlist = [] + for item in new_filtered: + filterlist.append(item[0].id + item[1]) + logger.debug(",".join(filterlist)) return filtered_list def add_atp_hydrolysis(self, compartment): diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 6a6d5b6f..5d206aed 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -583,7 +583,7 @@ def from_table( for index, row in filename_or_df.iterrows(): if row["biomass_id"] == bio_id: metabolite = template.compcompounds.get_by_id( - row["id"] + "_" + row["compartment"] + f'{row["id"]}_{row["compartment"]}' ) linked_mets = {} if ( @@ -594,7 +594,7 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - sub_array[0] + "_" + row["compartment"] + f'{sub_array[0]}_{row["compartment"]}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index ebbebe72..3ea2d6dd 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -20,346 +20,425 @@ logger = logging.getLogger(__name__) -default_blacklist = [ - "rxn12985", - "rxn00238", - "rxn07058", - "rxn05305", - "rxn09037", - "rxn10643", - "rxn11317", - "rxn05254", - "rxn05257", - "rxn05258", - "rxn05259", - "rxn05264", - "rxn05268", - "rxn05269", - "rxn05270", - "rxn05271", - "rxn05272", - "rxn05273", - "rxn05274", - "rxn05275", - "rxn05276", - "rxn05277", - "rxn05278", - "rxn05279", - "rxn05280", - "rxn05281", - "rxn05282", - "rxn05283", - "rxn05284", - "rxn05285", - "rxn05286", - "rxn05963", - "rxn05964", - "rxn05971", - "rxn05989", - "rxn05990", - "rxn06041", - "rxn06042", - "rxn06043", - "rxn06044", - "rxn06045", - "rxn06046", - "rxn06079", - "rxn06080", - "rxn06081", - "rxn06086", - "rxn06087", - "rxn06088", - "rxn06089", - "rxn06090", - "rxn06091", - "rxn06092", - "rxn06138", - "rxn06139", - "rxn06140", - "rxn06141", - "rxn06145", - "rxn06217", - "rxn06218", - "rxn06219", - "rxn06220", - "rxn06221", - "rxn06222", - "rxn06223", - "rxn06235", - "rxn06362", - "rxn06368", - "rxn06378", - "rxn06474", - "rxn06475", - "rxn06502", - "rxn06562", - "rxn06569", - "rxn06604", - "rxn06702", - "rxn06706", - "rxn06715", - "rxn06803", - "rxn06811", - "rxn06812", - "rxn06850", - "rxn06901", - "rxn06971", - "rxn06999", - "rxn07123", - "rxn07172", - "rxn07254", - "rxn07255", - "rxn07269", - "rxn07451", - "rxn09037", - "rxn10018", - "rxn10077", - "rxn10096", - "rxn10097", - "rxn10098", - "rxn10099", - "rxn10101", - "rxn10102", - "rxn10103", - "rxn10104", - "rxn10105", - "rxn10106", - "rxn10107", - "rxn10109", - "rxn10111", - "rxn10403", - "rxn10410", - "rxn10416", - "rxn11313", - "rxn11316", - "rxn11318", - "rxn11353", - "rxn05224", - "rxn05795", - "rxn05796", - "rxn05797", - "rxn05798", - "rxn05799", - "rxn05801", - "rxn05802", - "rxn05803", - "rxn05804", - "rxn05805", - "rxn05806", - "rxn05808", - "rxn05812", - "rxn05815", - "rxn05832", - "rxn05836", - "rxn05851", - "rxn05857", - "rxn05869", - "rxn05870", - "rxn05884", - "rxn05888", - "rxn05896", - "rxn05898", - "rxn05900", - "rxn05903", - "rxn05904", - "rxn05905", - "rxn05911", - "rxn05921", - "rxn05925", - "rxn05936", - "rxn05947", - "rxn05956", - "rxn05959", - "rxn05960", - "rxn05980", - "rxn05991", - "rxn05992", - "rxn05999", - "rxn06001", - "rxn06014", - "rxn06017", - "rxn06021", - "rxn06026", - "rxn06027", - "rxn06034", - "rxn06048", - "rxn06052", - "rxn06053", - "rxn06054", - "rxn06057", - "rxn06059", - "rxn06061", - "rxn06102", - "rxn06103", - "rxn06127", - "rxn06128", - "rxn06129", - "rxn06130", - "rxn06131", - "rxn06132", - "rxn06137", - "rxn06146", - "rxn06161", - "rxn06167", - "rxn06172", - "rxn06174", - "rxn06175", - "rxn06187", - "rxn06189", - "rxn06203", - "rxn06204", - "rxn06246", - "rxn06261", - "rxn06265", - "rxn06266", - "rxn06286", - "rxn06291", - "rxn06294", - "rxn06310", - "rxn06320", - "rxn06327", - "rxn06334", - "rxn06337", - "rxn06339", - "rxn06342", - "rxn06343", - "rxn06350", - "rxn06352", - "rxn06358", - "rxn06361", - "rxn06369", - "rxn06380", - "rxn06395", - "rxn06415", - "rxn06419", - "rxn06420", - "rxn06421", - "rxn06423", - "rxn06450", - "rxn06457", - "rxn06463", - "rxn06464", - "rxn06466", - "rxn06471", - "rxn06482", - "rxn06483", - "rxn06486", - "rxn06492", - "rxn06497", - "rxn06498", - "rxn06501", - "rxn06505", - "rxn06506", - "rxn06521", - "rxn06534", - "rxn06580", - "rxn06585", - "rxn06593", - "rxn06609", - "rxn06613", - "rxn06654", - "rxn06667", - "rxn06676", - "rxn06693", - "rxn06730", - "rxn06746", - "rxn06762", - "rxn06779", - "rxn06790", - "rxn06791", - "rxn06792", - "rxn06793", - "rxn06794", - "rxn06795", - "rxn06796", - "rxn06797", - "rxn06821", - "rxn06826", - "rxn06827", - "rxn06829", - "rxn06839", - "rxn06841", - "rxn06842", - "rxn06851", - "rxn06866", - "rxn06867", - "rxn06873", - "rxn06885", - "rxn06891", - "rxn06892", - "rxn06896", - "rxn06938", - "rxn06939", - "rxn06944", - "rxn06951", - "rxn06952", - "rxn06955", - "rxn06957", - "rxn06960", - "rxn06964", - "rxn06965", - "rxn07086", - "rxn07097", - "rxn07103", - "rxn07104", - "rxn07105", - "rxn07106", - "rxn07107", - "rxn07109", - "rxn07119", - "rxn07179", - "rxn07186", - "rxn07187", - "rxn07188", - "rxn07195", - "rxn07196", - "rxn07197", - "rxn07198", - "rxn07201", - "rxn07205", - "rxn07206", - "rxn07210", - "rxn07244", - "rxn07245", - "rxn07253", - "rxn07275", - "rxn07299", - "rxn07302", - "rxn07651", - "rxn07723", - "rxn07736", - "rxn07878", - "rxn11417", - "rxn11582", - "rxn11593", - "rxn11597", - "rxn11615", - "rxn11617", - "rxn11619", - "rxn11620", - "rxn11624", - "rxn11626", - "rxn11638", - "rxn11648", - "rxn11651", - "rxn11665", - "rxn11666", - "rxn11667", - "rxn11698", - "rxn11983", - "rxn11986", - "rxn11994", - "rxn12006", - "rxn12007", - "rxn12014", - "rxn12017", - "rxn12022", - "rxn12160", - "rxn12161", - "rxn01267", - "rxn05294", - "rxn04656", -] +base_blacklist = { + "rxn10157": "<", + "rxn09295": "<", + "rxn05938": "<", + "rxn08628": ">", + "rxn10155": "<", + "rxn01353": "<", + "rxn05683": "<", + "rxn09193": "<", + "rxn09003": "<", + "rxn01128": ">", + "rxn08655": "<", + "rxn09272": "<", + "rxn05313": "<", + "rxn01510": ">", + "rxn05297": ">", + "rxn00507": "<", + "rxn05596": "<", + "rxn01674": "<", + "rxn01679": "<", + "rxn00778": ">", + "rxn05206": ">", + "rxn00239": "<", + "rxn05937": "<", + "rxn00715": "<", + "rxn05638": ">", + "rxn05289": ">", + "rxn00839": "<", + "rxn08866": "<", + "rxn10901": "<", + "rxn09331": "<", + "rxn05242": "<", + "rxn12549": "<", + "rxn13143": "<", + "rxn12498": "<", + "rxn08373": "<", + "rxn05208": "<", + "rxn09372": "<", + "rxn00571": ">", + "rxn08104": "<", + "rxn08704": "<", + "rxn07191": "<", + "rxn09672": "<", + "rxn01048": ">", + "rxn11267": ">", + "rxn08290": "<", + "rxn09307": "<", + "rxn05676": ">", + "rxn09653": "<", + "rxn11277": "<", + "rxn00976": "<", + "rxn02520": "<", + "rxn08275": "<", + "rxn09121": "<", + "rxn08999": "<", + "rxn08633": "<", + "rxn08610": "<", + "rxn09218": "<", + "rxn05626": "<", + "rxn11320": "<", + "rxn10058": ">", + "rxn08544": "<", + "rxn12539": "<", + "rxn08990": "<", + "rxn09348": "<", + "rxn00378": "<", + "rxn05243": "<", + "rxn02154": "<", + "rxn12587": "<", + "rxn00125": "<", + "rxn05648": "<", + "rxn13722": "<", + "rxn10910": ">", + "rxn05308": ">", + "rxn08585": "<", + "rxn14207": "<", + "rxn08682": "<", + "rxn10895": "<", + "rxn09655": "<", + "rxn11934": "<", + "rxn01742": ">", + "rxn05222": ">", + "rxn09942": "<", + "rxn13753": ">", + "rxn10857": "<", + "rxn03468": "<", + "rxn04942": "<", + "rxn10990": ">", + "rxn08639": "<", + "rxn09248": "<", + "rxn11935": ">", + "rxn00870": ">", + "rxn08314": "<", + "rxn09378": "<", + "rxn09269": "<", + "rxn10057": ">", + "rxn13702": ">", + "rxn00517": "<", + "rxn09221": ">", + "rxn01505": ">", + "rxn13692": ">", + "rxn05573": "<", + "rxn10123": ">", + "rxn09005": "<", + "rxn05244": "<", + "rxn05940": "<", + "rxn10124": ">", + "rxn06202": ">", + "rxn09660": "<", + "rxn02260": ">", + "rxn08912": "<", + "rxn05760": ">", + "rxn05580": ">", + "rxn02181": ">", + "rxn09339": "<", + "rxn00767": "<", + "rxn09118": "<", + "rxn05303": "<", + "rxn06110": "<", + "rxn12800": "<", + "rxn10966": "<", + "rxn12561": "<", + "rxn04678": ">", + "rxn10818": "<", + "rxn08166": "<", + "rxn02044": ">", + "rxn12623": "<", + "rxn13392": ">", + "rxn02283": "<", + "rxn13647": ">", + "rxn08653": "<", + "rxn05218": ">", + "rxn11676": ">", + "rxn00197": "<", + "rxn00697": "<", + "rxn12575": ">", + "rxn08188": "<", + "rxn01215": "<", + "rxn08730": ">", + "rxn08519": ">", + "rxn08642": "<", + "rxn05245": "<", + "rxn04042": "<", + "rxn01443": ">", + "rxn08535": "<", + "rxn03983": "<", + "rxn08317": "<", + "rxn14173": ">", + "rxn08868": "<", + "rxn05893": ">", + "rxn00435": ">", + "rxn13724": "<", + "rxn09681": "<", + "rxn00572": ">", + "rxn05942": "<", + "rxn11158": "<", + "rxn05562": "<", + "rxn10868": "<", + "rxn10426": "<", + "rxn00941": ">", + "rxn08240": "<", + "rxn05220": ">", + "rxn01228": ">", + "rxn12540": "<", + "rxn10618": ">", + "rxn09659": "<", + "rxn08985": ">", + "rxn05523": "<", + "rxn00421": "<", + "rxn09385": "<", + "rxn08542": "<", + "rxn09658": "<", + "rxn01173": "<", + "rxn10977": "<", + "rxn05216": "<", + "rxn13748": ">", + "rxn10769": ">", + "rxn00451": "<", + "rxn01639": "<", + "rxn08661": "<", + "rxn09308": "<", + "rxn09260": "<", + "rxn00253": "<", + "rxn05207": "<", + "rxn01667": "<", + "rxn08063": "<", + "rxn01508": ">", + "rxn09657": "<", + "rxn01209": ">", + "rxn00548": ">", + "rxn12617": "<", + "rxn08747": ">", + "rxn08096": "<", + "rxn11951": "<", + "rxn09061": "<", + "rxn10978": "<", + "rxn02748": ">", + "rxn09663": "<", + "rxn08737": "<", + "rxn13127": "<", + "rxn09366": "<", + "rxn05634": "<", + "rxn05554": "<", + "rxn09266": ">", + "rxn04676": ">", + "rxn11078": ">", + "rxn04932": "<", + "rxn00607": ">", + "rxn08856": "<", + "rxn12624": "<", + "rxn05215": "<", + "rxn13686": "<", + "rxn12529": "<", + "rxn00234": "<", + "rxn13689": ">", + "rxn08117": "<", + "rxn05315": ">", + "rxn08865": "<", + "rxn11678": ">", + "rxn00518": "<", + "rxn00195": "<", + "rxn10054": "<", + "rxn12532": "<", + "rxn05902": ">", + "rxn12777": "<", + "rxn12822": ">", + "rxn13735": ">", + "rxn00427": "<", + "rxn13196": "<", + "rxn08284": "<", + "rxn10576": ">", + "rxn00891": "<", + "rxn08293": "<", + "rxn00374": ">", + "rxn08795": "<", + "rxn12583": "<", + "rxn00918": ">", + "rxn08525": "<", + "rxn10427": ">", + "rxn09271": "<", + "rxn10860": "<", + "rxn10600": ">", + "rxn13729": ">", + "rxn01375": "<", + "rxn13726": ">", + "rxn10587": "<", + "rxn08672": "<", + "rxn10588": ">", + "rxn08152": ">", + "rxn09306": "<", + "rxn00635": "<", + "rxn08427": "<", + "rxn05225": ">", + "rxn00680": ">", + "rxn08786": ">", + "rxn08721": "<", + "rxn11339": "<", + "rxn05749": "<", + "rxn01187": ">", + "rxn08625": "<", + "rxn06677": "<", + "rxn12302": ">", + "rxn02770": "<", + "rxn05628": "<", + "rxn13706": ">", + "rxn12739": "<", + "rxn00177": "<", + "rxn09896": ">", + "rxn12574": "<", + "rxn12533": ">", + "rxn08537": ">", + "rxn05651": ">", + "rxn08170": "<", + "rxn05240": "<", + "rxn00663": ">", + "rxn12589": "<", + "rxn09299": "<", + "rxn02059": "<", + "rxn12217": ">", + "rxn06592": "<", + "rxn05939": ">", + "rxn08581": "<", + "rxn00430": "<", + "rxn09283": ">", + "rxn08919": "<", + "rxn13660": "<", + "rxn08065": "<", + "rxn08428": ">", + "rxn10936": ">", + "rxn05238": ">", + "rxn05685": "<", + "rxn08920": ">", + "rxn07193": "<", + "rxn08265": "<", + "rxn12554": "<", + "rxn08094": "<", + "rxn13727": ">", + "rxn04158": "<", + "rxn09839": "<", + "rxn10820": "<", + "rxn00869": ">", + "rxn00331": ">", + "rxn09034": "<", + "rxn01136": "<", + "rxn09247": "<", + "rxn08302": "<", + "rxn10594": "<", + "rxn08670": ">", + "rxn11334": "<", + "rxn09941": "<", + "rxn02919": "<", + "rxn09670": "<", + "rxn10892": "<", + "rxn09794": "<", + "rxn02332": ">", + "rxn00244": ">", + "rxn08030": "<", + "rxn12526": "<", + "rxn13150": ">", + "rxn05486": "<", + "rxn10852": ">", + "rxn13790": ">", + "rxn06348": ">", + "rxn09172": ">", + "rxn03653": ">", + "rxn05213": "<", + "rxn01869": "<", + "rxn08142": "<", + "rxn12606": "<", + "rxn11916": ">", + "rxn05748": "<", + "rxn08543": "<", + "rxn01107": ">", + "rxn05708": "<", + "rxn08169": "<", + "rxn06641": ">", + "rxn12578": "<", + "rxn01172": "<", + "rxn02120": ">", + "rxn05669": "<", + "rxn11322": "<", + "rxn12630": "<", + "rxn00698": "<", + "rxn05507": ">", + "rxn12530": "<", + "rxn09304": "<", + "rxn05532": ">", + "rxn03644": ">", + "rxn08733": "<", + "rxn13733": "<", + "rxn10044": ">", + "rxn00176": ">", + "rxn01364": ">", + "rxn02198": ">", + "rxn06990": "<", + "rxn08424": "<", + "rxn08069": "<", + "rxn05611": "<", + "rxn11973": "<", + "rxn12665": ">", + "rxn05241": "<", + "rxn08982": ">", + "rxn00542": ">", + "rxn12588": "<", + "rxn03517": ">", + "rxn01805": "<", + "rxn13203": ">", + "rxn08614": "<", + "rxn12200": ">", + "rxn13811": "<", + "rxn08377": "<", + "rxn11342": ">", + "rxn02976": "<", + "rxn08217": "<", + "rxn07921": ">", + "rxn09944": ">", + "rxn02401": "<", + "rxn08429": ">", + "rxn00905": "<", + "rxn08196": "<", + "rxn03054": "<", + "rxn08643": "<", + "rxn01874": "<", + "rxn08028": "<", + "rxn01641": ">", + "rxn03442": "<", + "rxn02172": "<", + "rxn10692": ">", + "rxn10613": ">", + "rxn12928": ">", + "rxn12994": ">", + "rxn13843": ">", + "rxn12942": ">", + "rxn12934": ">", + "rxn16827": ">", + "rxn12941": ">", + "rxn01736": ">", + "rxn14109": ">", + "rxn15060": ">", + "rxn15064": ">", + "rxn30685": ">", + "rxn10095": ">", + "rxn16143": ">", + "rxn25271": ">", + "rxn25160": ">", + "rxn30917": ">", + "rxn16843": ">", + "rxn08921": ">", + "rxn09390": ">", + "rxn27362": ">", + "rxn02664": ">", + "rxn24638": ">", + "rxn24613": ">", + "rxn24611": ">", + "rxn14428": ">", + "rxn03079": ">", + "rxn03020": ">", + "rxn10471": "<", +} class GapfillingPkg(BaseFBAPkg): @@ -416,7 +495,7 @@ def build_package(self, parameters): "minimum_obj": 0.01, "set_objective": 1, "minimize_exchanges": False, - "blacklist": default_blacklist, + "blacklist": [], }, ) # Adding model reactions to original reaction list @@ -558,6 +637,11 @@ def extend_model_with_model_for_gapfilling(self, source_model, index): if re.search("(.+)_([a-z])\d+$", modelreaction.id) != None: m = re.search("(.+)_([a-z])\d+$", modelreaction.id) if m[1] not in self.parameters["blacklist"]: + if m[1] in base_blacklist: + if base_blacklist[m[1]] == ">" or base_blacklist[m[1]] == "=": + cobra_reaction.upper_bound = 0 + if base_blacklist[m[1]] == "<" or base_blacklist[m[1]] == "=": + cobra_reaction.lower_bound = 0 cobra_reaction = modelreaction.copy() cobra_reaction.id = groups[1] + "_" + groups[2] + index if ( @@ -687,6 +771,17 @@ def extend_model_with_template_for_gapfilling(self, template, index): cobra_reaction = self.convert_template_reaction( template_reaction, index, template, 1 ) # TODO: move function out + if template_reaction.reference_id in base_blacklist: + if ( + base_blacklist[template_reaction.reference_id] == ">" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.upper_bound = 0 + if ( + base_blacklist[template_reaction.reference_id] == "<" + or base_blacklist[template_reaction.reference_id] == "=" + ): + cobra_reaction.lower_bound = 0 new_penalties[cobra_reaction.id] = dict() if ( cobra_reaction.id not in self.model.reactions From 906bb3e3a008b81cafbc39597fbf71b21e8d40e6 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 20 Feb 2023 00:24:38 -0600 Subject: [PATCH 12/57] genome feature aliases --- modelseedpy/core/msbuilder.py | 4 +++- modelseedpy/core/msgenome.py | 4 ++-- modelseedpy/core/msmodelutl.py | 15 +++++++-------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e53a28ac..54fb06c6 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -872,7 +872,9 @@ def build( biomass_reactions.append(reaction) if len(biomass_reactions) > 0: - cobra_model.add_reactions(biomass_reactions) + for rxn in biomass_reactions: + if rxn.id not in cobra_model.reactions: + cobra_model.add_reactions([rxn]) cobra_model.objective = biomass_reactions[0].id """ diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 999e464d..875699c2 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -48,7 +48,7 @@ def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): class MSFeature: - def __init__(self, feature_id, sequence, description=None): + def __init__(self, feature_id, sequence, description=None, aliases=None): """ @param feature_id: identifier for the protein coding feature @@ -60,7 +60,7 @@ def __init__(self, feature_id, sequence, description=None): self.seq = sequence self.description = description # temporary replace with proper parsing self.ontology_terms = {} - self.aliases = [] + self.aliases = aliases def add_ontology_term(self, ontology_term, value): """ diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index af499773..7017552b 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -307,27 +307,26 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): print(len(output)) self.model.add_reactions(output) return output - + ################################################################################# # Functions related to utility functions ################################################################################# def build_model_data_hash(self): data = { - "Model":self.id, - "Genome":self.genome.info.metadata["Name"], - "Genes":self.genome.info.metadata["Number of Protein Encoding Genes"], - + "Model": self.id, + "Genome": self.genome.info.metadata["Name"], + "Genes": self.genome.info.metadata["Number of Protein Encoding Genes"], } return data - - def compare_reactions(self, reaction_list,filename): + + def compare_reactions(self, reaction_list, filename): data = {} for rxn in reaction_list: for met in rxn.metabolites: if met.id not in data: data[met.id] = {} for other_rxn in reaction_list: - data[met.id][other_rxn.id] = 0 + data[met.id][other_rxn.id] = 0 data[met.id][rxn.id] = rxn.metabolites[met] df = pd.DataFrame(data) df = df.transpose() From 26d7b622bf0d2a4464f7f631c87e5f1001abb575 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 20 Feb 2023 21:13:39 -0600 Subject: [PATCH 13/57] template.add_reaction update comcompound references --- modelseedpy/core/mstemplate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index f439dc91..7bf9cbea 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -1302,7 +1302,9 @@ def add_reactions(self, reaction_list: list): if cpx.id not in self.complexes: self.add_complexes([cpx]) complex_replace.add(self.complexes.get_by_id(cpx.id)) + x._metabolites = metabolites_replace + x._update_awareness() x.complexes = complex_replace self.reactions += reaction_list From 34b4d812b0d1fcaf562a99db76f91f784f9db119 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 1 Mar 2023 00:01:53 -0600 Subject: [PATCH 14/57] Improving gapfilling and ATP correction --- modelseedpy/__init__.py | 2 + modelseedpy/biochem/modelseed_biochem.py | 2 +- modelseedpy/core/msatpcorrection.py | 46 ++++- modelseedpy/core/msbuilder.py | 8 +- modelseedpy/core/msgapfill.py | 137 ++++++-------- modelseedpy/core/msmodelutl.py | 169 ++++++++++++++++- modelseedpy/core/mstemplate.py | 14 +- modelseedpy/fbapkg/flexiblebiomasspkg.py | 229 ++++++++++++++++------- modelseedpy/fbapkg/gapfillingpkg.py | 99 +++++----- 9 files changed, 494 insertions(+), 212 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 7f135055..aabb2c53 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -59,6 +59,8 @@ from modelseedpy.community import MSCommunity, MSCompatibility, CommKineticPkg +from modelseedpy.biochem import ModelSEEDBiochem + from modelseedpy.fbapkg import ( BaseFBAPkg, RevBinPkg, diff --git a/modelseedpy/biochem/modelseed_biochem.py b/modelseedpy/biochem/modelseed_biochem.py index 80594e0e..287ce470 100644 --- a/modelseedpy/biochem/modelseed_biochem.py +++ b/modelseedpy/biochem/modelseed_biochem.py @@ -495,7 +495,7 @@ class ModelSEEDBiochem: @staticmethod def get(create_if_missing=True): if not ModelSEEDBiochem.default_biochemistry: - ModelSEEDBiochem.default_biochemistry = from_local( + ModelSEEDBiochem.default_biochemistry = from_local2( config.get("biochem", "path") ) return ModelSEEDBiochem.default_biochemistry diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c5b20e3c..e72835aa 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -22,6 +22,7 @@ from modelseedpy.helpers import get_template logger = logging.getLogger(__name__) +# logger.setLevel(logging.DEBUG) _path = _dirname(_abspath(__file__)) @@ -291,7 +292,10 @@ def evaluate_growth_media(self): or solution.status != "optimal" ): self.media_gapfill_stats[media] = self.msgapfill.run_gapfilling( - media, self.atp_hydrolysis.id, minimum_obj + media, + self.atp_hydrolysis.id, + minimum_obj, + check_for_growth=False, ) # IF gapfilling fails - need to activate and penalize the noncore and try again elif solution.objective_value >= minimum_obj: @@ -312,16 +316,29 @@ def determine_growth_media(self, max_gapfilling=None): Decides which of the test media to use as growth conditions for this model :return: """ + atp_att = {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}} self.selected_media = [] best_score = None for media in self.media_gapfill_stats: gfscore = 0 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 0, + "new": {}, + "reversed": {}, + } if self.media_gapfill_stats[media]: gfscore = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] if best_score is None or gfscore < best_score: best_score = gfscore + atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore if self.max_gapfilling is None: self.max_gapfilling = best_score @@ -339,6 +356,9 @@ def determine_growth_media(self, max_gapfilling=None): best_score + self.gapfilling_delta ): self.selected_media.append(media) + atp_att["selected_media"][media.id] = 0 + + self.modelutl.save_attributes(atp_att, "ATP_analysis") def determine_growth_media2(self, max_gapfilling=None): """ @@ -385,8 +405,15 @@ def apply_growth_media_gapfilling(self): and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 ): self.msgapfill.integrate_gapfill_solution( - self.media_gapfill_stats[media], self.cumulative_core_gapfilling + self.media_gapfill_stats[media], + self.cumulative_core_gapfilling, + link_gaps_to_objective=False, ) + core_gf = { + "count": len(self.cumulative_core_gapfilling), + "reactions": self.cumulative_core_gapfilling, + } + self.modelutl.save_attributes(core_gf, "core_gapfilling") def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP @@ -460,6 +487,11 @@ def build_tests(self, multiplier=None): Raises ------ """ + atp_att = self.modelutl.get_attributes( + "ATP_analysis", + {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, + ) + if multiplier is None: multiplier = self.multiplier tests = [] @@ -467,7 +499,7 @@ def build_tests(self, multiplier=None): for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) obj_value = self.model.slim_optimize() - logger.debug(f"{media.name} = {obj_value}") + logger.debug(f"{media.name} = {obj_value};{multiplier}") tests.append( { "media": media, @@ -476,6 +508,14 @@ def build_tests(self, multiplier=None): "objective": self.atp_hydrolysis.id, } ) + atp_att["selected_media"][media.id] = obj_value + atp_att["tests"][media.id] = { + "threshold": multiplier * obj_value, + "objective": self.atp_hydrolysis.id, + } + + self.modelutl.save_attributes(atp_att, "ATP_analysis") + return tests def run_atp_correction(self): diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e53a28ac..4ea0cd3e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -849,16 +849,22 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - + if "bio1" in cobra_model.reactions: + print("1:Biomass present!!") metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) + if "bio1" in cobra_model.reactions: + print("2:Biomass present!!") non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) cobra_model.add_reactions(non_metabolic_reactions) + if "bio1" in cobra_model.reactions: + print("3:Biomass present!!") cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) + print("Adding biomass!!") biomass_reactions = [] for rxn_biomass in self.template.biomasses: reaction = rxn_biomass.build_biomass( diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index ad430ef2..8d023272 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -69,6 +69,7 @@ def run_gapfilling( minimum_obj=0.01, binary_check=False, prefilter=True, + check_for_growth=True, ): if target: self.model.objective = self.model.problem.Objective( @@ -96,15 +97,54 @@ def run_gapfilling( ) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + # Testing if gapfilling can work before filtering + if ( + check_for_growth + and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() + ): + # save_json_model(self.model, "gfdebugmdl.json") + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + gf_sensitivity[media.id][target][ + "FBF" + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning("No solution found before filtering for %s", media) + return None + # Filtering breaking reactions out of the database if prefilter and self.test_conditions: pkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) + # Testing if gapfilling can work after filtering + if ( + check_for_growth + and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() + ): + # save_json_model(self.model, "gfdebugmdl.json") + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + gf_sensitivity[media.id][target][ + "FAF" + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning("No solution found after filtering for %s", media) + return None + + # Printing the gapfilling LP file if self.lp_filename: with open(self.lp_filename, "w") as out: out.write(str(self.gfmodel.solver)) + + # Running gapfilling and checking solution sol = self.gfmodel.optimize() logger.debug( "gapfill solution objective value %f (%s) for media %s", @@ -112,11 +152,11 @@ def run_gapfilling( sol.status, media, ) - if sol.status != "optimal": logger.warning("No solution found for %s", media) return None + # Computing solution and ensuring all tests still pass self.last_solution = pkgmgr.getpkg("GapfillingPkg").compute_gapfilled_solution() if self.test_conditions: self.last_solution = pkgmgr.getpkg("GapfillingPkg").run_test_conditions( @@ -129,18 +169,23 @@ def run_gapfilling( "no solution could be found that satisfied all specified test conditions in specified iterations!" ) return None + + # Running binary check to reduce solution to minimal reaction soltuion if binary_check: self.last_solution = pkgmgr.getpkg( "GapfillingPkg" ).binary_check_gapfilling_solution() + # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target self.last_solution["minobjective"] = minimum_obj self.last_solution["binary_check"] = binary_check return self.last_solution - def integrate_gapfill_solution(self, solution, cumulative_solution=[]): + def integrate_gapfill_solution( + self, solution, cumulative_solution=[], link_gaps_to_objective=True + ): """Integrating gapfilling solution into model Parameters ---------- @@ -191,84 +236,20 @@ def integrate_gapfill_solution(self, solution, cumulative_solution=[]): cumulative_solution.remove(oitem) break self.mdlutl.add_gapfilling(solution) + if link_gaps_to_objective: + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if solution["media"] not in gf_sensitivity: + gf_sensitivity[solution["media"]] = {} + if solution["target"] not in gf_sensitivity[solution["media"]]: + gf_sensitivity[solution["media"]][solution["target"]] = {} + gf_sensitivity[solution["media"]][solution["target"]][ + "success" + ] = self.mdlutl.find_unproducible_biomass_compounds( + solution["target"], cumulative_solution + ) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") self.cumulative_gapfilling.extend(cumulative_solution) - def link_gapfilling_to_biomass(self, target="bio1"): - def find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ): - objective = tempmodel.slim_optimize() - logger.debug("Obj:" + str(objective)) - with open("FlexBiomass2.lp", "w") as out: - out.write(str(tempmodel.solver)) - if objective > 0: - target_rxn.lower_bound = 0.1 - tempmodel.objective = min_flex_obj - solution = tempmodel.optimize() - with open("FlexBiomass3.lp", "w") as out: - out.write(str(tempmodel.solver)) - biocpds = [] - for reaction in tempmodel.reactions: - if ( - reaction.id[0:5] == "FLEX_" - and reaction.forward_variable.primal > Zero - ): - biocpds.append(reaction.id[5:]) - item.append(biocpds) - logger.debug(item[0] + ":" + ",".join(biocpds)) - tempmodel.objective = original_objective - target_rxn.lower_bound = 0 - - # Copying model before manipulating it - tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.mdlutl.model)) - # Getting target reaction and making sure it exists - target_rxn = tempmodel.reactions.get_by_id(target) - # Constraining objective to be greater than 0.1 - pkgmgr = MSPackageManager.get_pkg_mgr(tempmodel) - # Adding biomass flexibility - pkgmgr.getpkg("FlexibleBiomassPkg").build_package( - { - "bio_rxn_id": target, - "flex_coefficient": [0, 1], - "use_rna_class": None, - "use_dna_class": None, - "use_protein_class": None, - "use_energy_class": [0, 1], - "add_total_biomass_constraint": False, - } - ) - # Creating min flex objective - tempmodel.objective = target_rxn - original_objective = tempmodel.objective - min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") - obj_coef = dict() - for reaction in tempmodel.reactions: - if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": - obj_coef[reaction.forward_variable] = 1 - # Temporarily setting flex objective so I can set coefficients - tempmodel.objective = min_flex_obj - min_flex_obj.set_linear_coefficients(obj_coef) - # Restoring biomass object - tempmodel.objective = original_objective - # Knocking out gapfilled reactions one at a time - for item in self.cumulative_gapfilling: - logger.debug("KO:" + item[0] + item[1]) - rxnobj = tempmodel.reactions.get_by_id(item[0]) - if item[1] == ">": - original_bound = rxnobj.upper_bound - rxnobj.upper_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.upper_bound = original_bound - else: - original_bound = rxnobj.lower_bound - rxnobj.lower_bound = 0 - find_dependency( - item, target_rxn, tempmodel, original_objective, min_flex_obj - ) - rxnobj.lower_bound = original_bound - @staticmethod def gapfill( model, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index bb147f89..a44c5653 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -5,10 +5,15 @@ import json import sys import pandas as pd +import cobra from cobra import Model, Reaction, Metabolite +from optlang.symbolics import Zero from modelseedpy.fbapkg.mspackagemanager import MSPackageManager from modelseedpy.biochem.modelseed_biochem import ModelSEEDBiochem from modelseedpy.core.fbahelper import FBAHelper +from multiprocessing import Value + +# from builtins import None logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -105,6 +110,9 @@ def __init__(self, model): self.reaction_scores = None self.score = None self.integrated_gapfillings = [] + self.attributes = {} + if hasattr(self.model, "attributes"): + self.attributes = self.model def compute_automated_reaction_scores(self): """ @@ -270,6 +278,22 @@ def reaction_scores(self): ################################################################################# # Functions related to editing the model ################################################################################# + def get_attributes(self, key=None, default=None): + if not key: + return self.attributes + if key not in self.attributes: + self.attributes[key] = default + return self.attributes[key] + + def save_attributes(self, value, key=None): + attributes = self.get_attributes() + if key: + attributes[key] = value + else: + self.attributes = value + if hasattr(self.model, "attributes"): + self.model.attributes = self.attributes + def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): modelseed = ModelSEEDBiochem.get() output = [] @@ -923,12 +947,151 @@ def reaction_expansion_test( + " out of " + str(len(reaction_list)) ) - filterlist = [] + # Adding filter results to attributes + gf_filter_att = self.get_attributes("gf_filter", {}) + if condition["media"].id not in gf_filter_att: + gf_filter_att[condition["media"].id] = {} + if condition["objective"] not in gf_filter_att[condition["media"].id]: + gf_filter_att[condition["media"].id][condition["objective"]] = {} + if ( + condition["threshold"] + not in gf_filter_att[condition["media"].id][condition["objective"]] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] = {} for item in new_filtered: - filterlist.append(item[0].id + item[1]) - logger.debug(",".join(filterlist)) + if ( + item[0].id + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ] + ): + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] = {} + if ( + item[1] + not in gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id] + ): + if len(item) < 3: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = None + else: + gf_filter_att[condition["media"].id][condition["objective"]][ + condition["threshold"] + ][item[0].id][item[1]] = item[2] + gf_filter_att = self.save_attributes(gf_filter_att, "gf_filter") return filtered_list + ################################################################################# + # Functions related to biomass sensitivity analysis + ################################################################################# + def find_unproducible_biomass_compounds(self, target_rxn="bio1", ko_list=None): + # Cloning the model because we don't want to modify the original model with this analysis + tempmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + # Getting target reaction and making sure it exists + if target_rxn not in tempmodel.reactions: + logger.critical(target_rxn + " not in model!") + target_rxn_obj = tempmodel.reactions.get_by_id(target_rxn) + tempmodel.objective = target_rxn + original_objective = tempmodel.objective + pkgmgr = MSPackageManager.get_pkg_mgr(tempmodel) + rxn_list = [target_rxn, "rxn05294_c0", "rxn05295_c0", "rxn05296_c0"] + for rxn in rxn_list: + if rxn in tempmodel.reactions: + pkgmgr.getpkg("FlexibleBiomassPkg").build_package( + { + "bio_rxn_id": rxn, + "flex_coefficient": [0, 1], + "use_rna_class": None, + "use_dna_class": None, + "use_protein_class": None, + "use_energy_class": [0, 1], + "add_total_biomass_constraint": False, + } + ) + + # Creating min flex objective + min_flex_obj = tempmodel.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" or reaction.id[0:6] == "energy": + obj_coef[reaction.forward_variable] = 1 + obj_coef[reaction.reverse_variable] = 1 + # Temporarily setting flex objective so I can set coefficients + tempmodel.objective = min_flex_obj + min_flex_obj.set_linear_coefficients(obj_coef) + if not ko_list: + return self.run_biomass_dependency_test( + target_rxn_obj, tempmodel, original_objective, min_flex_obj, rxn_list + ) + else: + output = {} + for item in ko_list: + logger.debug("KO:" + item[0] + item[1]) + rxnobj = tempmodel.reactions.get_by_id(item[0]) + if item[1] == ">": + original_bound = rxnobj.upper_bound + rxnobj.upper_bound = 0 + if item[0] not in output: + output[item[0]] = {} + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.upper_bound = original_bound + else: + original_bound = rxnobj.lower_bound + rxnobj.lower_bound = 0 + if item[0] not in output: + output[item[0]] = {} + output[item[0]][item[1]] = self.run_biomass_dependency_test( + target_rxn_obj, + tempmodel, + original_objective, + min_flex_obj, + rxn_list, + ) + rxnobj.lower_bound = original_bound + return output + + def run_biomass_dependency_test( + self, target_rxn, tempmodel, original_objective, min_flex_obj, rxn_list + ): + tempmodel.objective = original_objective + objective = tempmodel.slim_optimize() + with open("FlexBiomass2.lp", "w") as out: + out.write(str(tempmodel.solver)) + if objective > 0: + target_rxn.lower_bound = 0.1 + tempmodel.objective = min_flex_obj + solution = tempmodel.optimize() + with open("FlexBiomass3.lp", "w") as out: + out.write(str(tempmodel.solver)) + biocpds = [] + for reaction in tempmodel.reactions: + if reaction.id[0:5] == "FLEX_" and ( + reaction.forward_variable.primal > Zero + or reaction.reverse_variable.primal > Zero + ): + logger.debug("Depends on:" + reaction.id) + label = reaction.id[5:] + for item in rxn_list: + if label[0 : len(item)] == item: + biocpds.append(label[len(item) + 1 :]) + target_rxn.lower_bound = 0 + return biocpds + else: + logger.debug("Cannot grow") + return None + def add_atp_hydrolysis(self, compartment): # Searching for ATP hydrolysis compounds coefs = { diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 5d206aed..72118f07 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -581,9 +581,13 @@ def from_table( if isinstance(filename_or_df, str): filename_or_df = pd.read_table(filename_or_df) for index, row in filename_or_df.iterrows(): + if "biomass_id" not in row: + row["biomass_id"] = "bio1" if row["biomass_id"] == bio_id: + if "compartment" not in row: + row["compartment"] = "c" metabolite = template.compcompounds.get_by_id( - f'{row["id"]}_{row["compartment"]}' + f'{row["id"]}_{lower(row["compartment"])}' ) linked_mets = {} if ( @@ -594,14 +598,14 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - f'{sub_array[0]}_{row["compartment"]}' + f'{sub_array[0]}_{lower(row["compartment"])}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( metabolite, - row["class"], - row["coefficient"], - row["coefficient_type"], + lower(row["class"]), + float(row["coefficient"]), + upper(row["coefficient_type"]), linked_mets, ) return self diff --git a/modelseedpy/fbapkg/flexiblebiomasspkg.py b/modelseedpy/fbapkg/flexiblebiomasspkg.py index ae8a1cfe..223f778d 100644 --- a/modelseedpy/fbapkg/flexiblebiomasspkg.py +++ b/modelseedpy/fbapkg/flexiblebiomasspkg.py @@ -93,7 +93,13 @@ def build_package(self, parameters): for metabolite in self.parameters["bio_rxn"].metabolites: met_class[metabolite] = None msid = MSModelUtil.metabolite_msid(metabolite) - if msid != "cpd11416" and msid != None: + if ( + msid != "cpd11416" + and msid != "cpd11463" + and msid != "cpd11462" + and msid != "cpd11461" + and msid != None + ): if msid in refcpd: met_class[metabolite] = "refcpd" else: @@ -111,20 +117,24 @@ def build_package(self, parameters): self.parameters["use_" + curr_class + "_class"] = None break # Creating FLEX reactions and constraints for unclassified compounds - flexcpds = [] + flexcpds = {} for metabolite in self.parameters["bio_rxn"].metabolites: if not met_class[metabolite]: - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] elif ( met_class[metabolite] != "refcpd" and not self.parameters["use_" + met_class[metabolite] + "_class"] ): - flexcpds.append(metabolite) + flexcpds[metabolite] = self.parameters["bio_rxn"].metabolites[ + metabolite + ] self.modelutl.add_exchanges_for_metabolites( flexcpds, uptake=1000, excretion=1000, - prefix="FLEX_", + prefix="FLEX_" + self.parameters["bio_rxn"].id + "_", prefix_name="Biomass flex for ", ) for metabolite in flexcpds: @@ -206,24 +216,32 @@ def build_variable(self, object, type): # !!! can the function be removed? pass def build_constraint(self, cobra_obj, obj_type): - element_mass = FBAHelper.elemental_mass() # !!! element_mass is never used if obj_type == "flxbio": # Sum(MW*(vdrn,for-vdrn,ref)) + Sum(massdiff*(vrxn,for-vrxn,ref)) = 0 coef = {} for metabolite in self.parameters["bio_rxn"].metabolites: - if "FLEX_" + metabolite.id in self.model.reactions: + if ( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + metabolite.id + in self.model.reactions + ): mw = FBAHelper.metabolite_mw(metabolite) sign = -1 if self.parameters["bio_rxn"].metabolites[metabolite] > 0: sign = 1 coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).forward_variable ] = (sign * mw) coef[ self.model.reactions.get_by_id( - "FLEX_" + metabolite.id + "FLEX_" + + self.parameters["bio_rxn"].id + + "_" + + metabolite.id ).reverse_variable ] = (-1 * sign * mw) for met_class in classes: @@ -238,8 +256,11 @@ def build_constraint(self, cobra_obj, obj_type): coef[rxn.reverse_variable] = -massdiff return BaseFBAPkg.build_constraint(self, obj_type, 0, 0, coef, cobra_obj) elif obj_type == "flxcpd" or obj_type == "flxcls": + first_entry = None + second_entry = None + product = False biovar = self.parameters["bio_rxn"].forward_variable - object = cobra_obj + object = None const = None if obj_type == "flxcpd": # 0.75 * abs(bio_coef) * vbio - vdrn,for >= 0 @@ -250,7 +271,11 @@ def build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["flex_coefficient"][1] * abs( self.parameters["bio_rxn"].metabolites[cobra_obj] ) - object = self.model.reactions.get_by_id("FLEX_" + cobra_obj.id) + if self.parameters["bio_rxn"].metabolites[cobra_obj] > 0: + product = True + object = self.model.reactions.get_by_id( + "FLEX_" + self.parameters["bio_rxn"].id + "_" + cobra_obj.id + ) elif ( cobra_obj.id[0:-5] == None or not self.parameters["use_" + cobra_obj.id[0:-5] + "_class"] @@ -263,87 +288,157 @@ def build_constraint(self, cobra_obj, obj_type): second_entry = self.parameters["use_" + cobra_obj.id[0:-5] + "_class"][ 1 ] + object = cobra_obj if first_entry == second_entry: # If the value is positive, lock in the forward variable and set the reverse to zero if first_entry > 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - 0, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 + else: + const = self.build_constraint( + "f" + obj_type, + 0, + 0, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 # If the value is negative, lock in the reverse variable and set the forward to zero elif first_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - 0, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + if product: + const = self.build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 + else: + const = self.build_constraint( + "r" + obj_type, + 0, + 0, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + object.lower_bound = 0 # If the value is zero, lock both variables to zero if first_entry == 0: object.lower_bound = 0 object.upper_bound = 0 elif second_entry >= 0: if first_entry >= 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - object.lower_bound = 0 - if first_entry > 0: - BaseFBAPkg.build_constraint( + if product: + const = BaseFBAPkg.build_constraint( self, - "r" + obj_type, + "f" + obj_type, 0, None, - {biovar: -first_entry, object.forward_variable: 1}, + {biovar: second_entry, object.forward_variable: -1}, cobra_obj, ) + object.lower_bound = 0 + if first_entry > 0: + BaseFBAPkg.build_constraint( + self, + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: 1}, + cobra_obj, + ) + else: + const = BaseFBAPkg.build_constraint( + self, + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + object.upper_bound = 0 + if first_entry > 0: + BaseFBAPkg.build_constraint( + self, + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: 1}, + cobra_obj, + ) else: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, - 0, - None, - {biovar: second_entry, object.forward_variable: -1}, - cobra_obj, - ) - BaseFBAPkg.build_constraint( - self, + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: -1}, + cobra_obj, + ) + self.build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.reverse_variable: -1}, + cobra_obj, + ) + else: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: -1}, + cobra_obj, + ) + self.build_constraint( + "r" + obj_type, + 0, + None, + {biovar: -first_entry, object.forward_variable: -1}, + cobra_obj, + ) + else: + if second_entry < 0: + if product: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.reverse_variable: 1}, + cobra_obj, + ) + else: + const = self.build_constraint( + "f" + obj_type, + 0, + None, + {biovar: second_entry, object.forward_variable: 1}, + cobra_obj, + ) + if product: + self.build_constraint( "r" + obj_type, 0, None, {biovar: -first_entry, object.reverse_variable: -1}, cobra_obj, ) - else: - if second_entry < 0: - const = BaseFBAPkg.build_constraint( - self, - "f" + obj_type, + object.lower_bound = 0 + else: + self.build_constraint( + "r" + obj_type, 0, None, - {biovar: second_entry, object.reverse_variable: 1}, + {biovar: -first_entry, object.forward_variable: -1}, cobra_obj, ) - BaseFBAPkg.build_constraint( - self, - "r" + obj_type, - 0, - None, - {biovar: -first_entry, object.reverse_variable: -1}, - cobra_obj, - ) - object.upper_bound = 0 + object.upper_bound = 0 return const diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 3ea2d6dd..f14eb7ed 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -19,6 +19,7 @@ from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) +# logger.setLevel(logging.DEBUG) base_blacklist = { "rxn10157": "<", @@ -493,7 +494,6 @@ def build_package(self, parameters): "default_excretion": 100, "default_uptake": 100, "minimum_obj": 0.01, - "set_objective": 1, "minimize_exchanges": False, "blacklist": [], }, @@ -578,29 +578,27 @@ def build_package(self, parameters): ) self.model.solver.update() - if self.parameters["set_objective"] == 1: - reaction_objective = self.model.problem.Objective(Zero, direction="min") - obj_coef = dict() - for reaction in self.model.reactions: - if reaction.id in self.gapfilling_penalties: - if ( - self.parameters["minimize_exchanges"] - or reaction.id[0:3] != "EX_" - ): - # Minimizing gapfilled reactions - if "reverse" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.reverse_variable] = abs( - self.gapfilling_penalties[reaction.id]["reverse"] - ) - if "forward" in self.gapfilling_penalties[reaction.id]: - obj_coef[reaction.forward_variable] = abs( - self.gapfilling_penalties[reaction.id]["forward"] - ) - else: - obj_coef[reaction.forward_variable] = 0 - obj_coef[reaction.reverse_variable] = 0 - self.model.objective = reaction_objective - reaction_objective.set_linear_coefficients(obj_coef) + + reaction_objective = self.model.problem.Objective(Zero, direction="min") + obj_coef = dict() + for reaction in self.model.reactions: + if reaction.id in self.gapfilling_penalties: + if self.parameters["minimize_exchanges"] or reaction.id[0:3] != "EX_": + # Minimizing gapfilled reactions + if "reverse" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.reverse_variable] = abs( + self.gapfilling_penalties[reaction.id]["reverse"] + ) + if "forward" in self.gapfilling_penalties[reaction.id]: + obj_coef[reaction.forward_variable] = abs( + self.gapfilling_penalties[reaction.id]["forward"] + ) + else: + obj_coef[reaction.forward_variable] = 0 + obj_coef[reaction.reverse_variable] = 0 + self.model.objective = reaction_objective + reaction_objective.set_linear_coefficients(obj_coef) + self.parameters["gfobj"] = self.model.objective def extend_model_with_model_for_gapfilling(self, source_model, index): new_metabolites = {} @@ -1001,28 +999,27 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): return None return solution - def filter_database_based_on_tests(self, test_conditions): - # Preserving the gapfilling objective function - gfobj = self.model.objective - # Setting the minimal growth constraint to zero + def test_gapfill_database(self): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 - # Setting the objective to the original default objective for the model self.model.objective = self.parameters["origobj"] - # Testing if the minimal objective can be achieved before filtering solution = self.model.optimize() - print( - "Objective before filtering:", - solution.objective_value, - "; min objective:", - self.parameters["minimum_obj"], + logger.info( + "Objective with gapfill database:" + + str(solution.objective_value) + + "; min objective:" + + str(self.parameters["minimum_obj"]) ) - with open("debuggf.lp", "w") as out: - out.write(str(self.model.solver)) + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + self.model.objective = self.parameters["gfobj"] if solution.objective_value < self.parameters["minimum_obj"]: - save_json_model(self.model, "gfdebugmdl.json") - logger.critical( - "Model cannot achieve the minimum objective even before filtering!" - ) + return False + return True + + def filter_database_based_on_tests(self, test_conditions): + # Setting the minimal growth constraint to zero + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 # Filtering the database of any reactions that violate the specified tests filetered_list = [] with self.model: @@ -1039,21 +1036,14 @@ def filter_database_based_on_tests(self, test_conditions): ) # Now constraining filtered reactions to zero for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.info("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: self.model.reactions.get_by_id(item[0].id).lower_bound = 0 # Now testing if the gapfilling minimum objective can still be achieved - solution = self.model.optimize() - print( - "Objective after filtering:", - solution.objective_value, - "; min objective:", - self.parameters["minimum_obj"], - ) - # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached - if solution.objective_value < self.parameters["minimum_obj"]: + if not self.test_gapfill_database(): + # Now we need to restore a minimal set of filtered reactions such that we permit the minimum objective to be reached # Restoring the minimum objective constraint self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"][ "1" @@ -1089,14 +1079,14 @@ def filter_database_based_on_tests(self, test_conditions): else: count += -1 rxn.lower_bound = 0 - print("Reactions unfiltered:", count) + logger.info("Reactions unfiltered:" + str(count)) # Checking for model reactions that can be removed to enable all tests to pass self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 filtered_list = self.modelutl.reaction_expansion_test( self.parameters["original_reactions"], test_conditions ) for item in filtered_list: - logger.debug("Filtering:", item[0].id, item[1]) + logger.info("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1105,7 +1095,8 @@ def filter_database_based_on_tests(self, test_conditions): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ "minimum_obj" ] - self.model.objective = gfobj + self.model.objective = self.parameters["gfobj"] + return True def compute_gapfilled_solution(self, flux_values=None): if flux_values is None: From 64010b3096b1259afb727074c5578c7cb9565773 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Mar 2023 00:46:17 -0600 Subject: [PATCH 15/57] template species name --- modelseedpy/core/msbuilder.py | 2 +- modelseedpy/core/mstemplate.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index 86df362b..cd16d75e 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -955,7 +955,7 @@ def build_full_template_model(template, model_id=None, index="0"): else: for bio in template.biomasses: bio.build_biomass( - self, model, index, classic=False, GC=0.5, add_to_model=True + model, index, classic=False, GC=0.5, add_to_model=True ) model.objective = "bio1" diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 07931f86..af7b0deb 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -184,8 +184,8 @@ def compound(self): @property def name(self): if self._template_compound: - return self._template_compound.name - return "" + return f'{self._template_compound.name} [{self.compartment}]' + return f'{self.id} [{self.compartment}]' @name.setter def name(self, value): From 972920b35bb8ac597085a46a0fb7039ba54c6233 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Wed, 1 Mar 2023 00:46:42 -0600 Subject: [PATCH 16/57] black --- modelseedpy/core/mstemplate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index af7b0deb..f28d170f 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -184,8 +184,8 @@ def compound(self): @property def name(self): if self._template_compound: - return f'{self._template_compound.name} [{self.compartment}]' - return f'{self.id} [{self.compartment}]' + return f"{self._template_compound.name} [{self.compartment}]" + return f"{self.id} [{self.compartment}]" @name.setter def name(self, value): From 75c464ac5ca4a5f05085baacec295ad03cd45052 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 13 Mar 2023 09:55:05 -0500 Subject: [PATCH 17/57] x --- modelseedpy/biochem/modelseed_compound.py | 18 +++++++++++++++++- modelseedpy/core/mstemplate.py | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index 89c4d5f5..1d00435d 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from modelseedpy.biochem.seed_object import ModelSEEDObject -from modelseedpy.core.mstemplate import MSTemplateSpecies +from modelseedpy.core.mstemplate import MSTemplateSpecies, MSTemplateMetabolite from cobra.core import Metabolite import pandas as pd @@ -58,7 +58,23 @@ def __init__( def to_template_compartment_compound(self, compartment): cpd_id = f"{self.seed_id}_{compartment}" + # build Template Compound + metabolite = MSTemplateMetabolite( + self.seed_id, + self.formula, + self.name, + self.charge, + self.mass, + self.delta_g, + self.delta_g_error, + self.is_cofactor, + self.abbr, + ) + # build Template Compartment Compound res = MSTemplateSpecies(cpd_id, self.charge, compartment, self.id) + + # assign Compound to Compartment Compound + res._template_compound = metabolite res.annotation.update(self.annotation) return res diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index f28d170f..3b5552f4 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -143,7 +143,7 @@ class MSTemplateSpecies(Metabolite): def __init__( self, comp_cpd_id: str, - charge: int, + charge: float, compartment: str, cpd_id, max_uptake=0, From cac909bbec0f6d1176511c817a5ba3246ee758e7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 28 Mar 2023 01:07:23 -0500 Subject: [PATCH 18/57] template format --- modelseedpy/core/mstemplate.py | 23 ++++++++++++++++------- setup.py | 3 ++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 3b5552f4..fc2bbb08 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -160,19 +160,26 @@ def __init__( self.cpd_id ) - def to_metabolite(self, index="0"): + def to_metabolite(self, index="0", force=False): """ Create cobra.core.Metabolite instance :param index: compartment index + :@param force: force index :return: cobra.core.Metabolite """ if index is None: index = "" + index = str(index) + + if self.compartment == 'e' and index.isnumeric(): + if force: + logger.warning(f'Forcing numeric index [{index}] to extra cellular compartment not advised') + else: + index = '0' + cpd_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" - name = f"{self.name}" - if len(str(index)) > 0: - name = f"{self.name} [{compartment}]" + name = f"{self.compound.name} [{compartment}]" metabolite = Metabolite(cpd_id, self.formula, name, self.charge, compartment) metabolite.notes["modelseed_template_id"] = self.id return metabolite @@ -294,15 +301,17 @@ def compartment(self): def to_reaction(self, model=None, index="0"): if index is None: index = "" + index = str(index) rxn_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" name = f"{self.name}" metabolites = {} for m, v in self.metabolites.items(): - if model and m.id in model.metabolites: - metabolites[model.metabolites.get_by_id(m.id)] = v + _metabolite = m.to_metabolite(index) + if _metabolite.id in model.metabolites: + metabolites[model.metabolites.get_by_id(_metabolite.id)] = v else: - metabolites[m.to_metabolite(index)] = v + metabolites[_metabolite] = v if len(str(index)) > 0: name = f"{self.name} [{compartment}]" diff --git a/setup.py b/setup.py index 5fba7f6c..a7555b97 100644 --- a/setup.py +++ b/setup.py @@ -27,9 +27,10 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", "Intended Audience :: Science/Research", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Natural Language :: English", ], install_requires=[ From dbe8c6d7acb3f72087166200766b8436f96150e3 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 28 Mar 2023 01:11:06 -0500 Subject: [PATCH 19/57] black --- modelseedpy/core/mstemplate.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index fc2bbb08..4a628e21 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -171,11 +171,13 @@ def to_metabolite(self, index="0", force=False): index = "" index = str(index) - if self.compartment == 'e' and index.isnumeric(): + if self.compartment == "e" and index.isnumeric(): if force: - logger.warning(f'Forcing numeric index [{index}] to extra cellular compartment not advised') + logger.warning( + f"Forcing numeric index [{index}] to extra cellular compartment not advised" + ) else: - index = '0' + index = "0" cpd_id = f"{self.id}{index}" compartment = f"{self.compartment}{index}" From 29e5c4d164bdee5a9fd6077cba23636b4107bef7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 13 Apr 2023 08:44:34 -0500 Subject: [PATCH 20/57] examples --- .../Model Reconstruction/Gapfilling.ipynb | 95 +++--- examples/Model Reconstruction/Genomes.ipynb | 297 +++++++++++------- 2 files changed, 234 insertions(+), 158 deletions(-) diff --git a/examples/Model Reconstruction/Gapfilling.ipynb b/examples/Model Reconstruction/Gapfilling.ipynb index eea0c536..88eadaa6 100644 --- a/examples/Model Reconstruction/Gapfilling.ipynb +++ b/examples/Model Reconstruction/Gapfilling.ipynb @@ -2,17 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cobrakbase 0.2.8\n" - ] - } - ], + "outputs": [], "source": [ "import cobra\n", "#If you have CPLEX, uncomment this\n", @@ -20,31 +12,37 @@ "import cobrakbase\n", "#import modelseedpy.fbapkg\n", "from modelseedpy import GapfillingPkg, KBaseMediaPkg\n", - "from modelseedpy import FBAHelper, MSBuilder" + "from modelseedpy import FBAHelper, MSBuilder\n", + "kbase_api = cobrakbase.KBaseAPI()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "model = kbase_api.get_from_ws(\"test_model\",18528)" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:modelseedpy.core.msmodelutl:cpd00244 not found in model!\n" + ] + }, { "data": { "text/html": [ - "

Objective

1.0 bio1 = 0.8048653841131165

Uptake

\n", + "

Objective

1.0 bio1 = 0.7997546667881398

Uptake

\n", " \n", " \n", " \n", @@ -58,14 +56,14 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -79,98 +77,98 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -189,28 +187,35 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -218,19 +223,15 @@ "
Metabolite
cpd00009_e0EX_cpd00009_e00.99980.993400.00%
cpd00013_e0EX_cpd00013_e06.0376.09400.00%
cpd00030_e0EX_cpd00030_e00.006390.0063500.00%
cpd00034_e0EX_cpd00034_e00.006390.0063500.00%
cpd00048_e0EX_cpd00048_e00.17550.174400.00%
cpd00058_e0EX_cpd00058_e00.006390.0063500.00%
cpd00063_e0EX_cpd00063_e00.006390.0063500.00%
cpd00067_e0EX_cpd00067_e061.8561.4300.00%
cpd00099_e0EX_cpd00099_e00.006390.0063500.00%
cpd00149_e0EX_cpd00149_e00.006390.0063500.00%
cpd00205_e0EX_cpd00205_e00.006390.0063500.00%
cpd00254_e0EX_cpd00254_e00.006390.0063500.00%
cpd10516_e0EX_cpd10516_e00.025560.025400.00%
cpd17041_c0rxn13782_c00.80490.799800.00%
cpd17042_c0rxn13783_c00.80490.799800.00%
cpd17043_c0rxn13784_c00.80490.799800.00%
cpd00001_e0EX_cpd00001_e0-82.26-81.9500.00%
cpd00007_e0EX_cpd00007_e0-2.928-2.86900.00%
cpd15378_e0EX_cpd15378_e0-0.00639-0.006357100.00%18.92%
cpd03091_c0SK_cpd03091_c0-0.019051081.08%
cpd11416_c0SK_cpd11416_c0-0.8049-0.799800.00%
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "kbase_api = cobrakbase.KBaseAPI()\n", - "model = kbase_api.get_from_ws(\"test_model\",18528)\n", - "#If you have CPLEX, uncomment this\n", - "#model.solver = 'optlang-cplex'\n", "template = kbase_api.get_from_ws(\"GramNegModelTemplateV3\",\"NewKBaseModelTemplates\")\n", "media = kbase_api.get_from_ws(\"Carbon-D-Glucose\",\"KBaseMedia\")\n", "model = MSBuilder.gapfill_model(model,\"bio1\",template,media)\n", @@ -17910,7 +17911,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/examples/Model Reconstruction/Genomes.ipynb b/examples/Model Reconstruction/Genomes.ipynb index 60270468..8ea82ef4 100644 --- a/examples/Model Reconstruction/Genomes.ipynb +++ b/examples/Model Reconstruction/Genomes.ipynb @@ -1,223 +1,300 @@ { "cells": [ { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "tags": [] + }, "source": [ - "import modelseedpy\n", - "from modelseedpy.core.msgenome import MSGenome\n", - "from modelseedpy.core.rast_client import RastClient" + "### Genomes\n", + "\n", + "ModelSEEDpy provides its own genome object type `modelseedpy.core.msgenome.MSGenome` to manipulate genomes" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "genome = MS" + "import modelseedpy\n", + "from modelseedpy.core.msgenome import MSGenome" ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "1" + "#### Reading faa file\n", + "\n", + "To load a genome we can read a `.faa` file that contains protein sequences" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "genome = MSGenome.from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')" + ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "rast = RastClient()" + "genome" ] }, { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, "source": [ - "genome = MSGenome.from_fasta('GCF_000005845.2.faa', split=' ')" + "#### Manipulating genes\n", + "\n", + "Each gene is stored as a `modelseedpy.core.msgenome.MSFeature` in the `.features` of type `cobra.core.dictlist.DictList` similiar to the cobrapy `.reactions` and `.metabolites` in the `cobra.core.Model`" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of features: 3\n" - ] + "data": { + "text/plain": [ + "4285" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print('Number of features:', len(genome.features))" + "len(genome.features)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "for f in genome.features:\n", - " print(f.id, len(f.seq), f.description)" + "gene = genome.features.get_by_id('NP_414542.1')\n", + "gene" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[{'execution_time': 1622756127.36331,\n", - " 'tool_name': 'kmer_search',\n", - " 'hostname': 'pear',\n", - " 'parameters': ['-a',\n", - " '-g',\n", - " 200,\n", - " '-m',\n", - " 5,\n", - " '-d',\n", - " '/opt/patric-common/data/kmer_metadata_v2',\n", - " '-u',\n", - " 'http://pear.mcs.anl.gov:6100/query'],\n", - " 'id': '9CCA6D20-C4B3-11EB-A893-36A8BEF382BD'},\n", - " {'parameters': ['annotate_hypothetical_only=1',\n", - " 'dataset_name=Release70',\n", - " 'kmer_size=8'],\n", - " 'hostname': 'pear',\n", - " 'tool_name': 'KmerAnnotationByFigfam',\n", - " 'id': '9CE3769E-C4B3-11EB-A893-36A8BEF382BD',\n", - " 'execution_time': 1622756127.52738},\n", - " {'execute_time': 1622756127.88296,\n", - " 'hostname': 'pear',\n", - " 'parameters': [],\n", - " 'tool_name': 'annotate_proteins_similarity',\n", - " 'id': '9D19B7EA-C4B3-11EB-9714-71B3BDF382BD'}]" + "modelseedpy.core.msgenome.MSFeature" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "rast.annotate_genome(genome)" + "type(gene)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Equivalent call from the client it self" + "##### Gene annotation\n", + "Annotation is store as an **ontology term**. When loading from a `.faa` file no ontology term is present but we can add them later." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#genome, res = rast.annotate_genome_from_fasta('GCF_000005845.2_ASM584v2_protein.faa', split=' ')\n", - "#res" + "gene.ontology_terms" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "'thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.description" + ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]']}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gene.add_ontology_term('annotation', gene.description)\n", + "gene.ontology_terms" + ] }, { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "cell_type": "markdown", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "#### RAST\n", + "It is possible to annotate genomes with RAST by calling the `RastClient`" + ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from modelseedpy.core.rast_client import RastClient\n", + "rast = RastClient()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "[{'id': 'C54F08A4-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['-a',\n", + " '-g',\n", + " 200,\n", + " '-m',\n", + " 5,\n", + " '-d',\n", + " '/opt/patric-common/data/kmer_metadata_v2',\n", + " '-u',\n", + " 'http://pear.mcs.anl.gov:6100/query'],\n", + " 'hostname': 'pear',\n", + " 'tool_name': 'kmer_search',\n", + " 'execution_time': 1680040751.14837},\n", + " {'id': 'C5638324-CDB3-11ED-A7E9-CAF09D6086F0',\n", + " 'parameters': ['annotate_hypothetical_only=1',\n", + " 'dataset_name=Release70',\n", + " 'kmer_size=8'],\n", + " 'tool_name': 'KmerAnnotationByFigfam',\n", + " 'hostname': 'pear',\n", + " 'execution_time': 1680040751.28257},\n", + " {'parameters': [],\n", + " 'id': 'C5944E1E-CDB3-11ED-8217-51F29F6086F0',\n", + " 'execute_time': 1680040751.60236,\n", + " 'tool_name': 'annotate_proteins_similarity',\n", + " 'hostname': 'pear'}]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rast.annotate_genome(genome)" + ] }, { - "cell_type": "code", - "execution_count": 34, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "feature = genome.features.get_by_id('YP_588478.1')" + "RAST annotation is stored in the ontology term **RAST** and this is used as default to build metabolic models with the ModelSEED templates" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'RAST': 'DUF1435 domain-containing protein YjjZ [Escherichia coli str. K-12 substr. MG1655]'}" + "{'annotation': ['thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]'],\n", + " 'RAST': ['Thr operon leader peptide']}" ] }, - "execution_count": 36, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "feature.ontology_terms" + "gene.ontology_terms" ] }, { @@ -225,14 +302,12 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "feature.add_ontology_term('')" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -246,7 +321,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.10" } }, "nbformat": 4, From 24ef228fd800755c6380e917e4c513d8ff5d36ef Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 18 Apr 2023 16:09:06 -0500 Subject: [PATCH 21/57] lower/upper case fix --- modelseedpy/core/mstemplate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modelseedpy/core/mstemplate.py b/modelseedpy/core/mstemplate.py index 4a628e21..49fd98c3 100644 --- a/modelseedpy/core/mstemplate.py +++ b/modelseedpy/core/mstemplate.py @@ -598,7 +598,7 @@ def from_table( if "compartment" not in row: row["compartment"] = "c" metabolite = template.compcompounds.get_by_id( - f'{row["id"]}_{lower(row["compartment"])}' + f'{row["id"]}_{row["compartment"].lower()}' ) linked_mets = {} if ( @@ -609,14 +609,14 @@ def from_table( for item in array: sub_array = item.split(":") l_met = template.compcompounds.get_by_id( - f'{sub_array[0]}_{lower(row["compartment"])}' + f'{sub_array[0]}_{row["compartment"].lower()}' ) linked_mets[l_met] = float(sub_array[1]) self.add_biomass_component( metabolite, - lower(row["class"]), + row["class"].lower(), float(row["coefficient"]), - upper(row["coefficient_type"]), + row["coefficient_type"].upper(), linked_mets, ) return self From b1e7ff457ad84fcdddbd8f9bfff2a575956ee1ba Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 3 May 2023 09:31:25 -0500 Subject: [PATCH 22/57] Implementing multiple gapfill --- modelseedpy/core/msatpcorrection.py | 73 +++-- modelseedpy/core/msgapfill.py | 190 ++++++++---- modelseedpy/core/msmodelutl.py | 19 +- modelseedpy/fbapkg/gapfillingpkg.py | 448 ++-------------------------- 4 files changed, 206 insertions(+), 524 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index e72835aa..46bd32ea 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import logging -import itertools import cobra +import copy import json import time import pandas as pd @@ -22,7 +22,9 @@ from modelseedpy.helpers import get_template logger = logging.getLogger(__name__) -# logger.setLevel(logging.DEBUG) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO _path = _dirname(_abspath(__file__)) @@ -122,7 +124,9 @@ def __init__( self.coretemplate = core_template self.msgapfill = MSGapfill( - self.modelutl, default_gapfill_templates=core_template + self.modelutl, + default_gapfill_templates=[core_template], + default_target=self.atp_hydrolysis.id, ) # These should stay as None until atp correction is actually run self.cumulative_core_gapfilling = None @@ -209,6 +213,7 @@ def disable_noncore_reactions(self): self.other_compartments = [] # Iterating through reactions and disabling for reaction in self.model.reactions: + gfrxn = self.msgapfill.gfmodel.reactions.get_by_id(reaction.id) if reaction.id == self.atp_hydrolysis.id: continue if FBAHelper.is_ex(reaction): @@ -233,10 +238,12 @@ def disable_noncore_reactions(self): logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, "<"]) reaction.lower_bound = 0 + gfrxn.lower_bound = 0 if reaction.upper_bound > 0 and template_reaction.upper_bound <= 0: logger.debug(reaction.id + " core but reversible") self.noncore_reactions.append([reaction, ">"]) reaction.upper_bound = 0 + gfrxn.upper_bound = 0 else: logger.debug(f"{reaction.id} non core") if FBAHelper.rxn_compartment(reaction) != self.compartment: @@ -251,6 +258,8 @@ def disable_noncore_reactions(self): self.noncore_reactions.append([reaction, ">"]) reaction.lower_bound = 0 reaction.upper_bound = 0 + gfrxn.lower_bound = 0 + gfrxn.upper_bound = 0 def evaluate_growth_media(self): """ @@ -266,24 +275,22 @@ def evaluate_growth_media(self): output = {} with self.model: self.model.objective = self.atp_hydrolysis.id - # self.model.objective = self.model.problem.Objective(Zero,direction="max") - - logger.debug( - f"ATP bounds: ({self.atp_hydrolysis.lower_bound}, {self.atp_hydrolysis.upper_bound})" - ) - # self.model.objective.set_linear_coefficients({self.atp_hydrolysis.forward_variable:1}) pkgmgr = MSPackageManager.get_pkg_mgr(self.model) + # First prescreening model for ATP production without gapfilling + media_list = [] + min_objectives = {} for media, minimum_obj in self.atp_medias: - logger.debug("evaluate media %s", media) + logger.info("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - logger.debug("model.medium %s", self.model.medium) + logger.info("model.medium %s", self.model.medium) solution = self.model.optimize() - logger.debug( + logger.info( "evaluate media %s - %f (%s)", media.id, solution.objective_value, solution.status, ) + self.media_gapfill_stats[media] = None output[media.id] = solution.objective_value @@ -291,23 +298,29 @@ def evaluate_growth_media(self): solution.objective_value < minimum_obj or solution.status != "optimal" ): - self.media_gapfill_stats[media] = self.msgapfill.run_gapfilling( - media, - self.atp_hydrolysis.id, - minimum_obj, - check_for_growth=False, - ) - # IF gapfilling fails - need to activate and penalize the noncore and try again + media_list.append(media) + min_objectives[media] = minimum_obj elif solution.objective_value >= minimum_obj: self.media_gapfill_stats[media] = {"reversed": {}, "new": {}} - logger.debug( - "gapfilling stats: %s", - json.dumps(self.media_gapfill_stats[media], indent=2, default=vars), - ) + + # Now running gapfilling on all conditions where initially there was no growth + all_solutions = self.msgapfill.run_multi_gapfill( + media_list, + self.atp_hydrolysis.id, + min_objectives, + check_for_growth=False, + ) + + # Adding the new solutions to the media gapfill stats + for media in all_solutions: + self.media_gapfill_stats[media] = all_solutions[media] if MSATPCorrection.DEBUG: + export_data = {} + for media in self.media_gapfill_stats: + export_data[media.id] = self.media_gapfill_stats[media] with open("debug.json", "w") as outfile: - json.dump(self.media_gapfill_stats[media], outfile) + json.dump(export_data, outfile) return output @@ -342,7 +355,7 @@ def determine_growth_media(self, max_gapfilling=None): if self.max_gapfilling is None: self.max_gapfilling = best_score - logger.debug(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") + logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: gfscore = 0 @@ -359,6 +372,9 @@ def determine_growth_media(self, max_gapfilling=None): atp_att["selected_media"][media.id] = 0 self.modelutl.save_attributes(atp_att, "ATP_analysis") + if MSATPCorrection.DEBUG: + with open("atp_att_debug.json", "w") as outfile: + json.dump(atp_att, outfile) def determine_growth_media2(self, max_gapfilling=None): """ @@ -386,7 +402,7 @@ def scoring_function(media): max_gapfilling = best_score + self.gapfilling_delta for media in media_scores: score = media_scores[media] - logger.debug(score, best_score, max_gapfilling) + logger.info(score, best_score, max_gapfilling) if score <= max_gapfilling: self.selected_media.append(media) @@ -435,7 +451,7 @@ def expand_model_to_genome_scale(self): ) # Removing filtered reactions for item in self.filtered_noncore: - print("Removing " + item[0].id + " " + item[1]) + logger.debug("Removing " + item[0].id + " " + item[1]) if item[1] == ">": item[0].upper_bound = 0 else: @@ -500,6 +516,7 @@ def build_tests(self, multiplier=None): self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") + logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) tests.append( { "media": media, @@ -527,7 +544,7 @@ def run_atp_correction(self): self.evaluate_growth_media() self.determine_growth_media() self.apply_growth_media_gapfilling() - self.evaluate_growth_media() + # self.evaluate_growth_media() self.expand_model_to_genome_scale() return self.build_tests() diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 8d023272..92890c0e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -1,9 +1,5 @@ # -*- coding: utf-8 -*- import logging -import itertools # !!! the import is never used - -logger = logging.getLogger(__name__) - import cobra import re from optlang.symbolics import Zero, add @@ -12,6 +8,11 @@ from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.exceptions import GapfillingError +logger = logging.getLogger(__name__) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO + class MSGapfill: @staticmethod @@ -32,6 +33,10 @@ def __init__( reaction_scores={}, blacklist=[], atp_gapfilling=False, + minimum_obj=0.01, + default_excretion=100, + default_uptake=100, + default_target=None, ): # Discerning input is model or mdlutl and setting internal links if isinstance(model_or_mdlutl, MSModelUtil): @@ -49,7 +54,18 @@ def __init__( "cpd15302", "cpd03091", ] # the cpd11416 compound is filtered during model extension with templates - self.gfmodel = self.lp_filename = self.last_solution = None + # Cloning model to create gapfilling model + self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) + # Getting package manager for gapfilling model + self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) + # Setting target from input + if default_target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(default_target).flux_expression, + direction="max", + ) + # Setting parameters for gapfilling + self.lp_filename = self.last_solution = None self.model_penalty = 1 self.default_gapfill_models = default_gapfill_models self.default_gapfill_templates = default_gapfill_templates @@ -61,23 +77,8 @@ def __init__( self.test_conditions = test_conditions self.reaction_scores = reaction_scores self.cumulative_gapfilling = [] - - def run_gapfilling( - self, - media=None, - target=None, - minimum_obj=0.01, - binary_check=False, - prefilter=True, - check_for_growth=True, - ): - if target: - self.model.objective = self.model.problem.Objective( - self.model.reactions.get_by_id(target).flux_expression, direction="max" - ) - self.gfmodel = cobra.io.json.from_json(cobra.io.json.to_json(self.model)) - pkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) - pkgmgr.getpkg("GapfillingPkg").build_package( + # Building gapfilling package + self.gfpkgmgr.getpkg("GapfillingPkg").build_package( { "auto_sink": self.auto_sink, "model_penalty": self.model_penalty, @@ -87,58 +88,95 @@ def run_gapfilling( "gapfill_models_by_index": self.gapfill_models_by_index, "gapfill_all_indecies_with_default_templates": self.gapfill_all_indecies_with_default_templates, "gapfill_all_indecies_with_default_models": self.gapfill_all_indecies_with_default_models, - "default_excretion": 100, - "default_uptake": 100, + "default_excretion": default_excretion, + "default_uptake": default_uptake, "minimum_obj": minimum_obj, "blacklist": self.blacklist, "reaction_scores": self.reaction_scores, "set_objective": 1, } ) - pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + def test_gapfill_database(self, media, target=None, before_filtering=True): # Testing if gapfilling can work before filtering - if ( - check_for_growth - and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() - ): - # save_json_model(self.model, "gfdebugmdl.json") - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if media.id not in gf_sensitivity: - gf_sensitivity[media.id] = {} - if target not in gf_sensitivity[media.id]: - gf_sensitivity[media.id][target] = {} - gf_sensitivity[media.id][target][ - "FBF" - ] = self.mdlutl.find_unproducible_biomass_compounds(target) - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") - logger.warning("No solution found before filtering for %s", media) - return None + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = str(self.gfmodel.objective) + target = target.split(" ")[0] + target = target[13:] + if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(): + return True + gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) + if media.id not in gf_sensitivity: + gf_sensitivity[media.id] = {} + if target not in gf_sensitivity[media.id]: + gf_sensitivity[media.id][target] = {} + filter_msg = " " + note = "FAF" + if before_filtering: + filter_msg = " before filtering " + note = "FBF" + gf_sensitivity[media.id][target][ + note + ] = self.mdlutl.find_unproducible_biomass_compounds(target) + self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") + logger.warning( + "No gapfilling solution found" + + filter_msg + + "for " + + media.id + + " activating " + + target + ) + return False + def prefilter(self, media, target): # Filtering breaking reactions out of the database - if prefilter and self.test_conditions: - pkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( + if self.test_conditions: + self.gfpkgmgr.getpkg("GapfillingPkg").filter_database_based_on_tests( self.test_conditions ) # Testing if gapfilling can work after filtering - if ( - check_for_growth - and not pkgmgr.getpkg("GapfillingPkg").test_gapfill_database() - ): - # save_json_model(self.model, "gfdebugmdl.json") - gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if media.id not in gf_sensitivity: - gf_sensitivity[media.id] = {} - if target not in gf_sensitivity[media.id]: - gf_sensitivity[media.id][target] = {} - gf_sensitivity[media.id][target][ - "FAF" - ] = self.mdlutl.find_unproducible_biomass_compounds(target) - self.mdlutl.save_attributes(gf_sensitivity, "gf_sensitivity") - logger.warning("No solution found after filtering for %s", media) + if not self.test_gapfill_database(media, target, before_filtering=False): + return False + return True + + def run_gapfilling( + self, + media=None, + target=None, + minimum_obj=None, + binary_check=False, + prefilter=True, + check_for_growth=True, + ): + # Setting target and media if specified + if target: + self.gfmodel.objective = self.gfmodel.problem.Objective( + self.gfmodel.reactions.get_by_id(target).flux_expression, + direction="max", + ) + self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + if media: + self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) + if minimum_obj: + self.gfpkgmgr.getpkg("GapfillingPkg").set_min_objective(minimum_obj) + + # Testing if gapfilling can work before filtering + if not self.test_gapfill_database(media, before_filtering=True): return None + # Filtering + if prefilter: + if not self.prefilter(media, target): + return None + # Printing the gapfilling LP file if self.lp_filename: with open(self.lp_filename, "w") as out: @@ -157,9 +195,13 @@ def run_gapfilling( return None # Computing solution and ensuring all tests still pass - self.last_solution = pkgmgr.getpkg("GapfillingPkg").compute_gapfilled_solution() + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).compute_gapfilled_solution() if self.test_conditions: - self.last_solution = pkgmgr.getpkg("GapfillingPkg").run_test_conditions( + self.last_solution = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).run_test_conditions( self.test_conditions, self.last_solution, self.test_condition_iteration_limit, @@ -172,7 +214,7 @@ def run_gapfilling( # Running binary check to reduce solution to minimal reaction soltuion if binary_check: - self.last_solution = pkgmgr.getpkg( + self.last_solution = self.gfpkgmgr.getpkg( "GapfillingPkg" ).binary_check_gapfilling_solution() @@ -183,6 +225,32 @@ def run_gapfilling( self.last_solution["binary_check"] = binary_check return self.last_solution + def run_multi_gapfill( + self, + media_list, + target=None, + minimum_objectives={}, + binary_check=False, + prefilter=True, + check_for_growth=True, + ): + first = True + solution_dictionary = {} + for item in media_list: + minimum_obj = None + if item in minimum_objectives: + minimum_obj = minimum_objectives[item] + if first: + solution_dictionary[item] = self.run_gapfilling( + item, target, minimum_obj, binary_check, True, True + ) + else: + solution_dictionary[item] = self.run_gapfilling( + item, None, minimum_obj, binary_check, False, True + ) + false = False + return solution_dictionary + def integrate_gapfill_solution( self, solution, cumulative_solution=[], link_gaps_to_objective=True ): diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index a44c5653..371abeb7 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -16,12 +16,9 @@ # from builtins import None logger = logging.getLogger(__name__) -logger.setLevel(logging.DEBUG) -# handler = logging.StreamHandler(sys.stdout) -# handler.setLevel(logging.DEBUG) -# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') -# handler.setFormatter(formatter) -# logger.addHandler(handler) +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSModelUtil: @@ -924,13 +921,15 @@ def reaction_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) else: new_filtered = self.linear_expansion_test( reaction_list, condition, currmodel ) for item in new_filtered: - filtered_list.append(item) + if item not in filtered_list: + filtered_list.append(item) # Restoring knockout of newly filtered reactions, which expire after exiting the "with" block above for item in new_filtered: if item[1] == ">": @@ -938,10 +937,10 @@ def reaction_expansion_test( else: item[0].lower_bound = 0 toc = time.perf_counter() - logger.debug( + logger.info( "Expansion time:" + condition["media"].id + ":" + str((toc - tic)) ) - logger.debug( + logger.info( "Filtered count:" + str(len(filtered_list)) + " out of " diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index f14eb7ed..465e5558 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import logging +import sys import re import json from optlang.symbolics import Zero, add @@ -19,427 +20,11 @@ from modelseedpy.core.fbahelper import FBAHelper logger = logging.getLogger(__name__) -# logger.setLevel(logging.DEBUG) +logger.setLevel( + logging.WARNING +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO -base_blacklist = { - "rxn10157": "<", - "rxn09295": "<", - "rxn05938": "<", - "rxn08628": ">", - "rxn10155": "<", - "rxn01353": "<", - "rxn05683": "<", - "rxn09193": "<", - "rxn09003": "<", - "rxn01128": ">", - "rxn08655": "<", - "rxn09272": "<", - "rxn05313": "<", - "rxn01510": ">", - "rxn05297": ">", - "rxn00507": "<", - "rxn05596": "<", - "rxn01674": "<", - "rxn01679": "<", - "rxn00778": ">", - "rxn05206": ">", - "rxn00239": "<", - "rxn05937": "<", - "rxn00715": "<", - "rxn05638": ">", - "rxn05289": ">", - "rxn00839": "<", - "rxn08866": "<", - "rxn10901": "<", - "rxn09331": "<", - "rxn05242": "<", - "rxn12549": "<", - "rxn13143": "<", - "rxn12498": "<", - "rxn08373": "<", - "rxn05208": "<", - "rxn09372": "<", - "rxn00571": ">", - "rxn08104": "<", - "rxn08704": "<", - "rxn07191": "<", - "rxn09672": "<", - "rxn01048": ">", - "rxn11267": ">", - "rxn08290": "<", - "rxn09307": "<", - "rxn05676": ">", - "rxn09653": "<", - "rxn11277": "<", - "rxn00976": "<", - "rxn02520": "<", - "rxn08275": "<", - "rxn09121": "<", - "rxn08999": "<", - "rxn08633": "<", - "rxn08610": "<", - "rxn09218": "<", - "rxn05626": "<", - "rxn11320": "<", - "rxn10058": ">", - "rxn08544": "<", - "rxn12539": "<", - "rxn08990": "<", - "rxn09348": "<", - "rxn00378": "<", - "rxn05243": "<", - "rxn02154": "<", - "rxn12587": "<", - "rxn00125": "<", - "rxn05648": "<", - "rxn13722": "<", - "rxn10910": ">", - "rxn05308": ">", - "rxn08585": "<", - "rxn14207": "<", - "rxn08682": "<", - "rxn10895": "<", - "rxn09655": "<", - "rxn11934": "<", - "rxn01742": ">", - "rxn05222": ">", - "rxn09942": "<", - "rxn13753": ">", - "rxn10857": "<", - "rxn03468": "<", - "rxn04942": "<", - "rxn10990": ">", - "rxn08639": "<", - "rxn09248": "<", - "rxn11935": ">", - "rxn00870": ">", - "rxn08314": "<", - "rxn09378": "<", - "rxn09269": "<", - "rxn10057": ">", - "rxn13702": ">", - "rxn00517": "<", - "rxn09221": ">", - "rxn01505": ">", - "rxn13692": ">", - "rxn05573": "<", - "rxn10123": ">", - "rxn09005": "<", - "rxn05244": "<", - "rxn05940": "<", - "rxn10124": ">", - "rxn06202": ">", - "rxn09660": "<", - "rxn02260": ">", - "rxn08912": "<", - "rxn05760": ">", - "rxn05580": ">", - "rxn02181": ">", - "rxn09339": "<", - "rxn00767": "<", - "rxn09118": "<", - "rxn05303": "<", - "rxn06110": "<", - "rxn12800": "<", - "rxn10966": "<", - "rxn12561": "<", - "rxn04678": ">", - "rxn10818": "<", - "rxn08166": "<", - "rxn02044": ">", - "rxn12623": "<", - "rxn13392": ">", - "rxn02283": "<", - "rxn13647": ">", - "rxn08653": "<", - "rxn05218": ">", - "rxn11676": ">", - "rxn00197": "<", - "rxn00697": "<", - "rxn12575": ">", - "rxn08188": "<", - "rxn01215": "<", - "rxn08730": ">", - "rxn08519": ">", - "rxn08642": "<", - "rxn05245": "<", - "rxn04042": "<", - "rxn01443": ">", - "rxn08535": "<", - "rxn03983": "<", - "rxn08317": "<", - "rxn14173": ">", - "rxn08868": "<", - "rxn05893": ">", - "rxn00435": ">", - "rxn13724": "<", - "rxn09681": "<", - "rxn00572": ">", - "rxn05942": "<", - "rxn11158": "<", - "rxn05562": "<", - "rxn10868": "<", - "rxn10426": "<", - "rxn00941": ">", - "rxn08240": "<", - "rxn05220": ">", - "rxn01228": ">", - "rxn12540": "<", - "rxn10618": ">", - "rxn09659": "<", - "rxn08985": ">", - "rxn05523": "<", - "rxn00421": "<", - "rxn09385": "<", - "rxn08542": "<", - "rxn09658": "<", - "rxn01173": "<", - "rxn10977": "<", - "rxn05216": "<", - "rxn13748": ">", - "rxn10769": ">", - "rxn00451": "<", - "rxn01639": "<", - "rxn08661": "<", - "rxn09308": "<", - "rxn09260": "<", - "rxn00253": "<", - "rxn05207": "<", - "rxn01667": "<", - "rxn08063": "<", - "rxn01508": ">", - "rxn09657": "<", - "rxn01209": ">", - "rxn00548": ">", - "rxn12617": "<", - "rxn08747": ">", - "rxn08096": "<", - "rxn11951": "<", - "rxn09061": "<", - "rxn10978": "<", - "rxn02748": ">", - "rxn09663": "<", - "rxn08737": "<", - "rxn13127": "<", - "rxn09366": "<", - "rxn05634": "<", - "rxn05554": "<", - "rxn09266": ">", - "rxn04676": ">", - "rxn11078": ">", - "rxn04932": "<", - "rxn00607": ">", - "rxn08856": "<", - "rxn12624": "<", - "rxn05215": "<", - "rxn13686": "<", - "rxn12529": "<", - "rxn00234": "<", - "rxn13689": ">", - "rxn08117": "<", - "rxn05315": ">", - "rxn08865": "<", - "rxn11678": ">", - "rxn00518": "<", - "rxn00195": "<", - "rxn10054": "<", - "rxn12532": "<", - "rxn05902": ">", - "rxn12777": "<", - "rxn12822": ">", - "rxn13735": ">", - "rxn00427": "<", - "rxn13196": "<", - "rxn08284": "<", - "rxn10576": ">", - "rxn00891": "<", - "rxn08293": "<", - "rxn00374": ">", - "rxn08795": "<", - "rxn12583": "<", - "rxn00918": ">", - "rxn08525": "<", - "rxn10427": ">", - "rxn09271": "<", - "rxn10860": "<", - "rxn10600": ">", - "rxn13729": ">", - "rxn01375": "<", - "rxn13726": ">", - "rxn10587": "<", - "rxn08672": "<", - "rxn10588": ">", - "rxn08152": ">", - "rxn09306": "<", - "rxn00635": "<", - "rxn08427": "<", - "rxn05225": ">", - "rxn00680": ">", - "rxn08786": ">", - "rxn08721": "<", - "rxn11339": "<", - "rxn05749": "<", - "rxn01187": ">", - "rxn08625": "<", - "rxn06677": "<", - "rxn12302": ">", - "rxn02770": "<", - "rxn05628": "<", - "rxn13706": ">", - "rxn12739": "<", - "rxn00177": "<", - "rxn09896": ">", - "rxn12574": "<", - "rxn12533": ">", - "rxn08537": ">", - "rxn05651": ">", - "rxn08170": "<", - "rxn05240": "<", - "rxn00663": ">", - "rxn12589": "<", - "rxn09299": "<", - "rxn02059": "<", - "rxn12217": ">", - "rxn06592": "<", - "rxn05939": ">", - "rxn08581": "<", - "rxn00430": "<", - "rxn09283": ">", - "rxn08919": "<", - "rxn13660": "<", - "rxn08065": "<", - "rxn08428": ">", - "rxn10936": ">", - "rxn05238": ">", - "rxn05685": "<", - "rxn08920": ">", - "rxn07193": "<", - "rxn08265": "<", - "rxn12554": "<", - "rxn08094": "<", - "rxn13727": ">", - "rxn04158": "<", - "rxn09839": "<", - "rxn10820": "<", - "rxn00869": ">", - "rxn00331": ">", - "rxn09034": "<", - "rxn01136": "<", - "rxn09247": "<", - "rxn08302": "<", - "rxn10594": "<", - "rxn08670": ">", - "rxn11334": "<", - "rxn09941": "<", - "rxn02919": "<", - "rxn09670": "<", - "rxn10892": "<", - "rxn09794": "<", - "rxn02332": ">", - "rxn00244": ">", - "rxn08030": "<", - "rxn12526": "<", - "rxn13150": ">", - "rxn05486": "<", - "rxn10852": ">", - "rxn13790": ">", - "rxn06348": ">", - "rxn09172": ">", - "rxn03653": ">", - "rxn05213": "<", - "rxn01869": "<", - "rxn08142": "<", - "rxn12606": "<", - "rxn11916": ">", - "rxn05748": "<", - "rxn08543": "<", - "rxn01107": ">", - "rxn05708": "<", - "rxn08169": "<", - "rxn06641": ">", - "rxn12578": "<", - "rxn01172": "<", - "rxn02120": ">", - "rxn05669": "<", - "rxn11322": "<", - "rxn12630": "<", - "rxn00698": "<", - "rxn05507": ">", - "rxn12530": "<", - "rxn09304": "<", - "rxn05532": ">", - "rxn03644": ">", - "rxn08733": "<", - "rxn13733": "<", - "rxn10044": ">", - "rxn00176": ">", - "rxn01364": ">", - "rxn02198": ">", - "rxn06990": "<", - "rxn08424": "<", - "rxn08069": "<", - "rxn05611": "<", - "rxn11973": "<", - "rxn12665": ">", - "rxn05241": "<", - "rxn08982": ">", - "rxn00542": ">", - "rxn12588": "<", - "rxn03517": ">", - "rxn01805": "<", - "rxn13203": ">", - "rxn08614": "<", - "rxn12200": ">", - "rxn13811": "<", - "rxn08377": "<", - "rxn11342": ">", - "rxn02976": "<", - "rxn08217": "<", - "rxn07921": ">", - "rxn09944": ">", - "rxn02401": "<", - "rxn08429": ">", - "rxn00905": "<", - "rxn08196": "<", - "rxn03054": "<", - "rxn08643": "<", - "rxn01874": "<", - "rxn08028": "<", - "rxn01641": ">", - "rxn03442": "<", - "rxn02172": "<", - "rxn10692": ">", - "rxn10613": ">", - "rxn12928": ">", - "rxn12994": ">", - "rxn13843": ">", - "rxn12942": ">", - "rxn12934": ">", - "rxn16827": ">", - "rxn12941": ">", - "rxn01736": ">", - "rxn14109": ">", - "rxn15060": ">", - "rxn15064": ">", - "rxn30685": ">", - "rxn10095": ">", - "rxn16143": ">", - "rxn25271": ">", - "rxn25160": ">", - "rxn30917": ">", - "rxn16843": ">", - "rxn08921": ">", - "rxn09390": ">", - "rxn27362": ">", - "rxn02664": ">", - "rxn24638": ">", - "rxn24613": ">", - "rxn24611": ">", - "rxn14428": ">", - "rxn03079": ">", - "rxn03020": ">", - "rxn10471": "<", -} +base_blacklist = {} class GapfillingPkg(BaseFBAPkg): @@ -600,6 +185,9 @@ def build_package(self, parameters): reaction_objective.set_linear_coefficients(obj_coef) self.parameters["gfobj"] = self.model.objective + def reset_original_objective(self): + self.parameters["origobj"] = self.model.objective + def extend_model_with_model_for_gapfilling(self, source_model, index): new_metabolites = {} new_reactions = {} @@ -980,7 +568,7 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10): condition["change"] = False if len(filtered_list) > 0: if max_iterations > 0: - print("Gapfilling test failed " + str(11 - max_iterations)) + logger.warning("Gapfilling test failed " + str(11 - max_iterations)) # Forcing filtered reactions to zero for item in filtered_list: if item[1] == ">": @@ -1003,7 +591,7 @@ def test_gapfill_database(self): self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 self.model.objective = self.parameters["origobj"] solution = self.model.optimize() - logger.info( + logger.debug( "Objective with gapfill database:" + str(solution.objective_value) + "; min objective:" @@ -1017,6 +605,12 @@ def test_gapfill_database(self): return False return True + def set_min_objective(self, min_objective): + self.parameters["minimum_obj"] = min_objective + self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = self.parameters[ + "minimum_obj" + ] + def filter_database_based_on_tests(self, test_conditions): # Setting the minimal growth constraint to zero self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 @@ -1036,7 +630,7 @@ def filter_database_based_on_tests(self, test_conditions): ) # Now constraining filtered reactions to zero for item in filtered_list: - logger.info("Filtering:" + item[0].id + item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1079,14 +673,14 @@ def filter_database_based_on_tests(self, test_conditions): else: count += -1 rxn.lower_bound = 0 - logger.info("Reactions unfiltered:" + str(count)) + logger.debug("Reactions unfiltered:" + str(count)) # Checking for model reactions that can be removed to enable all tests to pass self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].lb = 0 filtered_list = self.modelutl.reaction_expansion_test( self.parameters["original_reactions"], test_conditions ) for item in filtered_list: - logger.info("Filtering:" + item[0].id + item[1]) + logger.debug("Filtering:" + item[0].id + item[1]) if item[1] == ">": self.model.reactions.get_by_id(item[0].id).upper_bound = 0 else: @@ -1109,15 +703,19 @@ def compute_gapfilled_solution(self, flux_values=None): and "forward" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} >") output["new"][reaction.id] = ">" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} >") output["reversed"][reaction.id] = ">" elif ( flux_values[reaction.id]["reverse"] > Zero and "reverse" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: + logger.debug(f"New gapfilled reaction: {reaction.id} <") output["new"][reaction.id] = "<" else: + logger.debug(f"Reversed gapfilled reaction: {reaction.id} <") output["reversed"][reaction.id] = "<" return output From 9a6da4521df92689f27f4a8794e32f40bccdc1dd Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 16 May 2023 01:18:17 -0500 Subject: [PATCH 23/57] Fixing test --- tests/core/test_msatpcorreption.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/core/test_msatpcorreption.py b/tests/core/test_msatpcorreption.py index 108cc3ec..3d036193 100644 --- a/tests/core/test_msatpcorreption.py +++ b/tests/core/test_msatpcorreption.py @@ -251,7 +251,7 @@ def test_ms_atp_correction_and_gap_fill1( model = get_model_with_infinite_atp_loop(["GLCpts_c0", "GLUSy_c0", "GLUDy_c0"]) model.reactions.ATPM_c0.lower_bound = 0 model.reactions.ATPM_c0.upper_bound = 1000 - + model.objective = "ATPM_c0" atp_correction = MSATPCorrection( model, template, @@ -260,7 +260,6 @@ def test_ms_atp_correction_and_gap_fill1( load_default_medias=False, ) tests = atp_correction.run_atp_correction() - # expected tests = [{'media': MSMedia object, 'is_max_threshold': True, 'threshold': 21.0, 'objective': 'ATPM_c0'}] assert tests @@ -268,13 +267,13 @@ def test_ms_atp_correction_and_gap_fill1( assert tests[0]["threshold"] > 0 assert tests[0]["objective"] == "ATPM_c0" + model.objective = "BIOMASS_Ecoli_core_w_GAM_c0" gap_fill = MSGapfill(model, [template_genome_scale], [], tests, {}, []) result = gap_fill.run_gapfilling( media_genome_scale_glucose_aerobic, "BIOMASS_Ecoli_core_w_GAM_c0", minimum_obj=0.1, ) - # either GLUSy_c0 or GLUDy_c0 should be gap filled for glutamate assert result From 3256ea0ad2e0d5b199500afe654c18b33d54ee89 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 14 Jun 2023 23:48:42 -0500 Subject: [PATCH 24/57] Renaming ATP --- modelseedpy/core/msatpcorrection.py | 36 ++++++++++++++--------------- modelseedpy/data/atp_medias.tsv | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 46bd32ea..c848fbeb 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -23,28 +23,28 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.WARNING + logging.INFO ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO _path = _dirname(_abspath(__file__)) min_gap = { - "Glc/O2": 5, - "Etho/O2": 0.01, - "Ac/O2": 1, - "Pyr/O2": 3, - "Glyc/O2": 2, - "Fum/O2": 3, - "Succ/O2": 2, - "Akg/O2": 2, - "LLac/O2": 2, - "Dlac/O2": 2, - "For/O2": 2, - "For/NO3": 1.5, - "Pyr/NO": 2.5, - "Pyr/NO2": 2.5, - "Pyr/NO3": 2.5, - "Pyr/SO4": 2.5, + "Glc.O2": 5, + "Etho.O2": 0.01, + "Ac.O2": 1, + "Pyr.O2": 3, + "Glyc.O2": 2, + "Fum.O2": 3, + "Succ.O2": 2, + "Akg.O2": 2, + "LLac.O2": 2, + "Dlac.O2": 2, + "For.O2": 2, + "For.NO3": 1.5, + "Pyr.NO": 2.5, + "Pyr.NO2": 2.5, + "Pyr.NO3": 2.5, + "Pyr.SO4": 2.5, } @@ -451,7 +451,7 @@ def expand_model_to_genome_scale(self): ) # Removing filtered reactions for item in self.filtered_noncore: - logger.debug("Removing " + item[0].id + " " + item[1]) + logger.info("Removing " + item[0].id + " " + item[1]) if item[1] == ">": item[0].upper_bound = 0 else: diff --git a/modelseedpy/data/atp_medias.tsv b/modelseedpy/data/atp_medias.tsv index 4a4b7a84..0bf5e56c 100644 --- a/modelseedpy/data/atp_medias.tsv +++ b/modelseedpy/data/atp_medias.tsv @@ -1,4 +1,4 @@ -seed Glc/O2 Ac/O2 Etho/O2 Pyr/O2 Glyc/O2 Fum/O2 Succ/O2 Akg/O2 LLac/O2 Dlac/O2 For/O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For/NO2 For/NO3 For/NO Pyr/NO2 Pyr/NO3 Pyr/NO Ac/NO2 Ac/NO3 Ac/NO Glc/DMSO Glc/TMAO Pyr/DMSO Pyr/TMAO Pyr/SO4 Pyr/SO3 H2/CO2 H2/Ac For/SO4/H2 LLac/SO4/H2 For/SO4 LLac/SO4 H2/SO4 empty Light ANME Methane +seed Glc.O2 Ac.O2 Etho.O2 Pyr.O2 Glyc.O2 Fum.O2 Succ.O2 Akg.O2 LLac.O2 Dlac.O2 For.O2 Glc Ac Etho Pyr Glyc Fum Succ Akg Llac Dlac For mal-L For.NO2 For.NO3 For.NO Pyr.NO2 Pyr.NO3 Pyr.NO Ac.NO2 Ac.NO3 Ac.NO Glc.DMSO Glc.TMAO Pyr.DMSO Pyr.TMAO Pyr.SO4 Pyr.SO3 H2.CO2 H2.Ac For.SO4.H2 LLac.SO4.H2 For.SO4 LLac.SO4 H2.SO4 empty Light ANME Methane EX_cpd00027_e0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 EX_cpd00024_e0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 EX_cpd00106_e0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 From da33bd2b366cadab4da89d5f790ef3607fb54dba Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 26 Jun 2023 12:34:56 -0500 Subject: [PATCH 25/57] fixes --- modelseedpy/biochem/modelseed_compound.py | 2 +- modelseedpy/core/msbuilder.py | 1 + modelseedpy/core/msgenome.py | 9 +++++++-- modelseedpy/core/rast_client.py | 8 ++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modelseedpy/biochem/modelseed_compound.py b/modelseedpy/biochem/modelseed_compound.py index 1d00435d..a3ea75f3 100644 --- a/modelseedpy/biochem/modelseed_compound.py +++ b/modelseedpy/biochem/modelseed_compound.py @@ -71,7 +71,7 @@ def to_template_compartment_compound(self, compartment): self.abbr, ) # build Template Compartment Compound - res = MSTemplateSpecies(cpd_id, self.charge, compartment, self.id) + res = MSTemplateSpecies(cpd_id, self.charge, compartment, metabolite.id) # assign Compound to Compartment Compound res._template_compound = metabolite diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index cd16d75e..e376ae0b 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -315,6 +315,7 @@ def __init__( self.reaction_to_complex_sets = None self.compartments = None self.base_model = None + self.compartments_index = None # TODO: implement custom index by compartment self.index = index def build_drains(self): diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index 875699c2..e41953d2 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -15,8 +15,13 @@ def normalize_role(s): def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): - with open(f, "r") as fh: - return parse_fasta_str(fh.read(), split, h_func) + if f.endswith('.gz'): + import gzip + with gzip.open(f, 'rb') as fh: + return parse_fasta_str(fh.read().decode('utf-8'), split, h_func) + else: + with open(f, "r") as fh: + return parse_fasta_str(fh.read(), split, h_func) def parse_fasta_str(faa_str, split=DEFAULT_SPLIT, h_func=None): diff --git a/modelseedpy/core/rast_client.py b/modelseedpy/core/rast_client.py index 575cf0d4..ebe06cb5 100644 --- a/modelseedpy/core/rast_client.py +++ b/modelseedpy/core/rast_client.py @@ -84,6 +84,14 @@ def annotate_genome_from_fasta(self, filepath, split="|"): return genome, res + def annotate_protein_sequence(self, protein_id: str, protein_seq: str): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + + def annotate_protein_sequences(self, protein_seqs: dict): + p_features = [{"id": protein_id, "protein_translation": protein_seq}] + return self.f(p_features) + def f1(self, protein_id, protein_seq): p_features = [{"id": protein_id, "protein_translation": protein_seq}] return self.f(p_features) From d13f6c20b7f70413d668c4ddcbb23b27af083f1b Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Mon, 26 Jun 2023 12:35:47 -0500 Subject: [PATCH 26/57] black --- modelseedpy/core/msgenome.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msgenome.py b/modelseedpy/core/msgenome.py index e41953d2..78f1e004 100644 --- a/modelseedpy/core/msgenome.py +++ b/modelseedpy/core/msgenome.py @@ -15,10 +15,11 @@ def normalize_role(s): def read_fasta(f, split=DEFAULT_SPLIT, h_func=None): - if f.endswith('.gz'): + if f.endswith(".gz"): import gzip - with gzip.open(f, 'rb') as fh: - return parse_fasta_str(fh.read().decode('utf-8'), split, h_func) + + with gzip.open(f, "rb") as fh: + return parse_fasta_str(fh.read().decode("utf-8"), split, h_func) else: with open(f, "r") as fh: return parse_fasta_str(fh.read(), split, h_func) From b7edac04b16ae63ba68b7afa344dd8995c81e946 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 2 Jul 2023 14:30:36 -0500 Subject: [PATCH 27/57] Changing zero threshold on gapfilling --- modelseedpy/core/msgapfill.py | 8 +++++++- modelseedpy/core/msmodelutl.py | 2 ++ modelseedpy/fbapkg/gapfillingpkg.py | 10 +++++----- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 92890c0e..774c1ca8 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -67,6 +67,7 @@ def __init__( # Setting parameters for gapfilling self.lp_filename = self.last_solution = None self.model_penalty = 1 + self.default_minimum_objective = minimum_obj self.default_gapfill_models = default_gapfill_models self.default_gapfill_templates = default_gapfill_templates self.gapfill_templates_by_index, self.gapfill_models_by_index = {}, {} @@ -165,6 +166,8 @@ def run_gapfilling( self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() if media: self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) + if not minimum_obj: + minimum_obj = self.default_minimum_objective if minimum_obj: self.gfpkgmgr.getpkg("GapfillingPkg").set_min_objective(minimum_obj) @@ -230,14 +233,17 @@ def run_multi_gapfill( media_list, target=None, minimum_objectives={}, + default_minimum_objective = None, binary_check=False, prefilter=True, check_for_growth=True, ): + if not default_minimum_objective: + default_minimum_objective = self.default_minimum_objective first = True solution_dictionary = {} for item in media_list: - minimum_obj = None + minimum_obj = default_minimum_objective if item in minimum_objectives: minimum_obj = minimum_objectives[item] if first: diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 371abeb7..d24ac90f 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -88,6 +88,8 @@ def search_name(name): @staticmethod def get(model, create_if_missing=True): + if isinstance(model, MSModelUtil): + return model if model in MSModelUtil.mdlutls: return MSModelUtil.mdlutls[model] elif create_if_missing: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 465e5558..d066c1a1 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} - +zero_threshold = 1e-8 class GapfillingPkg(BaseFBAPkg): """ """ @@ -532,12 +532,12 @@ def knockout_gf_reactions_outside_solution(self, solution=None, flux_values=None if rxnobj.id in self.gapfilling_penalties: if ( "reverse" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["reverse"] <= Zero + and flux_values[rxnobj.id]["reverse"] <= zero_threshold ): rxnobj.lower_bound = 0 if ( "forward" in self.gapfilling_penalties[rxnobj.id] - and flux_values[rxnobj.id]["forward"] <= Zero + and flux_values[rxnobj.id]["forward"] <= zero_threshold ): rxnobj.upper_bound = 0 rxnobj.update_variable_bounds() @@ -699,7 +699,7 @@ def compute_gapfilled_solution(self, flux_values=None): for reaction in self.model.reactions: if reaction.id in self.gapfilling_penalties: if ( - flux_values[reaction.id]["forward"] > Zero + flux_values[reaction.id]["forward"] > zero_threshold and "forward" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: @@ -709,7 +709,7 @@ def compute_gapfilled_solution(self, flux_values=None): logger.debug(f"Reversed gapfilled reaction: {reaction.id} >") output["reversed"][reaction.id] = ">" elif ( - flux_values[reaction.id]["reverse"] > Zero + flux_values[reaction.id]["reverse"] > zero_threshold and "reverse" in self.gapfilling_penalties[reaction.id] ): if "added" in self.gapfilling_penalties[reaction.id]: From 97b5d4fe436dc477fefbb451bffbf057660685eb Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 3 Jul 2023 00:47:30 -0500 Subject: [PATCH 28/57] Adding version printing to ModelSEEDpy so I can be sure what version of the code is running --- modelseedpy/__init__.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index aabb2c53..24c19a8e 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -11,27 +11,11 @@ from os.path import dirname as _dirname from modelseedpy.helpers import config -logging_hash = { - "debug": logging.DEBUG, - "critical": logging.CRITICAL, - "error": logging.ERROR, - "warning": logging.WARNING, - "info": logging.INFO, -} +__author__ = "Christopher Henry" +__email__ = "chenry@anl.gov" +__version__ = "0.2.2" -# Configuing modelseedpy logger -logger = logging.getLogger(__name__) -c_handler = logging.StreamHandler() -c_handler.setLevel(logging_hash[config.get("logging", "console_level")]) -c_format = logging.Formatter("%(name)s - %(levelname)s - %(message)s") -c_handler.setFormatter(c_format) -logger.addHandler(c_handler) -if config.get("logging", "log_file") == "yes": - f_handler = logging.FileHandler(config.get("logging", "filename"), mode="a") - f_handler.setLevel(logging_hash[config.get("logging", "file_level")]) - f_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") - f_handler.setFormatter(f_format) - logger.addHandler(f_handler) +print("modelseedpy", __version__) if sys.version_info[0] == 2: logger.warning( @@ -83,5 +67,3 @@ ) from modelseedpy.multiomics import MSExpression - -__version__ = "0.2.2" From a27ba8f7d1940435a0b966943e41d85b57ca5fe8 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 01:01:40 -0500 Subject: [PATCH 29/57] Resetting gapfill threshold for zero --- modelseedpy/fbapkg/gapfillingpkg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index d066c1a1..715f7667 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} -zero_threshold = 1e-8 +zero_threshold = 0 class GapfillingPkg(BaseFBAPkg): """ """ From 3dc662bc8c2852739ae58ed42c39aa5868572dab Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 11:35:27 -0500 Subject: [PATCH 30/57] Restoring small gapfilling threshold --- modelseedpy/core/msmodelutl.py | 9 +++++---- modelseedpy/fbapkg/gapfillingpkg.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index d24ac90f..e1754b0d 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -101,6 +101,7 @@ def get(model, create_if_missing=True): def __init__(self, model): self.model = model self.pkgmgr = MSPackageManager.get_pkg_mgr(model) + self.wsid = None self.atputl = None self.gfutl = None self.metabolite_hash = None @@ -548,7 +549,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.upper_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -560,7 +561,7 @@ def test_solution(self, solution, keep_changes=False): else: removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " not needed:" @@ -571,7 +572,7 @@ def test_solution(self, solution, keep_changes=False): rxnobj.lower_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " needed:" @@ -583,7 +584,7 @@ def test_solution(self, solution, keep_changes=False): else: removed_rxns.append(rxnobj) unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( + logger.info( rxn_id + solution[key][rxn_id] + " not needed:" diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index 715f7667..d066c1a1 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -25,7 +25,7 @@ ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO base_blacklist = {} -zero_threshold = 0 +zero_threshold = 1e-8 class GapfillingPkg(BaseFBAPkg): """ """ From 7f694bf515b900f2a8e27e9f0bf87a5100e7099b Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 11:46:02 -0500 Subject: [PATCH 31/57] Fixing gapfilling target issue --- modelseedpy/core/msgapfill.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 774c1ca8..09ba8c5c 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -60,6 +60,7 @@ def __init__( self.gfpkgmgr = MSPackageManager.get_pkg_mgr(self.gfmodel) # Setting target from input if default_target: + self.default_target = default_target self.gfmodel.objective = self.gfmodel.problem.Objective( self.gfmodel.reactions.get_by_id(default_target).flux_expression, direction="max", @@ -164,6 +165,8 @@ def run_gapfilling( direction="max", ) self.gfpkgmgr.getpkg("GapfillingPkg").reset_original_objective() + else: + target = self.default_target if media: self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media) if not minimum_obj: From a8563df8fd9bb31e51e93395b4f8ce420ffe4e38 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 14:53:44 -0500 Subject: [PATCH 32/57] Fixing bug in ATP correction --- modelseedpy/core/msatpcorrection.py | 39 +++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c848fbeb..38114ace 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -99,6 +99,7 @@ def __init__( output = self.modelutl.add_atp_hydrolysis(compartment) self.atp_hydrolysis = output["reaction"] + self.media_hash = {} self.atp_medias = [] if load_default_medias: self.load_default_medias() @@ -107,6 +108,7 @@ def __init__( self.atp_medias.append(media) else: self.atp_medias.append([media, 0.01]) + self.media_hash[media.id] = media self.forced_media = [] for media_id in forced_media: @@ -292,6 +294,7 @@ def evaluate_growth_media(self): ) self.media_gapfill_stats[media] = None + output[media.id] = solution.objective_value if ( @@ -339,16 +342,23 @@ def determine_growth_media(self, max_gapfilling=None): "new": {}, "reversed": {}, } - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - atp_att["core_atp_gapfilling"][media.id][ - "new" - ] = self.media_gapfill_stats[media]["new"] - atp_att["core_atp_gapfilling"][media.id][ - "reversed" - ] = self.media_gapfill_stats[media]["reversed"] + if media in self.media_gapfill_stats: + if self.media_gapfill_stats[media]: + gfscore = len( + self.media_gapfill_stats[media]["new"].keys() + ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] + else: + gfscore = 1000 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 1000, + "failed":True + } if best_score is None or gfscore < best_score: best_score = gfscore atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore @@ -511,6 +521,15 @@ def build_tests(self, multiplier=None): if multiplier is None: multiplier = self.multiplier tests = [] + if "empty" in self.media_hash: + tests.append( + { + "media": self.media_hash["empty"], + "is_max_threshold": True, + "threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + ) self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) From 7cc3c550a481df80fdf3c65757c4654f60e26927 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 15:21:26 -0500 Subject: [PATCH 33/57] Fixing ATP correction --- modelseedpy/core/msatpcorrection.py | 33 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 38114ace..c6cc5707 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -342,23 +342,22 @@ def determine_growth_media(self, max_gapfilling=None): "new": {}, "reversed": {}, } - if media in self.media_gapfill_stats: - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - atp_att["core_atp_gapfilling"][media.id][ - "new" - ] = self.media_gapfill_stats[media]["new"] - atp_att["core_atp_gapfilling"][media.id][ - "reversed" - ] = self.media_gapfill_stats[media]["reversed"] - else: - gfscore = 1000 - atp_att["core_atp_gapfilling"][media.id] = { - "score": 1000, - "failed":True - } + if self.media_gapfill_stats[media]: + gfscore = len( + self.media_gapfill_stats[media]["new"].keys() + ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) + atp_att["core_atp_gapfilling"][media.id][ + "new" + ] = self.media_gapfill_stats[media]["new"] + atp_att["core_atp_gapfilling"][media.id][ + "reversed" + ] = self.media_gapfill_stats[media]["reversed"] + else: + gfscore = 1000 + atp_att["core_atp_gapfilling"][media.id] = { + "score": 1000, + "failed":True + } if best_score is None or gfscore < best_score: best_score = gfscore atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore From e76d8242245c8bfdfdd38efea8107d85e5cb9348 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 15:37:08 -0500 Subject: [PATCH 34/57] Fixing ATP correction media selection --- modelseedpy/core/msatpcorrection.py | 54 ++++------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index c6cc5707..b5e59c97 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -336,14 +336,15 @@ def determine_growth_media(self, max_gapfilling=None): self.selected_media = [] best_score = None for media in self.media_gapfill_stats: - gfscore = 0 atp_att["core_atp_gapfilling"][media.id] = { "score": 0, "new": {}, "reversed": {}, } if self.media_gapfill_stats[media]: - gfscore = len( + atp_att["core_atp_gapfilling"][media.id][ + "score" + ] = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) atp_att["core_atp_gapfilling"][media.id][ @@ -353,67 +354,26 @@ def determine_growth_media(self, max_gapfilling=None): "reversed" ] = self.media_gapfill_stats[media]["reversed"] else: - gfscore = 1000 atp_att["core_atp_gapfilling"][media.id] = { "score": 1000, "failed":True } - if best_score is None or gfscore < best_score: - best_score = gfscore - atp_att["core_atp_gapfilling"][media.id]["score"] = gfscore + if best_score is None or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score: + best_score = atp_att["core_atp_gapfilling"][media.id]["score"] + if self.max_gapfilling is None: self.max_gapfilling = best_score logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: - gfscore = 0 - if self.media_gapfill_stats[media]: - gfscore = len( - self.media_gapfill_stats[media]["new"].keys() - ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) - - logger.debug(f"media gapfilling score: {media.id}: {gfscore}") - if gfscore <= self.max_gapfilling and gfscore <= ( + if atp_att["core_atp_gapfilling"][media.id]["score"] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id]["score"] <= ( best_score + self.gapfilling_delta ): self.selected_media.append(media) atp_att["selected_media"][media.id] = 0 self.modelutl.save_attributes(atp_att, "ATP_analysis") - if MSATPCorrection.DEBUG: - with open("atp_att_debug.json", "w") as outfile: - json.dump(atp_att, outfile) - - def determine_growth_media2(self, max_gapfilling=None): - """ - Decides which of the test media to use as growth conditions for this model - :return: - """ - - def scoring_function(media): - return len(self.media_gapfill_stats[media]["new"].keys()) + 0.5 * len( - self.media_gapfill_stats[media]["reversed"].keys() - ) - - if not max_gapfilling: - max_gapfilling = self.max_gapfilling - self.selected_media = [] - media_scores = dict( - (media, scoring_function(media)) - for media in self.media_gapfill_stats - if self.media_gapfill_stats[media] - ) - best_score = min(media_scores.values()) - if max_gapfilling is None or max_gapfilling > ( - best_score + self.gapfilling_delta - ): - max_gapfilling = best_score + self.gapfilling_delta - for media in media_scores: - score = media_scores[media] - logger.info(score, best_score, max_gapfilling) - if score <= max_gapfilling: - self.selected_media.append(media) def apply_growth_media_gapfilling(self): """ From c4565407d6420cd1e8073899d86f11ffc653a871 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 4 Jul 2023 15:40:43 -0500 Subject: [PATCH 35/57] Running black --- modelseedpy/core/msatpcorrection.py | 21 +++++++++++++-------- modelseedpy/core/msgapfill.py | 2 +- modelseedpy/core/msmodelutl.py | 2 +- modelseedpy/fbapkg/gapfillingpkg.py | 1 + 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index b5e59c97..232d4b3f 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -294,7 +294,7 @@ def evaluate_growth_media(self): ) self.media_gapfill_stats[media] = None - + output[media.id] = solution.objective_value if ( @@ -342,9 +342,7 @@ def determine_growth_media(self, max_gapfilling=None): "reversed": {}, } if self.media_gapfill_stats[media]: - atp_att["core_atp_gapfilling"][media.id][ - "score" - ] = len( + atp_att["core_atp_gapfilling"][media.id]["score"] = len( self.media_gapfill_stats[media]["new"].keys() ) + 0.5 * len(self.media_gapfill_stats[media]["reversed"].keys()) atp_att["core_atp_gapfilling"][media.id][ @@ -356,18 +354,25 @@ def determine_growth_media(self, max_gapfilling=None): else: atp_att["core_atp_gapfilling"][media.id] = { "score": 1000, - "failed":True + "failed": True, } - if best_score is None or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score: + if ( + best_score is None + or atp_att["core_atp_gapfilling"][media.id]["score"] < best_score + ): best_score = atp_att["core_atp_gapfilling"][media.id]["score"] - + if self.max_gapfilling is None: self.max_gapfilling = best_score logger.info(f"max_gapfilling: {self.max_gapfilling}, best_score: {best_score}") for media in self.media_gapfill_stats: - if atp_att["core_atp_gapfilling"][media.id]["score"] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id]["score"] <= ( + if atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= self.max_gapfilling and atp_att["core_atp_gapfilling"][media.id][ + "score" + ] <= ( best_score + self.gapfilling_delta ): self.selected_media.append(media) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 09ba8c5c..4448b1e7 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -236,7 +236,7 @@ def run_multi_gapfill( media_list, target=None, minimum_objectives={}, - default_minimum_objective = None, + default_minimum_objective=None, binary_check=False, prefilter=True, check_for_growth=True, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index e1754b0d..ac232de8 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -89,7 +89,7 @@ def search_name(name): @staticmethod def get(model, create_if_missing=True): if isinstance(model, MSModelUtil): - return model + return model if model in MSModelUtil.mdlutls: return MSModelUtil.mdlutls[model] elif create_if_missing: diff --git a/modelseedpy/fbapkg/gapfillingpkg.py b/modelseedpy/fbapkg/gapfillingpkg.py index d066c1a1..74a097df 100644 --- a/modelseedpy/fbapkg/gapfillingpkg.py +++ b/modelseedpy/fbapkg/gapfillingpkg.py @@ -27,6 +27,7 @@ base_blacklist = {} zero_threshold = 1e-8 + class GapfillingPkg(BaseFBAPkg): """ """ From c8b67b563862bdc36a2066740880dc6921cc3923 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Tue, 4 Jul 2023 18:01:40 -0500 Subject: [PATCH 36/57] minor --- modelseedpy/core/msatpcorrection.py | 14 +++++----- modelseedpy/core/msgapfill.py | 12 +++------ modelseedpy/core/msmodelutl.py | 42 +++++++---------------------- 3 files changed, 21 insertions(+), 47 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 46bd32ea..63b6933d 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -413,15 +413,15 @@ def apply_growth_media_gapfilling(self): """ self.cumulative_core_gapfilling = ( [] - ) # TODO: In case someone runs ATP correction twice with different parameters, before resetting this, maybe check if any of these reactions are already in the model and remove them so we're starting fresh??? + ) + # TODO: In case someone runs ATP correction twice with different parameters, + # before resetting this, maybe check if any of these reactions are already in + # the model and remove them so we're starting fresh??? for media in self.selected_media: - if ( - media in self.media_gapfill_stats - and self.media_gapfill_stats[media] - and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 - ): + stats = self.media_gapfill_stats.get(media, None) + if stats is not None and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0: self.msgapfill.integrate_gapfill_solution( - self.media_gapfill_stats[media], + stats, self.cumulative_core_gapfilling, link_gaps_to_objective=False, ) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 92890c0e..68b9ba9a 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -15,6 +15,7 @@ class MSGapfill: + @staticmethod def gapfill_count(solution): total = 0 @@ -184,12 +185,7 @@ def run_gapfilling( # Running gapfilling and checking solution sol = self.gfmodel.optimize() - logger.debug( - "gapfill solution objective value %f (%s) for media %s", - sol.objective_value, - sol.status, - media, - ) + logger.debug(f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}") if sol.status != "optimal": logger.warning("No solution found for %s", media) return None @@ -212,7 +208,7 @@ def run_gapfilling( ) return None - # Running binary check to reduce solution to minimal reaction soltuion + # Running binary check to reduce solution to minimal reaction solution if binary_check: self.last_solution = self.gfpkgmgr.getpkg( "GapfillingPkg" @@ -221,7 +217,7 @@ def run_gapfilling( # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target - self.last_solution["minobjective"] = minimum_obj + self.last_solution["minobjective"] = self.gfpkgmgr.getpkg("GapfillingPkg").parameters['minimum_obj'] self.last_solution["binary_check"] = binary_check return self.last_solution diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 371abeb7..031a9e58 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -538,55 +538,33 @@ def test_solution(self, solution, keep_changes=False): objective = tempmodel.slim_optimize() logger.debug("Starting objective:" + str(objective)) types = ["new", "reversed"] + for key in types: for rxn_id in solution[key]: rxnobj = tempmodel.reactions.get_by_id(rxn_id) - if solution[key][rxn_id] == ">": + solution_key_rxn_id = solution[key][rxn_id] # could call this direction instead but wasn't 100% sure + if solution_key_rxn_id == ">": original_bound = rxnobj.upper_bound rxnobj.upper_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( - rxn_id - + solution[key][rxn_id] - + " needed:" - + str(objective) - + " with min obj:" - + str(solution["minobjective"]) - ) + logger.debug(f'{rxn_id}{solution_key_rxn_id} needed:{objective} with min obj:{solution["minobjective"]}') rxnobj.upper_bound = original_bound else: removed_rxns.append(rxnobj) - unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( - rxn_id - + solution[key][rxn_id] - + " not needed:" - + str(objective) - ) + unneeded.append([rxn_id, solution_key_rxn_id, key]) + logger.debug(f'{rxn_id}{solution_key_rxn_id} not needed:{objective}') else: original_bound = rxnobj.lower_bound rxnobj.lower_bound = 0 objective = tempmodel.slim_optimize() if objective < solution["minobjective"]: - logger.debug( - rxn_id - + solution[key][rxn_id] - + " needed:" - + str(objective) - + " with min obj:" - + str(solution["minobjective"]) - ) + logger.debug(f'{rxn_id}{solution_key_rxn_id} needed:{objective} with min obj:{solution["minobjective"]}') rxnobj.lower_bound = original_bound else: removed_rxns.append(rxnobj) - unneeded.append([rxn_id, solution[key][rxn_id], key]) - logger.debug( - rxn_id - + solution[key][rxn_id] - + " not needed:" - + str(objective) - ) + unneeded.append([rxn_id, solution_key_rxn_id, key]) + logger.debug(f'{rxn_id}{solution_key_rxn_id} not needed:{objective}') if keep_changes: tempmodel.remove_reactions(removed_rxns) for items in unneeded: @@ -726,7 +704,7 @@ def test_single_condition(self, condition, apply_condition=True, model=None): if model.solver.status != "optimal": self.printlp(condition["media"].id + "-Testing-Infeasible.lp") logger.critical( - ondition["media"].id + condition["media"].id + "testing leads to infeasible problem. LP file printed to debug!" ) return False From 1851286a03871a758d67e81750dfd4392d6f6da7 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:20:51 -0500 Subject: [PATCH 37/57] added e0 as extracell search for cobrapy --- .gitignore | 2 ++ modelseedpy/__init__.py | 6 +++++ tests/core/test_msgapfill.py | 50 ------------------------------------ 3 files changed, 8 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index 6390162b..5589324a 100644 --- a/.gitignore +++ b/.gitignore @@ -131,3 +131,5 @@ dmypy.json # Pyre type checker .pyre/ + +*.lp \ No newline at end of file diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 24c19a8e..1973b2a3 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -5,6 +5,7 @@ # set the warning format to be on a single line import sys import logging +import cobra import warnings as _warnings from os import name as _name from os.path import abspath as _abspath @@ -15,6 +16,8 @@ __email__ = "chenry@anl.gov" __version__ = "0.2.2" +logger = logging.getLogger(__name__) + print("modelseedpy", __version__) if sys.version_info[0] == 2: @@ -25,6 +28,9 @@ "still work but we will no longer actively maintain Python 2 support." ) +if 'e0' not in cobra.medium.annotations.compartment_shortlist['e']: + cobra.medium.annotations.compartment_shortlist['e'].append('e0') + import modelseedpy from modelseedpy.core import ( RastClient, diff --git a/tests/core/test_msgapfill.py b/tests/core/test_msgapfill.py index 1ee694bd..622a0924 100644 --- a/tests/core/test_msgapfill.py +++ b/tests/core/test_msgapfill.py @@ -1,54 +1,4 @@ # -*- coding: utf-8 -*- -""" -from glob import glob -os.environ["HOME"] = 'C:\\Users\\Andrew Freiburger\\Dropbox\\My PC (DESKTOP-M302P50)\\Documents\\UVic Civil Engineering\\Internships\\Agronne\\cobrakbase' -import cobrakbase -token = 'xx' -kbase = cobrakbase.KBaseAPI(token) -import re - -# define the example individual model and associated API media package -model = kbase.get_from_ws('e_coli_core.kb', 95098) -model.solver = 'optlang-cplex' - -# import the modelseedpy packages -import modelseedpy -from modelseedpy.core.msgapfill import MSGapfill -gapfill = MSGapfill(model) - -def test_init(): - assert type(gapfill.model) is cobrakbase.core.kbasefba.fbamodel.FBAModel - assert type(gapfill.blacklist) is list - assert type(gapfill.solutions) is dict - -def test_run_gapfilling_and_integrate_gapfill_solution(): - solutions = gapfill.run_gapfilling() - - # test that the objective expression is correctly set - if solutions is not None: - assert type(solutions) is dict - - # verify the integrate_gapfill_solution function - model_2 = gapfill.integrate_gapfill_solution(solutions) - assert type(model_2) is cobrakbase.core.kbasefba.fbamodel.FBAModel - - for reaction in solutions['reversed']: - if solution["reversed"][reaction] == ">": - assert reaction.upper_bound == 100 - else: - assert reaction.lower_bound == -100 - - for reaction in solutions['new']: - if solution["new"][reaction] == ">": - assert reaction.upper_bound == 100 - assert reaction.lower_bound == 0 - else: - assert reaction.upper_bound == 0 - assert reaction.lower_bound == -100 - -def test_gapfill(): - pass -""" import os import pytest import json From 7e0e21632d2580c7d3ab5337a06c632b232508c1 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:21:16 -0500 Subject: [PATCH 38/57] black --- modelseedpy/__init__.py | 4 ++-- modelseedpy/core/msatpcorrection.py | 9 +++++---- modelseedpy/core/msgapfill.py | 9 ++++++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 1973b2a3..665f000c 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -28,8 +28,8 @@ "still work but we will no longer actively maintain Python 2 support." ) -if 'e0' not in cobra.medium.annotations.compartment_shortlist['e']: - cobra.medium.annotations.compartment_shortlist['e'].append('e0') +if "e0" not in cobra.medium.annotations.compartment_shortlist["e"]: + cobra.medium.annotations.compartment_shortlist["e"].append("e0") import modelseedpy from modelseedpy.core import ( diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 2f17d576..727c3e33 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -385,15 +385,16 @@ def apply_growth_media_gapfilling(self): Applies the gapfilling to all selected growth media :return: """ - self.cumulative_core_gapfilling = ( - [] - ) + self.cumulative_core_gapfilling = [] # TODO: In case someone runs ATP correction twice with different parameters, # before resetting this, maybe check if any of these reactions are already in # the model and remove them so we're starting fresh??? for media in self.selected_media: stats = self.media_gapfill_stats.get(media, None) - if stats is not None and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0: + if ( + stats is not None + and MSGapfill.gapfill_count(self.media_gapfill_stats[media]) > 0 + ): self.msgapfill.integrate_gapfill_solution( stats, self.cumulative_core_gapfilling, diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index c6dd0a17..cb0824a4 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -15,7 +15,6 @@ class MSGapfill: - @staticmethod def gapfill_count(solution): total = 0 @@ -191,7 +190,9 @@ def run_gapfilling( # Running gapfilling and checking solution sol = self.gfmodel.optimize() - logger.debug(f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}") + logger.debug( + f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}" + ) if sol.status != "optimal": logger.warning("No solution found for %s", media) return None @@ -223,7 +224,9 @@ def run_gapfilling( # Setting last solution data self.last_solution["media"] = media self.last_solution["target"] = target - self.last_solution["minobjective"] = self.gfpkgmgr.getpkg("GapfillingPkg").parameters['minimum_obj'] + self.last_solution["minobjective"] = self.gfpkgmgr.getpkg( + "GapfillingPkg" + ).parameters["minimum_obj"] self.last_solution["binary_check"] = binary_check return self.last_solution From afcaa7a2fe060f58195994638381090118b32c85 Mon Sep 17 00:00:00 2001 From: Filipe Liu Date: Thu, 6 Jul 2023 12:33:44 -0500 Subject: [PATCH 39/57] pre-commit --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5589324a..d5d6d7bd 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,4 @@ dmypy.json # Pyre type checker .pyre/ -*.lp \ No newline at end of file +*.lp From 38c5a7a48d5cc3567b179a5b9530b6506c2f7e17 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 6 Jul 2023 23:38:21 -0500 Subject: [PATCH 40/57] Fixing threshold and adding empty media and fixing thresholds --- modelseedpy/core/msatpcorrection.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 232d4b3f..d22e816e 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -39,7 +39,7 @@ "Akg.O2": 2, "LLac.O2": 2, "Dlac.O2": 2, - "For.O2": 2, + "For.O2": 1.875, "For.NO3": 1.5, "Pyr.NO": 2.5, "Pyr.NO2": 2.5, @@ -109,7 +109,12 @@ def __init__( else: self.atp_medias.append([media, 0.01]) self.media_hash[media.id] = media - + if "empty" not in self.media_hash: + media = MSMedia.from_dict({}) + media.id = "empty" + media.name = "empty" + self.media_hash[media.id] = media + self.forced_media = [] for media_id in forced_media: for media in self.atp_medias: @@ -500,11 +505,14 @@ def build_tests(self, multiplier=None): obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) + threshold = multiplier * obj_value + if threshold == 0: + threshold += 0.00001 tests.append( { "media": media, "is_max_threshold": True, - "threshold": multiplier * obj_value, + "threshold": threshold, "objective": self.atp_hydrolysis.id, } ) From 787c6bec657760a1ced0845ff8512c4922d042b1 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Thu, 6 Jul 2023 23:59:47 -0500 Subject: [PATCH 41/57] Fixing empty media --- modelseedpy/core/msatpcorrection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index d22e816e..653de451 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -499,6 +499,11 @@ def build_tests(self, multiplier=None): "objective": self.atp_hydrolysis.id, } ) + atp_att["tests"]["empty"] = { + "threshold": 0.00001, + "objective": self.atp_hydrolysis.id, + } + self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) From 2f8aeeeaf1756842f880afde103f416404ce6698 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 7 Jul 2023 11:01:24 -0500 Subject: [PATCH 42/57] Fixing git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index d5d6d7bd..87619079 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,7 @@ dmypy.json # Pyre type checker .pyre/ +.pydevproject +.settings/* +*data/* *.lp From 91ac4998cd78a083f98131fb49a5aa2e705c6db5 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Fri, 7 Jul 2023 23:05:57 -0500 Subject: [PATCH 43/57] Making thresholds on tests more flexible, including supporting media specific multipliers for the threshold --- modelseedpy/core/msatpcorrection.py | 34 +++++++++++++++++++---------- modelseedpy/core/msbuilder.py | 7 ------ 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index 31cb7905..aa10acac 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -47,6 +47,11 @@ "Pyr.SO4": 2.5, } +default_threshold_multipiers = { + "Glc": 2, + "default":1.2, +} + class MSATPCorrection: @@ -287,11 +292,11 @@ def evaluate_growth_media(self): media_list = [] min_objectives = {} for media, minimum_obj in self.atp_medias: - logger.info("evaluate media %s", media) + logger.debug("evaluate media %s", media) pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - logger.info("model.medium %s", self.model.medium) + logger.debug("model.medium %s", self.model.medium) solution = self.model.optimize() - logger.info( + logger.debug( "evaluate media %s - %f (%s)", media.id, solution.objective_value, @@ -467,7 +472,7 @@ def restore_noncore_reactions(self, noncore=True, othercompartment=True): reaction.lower_bound = self.original_bounds[reaction.id][0] reaction.upper_bound = self.original_bounds[reaction.id][1] - def build_tests(self, multiplier=None): + def build_tests(self,multiplier_hash_override={}): """Build tests based on ATP media evaluations Parameters @@ -483,13 +488,16 @@ def build_tests(self, multiplier=None): Raises ------ """ + #Applying threshold multiplier + for key in default_threshold_multipiers: + if key not in multiplier_hash_override: + multiplier_hash_override[key] = default_threshold_multipiers[key] + #Initialzing atp test attributes atp_att = self.modelutl.get_attributes( "ATP_analysis", {"tests": {}, "selected_media": {}, "core_atp_gapfilling": {}}, ) - - if multiplier is None: - multiplier = self.multiplier + #Initializing tests and adding empty media every time tests = [] if "empty" in self.media_hash: tests.append( @@ -504,13 +512,18 @@ def build_tests(self, multiplier=None): "threshold": 0.00001, "objective": self.atp_hydrolysis.id, } - + #Setting objective to ATP hydrolysis self.model.objective = self.atp_hydrolysis.id for media in self.selected_media: + #Setting multiplier for test threshold + multiplier = multiplier_hash_override["default"] + if media.id in multiplier_hash_override: + multiplier = multiplier_hash_override[media.id] + #Constraining model exchanges for media self.modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) + #Computing core ATP production obj_value = self.model.slim_optimize() logger.debug(f"{media.name} = {obj_value};{multiplier}") - logger.debug("Test:" + media.id + ";" + str(multiplier * obj_value)) threshold = multiplier * obj_value if threshold == 0: threshold += 0.00001 @@ -527,9 +540,8 @@ def build_tests(self, multiplier=None): "threshold": multiplier * obj_value, "objective": self.atp_hydrolysis.id, } - + #Saving test attributes to the model self.modelutl.save_attributes(atp_att, "ATP_analysis") - return tests def run_atp_correction(self): diff --git a/modelseedpy/core/msbuilder.py b/modelseedpy/core/msbuilder.py index e376ae0b..3a78188a 100644 --- a/modelseedpy/core/msbuilder.py +++ b/modelseedpy/core/msbuilder.py @@ -850,22 +850,15 @@ def build( complex_groups = self.build_complex_groups( self.reaction_to_complex_sets.values() ) - if "bio1" in cobra_model.reactions: - print("1:Biomass present!!") metabolic_reactions = self.build_metabolic_reactions() cobra_model.add_reactions(metabolic_reactions) - if "bio1" in cobra_model.reactions: - print("2:Biomass present!!") non_metabolic_reactions = self.build_non_metabolite_reactions( cobra_model, allow_all_non_grp_reactions ) cobra_model.add_reactions(non_metabolic_reactions) - if "bio1" in cobra_model.reactions: - print("3:Biomass present!!") cobra_model.add_groups(list(complex_groups.values())) self.add_exchanges_to_model(cobra_model) - print("Adding biomass!!") biomass_reactions = [] for rxn_biomass in self.template.biomasses: reaction = rxn_biomass.build_biomass( From 55ae63f60d3039014187177eebfd6a8048455e67 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 9 Jul 2023 23:24:28 -0500 Subject: [PATCH 44/57] Improving commenting and improving multi gapfilling --- modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgapfill.py | 46 +++++++++++++++++++++++++++-- modelseedpy/core/msmodelutl.py | 11 +++---- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index aa10acac..dd381b18 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -408,7 +408,7 @@ def apply_growth_media_gapfilling(self): self.msgapfill.integrate_gapfill_solution( stats, self.cumulative_core_gapfilling, - link_gaps_to_objective=False, + link_gaps_to_objective=False ) core_gf = { "count": len(self.cumulative_core_gapfilling), diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index cb0824a4..cc17df98 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) logger.setLevel( - logging.WARNING + logging.INFO#WARNING ) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO @@ -158,6 +158,22 @@ def run_gapfilling( prefilter=True, check_for_growth=True, ): + """Run gapfilling on a single media condition to force the model to achieve a nonzero specified objective + Parameters + ---------- + media : MSMedia + Media in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_obj : double + Value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ # Setting target and media if specified if target: self.gfmodel.objective = self.gfmodel.problem.Objective( @@ -240,6 +256,25 @@ def run_multi_gapfill( prefilter=True, check_for_growth=True, ): + """Run gapfilling across an array of media conditions ultimately using different integration policies: simultaneous gapfilling, independent gapfilling, cumulative gapfilling + Parameters + ---------- + media_list : [MSMedia] + List of the medias in which the model should be gapfilled + target : string + Name or expression describing the reaction or combination of reactions to the optimized + minimum_objectives : {string - media ID : double - minimum objective value} + Media-specific minimal objective thresholds that the model must be gapfilled to achieve + default_minimum_objective : double + Default value to use for the minimal objective threshold that the model must be gapfilled to achieve + binary_check : bool + Indicates if the solution should be checked to ensure it is minimal in the number of reactions involved + prefilter : bool + Indicates if the gapfilling database should be prefiltered using the tests provided in the MSGapfill constructor before running gapfilling + check_for_growth : bool + Indicates if the model should be checked to ensure that the resulting gapfilling solution produces a nonzero objective + """ + if not default_minimum_objective: default_minimum_objective = self.default_minimum_objective first = True @@ -250,11 +285,11 @@ def run_multi_gapfill( minimum_obj = minimum_objectives[item] if first: solution_dictionary[item] = self.run_gapfilling( - item, target, minimum_obj, binary_check, True, True + item, target, minimum_obj, binary_check, prefilter, check_for_growth ) else: solution_dictionary[item] = self.run_gapfilling( - item, None, minimum_obj, binary_check, False, True + item, None, minimum_obj, binary_check, False, check_for_growth ) false = False return solution_dictionary @@ -303,6 +338,8 @@ def integrate_gapfill_solution( cumulative_solution.append([rxn_id, "<"]) rxn.upper_bound = 0 rxn.lower_bound = -100 + + #Sometimes for whatever reason, the solution includes useless reactions that should be stripped out before saving the final model unneeded = self.mdlutl.test_solution( solution, keep_changes=True ) # Strips out unneeded reactions - which undoes some of what is done above @@ -311,8 +348,11 @@ def integrate_gapfill_solution( if item[0] == oitem[0] and item[1] == oitem[1]: cumulative_solution.remove(oitem) break + #Adding the gapfilling solution data to the model, which is needed for saving the model in KBase self.mdlutl.add_gapfilling(solution) + #Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: + logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"]+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"] not in gf_sensitivity: gf_sensitivity[solution["media"]] = {} diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index ac232de8..3d40c60b 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -285,12 +285,13 @@ def get_attributes(self, key=None, default=None): self.attributes[key] = default return self.attributes[key] - def save_attributes(self, value, key=None): + def save_attributes(self, value=None, key=None): attributes = self.get_attributes() - if key: - attributes[key] = value - else: - self.attributes = value + if value: + if key: + attributes[key] = value + else: + self.attributes = value if hasattr(self.model, "attributes"): self.model.attributes = self.attributes From 32c590f45689a673d9acc91822a3af2418a8e70d Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 10 Jul 2023 00:46:13 -0500 Subject: [PATCH 45/57] Fixing bug in log message --- modelseedpy/core/msgapfill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index cc17df98..10bcb9a2 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -352,7 +352,7 @@ def integrate_gapfill_solution( self.mdlutl.add_gapfilling(solution) #Testing which gapfilled reactions are needed to produce each reactant in the objective function if link_gaps_to_objective: - logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"]+" for target "+solution["target"]) + logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) if solution["media"] not in gf_sensitivity: gf_sensitivity[solution["media"]] = {} From 239ac6eb8f1e77adf593dbd55a30cc1ca5a71c5d Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 14:04:13 -0500 Subject: [PATCH 46/57] Fixing various media and element package --- modelseedpy/core/msmedia.py | 14 +++++++++++++- modelseedpy/core/msmodelutl.py | 4 ---- modelseedpy/fbapkg/basefbapkg.py | 9 +++++++-- modelseedpy/fbapkg/elementuptakepkg.py | 19 ++++++++++++++----- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index 488aad57..aeac7092 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList +from builtins import None logger = logging.getLogger(__name__) @@ -21,7 +22,18 @@ def maxFlux(self): def minFlux(self): # TODO: will be removed later just for old methods return -self.upper_bound - + + def get_mdl_exchange_hash(self,model_or_mdlutl): + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + mets = modelutl.find_met(self.id) + output = {} + exchange_hash = modelutl.exchange_hash() + for met in mets: + if met in exchange_hash: + output[met] = exchange_hash[met] + return output class MSMedia: def __init__(self, media_id, name=""): diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 3d40c60b..9c69a51f 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -1070,14 +1070,10 @@ def run_biomass_dependency_test( ): tempmodel.objective = original_objective objective = tempmodel.slim_optimize() - with open("FlexBiomass2.lp", "w") as out: - out.write(str(tempmodel.solver)) if objective > 0: target_rxn.lower_bound = 0.1 tempmodel.objective = min_flex_obj solution = tempmodel.optimize() - with open("FlexBiomass3.lp", "w") as out: - out.write(str(tempmodel.solver)) biocpds = [] for reaction in tempmodel.reactions: if reaction.id[0:5] == "FLEX_" and ( diff --git a/modelseedpy/fbapkg/basefbapkg.py b/modelseedpy/fbapkg/basefbapkg.py index 662696f3..77effe32 100644 --- a/modelseedpy/fbapkg/basefbapkg.py +++ b/modelseedpy/fbapkg/basefbapkg.py @@ -33,8 +33,13 @@ class BaseFBAPkg: def __init__( self, model, name, variable_types={}, constraint_types={}, reaction_types={} ): - self.model = model - self.modelutl = MSModelUtil.get(model) + if isinstance(model, MSModelUtil): + self.model = model.model + self.modelutl = model + else: + self.model = model + self.modelutl = MSModelUtil.get(model) + self.name = name self.pkgmgr = MSPackageManager.get_pkg_mgr(model) diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 66e01035..4eb27e44 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -16,21 +16,30 @@ def __init__(self, model): {"elements": "string"}, ) - def build_package(self, element_limits): + def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): + #Converting exception compounds list into exception reaction list + exchange_hash = self.modelutl.exchange_hash() + for met in exception_compounds: + if met in exchange_hash: + exception_reactions.append(exchange_hash[met]) + #Now building or rebuilding constraints for element in element_limits: if element not in self.variables["elements"]: self.build_variable(element, element_limits[element]) - self.build_constraint(element) + for element in element_limits: + #This call will first remove existing constraints then build the new constraint + self.build_constraint(element,exception_reactions) def build_variable(self, element, limit): return BaseFBAPkg.build_variable( self, "elements", 0, limit, "continuous", element ) - def build_constraint(self, element): + def build_constraint(self, element,exception_reactions): coef = {self.variables["elements"][element]: -1} - for reaction in self.model.reactions: - if reaction.id[0:3] == "EX_": + rxnlist = self.modelutl.exchange_list() + for reaction in rxnlist: + if reaction not in exception_reactions: total = 0 for metabolite in reaction.metabolites: elements = metabolite.elements From 3f25882b8646b4f777591c5fb1a7e818c0f7aa36 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 14:07:18 -0500 Subject: [PATCH 47/57] Fixing weird import --- modelseedpy/core/msmedia.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index aeac7092..48fa90ad 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList -from builtins import None logger = logging.getLogger(__name__) From b0311a317be2de2a77b008d1585deaed4e1727d0 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:06:14 -0500 Subject: [PATCH 48/57] Improving phenotype simulations and gapfilling --- modelseedpy/core/msgrowthphenotypes.py | 310 +++++++++++++++++++------ 1 file changed, 238 insertions(+), 72 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 6c30bb2a..13d540b1 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -9,7 +9,9 @@ from modelseedpy.core.msgapfill import MSGapfill logger = logging.getLogger(__name__) - +logger.setLevel( + logging.INFO +) # When debugging - set this to INFO then change needed messages below from DEBUG to INFO class MSGrowthPhenotype: def __init__( @@ -33,102 +35,186 @@ def __init__( self.additional_compounds = additional_compounds self.parent = parent - def build_media(self): + def build_media(self,include_base_media=True): + """Builds media object to use when simulating the phenotype + Parameters + ---------- + include_base_media : bool + Indicates whether to include the base media for the phenotype set in the formulation + """ cpd_hash = {} for cpd in self.additional_compounds: cpd_hash[cpd] = 100 full_media = MSMedia.from_dict(cpd_hash) - if self.media != None: + if self.media: full_media.merge(self.media, overwrite_overlap=False) - if self.parent != None and self.parent.base_media != None: - full_media.merge(parent.base_media, overwrite_overlap=False) + if full_media: + if self.parent and self.parent.base_media: + full_media.merge(parent.base_media, overwrite_overlap=False) return full_media def simulate( self, - modelutl, - growth_threshold=0.001, + model_or_modelutl, + objective, + growth_multiplier=10, add_missing_exchanges=False, save_fluxes=False, pfba=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - media = self.build_media() - output = {"growth": None, "class": None, "missing_transports": []} + """Simulates a single phenotype + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + pfba : bool + Runs pFBA to compute fluxes after initially solving for growth + """ + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + + #Setting objective + if objective: + modelutl.model.objective = objective + + #Building full media and adding missing exchanges + output = {"growth": None, "class": None, "missing_transports": [], "baseline_growth": None} + full_media = self.build_media() if add_missing_exchanges: - output["missing_transports"] = modelutl.add_missing_exchanges(media) - pkgmgr = MSPackageManager.get_pkg_mgr(modelutl.model) - pkgmgr.getpkg("KBaseMediaPkg").build_package( - media, self.parent.base_uptake, self.parent.base_excretion - ) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - solution = modelutl.model.optimize() - output["growth"] = solution.objective_value - if solution.objective_value > 0 and pfba: - solution = cobra.flux_analysis.pfba(modelutl.model) - if save_fluxes: - output["fluxes"] = solution.fluxes - if output["growth"] >= growth_threshold: + output["missing_transports"] = modelutl.add_missing_exchanges(full_media) + + #Getting basline growth + output["baseline_growth"] = 0.001 + if self.parent: + output["baseline_growth"] = self.parent.baseline_growth(modelutl,True) + + #Building specific media and setting compound exception list + if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: + reaction_exceptions = [] + specific_media = self.build_media(False) + for mediacpd in specific_media.mediacompounds: + output = mediacpd.get_mdl_exchange_hash(self,modelutl) + for mdlcpd in output: + reaction_exceptions.append(output[mdlcpd]) + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) + + #Applying media + if self.parent: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + full_media, self.parent.base_uptake, self.parent.base_excretion + ) + else: + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + full_media,0,1000 + ) + + with modelutl.model: + #Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + #Optimizing model + solution = modelutl.model.optimize() + output["growth"] = solution.objective_value + if solution.objective_value > 0 and pfba: + solution = cobra.flux_analysis.pfba(modelutl.model) + if save_fluxes: + output["fluxes"] = solution.fluxes + + #Determining phenotype class + if output["growth"] >= output["baseline_growth"]*growth_multiplier: if self.growth > 0: output["class"] = "CP" - else: + elif self.growth == 0: output["class"] = "FP" + else: + output["class"] = "GROWTH" else: if self.growth > 0: output["class"] = "FN" - else: + elif self.growth == 0: output["class"] = "CN" + else: + output["class"] = "NOGROWTH" return output def gapfill_model_for_phenotype( self, - modelutl, - default_gapfill_templates, + msgapfill, + objective, test_conditions, - default_gapfill_models=[], - blacklist=[], - growth_threshold=0.001, + growth_multiplier=10, add_missing_exchanges=False, ): - if not isinstance(modelutl, MSModelUtil): - modelutl = MSModelUtil(modelutl) - self.gapfilling = MSGapfill( - modelutl.model, - default_gapfill_templates, - default_gapfill_models, - test_conditions, - modelutl.reaction_scores(), - blacklist, - ) - media = self.build_media() - if add_missing_exchanges: - modelutl.add_missing_exchanges(media) - for gene in self.gene_ko: - if gene in modelutl.model.genes: - geneobj = modelutl.model.genes.get_by_id(gene) - geneobj.knock_out() - gfresults = self.gapfilling.run_gapfilling(media, None) - if gfresults is None: + """Gapfills the model to permit this single phenotype to be positive + Parameters + ---------- + msgapfill : MSGapfill + Fully configured gapfilling object + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + objective : string + Expression for objective to be activated by gapfilling + """ + #First simulate model without gapfilling to assess ungapfilled growth + output = self.simulate(msgapfill.mdlutl,objective,growth_multiplier,add_missing_exchanges) + if output["growth"] >= output["baseline_growth"]*growth_multiplier: + #No gapfilling needed - original model grows without gapfilling + return {"reversed": {}, "new": {},"media": self.build_media(), "target":objective, "minobjective": output["baseline_growth"]*growth_multiplier, "binary_check":False} + + #Now pulling the gapfilling configured model from MSGapfill + gfmodelutl = MSModelUtil.get(msgapfill.gfmodel) + #Saving the gapfill objective because this will be replaced when the simulation runs + gfobj = gfmodelutl.model.objective + #Running simulate on gapfill model to add missing exchanges and set proper media and uptake limit constraints + output = self.simulate(modelutl,objective,growth_multiplier,add_missing_exchanges) + #If the gapfilling model fails to achieve the minimum growth, then no solution exists + if output["growth"] < output["baseline_growth"]*growth_multiplier: logger.warning( "Gapfilling failed with the specified model, media, and target reaction." ) - return self.gapfilling.integrate_gapfill_solution(gfresults) - + return None + + #Running the gapfilling itself + full_media = self.build_media() + with modelutl.model: + #Applying gene knockouts + for gene in self.gene_ko: + if gene in modelutl.model.genes: + geneobj = modelutl.model.genes.get_by_id(gene) + geneobj.knock_out() + + gfresults = self.gapfilling.run_gapfilling(media,None,minimum_obj=output["baseline_growth"]*growth_multiplier) + if gfresults is None: + logger.warning( + "Gapfilling failed with the specified model, media, and target reaction." + ) + + return gfresults class MSGrowthPhenotypes: - def __init__(self, base_media=None, base_uptake=0, base_excretion=1000): + def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): self.base_media = base_media self.phenotypes = DictList() self.base_uptake = base_uptake self.base_excretion = base_excretion + self.atom_limits = global_atom_limits + self.baseline_growth_data = {} @staticmethod - def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion,global_atom_limits) new_phenos = [] for cpd in compounds: newpheno = MSGrowthPhenotype(cpd, None, compounds[cpd], [], [cpd]) @@ -137,8 +223,8 @@ def from_compound_hash(compounds, base_media, base_uptake=0, base_excretion=1000 return growthpheno @staticmethod - def from_kbase_object(data, kbase_api): - growthpheno = MSGrowthPhenotypes(None, 0, 1000) + def from_kbase_object(data, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) new_phenos = [] for pheno in data["phenotypes"]: media = kbase_api.get_from_ws(pheno["media_ref"], None) @@ -156,9 +242,9 @@ def from_kbase_object(data, kbase_api): return growthpheno @staticmethod - def from_kbase_file(filename, kbase_api): + def from_kbase_file(filename, kbase_api,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): # TSV file with the following headers:media mediaws growth geneko addtlCpd - growthpheno = MSGrowthPhenotypes(base_media, 0, 1000) + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) headings = [] new_phenos = [] with open(filename) as f: @@ -190,8 +276,8 @@ def from_kbase_file(filename, kbase_api): return growthpheno @staticmethod - def from_ms_file(filename, basemedia, base_uptake=0, base_excretion=100): - growthpheno = MSGrowthPhenotypes(base_media, base_uptake, base_excretion) + def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,global_atom_limits={}): + growthpheno = MSGrowthPhenotypes(base_media,base_uptake, base_excretion,global_atom_limits) df = pd.read_csv(filename) required_headers = ["Compounds", "Growth"] for item in required_headers: @@ -222,19 +308,40 @@ def add_phenotypes(self, new_phenotypes): def simulate_phenotypes( self, - model, - biomass, + model_or_modelutl, + objective, add_missing_exchanges=False, correct_false_negatives=False, template=None, - growth_threshold=0.001, - save_fluxes=False, + growth_threshold=0.01, + save_fluxes=False ): - model.objective = biomass - modelutl = MSModelUtil(model) + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + objective : string + Expression for objective to maximize in simulations + add_missing_exchanges : bool + Boolean indicating if exchanges for compounds mentioned explicitly in phenotype media should be added to the model automatically + growth_multiplier : double + Indicates a multiplier to use for positive growth above the growth on baseline media + save_fluxes : bool + Indicates if the fluxes should be saved and returned with the results + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + #Setting objective + modelutl.objective = objective + #Getting basline growth + if self.parent + summary = { - "Label": ["Accuracy", "CP", "CN", "FP", "FN"], - "Count": [0, 0, 0, 0, 0], + "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], + "Count": [0, 0, 0, 0, 0,0,0], } data = { "Phenotype": [], @@ -293,3 +400,62 @@ def simulate_phenotypes( df = pd.DataFrame(data) logger.info(df) return {"details": df, "summary": sdf} + + def fit_model_to_phenotypes( + self, + model_or_mdlutl, + correct_false_negatives, + correct_false_positives, + minimize_new_false_positives, + core_template, + template, + integrate_results + ): + + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_mdlutl : Model | MSModelUtl + Model to use to run the simulations + correct_false_negatives : bool + Indicates if false negatives should be corrected + correct_false_positives : bool + Indicates if false positives should be corrected + minimize_new_false_positives : bool + Indicates if new false positivies should be avoided + core_template : MSTemplate + Core template to use for ATP safe gapfilling if tests aren't already computed (defaults to model core template if it has one) + template : MSTemplate + The template that should be used for gapfilling (will default to model template if it has one) + integrate_results : bool + Indicates if the resulting modifications to the model should be integrated + """ + pass + + def gapfill_all_phenotypes( + self, + model_or_mdlutl, + msgapfill=None, # Needed if the gapfilling object in model utl is not initialized + growth_threshold=None, + add_missing_exchanges=False, + ): + mdlutl = MSModelUtil.get(model_or_mdlutl) + # if msgapfill: + # mdlutl.gfutl = msgapfill + # if not mdlutl.gfutl: + # logger.critical( + # "Must either provide a gapfilling object or provide a model utl with an existing gapfilling object" + # ) + # media_list = [] + # for pheno in self.phenotypes: + # + # + # output = mdlutl.gfutl.run_multi_gapfill( + # media_list, + # default_minimum_objective=growth_threshold + # target=mdlutl.primary_biomass(), + # + # binary_check=False, + # prefilter=True, + # check_for_growth=True, + # ) From ea81a98b40fe797c7e02b8b3a96843d4923cd585 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:34:33 -0500 Subject: [PATCH 49/57] Fixing bug --- modelseedpy/core/msgrowthphenotypes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 13d540b1..fffb4619 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -337,7 +337,6 @@ def simulate_phenotypes( #Setting objective modelutl.objective = objective #Getting basline growth - if self.parent summary = { "Label": ["Accuracy", "CP", "CN", "FP", "FN","Growth","No growth"], From abd998c395350695bd3516f7aac38d3a52f68cb5 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:36:20 -0500 Subject: [PATCH 50/57] Adding MSGrowthPhenotype object --- modelseedpy/core/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/__init__.py b/modelseedpy/core/__init__.py index 204564ab..0b7c7b5c 100644 --- a/modelseedpy/core/__init__.py +++ b/modelseedpy/core/__init__.py @@ -9,7 +9,7 @@ from modelseedpy.core.mseditorapi import MSEditorAPI, MSEquation from modelseedpy.core.msgapfill import MSGapfill from modelseedpy.core.msatpcorrection import MSATPCorrection -from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes +from modelseedpy.core.msgrowthphenotypes import MSGrowthPhenotypes, MSGrowthPhenotype from modelseedpy.core.msmodelutl import MSModelUtil from modelseedpy.core.mstemplate import MSTemplateBuilder from modelseedpy.core.exceptions import * From 963f00a0ef8e1e3a812b2820cc1218628e5fec45 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:39:02 -0500 Subject: [PATCH 51/57] Adding MSGrowthPhenotype --- modelseedpy/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index 665f000c..dbb7c090 100644 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -38,6 +38,7 @@ MSBuilder, MSMedia, MSGrowthPhenotypes, + MSGrowthPhenotype, MSModelUtil, FBAHelper, MSEditorAPI, From 0d6e7af0727907b64ba6f737f48679f9235f4f94 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Wed, 12 Jul 2023 23:47:33 -0500 Subject: [PATCH 52/57] Fixing error in simulate arguments --- modelseedpy/core/msgrowthphenotypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index fffb4619..553b0523 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -55,7 +55,7 @@ def build_media(self,include_base_media=True): def simulate( self, - model_or_modelutl, + model_or_mdlutl, objective, growth_multiplier=10, add_missing_exchanges=False, From c0abcd166a8fcbfde702cf375fef9b604440f37a Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 16 Jul 2023 22:42:19 -0500 Subject: [PATCH 53/57] Fixing attributes, improving phenotypes, improving gapfilling --- modelseedpy/core/msgapfill.py | 11 ++--- modelseedpy/core/msgrowthphenotypes.py | 57 ++++++++++++++++++-------- modelseedpy/core/msmodelutl.py | 17 ++++++-- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/modelseedpy/core/msgapfill.py b/modelseedpy/core/msgapfill.py index 10bcb9a2..dde1514e 100644 --- a/modelseedpy/core/msgapfill.py +++ b/modelseedpy/core/msgapfill.py @@ -2,6 +2,7 @@ import logging import cobra import re +import json from optlang.symbolics import Zero, add from modelseedpy.core import FBAHelper # !!! the import is never used from modelseedpy.fbapkg.mspackagemanager import MSPackageManager @@ -354,11 +355,11 @@ def integrate_gapfill_solution( if link_gaps_to_objective: logger.info("Gapfilling sensitivity analysis running on succesful run in "+solution["media"].id+" for target "+solution["target"]) gf_sensitivity = self.mdlutl.get_attributes("gf_sensitivity", {}) - if solution["media"] not in gf_sensitivity: - gf_sensitivity[solution["media"]] = {} - if solution["target"] not in gf_sensitivity[solution["media"]]: - gf_sensitivity[solution["media"]][solution["target"]] = {} - gf_sensitivity[solution["media"]][solution["target"]][ + if solution["media"].id not in gf_sensitivity: + gf_sensitivity[solution["media"].id] = {} + if solution["target"] not in gf_sensitivity[solution["media"].id]: + gf_sensitivity[solution["media"].id][solution["target"]] = {} + gf_sensitivity[solution["media"].id][solution["target"]][ "success" ] = self.mdlutl.find_unproducible_biomass_compounds( solution["target"], cumulative_solution diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 553b0523..bc2f4f05 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -297,6 +297,15 @@ def from_ms_file(filename,base_media=None, base_uptake=0, base_excretion=100,glo growthpheno.add_phenotypes(new_phenos) return growthpheno + def build_super_media(self): + super_media = None + for pheno in self.phenotypes: + if not super_media: + super_media = pheno.build_media() + else: + super_media.merge(pheno.build_media(), overwrite_overlap=False) + return super_media + def add_phenotypes(self, new_phenotypes): keep_phenos = [] for pheno in new_phenotypes: @@ -402,34 +411,50 @@ def simulate_phenotypes( def fit_model_to_phenotypes( self, - model_or_mdlutl, - correct_false_negatives, - correct_false_positives, - minimize_new_false_positives, - core_template, - template, - integrate_results + msgapfill, + objective, + grow_multiplier, + correct_false_positives=False, + minimize_new_false_positives=True, + atp_safe=True, + integrate_results=True, + global_gapfilling=True ): """Simulates all the specified phenotype conditions and saves results Parameters ---------- - model_or_mdlutl : Model | MSModelUtl - Model to use to run the simulations - correct_false_negatives : bool - Indicates if false negatives should be corrected + msgapfill : MSGapfill + Gapfilling object used for the gapfilling process correct_false_positives : bool Indicates if false positives should be corrected minimize_new_false_positives : bool Indicates if new false positivies should be avoided - core_template : MSTemplate - Core template to use for ATP safe gapfilling if tests aren't already computed (defaults to model core template if it has one) - template : MSTemplate - The template that should be used for gapfilling (will default to model template if it has one) integrate_results : bool Indicates if the resulting modifications to the model should be integrated """ - pass + #Create super media for all + super_media = self.build_super_media() + #Adding missing exchanges + msgapfill.gfmodel.add_missing_exchanges(super_media) + #Adding elemental constraints + self.add_elemental_constraints() + #Getting ATP tests + + #Filtering database for ATP tests + + #Penalizing database to avoid creating false positives + + #Building additional tests from current correct negatives + + #Computing base-line growth + + #Computing growth threshold + + #Running global gapfill + + #Integrating solution + def gapfill_all_phenotypes( self, diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 9c69a51f..fb4c45e4 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -113,6 +113,12 @@ def __init__(self, model): self.attributes = {} if hasattr(self.model, "attributes"): self.attributes = self.model + if "pathways" not in self.attributes: + self.attributes["pathways"] = {} + if "auxotrophy" not in self.attributes: + self.attributes["auxotrophy"] = {} + if "fbas" not in self.attributes: + self.attributes["fbas"] = {} def compute_automated_reaction_scores(self): """ @@ -286,14 +292,17 @@ def get_attributes(self, key=None, default=None): return self.attributes[key] def save_attributes(self, value=None, key=None): - attributes = self.get_attributes() if value: if key: - attributes[key] = value + self.attributes[key] = value else: self.attributes = value - if hasattr(self.model, "attributes"): - self.model.attributes = self.attributes + if hasattr(self.model, "computed_attributes"): + logger.info( + "Setting FBAModel computed_attributes to mdlutl attributes" + ) + self.attributes["gene_count"] = len(self.model.genes) + self.model.computed_attributes = self.attributes def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]): modelseed = ModelSEEDBiochem.get() From de260aec2b0897fa20dbc6b0ca0981d050df0151 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Sun, 16 Jul 2023 23:15:22 -0500 Subject: [PATCH 54/57] Fixing attritbute problem and fixing phenotypes --- modelseedpy/core/msatpcorrection.py | 2 +- modelseedpy/core/msgrowthphenotypes.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modelseedpy/core/msatpcorrection.py b/modelseedpy/core/msatpcorrection.py index dd381b18..c07cc34f 100644 --- a/modelseedpy/core/msatpcorrection.py +++ b/modelseedpy/core/msatpcorrection.py @@ -414,7 +414,7 @@ def apply_growth_media_gapfilling(self): "count": len(self.cumulative_core_gapfilling), "reactions": self.cumulative_core_gapfilling, } - self.modelutl.save_attributes(core_gf, "core_gapfilling") + self.modelutl.save_attributes(core_gf, "core_gapfilling_details") def expand_model_to_genome_scale(self): """Restores noncore reactions to model while filtering out reactions that break ATP diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index bc2f4f05..885e4f78 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -132,19 +132,19 @@ def simulate( #Determining phenotype class if output["growth"] >= output["baseline_growth"]*growth_multiplier: - if self.growth > 0: + if not self.growth: + output["class"] = "GROWTH" + elif self.growth > 0: output["class"] = "CP" elif self.growth == 0: output["class"] = "FP" - else: - output["class"] = "GROWTH" else: - if self.growth > 0: + if not self.growth: + output["class"] = "NOGROWTH" + elif self.growth > 0: output["class"] = "FN" elif self.growth == 0: output["class"] = "CN" - else: - output["class"] = "NOGROWTH" return output def gapfill_model_for_phenotype( From 328c57878423d89c2d48281d4b3adca606fc1bec Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 17 Jul 2023 23:23:34 -0500 Subject: [PATCH 55/57] Improving phenotype simulations and enabling use of complete media --- modelseedpy/core/msgrowthphenotypes.py | 49 ++++++++++++++++++++++---- modelseedpy/core/msmedia.py | 1 + modelseedpy/core/msmodelutl.py | 6 ++-- modelseedpy/fbapkg/kbasemediapkg.py | 2 +- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index 885e4f78..ebdba851 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -48,9 +48,9 @@ def build_media(self,include_base_media=True): full_media = MSMedia.from_dict(cpd_hash) if self.media: full_media.merge(self.media, overwrite_overlap=False) - if full_media: + if include_base_media: if self.parent and self.parent.base_media: - full_media.merge(parent.base_media, overwrite_overlap=False) + full_media.merge(self.parent.base_media, overwrite_overlap=False) return full_media def simulate( @@ -91,18 +91,18 @@ def simulate( output["missing_transports"] = modelutl.add_missing_exchanges(full_media) #Getting basline growth - output["baseline_growth"] = 0.001 + output["baseline_growth"] = 0.01 if self.parent: - output["baseline_growth"] = self.parent.baseline_growth(modelutl,True) + output["baseline_growth"] = self.parent.baseline_growth(modelutl,objective) #Building specific media and setting compound exception list if self.parent and self.parent.atom_limits and len(self.parent.atom_limits) > 0: reaction_exceptions = [] specific_media = self.build_media(False) for mediacpd in specific_media.mediacompounds: - output = mediacpd.get_mdl_exchange_hash(self,modelutl) - for mdlcpd in output: - reaction_exceptions.append(output[mdlcpd]) + ex_hash = mediacpd.get_mdl_exchange_hash(modelutl) + for mdlcpd in ex_hash: + reaction_exceptions.append(ex_hash[mdlcpd]) modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.parent.atom_limits,exception_reactions=reaction_exceptions) #Applying media @@ -211,6 +211,7 @@ def __init__(self, base_media=None, base_uptake=0, base_excretion=1000,global_at self.base_excretion = base_excretion self.atom_limits = global_atom_limits self.baseline_growth_data = {} + self.cached_based_growth = {} @staticmethod def from_compound_hash(compounds,base_media=None, base_uptake=0, base_excretion=1000,global_atom_limits={}): @@ -315,6 +316,40 @@ def add_phenotypes(self, new_phenotypes): additions = DictList(keep_phenos) self.phenotypes += additions + def baseline_growth( + self, + model_or_mdlutl, + objective + ): + """Simulates all the specified phenotype conditions and saves results + Parameters + ---------- + model_or_modelutl : Model | MSModelUtl + Model to use to run the simulations + """ + # Discerning input is model or mdlutl and setting internal links + modelutl = model_or_mdlutl + if not isinstance(model_or_mdlutl, MSModelUtil): + modelutl = MSModelUtil.get(model_or_mdlutl) + #Checking if base growth already computed + if modelutl in self.cached_based_growth: + if objective in self.cached_based_growth[modelutl]: + return self.cached_based_growth[modelutl][objective] + else: + self.cached_based_growth[modelutl] = {} + #Setting objective + modelutl.objective = objective + #Setting media + modelutl.pkgmgr.getpkg("KBaseMediaPkg").build_package( + self.base_media, self.base_uptake, self.base_excretion + ) + #Adding uptake limits + if len(self.atom_limits) > 0: + modelutl.pkgmgr.getpkg("ElementUptakePkg").build_package(self.atom_limits) + #Simulating + self.cached_based_growth[modelutl][objective] = modelutl.model.slim_optimize() + return self.cached_based_growth[modelutl][objective] + def simulate_phenotypes( self, model_or_modelutl, diff --git a/modelseedpy/core/msmedia.py b/modelseedpy/core/msmedia.py index 48fa90ad..fadc435d 100644 --- a/modelseedpy/core/msmedia.py +++ b/modelseedpy/core/msmedia.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import logging from cobra.core.dictlist import DictList +from modelseedpy.core.msmodelutl import MSModelUtil logger = logging.getLogger(__name__) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index fb4c45e4..097dc9cc 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -111,14 +111,14 @@ def __init__(self, model): self.score = None self.integrated_gapfillings = [] self.attributes = {} - if hasattr(self.model, "attributes"): - self.attributes = self.model + if hasattr(self.model, "computed_attributes"): + self.attributes = self.model.computed_attributes if "pathways" not in self.attributes: self.attributes["pathways"] = {} if "auxotrophy" not in self.attributes: self.attributes["auxotrophy"] = {} if "fbas" not in self.attributes: - self.attributes["fbas"] = {} + self.attributes["fbas"] = {} def compute_automated_reaction_scores(self): """ diff --git a/modelseedpy/fbapkg/kbasemediapkg.py b/modelseedpy/fbapkg/kbasemediapkg.py index 4dbf0779..a3c19243 100644 --- a/modelseedpy/fbapkg/kbasemediapkg.py +++ b/modelseedpy/fbapkg/kbasemediapkg.py @@ -40,7 +40,7 @@ def build_package( self.parameters["default_uptake"] = 0 if self.parameters["default_excretion"] is None: self.parameters["default_excretion"] = 100 - if self.parameters["media"] is None and self.parameters["default_uptake"] == 0: + if (self.parameters["media"] is None or self.parameters["media"].name == "Complete") and self.parameters["default_uptake"] == 0: self.parameters["default_uptake"] = 100 # First initializing all exchanges to default uptake and excretion From 084054dbb831231c324c09db35a4bff636a8bc3b Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Mon, 17 Jul 2023 23:26:23 -0500 Subject: [PATCH 56/57] Fixing issue where attributes will be none --- modelseedpy/core/msmodelutl.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py index 097dc9cc..ec6bc903 100644 --- a/modelseedpy/core/msmodelutl.py +++ b/modelseedpy/core/msmodelutl.py @@ -112,7 +112,8 @@ def __init__(self, model): self.integrated_gapfillings = [] self.attributes = {} if hasattr(self.model, "computed_attributes"): - self.attributes = self.model.computed_attributes + if self.model.computed_attributes: + self.attributes = self.model.computed_attributes if "pathways" not in self.attributes: self.attributes["pathways"] = {} if "auxotrophy" not in self.attributes: From b08f8f38362c3a9140571f5ade10f8d00ab10ae6 Mon Sep 17 00:00:00 2001 From: Christopher Henry Date: Tue, 18 Jul 2023 13:29:46 -0500 Subject: [PATCH 57/57] Improving phenotype simulation and making sure parameters are documented in element uptake --- modelseedpy/core/msgrowthphenotypes.py | 2 +- modelseedpy/fbapkg/elementuptakepkg.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modelseedpy/core/msgrowthphenotypes.py b/modelseedpy/core/msgrowthphenotypes.py index ebdba851..a98fd64d 100644 --- a/modelseedpy/core/msgrowthphenotypes.py +++ b/modelseedpy/core/msgrowthphenotypes.py @@ -57,7 +57,7 @@ def simulate( self, model_or_mdlutl, objective, - growth_multiplier=10, + growth_multiplier=3, add_missing_exchanges=False, save_fluxes=False, pfba=False, diff --git a/modelseedpy/fbapkg/elementuptakepkg.py b/modelseedpy/fbapkg/elementuptakepkg.py index 4eb27e44..8348e602 100644 --- a/modelseedpy/fbapkg/elementuptakepkg.py +++ b/modelseedpy/fbapkg/elementuptakepkg.py @@ -18,6 +18,11 @@ def __init__(self, model): def build_package(self, element_limits,exception_compounds=[],exception_reactions=[]): #Converting exception compounds list into exception reaction list + self.parameters = { + "element_limits" : element_limits, + "exception_compounds" : exception_compounds, + "exception_reactions" : exception_reactions + } exchange_hash = self.modelutl.exchange_hash() for met in exception_compounds: if met in exchange_hash: