diff --git a/modelseedpy/__init__.py b/modelseedpy/__init__.py index be541f0..1f6c23e 100755 --- a/modelseedpy/__init__.py +++ b/modelseedpy/__init__.py @@ -40,6 +40,7 @@ MSGrowthPhenotypes, MSGrowthPhenotype, MSModelUtil, + MSMinimalMedia, FBAHelper, MSEditorAPI, MSATPCorrection, @@ -51,10 +52,6 @@ ) from modelseedpy.core.exceptions import * -from modelseedpy.community import ( - MSCommunity, CommunityMember, MSKineticsFBA, CommPhitting, - MSSteadyCom, build_from_species_models, phenotypes) - from modelseedpy.biochem import ModelSEEDBiochem from modelseedpy.fbapkg import ( diff --git a/modelseedpy/community/MSID_hash.json b/modelseedpy/community/MSID_hash.json deleted file mode 100644 index 96ceef4..0000000 --- a/modelseedpy/community/MSID_hash.json +++ /dev/null @@ -1,20 +0,0 @@ -[ - "cpd11416_c1", - "cpd11416_c2", - "cpd11416_c3", - "cpd11416_c4", - "cpd11416_c5", - "cpd11416_c6", - "cpd11416_c7", - "cpd11416_c8", - "cpd11416_c9", - "cpd11416_c10", - "cpd11416_c11", - "cpd11416_c12", - "cpd11416_c13", - "cpd11416_c14", - "cpd11416_c15", - "cpd11416_c16", - "cpd11416_c17", - "cpd11416_c0" -] \ No newline at end of file diff --git a/modelseedpy/community/__init__.py b/modelseedpy/community/__init__.py deleted file mode 100644 index e4e6208..0000000 --- a/modelseedpy/community/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import absolute_import - -# import pyximport; pyximport.install(language_level=3) # improve computational speed - -from modelseedpy.community.mscommunity import * -from modelseedpy.community.datastandardization import * -from modelseedpy.community.commkineticpkg import CommKineticPkg -from modelseedpy.community.mscompatibility import MSCompatibility -from modelseedpy.community.mssteadycom import MSSteadyCom -from modelseedpy.community.commphitting import CommPhitting -from modelseedpy.community.commhelper import build_from_species_models, phenotypes -from modelseedpy.community.mskineticsfba import MSKineticsFBA diff --git a/modelseedpy/community/commhelper.py b/modelseedpy/community/commhelper.py deleted file mode 100644 index c357af2..0000000 --- a/modelseedpy/community/commhelper.py +++ /dev/null @@ -1,376 +0,0 @@ -from modelseedpy.core.msminimalmedia import minimizeFlux_withGrowth, bioFlux_check -from modelseedpy.core.exceptions import NoFluxError, ObjectiveError -from modelseedpy.fbapkg.mspackagemanager import MSPackageManager -from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.core.fbahelper import FBAHelper -from cobra import Model, Reaction, Metabolite -from optlang import Constraint, Objective -from cobra.medium import minimal_medium -# from commscores import GEMCompatibility -from cobra.flux_analysis import pfba -from collections import OrderedDict -from optlang.symbolics import Zero -from math import inf, isclose -from pandas import DataFrame -from pprint import pprint -from numpy import mean -import re - -import logging -logging.basicConfig(filename='example.log', encoding='utf-8', level=logging.WARNING) -logger = logging.getLogger(__name__) - - -def strip_comp(ID): - ID = ID.replace("-", "~") - return re.sub("(\_\w\d)", "", ID) - -def export_lp(model, name): - with open(f"{name}.lp", 'w') as out: - out.write(model.solver.to_lp()) - -def correct_nonMSID(nonMSobject, output, model_index): - name, compartment = output - index = 0 if compartment == "e" else model_index - nonMSobject.compartment = compartment + str(index) - comp = re.search(r"(_[a-z]\d+$)", nonMSobject.id) - if comp is None and 
rf"[{compartment}]" in nonMSobject.id: return nonMSobject.id.replace(rf"[{compartment}]", f"_{nonMSobject.compartment}") - elif comp is None: return nonMSobject.id + f"_{nonMSobject.compartment}" - return "_".join([nonMSobject.id.replace(comp.group(), ""), nonMSobject.compartment]) - - -def build_from_species_models(org_models, model_id=None, name=None, abundances=None, - standardize=False, MSmodel = True, commkinetics=True, - copy_models=True, printing=False): - """Merges the input list of single species metabolic models into a community metabolic model - - Parameters - ---------- - org_models : list to be merged into a community model - model_id : string specifying community model ID - name : string specifying community model name - names : list human-readable names for models being merged - abundances : dict relative abundances for input models in community model - cobra_model : bool for whether the raw COBRA model is returned - standardize: bool for whether the exchanges of each member model will be standardized (True) or just aligned. - - Returns - ------- - Cobra.Model for the desired Community - - Raises - ------ - """ - # construct the new model - models = org_models #if not standardize else GEMCompatibility.standardize( - #org_models, exchanges=True, conflicts_file_name='exchanges_conflicts.json') - biomass_indices = [] - biomass_index = minimal_biomass_index = 2 - new_metabolites, new_reactions = set(), set() - member_biomasses = {} - models = models if not abundances else [mdl for mdl in models if mdl.id in abundances] - for model_index, org_model in enumerate(models): - model_util = MSModelUtil(org_model, copy=copy_models) - model_reaction_ids = [rxn.id for rxn in model_util.model.reactions] - model_index += 1 - # if MSmodel: - # Rename metabolites - for met in model_util.model.metabolites: - # Renaming compartments - output = MSModelUtil.parse_id(met) - # if printing: print(met, output) - if output is None: - if printing: print(f"The {met.id} ({output}; {hasattr(met, 'compartment')}) is unpredictable.") - met.id = correct_nonMSID(met, (met.id, "c"), model_index) - elif len(output) == 2: met.id = correct_nonMSID(met, output, model_index) - elif len(output) == 3: - name, compartment, out_index = output - index = 0 if compartment == "e" else model_index - if out_index == "": - met.id += str(index) - met.compartment += str(index) - elif compartment == "e": met.compartment = "e0" - else: - met.compartment = compartment + str(index) - met.id = name + "_" + met.compartment - new_metabolites.add(met) - if "cpd11416_c" in met.id or "biomass" in met.name or "biomass" in met.id: - met.name = f"{met.id}_{model_util.model.id}" - member_biomasses[org_model.id] = met - # Rename reactions - for rxn in model_util.model.reactions: # !!! 
all reactions should have a non-zero compartment index - if rxn.id[0:3] != "EX_": - ## biomass reactions - if re.search('^(bio)(\d+)$', rxn.id) or "biomass" in rxn.id: - print(rxn.id, "from", model_util.id, "becomes", end=" ") - index = int(re.sub(r"(^biomass)", "", rxn.id)) if "biomass" in rxn.id else int(re.sub(r"(^bio)", "", rxn.id)) - if biomass_index == 2: - while f"bio{biomass_index}" in model_reaction_ids: biomass_index += 1 - if index not in biomass_indices and index >= minimal_biomass_index: biomass_indices.append(index) - elif f"bio{biomass_index}" not in model_reaction_ids: biomass_indices.append(biomass_index) - elif f"bio{minimal_biomass_index}" not in model_reaction_ids: biomass_indices.append(minimal_biomass_index) - else: - logger.error("The biomass reaction is not properly defined.") - index = minimal_biomass_index - while f"bio{index}" not in model_reaction_ids and index not in biomass_indices: - index += 1 - biomass_indices.append(index) - rxn.id = f"bio{biomass_indices[-1]}" - model_reaction_ids.append(rxn.id) - biomass_index += 1 - print(rxn.id) - ## non-biomass reactions - else: - initialID = str(rxn.id) - output = MSModelUtil.parse_id(rxn) - if output is None: - if printing: print(f"The {rxn.id} ({output}; {hasattr(rxn, 'compartment')}) is unpredictable.") - try: - rxn.id = correct_nonMSID(rxn, (rxn.id, "c"), model_index) - output = MSModelUtil.parse_id(rxn) - except ValueError: pass - elif len(output) == 2: - rxn.id = correct_nonMSID(rxn, output, model_index) - if printing: print(f"{output} from {rxn.id}") - output = MSModelUtil.parse_id(rxn) - if len(output) == 3: - name, compartment, index = output - if compartment != "e": - rxn.name = f"{name}_{compartment}{model_index}" - rxn_id = re.search(r"(.+\_\w)(?=\d+)", rxn.id).group() - if index == "": rxn.id += str(model_index) - else: rxn.id = rxn_id + str(model_index) - finalID = str(rxn.id) - string_diff = "" - for index, let in enumerate(finalID): - if index >= len(initialID) or index < len(initialID) and let != initialID[index]: string_diff += let - # if "compartment" not in locals(): print(f"the {rxn.id} with a {output} output is not defined with a compartment.") - if string_diff != f"_{compartment}{model_index}" or not FBAHelper.isnumber(string_diff) and printing: - logger.debug(f"The ID {initialID} is changed with {string_diff} to create the final ID {finalID}") - new_reactions.add(rxn) - # else: - # # TODO develop a method for compartmentalizing models without editing all reaction IDs or assuming their syntax - # pass - - # Create community biomass - comm_biomass = Metabolite("cpd11416_c0", None, "Community biomass", 0, "c0") - metabolites = {comm_biomass: 1} - ## constrain the community abundances - if abundances: - if isinstance(abundances[list(abundances.keys())[0]], dict): - abundances = {met: -abundances[memberID]["abundance"] for memberID, met in member_biomasses.items()} - else: abundances = {met: -abundances[memberID] for memberID, met in member_biomasses.items()} - else: abundances = {met: -1 / len(member_biomasses) for met in member_biomasses.values()} - - ## TODO - add the biomass reactions instead of the biomass metabolites for the commKinetics - - ## define community biomass components - metabolites.update(abundances) - comm_biorxn = Reaction(id="bio1", name="bio1", lower_bound=0, upper_bound=1000) - comm_biorxn.add_metabolites(metabolites) - - # adds only unique reactions and metabolites to the community model - newmodel = Model(model_id or "+".join([model.id for model in models]), - name or 
"+".join([model.name for model in models])) - newmodel.add_reactions(FBAHelper.filter_cobra_set(new_reactions)) - newmodel.add_metabolites(FBAHelper.filter_cobra_set(new_metabolites)) - newmodel.add_reactions([comm_biorxn]) - newmodel.objective = Objective(comm_biorxn.flux_expression) - newutl = MSModelUtil(newmodel) - # newutl.add_objective(comm_biorxn.flux_expression) - newutl.model.add_boundary(comm_biomass, "sink") # Is a sink reaction for reversible cpd11416_c0 consumption necessary? - ## proportionally limit the fluxes to their abundances - # add the metadata of community composition - print("Community objective", newutl.model.objective.expression) - if hasattr(newutl.model, "_context"): newutl.model._contents.append(member_biomasses) - elif hasattr(newutl.model, "notes"): newutl.model.notes.update({"member_biomass_cpds": member_biomasses}) - # print([cons.name for cons in newutl.model.constraints]) - return newutl.model - - -def phenotypes(community_members, phenotype_flux_threshold=.1, solver:str="glpk"): - # log information of each respective model - models = OrderedDict() - solutions = [] - media_conc = set() - # calculate all phenotype profiles for all members - comm_members = community_members.copy() - # print(community_members) - for org_model, content in community_members.items(): # community_members excludes the stationary phenotype - print("\n", org_model.id) - org_model.solver = solver - all_phenotypes = "phenotypes" not in content - model_util = MSModelUtil(org_model, True) - if "org_coef" not in locals(): - org_coef = {model_util.model.reactions.get_by_id("EX_cpd00007_e0").reverse_variable: -1} - model_util.standard_exchanges() - models[org_model.id] = {"exchanges": model_util.exchange_list(), "solutions": {}, "name": content["name"]} - phenotypes = {met.name: {"consumed": met.id.replace("EX_", "").replace("_e0", "")} - for met in model_util.carbon_exchange_mets_list(include_unknown=False) - } if all_phenotypes else content["phenotypes"] - # print(phenotypes) - models[org_model.id]["phenotypes"] = ["stationary"] + [ - content["phenotypes"].keys() for member, content in comm_members.items()] - phenoRXNs = [pheno_cpd for pheno, pheno_cpds in content['phenotypes'].items() - for pheno_cpd in pheno_cpds["consumed"]] - media = {cpd: 100 for cpd, flux in model_util.model.medium.items()} - #TODO correct or remove the media, since it seems to be overwritten by the optimization of all carbon exchanges - ### eliminate hydrogen absorption - media.update({"EX_cpd11640_e0": 0}) - past_phenoRXNs = [] - for name, phenoCPDs in phenotypes.items(): - pheno_util = MSModelUtil(model_util.model, True) - metID = phenoCPDs["consumed"][0] - try: - phenoRXN = pheno_util.model.reactions.get_by_id(f'EX_{metID}_e0') - if past_phenoRXNs: - del media[past_phenoRXNs[-1]] - except Exception as e: - print(e, f'\nEX_{metID}_e0 is not in the model {org_model.id}') - continue - media.update({phenoRXN.id: 100}) - pheno_util.add_medium(media) - print(phenoRXN.id) - pheno_util.model.solver = solver - ### define an oxygen absorption relative to the phenotype carbon source - # O2_consumption: EX_cpd00007_e0 <= phenotype carbon source # formerly <= 2 * sum(primary carbon fluxes) - coef = org_coef.copy() - coef.update({phenoRXN.reverse_variable: 1}) - pheno_util.create_constraint(Constraint(Zero, lb=0, ub=None, name="EX_cpd00007_e0_limitation"), coef=coef) - - ## minimize the influx of all carbonaceous exchanges, mostly non-phenotype compounds, at a fixed biomass growth - min_growth = float(1) # arbitrarily 
assigned minimal growth - pheno_util.add_minimal_objective_cons(min_growth) - phenoRXN.upper_bound = 0 - for ex in pheno_util.carbon_exchange_list(): - exMet = ex.id.replace("EX_", "").replace("_e0", "") - if exMet in phenoRXNs and exMet != metID: ex.lower_bound = 0 - # print(f"The new bounds of {exMet} exchange are: {ex.bounds}") - pheno_util.add_objective(Zero, "min", coef={ - ex.reverse_variable: 1000 if ex.id != phenoRXN.id else 1 - for ex in pheno_util.carbon_exchange_list()}) - # export_lp(pheno_util.model, f"minimize_cInFlux_{phenoRXN.id}") - sol = pheno_util.model.optimize() - if sol.status != "optimal": - pheno_util.model.remove_cons_vars(["EX_cpd00007_e0_limitation"]) - coef.update({phenoRXN.reverse_variable: 5}) - pheno_util.create_constraint( - Constraint(Zero, lb=0, ub=None, name="EX_cpd00007_e0_limitation"), coef=coef) - sol = pheno_util.model.optimize() - bioFlux_check(pheno_util.model, sol) - ### limit maximum consumption to the values from the previous minimization - for ex in pheno_util.carbon_exchange_list(): - #### (limiting the reverse_variable is more restrictive than the net flux variable) - if ex.id != phenoRXN.id: ex.reverse_variable.ub = abs(min(0, sol.fluxes[ex.id])) - - ## maximize the phenotype yield with the previously defined growth and constraints - pheno_util.add_objective(phenoRXN.reverse_variable, "min") - # export_lp(pheno_util.model, f"maximize_phenoYield_{phenoRXN.id}") - pheno_sol = pheno_util.model.optimize() - bioFlux_check(pheno_util.model, pheno_sol) - pheno_influx = pheno_sol.fluxes[phenoRXN.id] - if pheno_influx >= 0: - if not all_phenotypes: - print(f"The phenotype carbon source has a flux of {pheno_sol.fluxes[phenoRXN.id]}.") - pprint({rxn: flux for rxn, flux in pheno_sol.fluxes.items() if flux != 0}) - # TODO gapfill the model in media the non-functioning carbon source - raise NoFluxError(f"The (+) net flux of {pheno_influx} for the {phenoRXN.id} phenotype" - f" indicates that it is an implausible phenotype.") - print(f"NoFluxError: The (+) net flux of {pheno_influx} for the {phenoRXN.id}" - " phenotype indicates that it is an implausible phenotype.") - continue - phenoRXN.lower_bound = phenoRXN.upper_bound = pheno_influx - - ## maximize excretion of all potential carbon byproducts whose #C's < phenotype source #C's - phenotype_source_carbons = FBAHelper.rxn_mets_list(phenoRXN)[0].elements["C"] - minimum_fluxes = {} - for carbon_source in pheno_util.carbon_exchange_list(include_unknown=False): - if 0 < FBAHelper.rxn_mets_list(carbon_source)[0].elements["C"] < phenotype_source_carbons: - pheno_util.add_objective(carbon_source.flux_expression, "max") - minObj = pheno_util.model.slim_optimize() - # print(carbon_source.reaction, "\t", carbon_source.flux_expression, "\t", minObj) - if minObj > phenotype_flux_threshold: - minimum_fluxes[carbon_source.id] = minObj - # TODO limit the possible excreted compounds to only those that are defined in the media - excreted_compounds = list([exID for exID in minimum_fluxes.keys() if exID != "EX_cpd00011_e0"]) - # minimum_fluxes_df = DataFrame(data=list(minimum_fluxes.values()), index=excreted_compounds, columns=["min_flux"]) - # max_excretion_cpd = minimum_fluxes_df["minimum"].idxmin() - ### optimize the excretion of the discovered phenotype excreta - if "excreted" in phenoCPDs: - phenoCPDs["excreted"] = [f"EX_{cpd}_e0" for cpd in phenoCPDs["excreted"]] - phenoCPDs["excreted"].extend(excreted_compounds) - else: phenoCPDs["excreted"] = excreted_compounds - pheno_excreta = 
[pheno_util.model.reactions.get_by_id(excreta) - for excreta in phenoCPDs["excreted"]] - pheno_util.add_objective(sum([ex.flux_expression for ex in pheno_excreta]), "max") - # export_lp(pheno_util.model, "maximize_excreta") - sol = pheno_util.model.optimize() - bioFlux_check(pheno_util.model, sol) - for ex in pheno_excreta: - ex.lower_bound = ex.upper_bound = sol.fluxes[ex.id] - - ## minimize flux of the total simulation flux through pFBA - # TODO discover why some phenotypes are infeasible with pFBA - try: pheno_sol = pfba(pheno_util.model) - # pheno_util.add_objective(sum([rxn.flux_expression for rxn in pheno_util.e]), "min") - # pheno_sol = pheno_util.model.optimize() - except Exception as e: - print(f"The {phenoRXN.id} phenotype of the {pheno_util.model} model is " - f"unable to be simulated with pFBA and yields a < {e} > error.") - sol_dict = FBAHelper.solution_to_variables_dict(pheno_sol, pheno_util.model) - simulated_growth = sum([flux for var, flux in sol_dict.items() if re.search(r"(^bio\d+$)", var.name)]) - if not isclose(simulated_growth, min_growth): - display([(rxn, flux) for rxn, flux in pheno_sol.fluxes.items() if "EX_" in rxn and flux != 0]) - raise ObjectiveError(f"The assigned minimal_growth of {min_growth} was not optimized" - f" during the simulation, where the observed growth was {simulated_growth}.") - - ## store solution fluxes and update the community_members phenotypes - met_name = strip_comp(name).replace(" ", "-") - col = content["name"] + '_' + met_name - models[pheno_util.model.id]["solutions"][col] = pheno_sol - solutions.append(models[pheno_util.model.id]["solutions"][col].objective_value) - met_name = met_name.replace("_", "-").replace("~", "-") - if all_phenotypes: - if "phenotypes" not in comm_members[org_model]: - comm_members[org_model]["phenotypes"] = {met_name: {"consumed": [strip_comp(metID)]}} - if met_name not in comm_members[org_model]["phenotypes"]: - comm_members[org_model]["phenotypes"].update({met_name: {"consumed": [strip_comp(metID)]}}) - else: comm_members[org_model]["phenotypes"][met_name]["consumed"] = [strip_comp(metID)] - met_pheno = content["phenotypes"][met_name] - if "excreted" in met_pheno and strip_comp(metID) in met_pheno["excreted"]: - comm_members[org_model]["phenotypes"][met_name].update({"excreted": met_pheno}) - past_phenoRXNs.append(phenoRXN.id) - - # construct the parsed table of all exchange fluxes for each phenotype - cols = {} - ## biomass row - cols["rxn"] = ["bio"] - for content in models.values(): - for col in content["solutions"]: - cols[col] = [0] - if col not in content["solutions"]: continue - bio_rxns = [x for x in content["solutions"][col].fluxes.index if "bio" in x] - flux = mean([content["solutions"][col].fluxes[rxn] for rxn in bio_rxns - if content["solutions"][col].fluxes[rxn] != 0]) - cols[col] = [flux] - ## exchange reactions rows - looped_cols = cols.copy() - looped_cols.pop("rxn") - for content in models.values(): - for ex_rxn in content["exchanges"]: - cols["rxn"].append(ex_rxn.id) - for col in looped_cols: - ### reactions that are not present in the columns are ignored - flux = 0 if (col not in content["solutions"] or - ex_rxn.id not in list(content["solutions"][col].fluxes.index) - ) else content["solutions"][col].fluxes[ex_rxn.id] - cols[col].append(flux) - ## construct the DataFrame - fluxes_df = DataFrame(data=cols) - fluxes_df.index = fluxes_df['rxn'] - fluxes_df.drop('rxn', axis=1, inplace=True) - fluxes_df = fluxes_df.groupby(fluxes_df.index).sum() - fluxes_df = fluxes_df.loc[(fluxes_df != 
0).any(axis=1)] - fluxes_df.astype(str) - # fluxes_df.to_csv("fluxes.csv") - return fluxes_df, comm_members diff --git a/modelseedpy/community/commkineticpkg.py b/modelseedpy/community/commkineticpkg.py deleted file mode 100644 index c84a122..0000000 --- a/modelseedpy/community/commkineticpkg.py +++ /dev/null @@ -1,45 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import absolute_import - -import logging -from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg -from modelseedpy.community.mscommunity import MSCommunity -from modelseedpy.core.fbahelper import FBAHelper - -# Base class for FBA packages -class CommKineticPkg(BaseFBAPkg): - def __init__(self, model): - BaseFBAPkg.__init__( - self, model, "community kinetics", {}, {"commkin": "string"} - ) - - def build_package(self, kinetic_coef, community_model=None): - self.validate_parameters( - {}, - [], - { - "kinetic_coef": kinetic_coef, - "community": community_model - if community_model - else MSCommunity(self.model), - }, - ) - for species in self.parameters["community"].species: - self.build_constraint(species) - - def build_constraint(self, species): - coef = { - species.biomasses[0].forward_variable: -1 * self.parameters["kinetic_coef"], - species.biomasses[0].reverse_variable: self.parameters["kinetic_coef"] - } - for reaction in self.model.reactions: - if ( - int(FBAHelper.rxn_compartment(reaction)[1:]) == species.index - and reaction != species.biomasses[0] - ): - coef[reaction.forward_variable] = 1 - coef[reaction.reverse_variable] = 1 - return BaseFBAPkg.build_constraint( - self, "commkin", None, 0, coef, "Species" + str(species.index) - ) diff --git a/modelseedpy/community/commphitting.py b/modelseedpy/community/commphitting.py deleted file mode 100644 index 8d3cc0f..0000000 --- a/modelseedpy/community/commphitting.py +++ /dev/null @@ -1,1345 +0,0 @@ -# -*- coding: utf-8 -*- -# from modelseedpy.fbapkg.mspackagemanager import MSPackageManager -from modelseedpy.core.exceptions import FeasibilityError, ParameterError, ObjectAlreadyDefinedError, NoFluxError -from modelseedpy.core.optlanghelper import OptlangHelper, Bounds, tupVariable, tupConstraint, tupObjective, isIterable, define_term -from modelseedpy.community.datastandardization import GrowthData -from modelseedpy.core.fbahelper import FBAHelper -from modelseedpy.biochem import from_local -from scipy.constants import hour, minute -from zipfile import ZipFile, ZIP_LZMA -from optlang import Model, Objective -from time import sleep, process_time -from typing import Union, Iterable -from optlang.symbolics import Zero -from scipy.optimize import newton -from matplotlib import pyplot -from math import inf, isclose -from deepdiff import DeepDiff -from pandas import DataFrame -from itertools import chain -from pprint import pprint -# from h5py import File -from icecream import ic -import numpy as np -import cobra.io -# from cplex import Cplex -import warnings, logging, json, os, re - -logger = logging.getLogger(__name__) - -def dict_keys_exists(dic, *keys): - result = keys[0] in dic - if keys[0] in dic: - remainingKeys = keys[1:] - if len(remainingKeys) > 0: result = dict_keys_exists(dic[keys[0]], *remainingKeys) - return result - return result - -def find_dic_number(dic): - for k, v in dic.items(): - if FBAHelper.isnumber(v): return v - num = find_dic_number(dic[k]) - return num - -def trial_contents(short_code, indices_tup, values): - matches = [ele == short_code for ele in indices_tup] - return np.array(values)[matches] - -def dic_keys(dic): - keys = [] - if isinstance(dic, 
dict): - for key, value in dic.items(): - keys.append(key) - keys.extend(dic_keys(value)) - return keys - -# define data objects -def _name(name, suffix, short_code, timestep, names): - name = '-'.join([x for x in list(map(str, [name + suffix, short_code, timestep])) if x]) - if name not in names: names.append(name) ; return name - else: pprint(names) ; raise ObjectAlreadyDefinedError(f"The object {name} is already defined for the problem.") - -def _export_model_json(json_model, path): - with open(path, 'w') as lp: json.dump(json_model, lp, indent=3) - -def _met_id_parser(met): - met_id = re.sub('(\_\w\d+)', '', met) - met_id = met_id.replace('EX_', '', 1) - met_id = met_id.replace('c_', '', 1) - return met_id - -# define an entity as a variable or a constant -def _obj_val(primal, name, pheno, short_code, timestep, bounds, data_timestep_hr, names): - time_hr = int(timestep) * data_timestep_hr - return tupVariable(_name(name, pheno, short_code, timestep, names), - Bounds=bounds) if not primal else primal[short_code][name+pheno][time_hr] - -def _michaelis_menten(conc, vmax, km): - return (conc*vmax)/(km+conc) - -def clamp(val, minimum, maximum): - return min(max(val, minimum), maximum) - -# parse primal values for use in the optimization loops -def parse_primals(primal_values, entity_labels=None, coefs=None, kcat_vals=None): - if kcat_vals: - kcat_primal = {} - for trial, content in primal_values.items(): - for primal, time_value in content.items(): - if "bin" not in primal: continue - name, trial = primal.split("-") - number = re.search(r"(\d)", name).group() - species, pheno = re.sub(r"(bin\d_)", "", name).split("_") - if "stationary" in pheno: continue - if species not in kcat_primal: kcat_primal[species] = {} - if pheno not in kcat_primal[species]: kcat_primal[species][pheno] = 0 - # kcat_(k,new) = sum_z^Z ( kcat_z * bin_k^z ) * kcat_(k,old) < 10 - if time_value == 0 and kcat_primal[species][pheno] < 10: - kcat_primal[species][pheno] += coefs[int(number)-1]*kcat_vals[species][pheno] - kcat_primal[species][pheno] = clamp(kcat_primal[species][pheno], 1e-4, 10) - return kcat_primal - select_primals = {} - for trial, entities in primal_values.items(): - select_primals[trial] = {} - for entity, times in entities.items(): - # a poor man's dictionary copy - if any([label in entity for label in entity_labels]): select_primals[trial][entity] = dict(list(times.items())) - return select_primals - -def signal_species(signal): - return signal.split(":")[0].replace(" ", "_") - -def _partition_coefs(initial_val, divisor): - return (initial_val, initial_val/divisor, initial_val/divisor**2, initial_val/divisor**3, initial_val/divisor**4) - - -biomass_partition_coefs = [_partition_coefs(10, 10), _partition_coefs(2, 2), _partition_coefs(1, 3)] - - -class CommPhitting: - - def __init__(self, msdb_path, community_members: dict=None, fluxes_df=None, data_df=None, carbon_conc=None, - media_conc=None, experimental_metadata=None, base_media=None, solver: str = 'glpk', all_phenotypes=True, - data_paths: dict = None, species_abundances: str = None, ignore_trials: Union[dict, list] = None, - ignore_timesteps: list = None, species_identities_rows=None, significant_deviation: float = 2, - extract_zip_path: str = None, determine_requisite_biomass:bool = True, consumed_mets:iter=None): - self.msdb = from_local(msdb_path) ; self.msdb_path = msdb_path - self.solver = solver ; self.all_phenotypes = all_phenotypes ; self.data_paths = data_paths - self.species_abundances = species_abundances ; self.ignore_trials = 
ignore_trials
-        self.ignore_timesteps = ignore_timesteps ; self.species_identities_rows = species_identities_rows
-        self.significant_deviation = significant_deviation ; self.extract_zip_path = extract_zip_path
-
-        self.community_members = community_members
-        self.consumed_mets = consumed_mets or set([
-            met for content in community_members.values() for met in content["phenotypes"]])
-        if community_members is not None or any([x is None for x in [fluxes_df, data_df]]):
-            (self.experimental_metadata, data_df, fluxes_df, carbon_conc, self.requisite_biomass,
-             self.trial_name_conversion, self.data_timestep_hr, simulation_timestep, media_conc
-             ) = GrowthData.process(community_members, base_media, solver, all_phenotypes, data_paths,
-                                    species_abundances, carbon_conc, ignore_trials, ignore_timesteps,
-                                    species_identities_rows, significant_deviation, extract_zip_path,
-                                    determine_requisite_biomass)
-            # for content in community_members.values() for met in content["phenotypes"]]
-        self.fluxes_tup = FBAHelper.parse_df(fluxes_df)
-        self.fluxes_df = fluxes_df ; self.data_df = data_df
-        self.default_excreta = [index for index, row in fluxes_df.iterrows() if any(row > 1)]
-        self.parameters, self.variables, self.constraints = {}, {}, {}
-        self.zipped_output, self.plots, self.names = [], [], []
-        self.experimental_metadata = experimental_metadata
-        self.carbon_conc = carbon_conc; self.media_conc = media_conc
-
-    #################### FITTING PHASE METHODS ####################
-
-    def fit_kcat(self, parameters: dict = None, mets_to_track: list = None, rel_final_conc: dict = None,
-                 zero_start: list = None, abs_final_conc: dict = None, graphs: list = None, data_timesteps: dict = None,
-                 export_zip_name: str = None, export_parameters: bool = True, requisite_biomass: dict = None,
-                 export_lp:str = 'solveKcat.lp', figures_zip_name:str=None, publishing=True, primals_export_path=None):
-        if export_zip_name and os.path.exists(export_zip_name): os.remove(export_zip_name)
-        kcat_primal = None
-        requisite_biomass = requisite_biomass or self.requisite_biomass
-        for index, coefs in enumerate(biomass_partition_coefs):
-            # solve for growth rate constants with the previously solved biomasses
-            newSim = CommPhitting(self.msdb_path, None, self.fluxes_df, self.data_df, self.carbon_conc,
-                                  self.media_conc, self.experimental_metadata, None, self.solver, self.all_phenotypes,
-                                  self.data_paths, self.species_abundances, self.ignore_trials, self.ignore_timesteps,
-                                  self.species_identities_rows, self.significant_deviation, self.extract_zip_path,
-                                  True, self.consumed_mets)
-            newSim.define_problem(parameters, mets_to_track, rel_final_conc, zero_start, abs_final_conc,
-                                  data_timesteps, export_zip_name, export_parameters, export_lp,
-                                  kcat_primal, coefs, requisite_biomass)
-            newSim.compute(graphs, export_zip_name, figures_zip_name, publishing,
-                           primals_export_path or re.sub(r"(.lp)", ".json", export_lp))
-            kcat_primal = parse_primals(newSim.values, coefs=coefs, kcat_vals=newSim.parameters["kcat"])
-            pprint(kcat_primal)
-            print(f"Iteration {index+1} is complete\n")
-        kcats = {k: val for k, val in newSim.values.items() if "kcat" in k}
-        DataFrame(kcats).T.to_csv("pheno_growth_kcat.tsv", sep="\t")
-        return kcats
-
-    def fit(self, parameters:dict=None, mets_to_track: list = None, rel_final_conc:dict=None, zero_start:list=None,
-            abs_final_conc:dict=None, graphs: list = None, data_timesteps: dict = None,
-            export_zip_name: str = None, export_parameters: bool = True, requisite_biomass: dict = None,
-            export_lp: str = 'CommPhitting.lp', 
figures_zip_name:str=None, publishing:bool=False, primals_export_path=None): - if hasattr(self, "requisite_biomass"): requisite_biomass = self.requisite_biomass - self.define_problem(parameters, mets_to_track, rel_final_conc, zero_start, abs_final_conc, - data_timesteps, export_zip_name, export_parameters, export_lp, - None, None, requisite_biomass) - self.compute(graphs, export_zip_name, figures_zip_name, publishing, - primals_export_path or re.sub(r"(.lp)", ".json", export_lp)) - - def define_b_vars(self, pheno, short_code, timestep, variables): - self.variables['b_' + pheno][short_code][timestep] = tupVariable( - _name("b_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - self.variables['b1_' + pheno][short_code][timestep] = tupVariable( - _name("b1_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - self.variables['b2_' + pheno][short_code][timestep] = tupVariable( - _name("b2_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - self.variables['b3_' + pheno][short_code][timestep] = tupVariable( - _name("b3_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - self.variables['b4_' + pheno][short_code][timestep] = tupVariable( - _name("b4_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - self.variables['b5_' + pheno][short_code][timestep] = tupVariable( - _name("b5_", pheno, short_code, timestep, self.names), Bounds(0, 1000)) - variables.extend([self.variables['b_' + pheno][short_code][timestep], - self.variables['b1_' + pheno][short_code][timestep], - self.variables['b2_' + pheno][short_code][timestep], - self.variables['b3_' + pheno][short_code][timestep], - self.variables['b4_' + pheno][short_code][timestep], - self.variables['b5_' + pheno][short_code][timestep]]) - if short_code not in self.variables[f"bin1_{pheno}"]: - self.variables[f"bin1_{pheno}"][short_code] = tupVariable( - _name("bin1_", pheno, short_code, "", self.names), Bounds(0, 1), "binary") - self.variables[f"bin2_{pheno}"][short_code] = tupVariable( - _name("bin2_", pheno, short_code, "", self.names), Bounds(0, 1), "binary") - self.variables[f"bin3_{pheno}"][short_code] = tupVariable( - _name("bin3_", pheno, short_code, "", self.names), Bounds(0, 1), "binary") - self.variables[f"bin4_{pheno}"][short_code] = tupVariable( - _name("bin4_", pheno, short_code, "", self.names), Bounds(0, 1), "binary") - self.variables[f"bin5_{pheno}"][short_code] = tupVariable( - _name("bin5_", pheno, short_code, "", self.names), Bounds(0, 1), "binary") - variables.extend([self.variables[f"bin1_{pheno}"][short_code], self.variables[f"bin2_{pheno}"][short_code], - self.variables[f"bin3_{pheno}"][short_code], self.variables[f"bin4_{pheno}"][short_code], - self.variables[f"bin5_{pheno}"][short_code]]) - return variables - - def define_b_cons(self, pheno, short_code, timestep, biomass_coefs): - biomass_coefs = biomass_coefs or biomass_partition_coefs[-1] - # define the partitioned biomass groups - ## b_n{pheno,t} <= coef*b_tot{pheno,t} - self.constraints['b1c_' + pheno][short_code][timestep] = tupConstraint( - _name("b1c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - {"elements": [biomass_coefs[0], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b1_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b2c_' + pheno][short_code][timestep] = tupConstraint( - _name("b2c_", pheno, short_code, timestep, self.names), Bounds(0, 
None), { - "elements": [ - {"elements": [biomass_coefs[1], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b2_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b3c_' + pheno][short_code][timestep] = tupConstraint( - _name("b3c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - {"elements": [biomass_coefs[2], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b3_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b4c_' + pheno][short_code][timestep] = tupConstraint( - _name("b4c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - {"elements": [biomass_coefs[3], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b4_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b5c_' + pheno][short_code][timestep] = tupConstraint( - _name("b5c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - {"elements": [biomass_coefs[4], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b5_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - - # define the comprehensive biomass constraints - ## coef*b{pheno,t} - b_n{pheno,t} - 1000*bin_n{pheno} <= 0 - self.constraints['b1c_control_' + pheno][short_code][timestep] = tupConstraint( - _name("b1c_control_", pheno, short_code, timestep, self.names), Bounds(None, 0), { - "elements": [ - {"elements": [biomass_coefs[0], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b1_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin1_{pheno}"][short_code].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b2c_control_' + pheno][short_code][timestep] = tupConstraint( - _name("b2c_control_", pheno, short_code, timestep, self.names), Bounds(None, 0), { - "elements": [ - {"elements": [biomass_coefs[1], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b2_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin2_{pheno}"][short_code].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b3c_control_' + pheno][short_code][timestep] = tupConstraint( - _name("b3c_control_", pheno, short_code, timestep, self.names), Bounds(None, 0), { - "elements": [ - {"elements": [biomass_coefs[2], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b3_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin3_{pheno}"][short_code].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b4c_control_' + pheno][short_code][timestep] = tupConstraint( - _name("b4c_control_", pheno, short_code, timestep, self.names), Bounds(None, 0), { - "elements": [ - {"elements": [biomass_coefs[3], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b4_' + 
pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin4_{pheno}"][short_code].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - self.constraints['b5c_control_' + pheno][short_code][timestep] = tupConstraint( - _name("b5c_control_", pheno, short_code, timestep, self.names), Bounds(None, 0), { - "elements": [ - {"elements": [biomass_coefs[4], self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['b5_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin5_{pheno}"][short_code].name], - "operation": "Mul"}, - ], - "operation": "Add" - }) - - # define the binary constraints - ## b_n{pheno,t} <= 1000 - 1000*bin_n{pheno} - self.constraints['bin1c_' + pheno][short_code][timestep] = tupConstraint( - _name("bin1c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - 1000, - {"elements": [-1, self.variables['b1_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin1_{pheno}"][short_code].name], - "operation": "Mul"} - ], - "operation": "Add" - }) - self.constraints['bin2c_' + pheno][short_code][timestep] = tupConstraint( - _name("bin2c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - 1000, - {"elements": [-1, self.variables['b2_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin2_{pheno}"][short_code].name], - "operation": "Mul"} - ], - "operation": "Add" - }) - self.constraints['bin3c_' + pheno][short_code][timestep] = tupConstraint( - _name("bin3c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - 1000, - {"elements": [-1, self.variables['b3_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin3_{pheno}"][short_code].name], - "operation": "Mul"} - ], - "operation": "Add" - }) - self.constraints['bin4c_' + pheno][short_code][timestep] = tupConstraint( - _name("bin4c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - 1000, - {"elements": [-1, self.variables['b4_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin4_{pheno}"][short_code].name], - "operation": "Mul"} - ], - "operation": "Add" - }) - self.constraints['bin5c_' + pheno][short_code][timestep] = tupConstraint( - _name("bin5c_", pheno, short_code, timestep, self.names), Bounds(0, None), { - "elements": [ - 1000, - {"elements": [-1, self.variables['b5_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [-1000, self.variables[f"bin5_{pheno}"][short_code].name], - "operation": "Mul"} - ], - "operation": "Add" - }) - - # load the constraints to the model - return [self.constraints['b1c_' + pheno][short_code][timestep], - self.constraints['b2c_' + pheno][short_code][timestep], - self.constraints['b3c_' + pheno][short_code][timestep], - self.constraints['b4c_' + pheno][short_code][timestep], - self.constraints['b5c_' + pheno][short_code][timestep], - self.constraints['b1c_control_' + pheno][short_code][timestep], - self.constraints['b2c_control_' + pheno][short_code][timestep], - self.constraints['b3c_control_' + pheno][short_code][timestep], - self.constraints['b4c_control_' + pheno][short_code][timestep], - self.constraints['b5c_control_' + pheno][short_code][timestep], - self.constraints['bin1c_' + 
pheno][short_code][timestep], - self.constraints['bin2c_' + pheno][short_code][timestep], - self.constraints['bin3c_' + pheno][short_code][timestep], - self.constraints['bin4c_' + pheno][short_code][timestep], - self.constraints['bin5c_' + pheno][short_code][timestep]] - - def initialize_vars_cons(self, pheno, short_code): - # cvt and cvf - self.variables['cvt_' + pheno] = {}; self.variables['cvf_' + pheno] = {} - self.variables['cvt_' + pheno][short_code] = {}; self.variables['cvf_' + pheno][short_code] = {} - # total biomass and growth - self.variables['b_' + pheno] = {}; self.variables['g_' + pheno] = {} - self.variables['b_' + pheno][short_code] = {}; self.variables['g_' + pheno][short_code] = {} - self.constraints['gc_' + pheno] = {}; self.constraints['cvc_' + pheno] = {} - self.constraints['gc_' + pheno][short_code] = {}; self.constraints['cvc_' + pheno][short_code] = {} - # partitioned biomasses - self.variables['b1_' + pheno] = {}; self.variables['b2_' + pheno] = {}; self.variables['b3_' + pheno] = {} - self.variables['b4_' + pheno] = {}; self.variables['b5_' + pheno] = {} - self.variables['b1_' + pheno][short_code] = {}; self.variables['b2_' + pheno][short_code] = {} - self.variables['b3_' + pheno][short_code] = {}; self.variables['b4_' + pheno][short_code] = {} - self.variables['b5_' + pheno][short_code] = {} - ## biomass binary variables - self.variables[f'bin1_{pheno}'] = {}; self.variables[f'bin2_{pheno}'] = {}; self.variables[f'bin3_{pheno}'] = {} - self.variables[f'bin4_{pheno}'] = {}; self.variables[f'bin5_{pheno}'] = {} - self.variables[f"bin1_{pheno}"][short_code] = {}; self.variables[f"bin2_{pheno}"][short_code] = {} - self.variables[f"bin3_{pheno}"][short_code] = {}; self.variables[f"bin4_{pheno}"][short_code] = {} - self.variables[f"bin5_{pheno}"][short_code] = {} - ## biomass partition constraints - self.constraints['b1c_' + pheno] = {}; self.constraints['b2c_' + pheno] = {}; self.constraints['b3c_' + pheno] = {} - self.constraints['b4c_' + pheno] = {}; self.constraints['b5c_' + pheno] = {} - self.constraints['b1c_' + pheno][short_code] = {}; self.constraints['b2c_' + pheno][short_code] = {} - self.constraints['b3c_' + pheno][short_code] = {}; self.constraints['b4c_' + pheno][short_code] = {} - self.constraints['b5c_' + pheno][short_code] = {} - self.constraints['b1c_control_' + pheno] = {}; self.constraints['b2c_control_' + pheno] = {} - self.constraints['b3c_control_' + pheno] = {}; self.constraints['b4c_control_' + pheno] = {} - self.constraints['b5c_control_' + pheno] = {} - self.constraints['b1c_control_' + pheno][short_code] = {}; self.constraints['b2c_control_' + pheno][short_code] = {} - self.constraints['b3c_control_' + pheno][short_code] = {}; self.constraints['b4c_control_' + pheno][short_code] = {} - self.constraints['b5c_control_' + pheno][short_code] = {} - self.constraints[f'binc_{pheno}'] = {}; self.constraints[f'binc_{pheno}'][short_code] = {} - self.constraints['bin1c_' + pheno] = {}; self.constraints['bin2c_' + pheno] = {} - self.constraints['bin3c_' + pheno] = {}; self.constraints['bin4c_' + pheno] = {}; self.constraints['bin5c_' + pheno] = {} - self.constraints['bin1c_' + pheno][short_code] = {}; self.constraints['bin2c_' + pheno][short_code] = {} - self.constraints['bin3c_' + pheno][short_code] = {}; self.constraints['bin4c_' + pheno][short_code] = {} - self.constraints['bin5c_' + pheno][short_code] = {} - - def get_timestep_bin(self, timestep): - if timestep < self.first: return 0 - elif timestep < self.second: return 1 - elif timestep < 
self.third: return 2 - elif timestep < self.fourth: return 3 - return 4 - - def define_problem(self, parameters=None, mets_to_track=None, rel_final_conc=None, zero_start=None, - abs_final_conc=None, data_timesteps=None, export_zip_name: str=None, - export_parameters: bool=True, export_lp: str='CommPhitting.lp', primal_values=None, - biomass_coefs=None, requisite_biomass:dict=None, biolog_simulation=False, - export_phenotype_profiles=True): - # parse the growth data - growth_tup = FBAHelper.parse_df(self.data_df, False) - self.phenotypes = list(self.fluxes_tup.columns) - self.phenotypes.extend([signal_species(signal)+"_stationary" for signal in growth_tup.columns if ( - ":" in signal and "OD" not in signal)]) - self.species_list = [signal_species(signal) for signal in growth_tup.columns if ":" in signal] - num_sorted = np.sort(np.array([int(obj[1:]) for obj in set(growth_tup.index)])) - # TODO - short_codes must be distinguished for different conditions - unique_short_codes = [f"{growth_tup.index[0][0]}{num}" for num in map(str, num_sorted)] - full_times = growth_tup.values[:, growth_tup.columns.index("Time (s)")] - self.times = {short_code: trial_contents(short_code, growth_tup.index, full_times) - for short_code in unique_short_codes} - average_time_series = np.mean(list(self.times.values()), axis=0) ; points = len(average_time_series) - self.first, self.second, self.third, self.fourth = int(points*0.1), int(points*0.25), int(points*0.45), int(points*0.7) - self.time_ranges = {0: average_time_series[:self.first], 1: average_time_series[self.first:self.second], - 2: average_time_series[self.second:self.third], 3: average_time_series[self.third:self.fourth], - 4: average_time_series[self.fourth:]} - - # define default values - # TODO render bcv and cvmin dependent upon temperature, and possibly trained on Carlson's data - parameters, data_timesteps = parameters or {}, data_timesteps or {} - self.parameters["data_timestep_hr"] = np.mean(np.diff(np.array(list( - self.times.values())).flatten()))/hour if not hasattr(self, "data_timestep_hr") else self.data_timestep_hr - self.parameters.update({ - "timestep_hr": self.parameters['data_timestep_hr'], - "cvct": 0.01, "cvcf": 0.01, - "bcv": 0.01, "cvmin": 0.01, - "kcat": 0.33, - 'diffpos': 1, 'diffneg': 1, # coefficients that weight difference between experimental and predicted biomass - "stationary": 10, # the penalty coefficient for the stationary phenotype - }) - self.parameters.update(parameters) - # distribute kcat values to all phenotypes of all species and update from previous simulations where necessary - self.parameters.update(self._universalize(self.parameters, "kcat", exclude=["stationary"])) - if primal_values is not None: - for species, content in self.parameters["kcat"].items(): - if species not in primal_values: continue - for pheno, content2 in content.items(): - if pheno not in primal_values[species]: continue - for time, val in content2.items(): - if time not in primal_values[species][pheno]: continue - self.parameters["kcat"][species][pheno][time] = val - print(self.parameters["kcat"]) - # define the metabolites that are tracked, exchanged, and not available in the media - # TODO the default zero_start logic appears to be incorrect - self.zero_start = zero_start or [met for met in self.consumed_mets - if (met not in self.carbon_conc or self.carbon_conc[met] == 0)] - self.rel_final_conc = rel_final_conc or { - met:0.1 for met, concs in self.carbon_conc.items() if any( - [concs[short_code] > 0 for short_code in 
self.data_df.index.unique()] - ) and met not in self.zero_start} - self.abs_final_conc = abs_final_conc or {} - if mets_to_track: self.mets_to_track = mets_to_track - elif not isinstance(rel_final_conc, dict): self.mets_to_track = self.fluxes_tup.index - else: self.mets_to_track = list(self.rel_final_conc.keys()) + self.zero_start - print(self.mets_to_track) - - ts_to_delete = {} # {short_code: full_times for short_code in unique_short_codes} - if data_timesteps: # {short_code:[times]} - for short_code, times in data_timesteps.items(): - ts_to_delete[short_code] = set(list(range(len(full_times)))) - set(times) - self.times[short_code] = np.delete(self.times[short_code], list(ts_to_delete[short_code])) - - # construct the problem - objective = tupObjective("minimize variance and phenotypic transitions", [], "min") - constraints, variables, simulated_mets = [], [], [] - time_1 = process_time() - for exID in self.fluxes_tup.index: - if exID == "bio": continue - met_id = re.search(r"(cpd\d{5})", exID).group() - met = self.msdb.compounds.get_by_id(met_id) - if "C" not in met.elements: continue - concID = f"c_{met_id}_e0" - simulated_mets.append(met_id) - self.variables[concID] = {}; self.constraints['dcc_' + met_id] = {} - - # define the growth rate for each metabolite and concentrations - # TODO the MM parameters may be deletable once the binned kcat method is refined - if "Vmax" and "Km" in self.parameters: - self.parameters["Vmax"].update(self._universalize(self.parameters["Vmax"], met_id)) - self.parameters["Km"].update(self._universalize(self.parameters["Km"], met_id)) - for short_code in unique_short_codes: - self.variables[concID][short_code] = {}; self.constraints['dcc_' + met_id][short_code] = {} - timesteps = list(range(1, len(self.times[short_code]) + 1)) - for timestep in timesteps: - ## define the concentration variables - conc_var = tupVariable(_name(concID, "", short_code, timestep, self.names)) - ## constrain initial time concentrations to the media or a large default - if timestep == timesteps[0]: - initial_val = None - if met_id in self.media_conc: initial_val = self.media_conc[met_id] - if met_id in self.zero_start: initial_val = 0 - if dict_keys_exists(self.carbon_conc, met_id, short_code): - initial_val = self.carbon_conc[met_id][short_code] - if initial_val is not None: - conc_var = conc_var._replace(bounds=Bounds(initial_val, initial_val)) - if biolog_simulation: conc_var = conc_var._replace(bounds=Bounds(1, None)) - ## mandate complete carbon consumption - elif timestep == timesteps[-1] and (met_id in self.rel_final_conc or met_id in self.abs_final_conc): - if met_id in self.rel_final_conc: - final_bound = self.variables[concID][short_code][1].bounds.lb * self.rel_final_conc[met_id] - if met_id in self.abs_final_conc: # this intentionally overwrites rel_final_conc - final_bound = self.abs_final_conc[met_id] - conc_var = conc_var._replace(bounds=Bounds(0, final_bound)) - if met_id in self.zero_start: - conc_var = conc_var._replace(bounds=Bounds(final_bound, final_bound)) - self.variables[concID][short_code][timestep] = conc_var - variables.append(self.variables[concID][short_code][timestep]) - for pheno in self.phenotypes: - self.constraints['dbc_' + pheno] = {short_code: {} for short_code in unique_short_codes} - - # define growth and biomass variables and constraints - for pheno in self.phenotypes: - for short_code in unique_short_codes: - self.initialize_vars_cons(pheno, short_code) - timesteps = list(range(1, len(self.times[short_code]) + 1)) - nth_percentile_timestep 
= timesteps[int(0.90*len(timesteps))] - penalty_range = np.linspace(self.parameters['stationary'], self.parameters['stationary']/10, - len(timesteps[nth_percentile_timestep:])) - timestep_excess_count = 0 - for timestep in map(int, timesteps): - variables = self.define_b_vars(pheno, short_code, timestep, variables) - if short_code not in self.constraints[f"binc_{pheno}"]: - self.constraints[f"binc_{pheno}"][short_code] = tupConstraint( - _name("binc_", pheno, short_code, "", self.names), Bounds(0, 4), { - "elements": [self.variables[f"bin1_{pheno}"][short_code].name, - self.variables[f"bin2_{pheno}"][short_code].name, - self.variables[f"bin3_{pheno}"][short_code].name, - self.variables[f"bin4_{pheno}"][short_code].name, - self.variables[f"bin5_{pheno}"][short_code].name], - "operation": "Add"}) - constraints.append(self.constraints[f'binc_{pheno}'][short_code]) - constraints.extend(self.define_b_cons(pheno, short_code, timestep, biomass_coefs)) - - ## define the growth rate variable or primal value - species, phenotype = pheno.split("_") - self.variables['g_' + pheno][short_code][timestep] = tupVariable( - _name("g_", pheno, short_code, timestep, self.names)) - variables.append(self.variables['g_' + pheno][short_code][timestep]) - - if 'stationary' in pheno: - weight = self.parameters['stationary'] - if timestep > nth_percentile_timestep: - weight = penalty_range[timestep_excess_count] - timestep_excess_count += 1 - objective.expr.extend([{ - "elements": [{"elements": [weight, self.variables['b_' + pheno][short_code][timestep].name], - "operation": "Mul"}], - "operation": "Add"}]) - continue - # the conversion rates to and from the stationary phase - self.variables['cvt_' + pheno][short_code][timestep] = tupVariable( - _name("cvt_", pheno, short_code, timestep, self.names), Bounds(0, 100)) - self.variables['cvf_' + pheno][short_code][timestep] = tupVariable( - _name("cvf_", pheno, short_code, timestep, self.names), Bounds(0, 100)) - variables.extend([self.variables['cvf_' + pheno][short_code][timestep], - self.variables['cvt_' + pheno][short_code][timestep]]) - - # cvt <= bcv*b_{pheno} + cvmin - self.constraints['cvc_' + pheno][short_code][timestep] = tupConstraint( - _name('cvc_', pheno, short_code, timestep, self.names), (0, None), { - "elements": [{"elements": [-1, self.variables['cvt_' + pheno][short_code][timestep].name], - "operation": "Mul"}], - "operation": "Add"}) - # biomass_term = [self.parameters['bcv']*b_value + self.parameters['cvmin']] if FBAHelper.isnumber(b_value) else [ - biomass_term = [self.parameters['cvmin'], - {"elements": [self.parameters['bcv'], - self.variables["b_"+pheno][short_code][timestep].name], - "operation": "Mul"}] - self.constraints['cvc_' + pheno][short_code][timestep].expr["elements"].extend(biomass_term) - - # g_{pheno} = b_{pheno}*v_{pheno} - b_values = [self.variables['b1_' + pheno][short_code][timestep].name, - self.variables['b2_' + pheno][short_code][timestep].name, - self.variables['b3_' + pheno][short_code][timestep].name, - self.variables['b4_' + pheno][short_code][timestep].name, - self.variables['b5_' + pheno][short_code][timestep].name] - self.constraints['gc_' + pheno][short_code][timestep] = tupConstraint( - name=_name('gc_', pheno, short_code, timestep, self.names), - expr={"elements": [*[{"elements": [-self.parameters["kcat"][species][phenotype], b], - "operation": "Mul"} for b in b_values], - self.variables['g_' + pheno][short_code][timestep].name], - "operation": "Add"}) - - constraints.extend([self.constraints['cvc_' + 
pheno][short_code][timestep], - self.constraints['gc_' + pheno][short_code][timestep]]) - # self.constraints["binTot_" + pheno][short_code]]) - - # define the concentration constraint - half_dt = self.parameters['data_timestep_hr'] / 2 - time_2 = process_time() - print(f'Done with concentrations and biomass loops: {(time_2 - time_1) / 60} min') - for r_index, met in enumerate(self.fluxes_tup.index): - met_id = _met_id_parser(met) - if met_id not in simulated_mets: continue - concID = f"c_{met_id}_e0" - for short_code in unique_short_codes: - timesteps = list(range(1, len(self.times[short_code]) + 1)) - for timestep in timesteps[:-1]: - # c_{met} + dt/2*sum_k^K(n_{k,met} * (g_{pheno}+g+1_{pheno})) = c+1_{met} - next_timestep = timestep + 1 - growth_phenos = [[self.variables['g_' + pheno][short_code][next_timestep].name, - self.variables['g_' + pheno][short_code][timestep].name] - for pheno in self.fluxes_tup.columns] - self.constraints['dcc_' + met_id][short_code][timestep] = tupConstraint( - name=_name("dcc_", met_id, short_code, timestep, self.names), - expr={ - "elements": [ - self.variables[concID][short_code][timestep].name, - {"elements": [-1, self.variables[concID][short_code][next_timestep].name], - "operation": "Mul"}, - *OptlangHelper.dot_product( - growth_phenos, heuns_coefs=half_dt * self.fluxes_tup.values[r_index])], - "operation": "Add"}) - constraints.append(self.constraints['dcc_' + met_id][short_code][timestep]) - - # define the conversion variables of every signal for every phenotype - # for signal in growth_tup.columns[2:]: - # for pheno in self.fluxes_tup.columns: - # conversion_name = "_".join([signal, pheno, "__conversion"]) - # self.variables[conversion_name] = tupVariable(conversion_name) - # variables.append(self.variables[conversion_name]) - - time_3 = process_time() - print(f'Done with DCC loop: {(time_3 - time_2) / 60} min') - species_phenos = {} - self.conversion_bounds = [5e-6, 50] - for index, org_signal in enumerate(growth_tup.columns[2:]): - # signal = org_signal.split(":")[1] - signal = org_signal.replace(":", "|") - species = signal_species(org_signal) - species_phenos[species] = {None if "OD" in species else f"{species}_stationary"} - signal_column_index = index + 2 - data_timestep = 1 - self.variables[signal + '|conversion'] = tupVariable( - signal + '|conversion', bounds=Bounds(*self.conversion_bounds)) - variables.append(self.variables[signal + '|conversion']) - - self.variables[signal + '|bio'] = {}; self.variables[signal + '|diffpos'] = {} - self.variables[signal + '|diffneg'] = {}; self.variables['g_' + species] = {} - self.constraints[signal + '|bioc'] = {}; self.constraints[signal + '|diffc'] = {} - self.constraints["gc_" + species] = {}; self.constraints["totVc_" + species] = {} - self.constraints["totGc_" + species] = {}; self.constraints[signal + '|bio_finalc'] = {} - for short_code in unique_short_codes: - self.variables[signal + '|bio'][short_code] = {} - self.variables[signal + '|diffpos'][short_code] = {} - self.variables[signal + '|diffneg'][short_code] = {} - self.variables['g_' + species][short_code] = {} - self.constraints[signal + '|bioc'][short_code] = {} - self.constraints[signal + '|diffc'][short_code] = {} - self.constraints["gc_" + species][short_code] = {} - self.constraints["totVc_" + species][short_code] = {} - self.constraints["totGc_" + species][short_code] = {} - # self.constraints[signal + '|bio_finalc'][short_code] = {} - # the value entries are matched to only the timesteps that are condoned by data_timesteps - 
values_slice = trial_contents(short_code, growth_tup.index, growth_tup.values) - if ts_to_delete: values_slice = np.delete(values_slice, list(ts_to_delete[short_code]), axis=0) - timesteps = list(range(1, len(values_slice) + 1)) - # the last timestep is omitted since Heun's method in the modelled biomass - ## requires a future timestep, which does not exist for the last timestep - for timestep in timesteps[:-1]: - ## the user timestep and data timestep must be synchronized - if (int(timestep)*self.parameters['timestep_hr'] - < data_timestep*self.parameters['data_timestep_hr']): - print(f"Skipping timestep {timestep} that does not align with the user's timestep") ; continue - data_timestep += 1 - if data_timestep > int(self.times[short_code][-1] / self.parameters["data_timestep_hr"]): - print(f"The user-defined time exceeds the simulation time, so the DBC & diff loop is broken.") - break - next_timestep = int(timestep) + 1 - ## the phenotype transition terms are aggregated - total_biomass, signal_sum, from_sum, to_sum = [], [], [], [] - for pheno_index, pheno in enumerate(self.phenotypes): - ### define the collections of signal and pheno terms - if species in pheno or "OD" in signal: - # if not FBAHelper.isnumber(b_values[pheno][short_code][timestep]): - signal_sum.append({"operation": "Mul", "elements": [ - -1, self.variables['b_' + pheno][short_code][timestep].name]}) - # else: - # signal_sum.append(-b_values[pheno][short_code][timestep]) - ### total_biomass.append(self.variables["b_"+pheno][short_code][timestep].name) - if all(['OD' not in signal, species in pheno, 'stationary' not in pheno]): - species_phenos[species].add(pheno) - from_sum.append({"operation": "Mul", "elements": [ - -1, self.variables["cvf_" + pheno][short_code][timestep].name]}) - to_sum.append(self.variables["cvt_" + pheno][short_code][timestep].name) - for pheno in species_phenos[species]: - if "OD" in signal: continue - # print(pheno, timestep, b_values[pheno][short_code][timestep], b_values[pheno][short_code][next_timestep]) - if "stationary" in pheno: - # b_{phenotype} - sum_k^K(es_k*cvf) + sum_k^K(pheno_bool*cvt) = b+1_{phenotype} - self.constraints['dbc_' + pheno][short_code][timestep] = tupConstraint( - name=_name("dbc_", pheno, short_code, timestep, self.names), - expr={"elements": [*from_sum, *to_sum], "operation": "Add"}) - else: - # b_{phenotype} + dt/2*(g_{phenotype} + g+1_{phenotype}) + cvf-cvt = b+1_{phenotype} - self.constraints['dbc_' + pheno][short_code][timestep] = tupConstraint( - name=_name("dbc_", pheno, short_code, timestep, self.names), - expr={ - "elements": [ - self.variables['cvf_' + pheno][short_code][timestep].name, - {"elements": [half_dt, self.variables['g_' + pheno][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [half_dt, self.variables['g_' + pheno][short_code][next_timestep].name], - "operation": "Mul"}, - {"elements": [-1, self.variables['cvt_' + pheno][short_code][timestep].name], - "operation": "Mul"}], - "operation": "Add"}) - # if not FBAHelper.isnumber(self.variables['b_' + pheno][short_code][timestep]): - biomass_term = [self.variables['b_' + pheno][short_code][timestep].name, { - "elements": [-1, self.variables['b_' + pheno][short_code][next_timestep].name], - "operation": "Mul"}] - # else: - # biomass_term = [b_values[pheno][short_code][timestep]-b_values[pheno][short_code][next_timestep]] - self.constraints['dbc_' + pheno][short_code][timestep].expr["elements"].extend(biomass_term) - constraints.append(self.constraints['dbc_' + 
pheno][short_code][timestep]) - - if not requisite_biomass or any([timestep != timesteps[-2], signal not in requisite_biomass[short_code]]): - self.variables[signal + '|bio'][short_code][timestep] = tupVariable( - _name(signal, '|bio', short_code, timestep, self.names)) - else: - biomass_flux = requisite_biomass[short_code][signal]["bio"] - estimated_biomass = biomass_flux #* int(timestep)*self.parameters['data_timestep_hr'] - self.variables[signal + '|bio'][short_code][timestep] = tupVariable( - _name(signal, '|bio', short_code, timestep, self.names), - Bounds(estimated_biomass, None)) - self.variables[signal + '|diffpos'][short_code][timestep] = tupVariable( - _name(signal, '|diffpos', short_code, timestep, self.names), Bounds(0, 100)) - self.variables[signal + '|diffneg'][short_code][timestep] = tupVariable( - _name(signal, '|diffneg', short_code, timestep, self.names), Bounds(0, 100)) - variables.extend([self.variables[signal + '|bio'][short_code][timestep], - self.variables[signal + '|diffpos'][short_code][timestep], - self.variables[signal + '|diffneg'][short_code][timestep]]) - - # {signal}__conversion*datum = {signal}__bio - # TODO - the conversion variable must be a constant for BIOLOG conditions - self.constraints[signal + '|bioc'][short_code][timestep] = tupConstraint( - name=_name(signal, '|bioc', short_code, timestep, self.names), - expr={ - "elements": [ - {"elements": [-1, self.variables[signal + '|bio'][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [self.variables[signal + '|conversion'].name, - values_slice[timestep, signal_column_index]], - "operation": "Mul"}], - "operation": "Add"}) - constraints.append(self.constraints[signal + '|bioc'][short_code][timestep]) - - # {speces}_bio + {signal}_diffneg-{signal}_diffpos = sum_k^K(es_k*b_{phenotype}) - self.constraints[signal + '|diffc'][short_code][timestep] = tupConstraint( - name=_name(signal, '|diffc', short_code, timestep, self.names), - expr={ - "elements": [ - self.variables[signal + '|bio'][short_code][timestep].name, - self.variables[signal + '|diffneg'][short_code][timestep].name, - {"elements": [-1, self.variables[signal + '|diffpos'][short_code][timestep].name], - "operation": "Mul"}], - "operation": "Add"}) - if all([isinstance(val, dict) for val in signal_sum]): - self.constraints[signal + "|diffc"][short_code][timestep].expr["elements"].extend(signal_sum) - else: raise ValueError(f"The {signal_sum} value has unexpected contents.") - constraints.append(self.constraints[signal + '|diffc'][short_code][timestep]) - - objective.expr.extend([{ - "elements": [ - {"elements": [self.parameters['diffpos'], - self.variables[f'{signal}|diffpos'][short_code][timestep].name], - "operation": "Mul"}, - {"elements": [self.parameters['diffneg'], - self.variables[f'{signal}|diffneg'][short_code][timestep].name], - "operation": "Mul"}], - "operation": "Add"}]) - - time_4 = process_time() - print(f'Done with the DBC & diffc loop: {(time_4 - time_3) / 60} min') - - # construct the problem - self.problem = OptlangHelper.define_model("CommPhitting model", variables, constraints, objective, True) - self.hdf5_name = export_lp.replace(".lp", ".h5") - self.hdf5_file = File(self.hdf5_name, 'w') - time_5 = process_time() - print(f'Done with constructing the {type(self.problem)} model: {(time_5 - time_4) / 60} min') - - # export contents - if export_phenotype_profiles: - phenotype_profiles_name = 'phenotype_profiles.tsv' - self.fluxes_df.to_csv(phenotype_profiles_name, sep="\t") - 
            self.zipped_output.append(phenotype_profiles_name)
        if export_parameters:
            parameter_name = 'parameters.tsv'
            DataFrame(data=list(self.parameters.values()), index=list(self.parameters.keys()),
                      columns=['values']).to_csv(parameter_name, sep="\t")
            self.zipped_output.append(parameter_name)
        if export_lp:
            if os.path.dirname(export_lp): os.makedirs(os.path.dirname(export_lp), exist_ok=True)
            with open(export_lp, 'w') as lp: lp.write(self.problem.to_lp())
            model_name = 'CommPhitting.json'
            _export_model_json(self.problem.to_json(), model_name)
            self.zipped_output.extend([export_lp, model_name])
        if export_zip_name:
            self.zip_name = export_zip_name
            sleep(2)
            with ZipFile(self.zip_name, 'a', compression=ZIP_LZMA) as zp:
                # iterate over a copy, since self.zipped_output is mutated during the loop
                for file in list(self.zipped_output):
                    zp.write(file) ; os.remove(file) ; self.zipped_output.remove(file)
        time_6 = process_time()
        print(f'Done exporting the content: {(time_6 - time_5) / 60} min')

    def compute(self, graphs: list = None, export_zip_name=None, figures_zip_name=None, publishing=False,
                primals_export_path: str = "primal_values.json", remove_empty_plots=False):
        print("starting optimization")
        time1 = process_time()
        self.values = {}
        solution = self.problem.optimize()
        timesteps = min(list(map(len, self.times.values())))
        fit_quality = self.problem.objective.value/timesteps
        print(f"The optimization fit quality is {fit_quality}")
        if "parameters.tsv" in self.zipped_output:
            self.parameters["fit"] = fit_quality
            parameter_name = 'parameters.tsv'
            DataFrame(data=list(self.parameters.values()), index=list(self.parameters.keys()),
                      columns=['values']).to_csv(parameter_name, sep="\t")
            with ZipFile(self.zip_name, 'a', compression=ZIP_LZMA) as zp:
                for file in self.zipped_output:
                    zp.write(file) ; os.remove(file)

        # TODO approximate a threshold of good fits, and trigger black box optimization for bad fits
        ## that iteratively adjusts parameters until the fit metric falls below the threshold.
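        # A minimal sketch of that proposed refit loop (hypothetical: neither a fit_threshold
        # parameter nor a perturb() helper exists in this class yet):
        #     while fit_quality > fit_threshold:
        #         self.parameters["kcat"] = perturb(self.parameters["kcat"])
        #         self.define_problem(...) ; self.problem.optimize()
        #         fit_quality = self.problem.objective.value / timesteps
        # Smaller objective values indicate better fits, since the objective penalizes the
        # diffpos/diffneg deviations between simulated and measured biomass.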
- - # categorize the primal values by trial and time - if "optimal" not in solution: - raise FeasibilityError(f'The solution is sub-optimal, with a(n) {solution} status.') - if all(np.array(list(self.problem.primal_values.values())) == 0): - raise NoFluxError("The simulation lacks any flux.") - for variable, value in self.problem.primal_values.items(): - if "v_" in variable: self.values[variable] = value - elif 'conversion' in variable or re.search(r"(bin\d)", variable): - self.values[short_code].update({variable: value}) - if value in self.conversion_bounds: - warnings.warn(f"The conversion factor {value} optimized to a bound, which may be " - f"indicative of an error, such as improper kinetic rates.") - else: - basename, short_code, timestep = variable.split('-') - time_hr = int(timestep) * self.parameters['data_timestep_hr'] - self.values[short_code] = self.values.get(short_code, {}) - self.values[short_code][basename] = self.values[short_code].get(basename, {}) - self.values[short_code][basename][time_hr] = value - - # export the processed primal values for graphing - # with open(primals_export_path, 'w') as out: - # json.dump(self.values, out, indent=3) - # if not export_zip_name and hasattr(self, 'zip_name'): - # export_zip_name = self.zip_name - # if export_zip_name: - # with ZipFile(export_zip_name, 'a', compression=ZIP_LZMA) as zp: - # zp.write(primals_export_path) - # os.remove(primals_export_path) - # visualize the specified information - time2 = process_time() - if graphs: self.graph(graphs, export_zip_name=figures_zip_name or export_zip_name, - publishing=publishing, remove_empty_plots=remove_empty_plots) - - # parse the primal values - values_df = DataFrame(self.values) - values_index = values_df.index.tolist() - for col in values_df.columns: - trial_values = values_df[col].tolist() - ## process the times - times = [list(ele.keys()) for ele in trial_values if isinstance(ele, dict)] - max_time = max(list(map(len, times))) - for max_time_series in times: - if len(max_time_series) == max_time: break - trial_path = f'results/primals/{col}/' - self.hdf5_file.create_dataset(f'{trial_path}/times', data=max_time_series) - ## process the data values - for index, ele in enumerate(trial_values): - dataset_name = f'{trial_path}/{values_index[index]}' - if FBAHelper.isnumber(ele): self.hdf5_file.create_dataset(dataset_name, data=[float(ele)]) - elif isinstance(ele, dict): - self.hdf5_file.create_dataset(dataset_name, data=list(map(float, ele.values()))) - self.hdf5_file[dataset_name].attrs["full_time"] = (len(ele.values()) == max_time) - - self.hdf5_file.close() - with ZipFile(self.zip_name, 'a', compression=ZIP_LZMA) as zp: - zp.write(self.hdf5_name) ; os.remove(self.hdf5_name) - - time3 = process_time() - print(f"Optimization completed in {(time2-time1)/60} minutes") - print(f"Graphing completed in {(time3-time2)/60} minutes") - - def load_model(self, mscomfit_json_path: str = None, zip_name: str = None, model_to_load: dict = None): - if zip_name: - with ZipFile(zip_name, 'r') as zp: zp.extract(mscomfit_json_path) - if mscomfit_json_path: - with open(mscomfit_json_path, 'r') as mscmft: return json.load(mscmft) - if model_to_load: self.problem = Model.from_json(model_to_load) - - @staticmethod - def assign_values(param, var, next_dimension, kcat=True): - dic = {var: {}} - for dim1, dim2_list in next_dimension.items(): - if isinstance(dim2_list, dict): dic[var].update(CommPhitting.assign_values(param, dim1, dim2_list)) - else: - if kcat: dic[var][dim1] = param - else: dic[var][dim1] = 
{dim2: param for dim2 in dim2_list} - return dic - - def _universalize(self, param, var, next_dimension=None, exclude=None, tsBin=False): - if not next_dimension: - next_dimension = {} - for organism in self.fluxes_tup.columns: - species, pheno = organism.split("_") - if pheno in exclude: continue - if not tsBin: - if species in next_dimension: next_dimension[species].append(pheno) - else: next_dimension[species] = [pheno] - else: - if species in next_dimension: next_dimension[species].update({pheno: self.time_ranges}) - else: next_dimension[species] = {pheno: self.time_ranges} - if FBAHelper.isnumber(param): return CommPhitting.assign_values(param, var, next_dimension) - elif FBAHelper.isnumber(param[var]): return CommPhitting.assign_values(param[var], var, next_dimension) - elif isinstance(param[var], dict): - return {var: {dim1: {dim2: param[var][dim1] for dim2 in dim2_list} - for dim1, dim2_list in next_dimension.items()}} - else: logger.critical(f"The param (with keys {dic_keys(param)}) and var {var} are not amenable" - " with the parameterizing a universal value.") - # {short_code: {list(timestep_info.keys())[0]: find_dic_number(param)} for short_code, timestep_info in variable.items()}} - - def adjust_color(self, color, amount=0.5): - """ - Lightens the given color by multiplying (1-luminosity) by the given amount. - Input can be matplotlib color string, hex string, or RGB tuple. - - Examples: - >> lighten_color('g', 0.3) - >> lighten_color('#F034A3', 0.6) - >> lighten_color((.3,.55,.1), 0.5) - """ - import colorsys - import matplotlib.colors as mc - try: c = mc.cnames[color] - except: c = color - c = colorsys.rgb_to_hls(*mc.to_rgb(c)) - return colorsys.hls_to_rgb(c[0], max(0, min(1, amount * c[1])), c[2]) - - def _add_plot(self, ax, labels, label, basename, trial, x_axis_split, linestyle="solid", - scatter=False, color=None, xs=None, ys=None): - labels.append(label or basename.split('-')[-1]) - xs = xs if xs is not None else list(map(float, self.values[trial][basename].keys())) - ys = ys if ys is not None else list(map(float, self.values[trial][basename].values())) - if scatter: ax.scatter(xs, ys, s=10, label=labels[-1], color=color or None) - else: ax.plot(xs, ys, label=labels[-1], linestyle=linestyle, color=color or None) - ax.set_xticks(list(map(int, xs))[::x_axis_split]) - return ax, labels - - def graph(self, graphs, primal_values_filename: str = None, primal_values_zip_path: str = None, - export_zip_name: str = None, data_timestep_hr: float = 0.163, publishing: bool = False, - title: str = None, remove_empty_plots:bool = False): - print(export_zip_name) - # define the default timestep ratio as 1 - data_timestep_hr = self.parameters.get('data_timestep_hr', data_timestep_hr) - timestep_ratio = data_timestep_hr / self.parameters.get('timestep_hr', data_timestep_hr) - if primal_values_filename: - if primal_values_zip_path: - with ZipFile(primal_values_zip_path, 'r') as zp: zp.extract(primal_values_filename) - with open(primal_values_filename, 'r', encoding='utf-8') as primal: self.values = json.load(primal) - - # plot the content for desired trials - x_axis_split = int(3 / data_timestep_hr / timestep_ratio) - self.plots = set() - contents = {"biomass": 'b_', "all_biomass": 'b_', "growth": 'g_', "conc": "c_"} - mM_threshold = 1e-3 - for graph_index, graph in enumerate(graphs): - content = contents.get(graph['content'], graph['content']) - y_label = 'Variable value'; x_label = r'Time ($hr$)' - if any([x in graph['content'] for x in ['biomass', 'OD']]): - total_biomasses = {name: [] 
for name in self.species_list} - total_biomasses.update({"OD":[]}) - if "species" not in graph: graph['species'] = self.species_list - if "biomass" in graph['content']: y_label = r'Biomass ($\frac{g}{L}$)' - elif 'growth' in graph['content']: y_label = r'Biomass growth ($\frac{g}{hr}$)' - graph["experimental_data"] = graph.get("experimental_data", False) - if "painting" not in graph: - graph["painting"] = { - "OD": { - "color": "blue", - "linestyle": "solid", - "name": "Total biomass" - }, - "ecoli": { - "color": "red", - "linestyle": "dashed", - "name": "E. coli" - }, - "pf": { - "color": "green", - "linestyle": "dotted", - "name": "P. fluorescens" - }} - graph["parsed"] = graph.get("parsed", False) - if 'phenotype' in graph and graph['phenotype'] == '*': - if "species" not in graph: graph['species'] = self.species_list - graph['phenotype'] = set([pheno.split("_")[-1] for pheno in self.phenotypes - if pheno.split("_")[0] in graph["species"]]) - # TODO - a species-resolved option must be developed for the paper figure - if 'species' in graph and graph['species'] == '*': graph['species'] = self.species_list - elif content == "c_" and 'mets' not in graph: - print(self.mets_to_track) - graph["mets"] = self.mets_to_track - elif not any(["species" in graph, "mets" in graph]): - raise ValueError(f"The specified graph {graph} must define species for which data will be plotted.") - print(f"graph_{graph_index}") ; pprint(graph) - - # define figure specifications - if publishing: - pyplot.rc('axes', titlesize=22, labelsize=28) - pyplot.rc('xtick', labelsize=24) - pyplot.rc('ytick', labelsize=24) - pyplot.rc('legend', fontsize=18) - if graph["parsed"]: - parsed_graphs = {} - for species in graph["species"]: - parsed_graphs[species] = pyplot.subplots(dpi=200, figsize=(11, 7)) - else: fig, ax = pyplot.subplots(dpi=200, figsize=(11, 7)) - yscale = "linear" - - # populate the figures - for trial, basenames in self.values.items(): - if trial not in graph['trial']: continue - labels = [] - for basename, values in basenames.items(): - # graph experimental and total simulated biomasses - if any([x in graph['content'] for x in ['biomass', 'OD']]): - if 'b_' in basename: - vals = list(map(float, values.values())) - var_name, species, phenotype = basename.split('_') - # ic(basename) - label = f'{species}_biomass (model)' - if publishing: - species_name = graph["painting"][species]["name"] - label = f'{species_name} total (model)' - labels.append({species: label}) - if remove_empty_plots and all([v == 0 for v in vals]): - print(f"The {basename} is empty and thus is removed.") - continue - if (any([x in graph['content'] for x in ["total", "biomass", 'OD']]) or - graph['species'] == self.species_list): # and not graph["parsed"]: - total_biomasses['OD'].append(vals) - if "OD" not in graph['content']: total_biomasses[species].append(vals) - if all([graph['experimental_data'], '|bio' in basename, ]): - # any([content in basename])]): # TODO - any() must include all_biomass and total - species, signal, phenotype = basename.split('|') - label = basename - if publishing: - species_name = "total" if "OD" in signal else graph["painting"][species]["name"] - label = f'Experimental {species_name} (from {signal})' - # print(basename, label, self.values[trial][basename].values()) - if remove_empty_plots and all(self.values[trial][basename].values() == 0): - print(f"The {basename} is empty and thus is removed.") - continue - ax, labels = self._add_plot(ax, labels, label, basename, trial, x_axis_split, scatter=True, - 
color=self.adjust_color(graph["painting"][species]["color"], 1.5)) - - if content not in basename: continue - # graph individual phenotypes - if "phenotype" in graph: - # print(graph['phenotype']) - for specie in graph["species"]: - if specie not in basename: continue - if not any([p in basename for p in graph['phenotype']]): - print(f"{basename} data with unknown phenotype.") - continue - if remove_empty_plots and all(self.values[trial][basename].values() == 0): - print(f"The {specie} is empty and thus is removed.") - continue - if graph["parsed"]: fig, ax = parsed_graphs[specie] - ## define graph characteristics - label = basename.split("_")[-1] - style = "solid" - if len(graph["species"]) > 1: - label = re.sub(r"(^[a-b]+\_)", "", basename) - style = graph["painting"][specie]["linestyle"] - ax, labels = self._add_plot(ax, labels, label, basename, trial, x_axis_split, style) - if graph["parsed"]: parsed_graphs[specie] = (fig, ax) - # graph media concentration plots - elif "mets" in graph and all([any([x in basename for x in graph["mets"]]), 'c_cpd' in basename]): - if not any(np.array(list(self.values[trial][basename].values())) > mM_threshold): continue - if remove_empty_plots and all(self.values[trial][basename].values() == 0): continue - label=self.msdb.compounds.get_by_id(re.search(r"(cpd\d+)", basename).group()).name - ax, labels = self._add_plot(ax, labels, label, basename, trial, x_axis_split) - yscale = "log" - y_label = r'Concentration ($mM$)' - - if labels: # assesses whether graph(s) were created - ## graph all of the total biomasses - if any([x in graph['content'] for x in ['OD', 'biomass', 'total']]): - labeled_species = [label for label in labels if isinstance(label, dict)] - for name, vals in total_biomasses.items(): - # ic(name) - if not vals or (len(total_biomasses) == 2 and "OD" not in name): continue - if len(total_biomasses) == 2: - specie_label = [graph["painting"][name]["name"] for name in total_biomasses - if "OD" not in name][0] - label = f"{graph['painting'][name]['name']} ({specie_label})" - else: - label = f'{name}_biomass (model)' - if labeled_species: - for label_specie in labeled_species: - if name in label_specie: label = label_specie[name] ; break - style = "solid" if (len(graph["species"]) < 1 or name not in graph["painting"] - ) else graph["painting"][name]["linestyle"] - style = "dashdot" if "model" in label else style - style = "solid" if ("OD" in name and not graph["experimental_data"] - or "total" in graph["content"]) else style - total_biomass = sum(np.array(vals))[:-1] - xs = list(map(float, values.keys())) - if graph["parsed"]: fig, ax = parsed_graphs[name] - self._add_plot(ax, labels, label, None, None, x_axis_split, style, False, - graph["painting"][name]["color"], xs, total_biomass) - if graph["parsed"]: - ## process and export the parsed figures - ax.set_xlabel(x_label) ; ax.set_ylabel(y_label) ; ax.grid(axis="y") - ax.set_yscale(yscale) ; ax.legend() - phenotype_id = graph.get('phenotype', "") - if "phenotype" in graph and not isinstance(graph['phenotype'], str): - phenotype_id = f"{','.join(graph['phenotype'])} phenotypes" - fig_name = f'{"_".join([trial, name, phenotype_id, content])}.jpg' - fig.savefig(fig_name, bbox_inches="tight", transparent=True) - self.plots.add(fig_name) - - if graph["parsed"]: continue - ## process and export the non-parsed figures - phenotype_id = graph.get('phenotype', "") - if "phenotype" in graph and not isinstance(graph['phenotype'], str): - phenotype_id = f"{','.join(graph['phenotype'])} phenotypes" - - 
species_id = "" - if "mets" not in graph and content != "c_": - species_id = graph["species"] if isinstance(graph["species"], str) else ",".join(graph["species"]) - if "species" in graph and graph['species'] == self.species_list: species_id = 'all species' - else: phenotype_id = f"{','.join(graph['species'])} species" - if species_id == "all species" and not phenotype_id: phenotype_id = ','.join(graph['species']) - - ax.set_xlabel(x_label) ; ax.set_ylabel(y_label) - if "mets" in graph: ax.set_ylim(mM_threshold) - ax.grid(axis="y") - if len(labels) > 1: ax.legend() - else: yscale = "linear" - ax.set_yscale(yscale) - if not publishing: - if not title: - org_content = content if content not in contents.values() else list( - contents.keys())[list(contents.values()).index(content)] - this_title = f'{org_content} of {species_id} ({phenotype_id}) in the {trial} trial' - if content == "c_": this_title = f"{org_content} in the {trial} trial" - ax.set_title(this_title) - else: ax.set_title(title) - fig_name = f'{"_".join([trial, species_id, phenotype_id, content])}.jpg' - if "mets" in graph: fig_name = f"{trial}_{','.join(graph['mets'])}_c.jpg" - fig.savefig(fig_name, bbox_inches="tight", transparent=True) - - self.plots.add(fig_name) - - # export the figures with other simulation content - if export_zip_name: - with ZipFile(export_zip_name, 'a', compression=ZIP_LZMA) as zp: - for plot in self.plots: - zp.write(plot) ; os.remove(plot) - - - #################### ENGINEERING PHASE METHODS #################### - - def engineering(self): - if not hasattr(self, "problem"): - self.fit() # TODO - accommodate both fitting a new model and loading an existing model - - # This will capture biomass variables at all times and trials, which seems undesirable - self.problem.objective = Objective(sum([x for x in self.problem.variables if "bio" in x.name])) - - # Use a community COBRA model and CommKinetics with the fitted kinetic parameters? 
    def _add_phenotypes(self):
        pass

    def _change_obj(self):
        pass


class BIOLOGPhitting(CommPhitting):
    def __init__(self, carbon_conc, media_conc, biolog_df, fluxes_df,
                 experimental_metadata, msdb_path, community_members):
        self.biolog_df = biolog_df ; self.experimental_metadata = experimental_metadata
        self.carbon_conc = carbon_conc ; self.media_conc = media_conc or []
        self.fluxes_df = fluxes_df ; self.phenotypes = list(self.fluxes_df.columns)
        self.phenotypes.extend([signal_species(signal)+"_stationary"
                                for signal in self.biolog_df if ":" in signal])
        self.community_members = community_members
        from modelseedpy.biochem import from_local
        self.msdb_path = msdb_path ; self.msdb = from_local(msdb_path)

    def fitAll(self, parameters: dict = None, rel_final_conc: float = None,
               abs_final_conc: dict = None, graphs: list = None, data_timesteps: dict = None,
               export_zip_name: str = None, export_parameters: bool = True, requisite_biomass: dict = None,
               figures_zip_name: str = None, publishing: bool = False, primals_export_path: str = None):
        # simulate each condition
        if export_zip_name and os.path.exists(export_zip_name):
            os.remove(export_zip_name)
        org_rel_final_conc = rel_final_conc
        # total_reactions = set(list(chain.from_iterable([model.reactions for model in models_dict.values()])))
        model_abbreviations = ','.join([content["name"] for content in self.community_members.values()])
        for exp_index, experiment in self.experimental_metadata.iterrows():
            print(f"\n{exp_index} {experiment}")
            display(experiment)
            pheno = experiment["ModelSEED_ID"]
            if not pheno:
                print("The BIOLOG condition is not defined.")
                continue
            # determine whether any member can consume the condition's carbon source
            valid_condition = False
            cpd = self.msdb.compounds.get_by_id(pheno)
            for model in self.community_members:
                if "C" not in cpd.elements or not any([re.search(pheno, rxn.id) for rxn in model.reactions]):
                    continue
                exp_list = [pheno] if isinstance(pheno, str) else pheno
                self.community_members[model].update({"phenotypes": {
                    re.sub(r"(-|\s)", "", experiment["condition"]): {"consumed": exp_list}}})
                # the requisite biomass for each condition is based on which member consumes the compound
                valid_condition = True
            # skip the condition when none of the members can utilize it
            if not valid_condition:
                print(f"The BIOLOG condition with {experiment['ModelSEED_ID']} is not"
                      f" absorbed by the {model_abbreviations} model(s).")
                continue
            print(f"The {experiment['ModelSEED_ID']} ({cpd.formula}) metabolite of the "
                  f"{experiment['condition']} condition may feed the {model_abbreviations} model(s).")
            if not any([experiment["ModelSEED_ID"] in pheno for pheno in self.phenotypes]):
                print(f"The {experiment['ModelSEED_ID']} ({cpd.formula}) metabolite of the "
                      f"{experiment['condition']} condition is not a suitable phenotype for "
                      f"the {model_abbreviations} model(s).")
                continue

            # the model(s) for which the condition is a suitable carbon source must be defined here
            # simulate through the kinetics ranges with conditions that can be used by one of the members
            rel_final_conc = {experiment["ModelSEED_ID"]: org_rel_final_conc}
            export_path = os.path.join(os.getcwd(), "BIOLOG_LPs", f"{exp_index}_{','.join(exp_list)}.lp")
            kcat_primal = None
            for coef_index, coefs in enumerate(biomass_partition_coefs):
                # solve for growth rate constants with the previously solved biomasses
                new_simulation = 
CommPhitting(self.fluxes_df, self.carbon_conc, self.media_conc, - self.msdb_path, self.biolog_df.loc[exp_index,:], - self.experimental_metadata) - new_simulation.define_problem( - parameters, exp_list, rel_final_conc, - set(list(chain.from_iterable([ - content["excretions"] for content in self.community_members.values()]))), - abs_final_conc, data_timesteps, export_zip_name, export_parameters, export_path, - kcat_primal, coefs, requisite_biomass, True) - time1 = process_time() - primals_export_path = primals_export_path or f"BIOLOG_{experiment['ModelSEED_ID']}.json" - try: - new_simulation.compute(graphs, export_zip_name, None, publishing, primals_export_path, True) - except (NoFluxError) as e: - print(e) - kcat_primal = parse_primals(new_simulation.values, coefs=coefs, - kcat_vals=new_simulation.parameters["kcat"]) - time2 = process_time() - print(f"Done simulating with the coefficients for biomass partitions: {coef_index}" - f"\n{(time2 - time1) / 60} minutes") - pprint(kcat_primal) - print("\n\n\n") - return {k: val for k, val in new_simulation.values.items() if "kcat" in k} diff --git a/modelseedpy/community/commscores_old.py b/modelseedpy/community/commscores_old.py deleted file mode 100644 index 3f81119..0000000 --- a/modelseedpy/community/commscores_old.py +++ /dev/null @@ -1,918 +0,0 @@ -from modelseedpy.core.exceptions import ObjectiveError, ParameterError -from modelseedpy.community.commhelper import build_from_species_models -from modelseedpy.community.mscompatibility import MSCompatibility -from modelseedpy.core.msminimalmedia import MSMinimalMedia -from modelseedpy.community.mscommunity import MSCommunity -from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.core.fbahelper import FBAHelper -from modelseedpy.core.msgapfill import MSGapfill -from itertools import combinations, permutations, chain -from optlang import Variable, Constraint, Objective -from numpy import array, unique, ndarray, where, sort, array_split, nan -from collections import Counter -from deepdiff import DeepDiff # (old, new) -from typing import Iterable, Union -from pprint import pprint -from numpy.random import shuffle -from multiprocess import current_process -from math import inf -import sigfig -# from icecream import ic -import re -# from math import prod - -# silence deprecation warnings from DeepDiff parsing the syntrophy -import warnings -warnings.simplefilter("ignore", category=DeprecationWarning) - -rm_comp = FBAHelper.remove_compartment - -def _compatibilize(member_models: Iterable, printing=False): - # return member_models - models = MSCompatibility.standardize(member_models, conflicts_file_name='exchanges_conflicts.json', printing=printing) - if not isinstance(member_models, (set, list, tuple)): return models[0] - return models - -def _load_models(member_models: Iterable, com_model=None, compatibilize=True, printing=False): - # ic(member_models, com_model, compatibilize) - if not com_model and member_models: - model = build_from_species_models(member_models, name="SMETANA_pair") - return member_models, model # (model, names=names, abundances=abundances) - # models = PARSING_FUNCTION(community_model) # TODO the individual models of a community model can be parsed - if compatibilize: return _compatibilize(member_models, printing), _compatibilize([com_model], printing)[0] - return member_models, com_model - -def _get_media(media=None, com_model=None, model_s_=None, min_growth=None, environment=None, - interacting=True, printing=False, minimization_method="minFlux", 
skip_bad_media=False): - # ic(media, com_model, model_s_) - if com_model is None and model_s_ is None: raise TypeError("< com_model > or < model_s_ > must be parameterized.") - if media is not None: - if model_s_ is not None and not isinstance(model_s_, (list,set,tuple)): - return media["members"][model_s_.id]["media"] - elif com_model is not None: return media["community_media"] - return media - # model_s_ is either a singular model or a list of models - if com_model is not None: - try: - com_media, media_sol = MSMinimalMedia.determine_min_media( - com_model, minimization_method, min_growth, None, interacting, 5, printing) - except Exception as e: - if skip_bad_media: com_media, media_sol = None, None - else: print(e) - if model_s_ is not None: - if not isinstance(model_s_, (list,set,tuple,ndarray)): - try: - return MSMinimalMedia.determine_min_media( - model_s_, minimization_method, min_growth, environment, interacting, printing) - except Exception as e: - if not skip_bad_media: print(e) - return None - members_media = {} - for model in model_s_: - try: - members_media[model.id] = {"media": MSMinimalMedia.determine_min_media( - model, minimization_method, min_growth, environment, interacting, printing)[0]} - continue - except Exception as e: - if skip_bad_media: continue - else: print(e) - # print(members_media) - if com_model is None: return members_media - else: return com_media, media_sol - return {"community_media":com_media, "members":members_media} - - -def _sigfig_check(value, sigfigs, default): - if str(value) in ["inf", "nan"]: value = "" - if FBAHelper.isnumber(value): return sigfig.round(value, sigfigs) - else: return default - - - -def nanFilter(value, string=True): - if isinstance(value, str) or value is None: - if string: return value - else: return nan - if any([value < 0, value > 1e5]): return "" if string else nan - return value - - -class CommScores: - def __init__(self, member_models, min_growth=0.1, n_solutions=100, environment=None, - abstol=1e-3, media_dict=None, printing=True, raw_content=False, antismash_json_path:str=None, - antismash_zip_path:str=None, minimal_media_method="minFlux"): - self.min_growth = min_growth ; self.abstol = abstol ; self.n_solutions = n_solutions - self.printing = printing ; self.raw_content = raw_content - self.antismash_json_path = antismash_json_path ; self.antismash_zip_path = antismash_zip_path - - # process the models - self.models = _compatibilize(member_models) - self.community = MSModelUtil(build_from_species_models(self.models)) - ## define the environment - if environment: - if hasattr(environment, "get_media_constraints"): - ### standardize modelseed media into COBRApy media - environment = {"EX_" + exID: -bound[0] for exID, bound in environment.get_media_constraints().items()} - self.community.add_medium(environment) - self.environment = environment - ## test growth - for model in self.models: - if model.slim_optimize() == 0: - raise ObjectiveError(f"The model {model.id} possesses an objective value of 0 in complete media, " - "which is incompatible with minimal media computations and hence SMETANA.") - if self.community.model.slim_optimize() == 0: - raise ObjectiveError(f"The community model {self.community.model.id} possesses an objective " - "value of 0 in complete media, which is incompatible with minimal " - "media computations and hence SMETANA.") - ## determine the minimal media for each model, including the community - self.media = media_dict if media_dict else MSMinimalMedia.comm_media_est( - member_models, 
            self.community.model, minimal_media_method,
            min_growth, self.environment, True, n_solutions, printing)

    def all_scores(self, mp_score=True, kbase_obj=None, cobrakbase_path:str=None,
                   kbase_token_path:str=None, annotated_genomes:dict=None):
        mro = self.mro_score()
        mip = self.mip_score(interacting_media=self.media)
        mp = None if not mp_score else self.mp_score()
        mu = None # self.mu_score()
        sc = None # self.sc_score()
        smetana = None # self.smetana_score()
        gyd = self.gyd_score()
        fs = self.fs_score() if any([kbase_obj is not None, annotated_genomes not in [None, []],
                                     cobrakbase_path is not None and kbase_token_path is not None]) else None
        return {"mro": mro, "mip": mip, "mp": mp, "mu": mu, "sc": sc, "smetana": smetana,
                "gyd": gyd, "fs": fs}

    def mro_score(self):
        self.mro_val = CommScores.mro(self.models, self.media["members"], self.min_growth,
                                      self.media, self.raw_content, self.environment, self.printing, True)
        if not self.printing: return self.mro_val
        if self.raw_content:
            for pair, (interaction, media) in self.mro_val.items():
                newcomer, established = pair.split('---')
                print(f"\n(MRO) The {newcomer} media {media} possesses {interaction} shared "
                      f"requirements with the {established} established member.")
            return self.mro_val
        for pair, mro in self.mro_val.items():
            if pair == "mets": continue   # the aggregated metabolite list is not a pairwise score
            newcomer, established = pair.split('---')
            print(f"\nThe {newcomer} on {established} MRO score: {mro[0]} ({mro[0]*100:.2f}%). "
                  f"This is the percent of nutritional requirements in {newcomer} "
                  f"that overlap with {established} ({mro[1]}/{mro[2]}).")
        return self.mro_val

    def mip_score(self, interacting_media:dict=None, noninteracting_media:dict=None):
        interacting_media = interacting_media or self.media or None
        diff, self.mip_val = CommScores.mip(self.models, self.community.model, self.min_growth, interacting_media,
                                            noninteracting_media, self.environment, self.printing, True)
        if not self.printing: return self.mip_val
        print(f"\nMIP score: {self.mip_val}\t\t\t{self.mip_val} required compound(s) can be sourced via syntrophy:")
        if self.raw_content: pprint(diff)
        return self.mip_val

    def gyd_score(self, coculture_growth=False):
        self.gyd_val = CommScores.gyd(self.models, environment=self.environment, coculture_growth=coculture_growth)
        if not self.printing: return self.gyd_val
        growth_type = "monocultural" if not coculture_growth else "cocultural"
        for pair, score in self.gyd_val.items():
            print(f"\nGYD score: The {growth_type} growth difference between the {pair} member models"
                  f" is {score} times greater than the growth of the slower member.")
        return self.gyd_val

    def fs_score(self, kbase_obj=None, cobrakbase_path:str=None, kbase_token_path:str=None, annotated_genomes:dict=None):
        self.fs_val = CommScores.fs(self.models, kbase_obj, cobrakbase_path, kbase_token_path, annotated_genomes)
        if not self.printing: return self.fs_val
        for pair, score in self.fs_val.items():
            print(f"\nFS Score: The similarity of RAST functional SSO ontology "
                  f"terms between the {pair} members is {score}.")
        return self.fs_val

    def mp_score(self):
        print("executing MP")
        self.mp_val = CommScores.mp(self.models, self.environment, self.community.model, None, self.abstol, self.printing)
        if not self.printing: return self.mp_val
        if self.raw_content:
            print("\n(MP) The possible contributions of each member in the member media include:\n")
            pprint(self.mp_val)
        else:
            print("\nMP score:\t\t\tEach member can possibly contribute the following to the community:\n")
            for member, contributions in self.mp_val.items():
                print(member, "\t", len(contributions))
        return self.mp_val

    def mu_score(self):
        member_excreta = self.mp_score() if not hasattr(self, "mp_val") else self.mp_val
        self.mu_val = CommScores.mu(self.models, self.environment, member_excreta, self.n_solutions,
                                    self.abstol, True, self.printing)
        if not self.printing: return self.mu_val
        print("\nMU score:\t\t\tThe fraction of solutions in which each member is the "
              "syntrophic receiver that contain a respective metabolite:\n")
        pprint(self.mu_val)
        return self.mu_val

    def sc_score(self):
        self.sc_val = CommScores.sc(self.models, self.community.model, self.min_growth,
                                    self.n_solutions, self.abstol, True, self.printing)
        if not self.printing: return self.sc_val
        print("\nSC score:\t\t\tThe fraction of community members who syntrophically contribute to each species:\n")
        pprint(self.sc_val)
        return self.sc_val

    def smetana_score(self):
        if not hasattr(self, "sc_val"): self.sc_val = self.sc_score()
        sc_coupling = all(score is not None for score in self.sc_val.values())
        if not hasattr(self, "mu_val"): self.mu_val = self.mu_score()
        if not hasattr(self, "mp_val"): self.mp_val = self.mp_score()

        self.smetana = CommScores.smetana(
            self.models, self.community.model, self.min_growth, self.n_solutions, self.abstol,
            (self.sc_val, self.mu_val, self.mp_val), True, sc_coupling, self.printing)
        if self.printing: print("\nsmetana score:\n") ; pprint(self.smetana)
        return self.smetana

    def antiSMASH_scores(self, antismash_json_path=None):
        self.antismash = CommScores.antiSMASH(antismash_json_path or self.antismash_json_path)
        if not self.printing: return self.antismash
        if self.raw_content:
            print("\n(antismash) The biosynthetic_areas, BGCs, protein_annotations, clusterBlast, and "
                  "num_clusterBlast from the provided antiSMASH results:\n")
            print("The 'areas' that antiSMASH determines produce biosynthetic products:")
            pprint(self.antismash[0])
            print("The set of biosynthetic gene clusters:")
            pprint(self.antismash[1])
            print("The set of clusterblast protein annotations:")
            pprint(self.antismash[2])
            print("Resistance information from clusterblast:")
            pprint(self.antismash[3])
            print("The number of proteins associated with resistance:")
            pprint(self.antismash[4])
            return self.antismash
        print("\nantiSMASH scores:\n")
        print("The community exhibited:"
              f"\n- {len(self.antismash[0])} 'areas' that antiSMASH determines produce biosynthetic products."
              f"\n- {len(self.antismash[1])} biosynthetic gene clusters."
              f"\n- {len(self.antismash[2])} clusterblast protein annotations."
              f"\n- {len(self.antismash[3])} parcels of resistance information from clusterblast."
- f"- {self.antismash[4]} proteins associated with resistance.") - return list(map(len, self.antismash[:4]))+[self.antismash[4]] - - - ###### STATIC METHODS OF THE SMETANA SCORES, WHICH ARE APPLIED IN THE ABOVE CLASS OBJECT ###### - - @staticmethod - def _check_model(model_util, media, model_str, skip_bad_media): - default_media = model_util.model.medium - if media is not None: model_util.add_medium(media) - obj_val = model_util.model.slim_optimize() - if obj_val == 0 or not FBAHelper.isnumber(obj_val): - print(f"The {model_str} model input does not yield an operational model, and will therefore be gapfilled.") - # if not skip_bad_media: return MSGapfill.gapfill(model_util.model, media) - model_util.add_medium(default_media) - return model_util.model - - @staticmethod - def _load(model, kbase_obj): - model_str = model - if len(model) == 2: model = kbase_obj.get_from_ws(*model) - else: model = kbase_obj.get_from_ws(model) - return model, model_str - - @staticmethod - def _determine_growths(modelUtils): - return [util.model.slim_optimize() for util in modelUtils] - - - @staticmethod - def calculate_scores(pairs, models_media=None, environments=None, annotated_genomes=True, lazy_load=False, - kbase_obj=None, cip_score=True, costless=True, skip_bad_media=False, anme_comm=False, - print_progress=False): - from pandas import Series - - if isinstance(pairs, list): pairs, models_media, environments, annotated_genomes, lazy_load, kbase_obj = pairs - series, mets = [], [] - if not isinstance(environments, (list, tuple)): environments = [environments] - if isinstance(environments, (list, tuple)) and hasattr(environments[0], "name"): - environments = {m.name:FBAHelper.convert_kbase_media(m, 1000) for m in environments} - elif not isinstance(environments, dict): environments = {f"media{i}":m for i,m in enumerate(environments)} - pid = current_process().name - model_utils = {} - count = 0 - for model1, models in pairs.items(): - if model1.id == "": model1.id = "model1" - if lazy_load: model1, model1_str = CommScores._load(model1, kbase_obj) - else: model1_str = model1.id - if model1.id not in models_media: - models_media[model1.id] = {"media": _get_media(model_s_=model1, skip_bad_media=skip_bad_media)} - if models_media[model1.id] is None: continue - if model1.id not in model_utils: model_utils[model1.id] = MSModelUtil(model1) - # print(pid, model1) - for model_index, model2 in enumerate(models): - if model2.id == "": model2.id = "model2" - if lazy_load: model2, model2_str = CommScores._load(model2, kbase_obj) - else: model2_str = model2.id - if model2.id not in models_media: - models_media[model2.id] = {"media": _get_media(model_s_=model2, skip_bad_media=skip_bad_media)} - if models_media[model2.id] is None: continue - if model2.id not in model_utils: model_utils[model2.id] = MSModelUtil(model2) - grouping = [model1, model2] ; grouping_utils = [model_utils[model1.id], model_utils[model2.id]] - modelIDs = [model.id for model in grouping] - comm_model = build_from_species_models(grouping) - community = MSCommunity(comm_model, ids=modelIDs) - comm_sol = comm_model.optimize() - print(f"{pid}~~{count}\t{modelIDs}") - for environName, environ in environments.items(): - if print_progress: print(f"\tEnvironment\t{environName}", end="\t") - if not anme_comm: - model1 = CommScores._check_model(model_utils[model1.id], environ, model1_str, skip_bad_media) - model2 = CommScores._check_model(model_utils[model2.id], environ, model2_str, skip_bad_media) - # initiate the KBase output - report_dic = {f"model{i+1}": 
modelID for i,modelID in enumerate(modelIDs)} - g1, g2, comm = CommScores._determine_growths([model_utils[model1.id], model_utils[model2.id], community.util]) - g1, g2, comm = _sigfig_check(g1, 5, ""), _sigfig_check(g2, 5, ""), _sigfig_check(comm, 5, "") - report_dic.update({"media": environName, "model1 growth": g1, "model2 growth": g2, "community growth": comm}) - coculture_growths = {mem.id: comm_sol.fluxes[mem.primary_biomass.id] for mem in community.members} - report_dic.update({f"coculture growth model{modelIDs.index(memID)}": growth for memID, growth in coculture_growths.items()}) - # define the MRO content - mro_values = CommScores.mro(grouping, models_media, raw_content=True, environment=environ) - report_dic.update({f"MRO_model{modelIDs.index(models_string.split('--')[0])+1}": - f"{100*len(intersection)/len(memMedia):.3f}% ({len(intersection)}/{len(memMedia)})" - for models_string, (intersection, memMedia) in mro_values.items()}) - mets.append({"MRO metabolites": list(mro_values.values())[0][0]}) - if print_progress: print("MRO done", end="\t") - # define the CIP content - if cip_score: - cip_values = CommScores.cip(modelutils=[model_utils[mem.id] for mem in grouping]) - report_dic.update({"CIP": cip_values[1]}) - mets[-1].update({"CIP metabolites": list(cip_values[0])}) - if print_progress: print("CIP done", end="\t") - # define the MIP content - mip_values = CommScores.mip(grouping, comm_model, 0.1, None, None, environ, print_progress, True, - costless, costless, skip_bad_media) - # print(mip_values) - if mip_values is not None: - report_dic.update({f"MIP_model{modelIDs.index(models_name)+1}": str(len(received)) - for models_name, received in mip_values[0].items()}) - mets[-1].update({"MIP model1 metabolites": list(mip_values[0].values())[0], - "MIP model2 metabolites": list(mip_values[0].values())[1]}) - if costless: - for models_name, received in mip_values[1].items(): - report_dic[f"MIP_model{modelIDs.index(models_name)+1} (costless)"] = report_dic[ - f"MIP_model{modelIDs.index(models_name)+1}"] + f" ({len(received)})" - del report_dic[f"MIP_model{modelIDs.index(models_name)+1}"] - if print_progress: print("costless_MIP done", end="\t") - else: - report_dic.update({f"MIP_model1 (costless)": "", f"MIP_model2 (costless)": ""}) - mets[-1].update({"MIP model1 metabolites": [None], "MIP model2 metabolites": [None]}) - if print_progress: print("MIP done", end="\t") - # define the BSS content - bss_values = CommScores.bss(grouping, grouping_utils, environments, models_media, skip_bad_media) - report_dic.update({f"BSS_model{modelIDs.index(name.split(' supporting ')[0])+1}": - f"{_sigfig_check(100*val, 5, '')}%" for name, (mets, val) in bss_values.items()}) - mets[-1].update({"BSS model1 metabolites": [met_set for met_set, val in bss_values.values()][0], - "BSS model2 metabolites": [met_set for met_set, val in bss_values.values()][1]}) - # mets[-1].update({"bss_mets": list(bss_values[0].values())}) - if print_progress: print("BSS done", end="\t") - # define the PC content - pc_values = CommScores.pc(grouping, grouping_utils, comm_model, None, comm_sol, environ, True, community) - report_dic.update({"PC_comm": _sigfig_check(pc_values[0], 5, ""), - "PC_model1": _sigfig_check(list(pc_values[1].values())[0], 5, ""), - "PC_model2": _sigfig_check(list(pc_values[1].values())[1], 5, ""), - "BIT": pc_values[3]}) - if print_progress: print("PC done\tBIT done", end="\t") - # print([mem.slim_optimize() for mem in grouping]) - # define the GYD content - gyd1, gyd2, g1, g2 = 
list(CommScores.gyd(grouping, grouping_utils, environ, False, community, anme_comm).values())[0] - report_dic.update({"GYD1": _sigfig_check(gyd1, 5, ""), "GYD2": _sigfig_check(gyd2, 5, "")}) - if print_progress: print("GYD done\t\t", end="\t" if annotated_genomes else "\n") - # define the FS content - if kbase_obj is not None and annotated_genomes and not anme_comm: - fs_values = list(CommScores.fs(grouping, kbase_obj, annotated_genomes=annotated_genomes).values())[0] - print(len(fs_values[0]) if fs_values[0] is not None else "NaN", fs_values[1]) - report_dic.update({"FS": sigfig.round(fs_values[1], 5)}) - if fs_values is not None: mets[-1].update({"FS features": fs_values[0]}) - if print_progress: print("FS done\t\t") - # return a pandas Series, which can be easily aggregated with other results into a DataFrame - series.append(Series(report_dic)) - count += 1 - return series, mets - - @staticmethod - def html_report(df, mets, export_html_path="commscores_report.html", msdb_path=None): - from modelseedpy.core.report import commscores_report - return commscores_report(df, mets, export_html_path, msdb_path) - - @staticmethod - def report_generation(all_models:iter=None, # a list of distinct lists is provided for specifying exclusive groups - pairs:dict=None, mem_media:dict=None, pair_limit:int=None, - exclude_pairs:list=None, kbase_obj=None, annotated_genomes:dict=True, # True triggers internal acquisition of the genomes, where None skips - see_media=True, environments:iter=None, # a collection of environment dicts or KBase media objects - pool_size:int=None, cip_score=True, costless=True, skip_bad_media=False, anme_comm=False, - print_progress=False): - from pandas import concat - - if pairs: model_pairs = unique([{model1, model2} for model1, models in pairs.items() for model2 in models]) - elif all_models is not None: - if not isinstance(all_models[0], list): - all_models = list(set(all_models)) ; model_pairs = array(list(combinations(all_models, 2))) - else: - model_pairs = [] - for models1, models2 in combinations(all_models, 2): - models1 = set(models1) ; models2 = set(models2) - if len(models1) > len(models2): larger_list = models1 ; smaller_list = models2 - else: larger_list = models2 ; smaller_list = models1 - model_pairs.append([list(zip(combin, smaller_list)) for combin in permutations(larger_list, len(smaller_list))]) - # flatten the assembled pairs and filter duplicates - model_pairs = array([x for x in set(tuple(x) for x in [i for y in list(chain.from_iterable(model_pairs)) for i in y])]) - all_models = list(chain.from_iterable(all_models)) - if pair_limit is not None: - shuffle(model_pairs) - new_pairs = [] - for index, pair in enumerate(model_pairs): - if set(pair) not in exclude_pairs and index < pair_limit: new_pairs.append(pair) - elif index >= pair_limit: break - model_pairs = array(new_pairs) - if isinstance(model_pairs[0], str): model_pairs = unique(sort(model_pairs, axis=1)) - pairs = {first: model_pairs[where(model_pairs[:, 0] == first)][:, 1] for first in model_pairs[:, 0]} - else: raise ValueError("Either < all_models > or < pairs > must be defined to simulate interactions.") - if not all_models: all_models = list(chain(*[list(values) for values in pairs.values()])) + list(pairs.keys()) - lazy_load = len(model_pairs) > 10000 # all_models[0], (list,set,tuple)) - if lazy_load and not kbase_obj: ValueError("The < kbase_obj > argument must be provided to lazy load models.") - new_models = [] - for index, model in enumerate(all_models): - if model.id == "": model.id = 
f"model_index{index}" - new_models.append(model) - all_models = new_models[:] - if not mem_media: models_media = _get_media(model_s_=all_models, skip_bad_media=skip_bad_media) - else: - models_media = mem_media.copy() - missing_models = set() - missing_modelID = [] - for model in all_models: - if model is not None and model.id not in models_media: - missing_models.add(model) - missing_modelID.append(model if not hasattr(model, "id") else model.id) - if missing_models != set(): - print(f"Media of the {missing_modelID} models are not defined, and will be calculated separately.") - models_media.update(_get_media(model_s_=missing_models), skip_bad_media=skip_bad_media) - if see_media: print(f"The minimal media of all members:\n{models_media}") - print(f"\nExamining the {len(list(model_pairs))} model pairs") - if pool_size is not None: - from datetime import datetime ; from multiprocess import Pool - print(f"Loading {int(pool_size)} workers and computing the scores", datetime.now()) - pool = Pool(int(pool_size)) #.map(calculate_scores, [{k: v} for k,v in pairs.items()]) - args = [[dict([pair]), models_media, environments, annotated_genomes, lazy_load, kbase_obj] - for pair in list(pairs.items())] - output = pool.map(CommScores.calculate_scores, args) - series = chain.from_iterable([ele[0] for ele in output]) - mets = chain.from_iterable([ele[1] for ele in output]) - else: series, mets = CommScores.calculate_scores(pairs, models_media, environments, annotated_genomes, lazy_load, - kbase_obj, cip_score, costless, skip_bad_media, anme_comm, print_progress) - return concat(series, axis=1).T, mets - - @staticmethod - def mro(member_models:Iterable=None, mem_media:dict=None, min_growth=0.1, media_dict=None, - raw_content=False, environment=None, skip_bad_media=False, printing=False, compatibilized=False): - """Determine the overlap of nutritional requirements (minimal media) between member organisms.""" - # determine the member minimal media if they are not parameterized - if not mem_media: - if not member_models: - raise ParameterError("The either member_models or minimal_media parameter must be defined.") - member_models = member_models if compatibilized else _compatibilize(member_models, printing) - mem_media = _get_media(media_dict, None, member_models, min_growth, environment, printing=printing, - skip_bad_media=skip_bad_media) - if "community_media" in mem_media: mem_media = mem_media["members"] - # MROs = array(list(map(len, pairs.values()))) / array(list(map(len, mem_media.values()))) - mro_values = {} - for model1, model2 in combinations(member_models, 2): - intersection = set(mem_media[model1.id]["media"].keys()) & set(mem_media[model2.id]["media"].keys()) - inter = [ex.replace("EX_", "").replace("_e0", "") for ex in intersection] - m1_media = mem_media[model1.id]["media"] ; m2_media = mem_media[model2.id]["media"] - if raw_content: mro_values.update({f"{model1.id}---{model2.id})": (inter, m1_media), - f"{model2.id}---{model1.id})": (inter, m2_media)}) - else: - mro_values.update({f"{model1.id}---{model2.id})": 100*(len(inter) / len(m1_media), len(inter), len(m1_media)), - f"{model2.id}---{model1.id})": 100*(len(inter) / len(m2_media), len(inter), len(m2_media)), - "mets": inter}) - return mro_values - # return mean(list(map(len, pairs.values()))) / mean(list(map(len, mem_media.values()))) - - @staticmethod - def mip(member_models: Iterable, com_model=None, min_growth=0.1, interacting_media_dict=None, - noninteracting_media_dict=None, environment=None, printing=False, compatibilized=False, 
- costless=False, multi_output=False, skip_bad_media=False): - """Determine the quantity of nutrients that can be potentially sourced through syntrophy""" - member_models, community = _load_models(member_models, com_model, not compatibilized, printing=printing) - # determine the interacting and non-interacting media for the specified community .util.model - noninteracting_medium, noninteracting_sol = _get_media( - noninteracting_media_dict, community, None, min_growth, environment, False, skip_bad_media=skip_bad_media) - if noninteracting_medium is None: return None - if "community_media" in noninteracting_medium: noninteracting_medium = noninteracting_medium["community_media"] - interacting_medium, interacting_sol = _get_media( - interacting_media_dict, community, None, min_growth, environment, True, skip_bad_media=skip_bad_media) - if interacting_medium is None: return None - if "community_media" in interacting_medium: interacting_medium = interacting_medium["community_media"] - interact_diff = DeepDiff(noninteracting_medium, interacting_medium) - if "dictionary_item_removed" not in interact_diff: return None - cross_fed_exIDs = [re.sub("(root\['|'\])", "", x) for x in interact_diff["dictionary_item_removed"]] - # Determine each direction of the MIP score interactions - comm_util = MSModelUtil(community) - cross_fed_metIDs = [ex.replace("EX_", "").replace("_e0", "") for ex in cross_fed_exIDs] - cross_fed_copy = cross_fed_metIDs[:] - directionalMIP = {mem.id:[] for mem in member_models} - for rxn in comm_util.transport_list(): - # print(rxn.reaction, "\t", [met.id for met in rxn.metabolites if "_e0" in met.id]) - metIDs = list(set([met.id.split("_")[0] for met in rxn.reactants]).intersection( - set([met.id.split("_")[0] for met in rxn.products]))) - if len(metIDs) == 1: metID = metIDs[0] - else: - if "cpd00067" in metIDs: metIDs.remove("cpd00067") - metID = metIDs[0] - if metID not in cross_fed_metIDs: continue - rxn_index = FBAHelper.compartment_index(rxn.id.split("_")[-1]) - if rxn_index == 0: continue - mets = [met for met in rxn.metabolites if met.id == f"{metID}_c{rxn_index}"] - if mets == []: print(f"The {metID}_c{rxn_index} is missing in {rxn.reaction}.") ; continue - rxn_model = member_models[rxn_index-1] - # comm_trans[metID] = comm_trans.get(f"{metID}_c{rxn_index}", {}) - if (rxn.metabolites[mets[0]] > 0 and interacting_sol.fluxes[rxn.id] > 0 - or rxn.metabolites[mets[0]] < 0 and interacting_sol.fluxes[rxn.id] < 0): # donor - directionalMIP[rxn_model.id].append(metID) - if metID in cross_fed_copy: cross_fed_copy.remove(metID) ; continue - # if printing: print(f"{mets[0]} in {rxn.id} ({rxn.reaction}) is not assigned a receiving member.") - if cross_fed_copy != [] and printing: print(f"Missing directions for the {cross_fed_copy} cross-fed metabolites") - outputs = [directionalMIP] - # TODO categorize all of the cross-fed substrates to examine potential associations of specific compounds - if costless: - costless_mets, numExs = CommScores.cip(member_models=member_models) - # print(list(directionalMIP.values()), costless_mets) - costlessDirectionalMIP = {member_name: set(receive_mets).intersection(costless_mets) - for member_name, receive_mets in directionalMIP.items()} - if not multi_output: return costlessDirectionalMIP - outputs.append(costlessDirectionalMIP) - return outputs - - @staticmethod - def cip(modelutils=None, member_models=None): # costless interaction potential - if not modelutils: modelutils = {MSModelUtil(model) for model in member_models} - costless_mets = 
set(chain.from_iterable([modelutil.costless_excreta() for modelutil in modelutils])) - return costless_mets, len(costless_mets) - - @staticmethod - def contributions(org_possible_contributions, scores, model_util, abstol): - # identify and log excreta from the solution - model_util.add_objective(sum(ex_rxn.flux_expression for ex_rxn in org_possible_contributions)) - sol = model_util.model.optimize() - if sol.status != "optimal": - # exit the while loop by returning the original possible_contributions, - ## hence DeepDiff == {} and the while loop terminates - return scores, org_possible_contributions - # identify and log excreta from the solution - possible_contributions = org_possible_contributions[:] - for ex in org_possible_contributions: - if ex.id in sol.fluxes.keys() and sol.fluxes[ex.id] >= abstol: - possible_contributions.remove(ex) - scores[model_util.model.id].update([met.id for met in ex.metabolites]) - return scores, possible_contributions - - @staticmethod - def mp(member_models:Iterable, environment, com_model=None, minimal_media=None, abstol=1e-3, printing=False): - """Discover the metabolites that each species can contribute to a community""" - community = _compatibilize(com_model) if com_model else build_from_species_models(member_models,standardize=True) - community.medium = minimal_media or MSMinimalMedia.minimize_flux(community) - scores = {} - for org_model in member_models: # TODO support parsing the individual members through the MSCommunity object - model_util = MSModelUtil(org_model) - model_util.compatibilize(printing=printing) - if environment: model_util.add_medium(environment) - scores[model_util.model.id] = set() - # determines possible member contributions in the community environment, where the excretion of media compounds is irrelevant - org_possible_contr = [ex_rxn for ex_rxn in model_util.exchange_list() - if (ex_rxn.id not in community.medium and ex_rxn.upper_bound > 0)] - # ic(org_possible_contributions, len(model_util.exchange_list()), len(community.medium)) - scores, possible_contr = CommScores.contributions(org_possible_contr, scores, model_util, abstol) - while DeepDiff(org_possible_contr, possible_contr): - print("remaining possible_contributions", len(possible_contr), end="\r") - ## optimize the sum of the remaining exchanges that have not surpassed the abstol - org_possible_contr = possible_contr[:] - scores, possible_contr = CommScores.contributions(org_possible_contr, scores, model_util, abstol) - - ## individually checks the remaining possible contributions - for ex_rxn in possible_contr: - model_util.model.objective = Objective(ex_rxn.flux_expression) - sol = model_util.model.optimize() - if sol.status == 'optimal' or sol.objective_value > abstol: - for met in ex_rxn.metabolites: - if met.id in scores[model_util.model.id]: - scores[model_util.model.id].remove(met.id) ; print("removing", met.id) - return scores - - @staticmethod - def mu(member_models:Iterable, environment=None, member_excreta=None, n_solutions=100, abstol=1e-3, - compatibilized=False, printing=True): - """the fractional frequency of each received metabolite amongst all possible alternative syntrophic solutions""" - # member_solutions = member_solutions if member_solutions else {model.id: model.optimize() for model in member_models} - scores = {} - member_models = member_models if compatibilized else _compatibilize(member_models, printing) - if member_excreta: - missing_members = [model for model in member_models if model.id not in member_excreta] - if missing_members: - 
print(f"The {','.join(missing_members)} members are missing from the defined " - f"excreta list and will therefore be determined through an additional MP simulation.") - member_excreta.update(CommScores.mp(missing_members, environment)) - else: member_excreta = CommScores.mp(member_models, environment, None, abstol, printing) - for org_model in member_models: - other_excreta = set(chain.from_iterable([excreta for model, excreta in member_excreta.items() - if model != org_model.id])) - print(f"\n{org_model.id}\tOther Excreta", other_excreta) - model_util = MSModelUtil(org_model, True) - if environment: - model_util.add_medium(environment) - ex_rxns = {ex_rxn: list(ex_rxn.metabolites)[0] for ex_rxn in model_util.exchange_list()} - print(f"\n{org_model.id}\tExtracellular reactions", ex_rxns) - variables = {ex_rxn.id: Variable('___'.join([model_util.model.id, ex_rxn.id]), - lb=0, ub=1, type="binary") for ex_rxn in ex_rxns} - model_util.add_cons_vars(list(variables.values())) - media, solutions = [], [] - sol = model_util.model.optimize() - while sol.status == "optimal" and len(solutions) < n_solutions: - solutions.append(sol) - medium = set([ex for ex in ex_rxns if sol.fluxes[ex.id] < -abstol and ex in other_excreta]) - model_util.create_constraint(Constraint(sum([variables[ex.id] for ex in medium]), - ub=len(medium)-1, name=f"iteration_{len(solutions)}")) - media.append(medium) - sol = model_util.model.optimize() - counter = Counter(chain(*media)) - scores[model_util.model.id] = {met.id: counter[ex] / len(media) - for ex, met in ex_rxns.items() if counter[ex] > 0} - return scores - - @staticmethod - def sc(member_models:Iterable=None, com_model=None, min_growth=0.1, n_solutions=100, - abstol=1e-6, compatibilized=True, printing=False): - """Calculate the frequency of interspecies dependency in a community""" - member_models, community = _load_models( - member_models, com_model, not compatibilized, printing=printing) - for rxn in com_model.reactions: - rxn.lower_bound = 0 if 'bio' in rxn.id else rxn.lower_bound - - # c_{rxn.id}_lb: rxn < 1000*y_{species_id} - # c_{rxn.id}_ub: rxn > -1000*y_{species_id} - variables = {} - constraints = [] - # TODO this can be converted to an MSCommunity object by looping through each index - # leverage CommKinetics - for org_model in member_models: - model_util = MSModelUtil(org_model, True) - variables[model_util.model.id] = Variable(name=f'y_{model_util.model.id}', lb=0, ub=1, type='binary') - model_util.add_cons_vars([variables[model_util.model.id]]) - for rxn in model_util.model.reactions: - if "bio" not in rxn.id: - # print(rxn.flux_expression) - lb = Constraint(rxn.flux_expression + 1000*variables[model_util.model.id], - name="_".join(["c", model_util.model.id, rxn.id, "lb"]), lb=0) - ub = Constraint(rxn.flux_expression - 1000*variables[model_util.model.id], - name="_".join(["c", model_util.model.id, rxn.id, "ub"]), ub=0) - constraints.extend([lb, ub]) - - # calculate the SCS - scores = {} - for model in member_models: - com_model_util = MSModelUtil(com_model) - com_model_util.add_cons_vars(constraints, sloppy=True) - # model growth is guaranteed while minimizing the growing members of the community - ## SMETANA_Biomass: {biomass_reactions} > {min_growth} - com_model_util.create_constraint(Constraint(sum(rxn.flux_expression for rxn in model.reactions - if "bio" in rxn.id), name='SMETANA_Biomass', lb=min_growth)) # sloppy = True) - other_members = [other for other in member_models if other.id != model.id] - 
com_model_util.add_objective(sum([variables[other.id] for other in other_members]), "min") - previous_constraints, donors_list = [], [] - for i in range(n_solutions): - sol = com_model.optimize() # FIXME The solution is not optimal - if sol.status != 'optimal': - scores[model.id] = None - break - donors = [o for o in other_members if com_model.solver.primal_values[f"y_{o.id}"] > abstol] - donors_list.append(donors) - previous_con = f'iteration_{i}' - previous_constraints.append(previous_con) - com_model_util.add_cons_vars([Constraint(sum(variables[o.id] for o in donors), name=previous_con, - ub=len(previous_constraints)-1)], sloppy=True) - if i != 0: - donors_counter = Counter(chain(*donors_list)) - scores[model.id] = {o.id: donors_counter[o] / len(donors_list) for o in other_members} - return scores - - @staticmethod - def gyd(member_models:Iterable=None, model_utils:Iterable=None, environment=None, coculture_growth=False, - community=None, anme_comm=False): - gyds = {} - for combination in combinations(model_utils or member_models, 2): - if model_utils is None: - model1_util = MSModelUtil(combination[0], True) ; model2_util = MSModelUtil(combination[1], True) - print(f"{model1_util.model.id} ++ {model2_util.model.id}", model1_util.model.slim_optimize(), model2_util.model.slim_optimize()) - if environment and not anme_comm: model1_util.add_medium(environment); model2_util.add_medium(environment) - else: model1_util = combination[0] ; model2_util = combination[1] - if not coculture_growth: - G_m1, G_m2 = CommScores._determine_growths([model1_util, model2_util]) - G_m1, G_m2 = G_m1 if FBAHelper.isnumber(str(G_m1)) else 0, G_m2 if FBAHelper.isnumber(str(G_m2)) else 0 - else: - community = community or MSCommunity(member_models=[model1_util.model, model2_util.model], - ids=[mem.id for mem in member_models]) - community.run_fba() - member_growths = community.parse_member_growths() - G_m1, G_m2 = member_growths[model1_util.model.id], member_growths[model2_util.model.id] - if G_m2 <= 0 or G_m1 <= 0: gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = ("", "", G_m1, G_m2) ; continue - gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = (abs(G_m1-G_m2)/G_m1, abs(G_m2-G_m1)/G_m2, G_m1, G_m2) - return gyds - - @staticmethod - def pc(member_models=None, modelutils=None, com_model=None, isolate_growths=None, comm_sol=None, - environment=None, comm_effects=True, community=None, interaction_threshold=0.1, compatibilized=False): - assert member_models or modelutils or community, "Members must be defined through either < member_models >" \ - "or < modelutils > or < community >." 
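# A hedged summary of the PC logic below: pc_score is community growth divided
# by the sum of isolate growths, and, when comm_effects is requested, each
# member's coculture/monoculture growth ratio is compared against
# 1 +/- interaction_threshold to classify the pair -- e.g. with the default
# threshold of 0.1, all ratios above 1.1 -> "mutualism", all below 0.9 ->
# "competitive", and mixed excesses in both directions -> "parasitism".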
- member_models = member_models or [mem.model for mem in modelutils] or community.members - if com_model is None: member_models, com_model = _load_models(member_models, None, not compatibilized, printing=False) - community = community or MSCommunity(com_model, member_models) - if comm_sol is None: community.util.add_medium(environment) ; comm_sol = community.util.model.optimize() - model_utils = modelutils or [MSModelUtil(mem, True) for mem in member_models] ; modelutils = [] - for mem in model_utils: - mem.add_medium(environment) ; modelutils.append(mem) - if isolate_growths is None: isolate_growths = {mem.id: mem.model.slim_optimize() for mem in modelutils} - pc_score = (comm_sol.objective_value/sum(list(isolate_growths.values()))) - if not comm_effects: return pc_score - - comm_member_growths = {mem.id: comm_sol.fluxes[mem.primary_biomass.id] for mem in community.members} - comm_growth_effect = {memID: nanFilter(comm_environ/isolate_growths[memID]) - for memID, comm_environ in comm_member_growths.items()} - growth_diffs = array([nanFilter(x, False) for x in list(comm_growth_effect.values())]) - th_pos, th_neg = 1+interaction_threshold, 1-interaction_threshold - if all(growth_diffs > th_pos): bit = "mutualism" - elif all(growth_diffs < th_neg): bit = "competitive" - elif ((th_pos > growth_diffs) & (growth_diffs > th_neg)).all(): bit = "neutral" - elif all(growth_diffs > th_neg) and any(growth_diffs > th_pos): bit = "commensalism" - elif all(growth_diffs < th_pos) and any(growth_diffs < th_neg): bit = "amensalism" - elif any(growth_diffs > th_pos) and any(growth_diffs < th_neg): bit = "parasitism" - else: print(f"The relative growths {comm_growth_effect} from {comm_member_growths} coculture and" - f" {isolate_growths} monoculture are not captured.") ; bit = "" - return (pc_score, comm_growth_effect, comm_member_growths, bit) - - @staticmethod - def bss(member_models:Iterable=None, model_utils:Iterable=None, environments=None, minMedia=None, skip_bad_media=False): - def compute_score(minMedia, environment=None, index=0): - minMedia = minMedia or _get_media(model_s_=[modelUtil.model for modelUtil in model_utils], - environment=environment, skip_bad_media=skip_bad_media) - model1_media = set([re.sub(r"(\_\w\d+$)", "", rxnID.replace("EX_", "")) - for rxnID in minMedia[model1_util.id]["media"].keys()]) - model2_media = set([re.sub(r"(\_\w\d+$)", "", rxnID.replace("EX_", "")) - for rxnID in minMedia[model2_util.id]["media"].keys()]) - model1_internal = {rm_comp(met.id) for rxn in model1_util.internal_list() for met in rxn.products} - model2_internal = {rm_comp(met.id) for rxn in model2_util.internal_list() for met in rxn.products} - bss_scores[f"{model1_util.id} supporting {model2_util.id} in media{index}"] = (model1_internal, - len(model2_media.intersection(model1_internal)) / len(model2_media)) - bss_scores[f"{model2_util.id} supporting {model1_util.id} in media{index}"] = (model2_internal, - len(model1_media.intersection(model2_internal)) / len(model1_media)) - - bss_scores = {} - for combination in combinations(model_utils or member_models, 2): - if model_utils is None: - model1_util = MSModelUtil(combination[0], True) ; model2_util = MSModelUtil(combination[1], True) - model_utils = [model1_util, model2_util] - else: model1_util = combination[0] ; model2_util = combination[1] - if environments: - for index, environment in enumerate(environments): - compute_score(minMedia, environment, index) - else: compute_score(minMedia) - return bss_scores - - @staticmethod - def mqs(): - pass - - - 
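# A minimal worked example of the Jaccard overlap that _calculate_jaccard_score
# (below) applies to SSO annotation sets in the FS score; the SSO terms here
# are hypothetical placeholders:
# >>> genome1_set = {"SSO:000001", "SSO:000002", "SSO:000003"}
# >>> genome2_set = {"SSO:000002", "SSO:000003", "SSO:000004"}
# >>> shared = genome1_set & genome2_set            # 2 shared terms
# >>> len(shared) / len(genome1_set | genome2_set)  # 2/4 = 0.5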
@staticmethod - def _calculate_jaccard_score(set1, set2): - if set1 == set2: print(f"The sets are identical, with a length of {len(set1)}.") - if len(set1.union(set2)) == 0: return (None, None) - return (set1.intersection(set2), len(set1.intersection(set2)) / len(set1.union(set2))) - - @staticmethod - def get_all_genomes_from_ws(ws_id, kbase_object=None, cobrakbase_repo_path:str=None, kbase_token_path:str=None): - def get_genome(genome_name): - return kbase_object.ws_client.get_objects2( - {'objects': [{'ref': f"{ws_id}/{genome_name}"}]})["data"][0]['data'] - # load the kbase client instance - if not kbase_object: - import os - os.environ["HOME"] = cobrakbase_repo_path - import cobrakbase - with open(kbase_token_path) as token_file: kbase_object = cobrakbase.KBaseAPI(token_file.readline()) - - # calculate the complementarity - genome_list = kbase_object.ws_client.list_objects( - {"ids": [ws_id], "type": 'KBaseGenomes.Genome', 'minObjectID': 0, 'maxObjectID': 10000}) - genome_names = [g[1] for g in genome_list if g[1].endswith("RAST")] - return {genome_name: set([sso for j in get_genome(genome_name)['cdss'] - for sso in j['ontology_terms']['SSO'].keys()]) - for genome_name in genome_names} - - @staticmethod - def fs(models:Iterable=None, kbase_object=None, cobrakbase_repo_path:str=None, - kbase_token_path:str=None, annotated_genomes:dict=None, printing=False): - if not isinstance(annotated_genomes, dict): - if not kbase_object: - import os ; os.environ["HOME"] = cobrakbase_repo_path ; import cobrakbase - with open(kbase_token_path) as token_file: kbase_object = cobrakbase.KBaseAPI(token_file.readline()) - annotated_genomes = {model.id: kbase_object.get_from_ws(model.genome_ref) - for model in models if hasattr(model, "genome_ref")} - elif isinstance(annotated_genomes, list): annotated_genomes = dict(zip([model.id for model in models], annotated_genomes)) - elif models is not None: - annotated_genomes = {k:v for k,v in annotated_genomes.items() if k in [model.id for model in models]} - genome_combinations = list(combinations(annotated_genomes.keys(), 2)) - if printing: print(f"The Functionality Score (FS) will be calculated for {len(genome_combinations)} pairs.") - if not isinstance(list(annotated_genomes.values())[0], dict): - genome1_set, genome2_set = set(), set() - distances = {} - for genome1, genome2 in genome_combinations: - for j in annotated_genomes[genome1].features: - for key, val in j.ontology_terms.items(): - if key == 'SSO': genome1_set.update(val) - for j in annotated_genomes[genome2].features: - for key, val in j.ontology_terms.items(): - if key == 'SSO': genome2_set.update(val) - distances[f"{genome1} ++ {genome2}"] = CommScores._calculate_jaccard_score(genome1_set, genome2_set) - else: - distances = {f"{genome1} ++ {genome2}": CommScores._calculate_jaccard_score( - set(list(content["SSO"].keys())[0] for dic in annotated_genomes[genome1]["cdss"] - for x, content in dic.items() if x == "ontology_terms" and len(content["SSO"].keys()) > 0), - set(list(content["SSO"].keys())[0] for dic in annotated_genomes[genome2]["cdss"] - for x, content in dic.items() if x == "ontology_terms" and len(content["SSO"].keys()) > 0)) - for genome1, genome2 in combinations(annotated_genomes.keys(), 2)} - return distances - - @staticmethod - def smetana(member_models: Iterable, environment, com_model=None, min_growth=0.1, n_solutions=100, - abstol=1e-6, prior_values=None, compatibilized=False, sc_coupling=False, printing=False): - """Quantifies the extent of syntrophy as the sum of all exchanges in 
a given nutritional environment""" - member_models, community = _load_models( - member_models, com_model, compatibilized==False, printing=printing) - sc = None - if not prior_values: - mp = CommScores.mp(member_models, environment, com_model, abstol) - mu = CommScores.mu(member_models, environment, mp, n_solutions, abstol, compatibilized) - if sc_coupling: - sc = CommScores.sc(member_models, com_model, min_growth, n_solutions, abstol, compatibilized) - elif len(prior_values) == 3: sc, mu, mp = prior_values - else: mu, mp = prior_values - - smetana_scores = {} - for pairs in combinations(member_models, 2): - for model1, model2 in permutations(pairs): - if model1.id not in smetana_scores: - smetana_scores[model1.id] = {} - if not any([not mu[model1.id], not mp[model1.id]]): - sc_score = 1 if not sc_coupling else sc[model1.id][model2.id] - models_mets = list(model1.metabolites)+list(model2.metabolites) - unique_mets = set([met.id for met in models_mets]) - smetana_scores[model1.id][model2.id] = 0 - for met in models_mets: - if met.id in unique_mets: - mp_score = 0 if met.id not in mp[model1.id] else 1 - smetana_scores[model1.id][model2.id] += mu[model1.id].get(met.id,0)*sc_score*mp_score - return smetana_scores - - @staticmethod - def antiSMASH(json_path=None, zip_path=None): - # TODO Scores 2, 4, and 5 are being explored for relevance to community formation and reveal specific member interactions/targets - # load the antiSMASH report from either the JSON or the raw ZIP, or both - from os import mkdir, listdir, path - from zipfile import ZipFile - from json import load - if json_path: - cwd_files = listdir() - if json_path not in cwd_files and zip_path: - with ZipFile(zip_path, "r") as zip_file: - zip_file.extract(json_path) - with open(json_path, "r") as json_file: - data = load(json_file) - elif zip_path: - mkdir("extracted_antiSMASH") - with ZipFile(zip_path, "r") as zip_file: - zip_file.extractall("extracted_antiSMASH") - json_files = [x for x in listdir("extracted_antiSMASH") if x.endswith("json")] - if len(json_files) > 1: - print(f"The antiSMASH report describes {len(json_files)} JSON files, the first of which is selected " - f"{json_files[0]} for analysis, otherwise explicitly identify the desired JSON file in the json_path parameter.") - with open(path.join("extracted_antiSMASH", json_files[0]), "r") as json_file: - data = load(json_file) - else: - raise ParameterError("Either the json_path or zip_path from the antiSMASH analysis must be provided," - " for these scores to be determined.") - # Parse data and scores from the antiSMASH report - biosynthetic_areas = data["records"][0]['areas'] - BGCs = set(array([data["records"][0]['areas'][i]['products'] for i in range(biosynthetic_areas)]).flatten()) - len_proteins = len(data["records"][0]['modules']['antismash.modules.clusterblast']['knowncluster']['proteins']) - protein_annotations = [data["records"][0]['modules']['antismash.modules.clusterblast']['knowncluster']['proteins'][i]['annotations'] - for i in range(len_proteins)] - clusterBlast = [s for s in protein_annotations if "resistance" in s] - num_clusterBlast = sum([item.count("resistance") for item in protein_annotations]) - - return biosynthetic_areas, BGCs, protein_annotations, clusterBlast, num_clusterBlast diff --git a/modelseedpy/community/commscores_template.html b/modelseedpy/community/commscores_template.html deleted file mode 100644 index b379568..0000000 --- a/modelseedpy/community/commscores_template.html +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - CommScores Results - 
[157 deleted lines of HTML scaffolding for the "CommScores Results" report template; only the page title survives extraction]
- - - - \ No newline at end of file diff --git a/modelseedpy/community/datastandardization.py b/modelseedpy/community/datastandardization.py deleted file mode 100644 index 932ae46..0000000 --- a/modelseedpy/community/datastandardization.py +++ /dev/null @@ -1,770 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Aug 1 11:44:07 2022 - -@author: Andrew Freiburger -""" -from modelseedpy.community.commhelper import phenotypes -from modelseedpy.core.exceptions import ParameterError -from modelseedpy.core.optlanghelper import isIterable -from modelseedpy.core.fbahelper import FBAHelper -from optlang import Constraint -from optlang.symbolics import Zero -from scipy.constants import hour -from zipfile import ZipFile, ZIP_LZMA -from itertools import chain -from typing import Union, Iterable -from copy import deepcopy -from icecream import ic -# from cplex import Cplex -import logging, json, os, re -from pandas import read_csv, DataFrame, ExcelFile -import numpy as np - - -import logging -logger = logging.getLogger(__name__) - -def isnumber(string): - try: - float(string) - except: - return False - return True - -def _findDate(string, numerical=False): - monthNames = ["January", "February", "March", "April", "May", "June", "July", - "August", "September", "October", "November", "December"] - monthNums = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - days = list(range(31, 0, -1)) # [f"{num}-" for num in list(range(31,0,-1))] - years = list(range(2010, 2025))+list(range(10,25)) # [f"-{num}" for num in list(range(2000, 2100))] - americanDates = [f"{mon}-{day}-{year}" for mon in monthNums for day in days for year in years] - - for date in americanDates: - if re.search(date, string): - month, day, year = date.split("-") - if numerical: - return "-".join([day, month, year]) - return f"{monthNames[int(month)-1][:3]} {day}, {year}" - # # determine the month - # for monName in monthNames: - # if re.search(monName, string): - # month = monName - # break - # if not month: - # for monNum in monthNums: - # if re.search(monNum, string): - # month = monNum # maybe should be converted to the Name for standardization - # # determine the day - # for dayNum in days: - # if re.search(dayNum, string): - # day = dayNum - # break - # # determine the year - # for yearNum in years: - # if re.search(yearNum, string): - # year = yearNum - # break - # return day+month+year - -def dict_keys_exists(dic, *keys): - if keys[0] in dic: - remainingKeys = keys[1:] - if len(remainingKeys) > 0: - dict_keys_exists(dic[keys[0]], keys[1:]) - return True - return False - -def find_dic_number(dic): - for k, v in dic.items(): - if isnumber(v): - return v - num = find_dic_number(dic[k]) - return num - -def default_dict_values(dic, key, default): - return default if not key in dic else dic[key] - -def trial_contents(short_code, indices_tup, values): - matches = [ele == short_code for ele in indices_tup] - return np.array(values)[matches] - -def _spreadsheet_extension_load(path): - if ".csv" in path: - return read_csv(path) - elif ".xls" in path: - return ExcelFile(path) - -def _spreadsheet_extension_parse(path, raw_data, org_sheet): - if ".csv" in path: - return raw_data - elif ".xls" in path: - return raw_data.parse(org_sheet) - -def _met_id_parser(met): - met_id = re.sub('(\_\w\d+)', '', met) - met_id = met_id.replace('EX_', '', 1) - met_id = met_id.replace('c_', '', 1) - return met_id - -def _column_reduction(org_df): - dataframe = org_df.copy() # this prevents an irrelevant warning from pandas - dataframe.columns = map(str, 
dataframe.columns) - dataframe.index = dataframe['Well'] - dataframe.drop('Well', axis=1, inplace=True) - for col in dataframe.columns: - if any([x in col for x in ['Plate', 'Well', 'Cycle']]): - dataframe.drop(col, axis=1, inplace=True) - dataframe.columns = list(map(int, list(map(float, dataframe.columns)))) - return dataframe - -def _remove_trials(org_df, ignore_trials, signal, name, significant_deviation): - # refine the ignore_trials parameter - if isinstance(ignore_trials, dict): - ignore_trials['columns'] = list(map(str, ignore_trials['columns'])) if 'columns' in ignore_trials else [] - ignore_trials['rows'] = list(map(str, ignore_trials['rows'])) if 'rows' in ignore_trials else [] - ignore_trials['wells'] = ignore_trials['wells'] if 'wells' in ignore_trials else [] - elif isIterable(ignore_trials): - if ignore_trials[0][0].isalpha() and isnumber(ignore_trials[0][1:]): - short_code = True # TODO - drop trials with respect to the short codes, and not the full codes - - dataframe = org_df.copy() # this prevents an irrelevant warning from pandas - dropped_trials = [] - for trial in dataframe.index: - if isinstance(ignore_trials, dict) and any( - [trial[0] in ignore_trials['rows'], trial[1:] in ignore_trials['columns'], trial in ignore_trials['wells']] - ) or isIterable(ignore_trials) and trial in ignore_trials: - dataframe.drop(trial, axis=0, inplace=True) - dropped_trials.append(trial) - elif isIterable(ignore_trials) and trial in ignore_trials: - dataframe.drop(trial, axis=0, inplace=True) - dropped_trials.append(trial) - removed_trials = [] - if 'OD' not in signal: - for trial, row in dataframe.iterrows(): - row_array = np.array(row.to_list()) - ## remove trials for which the biomass growth did not change by the determined minimum deviation - if row_array[-1] / row_array[0] < significant_deviation: - dataframe.drop(trial, axis=0, inplace=True) - removed_trials.append(trial) - if removed_trials: - print(f'The {removed_trials} trials were removed from the {name} measurements, ' - f'with their deviation over time being less than the threshold of {significant_deviation}.') - if dropped_trials: - print(f'The {dropped_trials} trials were dropped from the {name} measurements ' - 'per the ignore_trials parameter.') - return dataframe, dropped_trials+removed_trials - -def _check_plateau(org_df, signal, name, significant_deviation, timesteps_len): - significant_deviation = max([2, significant_deviation]) - dataframe = org_df.copy() # this prevents an irrelevant warning from pandas - dropped = [] - for trial, row in dataframe.iterrows(): - row_array = np.array(row.to_list()) - values = [] - tracking = False - ## remove trials for which the biomass growth did not change by the determined minimum deviation - for index, val in enumerate(row_array): - if val / row_array[0] >= significant_deviation or tracking: - tracking = True - values.append(val) - if len(values) > timesteps_len: - del values[0] - remaining_values = list(dataframe.columns[index-timesteps_len+1:]) - if all([len(values) == timesteps_len, values[-1] <= values[0], - remaining_values[0] <= remaining_values[-1]*1.1]): - # the entire plateau, minus the first point of plateau, are removed - dropped = remaining_values - break - if dropped: - break - if dropped: - content = f"{name} {signal}" if name != signal else signal - print(f"The {dropped} timesteps (with {row_array[index-len(values)+1:]} values) were removed " - f"from the {content} data since the OD plateaued and is no longer valid.") - return dropped - -def 
_remove_timesteps(org_df, ignore_timesteps, name, signal): - dataframe = org_df.copy() # this prevents an irrelevant warning from pandas - if ignore_timesteps: - dropped = [] - for col in dataframe: - if col in ignore_timesteps: - dataframe.drop(col, axis=1, inplace=True) - dropped.append(col) - if dropped == ignore_timesteps: - print(f"The ignore_timesteps columns were dropped for the {name} {signal} data.") - else: - raise ParameterError(f"The ignore_timesteps values {ignore_timesteps} " - f"were unsuccessfully dropped for the {name} {signal} data.") - return dataframe, ignore_timesteps - -def _df_construction(name, df_name, ignore_trials, ignore_timesteps, - significant_deviation, dataframe, row_num, buffer_col1=True): - # refine the DataFrames - time_df = _column_reduction(dataframe.iloc[0::2]) - values_df = _column_reduction(dataframe.iloc[1::2]) - # display(name, time_df, values_df) - - # remove specified data trials - if ignore_trials: - values_df, removed_trials = _remove_trials( - values_df, ignore_trials, df_name, name, significant_deviation) - for row in removed_trials: - time_df.drop(row, axis=0, inplace=True) - - # remove specified data timesteps - if ignore_timesteps: - values_df, removed_timesteps = _remove_timesteps( - values_df, ignore_timesteps, name, df_name) - for col in list(map(int, removed_timesteps)): - time_df.drop(col, axis=1, inplace=True) - - # remove undefined trials - if buffer_col1: - possible_rows = [chr(ord("A")+row) for row in range(1, row_num+1)] - for trial_code in values_df.index: - if trial_code[0] not in possible_rows: - values_df.drop(trial_code, axis=0, inplace=True) - time_df.drop(trial_code, axis=0, inplace=True) - - # process the data for subsequent operations and optimal efficiency - values_df.astype(str); time_df.astype(str) - return time_df, values_df - -def _find_culture(string): - matches = re.findall(r"([A-Z]{2}\+?[A-Z]*)", string) - return [m for m in matches if not any([x in m for x in ["BIOLOG", "III"]])] - -def reverse_strip_comp(ID): - return ID.replace("~", "-") - -def _process_csv(self, csv_path, index_col): - self.zipped_output.append(csv_path) - csv = read_csv(csv_path) ; csv.index = csv[index_col] - csv.drop(index_col, axis=1, inplace=True) - csv.astype(str) - return csv - -def add_rel_flux_cons(model, ex, phenoRXN, carbon_ratio, rel_flux=0.2): - # {ex.id}_uptakeLimit: {net_{carbonous_ex}} >= {net_{carbon_source}}*{rel_flux}*{carbon_ratio} - # The negative flux sign of influxes specifies that the carbon_source value must be lesser than the other - # carbon influx that is being constrained. 
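# Concretely, the linear coefficients below encode
#   (ex.forward - ex.reverse) - rel_flux*carbon_ratio*(phenoRXN.forward - phenoRXN.reverse) >= 0.
# Since uptake fluxes are negative, this caps the uptake magnitude of `ex` at
# rel_flux*carbon_ratio times the carbon source's uptake; e.g. (hypothetical
# numbers) rel_flux=0.2, carbon_ratio=2, and a carbon-source flux of -10
# bound the constrained exchange flux to >= -4.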
- cons = Constraint(Zero, lb=0, ub=None, name=f"{ex.id}_uptakeLimit") - model.add_cons_vars(cons) - cons.set_linear_coefficients({ - ex.forward_variable:1, ex.reverse_variable:-1, - phenoRXN.forward_variable:-rel_flux*carbon_ratio, phenoRXN.reverse_variable:rel_flux*carbon_ratio}) - return model, cons - - -class GrowthData: - - @staticmethod - def process(community_members: dict, base_media=None, solver: str = 'glpk', all_phenotypes=True, - data_paths: dict = None, species_abundances: str = None, carbon_conc_series: dict = None, - ignore_trials: Union[dict, list] = None, ignore_timesteps: list = None, species_identities_rows=None, - significant_deviation: float = 2, extract_zip_path: str = None, determine_requisite_biomass=False): #, msdb_path:str=None): - # define the number of rows in the experimental data - row_num = len(species_identities_rows) - if "rows" in carbon_conc_series and carbon_conc_series["rows"]: - row_num = len(list(carbon_conc_series["rows"].values())[0]) - # load and parse data and metadata - (media_conc, data_timestep_hr, simulation_time, dataframes, trials, fluxes_df - ) = GrowthData.load_data( - base_media, community_members, solver, data_paths, ignore_trials, all_phenotypes, - ignore_timesteps, significant_deviation, row_num, extract_zip_path) - experimental_metadata, standardized_carbon_conc, trial_name_conversion = GrowthData.metadata( - base_media, community_members, species_abundances, carbon_conc_series, - species_identities_rows, row_num, _findDate(data_paths["path"])) - data_df = GrowthData.data_process(dataframes, trial_name_conversion) - requisite_biomass = {} if not determine_requisite_biomass else GrowthData.biomass_growth( - carbon_conc_series, fluxes_df, data_df.index.unique(), trial_name_conversion, - data_paths, community_members if all_phenotypes else None) - return (experimental_metadata, data_df, fluxes_df, standardized_carbon_conc, requisite_biomass, - trial_name_conversion, np.mean(data_timestep_hr), simulation_time, media_conc) - - @staticmethod - def load_data(base_media, community_members, solver, data_paths, ignore_trials, all_phenotypes, - ignore_timesteps, significant_deviation, row_num, extract_zip_path, min_timesteps=False): - # define default values - significant_deviation = significant_deviation or 0 - data_paths = data_paths or {} - ignore_timesteps = ignore_timesteps or "0:0" - start, end = ignore_timesteps.split(':') - raw_data = _spreadsheet_extension_load(data_paths['path']) - for org_sheet, name in data_paths.items(): - if org_sheet == 'path': - continue - df = _spreadsheet_extension_parse(data_paths['path'], raw_data, org_sheet) - df.columns = df.iloc[6] - df.drop(df.index[:7], inplace=True) - ## acquire the default start and end indices of ignore_timesteps - start = int(start or df.columns[0]) - end = int(end or df.columns[-1]) - break - ignore_timesteps = list(range(start, end+1)) if start != end else None - if extract_zip_path: - with ZipFile(extract_zip_path, 'r') as zp: - zp.extractall() - - # define only species for which data is defined - fluxes_df, comm_members = phenotypes(community_members, all_phenotypes, solver=solver) - modeled_species = list(v for v in data_paths.values() if ("OD" not in v and " " not in v)) - removed_phenotypes = [col for col in fluxes_df if not any([species in col for species in modeled_species])] - fluxes_df.drop(removed_phenotypes, axis=1, inplace=True) - if removed_phenotypes: - print(f'The {removed_phenotypes} phenotypes were removed ' - f'since their species is not among those with data: 
{modeled_species}.') - - # determine the time range in which all datasets are significant - data_timestep_hr = [] - dataframes = {} - max_timestep_cols = [] - if min_timesteps: - for org_sheet, name in data_paths.items(): - if org_sheet == 'path' or "OD" in sheet: continue - ## define the DataFrame - sheet = org_sheet.replace(' ', '_') - df_name = f"{name}:{sheet}" - dataframes[df_name] = _spreadsheet_extension_parse(data_paths['path'], raw_data, org_sheet) - dataframes[df_name].columns = dataframes[df_name].iloc[6] - dataframes[df_name].drop(dataframes[df_name].index[:7], inplace=True) - ## parse the timesteps from the DataFrame - drop_timestep_range = GrowthData._min_significant_timesteps( - dataframes[df_name], ignore_timesteps, significant_deviation, ignore_trials, df_name, name) - max_timestep_cols.append(drop_timestep_range) - ## timesteps that must be dropped for the most restrictive dataset is acquired - max_cols = max(list(map(len, max_timestep_cols))) - for ignore_timesteps in max_timestep_cols: - if len(ignore_timesteps) == max_cols: break - - # remove trials for which the OD has plateaued - # TODO - this somehow seems to break when the requisite_biomass is ignored - for org_sheet, name in data_paths.items(): - if "OD" not in name: continue - ## load the OD DataFrame - sheet = org_sheet.replace(' ', '_') - df_name = f"{name}:{sheet}" - dataframes[df_name] = _spreadsheet_extension_parse(data_paths['path'], raw_data, org_sheet) - dataframes[df_name].columns = dataframes[df_name].iloc[6] - dataframes[df_name].drop(dataframes[df_name].index[:7], inplace=True) - ## process the OD DataFrame - data_times_df, data_values_df = _df_construction( - name, df_name, ignore_trials, ignore_timesteps, - significant_deviation, dataframes[df_name], row_num) - plateaued_times = _check_plateau(data_values_df, name, name, significant_deviation, 3) - ## define and store the final DataFrames - for col in plateaued_times: - if col in data_times_df.columns: data_times_df.drop(col, axis=1, inplace=True) - if col in data_values_df.columns: data_values_df.drop(col, axis=1, inplace=True) - dataframes[df_name] = (data_times_df, data_values_df) - break - - # refine the non-OD signals - for org_sheet, name in data_paths.items(): - if org_sheet == 'path' or "OD" in name: continue - sheet = org_sheet.replace(' ', '_') - df_name = f"{name}:{sheet}" - if df_name not in dataframes: - dataframes[df_name] = _spreadsheet_extension_parse( - data_paths['path'], raw_data, org_sheet) - dataframes[df_name].columns = dataframes[df_name].iloc[6] - dataframes[df_name].drop(dataframes[df_name].index[:7], inplace=True) - # parse the DataFrame for values - simulation_time = dataframes[df_name].iloc[0, -1] / hour - data_timestep_hr.append(simulation_time / int(dataframes[df_name].columns[-1])) - # define the times and data - data_times_df, data_values_df = _df_construction( - name, df_name, ignore_trials, ignore_timesteps, significant_deviation, - dataframes[df_name], row_num) - # display(data_times_df) ; display(data_values_df) - for col in plateaued_times: - if col in data_times_df.columns: data_times_df.drop(col, axis=1, inplace=True) - if col in data_values_df.columns: data_values_df.drop(col, axis=1, inplace=True) - dataframes[df_name] = (data_times_df, data_values_df) - - # differentiate the phenotypes for each species - trials = set(chain.from_iterable([list(times.index) for times, values in dataframes.values()])) - media_conc = {} if not base_media else {cpd.id: cpd.concentration for cpd in base_media.mediacompounds} - 
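# For reference, load_data hands back: media_conc (base-media concentrations
# keyed by compound ID), data_timestep_hr (per-signal timestep lengths in
# hours), simulation_time (total assay duration in hours), dataframes
# ({"name:sheet": (times_df, values_df)} pairs), trials (the trial codes
# shared by all signals), and fluxes_df (the per-phenotype flux profiles
# from phenotypes()).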
return (media_conc, data_timestep_hr, simulation_time, dataframes, trials, fluxes_df) - - @staticmethod - def _min_significant_timesteps(full_df, ignore_timesteps, significant_deviation, ignore_trials, df_name, name): - # refine the DataFrames - values_df = _column_reduction(full_df.iloc[1::2]) - values_df, removed_trials = _remove_trials(values_df, ignore_trials, df_name, name, significant_deviation) - timestep_range = list(set(list(values_df.columns)) - set(ignore_timesteps)) - start, end = ignore_timesteps[0], ignore_timesteps[-1] - start_index = list(values_df.columns).index(start) - end_index = list(values_df.columns).index(end) - ## adjust the customized range such that the threshold is reached. - for trial, row in values_df.iterrows(): - row_array = np.delete(np.array(row.to_list()), list(range(start_index, end_index + 1))) - ## remove trials for which the biomass growth did not change by the determined minimum deviation - while all([row_array[-1] / row_array[0] < significant_deviation, - end <= values_df.columns[-1], start >= values_df.columns[0]]): - # print(timestep_range[0], values_df.columns[0], values_df.columns[-1], end, start) - if timestep_range[0] == values_df.columns[0] and start != values_df.columns[-1]: - timestep_range.append(timestep_range[-1] + 1) - start += 1 - print(f"The end boundary for {name} is increased to {timestep_range[-1]}", end="\r") - elif timestep_range[-1] == values_df.columns[-1] and end != values_df.columns[0]: - timestep_range.append(timestep_range[0] - 1) - end -= 1 - print(f"The start boundary for {name} is decreased to {timestep_range[0]}", end="\r") - else: - raise ParameterError(f"All of the timesteps were omitted for {name}.") - row_array = np.delete(np.array(row.to_list()), list(range( - list(values_df.columns).index(start), list(values_df.columns).index(end) + 1))) - print("\n") - return list(range(start, end+1)) - - @staticmethod - def metadata(base_media, community_members, species_abundances, - carbon_conc, species_identities_rows, row_num, date): - # define carbon concentrations for each trial - carbon_conc = carbon_conc or {} - carbon_conc['columns'] = default_dict_values(carbon_conc, "columns", {}) - carbon_conc['rows'] = default_dict_values(carbon_conc, "rows", {}) - column_num = len(species_abundances) - - # define the metadata DataFrame and a few columns - constructed_experiments = DataFrame(index = [f"G{x+1}" for x in list(range(column_num*row_num))]) - constructed_experiments.index.name = "short_code" - base_media_path = "minimal components media" if not base_media else base_media.path[0] - constructed_experiments["base_media"] = [base_media_path] * (column_num*row_num) - - # define community content - # species_mets = {mem["name"]: np.array([mets["consumed"] for mets in mem["phenotypes"].values()]).flatten() - # for mem in community_members.values()} - # define the strains column - strains, additional_compounds, experiment_ids = [], [], [] - trial_name_conversion = {} - count = 1 - ## apply universal values to all trials - base_row_conc = [] if '*' not in carbon_conc else [ - ':'.join([met, str(carbon_conc['*'][met][0]), str(carbon_conc['*'][met][1])]) for met in carbon_conc['*']] - members = list(mem["name"] for mem in community_members.values()) - for row in range(1, row_num+1): - row_conc = base_row_conc[:] - trial_letter = chr(ord("A") + row) - trial_name_conversion[trial_letter] = {} - ## add rows where the initial concentration in the first trial is non-zero - for met, conc_dict in carbon_conc["rows"].items(): - if 
conc_dict[sorted(list(conc_dict.keys()))[row-1]] > 0: - row_conc.append(':'.join([ - met, str(conc_dict[sorted(list(conc_dict.keys()))[row-1]]), - str(conc_dict[sorted(list(conc_dict.keys()), reverse=True)[-row]])])) - - row_concentration = ';'.join(row_conc) - composition = {} - for col in range(1, column_num+1): - ## construct the columns of information - additional_compounds.append(row_concentration) - experiment_id = [] - for member in members: - ### define the relative community abundances - composition[member] = [member, f"r{species_abundances[col][member]}"] - ### define the member strain, where it is appropriate - if member in species_identities_rows[row]: - composition[member][0] += f"_{species_identities_rows[row][member]}" - ### the experimental ID is abundance+memberID - if int(composition[member][1][1:]) != 0: - experiment_id.append(f"{composition[member][1]}_{composition[member][0]}") - composition[member] = ':'.join(composition[member]) - strains.append(';'.join(composition[member] for member in members)) - # for row2 in row_conc: - # metID, init, end = row2.split(':') - # ### get the met_name for the corresponding match in values - # met_name = None - # for index, mets in enumerate(species_mets.values()): - # if metID in mets: - # met_name = list(species_mets.keys())[index] - # break - # if "met_name" not in locals() or not met_name: - # logger.critical(f"The specified phenotypes {species_mets} for the {members} members" - # f" does not include the consumption of the available sources" - # f" {row_conc}; hence, the model cannot grow.") - # content = "" - # else: - # content = f"{init}_{met_name}" - # experiment_id.append(content) - experiment_id.extend([":".join(row.split(":")[:2]) for row in row_conc]) - experiment_id = '-'.join(experiment_id) - experiment_ids.append(experiment_id) - trial_name_conversion[trial_letter][str(col+1)] = ("G"+str(count), experiment_id) - count += 1 - - # convert the variable concentrations to short codes - standardized_carbon_conc = {} - for met, conc in carbon_conc["rows"].items(): - standardized_carbon_conc[met] = {} - for row, val in conc.items(): - standardized_carbon_conc[met].update({short_code:val for ( - short_code, expID) in trial_name_conversion[row].values()}) - for met, conc in carbon_conc["columns"].items(): - standardized_carbon_conc[met] = default_dict_values(standardized_carbon_conc, met, {}) - for col, val in conc.items(): - for row in trial_name_conversion: - standardized_carbon_conc[met][trial_name_conversion[row][str(col)][0]] = val - - # add columns to the exported dataframe - constructed_experiments.insert(0, "trial_IDs", experiment_ids) - constructed_experiments["additional_compounds"] = additional_compounds - constructed_experiments["strains"] = strains - constructed_experiments["date"] = [date] * (column_num*row_num) - constructed_experiments.to_csv("growth_metadata.tsv", sep="\t") - return constructed_experiments, standardized_carbon_conc, trial_name_conversion - - @staticmethod - def biomass_growth(carbon_conc, fluxes_df, data_df_trials, trial_name_conversion, - data_paths, community_members=None, pheno_info=None): - # TODO - leverage cFBA to partition metabolite consumption between the defined phenotypes - pheno_info = pheno_info or {f"{content['name']}_{pheno}": mets - for model, content in community_members.items() - for pheno, mets in content["phenotypes"].items()} - # invert the trial_name_conversion and data_paths keys and values - short_code_trials = {contents[0]: row+col for row in trial_name_conversion - for 
col, contents in trial_name_conversion[row].items()} - # short_code_trials = {contents[0]:contents[1] for contents in trial_name_conversion[row].values()} - name_signal = {name: signal for signal, name in data_paths.items()} - - # calculate the 90% concentration for each carbon source - requisite_fluxes = {} - for trial in [short_code_trials[ID] for ID in data_df_trials]: - row_letter = trial[0] ; col_number = trial[1:] - ## add rows where the initial concentration in the first trial is non-zero - utilized_phenos = {} - food_gradient = carbon_conc.copy() - for dimension, content in food_gradient.items(): - for met, conc_dict in content.items(): - source_conc = conc_dict[row_letter if dimension == "rows" else int(col_number)] - # print(met, source_conc) - if source_conc == 0 or f"EX_{met}_e0" not in fluxes_df.index: continue - for pheno, val in fluxes_df.loc[f"EX_{met}_e0"].items(): - # print(pheno, val) - if val < 0: utilized_phenos[pheno] = source_conc*0.9 / val - total_consumed = sum(list(utilized_phenos.values())) - # print(utilized_phenos) - - display(fluxes_df) - short_code = trial_name_conversion[row_letter][col_number][0] - requisite_fluxes[short_code] = {} - excreta = {} - for pheno, flux_conversion in utilized_phenos.items(): - species, phenotype = pheno.split("_", 1) - fluxes = fluxes_df.loc[:, pheno]*abs(flux_conversion) * abs(flux_conversion/total_consumed) - requisite_fluxes[short_code][f"{species}|{name_signal[species]}"] = fluxes[fluxes != 0] - pheno = reverse_strip_comp(pheno) - if "excreted" in pheno_info[pheno]: - # print(pheno_info[pheno]["excreted"]) - excreta.update({met:fluxes.loc[met] for met in pheno_info[pheno]["excreted"]}) - ## determine the fluxes for the other members of the community through cross-feeding - participated_species = [] - for pheno, mets in pheno_info.items(): - species, phenotype = pheno.split("_", 1) - if any([species in ph for ph in utilized_phenos]) or species in participated_species: continue - for met in mets["consumed"]: - exMet = f"EX_{met}_e0" - if exMet not in excreta: continue - fluxes = abs(excreta[exMet] * 0.99 / fluxes_df.loc[exMet, pheno]) * fluxes_df.loc[:, pheno] - requisite_fluxes[short_code][f"{species}|{name_signal[species]}"] = fluxes[fluxes != 0] - participated_species.append(species) - # print(requisite_fluxes) - return requisite_fluxes - - @staticmethod - def data_process(dataframes, trial_name_conversion): - short_codes, trials_list = [], [] - values, times = {}, {} # The times must capture upstream - first = True - for df_name, (times_df, values_df) in dataframes.items(): - # print(df_name) - # display(times_df) ; display(values_df) - times_tup = FBAHelper.parse_df(times_df) - average_times = np.mean(times_tup.values, axis=0) - values[df_name], times[df_name] = [], [] - for trial_code in values_df.index: - row_let, col_num = trial_code[0], trial_code[1:] - # print(trial_code, row_let, col_num) - for trial_row_values in trial_contents(trial_code, values_df.index, values_df.values): - if first: - short_code, experimentalID = trial_name_conversion[row_let][col_num] - trials_list.extend([experimentalID] * len(values_df.columns)) - short_codes.extend([short_code] * len(values_df.columns)) - values[df_name].extend(trial_row_values) - times[df_name].extend(average_times) - first = False - # process the data to the smallest dataset, to accommodate heterogeneous data sizes - minVal = min(list(map(len, values.values()))) - for df_name, data in values.items(): - values[df_name] = data[:minVal] - times2 = times.copy() - for df_name, 
data in times2.items(): - times[df_name] = data[:minVal] - # construct the growth DataFrame - df_data = {"trial_IDs": trials_list[:minVal], "short_codes": short_codes[:minVal]} - df_data.update({"Time (s)": np.mean(list(times.values()), axis=0)}) # element-wise average - df_data.update({df_name:vals for df_name, vals in values.items()}) - data_df = DataFrame(df_data) - data_df.index = data_df["short_codes"] - data_df = data_df.drop(["short_codes"], axis=1) - data_df.to_csv("growth_spectra.tsv", sep="\t") - return data_df - - -class BiologData: - - @staticmethod - def process(data_paths, trial_conditions_path, community_members, col_row_num, member_conversions, - culture=None, date=None, significant_deviation=None, solver="glpk", msdb_path:str=None): - row_num = 8 ; column_num = 12 - (zipped_output, data_timestep_hr, simulation_time, dataframes, trials, culture, date, fluxes_df - ) = BiologData.load_data(data_paths, significant_deviation, community_members, - col_row_num, row_num, culture, date, solver) - experimental_metadata, standardized_carbon_conc, trial_name_conversion = BiologData.metadata( - trial_conditions_path, row_num, column_num, culture, date) - biolog_df = BiologData.data_process(dataframes, trial_name_conversion) - requisite_biomass = BiologData.biomass_growth(biolog_df, member_conversions) - return (experimental_metadata, biolog_df, fluxes_df, standardized_carbon_conc, requisite_biomass, - trial_name_conversion, np.mean(data_timestep_hr), simulation_time) - - @staticmethod - def load_data(data_paths, significant_deviation, community_members, col_row_num, - row_num, culture, date, solver): - zipped_output = [data_paths['path'], "fluxes.tsv"] - # determine the metabolic fluxes for each member and phenotype - # import and parse the raw CSV data - # TODO - this may be capable of emulating leveraged functions from the GrowthData object - fluxes_df = phenotypes(community_members, solver=solver) - # fluxes_df = None - data_timestep_hr = [] - dataframes = {} - raw_data = _spreadsheet_extension_load(data_paths['path']) - significant_deviation = significant_deviation or 2 - # culture = culture or _find_culture(data_paths['path']) - culture = culture or ",".join([x for x in data_paths.values() if (x not in ["OD"] and not re.search(r"\w\.\w", x))]) - date = date or _findDate(data_paths['path']) - for org_sheet, name in data_paths.items(): - if org_sheet == 'path': - continue - sheet = org_sheet.replace(" ", "_") - df_name = f"{name}:{sheet}" - if df_name not in dataframes: - dataframes[df_name] = _spreadsheet_extension_parse( - data_paths['path'], raw_data, org_sheet) - dataframes[df_name].columns = dataframes[df_name].iloc[col_row_num] - dataframes[df_name].drop(dataframes[df_name].index[:col_row_num+1], inplace=True) - dataframes[df_name].dropna(inplace=True) - # parse the DataFrame for values - dataframes[df_name].columns = [str(x).strip() for x in dataframes[df_name].columns] - simulation_time = dataframes[df_name].iloc[0, -1] / hour - # display(dataframes[df_name]) - data_timestep_hr.append(simulation_time / int(float(dataframes[df_name].columns[-1]))) - # define the times and data - data_times_df, data_values_df = _df_construction( - name, df_name, None, None, significant_deviation, - dataframes[df_name], row_num, False) - # display(data_times_df) ; display(data_values_df) - dataframes[df_name] = (data_times_df, data_values_df) - - # differentiate the phenotypes for each species - trials = set(chain.from_iterable([list(df.index) for df, times in dataframes.values()])) - return 
(zipped_output, data_timestep_hr, simulation_time, dataframes, trials, culture, date, fluxes_df) - - @staticmethod - def metadata(trial_conditions_path, row_num, column_num, culture, date): - # define the conditions for each trial - with open(trial_conditions_path) as trials: - trial_conditions = json.load(trials) - - # define the metadata DataFrame and a few columns - constructed_experiments = DataFrame() - ex_prefix = "B" - constructed_experiments.index = [f"{ex_prefix}{x+1}" for x in list(range(row_num*column_num))] - constructed_experiments.index.name = "short_code" - - # define the strains column - experiment_ids, trial_names = [], [] - trial_name_conversion, trial_mets = {}, {} - count = 1 - ## apply universal values to all trials - for row in range(row_num): - trial_letter = chr(ord("A") + row) - trial_name_conversion[trial_letter] = {} - ## add rows where the initial concentration in the first trial is non-zero - for col in range(1, column_num+1): - ## construct the columns of information - dataID = trial_letter+str(col) - MSID = trial_conditions[dataID]["ModelSEED_ID"] - short_code = ex_prefix+str(count) - - experiment_ids.append(MSID) - trial_names.append(trial_conditions[dataID]["name"]) - trial_name_conversion[trial_letter][str(col)] = (short_code, MSID) - trial_mets[MSID] = {short_code:trial_conditions[dataID]["mM"]} - count += 1 - - # add columns to the exported dataframe - constructed_experiments.insert(0, "ModelSEED_ID", experiment_ids) - constructed_experiments.insert(0, "condition", trial_names) - constructed_experiments["strain"] = [culture] * (column_num*row_num) - constructed_experiments["date"] = [date] * (column_num*row_num) - constructed_experiments.to_csv("growth_metadata.tsv", sep="\t") - return constructed_experiments, trial_mets, trial_name_conversion - - @staticmethod - def data_process(dataframes, trial_name_conversion): - short_codes, trials_list = [], [] - values, times = {}, {} # The times must capture upstream - first = True - for df_name, (times_df, values_df) in dataframes.items(): - # display(df_name, times_df, values_df) - times_tup = FBAHelper.parse_df(times_df) - # display(DataFrame(times_tup.values)) - average_times = list(np.mean(times_tup.values, axis=0)) - # print(average_times) - # print(len(average_times)) - values[df_name], times[df_name] = [], [] - for exprID in values_df.index: - row_let, col_num = exprID[0], exprID[1:] - for trial_row_values in trial_contents(exprID, values_df.index, values_df.values): - if first: - short_code, experimentalID = trial_name_conversion[row_let][col_num] - trials_list.extend([experimentalID] * len(values_df.columns)) - short_codes.extend([short_code] * len(values_df.columns)) - if len(trial_row_values) != len(average_times): - print(f"The length of the trial data {len(trial_row_values)} " - f"exceeds that of the timesteps {len(average_times)} " - f"which creates an incompatible DataFrame.") - values[df_name].extend(trial_row_values) - times[df_name].extend(average_times) - first = False - # process the data to the smallest dataset, to accommodate heterogeneous data sizes - minVal = min(list(map(len, values.values()))) - for df_name, data in values.items(): - values[df_name] = data[:minVal] - times2 = times.copy() - for df_name, data in times2.items(): - times[df_name] = data[:minVal] - df_data = {"trial_IDs": trials_list, "short_codes": short_codes} - df_data.update({"Time (s)": list(np.mean(list(times.values()), axis=0))}) # element-wise average - df_data.update({df_name:vals for df_name, vals in 
values.items()}) - biolog_df = DataFrame(df_data) - biolog_df.index = biolog_df["short_codes"] - del biolog_df["short_codes"] - biolog_df.to_csv("growth_spectra.tsv", sep="\t") - - return biolog_df - - @staticmethod - def biomass_growth(biolog_df, member_conversions): - requisite_biomass = {} - for short_code in biolog_df.index.unique(): - requisite_biomass[short_code] = {} - for signal, conversion in member_conversions.items(): - short_code_df = biolog_df[biolog_df.index == short_code] - requisite_biomass[short_code][signal] = conversion * short_code_df[ - signal.replace("|", ":").replace(" ", "_")].iloc[-1] - return requisite_biomass diff --git a/modelseedpy/community/get_ncbi_gbff.pl b/modelseedpy/community/get_ncbi_gbff.pl deleted file mode 100644 index cbeddcf..0000000 --- a/modelseedpy/community/get_ncbi_gbff.pl +++ /dev/null @@ -1,13 +0,0 @@ -use strict; - -while (<>){ - chomp ($_); - next if ($_=~/^\s*$/); - my $val = `grep $_ assembly_summary_refseq.txt |cut -f 20`; - chomp ($val); - my @p = split ("/", $val); - my $n = $p[-1]; - my $url = "${val}/${n}_genomic.gbff.gz"; - my $fpath = "${n}_genomic.gbff.gz "; - print "curl $url -o $fpath" . "\n"; -} diff --git a/modelseedpy/community/log b/modelseedpy/community/log deleted file mode 100644 index 5e3c4b1..0000000 --- a/modelseedpy/community/log +++ /dev/null @@ -1,5 +0,0 @@ -[ - "here", - 1, - 18 -] \ No newline at end of file diff --git a/modelseedpy/community/metquest_code.py b/modelseedpy/community/metquest_code.py deleted file mode 100644 index d2001ed..0000000 --- a/modelseedpy/community/metquest_code.py +++ /dev/null @@ -1,858 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import absolute_import -from collections import deque, defaultdict -import os -import glob -import sys -import warnings -from itertools import combinations -import re -import pandas as pd -import numpy as np -import cobra -import networkx as nx - -from modelseedpy.community import commhelper -from modelseedpy import MSModelUtil - -warnings.filterwarnings("ignore") - - -def _create_graph_with_internal_reaction(organismsdata): - """ - This function creates a NetworkX DiGraph object which consists of - reactions and metabolites happening inside the organisms in a community. - This makes use of the reaction information i.e., irreversible and - reversible, which is obtained from another script fetch_reactions. 
- - Parameters - ---------- - organismsdata : dict - Dictionary containing the reaction information about organisms - - Returns - ------- - G : NetworkX DiGraph Object - Bipartite graph consisting of internal reactions in organisms - """ - G = nx.DiGraph() - for modelname in organismsdata: - G.add_nodes_from(organismsdata[modelname]['irreversible_rxn_no'], bipartite=1) - G.add_nodes_from(organismsdata[modelname]['reversible_rxn_no'], bipartite=1) - G.add_nodes_from(organismsdata[modelname]['reversible_back_rxn_no'], bipartite=1) - irrev_lhs_nodes = list(set( - [item for sublist in organismsdata[modelname]['irreversible_lhs_nodes'] for item in sublist])) - irrev_rhs_nodes = list(set( - [item for sublist in organismsdata[modelname]['irreversible_rhs_nodes'] for item in sublist])) - rev_lhs_nodes = list(set( - [item for sublist in organismsdata[modelname]['reversible_lhs_nodes'] for item in sublist])) - rev_rhs_nodes = list(set( - [item for sublist in organismsdata[modelname]['reversible_rhs_nodes'] for item in sublist])) - G.add_nodes_from(irrev_lhs_nodes, bipartite=0) - G.add_nodes_from(irrev_rhs_nodes, bipartite=0) - G.add_nodes_from(rev_lhs_nodes, bipartite=0) - G.add_nodes_from(rev_rhs_nodes, bipartite=0) - for irrevidx in range(len(organismsdata[modelname]['irreversible_rxn_no'])): - for lhsmetidx in range(len(organismsdata[modelname]['irreversible_lhs_nodes'][irrevidx])): - G.add_edges_from([(organismsdata[modelname]['irreversible_lhs_nodes'][irrevidx][lhsmetidx], - organismsdata[modelname]['irreversible_rxn_no'][irrevidx])]) - for rhsmetidx in range(len(organismsdata[modelname]['irreversible_rhs_nodes'][irrevidx])): - G.add_edges_from([(organismsdata[modelname]['irreversible_rxn_no'][irrevidx], - organismsdata[modelname]['irreversible_rhs_nodes'][irrevidx][rhsmetidx])]) - for revidx in range(len(organismsdata[modelname]['reversible_rxn_no'])): - for lhsmetidxrev in range(len(organismsdata[modelname]['reversible_lhs_nodes'][revidx])): - G.add_edges_from([(organismsdata[modelname]['reversible_lhs_nodes'][revidx][lhsmetidxrev], - organismsdata[modelname]['reversible_rxn_no'][revidx])]) - G.add_edges_from([(organismsdata[modelname]['reversible_back_rxn_no'][revidx], - organismsdata[modelname]['reversible_lhs_nodes'][revidx][lhsmetidxrev])]) - for rhsmetidxrev in range(len(organismsdata[modelname]['reversible_rhs_nodes'][revidx])): - G.add_edges_from([(organismsdata[modelname]['reversible_rxn_no'][revidx], - organismsdata[modelname]['reversible_rhs_nodes'][revidx][rhsmetidxrev])]) - G.add_edges_from([(organismsdata[modelname]['reversible_rhs_nodes'][revidx][rhsmetidxrev], - organismsdata[modelname]['reversible_back_rxn_no'][revidx])]) - return G - - -def _create_graph_with_exchange_reactions(G, orgs, namemap): - """ - This function first identifies the common exchange metabolites - and the non-common exchange metabolites and adds them to the - DiGraph object generated above. - - Parameters - ---------- - G : NetworkX DiGraph Object - Bipartite graph of reaction network from organisms - orgs : dict - Dictionary consisting of irreversible, reversible and exchange - reactions pertaining to the organisms. If more than one organism - is used, this dictionary consists of information about all the - organisms. 
- namemap : dict - Dictionary mapping the adhoc reaction names to reaction names in - the model - - Returns - ------- - G : NetworkX DiGraph Object - Bipartite graph consisting of internal and exchange reactions in organisms - namemap : dict - Dictionary mapping the adhoc exchange reaction names to reaction names in - the model - """ - metabolite_exchanged = [] - for orgnames in orgs: - exc_met = orgs[orgnames]['exchange_metab_nodes'] - metabolite_exchanged.append(exc_met) - # Common exchange metabolites in different organisms - common_exchange_metabolite = list(set.intersection(*list(map(set, metabolite_exchanged)))) - common_exchange_metabolite.sort() - # Adding the common exchange metabolites to the graph - for orgnames in orgs: - renamed_exc_met = [f"{orgnames} {comexcmet}" for comexcmet in common_exchange_metabolite] - number_exc_met = list(range(0, len(common_exchange_metabolite))) - mod_exc_rxn_number = [f'Org_{orgnames} ER{str(num + 1)}' for num in number_exc_met] - mod_exc_rev_rxn_number = [f'Org_{orgnames} ERR{str(num + 1)}' for num in number_exc_met] - G.add_nodes_from(mod_exc_rxn_number, bipartite=1) - G.add_nodes_from(mod_exc_rev_rxn_number, bipartite=1) - G.add_nodes_from(common_exchange_metabolite, bipartite=0) - G.add_nodes_from(renamed_exc_met, bipartite=0) - for k in range(len(renamed_exc_met)): - namemap[mod_exc_rxn_number[k]] = common_exchange_metabolite[k] - namemap[mod_exc_rev_rxn_number[k]] = common_exchange_metabolite[k] - G.add_edges_from([(renamed_exc_met[k], mod_exc_rxn_number[k])]) - G.add_edges_from([(mod_exc_rxn_number[k], common_exchange_metabolite[k])]) - G.add_edges_from([(common_exchange_metabolite[k], mod_exc_rev_rxn_number[k])]) - G.add_edges_from([(mod_exc_rev_rxn_number[k], renamed_exc_met[k])]) - # Adding the uncommon exchange metabolites to the graph - for orgnames in orgs: - metitems = orgs[orgnames]['exchange_metab_nodes'] - non_common_exc_met = list(set(metitems) - set(common_exchange_metabolite)) - non_common_exc_met.sort() - renamed_non_common_exc_met = [f"{orgnames} {s}" for s in non_common_exc_met] - number_non_common_exc_met = list(range(0, len(non_common_exc_met))) - mod_non_common_exc_rxn_number = [f"Org_{orgnames} NCER{str(num + 1)}" for num in number_non_common_exc_met] - mod_non_common_exc_rev_rxn_number = [f"Org_{orgnames} NCERR{str(num + 1)}" - for num in number_non_common_exc_met] - G.add_nodes_from(mod_non_common_exc_rxn_number, bipartite=1) - G.add_nodes_from(mod_non_common_exc_rev_rxn_number, bipartite=1) - G.add_nodes_from(non_common_exc_met, bipartite=0) - G.add_nodes_from(renamed_non_common_exc_met, bipartite=0) - for k in range(len(renamed_non_common_exc_met)): - namemap[mod_non_common_exc_rxn_number[k]] = non_common_exc_met[k] - namemap[mod_non_common_exc_rev_rxn_number[k]] = non_common_exc_met[k] - G.add_edges_from([(renamed_non_common_exc_met[k], mod_non_common_exc_rxn_number[k])]) - G.add_edges_from([(mod_non_common_exc_rxn_number[k], non_common_exc_met[k])]) - G.add_edges_from([(non_common_exc_met[k], mod_non_common_exc_rev_rxn_number[k])]) - G.add_edges_from([(mod_non_common_exc_rev_rxn_number[k], renamed_non_common_exc_met[k])]) - return G, namemap - -def create_graph(file_names, no_of_orgs): - """ - This function creates bipartite graph of the organisms based on the - path provided and the number of organsisms. For instance, if a folder - has 3 model files, and the number of organisms is 2, 3 (3C2) different - bipartite graphs are created. 
The graph objects and the dictionary - are saved as gpickle and pickle files respectively. - - Parameters - ---------- - file_names : list - List containing the file names of models - no_of_orgs : int - Number of organisms to be used for creating the DiGraph. - - Returns - ------- - H : NetworkX DiGraph Object - Bipartite graph consisting of internal and exchange reactions in organisms - full_name_map : dict - Dictionary mapping the adhoc reaction names to reaction names in - the model - """ - - H=[] - organisms_reaction_data, partial_name_map = segregate_reactions_from_models(file_names) - if organisms_reaction_data: - organisms_names = list(organisms_reaction_data.keys()) - all_possible_combis = list(combinations(list(range(len(organisms_names))), int(no_of_orgs))) - if int(no_of_orgs)>1 and sorted(organisms_names)[0][0]=='0': - all_possible_combis = all_possible_combis[:len(organisms_names)-1] - if all_possible_combis: - for ncom in range(len(all_possible_combis)): - file_name = '' - current_combination = {} - for numincom in range(len(all_possible_combis[ncom])): - current_combination[organisms_names[all_possible_combis[ncom][numincom]]] = \ - organisms_reaction_data[organisms_names[all_possible_combis[ncom][numincom]]] - file_name = file_name + organisms_names[all_possible_combis[ncom][numincom]] + '_' - H.append(_create_graph_with_internal_reaction(current_combination)) - temp, full_name_map = _create_graph_with_exchange_reactions( - H[ncom], current_combination, partial_name_map) - H[ncom]=temp - print(len(H), H[ncom]) - print('Number of edges in graph', len(H[ncom].edges())) - print('Number of nodes in graph', len(H[ncom].nodes())) - - # Uncomment the following code to save the graph files externally in your machine - # Note: Graph files can occupy a large space for large datasets - ''' - if os.access(path_name_with_models, os.W_OK): - with open(file_name + 'namemap' + '.pickle', 'wb') as filetodump: - dump(full_name_map, filetodump) - nx.write_gpickle(H[ncom], file_name + '.gpickle') - print('Graph and namemap saved for file(s) in', path_name_with_models) - ''' - else: - print( - 'Number of organisms for creating a consortium graph is more than the models given') - print('Program will now exit') - sys.exit() - else: - print("Cannot create graph") - sys.exit() - return H, full_name_map - - -def forward_pass(graph_object, media): - """ - This function carries out the Guided Breadth First Search on a directed - bipartite graph starting from the entries in seed metabolite set. - - Parameters - ---------- - graph_object : NetworkX DiGraph Object - Bipartite graph of the metabolic network - - seedmet : set - Set of seed metabolites including the source - - Returns - ------- - lower_bound_metabolite : defaultdict - Minimum number of steps required to reach a metabolite - status_dict : defaultdict - Dictionary pertaining to the status of every reaction - whether it - has been visited or not - scope : set - Set of metabolites that can be produced from the given set of - seed metabolites - - Notes - ----- - Starting with the set of seed metabolites S, the algorithm first finds - all the reactions from the set R, whose precursor metabolites are in S. - Such reactions are marked visited and added to the visited reaction set. - Metabolites produced by these reactions are checked. The reactions where - these metabolites participate are then checked for the presence of all its - predecessors and are added to the queue. 
This traversal continues in a - breadth-first manner and stops when there are no further reactions to - be visited. - """ - pred = graph_object.predecessors - succ = graph_object.successors - lower_bound_metabolite = {cpd: [0] for cpd in media} - lower_bound_reaction = defaultdict(list) - status_dict = defaultdict(str) - # Using a deque since deques have O(1) speed for appendleft() and popleft() - # while lists have O(n) performance for inserting and popping. - queue = deque([]) - # All seed metabolites are always present, hence require 0 steps - stage = 1 - mediaMets = list(media.keys()) - scope = list(media.keys()) - starting_rxn_node = [] - # First stage where starting_rxn_node list contains all the reactions - # which require only the seed metabolites as input - for starting_met_nodes in mediaMets: - # Essential when analysing mutiple networks with same seed metabolite - # set, although would be redundant in case of single network - if starting_met_nodes in graph_object: - for startingrxns in succ(starting_met_nodes): - if set(pred(startingrxns)).issubset(mediaMets): - if startingrxns not in starting_rxn_node: - starting_rxn_node.append(startingrxns) - for metsprod in succ(startingrxns): - scope.add(metsprod) - if stage not in lower_bound_metabolite[metsprod]: - lower_bound_metabolite[metsprod].append(stage) - if stage not in lower_bound_reaction[startingrxns]: - lower_bound_reaction[startingrxns].append(stage) - for rxn in starting_rxn_node: - for metabs in succ(rxn): - for nextrxn in succ(metabs): - if set(pred(nextrxn)).issubset(scope): - if nextrxn not in queue: queue.append(nextrxn) - status_dict[rxn] = 'V' - while queue: - stage += 1 - for parentrxn in list(queue): - if status_dict[parentrxn] == '': - if stage not in lower_bound_reaction[parentrxn]: - lower_bound_reaction[parentrxn].append(stage) - for mets in succ(parentrxn): - scope.add(mets) - if stage not in lower_bound_metabolite[mets]: - lower_bound_metabolite[mets].append(stage) - for progeny in succ(mets): - if set(pred(progeny)).issubset(scope): - if status_dict[progeny] != 'V': - if progeny not in queue: queue.append(progeny) - status_dict[parentrxn] = 'V' - elif status_dict[parentrxn] == 'V': - for mets in succ(parentrxn): - if stage not in lower_bound_metabolite[mets]: lower_bound_metabolite[mets].append(stage) - queue.popleft() - return lower_bound_metabolite, status_dict, scope - -def find_different_reaction_types(stoi_matrix, model, current_model_name): - """ - This function finds the exchange, irreversible and the reversible reactions - from the model. 
-
-def find_different_reaction_types(stoi_matrix, model, current_model_name):
-    """
-    This function finds the exchange, irreversible and the reversible reactions
-    from the model.
-
-    Parameters
-    ----------
-    stoi_matrix : numpy array
-        Transposed stoichiometric matrix of the model (reactions x metabolites)
-    model : COBRA model object
-        COBRA model object created from SBML models
-    current_model_name : str
-        Name which is to be prefixed against every
-        reaction/metabolite (to differentiate the entries in multiple organisms,
-        when a community model is built)
-    Returns
-    -------
-    exchange_met_ids : list
-        Metabolite identifiers of exchange metabolites
-    irrev_lhs_nodes : list
-        Metabolite identifiers of reactants of irreversible reactions
-    irrev_rhs_nodes : list
-        Metabolite identifiers of products of irreversible reactions
-    rev_lhs_nodes : list
-        Metabolite identifiers of reactants of reversible reactions
-    rev_rhs_nodes : list
-        Metabolite identifiers of products of reversible reactions
-    exchange_rxn_ids : list
-        Reaction identifiers of exchange reactions
-    irrev_rxn_ids : list
-        Reaction identifiers of irreversible reactions
-    rev_rxn_ids : list
-        Reaction identifiers of reversible reactions
-
-    """
-
-    xdim = np.shape(stoi_matrix)
-    reactants_of_reaction, total_metabolites_in_reaction, products_of_reaction = [], [], []
-    number_of_reactants_in_reaction, total_number_of_metabs_in_reaction = [], []
-    number_of_products_in_reaction, exchange_reaction_idx = [], []
-    reaction_identifiers, reaction_in_model, metabolite_identifiers = [], [], []
-    for metab in model.metabolites:
-        metabolite_identifiers.append(metab.id)
-    for rxns in model.reactions:
-        reaction_identifiers.append(rxns.id)
-        reaction_in_model.append(rxns.reaction)
-    for rxnidx in range(xdim[0]):
-        reactants_of_reaction.append(np.where(stoi_matrix[rxnidx] == -1))
-        total_metabolites_in_reaction.append(np.where(stoi_matrix[rxnidx] != 0))
-        products_of_reaction.append(np.where(stoi_matrix[rxnidx] == 1))
-        number_of_reactants_in_reaction.append(len(reactants_of_reaction[rxnidx][0]))
-        total_number_of_metabs_in_reaction.append(len(total_metabolites_in_reaction[rxnidx][0]))
-        number_of_products_in_reaction.append(len(products_of_reaction[rxnidx][0]))
-
-        # Case 1 - Presence of bulk metabolites in the medium
-        if reaction_in_model[rxnidx][-1] == 'b':  # Assuming the bulk metabolites end in 'b'
-            if number_of_reactants_in_reaction[rxnidx] == 1 and number_of_products_in_reaction[rxnidx] == 1:
-                exchange_reaction_idx.append(rxnidx)
-        # Case 2 - Presence of exchange metabolites
-        elif number_of_reactants_in_reaction[rxnidx] == 1 and total_number_of_metabs_in_reaction[rxnidx] == 1:
-            exchange_reaction_idx.append(rxnidx)
-        elif number_of_products_in_reaction[rxnidx] == 1 and total_number_of_metabs_in_reaction[rxnidx] == 1:
-            exchange_reaction_idx.append(rxnidx)
-    exchange_met_ids, exchange_met_index, exchange_rxn_ids = [], [], []
-    for excentry in exchange_reaction_idx:
-        exchange_rxn_ids.append(reaction_identifiers[excentry])
-        if reaction_in_model[excentry][-1] == 'b':
-            exchange_met_ids.append(metabolite_identifiers[np.nonzero(stoi_matrix[excentry])[0][0]])
-        else: exchange_met_index.append(np.nonzero(stoi_matrix[excentry])[0].tolist()[0])
-    if exchange_met_index:
-        for metind in exchange_met_index:
-            exchange_met_ids.append(metabolite_identifiers[metind])
-    all_rxn_idx = list(range(len(reaction_in_model)))
-    internal_rxns = list(set(all_rxn_idx) ^ set(exchange_reaction_idx))
-    reversible_rxns, irreversible_rxns, rxns_lowerbound, rxns_upperbound = [], [], [], []
-    for rxns in model.reactions:
-        rxns_lowerbound.append(rxns.lower_bound) ; rxns_upperbound.append(rxns.upper_bound)
-    for idxint in internal_rxns:
-        if rxns_lowerbound[idxint] < 0 and rxns_upperbound[idxint] >= 0: reversible_rxns.append(idxint)
-        elif rxns_lowerbound[idxint] >= 0 and rxns_upperbound[idxint] >= 0: irreversible_rxns.append(idxint)
-    # Irreversible reaction nodes
-    irrev_lhs_temporary, irrev_rhs_temporary, irrev_lhs_nodes, irrev_rhs_nodes, irrev_rxn_ids = [], [], [], [], []
-    for irridx in irreversible_rxns:
-        irrev_rxn_ids.append(reaction_identifiers[irridx])
-        irrev_lhs_temporary.append(np.where(stoi_matrix[irridx] < 0)[0].tolist())
-        irrev_rhs_temporary.append(np.where(stoi_matrix[irridx] > 0)[0].tolist())
-    for lhsirridx in range(len(irrev_lhs_temporary)):
-        temp_metab_list_lhs = []
-        for met_idx_lhs in irrev_lhs_temporary[lhsirridx]:
-            met_namech_lhs = f"{current_model_name} {metabolite_identifiers[met_idx_lhs]}"
-            temp_metab_list_lhs.append(met_namech_lhs)
-        irrev_lhs_nodes.append(temp_metab_list_lhs)
-    for rhsirridx in range(len(irrev_rhs_temporary)):
-        temp_metab_list_rhs = []
-        for met_idx_rhs in irrev_rhs_temporary[rhsirridx]:
-            met_namech_rhs = f"{current_model_name} {metabolite_identifiers[met_idx_rhs]}"
-            temp_metab_list_rhs.append(met_namech_rhs)
-        irrev_rhs_nodes.append(temp_metab_list_rhs)
-
-    # Reversible reaction nodes
-    rev_lhs_temporary, rev_rhs_temporary, rev_lhs_nodes, rev_rhs_nodes, rev_rxn_ids = [], [], [], [], []
-    for rridx in reversible_rxns:
-        rev_rxn_ids.append(reaction_identifiers[rridx])
-        rev_lhs_temporary.append(np.where(stoi_matrix[rridx] < 0)[0].tolist())
-        rev_rhs_temporary.append(np.where(stoi_matrix[rridx] > 0)[0].tolist())
-    for lhsrevidx in range(len(rev_lhs_temporary)):
-        temp_metab_list_lhs_rev = []
-        for met_idx_lhs in rev_lhs_temporary[lhsrevidx]:
-            met_namech_lhs = f"{current_model_name} {metabolite_identifiers[met_idx_lhs]}"
-            temp_metab_list_lhs_rev.append(met_namech_lhs)
-        rev_lhs_nodes.append(temp_metab_list_lhs_rev)
-    for rhsrevidx in range(len(rev_rhs_temporary)):
-        temp_metab_list_rhs_rev = []
-        for met_idx_rhs in rev_rhs_temporary[rhsrevidx]:
-            met_namech_rhs = f"{current_model_name} {metabolite_identifiers[met_idx_rhs]}"
-            temp_metab_list_rhs_rev.append(met_namech_rhs)
-        rev_rhs_nodes.append(temp_metab_list_rhs_rev)
-    return (exchange_met_ids, irrev_lhs_nodes, irrev_rhs_nodes, rev_lhs_nodes,
-            rev_rhs_nodes, exchange_rxn_ids, irrev_rxn_ids, rev_rxn_ids)
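
The bound-based classification above reduces to a simple per-reaction rule; an equivalent standalone check, as a sketch with plain numbers rather than a model:

    def classify_by_bounds(lb, ub):
        # mirrors the rule used above: a negative lower bound marks reversibility
        if lb < 0 and ub >= 0:
            return "reversible"
        if lb >= 0 and ub >= 0:
            return "irreversible"
        return "unclassified"  # strictly negative ranges are not handled above

    assert classify_by_bounds(-1000, 1000) == "reversible"
    assert classify_by_bounds(0, 1000) == "irreversible"
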
-
-
-def segregate_reactions_from_models(models):
-    """
-    This function gets the data pertaining to the reactions and the
-    metabolites from the models of multiple organisms.
-    It accepts already-loaded COBRA model objects and derives the
-    stoichiometric matrix of each model with the COBRA toolbox.
-
-    Parameters
-    ----------
-    models : list
-        List of model objects
-
-    Returns
-    -------
-    all_organisms_info : dict
-        Dictionary of all model data (reaction information about all the
-        organisms)
-    namemap : dict
-        Dictionary mapping the adhoc reaction names to reaction names in
-        the model
-
-    """
-    all_organisms_info = {}
-    namemap = {}
-    for model in models:
-        stoi = cobra.util.array.create_stoichiometric_matrix(model)
-        current_organisms_info = {}
-        rxns_in_model, mets_in_model = [], []
-        for metab in model.metabolites:
-            mets_in_model.append(metab.id)
-        for reac in model.reactions:
-            rxns_in_model.append(reac.id)
-        stoi_matrix = stoi.T
-        (exchange_nodes, irrev_lhs_nodes, irrev_rhs_nodes, rev_lhs_nodes, rev_rhs_nodes,
-         exc_name, irrev_rxn_name, rev_rxn_name
-         ) = find_different_reaction_types(stoi_matrix, model, model.id)
-        current_organisms_info[model.id] = {
-            'exchange_metab_nodes': exchange_nodes, 'irreversible_lhs_nodes': irrev_lhs_nodes,
-            'irreversible_rhs_nodes': irrev_rhs_nodes, 'reversible_lhs_nodes': rev_lhs_nodes,
-            'reversible_rhs_nodes': rev_rhs_nodes, 'exch_rxn_name': exc_name, 'irrev_rxn_name': irrev_rxn_name,
-            'rev_rxn_name': rev_rxn_name}
-
-        irrev_rxn_number = []
-        for num in range(len(irrev_lhs_nodes)):
-            modified_name_irrev = f'Org_{model.id} IR' + str(num + 1)
-            irrev_rxn_number.append(modified_name_irrev)
-            namemap[modified_name_irrev] = irrev_rxn_name[num]
-
-        rev_rxn_number = []
-        for num in range(len(rev_lhs_nodes)):
-            modified_name_rev = f'Org_{model.id} RR' + str(num + 1)
-            rev_rxn_number.append(modified_name_rev)
-            namemap[modified_name_rev] = rev_rxn_name[num]
-
-        rev_back_rxn_number = []
-        for num in range(len(rev_lhs_nodes)):
-            modified_name_back_rev = f'Org_{model.id} RevBR' + str(num + 1)
-            rev_back_rxn_number.append(modified_name_back_rev)
-            namemap[modified_name_back_rev] = rev_rxn_name[num]
-
-        current_organisms_info[model.id]['reversible_rxn_no'] = rev_rxn_number
-        current_organisms_info[model.id]['irreversible_rxn_no'] = irrev_rxn_number
-        current_organisms_info[model.id]['total_nodes'] = len(
-            exchange_nodes) + len(irrev_lhs_nodes) + len(rev_lhs_nodes)
-        current_organisms_info[model.id]['model_rxns'] = rxns_in_model
-        current_organisms_info[model.id]['reversible_back_rxn_no'] = rev_back_rxn_number
-        current_organisms_info[model.id]['metabolites'] = mets_in_model
-        all_organisms_info.update(current_organisms_info)
-    return all_organisms_info, namemap
-
-def find_relievedrxns(model, org_info, org_info_pert):
-    relieved = {i: list(set(org_info_pert[i]) - set(org_info[i])) for i in org_info_pert}
-    detailed_rel_rxns, rel_rxns_name = {}, {}
-
-    for i in model:
-        j = i.id
-        detailed_rel_rxns[j] = []
-        rel_rxns_name[j] = []
-        if len(relieved[j]):
-            rxn_ids = []
-            for r in i.reactions:
-                rxn_ids.append(r.id)
-            for rel in relieved[j]:
-                rel_rxn = i.reactions[rxn_ids.index(rel)].reaction
-                detailed_rel_rxns[j].append(rel_rxn)
-                rel_rxns_name[j].append(i.reactions[rxn_ids.index(rel)].name)
-
-    return relieved, detailed_rel_rxns, rel_rxns_name
-
-def find_stuckrxns(model, community, media, no_of_orgs):
-    # Constructing graphs
-    warnings.filterwarnings("ignore")
-    G, full_name_map = create_graph(community, no_of_orgs)
-    if not os.path.exists('results'): os.makedirs('results')
-    all_possible_combis = list(combinations(list(range(len(community))), int(no_of_orgs)))
-    if no_of_orgs > 1 and sorted(community)[0][0] == '0':
-        all_possible_combis = all_possible_combis[:len(community) - 1]
-    org_info = {}
-    scope = {}
-    print('No. of graphs constructed: ', len(G))
-
-    # This loop finds all the stuck reactions
-    for i in range(len(all_possible_combis)):
-        lbm, sd, s = forward_pass(G[i], media)
-        for j in range(len(all_possible_combis[i])):
-            stuck, rxnNode = [], []
-            model1 = model[all_possible_combis[i][j]].id
-            visited = list(sd.keys())
-            for r in G[i].nodes:
-                if r.find(model1) >= 0: rxnNode.append(r)
-            for rxn in rxnNode:
-                if rxn in visited: continue
-                elif rxn.find('ERR') >= 0: continue
-                elif rxn.find('Org') >= 0:
-                    if (rxn[len(model1) + 5] == 'I') or (rxn[len(model1) + 5] == 'R'): stuck.append(rxn)
-            org_info[model1] = stuck
-            scope[model1] = s
-    return org_info, scope, full_name_map
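
The namemap built above is what translates the adhoc graph ids back to model reaction ids, which is exactly what decrypt_orginfo (just below) automates; an illustrative round trip with made-up ids:

    # Sketch: the ids and reaction names here are toy values.
    namemap_toy = {"Org_ecoli IR1": "rxn00148_c0", "Org_ecoli RR1": "rxn00200_c0"}
    org_info_toy = {"ecoli": ["Org_ecoli IR1", "Org_ecoli RR1"]}
    decoded = decrypt_orginfo(org_info_toy, namemap_toy)
    print(decoded)  # {'ecoli': ['rxn00148_c0', 'rxn00200_c0']}
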
-
-def decrypt_orginfo(org_info, namemap):
-    """
-    This function decrypts the rxn ids using the data in the corresponding namemaps
-    :param org_info:
-    :param namemap:
-    :return:
-    org_info: A dictionary of decrypted rxn ids for each community
-    """
-    for i in org_info:
-        for j in range(len(org_info[i])):
-            org_info[i][j] = namemap[org_info[i][j]]
-    return org_info
-
-def make_perturbed_community(rem_org, pert_models, pert_community):
-    pert_model_ids = [i.id for i in pert_models]
-    for i in rem_org:
-        if i in pert_model_ids:
-            pert_models.remove(pert_models[pert_model_ids.index(i)])
-            pert_community.remove(pert_community[pert_model_ids.index(i)])
-            pert_model_ids.remove(i)
-
-    return pert_models, pert_community, pert_model_ids
-
-def perform_task(media, model, transport_rxns, pert_community,
-                 org_info_wo_trans_rxn, rem_org_list, n):
-    org_info_pert, scope_pert, namemap_pert = find_stuckrxns(model, pert_community, media, len(pert_community))
-    org_info_pert = decrypt_orginfo(org_info_pert, namemap_pert)
-    org_info_pert_wo_trans_rxn = {i:list(set(org_info_pert[i]) - set(transport_rxns)) for i in org_info_pert}
-
-    with open(f"results/Community_without_clus{str(n)}.csv", "w") as g:
-        for m in org_info_pert_wo_trans_rxn:
-            g.write(m + ',' + str(len(org_info_pert_wo_trans_rxn[m])) + '\n')
-    stuck_com = stuck_pert_com = 0
-    for i in org_info_wo_trans_rxn:
-        if i not in rem_org_list: stuck_com += len(org_info_wo_trans_rxn[i])
-    for i in org_info_pert_wo_trans_rxn:
-        stuck_pert_com += len(org_info_pert_wo_trans_rxn[i])
-    msi = 1 - (stuck_com / stuck_pert_com)
-    print(n, 'th cluster')
-    return org_info_pert, org_info_pert_wo_trans_rxn, msi
-
-def write_relieved_rxns(g, relieved, detailed_rel_rxns, rel_rxns_name):
-    g.write('acceptor\trelieved reactions\n')
-    for i in relieved:
-        g.write(i + '\t')
-        for j in list(set(relieved[i])):
-            g.write(j + '\t\n\t')
-        for d in list(set(rel_rxns_name[i])):
-            g.write(d + '\t\n\t')
-        for k in list(set(detailed_rel_rxns[i])):
-            g.write(k + '\t\n')
-
-def write_relieved_rxn_metadata(h, org_info_wo_trans_rxn, org_info_pert_wo_trans_rxn):
-    nrelieved = {}
-    for i in org_info_pert_wo_trans_rxn:
-        nrelieved[i] = len(org_info_pert_wo_trans_rxn[i]) - len(org_info_wo_trans_rxn[i])
-        if nrelieved[i]:
-            h.write(i + ',' + str(len(org_info_wo_trans_rxn[i])) + ',' + str(
-                len(org_info_pert_wo_trans_rxn[i])) + ',' + str(nrelieved[i]) + '\n')
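
The support index that perform_task reports compares stuck-reaction counts before and after the perturbation; with invented counts the arithmetic is:

    # Sketch: the counts are illustrative, not from any real community.
    stuck_com = 40       # stuck reactions in the unperturbed community, excluding the removed cluster
    stuck_pert_com = 50  # stuck reactions after the cluster is removed
    msi = 1 - (stuck_com / stuck_pert_com)
    print(msi)  # 0.2, i.e. the cluster relieves a fifth of the otherwise-stuck reactions
    # note: stuck_pert_com == 0 would raise ZeroDivisionError in perform_task
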
-
-def find_relieved_rxn(model, media_name, org_info_single, org_info_pair):
-    """
-    This function extracts and writes the relieved rxns into a tsv file
-    :param model:
-    :param media_name: name of the media used (identifier to know which media was used, when the analysis spans multiple media)
-    :param org_info_single: Dictionary containing stuck reactions of each microbe grown alone (self-pairs)
-    :param org_info_pair: Dictionary containing stuck reactions of each microbe in every pairwise community
-    :return: None
-    """
-    relieved = {}
-    for org1 in model:
-        for org2 in model:
-            if org1.id + '_' + org2.id in org_info_pair.keys():
-                relieved[org1.id + '_' + org2.id] = []
-                temp = list(set(org_info_single[org1.id + '_' + org1.id]) - set(org_info_pair[org1.id + '_' + org2.id]))
-                for j in temp:
-                    relieved[org1.id + '_' + org2.id].append(j)
-            else: continue
-
-    rel_rxns_name, detailed_rel_rxns = {}, {}
-    for i in model:
-        rxn_ids = [r.id for r in i.reactions]
-        for j in model:
-            org1 = i.id ; org2 = j.id
-            if org1 + '_' + org2 in relieved.keys():
-                detailed_rel_rxns[org1 + '_' + org2] = []
-                rel_rxns_name[org1 + '_' + org2] = []
-                for rel in relieved[org1 + '_' + org2]:
-                    rel_rxn = i.reactions[rxn_ids.index(rel)].reaction
-                    detailed_rel_rxns[org1 + '_' + org2].append(rel_rxn)
-                    rel_rxns_name[org1 + '_' + org2].append(i.reactions[rxn_ids.index(rel)].name)
-
-    relieved_rxn_output_file = f'results/relieved_rxns_{media_name}_w_excrxns.tsv'
-    with open(relieved_rxn_output_file, 'w') as g:
-        header = 'acceptor\tdonor\trelieved reactions\n'
-        g.write(header)
-        for i in model:
-            for j in model:
-                org1 = i.id ; org2 = j.id
-                if org1 + '_' + org2 in relieved.keys():
-                    g.write(org1 + '\t' + org2 + '\t')
-                    rel_rxns = list(set(relieved[org1 + '_' + org2]))
-                    det_rel_rxns = list(set(detailed_rel_rxns[org1 + '_' + org2]))
-                    rel_rxn_nam = list(set(rel_rxns_name[org1 + '_' + org2]))
-                    for x in rel_rxns:
-                        g.write(x + '\t\n\t\t')
-                    for d in rel_rxn_nam:
-                        g.write(d + '\t\n\t\t')
-                    for k in det_rel_rxns:
-                        g.write(k + '\t\n')
-    print('relieved reactions are written at:\n', relieved_rxn_output_file)
-
-def find_stuck_rxns(models, community, media, comm_size):
-    """
-    Constructs graphs using MetQuest and finds all stuck reactions in the cellular compartment
-    :param models: list of GEMs
-    :param community: model or list of models from which the graphs are constructed
-    :param media: dictionary of seed metabolites
-    :param comm_size: number of organisms in a community
-    :return:
-    org_info: Dictionary containing stuck reactions of all microbes in the community
-    scope: Dictionary containing all the metabolites that can be produced by the microbes in the community
-    namemap: Dictionary containing all the decrypted rxn ids
-    """
-    warnings.filterwarnings("ignore")
-    G, full_name_map = create_graph(community, comm_size)
-    if not os.path.exists('results'): os.makedirs('results')
-
-    # list() is required because combinations() returns a one-shot generator,
-    # which supports neither len() nor the indexing used below
-    all_possible_combis = list(combinations(range(len(models)), comm_size))
-    org_info, scope, vis = {}, {}, {}
-    print('No. of graphs constructed: ', len(G))
-
-    # This loop finds all the stuck reactions
-    for i in range(len(all_possible_combis)):
-        lbm, sd, s = forward_pass(G[i], media)
-        for j in range(len(all_possible_combis[i])):
-            stuck, rxnNode = [], []
-            model1 = models[all_possible_combis[i][j]].id
-            visited = list(sd.keys())
-            for r in G[i].nodes:
-                if r.find(model1) >= 0: rxnNode.append(r)
-            for rxn in rxnNode:
-                if rxn in visited or rxn.find('ERR') >= 0: continue
-                elif rxn.find('Org') >= 0:
-                    if (rxn[len(model1) + 5] == 'I') or (rxn[len(model1) + 5] == 'R'): stuck.append(rxn)
-            model2 = models[all_possible_combis[i][j - 1]].id
-            org_info[model1 + '_' + model2] = stuck
-            scope[model1 + '_' + model2] = s
-            vis[model1 + '_' + model2] = visited
-    return org_info, scope, full_name_map, vis
-
-def decrypt_org_info(org_info, namemap):
-    """
-    This function decrypts the rxn ids using the data in the corresponding namemaps
-    :param org_info:
-    :param namemap:
-    :return:
-    org_info: A dictionary of decrypted rxn ids for each community
-    """
-    for i in org_info:
-        for j in range(len(org_info[i])):
-            org_info[i][j] = namemap[org_info[i][j]]
-    return org_info
-
-def pMSI(models, media):
-    """
-    Calculates MSI for CarveMe models
-    Extracts and writes relieved reactions in every pair
-    :param models: list of member GEMs
-    :param media: dictionary of seed metabolites
-    :return: msi: Dictionary containing MSI values for every pair
-    """
-    # find all transport reactions
-    community_model = commhelper.build_from_species_models(models)
-    comm_util = MSModelUtil(community_model)
-    # find stuck reactions
-    org_info_single, scope_sin, namemap_sin, vis = find_stuck_rxns(models, community_model, media, 1)
-    org_info_pair, scope_pair, namemap_pair, vis = find_stuck_rxns(models, models, media, 2)
-    # decrypt the stuck reactions
-    org_info_single = decrypt_org_info(org_info_single, namemap_sin)
-    org_info_pair = decrypt_org_info(org_info_pair, namemap_pair)
-    # Filter out the transport reactions from every stuck reaction list
-    org_info_single_wo_trans_rxn, org_info_pair_wo_trans_rxn = {}, {}
-    for i in org_info_single:
-        org_info_single_wo_trans_rxn[i] = list(set(org_info_single[i]) - set(comm_util.transport_list()))
-    for i in org_info_pair:
-        org_info_pair_wo_trans_rxn[i] = list(set(org_info_pair[i]) - set(comm_util.transport_list()))
-    # find all the relieved reactions in every pair
-    find_relieved_rxn(models, "relieved_rxns", org_info_single, org_info_pair)
-    # calculate MSI for every pair
-    msi = {}
-    for org1 in models:
-        stuck_A = len(org_info_single_wo_trans_rxn[org1.id + '_' + org1.id])
-        for org2 in models:
-            if org1.id + '_' + org2.id in org_info_pair_wo_trans_rxn.keys():
-                stuck_AUB = len(org_info_pair_wo_trans_rxn[org1.id + '_' + org2.id])
-                if stuck_A == 0: msi[org1.id + '_' + org2.id] = 0
-                else: msi[org1.id + '_' + org2.id] = 1 - (stuck_AUB / stuck_A)
-    return msi, community_model
-
-def calculate_pairwiseMSI(models, media):
-    """
-    This function calculates pairwise-MSI for all given microbes.
-
-    Creates a csv file containing the MSI values of all pairs.
-
-    Creates a tsv file containing the list of reactions relieved
-    in all acceptor microbes in the presence of corresponding donor microbes.
- - :param path: path to all xml files - :param sd_file: path to txt file containing seed metabolites - """ - - warnings.filterwarnings("ignore") - msi, community_model = pMSI(models, media) - msi_output_file = f"results/MSI_{os.path.basename(media).replace('.txt', '')}.csv" - with open(msi_output_file, 'w') as f: - header = 'organism,in_the_presence,msi_value\n' - f.write(header) - for org1, org2 in combinations(models, 2): - if org1.id + '_' + org2.id in msi.keys(): - f.write(f"{org1.id},{org2.id},{str(msi[org1.id + '_' + org2.id])}\n") - print('MSI values are written at:\n', msi_output_file) - -def calculate_higherorderMSI(models, media, clusters = 'individual_clusters'): - community_model = commhelper.build_from_species_models(models) - comm_util = MSModelUtil(community_model) - org_info, scope, namemap = find_stuckrxns(model, community, media, len(community)) - org_info = decrypt_orginfo(org_info, namemap) - org_info_wo_trans_rxn = {i: list(set(org_info[i]) - set(comm_util.transport_list())) for i in org_info} - - with open(f"results/community_unperturbed.csv", 'w') as f: - for i, diff in org_info_wo_trans_rxn.items(): - f.write(i + ',' + str(len(diff)) + '\n') - - if clusters == 'individual_clusters': - rem_org_list1, rem_org_list2 = {}, {} - for i, model in enumerate(models): - rem_org_list1[i] = model.id ; rem_org_list2[i] = model.id - else: - cluster_data = pd.read_csv(clusters, sep=',') - rem_org_list1 = cluster_data.set_index('Cluster').T.to_dict('list') - for n in rem_org_list1: - rem_org_list1[n] = [j for j in rem_org_list1[n] if pd.isna(j) is False] - for n in rem_org_list1: - rem_org_list1[n] = [cobra.io.read_sbml_model(i).id for i in rem_org_list1[n]] - # rem_org_list1[n] = [model_ids[model_ids.index(i)] for i in rem_org_list1[n]] - rem_org_list2 = rem_org_list1.copy() - - for nclus in rem_org_list2: - rem_org_list2[nclus] = [x.replace('.xml', '') for x in rem_org_list2[nclus]] - - with open(f"results/higher_order_msi.csv", 'w') as f: - for n in rem_org_list1: - # os.chdir(path) - # new_models = model.copy() - # new_community = glob.glob('*.xml') - # if not new_community: - # new_community = glob.glob('*.sbml') - # new_community.sort() - - pert_models, pert_community, pert_model_ids = make_perturbed_community(rem_org_list1[n], new_models, - new_community) - - org_info_pert, org_info_pert_wo_trans_rxn, msi = perform_task( - media, pert_models, transport_rxns, pert_community, org_info_wo_trans_rxn, rem_org_list2[n], n) - for i in rem_org_list2[n]: - f.write('Comm,clus_' + str(n) + '#' + i + ',' + str(msi) + '\n') - - if msi: - relieved, detailed_rel_rxns, rel_rxns_name = find_relievedrxns(pert_models, org_info, org_info_pert) - with open(f'results/clusterKO_/data_analysis/relieved_rxns_Comm--clus{n}.tsv', 'w') as g: - write_relieved_rxns(g, relieved, detailed_rel_rxns, rel_rxns_name) - with open(f'results/clusterKO_/data_analysis/Comm--clus{n}.tsv', 'w') as h: - h.write('Comm--clus' + str(n) + '\n') - for i in rem_org_list2[n]: - h.write(i + '\n') - h.write('num of rxns relieved in the below orgs in the presence of clust' + str(n) + '\n') - h.write('org,unpert,clust_' + str(n) + 'KO,rxns relieved\n') - write_relieved_rxn_metadata(h, org_info_wo_trans_rxn, org_info_pert_wo_trans_rxn) - print('Comm--clus' + str(n)) - - new_models = model.copy() - new_community = glob.glob('*.xml') - if not new_community: - new_community = glob.glob('*.sbml') - new_community.sort() - ko_models, ko_community, model_ids = make_perturbed_community(pert_model_ids, new_models, new_community) - 
ko_org_list = [x for x in pert_model_ids] - if len(ko_org_list) < len(model): - org_info_pert, org_info_pert_wo_trans_rxn, msi = perform_task( - media, ko_models, transport_rxns, ko_community, org_info_wo_trans_rxn, ko_org_list, n) - for i in ko_community: - f.write('clus_' + str(n) + '#' + i + ',Comm,' + str(msi) + '\n') - - if msi: - relieved, detailed_rel_rxns, rel_rxns_name = find_relievedrxns(ko_models, org_info, org_info_pert) - with open(f'results/clusterKO_/data_analysis/relieved_rxns_Comm--clus{n}.tsv', 'w') as g: - write_relieved_rxns(g, relieved, detailed_rel_rxns, rel_rxns_name) - with open(f'results/clusterKO_/data_analysis/Comm{n}--clus.tsv', 'w') as h: - h.write('clus' + str(n) + '--Comm\n') - for i in ko_org_list: - h.write(i + '\n') - h.write('num of rxns relieved in the below orgs in the presence of Comm') - h.write('org,unpert,commKO,rxns relieved\n') - write_relieved_rxn_metadata(h, org_info_wo_trans_rxn, org_info_pert_wo_trans_rxn) - print('clus' + str(n) + '--Comm') diff --git a/modelseedpy/community/mscommfitting.py b/modelseedpy/community/mscommfitting.py deleted file mode 100644 index 2a42d63..0000000 --- a/modelseedpy/community/mscommfitting.py +++ /dev/null @@ -1,1091 +0,0 @@ -# -*- coding: utf-8 -*- -from modelseedpy.fbapkg.mspackagemanager import MSPackageManager -from modelseedpy.core.exceptions import FeasibilityError -from pandas import read_table, read_csv, DataFrame -from optlang import Variable, Constraint, Objective, Model -from modelseedpy.core.fbahelper import FBAHelper -from scipy.constants import hour -from scipy.optimize import newton -from collections import OrderedDict -from zipfile import ZipFile, ZIP_LZMA -from optlang.symbolics import Zero -from sympy.core.add import Add -from matplotlib import pyplot - -# from pprint import pprint -from time import sleep, process_time -import numpy as np - -# from cplex import Cplex -import json, os, re - - -def _name(name, suffix, time, trial): - return "-".join([name + suffix, time, trial]) - - -class MSCommFitting: - def __init__(self): - ( - self.parameters, - self.variables, - self.constraints, - self.dataframes, - self.signal_species, - self.values, - ) = ({}, {}, {}, {}, {}, {}) - self.phenotypes_parsed_df: np.ndarray - self.problem: object - self.species_phenotypes_bool_df: object - self.zipped_output, self.plots = [], [] - - def _process_csv(self, csv_path, index_col): - self.zipped_output.append(csv_path) - csv = read_csv(csv_path) - csv.index = csv[index_col] - csv.drop(index_col, axis=1, inplace=True) - csv.astype(str) - return csv - - def load_data( - self, - community_members: dict = {}, - kbase_token: str = None, - solver: str = "glpk", - signal_tsv_paths: dict = {}, - phenotypes_csv_path: str = None, - media_conc_path: str = None, - species_abundance_path: str = None, - carbon_conc_series: dict = {}, - ignore_trials: dict = {}, - ignore_timesteps: list = [], - significant_deviation: float = 2, - zip_path: str = None, - ): - self.zipped_output = [] - if zip_path: - with ZipFile(zip_path, "r") as zp: - zp.extractall() - if species_abundance_path: - self.species_abundances = self._process_csv( - species_abundance_path, "trial_column" - ) - if phenotypes_csv_path: - # process a predefined exchanges table - self.zipped_output.append(phenotypes_csv_path) - fluxes_df = read_csv(phenotypes_csv_path) - fluxes_df.index = fluxes_df["rxn"] - to_drop = [col for col in fluxes_df.columns if " " in col] - for col in to_drop + ["rxn"]: - fluxes_df.drop(col, axis=1, inplace=True) - print( - f'The 
{to_drop+["rxn"]} columns were dropped from the phenotypes CSV.' - ) - - # import and process the media concentrations CSV - self.media_conc = self._process_csv(media_conc_path, "media_compound") - elif community_members: - # import the media for each model - models = OrderedDict() - ex_rxns: set = set() - species: dict = {} - # Using KBase media to constrain exchange reactions in model - for model, content in community_members.items(): - model.solver = solver - ex_rxns.update(model.exchanges) - species.update({content["name"]: content["phenotypes"].keys()}) - models[model] = [] - for media in content["phenotypes"].values(): - with model: # !!! Is this the correct method of parameterizing a media for a model? - pkgmgr = MSPackageManager.get_pkg_mgr(model) - pkgmgr.getpkg("KBaseMediaPkg").build_package( - media, default_uptake=0, default_excretion=1000 - ) - models[model].append(model.optimize()) - - # construct the parsed table of all exchange fluxes for each phenotype - fluxes_df = DataFrame( - data={ - "bio": [ - sol.fluxes["bio1"] - for solutions in models.values() - for sol in solutions - ] - }, - columns=["rxn"] - + [ - spec + "-" + phenotype - for spec, phenotypes in species.items() - for phenotype in phenotypes - ] - + [spec + "-stationary" for spec in species.keys()], - ) - fluxes_df.index.name = "rxn" - fluxes_df.drop("rxn", axis=1, inplace=True) - for ex_rxn in ex_rxns: - elements = [] - for model, solutions in models.items(): - for sol in solutions: - elements.append( - sol.fluxes[ex_rxn] if ex_rxn in sol.fluxes else 0 - ) - if any(np.array(elements) != 0): - fluxes_df.iloc[ex_rxn.id] = elements - - # define only species for which data is defined - signal_tsv_paths - modeled_species = list(signal_tsv_paths.values()) - modeled_species.remove("OD") - removed_phenotypes = [ - col - for col in fluxes_df - if not any([species in col for species in modeled_species]) - ] - for col in removed_phenotypes: - fluxes_df.drop(col, axis=1, inplace=True) - if removed_phenotypes != []: - print( - f"The {removed_phenotypes} phenotypes were removed since their species is not among those that are defined with data: {modeled_species}." 
- ) - fluxes_df.astype(str) - self.phenotypes_parsed_df = FBAHelper.parse_df(fluxes_df) - self.species_phenotypes_bool_df = DataFrame( - columns=self.phenotypes_parsed_df[1] - ) - - if "columns" not in carbon_conc_series: - carbon_conc_series["columns"] = {} - if "rows" not in carbon_conc_series: - carbon_conc_series["rows"] = {} - self.carbon_conc = carbon_conc_series - - self.parameters["data_timestep_hr"] = [] - if "columns" not in ignore_trials: - ignore_trials["columns"] = [] - if "rows" not in ignore_trials: - ignore_trials["rows"] = [] - if "wells" not in ignore_trials: - ignore_trials["wells"] = [] - ignore_trials["columns"] = list(map(str, ignore_trials["columns"])) - ignore_trials["rows"] = list(map(str, ignore_trials["rows"])) - ignore_timesteps = list(map(str, ignore_timesteps)) - for path, name in signal_tsv_paths.items(): - self.zipped_output.append(path) - signal = os.path.splitext(path)[0].split("_")[0] - # define the signal dataframe - self.signal_species[signal] = name # {name:phenotypes} - self.dataframes[signal] = read_table(path) - self.simulation_time = self.dataframes[signal].iloc[0, -1] / hour - self.parameters["data_timestep_hr"].append( - self.simulation_time / int(self.dataframes[signal].columns[-1]) - ) - self.dataframes[signal] = self.dataframes[signal].iloc[ - 1::2 - ] # excludes the times - self.dataframes[signal].index = self.dataframes[signal]["Well"] - # filter data contents - dropped_trials = [] - for trial in self.dataframes[signal].index: - if any( - [ - trial[0] in ignore_trials["rows"], - trial[1:] in ignore_trials["columns"], - trial in ignore_trials["wells"], - ] - ): - self.dataframes[signal].drop(trial, axis=0, inplace=True) - dropped_trials.append(trial) - if dropped_trials != []: - print( - f"The {dropped_trials} trials were dropped from the {name} measurements." - ) - for col in ["Plate", "Cycle", "Well"]: - self.dataframes[signal].drop(col, axis=1, inplace=True) - for col in self.dataframes[signal]: - if col in ignore_timesteps: - self.dataframes[signal].drop(col, axis=1, inplace=True) - if "OD" not in signal: - removed_trials = [] - for trial, row in self.dataframes[signal].iterrows(): - row_array = np.array(row.to_list()) - if row_array[-1] / row_array[0] < significant_deviation: - self.dataframes[signal].drop(trial, axis=0, inplace=True) - removed_trials.append(trial) - if removed_trials != []: - print( - f"The {removed_trials} trials were removed from the {name} measurements, with their deviation over time being less than the threshold of {significant_deviation}." 
- ) - - # process the data for subsequent operations and optimal efficiency - self.dataframes[signal].astype(str) - self.dataframes[signal]: np.ndarray = FBAHelper.parse_df( - self.dataframes[signal] - ) - - # differentiate the phenotypes for each species - if "OD" not in signal: - self.species_phenotypes_bool_df.loc[signal]: np.ndarray[int] = np.array( - [ - 1 if self.signal_species[signal] in pheno else 0 - for pheno in self.phenotypes_parsed_df[1] - ] - ) - - self.parameters["data_timestep_hr"] = sum( - self.parameters["data_timestep_hr"] - ) / len(self.parameters["data_timestep_hr"]) - self.data_timesteps = int( - self.simulation_time / self.parameters["data_timestep_hr"] - ) - - def define_problem( - self, - parameters={}, - zip_name: str = None, - export_parameters: bool = True, - export_lp: bool = True, - final_relative_carbon_conc: float = None, - metabolites_to_track: list = None, - ): - self.parameters.update( - { - "timestep_hr": self.parameters[ - "data_timestep_hr" - ], # Timestep size of the simulation in hours - "cvct": 1, # Coefficient for the minimization of phenotype conversion to the stationary phase. - "cvcf": 1, # Coefficient for the minimization of phenotype conversion from the stationary phase. - "bcv": 1, # This is the highest fraction of biomass for a given species that can change phenotypes in a single time step - "cvmin": 0, # This is the lowest value the limit on phenotype conversion goes, - "v": 1000, # the kinetics constant that is externally adjusted - "carbon_sources": ["cpd00136", "cpd00179"], # 4hb, maltose - "diffpos": 1, - "diffneg": 1, # objective coefficients to the diffpos and diffneg variables that correspond with the components of difference between experimental and predicted bimoass values - } - ) - self.parameters.update(parameters) - self.problem = Model() - print("Solver:", type(self.problem)) - trial: str - time: str - name: str - phenotype: str - met: str - obj_coef = {} - constraints: list = [] - variables: list = ( - [] - ) # lists are orders-of-magnitude faster than numpy arrays for appending - self.simulation_timesteps = list( - map( - str, - range( - 1, int(self.simulation_time / self.parameters["timestep_hr"]) + 1 - ), - ) - ) - time_1 = process_time() - for signal, parsed_df in self.dataframes.items(): - for met in self.phenotypes_parsed_df[0]: - met_id = re.sub("(\_\w\d+)", "", met) - met_id = met_id.replace("EX_", "", 1) - if ( - not metabolites_to_track - and met_id != "cpd00001" - or metabolites_to_track - and met_id in metabolites_to_track - ): - self.variables["c_" + met] = {} - self.constraints["dcc_" + met] = {} - initial_time = True - final_time = False - for time in self.simulation_timesteps: - if time == self.simulation_timesteps[-1]: - final_time = True - self.variables["c_" + met][time] = {} - self.constraints["dcc_" + met][time] = {} - for trial in parsed_df[0]: - # define biomass measurement conversion variables - self.variables["c_" + met][time][trial] = Variable( - _name("c_", met, time, trial), lb=0, ub=1000 - ) - # constrain initial time concentrations to the media or a large number if it is not explicitly defined - if ( - initial_time and not "bio" in met_id - ): # !!! 
the value of initial_time changes - initial_val = ( - self.media_conc.at[met_id, "mM"] - if met_id in list(self.media_conc.index) - else 100 - ) - if ( - met_id in self.carbon_conc["rows"] - and trial[0] in self.carbon_conc["rows"][met_id] - ): - initial_val = self.carbon_conc["rows"][met_id][ - trial[0] - ] - if ( - met_id in self.carbon_conc["columns"] - and trial[1:] in self.carbon_conc["columns"][met_id] - ): - initial_val = self.carbon_conc["columns"][met_id][ - trial[1:] - ] - self.variables["c_" + met][time][trial] = Variable( - _name("c_", met, time, trial), - lb=initial_val, - ub=initial_val, - ) - # mandate complete carbon consumption - if ( - final_time - and met_id in self.parameters["carbon_sources"] - ): - self.variables["c_" + met][time][trial] = Variable( - _name("c_", met, time, trial), lb=0, ub=0 - ) - if final_relative_carbon_conc: - self.variables["c_" + met][time][trial] = Variable( - _name("c_", met, time, trial), - lb=0, - ub=self.variables["c_" + met]["1"][trial].lb - * final_relative_carbon_conc, - ) - variables.append(self.variables["c_" + met][time][trial]) - initial_time = False - break # prevents duplicated variables - for signal, parsed_df in self.dataframes.items(): - if "OD" not in signal: - for phenotype in self.phenotypes_parsed_df[1]: - if self.signal_species[signal] in phenotype: - self.constraints["dbc_" + phenotype] = {} - for time in self.simulation_timesteps: - self.constraints["dbc_" + phenotype][time] = {} - - for phenotype in self.phenotypes_parsed_df[1]: - self.variables["cvt_" + phenotype] = {} - self.variables["cvf_" + phenotype] = {} - self.variables["b_" + phenotype] = {} - self.variables["g_" + phenotype] = {} - self.variables["v_" + phenotype] = {} - self.constraints["gc_" + phenotype] = {} - self.constraints["cvc_" + phenotype] = {} - for time in self.simulation_timesteps: - self.variables["cvt_" + phenotype][time] = {} - self.variables["cvf_" + phenotype][time] = {} - self.variables["b_" + phenotype][time] = {} - self.variables["g_" + phenotype][time] = {} - self.variables["v_" + phenotype][time] = {} - self.constraints["gc_" + phenotype][time] = {} - self.constraints["cvc_" + phenotype][time] = {} - for trial in parsed_df[0]: - self.variables["b_" + phenotype][time][ - trial - ] = Variable( # predicted biomass abundance - _name("b_", phenotype, time, trial), lb=0, ub=100 - ) - self.variables["g_" + phenotype][time][ - trial - ] = Variable( # biomass growth - _name("g_", phenotype, time, trial), lb=0, ub=1000 - ) - - if "stationary" not in phenotype: - self.variables["cvt_" + phenotype][time][ - trial - ] = Variable( # conversion rate to the stationary phase - _name("cvt_", phenotype, time, trial), lb=0, ub=100 - ) - self.variables["cvf_" + phenotype][time][ - trial - ] = Variable( # conversion from to the stationary phase - _name("cvf_", phenotype, time, trial), lb=0, ub=100 - ) - - # 0 <= -cvt + bcv*b_{phenotype} + cvmin - self.constraints["cvc_" + phenotype][time][trial] = Constraint( - -self.variables["cvt_" + phenotype][time][trial] - + self.parameters["bcv"] - * self.variables["b_" + phenotype][time][trial] - + self.parameters["cvmin"], - lb=0, - ub=None, - name=_name("cvc_", phenotype, time, trial), - ) - - # g_{phenotype} - b_{phenotype}*v = 0 - self.constraints["gc_" + phenotype][time][trial] = Constraint( - self.variables["g_" + phenotype][time][trial] - - self.parameters["v"] - * self.variables["b_" + phenotype][time][trial], - lb=0, - ub=0, - name=_name("gc_", phenotype, time, trial), - ) - - obj_coef.update( - { - 
self.variables["cvf_" + phenotype][time][ - trial - ]: self.parameters["cvcf"], - self.variables["cvt_" + phenotype][time][ - trial - ]: self.parameters["cvct"], - } - ) - variables.extend( - [ - self.variables["cvf_" + phenotype][time][trial], - self.variables["cvt_" + phenotype][time][trial], - ] - ) - constraints.extend( - [ - self.constraints["cvc_" + phenotype][time][trial], - self.constraints["gc_" + phenotype][time][trial], - ] - ) - - variables.extend( - [ - self.variables["b_" + phenotype][time][trial], - self.variables["g_" + phenotype][time][trial], - ] - ) - - # define non-concentration variables - half_dt = self.parameters["data_timestep_hr"] / 2 - time_2 = process_time() - print(f"Done with biomass loop: {(time_2-time_1)/60} min") - for parsed_df in self.dataframes.values(): - for r_index, met in enumerate(self.phenotypes_parsed_df[0]): - met_id = re.sub("(\_\w\d+)", "", met) - met_id = met_id.replace("EX_", "", 1) - if ( - not metabolites_to_track - and "cpd00001" != met_id - or metabolites_to_track - and met_id in metabolites_to_track - ): - for trial in parsed_df[0]: - last_column = False - for time in self.simulation_timesteps: - next_time = str(int(time) + 1) - if next_time == self.simulation_timesteps[-1]: - last_column = True - # c_{met} + dt*sum_k^K() - c+1_{met} = 0 - self.constraints["dcc_" + met][time][trial] = Constraint( - self.variables["c_" + met][time][trial] - - self.variables["c_" + met][next_time][trial] - + np.dot( - self.phenotypes_parsed_df[2][r_index] * half_dt, - np.array( - [ - self.variables["g_" + phenotype][time][ - trial - ] - + self.variables["g_" + phenotype][ - next_time - ][trial] - for phenotype in self.phenotypes_parsed_df[ - 1 - ] - ] - ), - ), - ub=0, - lb=0, - name=_name("dcc_", met, time, trial), - ) - - constraints.append( - self.constraints["dcc_" + met][time][trial] - ) - if last_column: - break - break # prevents duplicated constraints - - time_3 = process_time() - print(f"Done with metabolites loop: {(time_3-time_2)/60} min") - for signal, parsed_df in self.dataframes.items(): - data_timestep = 1 - self.variables[signal + "__conversion"] = Variable( - signal + "__conversion", lb=0, ub=1000 - ) - variables.append(self.variables[signal + "__conversion"]) - - self.variables[signal + "__bio"] = {} - self.variables[signal + "__diffpos"] = {} - self.variables[signal + "__diffneg"] = {} - self.constraints[signal + "__bioc"] = {} - self.constraints[signal + "__diffc"] = {} # diffc is defined latter - for time in self.simulation_timesteps: - if ( - int(time) * self.parameters["timestep_hr"] - >= data_timestep * self.parameters["data_timestep_hr"] - ): # synchronizes user timesteps with data timesteps - data_timestep += 1 - if int(data_timestep) > self.data_timesteps: - break - next_time = str(int(time) + 1) - self.variables[signal + "__bio"][time] = {} - self.variables[signal + "__diffpos"][time] = {} - self.variables[signal + "__diffneg"][time] = {} - self.constraints[signal + "__bioc"][time] = {} - self.constraints[signal + "__diffc"][time] = {} - for r_index, trial in enumerate(parsed_df[0]): - total_biomass: Add = 0 - signal_sum: Add = 0 - from_sum: Add = 0 - to_sum: Add = 0 - for phenotype in self.phenotypes_parsed_df[1]: - total_biomass += self.variables["b_" + phenotype][time][ - trial - ] - val = ( - 1 - if "OD" in signal - else self.species_phenotypes_bool_df.loc[ - signal, phenotype - ] - ) - signal_sum += ( - val * self.variables["b_" + phenotype][time][trial] - ) - if all( - [ - "OD" not in signal, - self.signal_species[signal] 
in phenotype, - "stationary" not in phenotype, - ] - ): - from_sum += ( - val - * self.variables["cvf_" + phenotype][time][trial] - ) - to_sum += ( - val - * self.variables["cvt_" + phenotype][time][trial] - ) - for phenotype in self.phenotypes_parsed_df[1]: - if ( - "OD" not in signal - and self.signal_species[signal] in phenotype - ): - if "stationary" in phenotype: - # b_{phenotype} - sum_k^K(es_k*cvf) + sum_k^K(pheno_bool*cvt) - b+1_{phenotype} = 0 - self.constraints["dbc_" + phenotype][time][ - trial - ] = Constraint( - self.variables["b_" + phenotype][time][trial] - - from_sum - + to_sum - - self.variables["b_" + phenotype][next_time][ - trial - ], - ub=0, - lb=0, - name=_name("dbc_", phenotype, time, trial), - ) - else: - # -b_{phenotype} + dt*g_{phenotype} + cvf - cvt - b+1_{phenotype} = 0 - self.constraints["dbc_" + phenotype][time][ - trial - ] = Constraint( - self.variables["b_" + phenotype][time][trial] - - self.variables["b_" + phenotype][next_time][ - trial - ] - + half_dt - * ( - self.variables["g_" + phenotype][time][ - trial - ] - + self.variables["g_" + phenotype][ - next_time - ][trial] - ) - + self.variables["cvf_" + phenotype][time][ - trial - ] - - self.variables["cvt_" + phenotype][time][ - trial - ], - ub=0, - lb=0, - name=_name("dbc_", phenotype, time, trial), - ) - - constraints.append( - self.constraints["dbc_" + phenotype][time][trial] - ) - - self.variables[signal + "__bio"][time][trial] = Variable( - _name(signal, "__bio", time, trial), lb=0, ub=1000 - ) - self.variables[signal + "__diffpos"][time][trial] = Variable( - _name(signal, "__diffpos", time, trial), lb=0, ub=100 - ) - self.variables[signal + "__diffneg"][time][trial] = Variable( - _name(signal, "__diffneg", time, trial), lb=0, ub=100 - ) - - # {signal}__conversion*datum = {signal}__bio - self.constraints[signal + "__bioc"][time][trial] = Constraint( - self.variables[signal + "__conversion"] - * parsed_df[2][r_index, int(data_timestep) - 1] - - self.variables[signal + "__bio"][time][trial], - name=_name(signal, "__bioc", time, trial), - lb=0, - ub=0, - ) - - # {speces}_bio - sum_k^K(es_k*b_{phenotype}) - {signal}_diffpos + {signal}_diffneg = 0 - self.constraints[signal + "__diffc"][time][trial] = Constraint( - self.variables[signal + "__bio"][time][trial] - - signal_sum - - self.variables[signal + "__diffpos"][time][trial] - + self.variables[signal + "__diffneg"][time][trial], - name=_name(signal, "__diffc", time, trial), - lb=0, - ub=0, - ) - - obj_coef.update( - { - self.variables[signal + "__diffpos"][time][ - trial - ]: self.parameters["diffpos"], - self.variables[signal + "__diffneg"][time][ - trial - ]: self.parameters["diffneg"], - } - ) - variables.extend( - [ - self.variables[signal + "__bio"][time][trial], - self.variables[signal + "__diffpos"][time][trial], - self.variables[signal + "__diffneg"][time][trial], - ] - ) - constraints.extend( - [ - self.constraints[signal + "__bioc"][time][trial], - self.constraints[signal + "__diffc"][time][trial], - ] - ) - - time_4 = process_time() - print(f"Done with the dbc & diffc loop: {(time_4-time_3)/60} min") - # construct the problem - self.problem.add(variables) - self.problem.update() - self.problem.add(constraints) - self.problem.update() - self.problem.objective = Objective(Zero, direction="min") # , sloppy=True) - self.problem.objective.set_linear_coefficients(obj_coef) - time_5 = process_time() - print( - f"Done with loading the variables, constraints, and objective: {(time_5-time_4)/60} min" - ) - - # print contents - if export_parameters: - 
self.zipped_output.append("parameters.csv") - DataFrame( - data=list(self.parameters.values()), - index=list(self.parameters.keys()), - columns=["values"], - ).to_csv("parameters.csv") - if export_lp: - self.zipped_output.extend(["mscommfitting.lp", "mscommfitting.json"]) - with open("mscommfitting.lp", "w") as lp: - lp.write(self.problem.to_lp()) - with open("mscommfitting.json", "w") as lp: - json.dump(self.problem.to_json(), lp, indent=3) - if zip_name: - self.zip_name = zip_name - sleep(2) - with ZipFile(self.zip_name, "w", compression=ZIP_LZMA) as zp: - for file in self.zipped_output: - zp.write(file) - os.remove(file) - - time_6 = process_time() - print(f"Done exporting the content: {(time_6-time_5)/60} min") - - def compute(self, graphs: list = [], zip_name=None): - solution = self.problem.optimize() - # categorize the primal values by trial and time - for variable, value in self.problem.primal_values.items(): - if "conversion" not in variable: - basename, time, trial = variable.split("-") - time = int(time) * self.parameters["data_timestep_hr"] - if not trial in self.values: - self.values[trial] = {} - if not basename in self.values[trial]: - self.values[trial][basename] = {} - self.values[trial][basename][time] = value - - # export the processed primal values for graphing - with open("primal_values.json", "w") as out: - json.dump(self.values, out, indent=3) - if not zip_name: - if hasattr(self, zip_name): - zip_name = self.zip_name - if zip_name: - with ZipFile(zip_name, "a", compression=ZIP_LZMA) as zp: - zp.write("primal_values.json") - os.remove("primal_values.json") - - if graphs != []: - self.graph(graphs, zip_name=zip_name) - - if "optimal" in solution: - print("The solution is optimal.") - else: - raise FeasibilityError( - f"The solution is sub-optimal, with a {solution} status." 
- ) - - def graph( - self, - graphs=[], - primal_values_filename: str = None, - primal_values_zip_path: str = None, - zip_name: str = None, - data_timestep_hr: float = 0.163, - ): - def add_plot(ax, labels, basename, trial): - labels.append(basename.split("-")[-1]) - ax.plot( - self.values[trial][basename].keys(), - self.values[trial][basename].values(), - label=basename, - ) - ax.legend(labels) - ax.set_xticks( - list(self.values[trial][basename].keys())[ - :: int(2 / data_timestep_hr / timestep_ratio) - ] - ) - return ax, labels - - timestep_ratio = 1 - if self.parameters != {}: - data_timestep_hr = self.parameters["data_timestep_hr"] - timestep_ratio = ( - self.parameters["data_timestep_hr"] / self.parameters["timestep_hr"] - ) - if primal_values_filename: - if primal_values_zip_path: - with ZipFile(primal_values_zip_path, "r") as zp: - zp.extract(primal_values_filename) - with open(primal_values_filename, "r", encoding="utf-8") as primal: - self.values = json.load(primal) - - # plot the content for desired trials - self.plots = [] - for graph in graphs: - if any([x in graph["content"] for x in ["total", "OD"]]): - ys = [] - print(graph) - pyplot.rcParams["figure.figsize"] = (11, 7) - pyplot.rcParams["figure.dpi"] = 150 - fig, ax = pyplot.subplots() - y_label = "Variable value" - x_label = "Time (hr)" - for trial, basenames in self.values.items(): - content = graph["content"] - if graph["content"] == "OD": - y_label = "Biomass (g)" - graph["phenotype"] = graph["species"] = "*" - elif "biomass" in graph["content"]: - content = "b" - y_label = "Biomass (g)" - elif graph["content"] == "growth": - content = "g" - y_label = "Biomass (g/hr)" - elif "stress-test" in graph["content"]: - content = graph["content"].split("_")[1] - y_label = graph["species"] + " coculture %" - x_label = content + " (mM)" - if trial == graph["trial"]: - labels: list = [] - for basename in basenames: - # parse for non-concentration variables - if any([x in graph["content"] for x in ["total", "OD"]]): - if "b_" in basename: - if graph["content"] == "OD": - labels.append("predicted") - label = "predicted" - xs = np.array( - list(self.values[trial][basename].keys()) - ) - ys.append( - np.array( - list(self.values[trial][basename].values()) - ) - ) - elif graph["content"] == "total": - if graph["species"] in basename: - labels.append("total_biomass") - label = "total_biomass" - xs = np.array( - list(self.values[trial][basename].keys()) - ) - ys.append( - np.array( - list( - self.values[trial][ - basename - ].values() - ) - ) - ) - if ( - "experimental_data" in graph - and graph["experimental_data"] - ): - if basename == "OD__bio": - labels.append("experimental") - exp_xs = np.array( - list(self.values[trial][basename].keys()) - ) - exp_xs = exp_xs.astype(np.float32) - exp_xs = np.around(exp_xs, 2) - ax.plot( - exp_xs, - list(self.values[trial][basename].values()), - label="experimental", - ) - ax.set_xticks( - exp_xs[ - :: int( - 2 / data_timestep_hr / timestep_ratio - ) - ] - ) - elif graph["phenotype"] == "*" and all( - [x in basename for x in [graph["species"], content]] - ): - if "total" in graph["content"]: - labels = [basename] - xs = np.array(list(self.values[trial][basename].keys())) - ys.append( - np.array( - list(self.values[trial][basename].values()) - ) - ) - else: - ax, labels = add_plot(ax, labels, basename, trial) - print("1") - # - elif all( - [ - x in basename - for x in [graph["species"], graph["phenotype"], content] - ] - ): - ax, labels = add_plot(ax, labels, basename, trial) - print("2") - # 
concentration plots - elif "EX_" in basename and graph["content"] in basename: - ax, labels = add_plot(ax, labels, basename, trial) - y_label = "Concentration (mM)" - print("3") - - if labels != []: - if any([x in graph["content"] for x in ["total", "OD"]]): - xs = xs.astype(np.float32) - xs = np.around(xs, 2) - ax.plot(xs, sum(ys), label=label) - ax.set_xticks( - xs[:: int(2 / data_timestep_hr / timestep_ratio)] - ) - phenotype_id = ( - graph["phenotype"] - if graph["phenotype"] != "*" - else "all phenotypes" - ) - species_id = ( - graph["species"] - if graph["species"] != "*" - else "all species" - ) - ax.set_xlabel(x_label) - ax.set_ylabel(y_label) - if len(labels) > 1: - ax.legend() - ax.set_title( - f'{graph["content"]} of {species_id} ({phenotype_id}) in the {trial} trial' - ) - fig_name = f'{"_".join([trial, species_id, phenotype_id, graph["content"]])}.jpg' - fig.savefig(fig_name) - self.plots.append(fig_name) - - # combine the figures with the other cotent - if not zip_name: - if hasattr(self, zip_name()): - zip_name = self.zip_name - if zip_name: - with ZipFile(zip_name, "a", compression=ZIP_LZMA) as zp: - for plot in self.plots: - zp.write(plot) - os.remove(plot) - - def load_model( - self, mscomfit_json_path: str, zip_name: str = None, class_object: bool = False - ): - if zip_name: - with ZipFile(zip_name, "r") as zp: - zp.extract(mscomfit_json_path) - with open(mscomfit_json_path, "r") as mscmft: - model = Model.from_json(json.load(mscmft)) - if class_object: - self.problem = model - return model - - def change_parameters( - self, - cvt=None, - cvf=None, - diff=None, - vmax=None, - mscomfit_json_path="mscommfitting.json", - export_zip_name=None, - extract_zip_name=None, - final_concentrations: dict = None, - final_relative_carbon_conc: float = None, - previous_relative_conc: float = None, - ): - def change_param(arg, param, time, trial): - if param: - if not isinstance(param, dict): - arg[0]["value"] = param - else: - if time in param: - if trial in param[time]: - arg[0]["value"] = param[time][trial] - arg[0]["value"] = param[time] - else: - arg[0]["value"] = param["default"] - return arg - - time_1 = process_time() - if not export_zip_name: - export_zip_name = self.zip_name - if not os.path.exists(mscomfit_json_path): - if not extract_zip_name: - extract_zip_name = self.zip_name - with ZipFile(extract_zip_name, "r") as zp: - zp.extract(mscomfit_json_path) - with open(mscomfit_json_path, "r") as mscmft: - mscomfit_json = json.load(mscmft) - else: - with open("mscommfitting.json", "r") as mscmft: - mscomfit_json = json.load(mscmft) - - time_2 = process_time() - print(f"Done loading the JSON: {(time_2-time_1)/60} min") - - # change objective coefficients - for arg in mscomfit_json["objective"]["expression"]["args"]: - name, time, trial = arg["args"][1]["name"].split("-") - if "cvf" in name: - arg["args"] = change_param(arg["args"], cvf, time, trial) - elif "cvt" in name: - arg["args"] = change_param(arg["args"], cvt, time, trial) - elif "diff" in name: - arg["args"] = change_param(arg["args"], diff, time, trial) - - # change final concentrations - if final_concentrations: # absolute concentration - for met in mscomfit_json["variables"]: - name, time, trial = met["name"].split("-") - if ( - name in final_concentrations - and time == self.simulation_timesteps[-1] - ): - met["lb"] = 0 - met["ub"] = final_concentrations[met] - - if final_relative_carbon_conc: # relative concentration - for met in mscomfit_json["variables"]: - if "EX_" in met["name"]: - name, time, trial = 
met["name"].split("-") - if ( - any([x in name for x in self.parameters["carbon_sources"]]) - and time == self.simulation_timesteps[-1] - ): - print(met["ub"]) - met["lb"] = 0 - met["ub"] *= final_relative_carbon_conc - if previous_relative_conc: - met["ub"] /= previous_relative_conc - print(met["ub"]) - - # change Vmax values - for arg in mscomfit_json["constraints"]: - name, time, trial = arg["name"].split("-") - if "gc" in name: - arg["expression"]["args"][1]["args"] = change_param( - arg["expression"]["args"][1]["args"], vmax, time, trial - ) - - with open(mscomfit_json_path, "w") as mscmft: - json.dump(mscomfit_json, mscmft, indent=3) - with ZipFile(export_zip_name, "a", compression=ZIP_LZMA) as zp: - zp.write(mscomfit_json_path) - os.remove(mscomfit_json_path) - time_3 = process_time() - print(f"Done exporting the model: {(time_3-time_2)/60} min") - - self.problem = Model.from_json(mscomfit_json) - time_4 = process_time() - print( - f"Done loading the model: {(time_4-time_3)/60} min" - ) # ~1/2 the defining a new problem - - def introduce_km( - self, vmax, km, met, graphs, zipname, extract_zipname - ): # Good starting values to try are: vmax = 3.75; km = 2.5 : Equivalent of vmax = 0.5 because at starting maltose of 5 this is vmax/(km + [maltose]) = 3.75/(2.5+5) = 0.5 - vmax_var = {"default": -0.3} - last_conc = {} - count = 0 - while 1: # Dangerous - if there's never convergence, then this never stops - error = None - for t in self.variables["c_" + met]: - if t not in vmax_var: - vmax_var[t] = {} - if t not in last_conc: - last_conc[t] = {} - for trial in self.variables["c_" + met][t]: - if trial in last_conc[t]: - error += ( - last_conc[t][trial] - - self.variables["c_" + met][t][trial].primal - ) ** 2 - last_conc[t][trial] = self.variables["c_" + met][t][trial].primal - vmax_var[t][trial] = -1 * vmax / (km + last_conc[t][trial]) - count += 1 - # Not sure if I'm using the vmax argument right here... 
please check - self.change_parameters( - vmax_var, zipname, extract_zipname - ) # The Vmax argument can be either a number or a dictionary that is organized by ["time"]["trial"], just as the naming scheme of the variables and constraints - self.compute(graphs, zipname) - if error: - error = (error / count) ** 0.5 - print("Error:", error) - if ( - error < 1 - ): # Definitely don't know what the error threshold should actually be for convergence - break - - def parameter_optimization( - self, - ): - with ZipFile(self.zip_name, "r") as zp: - zp.extract("mscommfitting.json") - - newton diff --git a/modelseedpy/community/mscommunity.py b/modelseedpy/community/mscommunity.py deleted file mode 100644 index 1dea47f..0000000 --- a/modelseedpy/community/mscommunity.py +++ /dev/null @@ -1,292 +0,0 @@ -# -*- coding: utf-8 -*- -from modelseedpy.fbapkg.mspackagemanager import MSPackageManager -from modelseedpy.core.msmodelutl import MSModelUtil -from modelseedpy.community.mssteadycom import MSSteadyCom -from modelseedpy.community.commhelper import build_from_species_models -from modelseedpy.core.exceptions import ObjectAlreadyDefinedError, FeasibilityError, NoFluxError -from modelseedpy.core.msgapfill import MSGapfill -from modelseedpy.core.fbahelper import FBAHelper -#from modelseedpy.fbapkg.gapfillingpkg import default_blacklist -from modelseedpy.core.msatpcorrection import MSATPCorrection -from cobra.io import save_matlab_model, write_sbml_model -from cobra.core.dictlist import DictList -from optlang.symbolics import Zero -from optlang import Constraint -from os import makedirs, path -from pandas import DataFrame -from pprint import pprint -from cobra import Reaction -from json import dump -import logging - -logger = logging.getLogger(__name__) - - -class CommunityMember: - def __init__(self, community, biomass_cpd, ID=None, index=None, abundance=0): - print(ID, "biomass compound:", biomass_cpd) - self.community, self.biomass_cpd = community, biomass_cpd - try: self.index = int(self.biomass_cpd.compartment[1:]) - except: self.index = index - self.abundance = abundance - if self.biomass_cpd in self.community.primary_biomass.metabolites: - self.abundance = abs(self.community.primary_biomass.metabolites[self.biomass_cpd]) - if ID is not None: self.id = ID - elif "species_name" in self.biomass_cpd.annotation: - self.id = self.biomass_cpd.annotation["species_name"] - else: self.id = "Species"+str(self.index) - - logger.info("Making atp hydrolysis reaction for species: "+self.id) - atp_rxn = self.community.mdlutl.add_atp_hydrolysis("c"+str(self.index)) - self.atp_hydrolysis = atp_rxn["reaction"] - self.biomass_drain = self.primary_biomass = None - self.reactions = [] - for rxn in self.community.mdlutl.model.reactions: - # print(rxn.id, rxn.reaction, "\t\t\t", end="\r") - rxnComp = FBAHelper.rxn_compartment(rxn) - if rxnComp is None: print(f"The reaction {rxn.id} compartment is undefined.") - if rxnComp[1:] == '': print("no compartment", rxn, rxnComp) - elif int(rxnComp[1:]) == self.index and 'bio' not in rxn.name: self.reactions.append(rxn) - if self.biomass_cpd.id not in [met.id for met in rxn.metabolites]: continue - for met in rxn.metabolites: - if met.id != self.biomass_cpd.id: continue - if rxn.metabolites[met] == 1 and len(rxn.metabolites) > 1: self.primary_biomass = rxn ; break - elif len(rxn.metabolites) == 1 and rxn.metabolites[met] < 0: self.biomass_drain = rxn - - if self.primary_biomass is None: logger.critical("No biomass reaction found for species " + self.id) - if not self.biomass_drain: - 
logger.info("Making biomass drain reaction for species: "+self.id) - self.biomass_drain = Reaction( - id="DM_"+self.biomass_cpd.id, name="DM_" + self.biomass_cpd.name, lower_bound=0, upper_bound=100) - self.community.mdlutl.model.add_reactions([self.biomass_drain]) - self.biomass_drain.add_metabolites({self.biomass_cpd: -1}) - self.biomass_drain.annotation["sbo"] = 'SBO:0000627' - - def disable_species(self): - for reaction in self.community.model.reactions: - reaction_index = FBAHelper.rxn_compartment(reaction)[1:] - if int(reaction_index) == self.index: reaction.upper_bound = reaction.lower_bound = 0 - - def compute_max_biomass(self): - if self.primary_biomass is None: logger.critical("No biomass reaction found for species "+self.id) - self.community.mdlutl.add_objective(self.primary_biomass.flux_expression) - if self.community.lp_filename: self.community.print_lp(f"{self.community.lp_filename}_{self.id}_Biomass") - return self.community.model.optimize() - - def compute_max_atp(self): - if not self.atp_hydrolysis: logger.critical("No ATP hydrolysis found for species:" + self.id) - self.community.mdlutl.add_objective(Zero, coef={self.atp_hydrolysis.forward_variable: 1}) - if self.community.lp_filename: self.community.print_lp(f"{self.community.lp_filename}_{self.id}_ATP") - return self.community.model.optimize() - - -class MSCommunity: - def __init__(self, model=None, member_models: list = None, abundances=None, kinetic_coeff=2000, lp_filename=None, flux_limit=300, printing=False): - assert model is not None or member_models is not None, "Either the community model and the member models must be defined." - self.lp_filename = lp_filename - self.gapfillings = {} - - #Define Data attributes as None - self.solution = self.biomass_cpd = self.primary_biomass = self.biomass_drain = None - self.msgapfill = self.element_uptake_limit = self.kinetic_coeff = self.msdb_path = None - # defining the models - if model is None and member_models is not None: - model = build_from_species_models(member_models, abundances=abundances, printing=printing) - self.id = model.id - self.mdlutl = MSModelUtil(model, True) - self.pkgmgr = MSPackageManager.get_pkg_mgr(self.mdlutl.model) - # print(msid_cobraid_hash) - # write_sbml_model(model, "test_comm.xml") - msid_cobraid_hash = self.mdlutl.msid_hash() - if "cpd11416" not in msid_cobraid_hash: raise KeyError("Could not find biomass compound for the model.") - other_biomass_cpds = [] - with open(path.dirname(__file__)+"/MSID_hash.json", 'w') as jsonOut: - dump([v.id for v in msid_cobraid_hash["cpd11416"]], jsonOut, indent=3) - print(msid_cobraid_hash["cpd11416"]) - for self.biomass_cpd in msid_cobraid_hash["cpd11416"]: - if "c0" in self.biomass_cpd.id: - for rxn in self.mdlutl.model.reactions: - if self.biomass_cpd not in rxn.metabolites: continue - # print(self.biomass_cpd, rxn, end=";\t") - with open(path.dirname(__file__)+"/log", 'w') as jsonOut: - dump(["here", rxn.metabolites[self.biomass_cpd], len(rxn.metabolites)], jsonOut, indent=3) - if rxn.metabolites[self.biomass_cpd] == 1 and len(rxn.metabolites) > 1 or len(msid_cobraid_hash["cpd11416"]) == 1: - if self.primary_biomass: raise ObjectAlreadyDefinedError( - f"The primary biomass {self.primary_biomass} is already defined," - f"hence, the {rxn.id} cannot be defined as the model primary biomass.") - if printing: print('primary biomass defined', rxn.id) - self.primary_biomass = rxn - break - elif rxn.metabolites[self.biomass_cpd] < 0 and len(rxn.metabolites) == 1: - self.biomass_drain = rxn - break - elif 'c' in 
self.biomass_cpd.compartment: other_biomass_cpds.append(self.biomass_cpd) - else: print(f"The {model.id} has non-cytoplasm biomass reactions: {self.biomass_cpd}") - if not abundances: - if member_models is None: - abundances = {f"Species{memIndex}": {"biomass_compound": bioCpd, "abundance": 1/len(other_biomass_cpds)} - for memIndex, bioCpd in enumerate(other_biomass_cpds)} - else: - abundances = {} - for memID, bioCPD in model.notes["member_biomass_cpds"].items(): - abundances[memID] = {"abundance": 1/len(other_biomass_cpds)} - for met in model.metabolites: - if bioCPD.id == met.id: - if "biomass_compound" in abundances[memID]: print("duplicate", bioCPD.id, met.id) - abundances[memID].update({"biomass_compound": met}) - # print(bioCPD, met.id) - if "biomass_compound" not in abundances[memID]: print(f"The {memID} bioCPD was not captured") - - # print() # this returns the carriage after the tab-ends in the biomass compound printing - self.members = DictList(CommunityMember(self, info["biomass_compound"], ID, index, info["abundance"]) - for index, (ID, info) in enumerate(abundances.items())) - # assign the MSCommunity constraints and objective - self.abundances_set = False - if isinstance(abundances, dict): self.set_abundance(abundances) - self.pkgmgr.getpkg("CommKineticPkg").build_package(kinetic_coeff, self) - for member in self.members: - vars_coef = {} - for rxn in self.util.model.reactions: - if "EX_" not in rxn.id and member.index == FBAHelper.rxn_compartment(rxn)[1:]: - vars_coef[rxn.forward_variable] = vars_coef[rxn.reverse_variable] = 1 - print(member.id, flux_limit, member.abundance) - self.util.create_constraint(Constraint(Zero, lb=0, ub=flux_limit*member.abundance, - name=f"{member.id}_resource_balance"), coef=vars_coef) - - #Manipulation functions - def set_abundance(self, abundances): - #calculate the normalized biomass - total_abundance = sum(list([content["abundance"] for content in abundances.values()])) - # map abundances to all species - for modelID, content in abundances.items(): - if modelID in self.members: self.members.get_by_id(modelID).abundance = content["abundance"]/total_abundance - #remake the primary biomass reaction based on abundances - if self.primary_biomass is None: logger.critical("Primary biomass reaction not found in community model") - all_metabolites = {met: 1 for met in self.primary_biomass.metabolites} - all_metabolites.update({mem.biomass_cpd: -abundances[mem.id]["abundance"]/total_abundance for mem in self.members}) - self.primary_biomass.add_metabolites(all_metabolites, combine=False) - self.abundances_set = True - - def set_objective(self, target=None, targets=None, minimize=False): - targets = targets or [self.mdlutl.model.reactions.get_by_id(target or self.primary_biomass.id).flux_expression] - self.mdlutl.model.objective = self.mdlutl.model.problem.Objective( - sum(targets), direction="max" if not minimize else "min") - - def constrain(self, element_uptake_limit=None, thermo_params=None, msdb_path=None): - if element_uptake_limit: - self.element_uptake_limit = element_uptake_limit - self.pkgmgr.getpkg("ElementUptakePkg").build_package(element_uptake_limit) - if thermo_params: - if msdb_path: - self.msdb_path = msdb_path - thermo_params.update({'modelseed_db_path':msdb_path}) - self.pkgmgr.getpkg("FullThermoPkg").build_package(thermo_params) - else: self.pkgmgr.getpkg("SimpleThermoPkg").build_package(thermo_params) - - def interactions(self, solution=None, media=None, msdb=None, msdb_path=None, filename=None, figure_format="svg", - 
node_metabolites=True, flux_threshold=1, visualize=True, ignore_mets=None): - return MSSteadyCom.interactions(self, solution or self.solution, media, flux_threshold, msdb, msdb_path, - visualize, filename, figure_format, node_metabolites, True, ignore_mets) - - def add_commkinetics(self, member_biomasses, abundances): - # TODO this creates an error with the member biomass reactions not being identified in the model - coef = {} - for rxn in self.mdlutl.model.reactions: - if rxn.id[:3] == "rxn": coef[rxn.forward_variable] = coef[rxn.reverse_variable] = 1 - for member in self.members: - if member_biomasses[member.id] not in abundances: continue - coef[member_biomasses[member.id]] = -abundances[member_biomasses[member.id]] - self.mdlutl.create_constraint(Constraint(Zero, name="member_flux_limit"), coef=coef, printing=True) - - def add_resource_balance(self, flux_limit=300): - for member in self.members: - vars_coef = {} - for rxn in self.mdlutl.model.reactions: - if "EX_" not in rxn.id and member.index == FBAHelper.rxn_compartment(rxn)[1:]: - vars_coef[rxn.forward_variable] = vars_coef[rxn.reverse_variable] = 1 - print(member.id, flux_limit, member.abundance) - self.mdlutl.create_constraint(Constraint(Zero, lb=0, ub=flux_limit*member.abundance, - name=f"{member.id}_resource_balance"), coef=vars_coef) - - #Utility functions - def print_lp(self, filename=None): - filename = filename or self.lp_filename - makedirs(path.dirname(filename), exist_ok=True) - with open(filename, 'w') as out: out.write(str(self.mdlutl.model.solver)) ; out.close() - - def to_sbml(self, export_name): - makedirs(path.dirname(export_name), exist_ok=True) - write_sbml_model(self.mdlutl.model, export_name) - - #Analysis functions - def gapfill(self, media = None, target = None, minimize = False, default_gapfill_templates=None, default_gapfill_models=None, - test_conditions=None, reaction_scores=None, blacklist=None, suffix = None, solver:str="glpk"): - default_gapfill_templates = default_gapfill_templates or [] - default_gapfill_models = default_gapfill_models or [] - test_conditions, blacklist = test_conditions or [], blacklist or [] - reaction_scores = reaction_scores or {} - if not target: target = self.primary_biomass.id - self.set_objective(target, minimize) - gfname = FBAHelper.mediaName(media) + "-" + target - if suffix: gfname += f"-{suffix}" - self.gapfillings[gfname] = MSGapfill(self.mdlutl.model, default_gapfill_templates, default_gapfill_models, - test_conditions, reaction_scores, blacklist, solver) - gfresults = self.gapfillings[gfname].run_gapfilling(media, target) - assert gfresults, f"Gapfilling of {self.mdlutl.model.id} in {gfname} towards {target} failed." 
- return self.gapfillings[gfname].integrate_gapfill_solution(gfresults) - - def test_individual_species(self, media=None, interacting=True, run_atp=True, run_biomass=True): - assert run_atp or run_biomass, ValueError("Either the run_atp or run_biomass arguments must be True.") - # self.pkgmgr.getpkg("KBaseMediaPkg").build_package(media) - if media is not None: self.mdlutl.add_medium(media) - data = {"Species": [], "Biomass": [], "ATP": []} - for individual in self.members: - data["Species"].append(individual.id) - with self.mdlutl.model: - if not interacting: - for other in self.members: - if other != individual: other.disable_species() - if run_biomass: data["Biomass"].append(individual.compute_max_biomass()) - if run_atp: data["ATP"].append(individual.compute_max_atp()) - return DataFrame(data) - - def atp_correction(self, core_template, atp_medias, max_gapfilling=None, gapfilling_delta=0): - self.atp = MSATPCorrection(self.mdlutl.model, core_template, atp_medias, "c0", max_gapfilling, gapfilling_delta) - - # TODO evaluate the comparison of this method with MICOM - def predict_abundances(self, media=None, pfba=True): - with self.mdlutl.model: - self.mdlutl.model.objective = self.mdlutl.model.problem.Objective( - sum([species.primary_biomass.forward_variable for species in self.members]), direction="max") - self.run_fba(media, pfba) - return self._compute_relative_abundance_from_solution() - - def run_fba(self, media=None, pfba=False, fva_reactions=None): - if media is not None: self.mdlutl.add_medium(media) - return self._set_solution(self.mdlutl.run_fba(None, pfba, fva_reactions)) - - def _compute_relative_abundance_from_solution(self, solution=None): - if not solution and not self.solution: logger.warning("The simulation lacks any flux.") ; return None - comm_growth = sum([self.solution.fluxes[member.primary_biomass.id] for member in self.members]) - assert comm_growth > 0, NoFluxError(f"The total community growth is {comm_growth}") - return {member.id: self.solution.fluxes[member.primary_biomass.id]/comm_growth for member in self.members} - - def _set_solution(self, solution): - if solution.status != "optimal": - FeasibilityError(f'The solution is sub-optimal, with a(n) {solution} status.') - self.solution = None - self.print_lp() - save_matlab_model(self.mdlutl.model, self.mdlutl.model.name + ".mat") - self.solution = solution - logger.info(self.mdlutl.model.summary()) - return self.solution - - def parse_member_growths(self): - # f"cpd11416_c{member.index}" - return {member.name: self.solution.fluxes[member.primary_biomass.id] for member in self.members} - - def return_member_models(self): - # TODO return a list of member models that is parsed from the .members attribute - ## which will have applicability in disaggregating community models that do not have member models - ## such as Filipe's Nitrate reducing community model for the SBI ENIGMA team. 
- return diff --git a/modelseedpy/community/mskineticsfba.py b/modelseedpy/community/mskineticsfba.py deleted file mode 100644 index 7a5e92b..0000000 --- a/modelseedpy/community/mskineticsfba.py +++ /dev/null @@ -1,281 +0,0 @@ -# -*- coding: utf-8 -*- - -from scipy.constants import milli, hour, minute, day, femto -from modelseedpy.fbapkg.basefbapkg import BaseFBAPkg -from modelseedpy import MSModelUtil -from optlang import Constraint -from modelseedpy.core.fbahelper import FBAHelper -from collections import OrderedDict -from optlang.symbolics import Zero -from numpy import log10, nan, mean -from warnings import warn -from matplotlib import pyplot -from pprint import pprint -from datetime import date -from math import inf -import pandas -import json, re, os - -def _x_axis_determination(total_time): - time = total_time * minute - if time <= 600: return minute, "s" - if time > 600: return 1, "min" - if time > 7200: return 1/hour, "hr" - return 1/day, "days" - -def _check_datum(datum): - if "substituted_rate_law" not in datum: - print(f"RateLawError: The {datum} datum lacks a rate law.") - return False - remainder = re.sub("([0-9A-Za-z/()e\-\+\.\*\_])", "", datum["substituted_rate_law"]) - if remainder != "": - print(f'RateLawError: The {datum["substituted_rate_law"]}' - f' rate law contains unknown characters: {remainder}') - return False - return True - - -class MSKineticsFBA: - def __init__(self, model, warnings: bool = True, verbose: bool = False, - printing: bool = False, jupyter: bool = False): - self.warnings, self.verbose, self.printing, self.jupyter = warnings, verbose, printing, jupyter - self.model_util = MSModelUtil(model) - self.met_ids = OrderedDict({met.id: met.id for met in self.model_util.model.metabolites}) - - def baseKinFBA(self, kinetics_path: str = None, kinetics_data: dict = None, - initial_M: dict = None, # a dictionary of the initial metabolic concentrations, which supplants concentrations from the defined kinetics data - total_min: float = 200, ts_min: float = 20, export_name = None, export_directory = None, - chemostat_L: float = None, feed_profile: dict = None, chemostat_L_hr: float = None, - temperature: float = 25, p_h: float = 7, cell_dry_g: float = 1.44e-13, cellular_L: float = 1e-18, - conc_figure_title = "Metabolic perturbation", included_mets: list = None, labeled_plots = True, - visualize = True, export = True): - # define the dataframe for the time series content - feed_profile, constrained, self.constraints = feed_profile or {}, {}, {} - included_mets, self.sols = included_mets or [], [] - self.parameters = {"timesteps": int(total_min/ts_min), "pH": p_h, "temperature": temperature} - self.variables = {"elapsed_time": 0} - self.ts_min, self.minimum = ts_min, inf - timestep_hr = self.ts_min / (hour / minute) - self.constrained = OrderedDict() - cell_g_L = (cell_dry_g/cellular_L) # https://journals.asm.org/doi/full/10.1128/AEM.64.2.688-694.1998 - - # define reaction kinetics and initial concentrations - assert kinetics_path or kinetics_data, "Either < kinetics_path > or < kinetics_data > must be provided" - if kinetics_path: - with open(kinetics_path) as data: self.kinetics_data = json.load(data) - elif kinetics_data: self.kinetics_data = kinetics_data.copy() - ## define the concentration, moles, and fluxes DataFrames - self.time = "0 min" - self.conc = pandas.DataFrame([0]*len(self.met_ids), index=list(self.met_ids.keys()), columns=[self.time]) - self.conc.index.name = "metabolite (mM)" - self.moles = self.conc.copy(deep=True) - self.fluxes = 
pandas.DataFrame(index=[rxn.id for rxn in self.model_util.model.reactions], columns=[self.time]) - self.fluxes.index.name = "reaction (\u0394mmol/hr*g_(dw)))" # Delta - ## parse the kinetics data - for content in self.kinetics_data.values(): - for condition, datum in content.items(): - if "initial_M" not in datum: continue - for var, conc in datum["initial_M"].items(): - met_id = datum["met_id"][var] - if met_id in self.met_ids: self.conc.at[met_id, self.time] += conc/milli - elif self.warnings: warn(f"KineticsError: The {met_id} reagent ({var}) in the" - f" {datum['substituted_rate_law']} rate law is not defined by the model.") - ## incorporate custom initial concentrations, which overwrites values from the kinetics data - for met_id in initial_M: - self.conc.at[met_id, self.time] = initial_M[met_id] / milli - defined_concs = self.conc[self.conc[self.time] != 0][self.time].to_dict() - chemostat_requirements = [chemostat_L is not None, feed_profile != {}, chemostat_L_hr is not None] - # execute FBA for each timestep, then calculate custom fluxes, constrain the model, and update concentrations - model_rxns = [rxn.id for rxn in self.model_util.model.reactions] - newTime = 0 - for timestep in range(1, self.parameters["timesteps"] + 1): - oldTime = newTime ; newTime = timestep * self.ts_min ; t = timestep * timestep_hr - self.previous_time = f"{oldTime} min" ; self.time = f"{newTime} min" - self.conc[self.time] = [float(0)] * len(self.conc.index) - self.fluxes[self.time] = [0] * len(self.fluxes.index) - ## create a metabolite variable that prevents negative concentrations - for met in self.model_util.model.metabolites: - if met.id not in defined_concs: continue - if met.id not in self.constraints: self.constraints[met.id] = {} - coef = {} - for rxn in met.reactions: - ### The product of the reaction stoichiometry and the timestep - stoich = abs(timestep_hr * rxn.metabolites[met]) - coef[rxn.forward_variable], coef[rxn.reverse_variable] = stoich, -stoich - ### build the metabolite constraint - if newTime-self.ts_min in self.constraints[met.id]: - self.model_util.remove_cons_vars([self.constraints[met.id][newTime-self.ts_min]]) - self.constraints[met.id][newTime] = Constraint(Zero, lb=0, ub=None, name=f"{met.id}_conc") - self.model_util.create_constraint(self.constraints[met.id][newTime], coef) - ## calculate the flux - display(self.conc[self.conc["0 min"] != 0], self.fluxes) - for rxnID in self.kinetics_data: - # TODO allocate the following code into a function and recusively reduce the timestep until - ## the concentration becomes not negative, following the model of microBialSim. This may require - ## time dependency in the kinetics expression to achieve the desired behavior. 
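The TODO above outlines the microBialSim-style remedy for negative concentrations: retry the update with a halved timestep until every concentration stays non-negative. A minimal sketch of that idea, where step is a hypothetical callable (not part of this module) that returns the updated metabolite-to-concentration mapping for a candidate timestep:

def safe_step(conc, dt, step, min_dt=1e-3):
    """Halve dt until the update keeps every concentration non-negative."""
    while dt >= min_dt:
        updated = step(conc, dt)  # candidate concentrations after one step of size dt
        if all(value >= 0 for value in updated.values()):
            return updated, dt
        dt /= 2  # concentrations went negative; retry with a finer timestep
    raise ValueError("Timestep underflow: concentrations remained negative.")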
- if rxnID not in model_rxns and self.warnings: - warn(f"ReactionError: {rxnID} is not in the model.") ; continue - fluxes = [] - for source in self.kinetics_data[rxnID]: - datum = self.kinetics_data[rxnID][source] - if not _check_datum(datum): continue - ### define rate law variables; calculate flux; average or overwrite the flux based on data criteria - locals().update({metID: self.conc.at[metID, self.previous_time]*milli for metID in datum["mets"]}) - flux = eval(datum["substituted_rate_law"]) - print(datum["substituted_rate_law"], flux) - if ("metadata" not in self.kinetics_data[rxnID][source] - or self.__find_data_match(rxnID, source) == 'a'): fluxes.append(flux) - else: fluxes = [flux] - - flux = mean(fluxes) - rxn = self.model_util.model.reactions.get_by_id(rxnID) - rxn.lb = rxn.ub = flux - self.fluxes.at[rxnID, self.time] = flux - ## execute the COBRA model - sol = self.model_util.model.optimize() - self.sols.append(sol) - ## add previously undefined fluxes and concentrations - for rxnID in self.fluxes.index: - if self.fluxes.at[rxnID, self.time] == 0: - self.fluxes.at[rxnID, self.time] = sol.fluxes[rxnID] - for met in self.model_util.model.metabolites: - self.conc.at[met.id, self.time] = 0 - for rxn in met.reactions: - flux = self.fluxes.at[rxn.id, self.time] - if flux == 0: continue - # print(rxn.metabolites[met], flux, timestep_hr, cell_g_L) - self.conc.at[met.id, self.time] += rxn.metabolites[met] * flux * timestep_hr * cell_g_L - if all(chemostat_requirements): - self.moles[self.time] = (self.conc[self.time] * milli * chemostat_L) - self._chemostat(feed_profile, chemostat_L_hr, chemostat_L) - elif any(chemostat_requirements): warn("The < chemostat_L > , < feed_profile >, and < chemostat_L_hr >" - " parameters must all be defined to simulate a chemostat.") - self.variables["elapsed_time"] += self.ts_min - if self.printing: print(f"\nObjective value (\u0394t{self.ts_min}): ", self.sols[-1].objective_value) - - # identify the chemicals that dynamically changed in concentrations - self.changed = set([met_id for met_id in self.met_ids - if self.conc.at[met_id, "0 min"] != self.conc.at[met_id, self.time]]) - self.unchanged = set(self.met_ids.keys()) - self.changed - - # visualize concentration changes over time - if visualize: self._visualize(conc_figure_title, included_mets, labeled_plots) - if export: self._export(export_name, export_directory, total_min) - if self.verbose: print(f"\nChanged concentrations:\t{self.changed}", - f"\nConstrained reactions:\t{constrained.keys()}") - elif self.printing: - if self.jupyter: pandas.set_option("max_rows", None) ; display(self.conc, self.fluxes) - if self.unchanged == set(): print("All of the metabolites changed concentration over the simulation") - else: print(f"\nUnchanged metabolite concentrations\t{self.unchanged}") - return self.conc, self.fluxes - - def _chemostat(self, feed_profile:dict, chemostat_L_hr, chemostat_L): - L_changed = chemostat_L_hr * self.ts_min - # chemostat addition - for met_id, conc in feed_profile.items(): - self.moles.at[met_id, self.time] += conc * L_changed - self.conc.at[met_id, self.time] = ( - self.moles.at[met_id, self.time] / milli / chemostat_L) # normalize to the chemostat volume - # chemostat subtraction - for met in self.model_util.model.metabolites: - if met.compartment[0] != "e": continue - ## update the chemical moles - self.moles.at[met.id, self.time] -= (self.conc.at[met.id, self.time] * L_changed) - ## define the chemical concentration - self.conc.at[met.id, self.time] = ( - self.moles.at[met.id, 
self.time] / milli / chemostat_L) - - # nested functions - def __find_data_match(self, rxnID: str, source: str): - # identifies the datum whose experimental conditions most closely matches the simulation conditions - temperature_deviation = ph_deviation = 0 - if FBAHelper.isnumber(self.kinetics_data[rxnID][source]["metadata"]["Temperature"]): - temp = float(self.kinetics_data[rxnID][source]["metadata"]["Temperature"]) - temperature_deviation = (abs(self.parameters["temperature"] - temp) / self.parameters["temperature"]) - if FBAHelper.isnumber(self.kinetics_data[rxnID][source]["metadata"]["pH"]): - pH = float(self.kinetics_data[rxnID][source]["metadata"]["pH"]) - ph_deviation = (abs(self.parameters["pH"] - pH) / self.parameters["pH"]) - - # equally weight between temperature and pH deviation from the simulation conditions - old_minimum = self.minimum - deviation = mean(temperature_deviation, ph_deviation) - self.minimum = min(deviation, self.minimum) - return "a" if old_minimum == self.minimum else "w" # append or write a list of data - - def _visualize(self, conc_fig_title, included_mets, labeled_plots): - # TODO construct a Vega visualization with a range bind that permits scanning over a time series - ## and accordingly adjusting arrowhead widths to reflect flux at the particularly timestep. - ## The heatmap may likewise be dynamic for each timestep over a bind range. - - - # define the figure - pyplot.rcParams['figure.figsize'] = (11, 7) - pyplot.rcParams['figure.dpi'] = 150 - self.figure, ax = pyplot.subplots() - ax.set_title(conc_fig_title) - ax.set_ylabel("Concentrations (mM)") - - x_axis_scalar, unit = _x_axis_determination(self.total_min) - ax.set_xlabel("Time " + unit) - legend_list = [] - times = [t * self.ts_min * x_axis_scalar for t in range(self.parameters["timesteps"] + 1)] - - # determine the plotted metabolites and the scale of the figure axis - bbox = (1, 1) - if not included_mets: - bbox = (1.7, 1) - # 1e-2 is an arbitrary concentration threshold for plotting on the figure - included_mets = [chem for chem in self.changed - if max(self.conc.loc[[chem]].values[0].tolist()) > 1e-2] - - log_axis = False - minimum, maximum = inf, -inf - printed_concentrations = {} - for chem in self.changed: - if chem not in included_mets: continue - concentrations = self.conc.loc[[chem]].values[0].tolist() - maximum = max(maximum, max([x if x > 1e-9 else 0 for x in concentrations])) - minimum = min(minimum, min([x if x > 1e-9 else 0 for x in concentrations])) - # plot chemicals with perturbed concentrations - ax.plot(times, concentrations) - if len(chem) > 25: chem = list(self.met_ids.keys())[self.met_ids.index(chem)] - if not concentrations[0] < 1e-9: legend_list.append(chem) - else: legend_list.append(f"(rel) {chem}") - - # design the proper location of the overlaid labels in the figure - if not labeled_plots: continue - for i, conc in enumerate(concentrations): - if conc <= 1e-9: continue - x_value = i * self.ts_min - vertical_adjustment = 0 - if x_value in printed_concentrations: - vertical_adjustment = (maximum - minimum) * 0.05 - if log_axis: vertical_adjustment = log10(maximum - minimum) / 3 - ax.text(x_value, conc + vertical_adjustment, f"{chem} - {round(conc, 4)}", ha="left") - printed_concentrations[x_value] = conc - break - - # finalize figure details - if maximum > 10 * minimum: ax.set_yscale("log") - ax.set_xticks(times) - ax.grid(True) - ax.legend(legend_list, title="Changed chemicals", loc="upper right", - bbox_to_anchor=bbox, title_fontsize="x-large", fontsize="large") - - 
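For reference, the scoring performed by __find_data_match above reduces to an equally weighted mean of the fractional temperature and pH deviations between a datum's metadata and the simulation conditions. A standalone sketch of that computation (note that numpy.mean expects a sequence, so the two deviations are wrapped in a list):

from numpy import mean

def condition_deviation(sim_temp, sim_pH, datum_temp, datum_pH):
    # fractional deviations of the datum's conditions from the simulation's
    temp_dev = abs(sim_temp - datum_temp) / sim_temp
    ph_dev = abs(sim_pH - datum_pH) / sim_pH
    # equal weighting of temperature and pH deviation
    return mean([temp_dev, ph_dev])

# a datum measured at 30 C and pH 6 versus a 25 C, pH 7 simulation
print(condition_deviation(25, 7, 30, 6))  # ~0.171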
def _export(self, export_name="kineticsFBA", export_directory: str=None): - # define a unique simulation name - directory = os.path.dirname(export_directory) if export_directory else os.getcwd() - self.parameters["simulation_path"] = self.simulation_path = os.path.join(directory, export_name) - # export simulation content - self.fluxes.to_csv(os.path.join(self.simulation_path, "fluxes.csv")) - self.conc.to_csv(os.path.join(self.simulation_path, "concentrations.csv")) - obj_vals_df = pandas.DataFrame([(self.fluxes.columns[index].replace(' min', ''), sol.objective_value) - for index, sol in enumerate(self.sols)], columns=["min", "objective_value"]) - obj_vals_df.index = obj_vals_df["min"] ; obj_vals_df.drop(["min"], axis=1, inplace=True) - obj_vals_df.to_csv(os.path.join(self.simulation_path, "objective_values.csv")) - # export the parameters - parameters_table = pandas.DataFrame(self.parameters, columns=["parameter", "value"]) - parameters_table.to_csv(os.path.join(self.simulation_path, "parameters.csv")) - # export the figure - self.figure.savefig(os.path.join(self.simulation_path, "changed_concentrations.svg")) - if self.verbose and not self.jupyter: self.figure.show() diff --git a/modelseedpy/community/mssteadycom.py b/modelseedpy/community/mssteadycom.py deleted file mode 100644 index 69fa448..0000000 --- a/modelseedpy/community/mssteadycom.py +++ /dev/null @@ -1,282 +0,0 @@ -from icecream import ic - -from modelseedpy import FBAHelper -from modelseedpy.core.exceptions import ObjectAlreadyDefinedError, ParameterError, NoFluxError -# from modelseedpy.community.commhelper import build_from_species_models, CommHelper -from optlang import Constraint, Variable -from itertools import combinations -from optlang.symbolics import Zero -from pandas import DataFrame, concat -from matplotlib import pyplot -from numpy import array -import networkx -import sigfig -import os, re - - -def add_collection_item(met_name, normalized_flux, flux_threshold, ignore_mets, - species_collection, first, second): - if flux_threshold and normalized_flux <= flux_threshold: return species_collection - if not any([re.search(x, met_name, flags=re.IGNORECASE) for x in ignore_mets]): - species_collection[first][second].append(re.sub(r"(_\w\d$)", "", met_name)) - return species_collection - - -class MSSteadyCom: - - @staticmethod - def run_fba(mscommodel, media, pfba=False, fva_reactions=None, ava=False, minMemGrwoth:float=1, interactions=True): - - - # minGrowth = Constraint(name="minMemGrowth", lb=, ub=None) - # mscommodel.model.add_cons_vars - - # fix member abundances - if not mscommodel.abundances_set: - for member in mscommodel.members: - member.biomass_cpd.lb = minMemGrwoth - all_metabolites = {mscommodel.primary_biomass.products[0]: 1} - all_metabolites.update({mem.biomass_cpd: 1 / len(mscommodel.members) for mem in mscommodel.members}) - mscommodel.primary_biomass.add_metabolites(all_metabolites, combine=False) - # TODO constrain fluxes to be proportional to the relative abundance - - # TODO constrain the sum of fluxes to be proportional with the abundance - sol = mscommodel.run_fba(media, pfba, fva_reactions) - if interactions: return MSSteadyCom.interactions(mscommodel, sol) - if ava: return MSSteadyCom.abundance_variability_analysis(mscommodel, sol) - - @staticmethod - def abundance_variability_analysis(mscommodel, media): - variability = {} - for mem in mscommodel.members: - variability[mem.id] = {} - # minimal variability - mscommodel.set_objective(mem.biomasses, minimize=True) - variability[mem.id]["minVar"] 
= mscommodel.run_fba(media) - # maximal variability - mscommodel.set_objective(mem.biomasses, minimize=False) - variability[mem.id]["maxVar"] = mscommodel.run_fba(media) - return variability - - @staticmethod - def interactions( - mscommodel, # The MSCommunity object of the model (mandatory to prevent circular imports) - solution = None, # the COBRA simulation solution that will be parsed and visualized - media=None, # The media in which the community model will be simulated - # names=None, abundances=None, # names and abundances of the community species - flux_threshold: int = 1, # The threshold of normalized flux below which a reaction is not plotted - msdb=None, msdb_path:str=None, - visualize: bool = True, # specifies whether the net flux will be depicted in a network diagram - filename: str = 'cross_feeding', # Cross-feeding figure export name - export_format: str = "svg", - node_metabolites: bool = True, # specifies whether the metabolites of each node will be printed - show_figure: bool = True, # specifies whether the figure will be printed to the console - ignore_mets=None # cross-fed exchanges that will not be displayed in the graphs - ): - # verify that the model has a solution and parallelize where the solver is permissible - solver = str(type(mscommodel.util.model.solver)) - print(f"{solver} model loaded") - # if "gurobi" in solver: model.configuration._solver_model.setParam("Threads", os.cpu_count() // 2) - solution = solution or mscommodel.run_fba(media) - if not solution: raise ParameterError("A solution must be provided, from which interactions are computed.") - if all(array(list(solution.fluxes.values)) == 0): - print(list(solution.fluxes.values)) - raise NoFluxError("The simulation lacks any flux.") - - #Initialize data - metabolite_data, species_data, species_collection = {}, {"Environment":{}}, {"Environment":{}} - data = {"IDs":[], "Metabolites/Donor":[], "Environment":[]} - species_list = {} - - # track extracellularly exchanged metabolites - exchange_mets_list = mscommodel.util.exchange_mets_list() - for met in exchange_mets_list: - data["IDs"].append(met.id) - data["Metabolites/Donor"].append(re.sub(r"(_\w\d$)", "", met.name)) - metabolite_data[met.id] = {"Environment": 0} - metabolite_data[met.id].update({individual.id: 0 for individual in mscommodel.members}) - - # computing net metabolite flux from each reaction - # print([mem.id for mem in mscommodel.members]) - for individual in mscommodel.members: - species_data[individual.id], species_collection[individual.id] = {}, {} - species_list[individual.index] = individual - data[individual.id] = [] - for other in mscommodel.members: - species_data[individual.id][other.id] = 0 - species_collection[individual.id][other.id] = [] - species_data["Environment"][individual.id] = species_data[individual.id]["Environment"] = 0 - species_collection["Environment"][individual.id] = [] - species_collection[individual.id]["Environment"] = [] - - for rxn in mscommodel.util.model.reactions: - if rxn.id[0:3] == "EX_": - cpd = list(rxn.metabolites.keys())[0] - # the Environment takes the opposite perspective to the members - metabolite_data[cpd.id]["Environment"] += -solution.fluxes[rxn.id] - rxn_index = int(FBAHelper.rxn_compartment(rxn)[1:]) - if not any([met not in exchange_mets_list for met in rxn.metabolites] - ) or rxn_index not in species_list: continue - for met in rxn.metabolites: - if met.id not in metabolite_data: continue - metabolite_data[met.id][species_list[rxn_index].id] += solution.fluxes[rxn.id]*rxn.metabolites[met] 
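Each entry accumulated above is a signed net flux: members are credited with the solution flux times the reaction's stoichiometric coefficient, while the Environment is booked with the opposite sign of the community exchange flux. The translation that follows converts these entries into pairwise donations by normalizing the product of a producer's and a consumer's entry against the total positive production. A toy illustration with made-up fluxes:

# signed net fluxes of one metabolite (positive = produced, negative = consumed)
net = {"Environment": -2.0, "memberA": 3.0, "memberB": -1.0}
total = sum(max(flux, 0) for flux in net.values())  # total positive production
# donation from memberA to memberB, mirroring the species_data update below
donation = abs(net["memberA"] * net["memberB"]) / total
print(donation)  # 1.0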
- - # translating net metabolite flux into species interaction flux - ignore_mets = ignore_mets if ignore_mets is not None else ["h2o_e0", "co2_e0"] - for met in exchange_mets_list: - #Iterating through the metabolite producers - # TODO Why are fluxes normalized? - total = sum([max([metabolite_data[met.id][individual.id], 0]) for individual in mscommodel.members - ]) + max([metabolite_data[met.id]["Environment"], 0]) - for individual in mscommodel.members: - ## calculate metabolic consumption of a species from the environment - if metabolite_data[met.id][individual.id] < Zero: - if metabolite_data[met.id]["Environment"] <= Zero: continue - normalized_flux = abs(metabolite_data[met.id][individual.id] - * metabolite_data[met.id]["Environment"]) / total - species_data["Environment"][individual.id] += normalized_flux - species_collection = add_collection_item(met.name, normalized_flux, flux_threshold, ignore_mets, - species_collection, "Environment", individual.id) - ## calculate and track metabolic donations between a member and another or the environment - elif metabolite_data[met.id][individual.id] > Zero: - for other in mscommodel.members: - ### filter against organisms that do not consume - if metabolite_data[met.id][other.id] >= Zero: continue - normalized_flux = abs(metabolite_data[met.id][individual.id] - * metabolite_data[met.id][other.id])/total - species_data[individual.id][other.id] += normalized_flux - species_collection = add_collection_item(met.name, normalized_flux, flux_threshold, ignore_mets, - species_collection, individual.id, other.id) - ## calculate donations to the environment - if metabolite_data[met.id]["Environment"] >= Zero: continue - normalized_flux = abs(metabolite_data[met.id][individual.id] - * metabolite_data[met.id]["Environment"])/total - species_data[individual.id]["Environment"] += normalized_flux - species_collection = add_collection_item(met.name, normalized_flux, flux_threshold, ignore_mets, - species_collection, individual.id, "Environment") - - # construct the dataframes - for metID in metabolite_data: - for individual in mscommodel.members: - data[individual.id].append(metabolite_data[metID][individual.id]) - data["Environment"].append(metabolite_data[metID]["Environment"]) - - ## process the fluxes dataframe - data["IDs"].append("zz_Environment") - data["Metabolites/Donor"].append(0) - for individual in mscommodel.members: - data[individual.id].append(species_data["Environment"][individual.id]) - data["Environment"].append(0) - for individual in mscommodel.members: - for other in mscommodel.members: - data[individual.id].append(species_data[individual.id][other.id]) - data["Environment"].append(species_data[individual.id]["Environment"]) - data["IDs"].append(f"zz_Species{individual.index}") - data["Metabolites/Donor"].append(individual.id) - - # if len(set(list(map(len, list(data.values()))))) != 1: - # print([(col, len(content)) for col, content in data.items()]) - cross_feeding_df = DataFrame(data) - cross_feeding_df.index = [ID.replace("_e0", "") for ID in map(str, cross_feeding_df["IDs"])] - cross_feeding_df.index.name = "Metabolite/Donor ID" - cross_feeding_df.drop(['IDs', "Metabolites/Donor"], axis=1, inplace=True) - cross_feeding_df = cross_feeding_df.loc[(cross_feeding_df != 0).any(axis=1)] - cross_feeding_df.sort_index(inplace=True) - - ## process the identities dataframe - exchanged_mets = {"Environment": [" "], "Donor ID": ["Environment"]} - exchanged_mets.update({ind.id: [] for ind in mscommodel.members}) - for individual in 
mscommodel.members: - ### environment exchanges - exchanged_mets[individual.id].append("; ".join(species_collection["Environment"][individual.id])) - exchanged_mets["Environment"].append("; ".join(species_collection[individual.id]["Environment"])) - ### member exchanges - exchanged_mets["Donor ID"].append(individual.id) - for other in mscommodel.members: - exchanged_mets[individual.id].append("; ".join(species_collection[individual.id][other.id])) - - # if len(set(list(map(len, list(exchanged_mets.values()))))) != 1: - # print([(col, len(content)) for col, content in exchanged_mets.items()]) - exMets_df = DataFrame(exchanged_mets) - exMets_df.index = [ID.replace("_e0", "") for ID in map(str, exMets_df["Donor ID"])] - exMets_df.index.name = "Donor ID" - exMets_df.drop(["Donor ID"], axis=1, inplace=True) - exMets_df.sort_index(inplace=True) - exMets_df.fillna(" ") - - # graph the network diagram - if visualize: MSSteadyCom.visual_interactions(cross_feeding_df, filename, export_format, - msdb, msdb_path, show_figure, node_metabolites) - - return cross_feeding_df, exMets_df - - @staticmethod - def visual_interactions(cross_feeding_df, filename="cross_feeding", export_format="svg", msdb=None, - msdb_path=None, view_figure=True, node_metabolites=True): - # load the MSDB - assert msdb or msdb_path, ValueError("Either the MSDB object or the local MSDB path must be provided") - from modelseedpy.biochem import from_local - msdb = msdb or from_local(msdb_path) - # construct the structure of the cross-feeding DataFrame - if "Metabolite/Donor ID" in cross_feeding_df.columns: - cross_feeding_df.index = [metID.replace("_e0", "") for metID in cross_feeding_df["Metabolite/Donor ID"].values] - cross_feeding_df.index.name = "Metabolite/Donor ID" - cross_feeding_df.drop([col for col in cross_feeding_df.columns if "ID" in col], axis=1, inplace=True) - else: cross_feeding_df.index = [metID.replace("_e0", "") for metID in cross_feeding_df.index] - # define the cross-fed metabolites - cross_feeding_rows = [] - for index, row in cross_feeding_df.iterrows(): - positive = negative = False - for col, val in row.items(): - if col not in ["Environment"]: - if val > 1e-4: positive = True - elif val < -1e-4: negative = True - if negative and positive: cross_feeding_rows.append(row) ; break - metabolites_df = concat(cross_feeding_rows, axis=1).T - metabolites_df.index.name = "Metabolite ID" - display(metabolites_df) - metabolites = [msdb.compounds.get_by_id(metID.replace("_e0", "")) for metID in metabolites_df.index.tolist() - if metID not in ["cpdETCM", "cpdETCMe"]] - # define the community members that participate in cross-feeding - members = metabolites_df.loc[:, (metabolites_df != 0).any(axis=0)].columns.tolist() - members.remove("Environment") - members_cluster1, members_cluster2 = members[:int(len(members) / 2)], members[int(len(members) / 2):] - - # TODO define a third node tier of just the environment as a rectangle that spans the width of the members - ## which may alleviate much of the ambiguity about mass imbalance between the member fluxes - import graphviz - dot = graphviz.Digraph(filename, format=export_format) # directed graph - # define nodes - ## top-layer members - # TODO hyperlink the member nodes with their Narrative link - dot.attr('node', shape='rectangle', color="lightblue2", style="filled") - for mem in members_cluster1: - index = members.index(mem) - dot.node(f"S{index}", mem) - ## mets in the middle layer - with dot.subgraph(name="mets") as mets_subgraph: - mets_subgraph.attr(rank="same") - 
mets_subgraph.attr('node', shape='circle', color="green", style="filled") - for metIndex, met in enumerate(metabolites): - mets_subgraph.node(met.abbr[:3], fixedsize="true", height="0.4", tooltip=f"{met.id} ; {met.name}", - URL=f"https://modelseed.org/biochem/compounds/{met.id}") - ## bottom-layer members - with dot.subgraph(name="members") as members_subgraph: - members_subgraph.attr(rank="same") - for mem in members_cluster2: - index = members.index(mem) - dot.node(f"S{index}", mem) - # define the edges by parsing the interaction DataFrame - for met in metabolites: - row = metabolites_df.loc[met.id] - maxVal = max(list(row.to_numpy())) - for col, val in row.items(): - if col == "Environment": continue - index = members.index(col) - # TODO color carbon sources red - if val > 0: dot.edge(f"S{index}", met.abbr[:3], arrowsize=f"{val / maxVal}", edgetooltip=str(val)) - if val < 0: dot.edge(met.abbr[:3], f"S{index}", arrowsize=f"{abs(val / maxVal)}", edgetooltip=str(val)) - - # render and export the source - dot.render(filename, view=view_figure) - return dot.source diff --git a/modelseedpy/community/steadycom_template.html b/modelseedpy/community/steadycom_template.html deleted file mode 100644 index b894c7f..0000000 --- a/modelseedpy/community/steadycom_template.html +++ /dev/null @@ -1,54 +0,0 @@ - - - - - - SteadyCom Results - - - - - - - - - - -
[deleted 54-line static HTML template rendering a "SteadyCom Results" page; markup stripped]
\ No newline at end of file
diff --git a/modelseedpy/core/msmodelutl.py b/modelseedpy/core/msmodelutl.py
index 9d1d78f..a454461 100644
--- a/modelseedpy/core/msmodelutl.py
+++ b/modelseedpy/core/msmodelutl.py
@@ -121,7 +121,7 @@ def build_from_kbase_json_file(filename, kbaseapi):
         model = factory.build_object_from_file(filename, "KBaseFBA.FBAModel")
         return MSModelUtil(model)
 
-    def __init__(self, model):
+    def __init__(self, model, copy=False, environment=None):
         self.model = model
         if environment is not None: self.add_medium(environment)
         self.id = model.id
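The final msmodelutl.py hunk widens the MSModelUtil constructor so that a medium can be applied at construction time. A minimal usage sketch, using a toy COBRA model as a stand-in; environment would be a media object accepted by add_medium, and None skips that step:

from cobra import Model
from modelseedpy import MSModelUtil

model = Model("toy")  # stand-in for a real genome-scale model
# environment, if given, is forwarded to add_medium() before anything else
mdlutl = MSModelUtil(model, copy=False, environment=None)
print(mdlutl.id)  # mirrors the wrapped model's id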