CommScores updates

freiburgermsu · Aug 21, 2023 · 6af0b8a · 6af0b8a
1 parent 8a08860
commit 6af0b8a
Show file tree

Hide file tree

Showing 9 changed files with 115 additions and 2,531 deletions.
diff --git a/examples/Community Modeling/smetana/Vorholt_replication.ipynb b/examples/Community Modeling/smetana/Vorholt_replication.ipynb
diff --git a/modelseedpy/biochem/modelseed_reaction.py b/modelseedpy/biochem/modelseed_reaction.py
@@ -1,4 +1,3 @@
-
 # -*- coding: utf-8 -*-
 import math
 from modelseedpy.biochem.seed_object import ModelSEEDObject

diff --git a/modelseedpy/community/commscores.py b/modelseedpy/community/commscores.py
@@ -327,6 +327,8 @@ def calculate_scores(pairs, models_media=None, environments=None, annotated_geno
                     g1, g2, comm = CommScores._determine_growths([model_utils[model1.id], model_utils[model2.id], community.util])
                     g1, g2, comm = _sigfig_check(g1, 5, ""), _sigfig_check(g2, 5, ""), _sigfig_check(comm, 5, "")
                     kbase_dic.update({"media": environName, "model1 growth": g1, "model2 growth": g2, "community growth": comm})
+                    coculture_growths = {mem.id: comm_sol.fluxes[mem.primary_biomass.id] for mem in community.members}
+                    kbase_dic.update({f"coculture growth model{modelIDs.index(mem.id)}": growth for mem, growth in coculture_growths.items()})
                     # define the MRO content
                     mro_values = CommScores.mro(grouping, models_media, raw_content=True, environment=environ)
                     kbase_dic.update({f"MRO_model{modelIDs.index(models_string.split('--')[0])+1}":
@@ -370,11 +372,11 @@ def calculate_scores(pairs, models_media=None, environments=None, annotated_geno
                     kbase_dic.update({"PC_comm": _sigfig_check(pc_values[0], 5, ""),
                                       "PC_model1": _sigfig_check(list(pc_values[1].values())[0], 5, ""),
                                       "PC_model2": _sigfig_check(list(pc_values[1].values())[1], 5, ""),
-                                      "BIT": pc_values[2]})
+                                      "BIT": pc_values[3]})
                     if print_progress:  print("PC  done\tBIT done", end="\t")
                     # print([mem.slim_optimize() for mem in grouping])
-                    gyd, g1, g2 = list(CommScores.gyd(grouping, grouping_utils, environ, False, community, anme_comm).values())[0]
-                    kbase_dic.update({"GYD": _sigfig_check(gyd, 5, "")})
+                    gyd1, gyd2, g1, g2 = list(CommScores.gyd(grouping, grouping_utils, environ, False, community, anme_comm).values())[0]
+                    kbase_dic.update({"GYD1": _sigfig_check(gyd1, 5, ""), "GYD2": _sigfig_check(gyd2, 5, "")})
                     if print_progress:  print("GYD done\t\t", end="\t" if annotated_genomes else "\n")
                     if kbase_obj is not None and annotated_genomes and not anme_comm:
                         fs_values = list(CommScores.fs(grouping, kbase_obj, annotated_genomes=annotated_genomes).values())[0]
@@ -435,7 +437,7 @@ def kbase_output(all_models:iter=None,  # a list of distinct lists is provided f
             if missing_models != set():
                 print(f"Media of the {missing_modelID} models are not defined, and will be calculated separately.")
                 models_media.update(_get_media(model_s_=missing_models), skip_bad_media=skip_bad_media)
-        if see_media and not mem_media:  print(f"The minimal media of all members:\n{models_media}")
+        if see_media:  print(f"The minimal media of all members:\n{models_media}")
         print(f"\nExamining the {len(list(model_pairs))} model pairs")
         if pool_size is not None:
             from datetime import datetime  ;  from multiprocess import Pool
@@ -704,7 +706,7 @@ def gyd(member_models:Iterable=None, model_utils:Iterable=None, environment=None
                 G_m1, G_m2 = member_growths[model1_util.model.id], member_growths[model2_util.model.id]
             if G_m2 <= 0 and G_m1 <= 0: gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = ("", G_m1, G_m2)  ;  continue
             if G_m2 <= 0 or G_m1 <= 0: gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = ("", G_m1, G_m2)  ;  continue
-            gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = (abs(G_m1-G_m2) / min([G_m1, G_m2]), G_m1, G_m2)
+            gyds[f"{model1_util.model.id} ++ {model2_util.model.id}"] = (abs(G_m1-G_m2)/G_m1, abs(G_m2-G_m1)/G_m2, G_m1, G_m2)
         return gyds
 
     @staticmethod
@@ -735,7 +737,7 @@ def pc(member_models=None, modelutils=None, com_model=None, isolate_growths=None
         elif all(growth_diffs < th_pos) and any(growth_diffs < th_neg):  bit = "amensalism"
         elif any(growth_diffs > th_pos) and any(growth_diffs < th_neg):  bit = "parasitism"
         else: print(f"The relative growths {comm_growth_effect} from {comm_member_growths} are not captured.")  ;  bit = ""
-        return (pc_score, comm_growth_effect, bit)
+        return (pc_score, comm_growth_effect, comm_member_growths, bit)
 
     @staticmethod
     def bss(member_models:Iterable=None, model_utils:Iterable=None, environments=None, minMedia=None, skip_bad_media=False):

diff --git a/modelseedpy/core/report.py b/modelseedpy/core/report.py
@@ -1,5 +1,5 @@
 # from django import shortcuts
-from numpy import nan, isnan, unique
+from numpy import nan, isnan, unique, load
 import jinja2
 import os, re, math, json
 
@@ -189,20 +189,43 @@ def steadycom_report(flux_df, exMets_df, export_html_path="steadycom_report.html
     with open(export_html_path, "w") as out:  out.writelines(html_report)
     return html_report
 
+
+
+rm_costless = re.compile("(\s\(.+\))")
+def remove_metadata(element):
+    try:  element = float(rm_costless.sub("", str(element)).replace("%", ""))
+    except: pass
+    return element
+def convert_to_int(element):
+    try:  element = int(element)
+    except: pass
+    return element
+
+
+categories_dir = os.path.join(package_dir, "data", "categories")
+sugars, aminoacids = load(os.path.join(categories_dir, "sugars.npy")), load(os.path.join(categories_dir, "aminoAcids.npy"))
+vitamins, minerals = load(os.path.join(categories_dir, "vitamins.npy")), load(os.path.join(categories_dir, "minerals.npy"))
+energy_compounds = load(os.path.join(categories_dir, "energy_compounds.npy"))
+sugars_dic, aminoacids_dic = dict(zip(sugars[:,0], sugars[:,1])), dict(zip(aminoacids[:,0], aminoacids[:,1]))
+vitamins_dic, minerals_dic = dict(zip(vitamins[:,0], vitamins[:,1])), dict(zip(minerals[:,0], minerals[:,1]))
+energy_compounds_dic = dict(zip(energy_compounds[:,0], energy_compounds[:,1]))
+def categorize_mets(metIDs):
+    met_sugars, met_aminoAcids, met_vitamins, met_minerals, met_energy, met_other = [], [], [], [], [], []
+    for metID in metIDs:
+        if metID in sugars[:, 0]:  met_sugars.append(f"{sugars_dic[metID]} ({metID})")
+        elif metID in aminoacids[:, 0]:  met_aminoAcids.append(f"{aminoacids_dic[metID]} ({metID})")
+        elif metID in vitamins[:, 0]:  met_vitamins.append(f"{vitamins_dic[metID]} ({metID})")
+        elif metID in minerals[:, 0]:  met_minerals.append(f"{minerals_dic[metID]} ({metID})")
+        elif metID in energy_compounds[:, 0]:  met_energy.append(f"{energy_compounds_dic[metID]} ({metID})")
+        else:  met_other.append(metID)
+    return met_sugars, met_aminoAcids, met_vitamins, met_minerals, met_energy, met_other
+
+def process_mets(metIDs):
+    return [", ".join(lst) for lst in categorize_mets(metIDs)]
+
+
 def commscores_report(df, mets, export_html_path="commscores_report.html", msdb_path=None):
     from pandas import to_numeric
-
-    # construct a heatmap
-    rm_costless = re.compile("(\s\(.+\))")
-    costless = re.compile(r"(?<=\s\()(\d)(?=\))")
-    def remove_metadata(element):
-        try:  element = float(rm_costless.sub("", str(element)).replace("%", ""))
-        except: pass
-        return element
-    def convert_to_int(element):
-        try:  element = int(element)
-        except: pass
-        return element
     def names_updateCPD(metIDs, update_cpdNames):
         names = []
         for metID in metIDs:
@@ -216,6 +239,7 @@ def names_updateCPD(metIDs, update_cpdNames):
             names.append(name)
         return names, update_cpdNames
 
+    # construct a heatmap
     df.index.name = "Community_index"
     heatmap_df = df.copy(deep=True) # takes some time
     heatmap_df_index = zip(heatmap_df["model1"].to_numpy(), heatmap_df["model2"].to_numpy())
@@ -229,6 +253,7 @@ def names_updateCPD(metIDs, update_cpdNames):
     heatmap_df = heatmap_df.loc[~heatmap_df.index.duplicated(), :]
     heatmap_df = heatmap_df.drop(["model1", "model2"], axis=1)
     if "media" in heatmap_df:  heatmap_df = heatmap_df.drop(["media"], axis=1)
+    costless = re.compile(r"(?<=\s\()(\d)(?=\))")
     if "MIP_model1 (costless)" in heatmap_df.columns:
         mip_model1, mip_model2 = [], []
         for e in heatmap_df["MIP_model1 (costless)"]:
@@ -258,7 +283,7 @@ def names_updateCPD(metIDs, update_cpdNames):
     mro_mets, mro_mets_names, mip_model1_mets, mip_model1_mets_names = [], [], [], []
     mip_model2_mets, mip_model2_mets_names, cip_mets, cip_mets_names = [], [], [], []
     from json import load, dump
-    cpdNames_path = os.path.join(os.path.dirname(__file__), "..", "data", "cpdNames.json")
+    cpdNames_path = os.path.join(package_dir, "data", "cpdNames.json")
     with open(cpdNames_path, "r") as jsonIn:
         cpdNames = load(jsonIn)
     update_cpdNames = {}
@@ -294,7 +319,7 @@ def names_updateCPD(metIDs, update_cpdNames):
     # print(list(map(len, df_content.values())))
     mets_table = DataFrame(data=df_content)
     mets_table.index.name = "Community_index"
-    
+
     # populate the HTML template with the assembled simulation data from the DataFrame -> HTML conversion
     content = {'table': df.to_html(table_id="main", classes="display"), "mets_table": mets_table.to_html(),
                "heatmap": heatmap_df.applymap(lambda x: round(x, 3)).style.background_gradient().to_html(table_id="heat", classes="display")}

diff --git a/modelseedpy/data/categories/aminoAcids.npy b/modelseedpy/data/categories/aminoAcids.npy
diff --git a/modelseedpy/data/categories/categorized_sugars.npy b/modelseedpy/data/categories/categorized_sugars.npy
diff --git a/modelseedpy/data/categories/energy_compounds.npy b/modelseedpy/data/categories/energy_compounds.npy
diff --git a/modelseedpy/data/categories/minerals.npy b/modelseedpy/data/categories/minerals.npy
diff --git a/modelseedpy/data/categories/vitamins.npy b/modelseedpy/data/categories/vitamins.npy