From da815467d589673aa0e6515972f779ae9ed95798 Mon Sep 17 00:00:00 2001
From: Cyril Pommier <cyril.pommier@inra.fr>
Date: Wed, 9 Dec 2020 11:09:36 +0100
Subject: [PATCH 1/4] Slight refactors

---
 brapi_to_isa.py           | 188 ++++++++++++++++++++++++++++++++++----
 brapi_to_isa_converter.py |  72 +++++++--------
 2 files changed, 204 insertions(+), 56 deletions(-)

diff --git a/brapi_to_isa.py b/brapi_to_isa.py
index 87c1ccf..222fd58 100755
--- a/brapi_to_isa.py
+++ b/brapi_to_isa.py
@@ -15,7 +15,7 @@
 from isatools.model import *
 
 from brapi_client import BrapiClient
-from brapi_to_isa_converter import BrapiToIsaConverter, att_test, PAR_NAinData, PAR_NAinBrAPI, PAR_defaultObsLvl, PAR_suppObsLvl
+from brapi_to_isa_converter import BrapiToIsaConverter, get_attribute_or_na, PAR_NAinData, PAR_NAinBrAPI, PAR_defaultObsLvl, PAR_suppObsLvl
 
 __author__ = 'proccaserra (Philippe Rocca-Serra)'
 __author__ = 'cpommier (Cyril Pommier)'
@@ -91,12 +91,14 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
 
     treatments = defaultdict(list)
     allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific)
+
+    # currently one assay per material/source. Need one assay per level with all the source material
     for obs_unit in OBSERVATIONUNITLIST:
         if 'observationLevel' in obs_unit and obs_unit['observationLevel']:
-            i = obs_level_to_assay[obs_unit['observationLevel'].lower()]
+            assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()]
             obslvl = obs_unit['observationLevel'].lower()
         else:
-            i = 0
+            assay_level = 0
             obslvl = PAR_defaultObsLvl
         # Getting the relevant germplasm used for that observation event:
         # ---------------------------------------------------------------
@@ -115,9 +117,9 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
             
             spat_dist = []
             for key in spat_dist_mapping_dictionary:
-                if att_test(obs_unit,key):
+                if get_attribute_or_na(obs_unit, key):
                     spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key])
-            if att_test(obs_unit,'observationLevels'):
+            if get_attribute_or_na(obs_unit, 'observationLevels'):
                 for lvl in obs_unit['observationLevels'].split(", "):
                     if len(lvl.split(":")) == 2:    
                         a, b = lvl.split(":")
@@ -134,10 +136,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
 
             # Looking for treatment in BRAPI and mapping to ISA samples 
             # ---------------------------------------------------------
-            if att_test(obs_unit, 'treatments'):
+            if get_attribute_or_na(obs_unit, 'treatments'):
                 treatmentbuffer = defaultdict(list)
                 for treatment in obs_unit['treatments']:
-                    if att_test(treatment,'factor') and att_test(treatment, 'modality'):
+                    if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'):
 
                         if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]:
                             treatmentbuffer[treatment['factor']].append(str(treatment['modality']))
@@ -165,11 +167,12 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         
         # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
 
-        isa_study.assays[i].samples.append(this_isa_sample)
+        # TODO: This seems to have no impact, check with @procassera et al
+        #isa_study.assays[assay_level].samples.append(this_isa_sample)
 
         phenotyping_process = Process(executes_protocol=phenotyping_protocol)
         phenotyping_process.inputs.append(this_isa_sample)
-        phenotyping_process.name =  att_test(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
+        phenotyping_process.name =  get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
 
         # Adding Parameter Value[Collection Date] column
         # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date"))
@@ -192,15 +195,15 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         data_transformation_process.inputs.append(RAW_datafile)
         
         # Adding Derived Data File column
-        datafilename = 'd_' + str(brapi_study_id) + '_' + att_test(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
+        datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
         DER_datafile = DataFile(filename=datafilename,
                                         label="Derived Data File")
         data_transformation_process.outputs.append(DER_datafile)
 
-        isa_study.assays[i].process_sequence.append(phenotyping_process)
+        isa_study.assays[assay_level].process_sequence.append(phenotyping_process)
         plink(growth_process, phenotyping_process)
-        
-        isa_study.assays[i].process_sequence.append(data_transformation_process)
+
+        isa_study.assays[assay_level].process_sequence.append(data_transformation_process)
         plink(phenotyping_process, data_transformation_process)
 
         
@@ -213,6 +216,151 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))           
         isa_study.factors.append(f)
 
+
+def create_study_sample_and_assay_full(client, brapi_study_id, isa_study,  growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST):
+
+    spat_dist_mapping_dictionary = {
+        "X": "X",
+        "Y": "Y",
+        "blockNumber": "block",
+        "plotNumber": "plot",
+        "plantNumber": "plant",
+        "replicate": "replicate"
+    }
+
+
+    # connecting the correct observation level to the correct assayobject
+    # NOTE observation level is temporarily stored inside isa_study.assays[i].characteristic_categories[0] better field available?
+    obs_level_to_assay = {}
+    for k,assay in enumerate(isa_study.assays):
+        obs_level_to_assay[assay.characteristic_categories[0]] = k
+
+    treatments = defaultdict(list)
+    allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific)
+    for obs_unit in OBSERVATIONUNITLIST:
+        if 'observationLevel' in obs_unit and obs_unit['observationLevel']:
+            assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()]
+            obslvl = obs_unit['observationLevel'].lower()
+        else:
+            assay_level = 0
+            obslvl = PAR_defaultObsLvl
+        # Getting the relevant germplasm used for that observation event:
+        # ---------------------------------------------------------------
+        this_source = isa_study.get_source(obs_unit['germplasmName'])
+        if this_source and obs_unit['observationUnitName'] not in allready_converted_obs_unit:
+            this_isa_sample = Sample(
+                name= obs_unit['observationUnitName'],
+                derives_from=[this_source])
+            allready_converted_obs_unit.append(obs_unit['observationUnitName'])
+
+            c = Characteristic(category=OntologyAnnotation(term="Observation Unit Type"),
+                               value=OntologyAnnotation(term=obslvl,
+                                                        term_source="",
+                                                        term_accession=""))
+            this_isa_sample.characteristics.append(c)
+
+            spat_dist = []
+            for key in spat_dist_mapping_dictionary:
+                if get_attribute_or_na(obs_unit, key):
+                    spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key])
+            if get_attribute_or_na(obs_unit, 'observationLevels'):
+                for lvl in obs_unit['observationLevels'].split(", "):
+                    if len(lvl.split(":")) == 2:
+                        a, b = lvl.split(":")
+                        spat_dist.append(a + ':' + b)
+                    elif len(lvl.split(":")) == 1:
+                        spat_dist.append(lvl)
+            spat_dist_str = ';'.join(spat_dist)
+            if spat_dist:
+                c = Characteristic(category=OntologyAnnotation(term="Spatial Distribution"),
+                                   value=OntologyAnnotation(term=spat_dist_str,
+                                                            term_source="",
+                                                            term_accession=""))
+                this_isa_sample.characteristics.append(c)
+
+            # Looking for treatment in BRAPI and mapping to ISA samples
+            # ---------------------------------------------------------
+            if get_attribute_or_na(obs_unit, 'treatments'):
+                treatmentbuffer = defaultdict(list)
+                for treatment in obs_unit['treatments']:
+                    if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'):
+
+                        if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]:
+                            treatmentbuffer[treatment['factor']].append(str(treatment['modality']))
+                for factor,modality in treatmentbuffer.items():
+                    modalities = ','.join(modality)
+                    if modalities not in treatments[factor]:
+                        treatments[factor].append(modalities)
+                    f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
+                    fv = FactorValue(factor_name=f,
+                                     value=OntologyAnnotation(term=modalities,
+                                                              term_source="",
+                                                              term_accession=""))
+                    this_isa_sample.factor_values.append(fv)
+            isa_study.samples.append(this_isa_sample)
+
+            # Creating the corresponding ISA sample entity for structure the document:
+            # ------------------------------------------------------------------------
+            growth_process = Process(executes_protocol=growth_protocol)
+            growth_process.inputs.append(this_source)
+            growth_process.outputs.append(this_isa_sample)
+            isa_study.process_sequence.append(growth_process)
+
+        # Assays at observation unit level
+        # --------------------------------
+
+        # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
+
+        isa_study.assays[assay_level].samples.append(this_isa_sample)
+
+        phenotyping_process = Process(executes_protocol=phenotyping_protocol)
+        phenotyping_process.inputs.append(this_isa_sample)
+        phenotyping_process.name =  get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
+
+        # Adding Parameter Value[Collection Date] column
+        # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date"))
+        # col_date_pv = ParameterValue(category=col_date_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI))
+        # sample_collection_process.parameter_values.append(col_date_pv)
+
+        # Adding Parameter Value[Sample Description] column
+        # sampl_des_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Sample Description"))
+        # sampl_des_pv = ParameterValue(category=sampl_des_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI))
+        # sample_collection_process.parameter_values.append(sampl_des_pv)
+
+        # Data Transformation
+        data_transformation_process = Process(executes_protocol=data_transformation_protocol)
+
+        # Adding Raw Data File column
+        RAW_datafile = DataFile(filename=PAR_NAinData,
+                                label="Raw Data File",
+                                generated_from=[this_isa_sample])
+        phenotyping_process.outputs.append(RAW_datafile)
+        data_transformation_process.inputs.append(RAW_datafile)
+
+        # Adding Derived Data File column
+        datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
+        DER_datafile = DataFile(filename=datafilename,
+                                label="Derived Data File")
+        data_transformation_process.outputs.append(DER_datafile)
+
+        isa_study.assays[assay_level].process_sequence.append(phenotyping_process)
+        plink(growth_process, phenotyping_process)
+
+        isa_study.assays[assay_level].process_sequence.append(data_transformation_process)
+        plink(phenotyping_process, data_transformation_process)
+
+
+    # Mapping treatments to ISA study Factor Value:
+    # ---------------------------------------------
+    for factor, modalities in treatments.items():
+        f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
+        modality = ";".join(modalities)
+        f.comments.append(Comment(name="Study Factor Values",value=modality))
+        f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))
+        isa_study.factors.append(f)
+
+
+
 def write_records_to_file(this_study_id, records, this_directory, filetype, ObservationLevel=''):
     logger.info('Writing to file')
     # tdf_file = 'out/' + this_study_id
@@ -300,18 +448,18 @@ def main(arg=SERVER):
         investigation.title = trial['trialName']
 
         #Investigation fields unavailable in BrAPI
-        investigation.description = att_test(trial, "trialDescription", PAR_NAinData)
+        investigation.description = get_attribute_or_na(trial, "trialDescription", PAR_NAinData)
         investigation.submission_date = PAR_NAinBrAPI
         investigation.public_release_date = PAR_NAinBrAPI
         investigation.comments.append(Comment(name="License", value=PAR_NAinBrAPI))
 
-        if att_test(trial, 'contacts'):
+        if get_attribute_or_na(trial, 'contacts'):
             for brapicontact in trial['contacts']:
                 #NOTE: brapi has just name attribute -> no separate first/last name
                 ContactName = brapicontact['name'].split(' ')
-                role = OntologyAnnotation(term=att_test(brapicontact, 'type', PAR_NAinData))
+                role = OntologyAnnotation(term=get_attribute_or_na(brapicontact, 'type', PAR_NAinData))
                 contact = Person(first_name=ContactName[0], last_name=' '.join(ContactName[1:]),
-                affiliation=att_test(brapicontact,'institutionName', PAR_NAinData), email=att_test(brapicontact,'email'), address=PAR_NAinBrAPI, roles=[role])
+                                 affiliation=get_attribute_or_na(brapicontact, 'institutionName', PAR_NAinData), email=get_attribute_or_na(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role])
                 investigation.contacts.append(contact)
         else:
             role = OntologyAnnotation(term=PAR_NAinData)
@@ -321,10 +469,10 @@ def main(arg=SERVER):
 
         investigation.comments.append(Comment(name="MIAPPE version", value="1.1"))
 
-        if att_test(trial, 'publications'):
+        if get_attribute_or_na(trial, 'publications'):
             for brapipublic in trial['publications']:
                 #This is BrAPI v1.3 specific (when older, skipped) 
-                publication = Publication(doi=att_test(brapipublic, 'publicationPUI', PAR_NAinData))
+                publication = Publication(doi=get_attribute_or_na(brapipublic, 'publicationPUI', PAR_NAinData))
                 publication.status = OntologyAnnotation(term="published")
                 investigation.publications.append(publication)
         else:
@@ -395,7 +543,7 @@ def main(arg=SERVER):
                         germplasminfo[germ['germplasmDbId']] = [germ['accessionNumber']]
 
                     # Associating ISA sources to ISA isa_study object
-                    isa_study.sources.append(source)
+                    isa_study.sources.append(source) # this line has all the necessary information, but it doesn't end up in the study file
 
                 # Now dealing with BRAPI observation units and attempting to create ISA samples
                 create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST)
diff --git a/brapi_to_isa_converter.py b/brapi_to_isa_converter.py
index 5e1f4cf..9a33e82 100644
--- a/brapi_to_isa_converter.py
+++ b/brapi_to_isa_converter.py
@@ -8,7 +8,7 @@
 import re
 import platform
 
-def att_test(dictionary, attribute, NA=""):
+def get_attribute_or_na(dictionary, attribute, NA=""):
     if attribute in dictionary and dictionary[attribute]:
         if dictionary[attribute] in ['NA', 'na','Na', 'n.a.', 'N.A.', 'N.a.']:
             return NA
@@ -59,7 +59,7 @@ def get_obs_levels(self, brapi_study_id, OBSERVATIONUNITLIST):
             for obs in ou['observations']:
                 if 'observationLevel' in ou and ou['observationLevel']:
                     obs_level_in_study[ou['observationLevel'].lower()].add(
-                        re.sub('[\s]+', '_', att_test(obs, 'observationVariableName', "NA variable name")))
+                        re.sub('[\s]+', '_', get_attribute_or_na(obs, 'observationVariableName', "NA variable name")))
                     if 'observationLevels' in ou.keys() and ou['observationLevels']:
                         for obslvl in ou['observationLevels'].split(","):
                             if len(obslvl.split(":")) == 2:
@@ -68,7 +68,7 @@ def get_obs_levels(self, brapi_study_id, OBSERVATIONUNITLIST):
                             elif len(obslvl.split(":")) == 1:
                                 obs_levels[ou['observationLevel'].lower()].add(obslvl)
                 else:
-                    obs_level_in_study[PAR_defaultObsLvl].add(re.sub('[\s]+', '_', att_test(obs, 'observationVariableName', "NA variable")))
+                    obs_level_in_study[PAR_defaultObsLvl].add(re.sub('[\s]+', '_', get_attribute_or_na(obs, 'observationVariableName', "NA variable")))
                     lvlNotAvailable = True
         if lvlNotAvailable:
             self.logger.info("This BrAPI endpoint does not contain observation levels. Please add 'observationLevel' to the observations. Default " + PAR_defaultObsLvl + " is taken as observation level.")
@@ -82,8 +82,8 @@ def organism_characteristic(self, all_germplasm_attributes, taxonId):
         """" Given a a dictionairy with the germplasm details, retrieve the organism characteristic"""
 
         #Testing for genus and species availability
-        genus = att_test(all_germplasm_attributes, 'genus')
-        species = att_test(all_germplasm_attributes, 'species')
+        genus = get_attribute_or_na(all_germplasm_attributes, 'genus')
+        species = get_attribute_or_na(all_germplasm_attributes, 'species')
 
         #Checking if taxonId is supplied or not, otherwise fetch it from www.ebi.ac.uk
         if not taxonId or not taxonId.isdigit():
@@ -91,7 +91,7 @@ def organism_characteristic(self, all_germplasm_attributes, taxonId):
         if taxonId:
             return self.create_isa_characteristic('Organism', "NCBITAXON:{}".format(str(taxonId)))
         else:
-            return self.create_isa_characteristic('Organism', att_test(all_germplasm_attributes, 'commonCropName', PAR_NAinData))
+            return self.create_isa_characteristic('Organism', get_attribute_or_na(all_germplasm_attributes, 'commonCropName', PAR_NAinData))
 
     def create_germplasm_chars(self, germplasm):
         """" Given a BRAPI Germplasm ID, retrieve the list of all attributes from BRAPI and returns a list of ISA
@@ -106,7 +106,7 @@ def create_germplasm_chars(self, germplasm):
         else:
             all_germplasm_attributes = germplasm
 
-        if att_test(all_germplasm_attributes, 'taxonIds'):
+        if get_attribute_or_na(all_germplasm_attributes, 'taxonIds'):
             for taxonid in all_germplasm_attributes['taxonIds']:
                 if taxonid['sourceName'] in ['NCBITaxon', 'ncbiTaxon']:
                     c = self.organism_characteristic(
@@ -130,7 +130,7 @@ def create_germplasm_chars(self, germplasm):
 
         for key in mapping_dictionnary:
             c = self.create_isa_characteristic(
-                mapping_dictionnary[key], str(att_test(all_germplasm_attributes, key)))
+                mapping_dictionnary[key], str(get_attribute_or_na(all_germplasm_attributes, key)))
             returned_characteristics.append(c)
 
         return returned_characteristics
@@ -154,17 +154,17 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study):
         else:
             this_study.title = PAR_NAinData
 
-        this_study.description = att_test(brapi_study, 'studyDescription', PAR_NAinData)
+        this_study.description = get_attribute_or_na(brapi_study, 'studyDescription', PAR_NAinData)
 
-        oa_st_design = OntologyAnnotation(term=att_test(brapi_study, 'studyType', PAR_NAinData))
+        oa_st_design = OntologyAnnotation(term=get_attribute_or_na(brapi_study, 'studyType', PAR_NAinData))
         oa_st_design.comments.append(Comment(name="Study Design Description", value=PAR_NAinBrAPI))
         oa_st_design.comments.append(Comment(name="Observation Unit Level Hierarchy", value=PAR_NAinBrAPI))
         oa_st_design.comments.append(Comment(name="Observation Unit Description", value=PAR_NAinBrAPI))
         oa_st_design.comments.append(Comment(name="Map of Experimental Design", value=PAR_NAinBrAPI))  
         this_study.design_descriptors = [oa_st_design]
 
-        this_study.comments.append(Comment(name="Study Start Date", value=att_test(brapi_study, 'startDate')))
-        this_study.comments.append(Comment(name="Study End Date", value=att_test(brapi_study, 'endDate')))
+        this_study.comments.append(Comment(name="Study Start Date", value=get_attribute_or_na(brapi_study, 'startDate')))
+        this_study.comments.append(Comment(name="Study End Date", value=get_attribute_or_na(brapi_study, 'endDate')))
         this_study.comments.append(Comment(name="Trait Definition File", value="t_" + str(brapi_study_id) + ".txt"))
         this_study.comments.append(Comment(name="Description of Growth Facility",value=PAR_NAinBrAPI))
         this_study.comments.append(Comment(name="Type of Growth Facility",value=PAR_NAinBrAPI))
@@ -172,7 +172,7 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study):
         
         # Adding Location information 
         if 'location' in brapi_study and brapi_study['location']:
-            this_study.comments.append(Comment(name="Study Experimental Site", value=att_test(brapi_study['location'], 'name', PAR_NAinData)))
+            this_study.comments.append(Comment(name="Study Experimental Site", value=get_attribute_or_na(brapi_study['location'], 'name', PAR_NAinData)))
 
             if 'countryCode' in brapi_study['location'] and brapi_study['location']['countryCode']:
                 if len(brapi_study['location']['countryCode']) == 3:
@@ -188,11 +188,11 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study):
                 this_study.comments.append(
                     Comment(name="Study Country", value=PAR_NAinData))
 
-            if att_test(brapi_study['location'], 'latitude'):
+            if get_attribute_or_na(brapi_study['location'], 'latitude'):
                 this_study.comments.append(Comment(name="Study Latitude", value=brapi_study['location']['latitude']))
-            if att_test(brapi_study['location'], 'longitude'):
+            if get_attribute_or_na(brapi_study['location'], 'longitude'):
                 this_study.comments.append(Comment(name="Study Longitude", value=brapi_study['location']['longitude']))
-            if att_test(brapi_study['location'], 'altitude'):
+            if get_attribute_or_na(brapi_study['location'], 'altitude'):
                 this_study.comments.append(Comment(name="Study Altitude",value=brapi_study['location']['altitude']))
         else:
             self.logger.info("BrAPI study " + brapi_study['studyDbId'] + "has no location attribute, this is mandatory to be MIAPPE compliant.")
@@ -200,17 +200,17 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study):
             this_study.comments.append(Comment(name="Study Experimental Site",value=PAR_NAinData))
 
         # Adding Contacts information
-        if att_test(brapi_study,'contacts' ):
+        if get_attribute_or_na(brapi_study, 'contacts'):
             for brapicontact in brapi_study['contacts']:
                 #NOTE: brapi has just name attribute -> no separate first/last name
                 ContactName = brapicontact['name'].split(' ')
-                role = OntologyAnnotation(term=att_test(brapicontact, 'type', PAR_NAinData))
+                role = OntologyAnnotation(term=get_attribute_or_na(brapicontact, 'type', PAR_NAinData))
                 contact = Person(first_name=ContactName[0], last_name=ContactName[1],
-                affiliation=att_test(brapicontact, 'institutionName', PAR_NAinData), email=att_test(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role])
+                                 affiliation=get_attribute_or_na(brapicontact, 'institutionName', PAR_NAinData), email=get_attribute_or_na(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role])
                 this_study.contacts.append(contact)
 
         # Adding dataLinks information
-        if att_test(brapi_study,'dataLinks'):
+        if get_attribute_or_na(brapi_study, 'dataLinks'):
             for brapidata in brapi_study['dataLinks']:
                 this_study.comments.append(Comment(name="Study Data File Link", value=brapidata['url']))
                 this_study.comments.append(Comment(name="Study Data File Description", value=brapidata['type']))
@@ -265,39 +265,39 @@ def create_isa_tdf_from_obsvars(self, obsvars):
 
         # decorating dictionairy
         for obs_var in obsvars:
-            obs_var_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var, 'observationVariableDbId'))
-            obs_var_name = att_test(obs_var, 'name')
-            obs_var_trait_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var['trait'], 'traitDbId'))
-            obs_var_method_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var['method'], 'methodDbId'))
+            obs_var_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var, 'observationVariableDbId'))
+            obs_var_name = get_attribute_or_na(obs_var, 'name')
+            obs_var_trait_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var['trait'], 'traitDbId'))
+            obs_var_method_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var['method'], 'methodDbId'))
 
             elements['Variable ID'].append(re.sub('[\s]+', '_', obs_var_name))
             
             if obs_var_id and obs_var_id.group(1).lower() in self.ontologies:
-                if att_test(obs_var, 'synonyms'):  
+                if get_attribute_or_na(obs_var, 'synonyms'):
                     elements['Variable Name'].append('; '.join(obs_var['synonyms']))
 
                 elements['Variable Accession Number'].append(obs_var_id.group(0).upper())
 
             else:
-                if att_test(obs_var, 'synonyms'):  
-                    elements['Variable Name'].append('; '.join(obs_var['synonyms']) + ' (BrAPI variableDbId: ' + att_test(obs_var, 'observationVariableDbId', PAR_NAinData) + ')')
+                if get_attribute_or_na(obs_var, 'synonyms'):
+                    elements['Variable Name'].append('; '.join(obs_var['synonyms']) + ' (BrAPI variableDbId: ' + get_attribute_or_na(obs_var, 'observationVariableDbId', PAR_NAinData) + ')')
                 else: 
-                     elements['Variable Name'].append('(BrAPI variableDbId: ' + att_test(obs_var, 'observationVariableDbId', PAR_NAinData) + ')')
+                     elements['Variable Name'].append('(BrAPI variableDbId: ' + get_attribute_or_na(obs_var, 'observationVariableDbId', PAR_NAinData) + ')')
 
-            elements['Trait'].append(att_test(obs_var['trait'], 'name'))
+            elements['Trait'].append(get_attribute_or_na(obs_var['trait'], 'name'))
 
             if obs_var_trait_id and obs_var_trait_id.group(1).lower() in self.ontologies:
                 elements['Trait Accession Number'].append(obs_var_trait_id.group(0).upper())
 
-            elements['Method'].append(att_test(obs_var['method'], 'name', att_test(obs_var, 'name', PAR_NAinData)))
+            elements['Method'].append(get_attribute_or_na(obs_var['method'], 'name', get_attribute_or_na(obs_var, 'name', PAR_NAinData)))
             
-            elements['Method Description'].append(att_test(obs_var['method'], 'description', att_test(obs_var['trait'], 'description', PAR_NAinData)))
+            elements['Method Description'].append(get_attribute_or_na(obs_var['method'], 'description', get_attribute_or_na(obs_var['trait'], 'description', PAR_NAinData)))
             
             if obs_var_method_id and obs_var_method_id.group(1).lower() in self.ontologies:
                 elements['Method Accession Number'].append(obs_var_method_id.group(0).upper())
 
-            elements['Reference Associated to the Method'].append(att_test(obs_var['method'], 'reference'))
-            elements['Scale'].append(att_test(obs_var['scale'], 'name', PAR_NAinData))
+            elements['Reference Associated to the Method'].append(get_attribute_or_na(obs_var['method'], 'reference'))
+            elements['Scale'].append(get_attribute_or_na(obs_var['scale'], 'name', PAR_NAinData))
 
         # Deleting empty columns
         data_elements = []
@@ -379,7 +379,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ
 
                 timestamps ={}
                 for measurement in obs_unit['observations']:
-                    if FLATTEN_boolean and att_test(measurement, 'observationTimeStamp'):
+                    if FLATTEN_boolean and get_attribute_or_na(measurement, 'observationTimeStamp'):
                         if measurement['observationTimeStamp'] not in timestamps:
                             timestamps[measurement['observationTimeStamp']] = copy.deepcopy(rowbuffer)
                         for obs_attribute in obs_header:
@@ -390,7 +390,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ
                                 timestamps[measurement['observationTimeStamp']][head.index(obs_attribute)
                                     ] = PAR_NAinData
                                 # DEBUG self.logger.info(obs_attribute + " does not exist in observation in observationUnit " + obs_unit['observationUnitDbId'])
-                        if re.sub('[\s]+', '_', att_test(measurement, 'observationVariableName', "NA variable")) in head:
+                        if re.sub('[\s]+', '_', get_attribute_or_na(measurement, 'observationVariableName', "NA variable")) in head:
                             timestamps[measurement['observationTimeStamp']][head.index(re.sub('[\s]+', '_', measurement["observationVariableName"]))] = str(
                                 measurement["value"])
                     
@@ -403,7 +403,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ
                             row[head.index(obs_attribute)
                                 ] = PAR_NAinData
                             # DEBUG self.logger.info(obs_attribute + " does not exist in observation in observationUnit " + obs_unit['observationUnitDbId'])
-                    if re.sub('[\s]+', '_', att_test(measurement, 'observationVariableName', "NA variable")) in head:
+                    if re.sub('[\s]+', '_', get_attribute_or_na(measurement, 'observationVariableName', "NA variable")) in head:
                         row[head.index(re.sub('[\s]+', '_', measurement["observationVariableName"]))] = str(
                             measurement["value"])
                         data_records.append('\t'.join(row))

From cbd9517fca8183dee28e931d55ceeea282746bcd Mon Sep 17 00:00:00 2001
From: Cyril Pommier <cyril.pommier@inra.fr>
Date: Wed, 9 Dec 2020 12:10:40 +0100
Subject: [PATCH 2/4] WIP

---
 brapi_to_isa.py           | 212 +++++++++++---------------------------
 brapi_to_isa_converter.py |   2 +-
 2 files changed, 59 insertions(+), 155 deletions(-)

diff --git a/brapi_to_isa.py b/brapi_to_isa.py
index 222fd58..afd9b1a 100755
--- a/brapi_to_isa.py
+++ b/brapi_to_isa.py
@@ -89,6 +89,8 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
     for k,assay in enumerate(isa_study.assays):
         obs_level_to_assay[assay.characteristic_categories[0]] = k
 
+    all_samples = []
+    all_levels = set()
     treatments = defaultdict(list)
     allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific)
 
@@ -100,6 +102,7 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         else:
             assay_level = 0
             obslvl = PAR_defaultObsLvl
+        all_levels.add(obslvl)
         # Getting the relevant germplasm used for that observation event:
         # ---------------------------------------------------------------
         this_source = isa_study.get_source(obs_unit['germplasmName'])
@@ -157,10 +160,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
 
             # Creating the corresponding ISA sample entity for structure the document:
             # ------------------------------------------------------------------------
-            growth_process = Process(executes_protocol=growth_protocol)
-            growth_process.inputs.append(this_source)
-            growth_process.outputs.append(this_isa_sample)
-            isa_study.process_sequence.append(growth_process)
+            # growth_process = Process(executes_protocol=growth_protocol)
+            # growth_process.inputs.append(this_source)
+            # growth_process.outputs.append(this_isa_sample)
+            # isa_study.process_sequence.append(growth_process)
 
         # Assays at observation unit level
         # --------------------------------
@@ -169,10 +172,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
 
         # TODO: This seems to have no impact, check with @procassera et al
         #isa_study.assays[assay_level].samples.append(this_isa_sample)
-
-        phenotyping_process = Process(executes_protocol=phenotyping_protocol)
-        phenotyping_process.inputs.append(this_isa_sample)
-        phenotyping_process.name =  get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
+        #
+        # phenotyping_process = Process(executes_protocol=phenotyping_protocol)
+        # phenotyping_process.inputs.append(this_isa_sample)
+        # phenotyping_process.name =  get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
 
         # Adding Parameter Value[Collection Date] column
         # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date"))
@@ -185,170 +188,70 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         # sample_collection_process.parameter_values.append(sampl_des_pv)
         
         # Data Transformation
+        #data_transformation_process = Process(executes_protocol=data_transformation_protocol)
+
+        #test
+        all_samples.append(this_isa_sample)
+
+        # # Adding Raw Data File column
+        # RAW_datafile = DataFile(filename=PAR_NAinData,
+        #                                 label="Raw Data File",
+        #                                 generated_from=[this_isa_sample])
+        # phenotyping_process.outputs.append(RAW_datafile)
+        # data_transformation_process.inputs.append(RAW_datafile)
+        #
+        # # Adding Derived Data File column
+        # #TODO: this is used here and for datafile generation, make it DRY in a dedicated method getDataFileNAme
+        # #TODO: the level part is also used at the beginning of the for obsUnit loop
+        # datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
+        # DER_datafile = DataFile(filename=datafilename,
+        #                                 label="Derived Data File")
+        # data_transformation_process.outputs.append(DER_datafile)
+        #
+        # isa_study.assays[assay_level].process_sequence.append(phenotyping_process)
+        # plink(growth_process, phenotyping_process)
+        #
+        # isa_study.assays[assay_level].process_sequence.append(data_transformation_process)
+        # plink(phenotyping_process, data_transformation_process)
+
+    # BEGIN test
+
+    for level in all_levels:
         data_transformation_process = Process(executes_protocol=data_transformation_protocol)
 
-        # Adding Raw Data File column
-        RAW_datafile = DataFile(filename=PAR_NAinData,
-                                        label="Raw Data File",
-                                        generated_from=[this_isa_sample])
-        phenotyping_process.outputs.append(RAW_datafile)
-        data_transformation_process.inputs.append(RAW_datafile)
-        
-        # Adding Derived Data File column
-        datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
-        DER_datafile = DataFile(filename=datafilename,
-                                        label="Derived Data File")
-        data_transformation_process.outputs.append(DER_datafile)
-
-        isa_study.assays[assay_level].process_sequence.append(phenotyping_process)
-        plink(growth_process, phenotyping_process)
-
-        isa_study.assays[assay_level].process_sequence.append(data_transformation_process)
-        plink(phenotyping_process, data_transformation_process)
-
-        
-    # Mapping treatments to ISA study Factor Value:
-    # ---------------------------------------------
-    for factor, modalities in treatments.items():
-        f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
-        modality = ";".join(modalities)
-        f.comments.append(Comment(name="Study Factor Values",value=modality))
-        f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))           
-        isa_study.factors.append(f)
-
-
-def create_study_sample_and_assay_full(client, brapi_study_id, isa_study,  growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST):
-
-    spat_dist_mapping_dictionary = {
-        "X": "X",
-        "Y": "Y",
-        "blockNumber": "block",
-        "plotNumber": "plot",
-        "plantNumber": "plant",
-        "replicate": "replicate"
-    }
-
-
-    # connecting the correct observation level to the correct assayobject
-    # NOTE observation level is temporarily stored inside isa_study.assays[i].characteristic_categories[0] better field available?
-    obs_level_to_assay = {}
-    for k,assay in enumerate(isa_study.assays):
-        obs_level_to_assay[assay.characteristic_categories[0]] = k
-
-    treatments = defaultdict(list)
-    allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific)
-    for obs_unit in OBSERVATIONUNITLIST:
-        if 'observationLevel' in obs_unit and obs_unit['observationLevel']:
-            assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()]
-            obslvl = obs_unit['observationLevel'].lower()
-        else:
-            assay_level = 0
-            obslvl = PAR_defaultObsLvl
-        # Getting the relevant germplasm used for that observation event:
-        # ---------------------------------------------------------------
-        this_source = isa_study.get_source(obs_unit['germplasmName'])
-        if this_source and obs_unit['observationUnitName'] not in allready_converted_obs_unit:
-            this_isa_sample = Sample(
-                name= obs_unit['observationUnitName'],
-                derives_from=[this_source])
-            allready_converted_obs_unit.append(obs_unit['observationUnitName'])
-
-            c = Characteristic(category=OntologyAnnotation(term="Observation Unit Type"),
-                               value=OntologyAnnotation(term=obslvl,
-                                                        term_source="",
-                                                        term_accession=""))
-            this_isa_sample.characteristics.append(c)
-
-            spat_dist = []
-            for key in spat_dist_mapping_dictionary:
-                if get_attribute_or_na(obs_unit, key):
-                    spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key])
-            if get_attribute_or_na(obs_unit, 'observationLevels'):
-                for lvl in obs_unit['observationLevels'].split(", "):
-                    if len(lvl.split(":")) == 2:
-                        a, b = lvl.split(":")
-                        spat_dist.append(a + ':' + b)
-                    elif len(lvl.split(":")) == 1:
-                        spat_dist.append(lvl)
-            spat_dist_str = ';'.join(spat_dist)
-            if spat_dist:
-                c = Characteristic(category=OntologyAnnotation(term="Spatial Distribution"),
-                                   value=OntologyAnnotation(term=spat_dist_str,
-                                                            term_source="",
-                                                            term_accession=""))
-                this_isa_sample.characteristics.append(c)
-
-            # Looking for treatment in BRAPI and mapping to ISA samples
-            # ---------------------------------------------------------
-            if get_attribute_or_na(obs_unit, 'treatments'):
-                treatmentbuffer = defaultdict(list)
-                for treatment in obs_unit['treatments']:
-                    if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'):
-
-                        if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]:
-                            treatmentbuffer[treatment['factor']].append(str(treatment['modality']))
-                for factor,modality in treatmentbuffer.items():
-                    modalities = ','.join(modality)
-                    if modalities not in treatments[factor]:
-                        treatments[factor].append(modalities)
-                    f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
-                    fv = FactorValue(factor_name=f,
-                                     value=OntologyAnnotation(term=modalities,
-                                                              term_source="",
-                                                              term_accession=""))
-                    this_isa_sample.factor_values.append(fv)
-            isa_study.samples.append(this_isa_sample)
-
-            # Creating the corresponding ISA sample entity for structure the document:
-            # ------------------------------------------------------------------------
-            growth_process = Process(executes_protocol=growth_protocol)
-            growth_process.inputs.append(this_source)
-            growth_process.outputs.append(this_isa_sample)
-            isa_study.process_sequence.append(growth_process)
-
-        # Assays at observation unit level
-        # --------------------------------
-
-        # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization
-
-        isa_study.assays[assay_level].samples.append(this_isa_sample)
-
         phenotyping_process = Process(executes_protocol=phenotyping_protocol)
         phenotyping_process.inputs.append(this_isa_sample)
-        phenotyping_process.name =  get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower()
-
-        # Adding Parameter Value[Collection Date] column
-        # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date"))
-        # col_date_pv = ParameterValue(category=col_date_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI))
-        # sample_collection_process.parameter_values.append(col_date_pv)
-
-        # Adding Parameter Value[Sample Description] column
-        # sampl_des_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Sample Description"))
-        # sampl_des_pv = ParameterValue(category=sampl_des_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI))
-        # sample_collection_process.parameter_values.append(sampl_des_pv)
-
-        # Data Transformation
-        data_transformation_process = Process(executes_protocol=data_transformation_protocol)
+        phenotyping_process.name = level
 
         # Adding Raw Data File column
         RAW_datafile = DataFile(filename=PAR_NAinData,
                                 label="Raw Data File",
-                                generated_from=[this_isa_sample])
+                                generated_from=all_samples)
         phenotyping_process.outputs.append(RAW_datafile)
         data_transformation_process.inputs.append(RAW_datafile)
 
+        for sample in all_samples:
+            growth_process = Process(executes_protocol=growth_protocol)
+            growth_process.inputs.append(sample.derives_from)
+            growth_process.outputs.append(sample)
+            isa_study.process_sequence.append(growth_process)
+
+
         # Adding Derived Data File column
-        datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt'
+        #TODO: this is used here and for datafile generation, make it DRY in a dedicated method getDataFileNAme
+        # TODO: the level part is also used at the beginning of the for obsUnit loop
+        datafilename = 'd_' + str(brapi_study_id) + '_' + level + '.txt'
         DER_datafile = DataFile(filename=datafilename,
                                 label="Derived Data File")
         data_transformation_process.outputs.append(DER_datafile)
 
-        isa_study.assays[assay_level].process_sequence.append(phenotyping_process)
-        plink(growth_process, phenotyping_process)
+        isa_study.assays[obs_level_to_assay[level]].process_sequence.append(phenotyping_process)
+        #plink(growth_process, phenotyping_process)
 
-        isa_study.assays[assay_level].process_sequence.append(data_transformation_process)
+        isa_study.assays[obs_level_to_assay[level]].process_sequence.append(data_transformation_process)
         plink(phenotyping_process, data_transformation_process)
 
+    #END test
 
     # Mapping treatments to ISA study Factor Value:
     # ---------------------------------------------
@@ -356,11 +259,12 @@ def create_study_sample_and_assay_full(client, brapi_study_id, isa_study,  growt
         f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor))
         modality = ";".join(modalities)
         f.comments.append(Comment(name="Study Factor Values",value=modality))
-        f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))
+        f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI))           
         isa_study.factors.append(f)
 
 
 
+
 def write_records_to_file(this_study_id, records, this_directory, filetype, ObservationLevel=''):
     logger.info('Writing to file')
     # tdf_file = 'out/' + this_study_id
diff --git a/brapi_to_isa_converter.py b/brapi_to_isa_converter.py
index 9a33e82..80be2fc 100644
--- a/brapi_to_isa_converter.py
+++ b/brapi_to_isa_converter.py
@@ -21,7 +21,7 @@ def get_attribute_or_na(dictionary, attribute, NA=""):
 
 PAR_NAinData = "NA in endpoint"
 PAR_NAinBrAPI = "NA in BrAPI"
-PAR_defaultObsLvl = "plant"
+PAR_defaultObsLvl = "plant" # TODO: check whether this default can generate misleading data
 PAR_suppObsLvl = ['study', 'block', 'sub-block', 'plot', 'sub-plot', 'pot', 'plant']
 
 class BrapiToIsaConverter:

From f51203a85012027b4fd549836248edd84d342a28 Mon Sep 17 00:00:00 2001
From: Erik Kimmel <erik.kimmel@inrae.fr>
Date: Mon, 14 Dec 2020 17:03:03 +0100
Subject: [PATCH 3/4] test: try to use single growth protocol.

---
 brapi_to_isa.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/brapi_to_isa.py b/brapi_to_isa.py
index afd9b1a..46485f5 100755
--- a/brapi_to_isa.py
+++ b/brapi_to_isa.py
@@ -224,17 +224,24 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         phenotyping_process.name = level
 
         # Adding Raw Data File column
+        # RAW_datafile = DataFile(filename=PAR_NAinData,
+        #                         label="Raw Data File",
+        #                         generated_from=all_samples[0])
         RAW_datafile = DataFile(filename=PAR_NAinData,
-                                label="Raw Data File",
-                                generated_from=all_samples)
+                                label="Raw Data File")
         phenotyping_process.outputs.append(RAW_datafile)
         data_transformation_process.inputs.append(RAW_datafile)
 
+
+
+
         for sample in all_samples:
             growth_process = Process(executes_protocol=growth_protocol)
-            growth_process.inputs.append(sample.derives_from)
+            growth_process.inputs.append(sample.derives_from[0])
             growth_process.outputs.append(sample)
             isa_study.process_sequence.append(growth_process)
+            # if growth_process.name != 'None':
+            #     print(growth_process.name)
 
 
         # Adding Derived Data File column
@@ -246,7 +253,7 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study,  growth_pro
         data_transformation_process.outputs.append(DER_datafile)
 
         isa_study.assays[obs_level_to_assay[level]].process_sequence.append(phenotyping_process)
-        #plink(growth_process, phenotyping_process)
+        plink(growth_process, phenotyping_process)
 
         isa_study.assays[obs_level_to_assay[level]].process_sequence.append(data_transformation_process)
         plink(phenotyping_process, data_transformation_process)

From ce8db04b9d1d2aa9bbacbed0177501efb8a4345a Mon Sep 17 00:00:00 2001
From: Cyril Pommier <cyril.pommier@inra.fr>
Date: Thu, 17 Dec 2020 11:21:53 +0100
Subject: [PATCH 4/4] Adding the right shebang

---
 brapi_to_isa.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/brapi_to_isa.py b/brapi_to_isa.py
index 46485f5..7f450a4 100755
--- a/brapi_to_isa.py
+++ b/brapi_to_isa.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import datetime
 import argparse
 import datetime