From da815467d589673aa0e6515972f779ae9ed95798 Mon Sep 17 00:00:00 2001 From: Cyril Pommier Date: Wed, 9 Dec 2020 11:09:36 +0100 Subject: [PATCH 1/4] Slight refactors --- brapi_to_isa.py | 188 ++++++++++++++++++++++++++++++++++---- brapi_to_isa_converter.py | 72 +++++++-------- 2 files changed, 204 insertions(+), 56 deletions(-) diff --git a/brapi_to_isa.py b/brapi_to_isa.py index 87c1ccf..222fd58 100755 --- a/brapi_to_isa.py +++ b/brapi_to_isa.py @@ -15,7 +15,7 @@ from isatools.model import * from brapi_client import BrapiClient -from brapi_to_isa_converter import BrapiToIsaConverter, att_test, PAR_NAinData, PAR_NAinBrAPI, PAR_defaultObsLvl, PAR_suppObsLvl +from brapi_to_isa_converter import BrapiToIsaConverter, get_attribute_or_na, PAR_NAinData, PAR_NAinBrAPI, PAR_defaultObsLvl, PAR_suppObsLvl __author__ = 'proccaserra (Philippe Rocca-Serra)' __author__ = 'cpommier (Cyril Pommier)' @@ -91,12 +91,14 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro treatments = defaultdict(list) allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific) + + # currently one assay paer material/source. Need one assay per level with all the source material for obs_unit in OBSERVATIONUNITLIST: if 'observationLevel' in obs_unit and obs_unit['observationLevel']: - i = obs_level_to_assay[obs_unit['observationLevel'].lower()] + assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()] obslvl = obs_unit['observationLevel'].lower() else: - i = 0 + assay_level = 0 obslvl = PAR_defaultObsLvl # Getting the relevant germplasm used for that observation event: # --------------------------------------------------------------- @@ -115,9 +117,9 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro spat_dist = [] for key in spat_dist_mapping_dictionary: - if att_test(obs_unit,key): + if get_attribute_or_na(obs_unit, key): spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key]) - if att_test(obs_unit,'observationLevels'): + if get_attribute_or_na(obs_unit, 'observationLevels'): for lvl in obs_unit['observationLevels'].split(", "): if len(lvl.split(":")) == 2: a, b = lvl.split(":") @@ -134,10 +136,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro # Looking for treatment in BRAPI and mapping to ISA samples # --------------------------------------------------------- - if att_test(obs_unit, 'treatments'): + if get_attribute_or_na(obs_unit, 'treatments'): treatmentbuffer = defaultdict(list) for treatment in obs_unit['treatments']: - if att_test(treatment,'factor') and att_test(treatment, 'modality'): + if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'): if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]: treatmentbuffer[treatment['factor']].append(str(treatment['modality'])) @@ -165,11 +167,12 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization - isa_study.assays[i].samples.append(this_isa_sample) + # TODO: This seems to have no impact, check with @procassera et al + #isa_study.assays[assay_level].samples.append(this_isa_sample) phenotyping_process = Process(executes_protocol=phenotyping_protocol) phenotyping_process.inputs.append(this_isa_sample) - phenotyping_process.name = att_test(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + phenotyping_process.name = get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() # Adding Parameter Value[Collection Date] column # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date")) @@ -192,15 +195,15 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro data_transformation_process.inputs.append(RAW_datafile) # Adding Derived Data File column - datafilename = 'd_' + str(brapi_study_id) + '_' + att_test(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' + datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' DER_datafile = DataFile(filename=datafilename, label="Derived Data File") data_transformation_process.outputs.append(DER_datafile) - isa_study.assays[i].process_sequence.append(phenotyping_process) + isa_study.assays[assay_level].process_sequence.append(phenotyping_process) plink(growth_process, phenotyping_process) - - isa_study.assays[i].process_sequence.append(data_transformation_process) + + isa_study.assays[assay_level].process_sequence.append(data_transformation_process) plink(phenotyping_process, data_transformation_process) @@ -213,6 +216,151 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI)) isa_study.factors.append(f) + +def create_study_sample_and_assay_full(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST): + + spat_dist_mapping_dictionary = { + "X": "X", + "Y": "Y", + "blockNumber": "block", + "plotNumber": "plot", + "plantNumber": "plant", + "replicate": "replicate" + } + + + # connecting the correct observation level to the correct assayobject + # NOTE observation level is temporarily stored inside isa_study.assays[i].characteristic_categories[0] better field available? + obs_level_to_assay = {} + for k,assay in enumerate(isa_study.assays): + obs_level_to_assay[assay.characteristic_categories[0]] = k + + treatments = defaultdict(list) + allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific) + for obs_unit in OBSERVATIONUNITLIST: + if 'observationLevel' in obs_unit and obs_unit['observationLevel']: + assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()] + obslvl = obs_unit['observationLevel'].lower() + else: + assay_level = 0 + obslvl = PAR_defaultObsLvl + # Getting the relevant germplasm used for that observation event: + # --------------------------------------------------------------- + this_source = isa_study.get_source(obs_unit['germplasmName']) + if this_source and obs_unit['observationUnitName'] not in allready_converted_obs_unit: + this_isa_sample = Sample( + name= obs_unit['observationUnitName'], + derives_from=[this_source]) + allready_converted_obs_unit.append(obs_unit['observationUnitName']) + + c = Characteristic(category=OntologyAnnotation(term="Observation Unit Type"), + value=OntologyAnnotation(term=obslvl, + term_source="", + term_accession="")) + this_isa_sample.characteristics.append(c) + + spat_dist = [] + for key in spat_dist_mapping_dictionary: + if get_attribute_or_na(obs_unit, key): + spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key]) + if get_attribute_or_na(obs_unit, 'observationLevels'): + for lvl in obs_unit['observationLevels'].split(", "): + if len(lvl.split(":")) == 2: + a, b = lvl.split(":") + spat_dist.append(a + ':' + b) + elif len(lvl.split(":")) == 1: + spat_dist.append(lvl) + spat_dist_str = ';'.join(spat_dist) + if spat_dist: + c = Characteristic(category=OntologyAnnotation(term="Spatial Distribution"), + value=OntologyAnnotation(term=spat_dist_str, + term_source="", + term_accession="")) + this_isa_sample.characteristics.append(c) + + # Looking for treatment in BRAPI and mapping to ISA samples + # --------------------------------------------------------- + if get_attribute_or_na(obs_unit, 'treatments'): + treatmentbuffer = defaultdict(list) + for treatment in obs_unit['treatments']: + if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'): + + if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]: + treatmentbuffer[treatment['factor']].append(str(treatment['modality'])) + for factor,modality in treatmentbuffer.items(): + modalities = ','.join(modality) + if modalities not in treatments[factor]: + treatments[factor].append(modalities) + f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor)) + fv = FactorValue(factor_name=f, + value=OntologyAnnotation(term=modalities, + term_source="", + term_accession="")) + this_isa_sample.factor_values.append(fv) + isa_study.samples.append(this_isa_sample) + + # Creating the corresponding ISA sample entity for structure the document: + # ------------------------------------------------------------------------ + growth_process = Process(executes_protocol=growth_protocol) + growth_process.inputs.append(this_source) + growth_process.outputs.append(this_isa_sample) + isa_study.process_sequence.append(growth_process) + + # Assays at observation unit level + # -------------------------------- + + # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization + + isa_study.assays[assay_level].samples.append(this_isa_sample) + + phenotyping_process = Process(executes_protocol=phenotyping_protocol) + phenotyping_process.inputs.append(this_isa_sample) + phenotyping_process.name = get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + + # Adding Parameter Value[Collection Date] column + # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date")) + # col_date_pv = ParameterValue(category=col_date_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI)) + # sample_collection_process.parameter_values.append(col_date_pv) + + # Adding Parameter Value[Sample Description] column + # sampl_des_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Sample Description")) + # sampl_des_pv = ParameterValue(category=sampl_des_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI)) + # sample_collection_process.parameter_values.append(sampl_des_pv) + + # Data Transformation + data_transformation_process = Process(executes_protocol=data_transformation_protocol) + + # Adding Raw Data File column + RAW_datafile = DataFile(filename=PAR_NAinData, + label="Raw Data File", + generated_from=[this_isa_sample]) + phenotyping_process.outputs.append(RAW_datafile) + data_transformation_process.inputs.append(RAW_datafile) + + # Adding Derived Data File column + datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' + DER_datafile = DataFile(filename=datafilename, + label="Derived Data File") + data_transformation_process.outputs.append(DER_datafile) + + isa_study.assays[assay_level].process_sequence.append(phenotyping_process) + plink(growth_process, phenotyping_process) + + isa_study.assays[assay_level].process_sequence.append(data_transformation_process) + plink(phenotyping_process, data_transformation_process) + + + # Mapping treatments to ISA study Factor Value: + # --------------------------------------------- + for factor, modalities in treatments.items(): + f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor)) + modality = ";".join(modalities) + f.comments.append(Comment(name="Study Factor Values",value=modality)) + f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI)) + isa_study.factors.append(f) + + + def write_records_to_file(this_study_id, records, this_directory, filetype, ObservationLevel=''): logger.info('Writing to file') # tdf_file = 'out/' + this_study_id @@ -300,18 +448,18 @@ def main(arg=SERVER): investigation.title = trial['trialName'] #Investigation fields unavailable in BrAPI - investigation.description = att_test(trial, "trialDescription", PAR_NAinData) + investigation.description = get_attribute_or_na(trial, "trialDescription", PAR_NAinData) investigation.submission_date = PAR_NAinBrAPI investigation.public_release_date = PAR_NAinBrAPI investigation.comments.append(Comment(name="License", value=PAR_NAinBrAPI)) - if att_test(trial, 'contacts'): + if get_attribute_or_na(trial, 'contacts'): for brapicontact in trial['contacts']: #NOTE: brapi has just name attribute -> no separate first/last name ContactName = brapicontact['name'].split(' ') - role = OntologyAnnotation(term=att_test(brapicontact, 'type', PAR_NAinData)) + role = OntologyAnnotation(term=get_attribute_or_na(brapicontact, 'type', PAR_NAinData)) contact = Person(first_name=ContactName[0], last_name=' '.join(ContactName[1:]), - affiliation=att_test(brapicontact,'institutionName', PAR_NAinData), email=att_test(brapicontact,'email'), address=PAR_NAinBrAPI, roles=[role]) + affiliation=get_attribute_or_na(brapicontact, 'institutionName', PAR_NAinData), email=get_attribute_or_na(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role]) investigation.contacts.append(contact) else: role = OntologyAnnotation(term=PAR_NAinData) @@ -321,10 +469,10 @@ def main(arg=SERVER): investigation.comments.append(Comment(name="MIAPPE version", value="1.1")) - if att_test(trial, 'publications'): + if get_attribute_or_na(trial, 'publications'): for brapipublic in trial['publications']: #This is BrAPI v1.3 specific (when older, skipped) - publication = Publication(doi=att_test(brapipublic, 'publicationPUI', PAR_NAinData)) + publication = Publication(doi=get_attribute_or_na(brapipublic, 'publicationPUI', PAR_NAinData)) publication.status = OntologyAnnotation(term="published") investigation.publications.append(publication) else: @@ -395,7 +543,7 @@ def main(arg=SERVER): germplasminfo[germ['germplasmDbId']] = [germ['accessionNumber']] # Associating ISA sources to ISA isa_study object - isa_study.sources.append(source) + isa_study.sources.append(source) # this line has all the necessary information, but it doesn't end up in the study file # Now dealing with BRAPI observation units and attempting to create ISA samples create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST) diff --git a/brapi_to_isa_converter.py b/brapi_to_isa_converter.py index 5e1f4cf..9a33e82 100644 --- a/brapi_to_isa_converter.py +++ b/brapi_to_isa_converter.py @@ -8,7 +8,7 @@ import re import platform -def att_test(dictionary, attribute, NA=""): +def get_attribute_or_na(dictionary, attribute, NA=""): if attribute in dictionary and dictionary[attribute]: if dictionary[attribute] in ['NA', 'na','Na', 'n.a.', 'N.A.', 'N.a.']: return NA @@ -59,7 +59,7 @@ def get_obs_levels(self, brapi_study_id, OBSERVATIONUNITLIST): for obs in ou['observations']: if 'observationLevel' in ou and ou['observationLevel']: obs_level_in_study[ou['observationLevel'].lower()].add( - re.sub('[\s]+', '_', att_test(obs, 'observationVariableName', "NA variable name"))) + re.sub('[\s]+', '_', get_attribute_or_na(obs, 'observationVariableName', "NA variable name"))) if 'observationLevels' in ou.keys() and ou['observationLevels']: for obslvl in ou['observationLevels'].split(","): if len(obslvl.split(":")) == 2: @@ -68,7 +68,7 @@ def get_obs_levels(self, brapi_study_id, OBSERVATIONUNITLIST): elif len(obslvl.split(":")) == 1: obs_levels[ou['observationLevel'].lower()].add(obslvl) else: - obs_level_in_study[PAR_defaultObsLvl].add(re.sub('[\s]+', '_', att_test(obs, 'observationVariableName', "NA variable"))) + obs_level_in_study[PAR_defaultObsLvl].add(re.sub('[\s]+', '_', get_attribute_or_na(obs, 'observationVariableName', "NA variable"))) lvlNotAvailable = True if lvlNotAvailable: self.logger.info("This BrAPI endpoint does not contain observation levels. Please add 'observationLevel' to the observations. Default " + PAR_defaultObsLvl + " is taken as observation level.") @@ -82,8 +82,8 @@ def organism_characteristic(self, all_germplasm_attributes, taxonId): """" Given a a dictionairy with the germplasm details, retrieve the organism characteristic""" #Testing for genus and species availability - genus = att_test(all_germplasm_attributes, 'genus') - species = att_test(all_germplasm_attributes, 'species') + genus = get_attribute_or_na(all_germplasm_attributes, 'genus') + species = get_attribute_or_na(all_germplasm_attributes, 'species') #Checking if taxonId is supplied or not, otherwise fetch it from www.ebi.ac.uk if not taxonId or not taxonId.isdigit(): @@ -91,7 +91,7 @@ def organism_characteristic(self, all_germplasm_attributes, taxonId): if taxonId: return self.create_isa_characteristic('Organism', "NCBITAXON:{}".format(str(taxonId))) else: - return self.create_isa_characteristic('Organism', att_test(all_germplasm_attributes, 'commonCropName', PAR_NAinData)) + return self.create_isa_characteristic('Organism', get_attribute_or_na(all_germplasm_attributes, 'commonCropName', PAR_NAinData)) def create_germplasm_chars(self, germplasm): """" Given a BRAPI Germplasm ID, retrieve the list of all attributes from BRAPI and returns a list of ISA @@ -106,7 +106,7 @@ def create_germplasm_chars(self, germplasm): else: all_germplasm_attributes = germplasm - if att_test(all_germplasm_attributes, 'taxonIds'): + if get_attribute_or_na(all_germplasm_attributes, 'taxonIds'): for taxonid in all_germplasm_attributes['taxonIds']: if taxonid['sourceName'] in ['NCBITaxon', 'ncbiTaxon']: c = self.organism_characteristic( @@ -130,7 +130,7 @@ def create_germplasm_chars(self, germplasm): for key in mapping_dictionnary: c = self.create_isa_characteristic( - mapping_dictionnary[key], str(att_test(all_germplasm_attributes, key))) + mapping_dictionnary[key], str(get_attribute_or_na(all_germplasm_attributes, key))) returned_characteristics.append(c) return returned_characteristics @@ -154,17 +154,17 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study): else: this_study.title = PAR_NAinData - this_study.description = att_test(brapi_study, 'studyDescription', PAR_NAinData) + this_study.description = get_attribute_or_na(brapi_study, 'studyDescription', PAR_NAinData) - oa_st_design = OntologyAnnotation(term=att_test(brapi_study, 'studyType', PAR_NAinData)) + oa_st_design = OntologyAnnotation(term=get_attribute_or_na(brapi_study, 'studyType', PAR_NAinData)) oa_st_design.comments.append(Comment(name="Study Design Description", value=PAR_NAinBrAPI)) oa_st_design.comments.append(Comment(name="Observation Unit Level Hierarchy", value=PAR_NAinBrAPI)) oa_st_design.comments.append(Comment(name="Observation Unit Description", value=PAR_NAinBrAPI)) oa_st_design.comments.append(Comment(name="Map of Experimental Design", value=PAR_NAinBrAPI)) this_study.design_descriptors = [oa_st_design] - this_study.comments.append(Comment(name="Study Start Date", value=att_test(brapi_study, 'startDate'))) - this_study.comments.append(Comment(name="Study End Date", value=att_test(brapi_study, 'endDate'))) + this_study.comments.append(Comment(name="Study Start Date", value=get_attribute_or_na(brapi_study, 'startDate'))) + this_study.comments.append(Comment(name="Study End Date", value=get_attribute_or_na(brapi_study, 'endDate'))) this_study.comments.append(Comment(name="Trait Definition File", value="t_" + str(brapi_study_id) + ".txt")) this_study.comments.append(Comment(name="Description of Growth Facility",value=PAR_NAinBrAPI)) this_study.comments.append(Comment(name="Type of Growth Facility",value=PAR_NAinBrAPI)) @@ -172,7 +172,7 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study): # Adding Location information if 'location' in brapi_study and brapi_study['location']: - this_study.comments.append(Comment(name="Study Experimental Site", value=att_test(brapi_study['location'], 'name', PAR_NAinData))) + this_study.comments.append(Comment(name="Study Experimental Site", value=get_attribute_or_na(brapi_study['location'], 'name', PAR_NAinData))) if 'countryCode' in brapi_study['location'] and brapi_study['location']['countryCode']: if len(brapi_study['location']['countryCode']) == 3: @@ -188,11 +188,11 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study): this_study.comments.append( Comment(name="Study Country", value=PAR_NAinData)) - if att_test(brapi_study['location'], 'latitude'): + if get_attribute_or_na(brapi_study['location'], 'latitude'): this_study.comments.append(Comment(name="Study Latitude", value=brapi_study['location']['latitude'])) - if att_test(brapi_study['location'], 'longitude'): + if get_attribute_or_na(brapi_study['location'], 'longitude'): this_study.comments.append(Comment(name="Study Longitude", value=brapi_study['location']['longitude'])) - if att_test(brapi_study['location'], 'altitude'): + if get_attribute_or_na(brapi_study['location'], 'altitude'): this_study.comments.append(Comment(name="Study Altitude",value=brapi_study['location']['altitude'])) else: self.logger.info("BrAPI study " + brapi_study['studyDbId'] + "has no location attribute, this is mandatory to be MIAPPE compliant.") @@ -200,17 +200,17 @@ def create_isa_study(self, brapi_study_id, investigation, obs_levels_in_study): this_study.comments.append(Comment(name="Study Experimental Site",value=PAR_NAinData)) # Adding Contacts information - if att_test(brapi_study,'contacts' ): + if get_attribute_or_na(brapi_study, 'contacts'): for brapicontact in brapi_study['contacts']: #NOTE: brapi has just name attribute -> no separate first/last name ContactName = brapicontact['name'].split(' ') - role = OntologyAnnotation(term=att_test(brapicontact, 'type', PAR_NAinData)) + role = OntologyAnnotation(term=get_attribute_or_na(brapicontact, 'type', PAR_NAinData)) contact = Person(first_name=ContactName[0], last_name=ContactName[1], - affiliation=att_test(brapicontact, 'institutionName', PAR_NAinData), email=att_test(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role]) + affiliation=get_attribute_or_na(brapicontact, 'institutionName', PAR_NAinData), email=get_attribute_or_na(brapicontact, 'email'), address=PAR_NAinBrAPI, roles=[role]) this_study.contacts.append(contact) # Adding dataLinks information - if att_test(brapi_study,'dataLinks'): + if get_attribute_or_na(brapi_study, 'dataLinks'): for brapidata in brapi_study['dataLinks']: this_study.comments.append(Comment(name="Study Data File Link", value=brapidata['url'])) this_study.comments.append(Comment(name="Study Data File Description", value=brapidata['type'])) @@ -265,39 +265,39 @@ def create_isa_tdf_from_obsvars(self, obsvars): # decorating dictionairy for obs_var in obsvars: - obs_var_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var, 'observationVariableDbId')) - obs_var_name = att_test(obs_var, 'name') - obs_var_trait_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var['trait'], 'traitDbId')) - obs_var_method_id = re.search('([a-zA-Z]*):[0-9]*', att_test(obs_var['method'], 'methodDbId')) + obs_var_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var, 'observationVariableDbId')) + obs_var_name = get_attribute_or_na(obs_var, 'name') + obs_var_trait_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var['trait'], 'traitDbId')) + obs_var_method_id = re.search('([a-zA-Z]*):[0-9]*', get_attribute_or_na(obs_var['method'], 'methodDbId')) elements['Variable ID'].append(re.sub('[\s]+', '_', obs_var_name)) if obs_var_id and obs_var_id.group(1).lower() in self.ontologies: - if att_test(obs_var, 'synonyms'): + if get_attribute_or_na(obs_var, 'synonyms'): elements['Variable Name'].append('; '.join(obs_var['synonyms'])) elements['Variable Accession Number'].append(obs_var_id.group(0).upper()) else: - if att_test(obs_var, 'synonyms'): - elements['Variable Name'].append('; '.join(obs_var['synonyms']) + ' (BrAPI variableDbId: ' + att_test(obs_var, 'observationVariableDbId', PAR_NAinData) + ')') + if get_attribute_or_na(obs_var, 'synonyms'): + elements['Variable Name'].append('; '.join(obs_var['synonyms']) + ' (BrAPI variableDbId: ' + get_attribute_or_na(obs_var, 'observationVariableDbId', PAR_NAinData) + ')') else: - elements['Variable Name'].append('(BrAPI variableDbId: ' + att_test(obs_var, 'observationVariableDbId', PAR_NAinData) + ')') + elements['Variable Name'].append('(BrAPI variableDbId: ' + get_attribute_or_na(obs_var, 'observationVariableDbId', PAR_NAinData) + ')') - elements['Trait'].append(att_test(obs_var['trait'], 'name')) + elements['Trait'].append(get_attribute_or_na(obs_var['trait'], 'name')) if obs_var_trait_id and obs_var_trait_id.group(1).lower() in self.ontologies: elements['Trait Accession Number'].append(obs_var_trait_id.group(0).upper()) - elements['Method'].append(att_test(obs_var['method'], 'name', att_test(obs_var, 'name', PAR_NAinData))) + elements['Method'].append(get_attribute_or_na(obs_var['method'], 'name', get_attribute_or_na(obs_var, 'name', PAR_NAinData))) - elements['Method Description'].append(att_test(obs_var['method'], 'description', att_test(obs_var['trait'], 'description', PAR_NAinData))) + elements['Method Description'].append(get_attribute_or_na(obs_var['method'], 'description', get_attribute_or_na(obs_var['trait'], 'description', PAR_NAinData))) if obs_var_method_id and obs_var_method_id.group(1).lower() in self.ontologies: elements['Method Accession Number'].append(obs_var_method_id.group(0).upper()) - elements['Reference Associated to the Method'].append(att_test(obs_var['method'], 'reference')) - elements['Scale'].append(att_test(obs_var['scale'], 'name', PAR_NAinData)) + elements['Reference Associated to the Method'].append(get_attribute_or_na(obs_var['method'], 'reference')) + elements['Scale'].append(get_attribute_or_na(obs_var['scale'], 'name', PAR_NAinData)) # Deleting empty columns data_elements = [] @@ -379,7 +379,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ timestamps ={} for measurement in obs_unit['observations']: - if FLATTEN_boolean and att_test(measurement, 'observationTimeStamp'): + if FLATTEN_boolean and get_attribute_or_na(measurement, 'observationTimeStamp'): if measurement['observationTimeStamp'] not in timestamps: timestamps[measurement['observationTimeStamp']] = copy.deepcopy(rowbuffer) for obs_attribute in obs_header: @@ -390,7 +390,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ timestamps[measurement['observationTimeStamp']][head.index(obs_attribute) ] = PAR_NAinData # DEBUG self.logger.info(obs_attribute + " does not exist in observation in observationUnit " + obs_unit['observationUnitDbId']) - if re.sub('[\s]+', '_', att_test(measurement, 'observationVariableName', "NA variable")) in head: + if re.sub('[\s]+', '_', get_attribute_or_na(measurement, 'observationVariableName', "NA variable")) in head: timestamps[measurement['observationTimeStamp']][head.index(re.sub('[\s]+', '_', measurement["observationVariableName"]))] = str( measurement["value"]) @@ -403,7 +403,7 @@ def create_isa_obs_data_from_obsvars(self, obs_units, obs_variables, level, germ row[head.index(obs_attribute) ] = PAR_NAinData # DEBUG self.logger.info(obs_attribute + " does not exist in observation in observationUnit " + obs_unit['observationUnitDbId']) - if re.sub('[\s]+', '_', att_test(measurement, 'observationVariableName', "NA variable")) in head: + if re.sub('[\s]+', '_', get_attribute_or_na(measurement, 'observationVariableName', "NA variable")) in head: row[head.index(re.sub('[\s]+', '_', measurement["observationVariableName"]))] = str( measurement["value"]) data_records.append('\t'.join(row)) From cbd9517fca8183dee28e931d55ceeea282746bcd Mon Sep 17 00:00:00 2001 From: Cyril Pommier Date: Wed, 9 Dec 2020 12:10:40 +0100 Subject: [PATCH 2/4] WIP --- brapi_to_isa.py | 212 +++++++++++--------------------------- brapi_to_isa_converter.py | 2 +- 2 files changed, 59 insertions(+), 155 deletions(-) diff --git a/brapi_to_isa.py b/brapi_to_isa.py index 222fd58..afd9b1a 100755 --- a/brapi_to_isa.py +++ b/brapi_to_isa.py @@ -89,6 +89,8 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro for k,assay in enumerate(isa_study.assays): obs_level_to_assay[assay.characteristic_categories[0]] = k + all_samples = [] + all_levels = set() treatments = defaultdict(list) allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific) @@ -100,6 +102,7 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro else: assay_level = 0 obslvl = PAR_defaultObsLvl + all_levels.add(obslvl) # Getting the relevant germplasm used for that observation event: # --------------------------------------------------------------- this_source = isa_study.get_source(obs_unit['germplasmName']) @@ -157,10 +160,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro # Creating the corresponding ISA sample entity for structure the document: # ------------------------------------------------------------------------ - growth_process = Process(executes_protocol=growth_protocol) - growth_process.inputs.append(this_source) - growth_process.outputs.append(this_isa_sample) - isa_study.process_sequence.append(growth_process) + # growth_process = Process(executes_protocol=growth_protocol) + # growth_process.inputs.append(this_source) + # growth_process.outputs.append(this_isa_sample) + # isa_study.process_sequence.append(growth_process) # Assays at observation unit level # -------------------------------- @@ -169,10 +172,10 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro # TODO: This seems to have no impact, check with @procassera et al #isa_study.assays[assay_level].samples.append(this_isa_sample) - - phenotyping_process = Process(executes_protocol=phenotyping_protocol) - phenotyping_process.inputs.append(this_isa_sample) - phenotyping_process.name = get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + # + # phenotyping_process = Process(executes_protocol=phenotyping_protocol) + # phenotyping_process.inputs.append(this_isa_sample) + # phenotyping_process.name = get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() # Adding Parameter Value[Collection Date] column # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date")) @@ -185,170 +188,70 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro # sample_collection_process.parameter_values.append(sampl_des_pv) # Data Transformation + #data_transformation_process = Process(executes_protocol=data_transformation_protocol) + + #test + all_samples.append(this_isa_sample) + + # # Adding Raw Data File column + # RAW_datafile = DataFile(filename=PAR_NAinData, + # label="Raw Data File", + # generated_from=[this_isa_sample]) + # phenotyping_process.outputs.append(RAW_datafile) + # data_transformation_process.inputs.append(RAW_datafile) + # + # # Adding Derived Data File column + # #TODO: this is used here and for datafile generation, make it DRY in a dedicated method getDataFileNAme + # #TODO: the level part is also used at the begining of the for obsUnit loop + # datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' + # DER_datafile = DataFile(filename=datafilename, + # label="Derived Data File") + # data_transformation_process.outputs.append(DER_datafile) + # + # isa_study.assays[assay_level].process_sequence.append(phenotyping_process) + # plink(growth_process, phenotyping_process) + # + # isa_study.assays[assay_level].process_sequence.append(data_transformation_process) + # plink(phenotyping_process, data_transformation_process) + + # BEGIN test + + for level in all_levels: data_transformation_process = Process(executes_protocol=data_transformation_protocol) - # Adding Raw Data File column - RAW_datafile = DataFile(filename=PAR_NAinData, - label="Raw Data File", - generated_from=[this_isa_sample]) - phenotyping_process.outputs.append(RAW_datafile) - data_transformation_process.inputs.append(RAW_datafile) - - # Adding Derived Data File column - datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' - DER_datafile = DataFile(filename=datafilename, - label="Derived Data File") - data_transformation_process.outputs.append(DER_datafile) - - isa_study.assays[assay_level].process_sequence.append(phenotyping_process) - plink(growth_process, phenotyping_process) - - isa_study.assays[assay_level].process_sequence.append(data_transformation_process) - plink(phenotyping_process, data_transformation_process) - - - # Mapping treatments to ISA study Factor Value: - # --------------------------------------------- - for factor, modalities in treatments.items(): - f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor)) - modality = ";".join(modalities) - f.comments.append(Comment(name="Study Factor Values",value=modality)) - f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI)) - isa_study.factors.append(f) - - -def create_study_sample_and_assay_full(client, brapi_study_id, isa_study, growth_protocol, phenotyping_protocol, data_transformation_protocol, OBSERVATIONUNITLIST): - - spat_dist_mapping_dictionary = { - "X": "X", - "Y": "Y", - "blockNumber": "block", - "plotNumber": "plot", - "plantNumber": "plant", - "replicate": "replicate" - } - - - # connecting the correct observation level to the correct assayobject - # NOTE observation level is temporarily stored inside isa_study.assays[i].characteristic_categories[0] better field available? - obs_level_to_assay = {} - for k,assay in enumerate(isa_study.assays): - obs_level_to_assay[assay.characteristic_categories[0]] = k - - treatments = defaultdict(list) - allready_converted_obs_unit = [] # Allow to handle multiyear observation units NOTE (INRA specific) - for obs_unit in OBSERVATIONUNITLIST: - if 'observationLevel' in obs_unit and obs_unit['observationLevel']: - assay_level = obs_level_to_assay[obs_unit['observationLevel'].lower()] - obslvl = obs_unit['observationLevel'].lower() - else: - assay_level = 0 - obslvl = PAR_defaultObsLvl - # Getting the relevant germplasm used for that observation event: - # --------------------------------------------------------------- - this_source = isa_study.get_source(obs_unit['germplasmName']) - if this_source and obs_unit['observationUnitName'] not in allready_converted_obs_unit: - this_isa_sample = Sample( - name= obs_unit['observationUnitName'], - derives_from=[this_source]) - allready_converted_obs_unit.append(obs_unit['observationUnitName']) - - c = Characteristic(category=OntologyAnnotation(term="Observation Unit Type"), - value=OntologyAnnotation(term=obslvl, - term_source="", - term_accession="")) - this_isa_sample.characteristics.append(c) - - spat_dist = [] - for key in spat_dist_mapping_dictionary: - if get_attribute_or_na(obs_unit, key): - spat_dist.append(spat_dist_mapping_dictionary[key] + ':' + obs_unit[key]) - if get_attribute_or_na(obs_unit, 'observationLevels'): - for lvl in obs_unit['observationLevels'].split(", "): - if len(lvl.split(":")) == 2: - a, b = lvl.split(":") - spat_dist.append(a + ':' + b) - elif len(lvl.split(":")) == 1: - spat_dist.append(lvl) - spat_dist_str = ';'.join(spat_dist) - if spat_dist: - c = Characteristic(category=OntologyAnnotation(term="Spatial Distribution"), - value=OntologyAnnotation(term=spat_dist_str, - term_source="", - term_accession="")) - this_isa_sample.characteristics.append(c) - - # Looking for treatment in BRAPI and mapping to ISA samples - # --------------------------------------------------------- - if get_attribute_or_na(obs_unit, 'treatments'): - treatmentbuffer = defaultdict(list) - for treatment in obs_unit['treatments']: - if get_attribute_or_na(treatment, 'factor') and get_attribute_or_na(treatment, 'modality'): - - if str(treatment['modality']) not in treatmentbuffer[treatment['factor']]: - treatmentbuffer[treatment['factor']].append(str(treatment['modality'])) - for factor,modality in treatmentbuffer.items(): - modalities = ','.join(modality) - if modalities not in treatments[factor]: - treatments[factor].append(modalities) - f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor)) - fv = FactorValue(factor_name=f, - value=OntologyAnnotation(term=modalities, - term_source="", - term_accession="")) - this_isa_sample.factor_values.append(fv) - isa_study.samples.append(this_isa_sample) - - # Creating the corresponding ISA sample entity for structure the document: - # ------------------------------------------------------------------------ - growth_process = Process(executes_protocol=growth_protocol) - growth_process.inputs.append(this_source) - growth_process.outputs.append(this_isa_sample) - isa_study.process_sequence.append(growth_process) - - # Assays at observation unit level - # -------------------------------- - - # !!!: fix isatab.py to access other protocol_type values to enable Assay Tab serialization - - isa_study.assays[assay_level].samples.append(this_isa_sample) - phenotyping_process = Process(executes_protocol=phenotyping_protocol) phenotyping_process.inputs.append(this_isa_sample) - phenotyping_process.name = get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() - - # Adding Parameter Value[Collection Date] column - # col_date_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Collection Date")) - # col_date_pv = ParameterValue(category=col_date_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI)) - # sample_collection_process.parameter_values.append(col_date_pv) - - # Adding Parameter Value[Sample Description] column - # sampl_des_pp = ProtocolParameter(parameter_name=OntologyAnnotation(term="Sample Description")) - # sampl_des_pv = ParameterValue(category=sampl_des_pp,value=OntologyAnnotation(term=PAR_NAinBrAPI)) - # sample_collection_process.parameter_values.append(sampl_des_pv) - - # Data Transformation - data_transformation_process = Process(executes_protocol=data_transformation_protocol) + phenotyping_process.name = level # Adding Raw Data File column RAW_datafile = DataFile(filename=PAR_NAinData, label="Raw Data File", - generated_from=[this_isa_sample]) + generated_from=all_samples) phenotyping_process.outputs.append(RAW_datafile) data_transformation_process.inputs.append(RAW_datafile) + for sample in all_samples: + growth_process = Process(executes_protocol=growth_protocol) + growth_process.inputs.append(sample.derives_from) + growth_process.outputs.append(sample) + isa_study.process_sequence.append(growth_process) + + # Adding Derived Data File column - datafilename = 'd_' + str(brapi_study_id) + '_' + get_attribute_or_na(obs_unit, 'observationLevel', PAR_defaultObsLvl).lower() + '.txt' + #TODO: this is used here and for datafile generation, make it DRY in a dedicated method getDataFileNAme + # TODO: the level part is also used at the begining of the for obsUnit loop + datafilename = 'd_' + str(brapi_study_id) + '_' + level + '.txt' DER_datafile = DataFile(filename=datafilename, label="Derived Data File") data_transformation_process.outputs.append(DER_datafile) - isa_study.assays[assay_level].process_sequence.append(phenotyping_process) - plink(growth_process, phenotyping_process) + isa_study.assays[obs_level_to_assay[level]].process_sequence.append(phenotyping_process) + #plink(growth_process, phenotyping_process) - isa_study.assays[assay_level].process_sequence.append(data_transformation_process) + isa_study.assays[obs_level_to_assay[level]].process_sequence.append(data_transformation_process) plink(phenotyping_process, data_transformation_process) + #END test # Mapping treatments to ISA study Factor Value: # --------------------------------------------- @@ -356,11 +259,12 @@ def create_study_sample_and_assay_full(client, brapi_study_id, isa_study, growt f = StudyFactor(name=factor, factor_type=OntologyAnnotation(term=factor)) modality = ";".join(modalities) f.comments.append(Comment(name="Study Factor Values",value=modality)) - f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI)) + f.comments.append(Comment(name="Study Factor Description", value=PAR_NAinBrAPI)) isa_study.factors.append(f) + def write_records_to_file(this_study_id, records, this_directory, filetype, ObservationLevel=''): logger.info('Writing to file') # tdf_file = 'out/' + this_study_id diff --git a/brapi_to_isa_converter.py b/brapi_to_isa_converter.py index 9a33e82..80be2fc 100644 --- a/brapi_to_isa_converter.py +++ b/brapi_to_isa_converter.py @@ -21,7 +21,7 @@ def get_attribute_or_na(dictionary, attribute, NA=""): PAR_NAinData = "NA in endpoint" PAR_NAinBrAPI = "NA in BrAPI" -PAR_defaultObsLvl = "plant" +PAR_defaultObsLvl = "plant" # TODO: check this can generate misleading data PAR_suppObsLvl = ['study', 'block', 'sub-block', 'plot', 'sub-plot', 'pot', 'plant'] class BrapiToIsaConverter: From f51203a85012027b4fd549836248edd84d342a28 Mon Sep 17 00:00:00 2001 From: Erik Kimmel Date: Mon, 14 Dec 2020 17:03:03 +0100 Subject: [PATCH 3/4] test: try to use single growth protocol. --- brapi_to_isa.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/brapi_to_isa.py b/brapi_to_isa.py index afd9b1a..46485f5 100755 --- a/brapi_to_isa.py +++ b/brapi_to_isa.py @@ -224,17 +224,24 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro phenotyping_process.name = level # Adding Raw Data File column + # RAW_datafile = DataFile(filename=PAR_NAinData, + # label="Raw Data File", + # generated_from=all_samples[0]) RAW_datafile = DataFile(filename=PAR_NAinData, - label="Raw Data File", - generated_from=all_samples) + label="Raw Data File") phenotyping_process.outputs.append(RAW_datafile) data_transformation_process.inputs.append(RAW_datafile) + + + for sample in all_samples: growth_process = Process(executes_protocol=growth_protocol) - growth_process.inputs.append(sample.derives_from) + growth_process.inputs.append(sample.derives_from[0]) growth_process.outputs.append(sample) isa_study.process_sequence.append(growth_process) + # if growth_process.name != 'None': + # print(growth_process.name) # Adding Derived Data File column @@ -246,7 +253,7 @@ def create_study_sample_and_assay(client, brapi_study_id, isa_study, growth_pro data_transformation_process.outputs.append(DER_datafile) isa_study.assays[obs_level_to_assay[level]].process_sequence.append(phenotyping_process) - #plink(growth_process, phenotyping_process) + plink(growth_process, phenotyping_process) isa_study.assays[obs_level_to_assay[level]].process_sequence.append(data_transformation_process) plink(phenotyping_process, data_transformation_process) From ce8db04b9d1d2aa9bbacbed0177501efb8a4345a Mon Sep 17 00:00:00 2001 From: Cyril Pommier Date: Thu, 17 Dec 2020 11:21:53 +0100 Subject: [PATCH 4/4] Adding the right shebang --- brapi_to_isa.py | 1 + 1 file changed, 1 insertion(+) diff --git a/brapi_to_isa.py b/brapi_to_isa.py index 46485f5..7f450a4 100755 --- a/brapi_to_isa.py +++ b/brapi_to_isa.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import datetime import argparse import datetime