Skip to content

Commit

Permalink
Merge pull request #653 from opencb/TASK-1111
Browse files Browse the repository at this point in the history
TASK-1111 Add Pharmacogenomics data to CellBase
  • Loading branch information
jtarraga authored Jul 12, 2023
2 parents 8a0ab9c + b62f088 commit 4374d16
Show file tree
Hide file tree
Showing 24 changed files with 2,157 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ public class LoadCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;

@Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, "
+ "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' loads everything",
required = true, arity = 1)
@Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation,"
+ " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics."
+ " 'all' loads everything", required = true, arity = 1)
public String data;

@Parameter(names = {"-i", "--input"}, required = true, arity = 1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import java.util.Collections;
import java.util.List;

import static org.opencb.cellbase.lib.EtlCommons.PHARMGKB_DATA;

/**
* Created by imedina on 03/02/15.
*/
Expand Down Expand Up @@ -163,6 +165,9 @@ public void execute() {
case EtlCommons.PUBMED_DATA:
parser = buildPubMed();
break;
case EtlCommons.PHARMACOGENOMICS_DATA:
parser = buildPharmacogenomics();
break;
default:
logger.error("Build option '" + buildCommandOptions.data + "' is not valid");
break;
Expand Down Expand Up @@ -414,4 +419,22 @@ private CellBaseBuilder buildPubMed() throws IOException {
CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubmedOutputFolder);
return new PubMedBuilder(pubmedInputFolder, serializer);
}

/**
 * Prepares the pharmacogenomics build step.
 *
 * Ensures the pharmacogenomics output folder exists, copies the PharmGKB version file from the download
 * folder into the output folder when that file is present, and returns a {@link PharmGKBBuilder} that reads
 * from the download folder and writes through a JSON file serializer targeting the output folder.
 *
 * @return the builder for the pharmacogenomics data
 * @throws IOException if the output folder cannot be created or the version file cannot be copied
 */
private CellBaseBuilder buildPharmacogenomics() throws IOException {
    Path inFolder = downloadFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA);
    Path outFolder = buildFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA);
    if (!Files.exists(outFolder)) {
        // Unlike File.mkdirs(), whose boolean result was ignored, this reports a failed creation immediately
        Files.createDirectories(outFolder);
    }

    // Resolve the version file once instead of rebuilding the same path for the check and for the copy
    Path versionFile = inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME);
    if (Files.exists(versionFile)) {
        logger.info("Copying PharmGKB version file...");
        Files.copy(versionFile, outFolder.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME),
                StandardCopyOption.REPLACE_EXISTING);
    } else {
        // Only announce the copy when it really happens; make the missing file visible otherwise
        logger.warn("PharmGKB version file {} not found, skipping copy", versionFile);
    }

    CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outFolder);
    return new PharmGKBBuilder(inFolder, serializer);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ public void execute() {
case EtlCommons.PUBMED_DATA:
downloadFiles.addAll(downloader.downloadPubMed());
break;
case EtlCommons.PHARMACOGENOMICS_DATA:
downloadFiles.addAll(downloader.downloadPharmKGB());
break;
default:
System.out.println("Value \"" + data + "\" is not allowed for the data parameter. Allowed values"
+ " are: {genome, gene, gene_disease_association, variation, variation_functional_score,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandO
EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA,
EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA,
EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA,
EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA};
EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA,
EtlCommons.PHARMACOGENOMICS_DATA};
} else {
loadOptions = loadCommandOptions.data.split(",");
}
Expand Down Expand Up @@ -289,6 +290,11 @@ public void execute() throws CellBaseException {
loadPubMed();
break;
}
case EtlCommons.PHARMACOGENOMICS_DATA: {
// Load data, create index and update release
loadPharmacogenomica();
break;
}
default:
logger.warn("Not valid 'data'. We should not reach this point");
break;
Expand Down Expand Up @@ -546,12 +552,39 @@ private void loadPubMed() throws CellBaseException {

// Update release (collection and sources)
List<Path> sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.PUBMED_VERSION_FILENAME));
dataReleaseManager.update(dataRelease, "pubmed", EtlCommons.REPEATS_DATA, sources);
dataReleaseManager.update(dataRelease, EtlCommons.PUBMED_DATA, EtlCommons.PUBMED_DATA, sources);
} else {
logger.warn("PubMed folder {} not found", pubmedPath);
}
}

/**
 * Loads the pharmacogenomics JSON file found under the input folder, then creates the collection index and
 * updates the data release with the PharmGKB version file as source.
 *
 * If the pharmacogenomics folder does not exist, a warning is logged and nothing else is done. A failure
 * while loading the file is logged (with its stack trace) and does not stop the index creation nor the
 * release update that follow.
 *
 * @throws IOException declared by this method; load failures of this type are caught and logged here
 * @throws CellBaseException declared by this method; load failures of this type are caught and logged here
 */
private void loadPharmacogenomica() throws IOException, CellBaseException {
    Path pharmaPath = input.resolve(EtlCommons.PHARMACOGENOMICS_DATA);

    if (!Files.exists(pharmaPath)) {
        logger.warn("Pharmacogenomics folder {} not found to load", pharmaPath);
        return;
    }

    // Load data
    Path pharmaJsonPath = pharmaPath.resolve(EtlCommons.PHARMACOGENOMICS_DATA + ".json.gz");
    logger.info("Loading file '{}'", pharmaJsonPath.toFile().getName());
    try {
        loadRunner.load(pharmaJsonPath, EtlCommons.PHARMACOGENOMICS_DATA, dataRelease);
    } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException
            | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException
            | LoaderException e) {
        if (e instanceof InterruptedException) {
            // Catching InterruptedException clears the interrupt status; restore it for the callers
            Thread.currentThread().interrupt();
        }
        // The trailing throwable keeps the message unchanged while also logging the stack trace
        logger.error("Error loading file '{}': {}", pharmaJsonPath.toFile().getName(), e.toString(), e);
    }

    // Create index
    createIndex(EtlCommons.PHARMACOGENOMICS_DATA);

    // Update release (collection and sources)
    List<Path> sources = Collections.singletonList(pharmaPath.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME));
    dataReleaseManager.update(dataRelease, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PHARMACOGENOMICS_DATA, sources);
}

private void createIndex(String collection) {
if (!createIndexes) {
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.cellbase.core.api;

import org.apache.commons.collections4.CollectionUtils;
import org.opencb.cellbase.core.api.query.AbstractQuery;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.api.query.QueryParameter;

import java.util.List;
import java.util.Map;

/**
 * Query object holding the filters declared below, each one tagged with a {@link QueryParameter} that gives
 * its parameter id and, for most of them, a shorter alias.
 *
 * All filters are lists of strings. Setters return {@code this} so calls can be chained.
 */
public class PharmaChemicalQuery extends AbstractQuery {

    @QueryParameter(id = "id")
    private List<String> ids;

    @QueryParameter(id = "name")
    private List<String> names;

    @QueryParameter(id = "source", allowedValues = {"PharmGKB"})
    private List<String> sources;

    @QueryParameter(id = "types", alias = {"type"})
    private List<String> types;

    @QueryParameter(id = "variants.variantId", alias = {"variant"})
    private List<String> variants;

    @QueryParameter(id = "variants.location", alias = {"location"})
    private List<String> locations;

    @QueryParameter(id = "variants.chromosome", alias = {"chromosome"})
    private List<String> chromosomes;

    // NOTE(review): 'hapolotypes' looks like a misspelling of 'haplotypes'; it is kept as-is because the
    // field and its accessors are part of this class' visible names.
    @QueryParameter(id = "variants.haplotypes", alias = {"haplotype"})
    private List<String> hapolotypes;

    @QueryParameter(id = "variants.geneNames", alias = {"geneName"})
    private List<String> geneNames;

    @QueryParameter(id = "variants.phenotypes", alias = {"phenotype"})
    private List<String> phenotypes;

    @QueryParameter(id = "variants.phenotypeTypes", alias = {"phenotypeType"})
    private List<String> phenotypeTypes;

    @QueryParameter(id = "variants.confidence", alias = {"confidence"})
    private List<String> confidences;

    @QueryParameter(id = "variants.evidences.pubmed", alias = {"pubmedId"})
    private List<String> pubmedIds;

    /**
     * Creates a query with no filter set.
     */
    public PharmaChemicalQuery() {
    }

    /**
     * Creates a query from a map of parameters, which is handed to the parent constructor.
     *
     * @param params parameter names and values passed to {@link AbstractQuery}
     * @throws QueryException propagated from the parent constructor
     */
    public PharmaChemicalQuery(Map<String, String> params) throws QueryException {
        super(params);

        // NOTE(review): the readers returned by the next two calls are not stored anywhere - confirm whether
        // they are still needed.
        objectMapper.readerForUpdating(this);
        objectMapper.readerFor(PharmaChemicalQuery.class);
        objectWriter = objectMapper.writerFor(PharmaChemicalQuery.class);
    }

    /**
     * Checks that every variant ID, when any is given, starts with "rs".
     *
     * @throws QueryException for the first variant ID that does not start with "rs"
     */
    @Override
    protected void validateQuery() throws QueryException {
        // Nothing to check when no variant filter was provided
        if (CollectionUtils.isEmpty(variants)) {
            return;
        }

        for (String variantId : variants) {
            boolean hasRsPrefix = variantId.startsWith("rs");
            if (!hasRsPrefix) {
                throw new QueryException("Invalid variant ID: '" + variantId + "'; it has to start with rs");
            }
        }
    }

    /**
     * @return a single-line description listing every filter and its current value
     */
    @Override
    public String toString() {
        return new StringBuilder("PharmaChemicalQuery{")
                .append("ids=").append(ids)
                .append(", names=").append(names)
                .append(", sources=").append(sources)
                .append(", types=").append(types)
                .append(", variants=").append(variants)
                .append(", locations=").append(locations)
                .append(", chromosomes=").append(chromosomes)
                .append(", hapolotypes=").append(hapolotypes)
                .append(", geneNames=").append(geneNames)
                .append(", phenotypes=").append(phenotypes)
                .append(", phenotypeTypes=").append(phenotypeTypes)
                .append(", confidences=").append(confidences)
                .append(", pubmedIds=").append(pubmedIds)
                .append('}')
                .toString();
    }

    /** @return the "id" filter values */
    public List<String> getIds() {
        return ids;
    }

    /**
     * @param ids the "id" filter values
     * @return this query
     */
    public PharmaChemicalQuery setIds(List<String> ids) {
        this.ids = ids;
        return this;
    }

    /** @return the "name" filter values */
    public List<String> getNames() {
        return names;
    }

    /**
     * @param names the "name" filter values
     * @return this query
     */
    public PharmaChemicalQuery setNames(List<String> names) {
        this.names = names;
        return this;
    }

    /** @return the "source" filter values */
    public List<String> getSources() {
        return sources;
    }

    /**
     * @param sources the "source" filter values
     * @return this query
     */
    public PharmaChemicalQuery setSources(List<String> sources) {
        this.sources = sources;
        return this;
    }

    /** @return the "types" filter values */
    public List<String> getTypes() {
        return types;
    }

    /**
     * @param types the "types" filter values
     * @return this query
     */
    public PharmaChemicalQuery setTypes(List<String> types) {
        this.types = types;
        return this;
    }

    /** @return the "variants.variantId" filter values */
    public List<String> getVariants() {
        return variants;
    }

    /**
     * @param variants the "variants.variantId" filter values
     * @return this query
     */
    public PharmaChemicalQuery setVariants(List<String> variants) {
        this.variants = variants;
        return this;
    }

    /** @return the "variants.location" filter values */
    public List<String> getLocations() {
        return locations;
    }

    /**
     * @param locations the "variants.location" filter values
     * @return this query
     */
    public PharmaChemicalQuery setLocations(List<String> locations) {
        this.locations = locations;
        return this;
    }

    /** @return the "variants.chromosome" filter values */
    public List<String> getChromosomes() {
        return chromosomes;
    }

    /**
     * @param chromosomes the "variants.chromosome" filter values
     * @return this query
     */
    public PharmaChemicalQuery setChromosomes(List<String> chromosomes) {
        this.chromosomes = chromosomes;
        return this;
    }

    /** @return the "variants.haplotypes" filter values */
    public List<String> getHapolotypes() {
        return hapolotypes;
    }

    /**
     * @param hapolotypes the "variants.haplotypes" filter values
     * @return this query
     */
    public PharmaChemicalQuery setHapolotypes(List<String> hapolotypes) {
        this.hapolotypes = hapolotypes;
        return this;
    }

    /** @return the "variants.geneNames" filter values */
    public List<String> getGeneNames() {
        return geneNames;
    }

    /**
     * @param geneNames the "variants.geneNames" filter values
     * @return this query
     */
    public PharmaChemicalQuery setGeneNames(List<String> geneNames) {
        this.geneNames = geneNames;
        return this;
    }

    /** @return the "variants.phenotypes" filter values */
    public List<String> getPhenotypes() {
        return phenotypes;
    }

    /**
     * @param phenotypes the "variants.phenotypes" filter values
     * @return this query
     */
    public PharmaChemicalQuery setPhenotypes(List<String> phenotypes) {
        this.phenotypes = phenotypes;
        return this;
    }

    /** @return the "variants.phenotypeTypes" filter values */
    public List<String> getPhenotypeTypes() {
        return phenotypeTypes;
    }

    /**
     * @param phenotypeTypes the "variants.phenotypeTypes" filter values
     * @return this query
     */
    public PharmaChemicalQuery setPhenotypeTypes(List<String> phenotypeTypes) {
        this.phenotypeTypes = phenotypeTypes;
        return this;
    }

    /** @return the "variants.confidence" filter values */
    public List<String> getConfidences() {
        return confidences;
    }

    /**
     * @param confidences the "variants.confidence" filter values
     * @return this query
     */
    public PharmaChemicalQuery setConfidences(List<String> confidences) {
        this.confidences = confidences;
        return this;
    }

    /** @return the "variants.evidences.pubmed" filter values */
    public List<String> getPubmedIds() {
        return pubmedIds;
    }

    /**
     * @param pubmedIds the "variants.evidences.pubmed" filter values
     * @return this query
     */
    public PharmaChemicalQuery setPubmedIds(List<String> pubmedIds) {
        this.pubmedIds = pubmedIds;
        return this;
    }
}
Loading

0 comments on commit 4374d16

Please sign in to comment.