Skip to content

Commit

Permalink
lib: add PGS data for variant annotation, #TASK-5411, #TASK-5387
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jan 3, 2024
1 parent 314786d commit b0aeca4
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.core.OntologyTermAnnotation;
import org.opencb.biodata.models.core.pgs.CommonPolygenicScore;
import org.opencb.biodata.models.core.pgs.PgsCohort;
import org.opencb.biodata.models.core.pgs.PolygenicScore;
import org.opencb.biodata.models.core.pgs.VariantPolygenicScore;
import org.opencb.biodata.models.variant.avro.OntologyTermAnnotation;
import org.opencb.biodata.models.variant.avro.PubmedReference;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.serializer.CellBaseFileSerializer;
Expand Down Expand Up @@ -320,7 +320,7 @@ private void processPgsMetadataFile(File metadataFile, BufferedWriter bw) throws
continue;
}

Map<String, Object> values = new HashMap<>();
Map<String, String> values = new HashMap<>();
if (StringUtils.isNotEmpty(strings.get(2))) {
values.put(SAMPLE_SET_KEY, strings.get(2));
}
Expand Down Expand Up @@ -416,18 +416,18 @@ private void saveVariantPolygenicScore(String line, Map<String, Integer> columnP
}

// Create polygenic score
Map<String, Object> values = new HashMap<>();
Map<String, String> values = new HashMap<>();
if (columnPos.containsKey(EFFECT_WEIGHT_COL)) {
values.put(EFFECT_WEIGHT_KEY, Double.parseDouble(field[columnPos.get(EFFECT_WEIGHT_COL)]));
values.put(EFFECT_WEIGHT_KEY, field[columnPos.get(EFFECT_WEIGHT_COL)]);
}
if (columnPos.containsKey(ALLELEFREQUENCY_EFFECT_COL)) {
values.put(ALLELE_FREQUENCY_EFFECT_KEY, Double.parseDouble(field[columnPos.get(ALLELEFREQUENCY_EFFECT_COL)]));
values.put(ALLELE_FREQUENCY_EFFECT_KEY, field[columnPos.get(ALLELEFREQUENCY_EFFECT_COL)]);
}
if (columnPos.containsKey(ODDS_RATIO_COL)) {
values.put(ODDS_RATIO_KEY, Double.parseDouble(field[columnPos.get(ODDS_RATIO_COL)]));
values.put(ODDS_RATIO_KEY, field[columnPos.get(ODDS_RATIO_COL)]);
}
if (columnPos.containsKey(HAZARD_RATIO_COL)) {
values.put(HAZARD_RATIO_KEY, Double.parseDouble(field[columnPos.get(HAZARD_RATIO_COL)]));
values.put(HAZARD_RATIO_KEY, field[columnPos.get(HAZARD_RATIO_COL)]);
}
if (columnPos.containsKey(LOCUS_NAME_COL)) {
values.put(LOCUS_NAME_KEY, field[columnPos.get(LOCUS_NAME_COL)]);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.opencb.biodata.models.core.pgs.PolygenicScore;
import org.opencb.biodata.models.core.pgs.VariantPolygenicScore;
import org.opencb.biodata.models.variant.avro.PolygenicScoreAnnotation;
import org.opencb.biodata.models.variant.avro.PolygenicScoreVariant;
import org.opencb.cellbase.core.api.PolygenicScoreQuery;
import org.opencb.cellbase.core.api.query.ProjectionQueryOptions;
import org.opencb.cellbase.core.exception.CellBaseException;
Expand All @@ -39,9 +40,7 @@
import org.opencb.commons.datastore.mongodb.MongoDBIterator;
import org.opencb.commons.datastore.mongodb.MongoDataStore;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

public class PolygenicScoreMongoDBAdaptor extends CellBaseDBAdaptor
Expand Down Expand Up @@ -79,7 +78,8 @@ public CellBaseDataResult<PolygenicScoreAnnotation> getPolygenicScoreAnnotation(
Bson query = Filters.and(andBsonList);

MongoDBCollection mongoDBCollection = getCollectionByRelease(pgsVariantMongoDBCollectionByRelease, dataRelease);
DataResult<VariantPolygenicScore> pgsVariantDataResult = mongoDBCollection.find(query, null, VariantPolygenicScore.class, new QueryOptions());
DataResult<VariantPolygenicScore> pgsVariantDataResult = mongoDBCollection.find(query, null, VariantPolygenicScore.class,
new QueryOptions());

List<PolygenicScoreAnnotation> results = new ArrayList<>();

Expand All @@ -88,12 +88,33 @@ public CellBaseDataResult<PolygenicScoreAnnotation> getPolygenicScoreAnnotation(
for (VariantPolygenicScore score : pgsVariantDataResult.getResults()) {
if ((score.getEffectAllele().equals(reference) && score.getOtherAllele().equals(alternate))
|| (score.getEffectAllele().equals(alternate) && score.getOtherAllele().equals(reference))) {
PolygenicScoreAnnotation pgsAnnotation = new PolygenicScoreAnnotation();
List<String> pgsIds = score.getPolygenicScores().stream().map(PolygenicScore::getId).collect(Collectors.toList());
// pgsAnnotation.setId(score.get);
pgsAnnotation.getVariants().add(new org.opencb.biodata.models.variant.avro.VariantPolygenicScore(
score.getEffectAllele(), score.getOtherAllele(), score.getPolygenicScores());
results.add(score);
List<CellBaseDataResult<CommonPolygenicScore>> infoResults = info(pgsIds, null, dataRelease, null);
for (CellBaseDataResult<CommonPolygenicScore> infoResult : infoResults) {
CommonPolygenicScore pgs = infoResult.first();

// Init PGS
PolygenicScoreAnnotation pgsAnnotation = new PolygenicScoreAnnotation(pgs.getId(), pgs.getName(), pgs.getSource(),
pgs.getVersion(), pgs.getTraits(), pgs.getPubmedRefs(), pgs.getValues(), new ArrayList());

// Add PGS variant scores to that PGS
PolygenicScoreVariant pgsVariant = new PolygenicScoreVariant(score.getEffectAllele(), score.getOtherAllele(),
new HashMap<>());
for (PolygenicScore polygenicScore : score.getPolygenicScores()) {
// Search the matched PGS
System.out.println(">>> polygenic score ID = " + polygenicScore.getId() + ", " + pgs.getId());
System.out.println(">>> polygenic score variant scores size = " + polygenicScore.getValues().size());
if (pgs.getId().equals(polygenicScore.getId())) {
System.out.println("FOUND !!!!!!");
pgsVariant.setValues(polygenicScore.getValues());
break;
}
}
pgsAnnotation.setVariants(Collections.singletonList(pgsVariant));

// Add annotation to the output list
results.add(pgsAnnotation);
}
}
}
}
Expand Down Expand Up @@ -133,7 +154,8 @@ public CellBaseDataResult<String> distinct(PolygenicScoreQuery query) throws Cel
}

@Override
public List<CellBaseDataResult<CommonPolygenicScore>> info(List<String> ids, ProjectionQueryOptions queryOptions, int dataRelease, String apiKey) throws CellBaseException {
public List<CellBaseDataResult<CommonPolygenicScore>> info(List<String> ids, ProjectionQueryOptions queryOptions, int dataRelease,
String apiKey) throws CellBaseException {
List<CellBaseDataResult<CommonPolygenicScore>> results = new ArrayList<>();
Bson projection = getProjection(queryOptions);
for (String id : ids) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public class CellBaseManagerFactory {
private FileManager fileManager;
private PublicationManager publicationManager;
private Map<String, PharmacogenomicsManager> pharmacogenomicsManagers;
private Map<String, PolygenicScoreManager> polygenicScoreManagers;

private Map<String, DataReleaseManager> dataReleaseManagers;

Expand All @@ -67,6 +68,7 @@ public CellBaseManagerFactory(CellBaseConfiguration configuration) {
ontologyManagers = new HashMap<>();
dataReleaseManagers = new HashMap<>();
pharmacogenomicsManagers = new HashMap<>();
polygenicScoreManagers = new HashMap<>();
}

private String getMultiKey(String species, String assembly) {
Expand Down Expand Up @@ -374,4 +376,15 @@ public PharmacogenomicsManager getPharmacogenomicsManager(String species, String
}
return pharmacogenomicsManagers.get(multiKey);
}

/**
 * Get the polygenic score manager for a species/assembly pair, creating and caching it on first request.
 *
 * @param species  Species name; combined with the assembly to build the cache key
 * @param assembly Assembly name
 * @return The cached manager for that species/assembly, or a newly created one if none was cached yet
 * @throws CellBaseException If the species/assembly pair does not pass validation
 */
public PolygenicScoreManager getPolygenicScoreManager(String species, String assembly) throws CellBaseException {
    String cacheKey = getMultiKey(species, assembly);

    // Fast path: a manager was already registered under this key
    if (polygenicScoreManagers.containsKey(cacheKey)) {
        return polygenicScoreManagers.get(cacheKey);
    }

    // Validate before building, so nothing is cached for an invalid species/assembly
    boolean valid = validateSpeciesAssembly(species, assembly);
    if (!valid) {
        throw new CellBaseException("Invalid species " + species + " or assembly " + assembly);
    }

    PolygenicScoreManager manager = new PolygenicScoreManager(species, assembly, configuration);
    polygenicScoreManagers.put(cacheKey, manager);
    return manager;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package org.opencb.cellbase.lib.managers;

import org.opencb.biodata.models.core.pgs.VariantPolygenicScore;
import org.opencb.biodata.models.core.pgs.CommonPolygenicScore;
import org.opencb.biodata.models.variant.avro.PolygenicScoreAnnotation;
import org.opencb.cellbase.core.api.PolygenicScoreQuery;
import org.opencb.cellbase.core.api.query.ProjectionQueryOptions;
Expand All @@ -28,7 +28,7 @@

import java.util.List;

public class PolygenicScoreManager extends AbstractManager implements AggregationApi<PolygenicScoreQuery, VariantPolygenicScore> {
public class PolygenicScoreManager extends AbstractManager implements AggregationApi<PolygenicScoreQuery, CommonPolygenicScore> {

private PolygenicScoreMongoDBAdaptor pgsDBAdaptor;

Expand All @@ -47,11 +47,11 @@ private void init() {
}

@Override
public CellBaseCoreDBAdaptor<PolygenicScoreQuery, VariantPolygenicScore> getDBAdaptor() {
public CellBaseCoreDBAdaptor<PolygenicScoreQuery, CommonPolygenicScore> getDBAdaptor() {
return pgsDBAdaptor;
}

public List<CellBaseDataResult<VariantPolygenicScore>> info(List<String> ids, ProjectionQueryOptions query, int dataRelease,
public List<CellBaseDataResult<CommonPolygenicScore>> info(List<String> ids, ProjectionQueryOptions query, int dataRelease,
String apiKey) throws CellBaseException {
return pgsDBAdaptor.info(ids, query, dataRelease, apiKey);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opencb.cellbase.lib.managers.*;
import org.opencb.cellbase.lib.variant.VariantAnnotationUtils;
import org.opencb.cellbase.lib.variant.annotation.futures.FuturePharmacogenomicsAnnotator;
import org.opencb.cellbase.lib.variant.annotation.futures.FuturePolygenicScoreAnnotator;
import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator;
import org.opencb.commons.datastore.core.QueryOptions;
import org.slf4j.Logger;
Expand Down Expand Up @@ -73,6 +74,7 @@ public class VariantAnnotationCalculator {
private RepeatsManager repeatsManager;
private ProteinManager proteinManager;
private PharmacogenomicsManager pharmacogenomicsManager;
private PolygenicScoreManager polygenicScoreManager;
private int dataRelease;
private String apiKey;
private Set<String> annotatorSet;
Expand Down Expand Up @@ -107,6 +109,7 @@ public VariantAnnotationCalculator(String species, String assembly, int dataRele
this.clinicalManager = cellbaseManagerFactory.getClinicalManager(species, assembly);
this.repeatsManager = cellbaseManagerFactory.getRepeatsManager(species, assembly);
this.pharmacogenomicsManager = cellbaseManagerFactory.getPharmacogenomicsManager(species, assembly);
this.polygenicScoreManager = cellbaseManagerFactory.getPolygenicScoreManager(species, assembly);

// Check data release
this.dataRelease = cellbaseManagerFactory.getDataReleaseManager(species, assembly).checkDataRelease(dataRelease);
Expand Down Expand Up @@ -668,6 +671,9 @@ private List<VariantAnnotation> runAnnotationProcess(List<Variant> normalizedVar
if (futurePharmacogenomicsAnnotator != null) {
futurePharmacogenomicsAnnotator.processResults(pharmacogenomicsFuture, variantAnnotationList);
}
if (futurePolygenicScoreAnnotator != null) {
futurePolygenicScoreAnnotator.processResults(polygenicScoreFuture, variantAnnotationList);
}

// Not needed with newCachedThreadPool
// fixedThreadPool.shutdown();
Expand Down Expand Up @@ -1175,7 +1181,8 @@ private Set<String> getAnnotatorSet(QueryOptions queryOptions) {
// 'expression' removed in CB 5.0
annotatorSet = new HashSet<>(Arrays.asList("variation", "traitAssociation", "conservation", "functionalScore",
"consequenceType", "geneDisease", "drugInteraction", "geneConstraints", "mirnaTargets", "pharmacogenomics",
"cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs"));
"cancerGeneAssociation", "cancerHotspots", "populationFrequencies", "repeats", "cytoband", "hgvs",
EtlCommons.PGS_DATA));
List<String> excludeList = queryOptions.getAsStringList("exclude");
excludeList.forEach(annotatorSet::remove);
}
Expand Down Expand Up @@ -1423,8 +1430,6 @@ private List<ConsequenceType> getConsequenceTypeList(Variant variant, List<Gene>
}

private List<Region> variantListToRegionList(List<Variant> variantList) {
// return variantList.stream().map((variant) -> variantToRegion(variant)).collect(Collectors.toList());

// In great majority of cases returned region list size will equal variant list; this will happen except when
// there's a breakend within the variantList
List<Region> regionList = new ArrayList<>(variantList.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,17 @@
package org.opencb.cellbase.lib.variant.annotation.futures;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.pharma.*;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.*;
import org.opencb.cellbase.core.api.PharmaChemicalQuery;
import org.opencb.cellbase.core.api.PolygenicScoreQuery;
import org.opencb.biodata.models.variant.avro.PolygenicScoreAnnotation;
import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.managers.PharmacogenomicsManager;
import org.opencb.cellbase.lib.managers.PolygenicScoreManager;
import org.opencb.commons.datastore.core.QueryOptions;
import org.slf4j.Logger;

import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
import java.util.stream.Collectors;

public class FuturePolygenicScoreAnnotator implements Callable<List<CellBaseDataResult<PolygenicScoreAnnotation>>> {
private PolygenicScoreManager polygenicScoreManager;
Expand Down Expand Up @@ -65,7 +61,7 @@ public List<CellBaseDataResult<PolygenicScoreAnnotation>> call() throws Exceptio
cellBaseDataResultList.add(polygenicScoreManager.getPolygenicScoreAnnotation(variant.getChromosome(), variant.getStart(),
variant.getReference(), variant.getAlternate(), dataRelease));
}
logger.info("Pharmacogenomics queries performance in {} ms for {} variants", System.currentTimeMillis() - startTime,
logger.info("PolygenicScore queries performance in {} ms for {} variants", System.currentTimeMillis() - startTime,
variantList.size());
return cellBaseDataResultList;
}
Expand Down

0 comments on commit b0aeca4

Please sign in to comment.