From e56914cc96ccc271ef6dd0325cd514acecfff5fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 18 Jul 2024 17:53:26 +0200 Subject: [PATCH 1/2] lib: update variant manager and variant annotator to support CADD as licensed data, #TASK-6573, #TASK-6441 On branch TASK-6441 Changes to be committed: modified: cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java modified: cellbase-core/src/main/java/org/opencb/cellbase/core/api/key/ApiKeyLicensedDataUtils.java modified: cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java modified: cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java --- .../executors/ExportCommandExecutor.java | 2 +- .../core/api/key/ApiKeyLicensedDataUtils.java | 4 +++ .../cellbase/lib/managers/VariantManager.java | 33 ++++++++++++++----- .../VariantAnnotationCalculator.java | 2 +- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java index 72f992f344..f46f5bad6d 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java @@ -194,7 +194,7 @@ public void execute() throws CellBaseException { case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: { // Export data VariantManager variantManager = managerFactory.getVariantManager(species, assembly); - CellBaseDataResult results = variantManager.getFunctionalScoreRegion(regions, null, + CellBaseDataResult results = variantManager.getFunctionalScoreRegion(regions, null, apiKey, dataRelease); counter = writeExportedData(results.getResults(), "cadd", output); counterMsg = counter + " CADD items"; diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/key/ApiKeyLicensedDataUtils.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/key/ApiKeyLicensedDataUtils.java index e131a4eeee..be837af69c 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/key/ApiKeyLicensedDataUtils.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/key/ApiKeyLicensedDataUtils.java @@ -35,6 +35,10 @@ public final class ApiKeyLicensedDataUtils { public static final Set LICENSED_CLINICAL_DATA = new HashSet<>(Arrays.asList("cosmic", "hgmd")); public static final Set UNLICENSED_CLINICAL_DATA = new HashSet<>(Collections.singletonList("clinvar")); + public static final int NUM_VARIANT_FUNCTIONAL_SCORE_SOURCES = 1; + public static final Set LICENSED_VARIANT_FUNCTIONAL_SCORE_SOURCES = new HashSet<>(Collections.singletonList("cadd")); + + private ApiKeyLicensedDataUtils() { } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java index 28f5c70fa7..bc2c7c4fa4 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/managers/VariantManager.java @@ -35,6 +35,7 @@ import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.impl.core.CellBaseCoreDBAdaptor; import org.opencb.cellbase.lib.impl.core.SpliceScoreMongoDBAdaptor; import org.opencb.cellbase.lib.impl.core.VariantMongoDBAdaptor; @@ -43,6 +44,7 @@ import org.opencb.cellbase.lib.variant.annotation.CellBaseNormalizerSequenceAdaptor; import org.opencb.cellbase.lib.variant.annotation.VariantAnnotationCalculator; import org.opencb.cellbase.lib.variant.hgvs.HgvsCalculator; +import org.opencb.commons.datastore.core.Event; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; @@ -288,17 +290,24 @@ public List getByRegion(Query query, QueryOptions queryOptio return queryResults; } - public CellBaseDataResult getFunctionalScoreVariant(Variant variant, QueryOptions queryOptions, int dataRelease) + public CellBaseDataResult getFunctionalScoreVariant(Variant variant, QueryOptions queryOptions, String apiKey, int dataRelease) throws CellBaseException { - return variantDBAdaptor.getFunctionalScoreVariant(variant, queryOptions, dataRelease); + Set validSources = apiKeyManager.getValidSources(apiKey, null); + if (validSources.contains(EtlCommons.CADD_DATA)) { + return variantDBAdaptor.getFunctionalScoreVariant(variant, queryOptions, dataRelease); + } else { + return new CellBaseDataResult<>(variant.toStringSimple(), 0, Collections.singletonList(new Event(Event.Type.WARNING, + "Your current API key does not grant access to CADD data")), 0); + } } - public List> getFunctionalScoreVariant(List variants, QueryOptions options, int dataRelease) + public List> getFunctionalScoreVariant(List variants, QueryOptions options, String apiKey, + int dataRelease) throws CellBaseException { List> cellBaseDataResults = new ArrayList<>(variants.size()); for (Variant variant: variants) { if (variant.getType() == VariantType.SNV) { - cellBaseDataResults.add(getFunctionalScoreVariant(variant, options, dataRelease)); + cellBaseDataResults.add(getFunctionalScoreVariant(variant, options, apiKey, dataRelease)); } else { cellBaseDataResults.add(new CellBaseDataResult<>(variant.toString(), 0, Collections.emptyList(), 0)); } @@ -344,13 +353,19 @@ public List> getSpliceScoreVariant(List } public CellBaseDataResult getFunctionalScoreRegion(List regions, CellBaseQueryOptions options, - int dataRelease) + String apiKey, int dataRelease) throws CellBaseException { Set chunkIdSet = new HashSet<>(); - for (Region region : regions) { - chunkIdSet.addAll(variantDBAdaptor.getFunctionalScoreChunkIds(region)); - } - return variantDBAdaptor.getFunctionalScoreRegion(new ArrayList<>(chunkIdSet), options, dataRelease); + Set validSources = apiKeyManager.getValidSources(apiKey, null); + if (validSources.contains(EtlCommons.CADD_DATA)) { + for (Region region : regions) { + chunkIdSet.addAll(variantDBAdaptor.getFunctionalScoreChunkIds(region)); + } + return variantDBAdaptor.getFunctionalScoreRegion(new ArrayList<>(chunkIdSet), options, dataRelease); + } else { + return new CellBaseDataResult<>("", 0, Collections.singletonList(new Event(Event.Type.WARNING, "Your current API key does not" + + " grant access to CADD data.")), 0); + } } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java index a503ba7045..ed804ca721 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/variant/annotation/VariantAnnotationCalculator.java @@ -1679,7 +1679,7 @@ public List> call() throws Exception { long startTime = System.currentTimeMillis(); logger.debug("Query variant functional score"); List> variantFunctionalScoreCellBaseDataResultList = - variantManager.getFunctionalScoreVariant(variantList, queryOptions, dataRelease); + variantManager.getFunctionalScoreVariant(variantList, queryOptions, apiKey, dataRelease); logger.debug("VariantFunctionalScore query performance is {}ms for {} variants", System.currentTimeMillis() - startTime, variantList.size()); return variantFunctionalScoreCellBaseDataResultList; From 7f4cb9adfe0337a89d7f97ff17ffd51f185119c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 18 Jul 2024 17:56:27 +0200 Subject: [PATCH 2/2] tests: add JUnit tests for CADD as licensed data, #TASK-6573, #TASK-6441 On branch TASK-6441 Changes to be committed: modified: cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java modified: cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java modified: cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java --- .../lib/GenericMongoDBAdaptorTest.java | 1 + .../core/VariantAnnotationCalculatorTest.java | 27 +++++++++++ .../impl/core/VariantMongoDBAdaptorTest.java | 45 ++++++++++++++++--- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java index 1b217c671d..4c709f97f5 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java @@ -79,6 +79,7 @@ public class GenericMongoDBAdaptorTest { protected String HGMD_COSMIC_ACCESS_API_KEY = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywiaGdtZCI6OTIyMzM3MjAzNjg1NDc3NTgwN30sIm1heE51bVF1ZXJpZXMiOjEwMDAwMDAwLCJ2ZXJzaW9uIjoiMS4wIiwic3ViIjoiVEVTVCIsImlhdCI6MTY4OTgzNzQ5OX0.ZsTxFaSzsLwyQMLwNIODerfaOTLywoRwkxvpsnjVTSE"; protected String HGMD_SPLICEAI_ACCESS_API_KEY = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImhnbWQiOjkyMjMzNzIwMzY4NTQ3NzU4MDcsInNwbGljZWFpIjo5MjIzMzcyMDM2ODU0Nzc1ODA3fSwibWF4TnVtUXVlcmllcyI6MTAwMDAwMDAsInZlcnNpb24iOiIxLjAiLCJzdWIiOiJURVNUIiwiaWF0IjoxNjg5ODM3NTE4fQ.rDH2BR2EkUgs3fz7hAuCbmAHgE0rHmp9JhD-5gFZmfI"; protected String COSMIC_SPLICEAI_ACCESS_API_KEY = "eyJhbGciOiJIUzI1NiJ9.eyJzb3VyY2VzIjp7ImNvc21pYyI6OTIyMzM3MjAzNjg1NDc3NTgwNywic3BsaWNlYWkiOjkyMjMzNzIwMzY4NTQ3NzU4MDd9LCJtYXhOdW1RdWVyaWVzIjoxMDAwMDAwMCwidmVyc2lvbiI6IjEuMCIsInN1YiI6IlRFU1QiLCJpYXQiOjE2ODk4Mzc1MzZ9.CkXvpNg0NWAXL3N06R2gCqe0kF4ptBk0MPvaAdDSEpQ"; + protected String HGMD_CADD_ACCESS_API_KEY = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJaRVRUQS1URVNUSU5HIiwidmVyc2lvbiI6IjEuMCIsImlhdCI6MTcyMTMxNTg4MCwic291cmNlcyI6eyJjYWRkIjozMTQ5NDc4NDY5MDgwMDAwMCwiaGdtZCI6MzE0OTQ3ODQ2OTA4MDAwMDB9LCJxdW90YSI6eyJtYXhOdW1RdWVyaWVzIjoxMDAwMDAwMH19.kMCyzs6lMo44e-rlDTlZ1lucK1jEKVS0bl5MBOh6Ekg"; protected LoadRunner loadRunner = null; diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java index b973f0b996..263e5d7395 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantAnnotationCalculatorTest.java @@ -2129,6 +2129,33 @@ public void testLicensedDataCosmicSpliceApiKeyAnnotation() throws Exception { } } + @Test + public void testCaddApiKeyAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setApiKey(HGMD_CADD_ACCESS_API_KEY); + + Variant variant = Variant.parseVariant("X:32896535:A:T"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator.getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + assertTrue(variantAnnotation.getFunctionalScore().size() > 0); + } + + @Test + public void testNoCaddApiKeyAnnotation() throws Exception { + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("exclude", "pharmacogenomics"); + queryOptions.put("normalize", true); + + variantAnnotationCalculator.setApiKey(SPLICEAI_ACCESS_API_KEY); + + Variant variant = Variant.parseVariant("X:32896535:A:T"); + CellBaseDataResult cellBaseDataResult = variantAnnotationCalculator.getAnnotationByVariant(variant, queryOptions); + VariantAnnotation variantAnnotation = cellBaseDataResult.first(); + assertTrue(CollectionUtils.isEmpty(variantAnnotation.getFunctionalScore())); + } private boolean containTraitAssociation(VariantAnnotation variantAnnotation, String source) { if (variantAnnotation == null) { diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java index 5a47e04d4b..7ab89a230a 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/VariantMongoDBAdaptorTest.java @@ -22,6 +22,7 @@ import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.PopulationFrequency; import org.opencb.biodata.models.variant.avro.SampleEntry; +import org.opencb.biodata.models.variant.avro.Score; import org.opencb.cellbase.core.ParamConstants; import org.opencb.cellbase.core.api.VariantQuery; import org.opencb.cellbase.core.api.query.LogicalList; @@ -37,6 +38,7 @@ import java.util.Collections; import java.util.List; +import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -54,15 +56,44 @@ public VariantMongoDBAdaptorTest() throws Exception { variantManager = cellBaseManagerFactory.getVariantManager(SPECIES, ASSEMBLY); } - // TODO: to be finished - properly implemented - @Disabled @Test - public void testGetFunctionalScoreVariant() throws Exception { -// VariantMongoDBAdaptor variationDBAdaptor = dbAdaptorFactory.getVariationDBAdaptor("hsapiens", "GRCh37"); - CellBaseDataResult functionalScoreVariant = variantManager.getFunctionalScoreVariant(Variant.parseVariant("10:130862563:A:G"), - new QueryOptions(), dataRelease.getRelease()); + public void testGetFunctionalScoreVariantValidApiKey() throws Exception { + Variant variant = Variant.parseVariant("X:32896535:A:T"); + CellBaseDataResult functionalScoreVariant = variantManager.getFunctionalScoreVariant(variant, new QueryOptions(), + HGMD_CADD_ACCESS_API_KEY, dataRelease.getRelease()); + + assertEquals(2, functionalScoreVariant.getNumResults()); + } + + @Test + public void testGetFunctionalScoreVariantInvalidApiKey() throws Exception { + Variant variant = Variant.parseVariant("X:32896535:A:T"); + CellBaseDataResult functionalScoreVariant = variantManager.getFunctionalScoreVariant(variant, new QueryOptions(), + SPLICEAI_ACCESS_API_KEY, dataRelease.getRelease()); + + assertEquals(0, functionalScoreVariant.getNumResults()); + System.out.println("functionalScoreVariant.getEvents() = " + functionalScoreVariant.getEvents()); + } - System.out.println("Num. of results: " + functionalScoreVariant.getNumResults()); + @Test + public void testGetFunctionalScoreVariantListValidApiKey() throws Exception { + List variants = Arrays.asList(Variant.parseVariant("X:32896417:T:G"), Variant.parseVariant("X:32896535:A:T")); + List> functionalScoreVariantList = variantManager.getFunctionalScoreVariant(variants, new QueryOptions(), + HGMD_CADD_ACCESS_API_KEY, dataRelease.getRelease()); + for (CellBaseDataResult functionalScoreVariant : functionalScoreVariantList) { + assertTrue(functionalScoreVariant.getNumResults() > 0); + } + } + + @Test + public void testGetFunctionalScoreVariantListInvalidApiKey() throws Exception { + List variants = Arrays.asList(Variant.parseVariant("X:32896417:T:G"), Variant.parseVariant("X:32896535:A:T")); + List> functionalScoreVariantList = variantManager.getFunctionalScoreVariant(variants, new QueryOptions(), + HGMD_SPLICEAI_ACCESS_API_KEY, dataRelease.getRelease()); + for (CellBaseDataResult functionalScoreVariant : functionalScoreVariantList) { + assertEquals(0, functionalScoreVariant.getNumResults()); + System.out.println("functionalScoreVariant.getEvents() = " + functionalScoreVariant.getEvents()); + } } @Test