Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK-5318 - Implement custom annotator to allow clients private files #2460

Merged
merged 17 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ public enum VariantStorageOptions implements ConfigurationOption {
ANNOTATOR_CELLBASE_VARIANT_LENGTH_THRESHOLD("annotator.cellbase.variantLengthThreshold", Integer.MAX_VALUE),
ANNOTATOR_CELLBASE_IMPRECISE_VARIANTS("annotator.cellbase.impreciseVariants", true),
ANNOTATOR_CELLBASE_STAR_ALTERNATE("annotator.cellbase.starAlternate", false),
ANNOTATOR_EXTENSION_PREFIX("annotator.extension."),
ANNOTATOR_EXTENSION_LIST("annotator.extension.list"),
ANNOTATOR_EXTENSION_COSMIC_FILE("annotator.extension.cosmic.file"),
ANNOTATOR_EXTENSION_COSMIC_VERSION("annotator.extension.cosmic.version"),

INDEX_SEARCH("indexSearch", false), // Build secondary indexes using search engine.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.annotation.annotators.VariantAnnotator;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionTask;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.VariantAnnotatorExtensionsFactory;
import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils;
import org.opencb.opencga.storage.core.variant.io.db.VariantAnnotationDBWriter;
import org.opencb.opencga.storage.core.variant.io.db.VariantDBReader;
Expand Down Expand Up @@ -265,6 +267,13 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap
return variantAnnotationList;
};

List<VariantAnnotatorExtensionTask> extensions = new VariantAnnotatorExtensionsFactory().getVariantAnnotatorExtensions(params);
for (VariantAnnotatorExtensionTask extension : extensions) {
extension.setup(outDir);
extension.checkAvailable();
annotationTask = annotationTask.then(extension);
}

final DataWriter<VariantAnnotation> variantAnnotationDataWriter;
if (avro) {
//FIXME
Expand All @@ -286,7 +295,7 @@ public URI createAnnotation(URI outDir, String fileName, Query query, ObjectMap
ParallelTaskRunner<Variant, VariantAnnotation> parallelTaskRunner =
new ParallelTaskRunner<>(variantDataReader, annotationTask, variantAnnotationDataWriter, config);
parallelTaskRunner.run();
} catch (ExecutionException e) {
} catch (Exception e) {
throw new VariantAnnotatorException("Error creating annotations", e);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions;

import org.opencb.biodata.models.variant.avro.VariantAnnotation;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.run.Task;

import java.net.URI;
import java.util.List;

public interface VariantAnnotatorExtensionTask extends Task<VariantAnnotation, VariantAnnotation> {

/**
* Set up the annotator extension.
* This method will be called before any other method. It might generate extra files or data needed for the annotation.
*
* @param output Output directory where the annotator extension should write the files
* @return List of URIs of generated files (if any)
* @throws Exception if the annotator extension set up fails
*/
List<URI> setup(URI output) throws Exception;

/**
* Check if the annotator extension is available for the given options.
* @throws IllegalArgumentException if the annotator extension is not available
*/
void checkAvailable() throws IllegalArgumentException;

/**
* Check if the annotator extension is available for the given options. Do not throw any exception if the extension is not available.
* @return true if the annotator extension is available
*/
default boolean isAvailable() {
try {
checkAvailable();
return true;
} catch (IllegalArgumentException e) {
return false;
}
}

@Override
default void pre() throws Exception {
Task.super.pre();
checkAvailable();
}

/**
* Get the options for the annotator extension.
* @return Options for the annotator extension
*/
ObjectMap getOptions();

/**
* Get the metadata for the annotator extension.
* @return Metadata for the annotator extension
*/
ObjectMap getMetadata();

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions;

import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.opencga.storage.core.variant.VariantStorageOptions;
import org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic.CosmicVariantAnnotatorExtensionTask;

import java.lang.reflect.InvocationTargetException;
import java.util.LinkedList;
import java.util.List;

public class VariantAnnotatorExtensionsFactory {

public List<VariantAnnotatorExtensionTask> getVariantAnnotatorExtensions(ObjectMap options) {

List<VariantAnnotatorExtensionTask> tasks = new LinkedList<>();
for (String extensionId : options.getAsStringList(VariantStorageOptions.ANNOTATOR_EXTENSION_LIST.key())) {
VariantAnnotatorExtensionTask task = null;
switch (extensionId) {
case CosmicVariantAnnotatorExtensionTask.ID:
task = new CosmicVariantAnnotatorExtensionTask(options);
break;
default:
String extensionClass = options.getString(VariantStorageOptions.ANNOTATOR_EXTENSION_PREFIX.key() + extensionId);
if (extensionClass != null) {
task = getVariantAnnotatorExtension(extensionClass, options);
} else {
throw new IllegalArgumentException("Unknown annotator extension '" + extensionId + "'");
}
}

if (task == null) {
throw new IllegalArgumentException("Unable to create annotator extension '" + extensionId + "'");
}

tasks.add(task);
}
return tasks;
}

private VariantAnnotatorExtensionTask getVariantAnnotatorExtension(String className, ObjectMap options) {
try {
Class<?> clazz = Class.forName(className);
return (VariantAnnotatorExtensionTask) clazz.getConstructor(ObjectMap.class).newInstance(options);
} catch (ClassNotFoundException
| NoSuchMethodException
| InstantiationException
| IllegalAccessException
| InvocationTargetException e) {
throw new IllegalArgumentException("Unable to create VariantAnnotatorExtensionTask from class " + className, e);
}
}


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package org.opencb.opencga.storage.core.variant.annotation.annotators.extensions.cosmic;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.formats.variant.cosmic.CosmicParserCallback;
import org.opencb.biodata.models.sequence.SequenceLocation;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.avro.EvidenceEntry;
import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField;
import org.opencb.biodata.tools.variant.VariantNormalizer;
import org.opencb.opencga.core.common.JacksonUtils;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

public class CosmicExtensionTaskCallback implements CosmicParserCallback {

private RocksDB rdb;
private VariantNormalizer variantNormalizer;
private ObjectMapper defaultObjectMapper;

private static Logger logger = LoggerFactory.getLogger(CosmicExtensionTaskCallback.class);

private static final String VARIANT_STRING_PATTERN = "([ACGTN]*)|(<CNV[0-9]+>)|(<DUP>)|(<DEL>)|(<INS>)|(<INV>)";

public CosmicExtensionTaskCallback(RocksDB rdb) {
this.rdb = rdb;
this.variantNormalizer = new VariantNormalizer(new VariantNormalizer.VariantNormalizerConfig()
.setReuseVariants(true)
.setNormalizeAlleles(true)
.setDecomposeMNVs(false));
this.defaultObjectMapper = JacksonUtils.getDefaultObjectMapper();
}

@Override
public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List<EvidenceEntry> evidenceEntries) {
// Add evidence entries in the RocksDB
// More than one variant being returned from the normalisation process would mean it's and MNV which has been decomposed
List<String> normalisedVariantStringList;
try {
normalisedVariantStringList = getNormalisedVariantString(sequenceLocation.getChromosome(),
sequenceLocation.getStart(), sequenceLocation.getReference(), sequenceLocation.getAlternate());
if (CollectionUtils.isNotEmpty(normalisedVariantStringList)) {
for (String normalisedVariantString : normalisedVariantStringList) {
rdb.put(normalisedVariantString.getBytes(), defaultObjectMapper.writeValueAsBytes(evidenceEntries));
}
return true;
}
return false;
} catch (NonStandardCompliantSampleField | RocksDBException | JsonProcessingException e) {
logger.warn(StringUtils.join(e.getStackTrace(), "\n"));
return false;
}
}

protected List<String> getNormalisedVariantString(String chromosome, int start, String reference, String alternate)
throws NonStandardCompliantSampleField {
Variant variant = new Variant(chromosome, start, reference, alternate);
return getNormalisedVariantString(variant);
}

protected List<String> getNormalisedVariantString(Variant variant) throws NonStandardCompliantSampleField {
// Checks no weird characters are part of the reference & alternate alleles
if (isValid(variant)) {
List<Variant> normalizedVariantList = variantNormalizer.normalize(Collections.singletonList(variant), true);
return normalizedVariantList.stream().map(Variant::toString).collect(Collectors.toList());
} else {
logger.warn("Variant {} is not valid: skipping it!", variant);
}

return Collections.emptyList();
}

protected boolean isValid(Variant variant) {
return (variant.getReference().matches(VARIANT_STRING_PATTERN)
&& (variant.getAlternate().matches(VARIANT_STRING_PATTERN)
&& !variant.getAlternate().equals(variant.getReference())));
}
}
Loading