Skip to content

Commit

Permalink
analysis: add PLINK adaptor #128
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Sep 8, 2017
1 parent 43c959f commit 2b89706
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class Executor {
private static int threadInitNumber;
private static volatile String status;

protected static void execute(String commandLine, Path outdir, boolean redirectLogs) throws AnalysisToolException {
public static void execute(String commandLine, Path outdir, boolean redirectLogs) throws AnalysisToolException {
if (!outdir.toFile().isDirectory()) {
throw new AnalysisToolException("Output directory " + outdir + " is not an actual directory");
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.opencb.hpg.bigdata.analysis.tools.adaptors;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import htsjdk.variant.variantcontext.writer.Options;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SparkSession;
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.tools.variant.converters.VCFExporter;
import org.opencb.biodata.tools.variant.metadata.VariantMetadataManager;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.hpg.bigdata.analysis.tools.Executor;
import org.opencb.hpg.bigdata.analysis.tools.Status;
import org.opencb.hpg.bigdata.analysis.tools.ToolManager;
import org.opencb.hpg.bigdata.core.lib.SparkConfCreator;
import org.opencb.hpg.bigdata.core.lib.VariantDataset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

/**
 * Adaptor that runs the PLINK tool over a variant dataset.
 *
 * <p>{@link #run()} works in two steps: it first exports the dataset named by the
 * {@code input} entry of the query to a temporary VCF file under the {@code outdir}
 * entry of the query, and then builds and executes a PLINK command line on that file.
 * Failures in either step are logged and abort the run; no exception is propagated.
 */
public class PlinkAdaptor {

    private Query query;
    private QueryOptions queryOptions;

    private final Logger logger;

    /**
     * Creates an adaptor without a query. {@link #run()} logs an error and returns
     * immediately for such an instance; prefer {@link #PlinkAdaptor(Query, QueryOptions)}.
     */
    public PlinkAdaptor() {
        this(null, null);
    }

    /**
     * Creates an adaptor for the given query.
     *
     * @param query        query providing at least the {@code input} and {@code outdir} entries
     * @param queryOptions query options (stored, not read by this class at present)
     */
    public PlinkAdaptor(Query query, QueryOptions queryOptions) {
        this.query = query;
        this.queryOptions = queryOptions;
        this.logger = LoggerFactory.getLogger(PlinkAdaptor.class);
    }

    /**
     * Exports the input dataset to a temporary VCF file and executes PLINK on it.
     * Any error is logged (with its stack trace) and ends the run early.
     */
    public void run() {
        if (query == null) {
            logger.error("Error executing PLINK tool: no query has been set");
            return;
        }

        String inputAvroFilename = query.getString("input");
        String tmpVcfFilename = query.get("outdir") + "/tmp.vcf";

        if (exportToVcf(inputAvroFilename, tmpVcfFilename)) {
            executePlink(tmpVcfFilename);
        }
    }

    /**
     * Loads the dataset and its {@code .meta.json} metadata and writes the variants as VCF.
     *
     * @param inputFilename dataset file to load; its metadata is read from
     *                      {@code inputFilename + ".meta.json"}
     * @param vcfFilename   destination VCF file
     * @return {@code true} if the VCF file was written, {@code false} if an error was logged
     */
    private boolean exportToVcf(String inputFilename, String vcfFilename) {
        String metaFilename = inputFilename + ".meta.json";

        VariantMetadataManager manager = new VariantMetadataManager();
        try {
            manager.load(Paths.get(metaFilename));

            SparkConf sparkConf = SparkConfCreator.getConf("tool plink", "local", 1, true);
            SparkSession sparkSession = new SparkSession(new SparkContext(sparkConf));
            try {
                VariantDataset vd = new VariantDataset(sparkSession);
                vd.load(inputFilename);
                vd.createOrReplaceTempView("vcf");

                // Only the first study of the metadata is exported.
                VariantStudyMetadata studyMetadata = manager.getVariantMetadata().getStudies().get(0);
                VCFExporter vcfExporter = new VCFExporter(studyMetadata);
                vcfExporter.open(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Paths.get(vcfFilename));
                try {
                    vcfExporter.export(vd.iterator());
                } finally {
                    // Close the writer even when the export fails half-way.
                    vcfExporter.close();
                }
            } finally {
                // Always release the Spark session, including on failure.
                sparkSession.stop();
            }
            return true;
        } catch (Exception e) {
            logger.error("Error executing PLINK tool when retrieving variants to VCF file: {}", e.getMessage(), e);
            return false;
        }
    }

    /**
     * Builds the PLINK command line for the given VCF file, executes it and reads the
     * resulting {@code status.json}.
     *
     * @param vcfFilename VCF file passed to the tool as its {@code input} parameter
     */
    private void executePlink(String vcfFilename) {
        try {
            // NOTE(review): working directory and tool location are hard-coded; confirm
            // whether they should come from the query or from configuration instead.
            Path tmp = Paths.get("/tmp");
            Path plinkPath = Paths.get("/tmp/plink");
            ToolManager toolManager = new ToolManager(plinkPath);

            Map<String, String> params = new HashMap<>();
            params.put("input", vcfFilename);

            // NOTE(review): "index" is the execution name requested here; verify it is the
            // intended PLINK execution.
            String commandLine = toolManager.createCommandLine("plink", "index", params);
            logger.info("PLINK command line: {}", commandLine);

            Executor.execute(commandLine, tmp, true);

            ObjectReader reader = new ObjectMapper().reader(Status.class);
            Status status = reader.readValue(tmp.resolve("status.json").toFile());
            logger.info("PLINK execution status: {}", status);
        } catch (Exception e) {
            logger.error("Error executing PLINK command line: {}", e.getMessage(), e);
        }
    }
}

0 comments on commit 2b89706

Please sign in to comment.