Skip to content

Commit

Permalink
cli: implements variant plink cli, #126
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jun 9, 2017
1 parent 3e6393d commit aab70ef
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
* Created by jtarraga on 30/01/17.
*/
public abstract class AnalysisExecutor {

public static String metadataExtension = ".meta.json";

protected String datasetName;
protected SparkSession sparkSession;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package org.opencb.hpg.bigdata.analysis.variant.adaptors;

import org.opencb.hpg.bigdata.analysis.AnalysisExecutor;
import org.opencb.hpg.bigdata.analysis.AnalysisExecutorException;

import java.util.List;
import java.util.Map;

/**
 * Analysis executor that carries the configuration for a 'plink' run: input file,
 * metadata file, output directory, split size, plink parameters and filter options.
 *
 * Created by jtarraga on 09/06/17.
 */
public class PlinkAdaptor extends AnalysisExecutor {
    private String inFilename;
    private String metaFilename;
    private String outDirname;

    private int splitSize;
    private List<String> plinkParams;
    private Map<String, String> filterOptions;

    /**
     * Creates a new adaptor.
     *
     * @param inFilename   Input file name
     * @param metaFilename Metadata file name; when {@code null} or empty it defaults to
     *                     {@code inFilename + AnalysisExecutor.metadataExtension}
     * @param outDirname   Output directory name
     */
    public PlinkAdaptor(String inFilename, String metaFilename, String outDirname) {
        this.inFilename = inFilename;
        // Honour the metadata file name given by the caller; only fall back to the
        // name derived from the input file when none was supplied.
        this.metaFilename = (metaFilename == null || metaFilename.isEmpty())
                ? inFilename + AnalysisExecutor.metadataExtension
                : metaFilename;
        this.outDirname = outDirname;
    }

    /**
     * Currently only prints the configured plink parameters and filter options
     * to standard output.
     *
     * @throws AnalysisExecutorException declared by the executor contract
     */
    @Override
    public void execute() throws AnalysisExecutorException {
        System.out.println("plink params = " + plinkParams);
        System.out.println("filter options = " + filterOptions);
    }

    public String getInFilename() {
        return inFilename;
    }

    public void setInFilename(String inFilename) {
        this.inFilename = inFilename;
    }

    public String getMetaFilename() {
        return metaFilename;
    }

    public void setMetaFilename(String metaFilename) {
        this.metaFilename = metaFilename;
    }

    public String getOutDirname() {
        return outDirname;
    }

    public void setOutDirname(String outDirname) {
        this.outDirname = outDirname;
    }

    public int getSplitSize() {
        return splitSize;
    }

    public void setSplitSize(int splitSize) {
        this.splitSize = splitSize;
    }

    public List<String> getPlinkParams() {
        return plinkParams;
    }

    public void setPlinkParams(List<String> plinkParams) {
        this.plinkParams = plinkParams;
    }

    public Map<String, String> getFilterOptions() {
        return filterOptions;
    }

    public void setFilterOptions(Map<String, String> filterOptions) {
        this.filterOptions = filterOptions;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;

import static java.nio.file.Paths.get;

Expand Down Expand Up @@ -212,4 +210,25 @@ public static VariantCommandOptions createVariantCommandOptions(

return parser.getVariantCommandOptions();
}

/**
 * Builds the filter map for the plink command from its command line options.
 * Only the regions, types and biotypes options are copied, each under the key
 * "regions", "types" or "biotypes", and only when the option value is not empty.
 *
 * @param options Plink command line options
 * @return Map from filter name to filter value; empty when none of the three is set
 */
public static Map<String, String> getFilterMap(VariantCommandOptions.PlinkVariantCommandOptions options) {
    final String regionFilter = options.regions;
    final String typeFilter = options.types;
    final String biotypeFilter = options.biotypes;

    final Map<String, String> filters = new HashMap<>();
    if (StringUtils.isNotEmpty(regionFilter)) {
        filters.put("regions", regionFilter);
    }
    if (StringUtils.isNotEmpty(typeFilter)) {
        filters.put("types", typeFilter);
    }
    if (StringUtils.isNotEmpty(biotypeFilter)) {
        filters.put("biotypes", biotypeFilter);
    }
    return filters;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ public LocalCliOptionsParser() {
variantSubCommands.addCommand("view", variantCommandOptions.viewVariantCommandOptions);
variantSubCommands.addCommand("query", variantCommandOptions.queryVariantCommandOptions);
variantSubCommands.addCommand("metadata", variantCommandOptions.metadataVariantCommandOptions);
variantSubCommands.addCommand("plink", variantCommandOptions.plinkVariantCommandOptions);
variantSubCommands.addCommand("rvtests", variantCommandOptions.rvtestsVariantCommandOptions);
variantSubCommands.addCommand("association", variantCommandOptions.associationVariantCommandOptions);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.opencb.hpg.bigdata.analysis.variant.LinearRegressionAnalysis;
import org.opencb.hpg.bigdata.analysis.variant.LogisticRegressionAnalysis;
import org.opencb.hpg.bigdata.analysis.variant.RvTestsAdaptor;
import org.opencb.hpg.bigdata.analysis.variant.adaptors.PlinkAdaptor;
import org.opencb.hpg.bigdata.app.cli.CommandExecutor;
import org.opencb.hpg.bigdata.app.cli.local.CliUtils;
import org.opencb.hpg.bigdata.app.cli.local.options.VariantCommandOptions;
Expand Down Expand Up @@ -103,6 +104,9 @@ public void execute() throws Exception {
case "metadata":
metadata();
break;
case "plink":
plink();
break;
case "rvtests":
rvtests();
break;
Expand Down Expand Up @@ -863,6 +867,21 @@ public void rvtests() throws Exception {
rvtests.run00(variantCommandOptions.rvtestsVariantCommandOptions.datasetId);
}

/**
 * Handles the 'plink' sub-command: creates a PlinkAdaptor from the input, metadata
 * and output options, passes it the plink parameters, split size and filter map,
 * and executes it.
 *
 * @throws Exception if the adaptor execution fails
 */
public void plink() throws Exception {
    final VariantCommandOptions.PlinkVariantCommandOptions options =
            variantCommandOptions.plinkVariantCommandOptions;

    final PlinkAdaptor adaptor = new PlinkAdaptor(
            options.inFilename,
            options.metaFilename,
            options.outDirname);

    adaptor.setPlinkParams(options.plinkParams);
    adaptor.setSplitSize(options.splitSize);
    adaptor.setFilterOptions(CliUtils.getFilterMap(options));

    adaptor.execute();
}

public void assoc() throws Exception {
// check input file
File metaFile = new File(variantCommandOptions.associationVariantCommandOptions.input + ".meta.json");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import org.apache.parquet.hadoop.ParquetWriter;
import org.opencb.hpg.bigdata.app.cli.local.LocalCliOptionsParser;

import java.util.List;

/**
* Created by jtarraga on 01/06/17.
*/
Expand All @@ -20,6 +22,7 @@ public class VariantCommandOptions {
public ViewVariantCommandOptions viewVariantCommandOptions;
public QueryVariantCommandOptions queryVariantCommandOptions;
public MetadataVariantCommandOptions metadataVariantCommandOptions;
public PlinkVariantCommandOptions plinkVariantCommandOptions;
public RvTestsVariantCommandOptions rvtestsVariantCommandOptions;
public AssociationVariantCommandOptions associationVariantCommandOptions;

Expand All @@ -36,6 +39,7 @@ public VariantCommandOptions(LocalCliOptionsParser.CommonCommandOptions commonCo
this.viewVariantCommandOptions = new ViewVariantCommandOptions();
this.queryVariantCommandOptions = new QueryVariantCommandOptions();
this.metadataVariantCommandOptions = new MetadataVariantCommandOptions();
this.plinkVariantCommandOptions = new PlinkVariantCommandOptions();
this.rvtestsVariantCommandOptions = new RvTestsVariantCommandOptions();
this.associationVariantCommandOptions = new AssociationVariantCommandOptions();
}
Expand Down Expand Up @@ -354,6 +358,117 @@ public class RvTestsVariantCommandOptions {
public String confFilename;
}

/**
 * Command line options of the 'plink' variant sub-command: input, metadata and
 * output locations, the parameters for the plink program, the split size and
 * the variant filter options.
 */
@Parameters(commandNames = {"plink"}, commandDescription = "Execute the 'plink' program")
public class PlinkVariantCommandOptions {

    @ParametersDelegate
    public LocalCliOptionsParser.CommonCommandOptions commonOptions = commonCommandOptions;


    @Parameter(names = {"-i", "--input"}, description = "Input file name (in Avro/Parquet file format)",
            required = true, arity = 1)
    public String inFilename;

    @Parameter(names = {"-m", "--metadata"}, description = "Input metadata file name", required = true, arity = 1)
    public String metaFilename;

    @Parameter(names = {"--dataset"}, description = "Target dataset", arity = 1)
    public String datasetId = null;

    @Parameter(names = {"--plink-params"}, description = "List of space-separated key=value parameters necessary to"
            + " run the plink program", required = true, variableArity = true)
    public List<String> plinkParams;

    @Parameter(names = {"--split-size"}, description = "Split size. Range 10000000-100000000", arity = 1)
    public int splitSize = 10000000;

    @Parameter(names = {"-o", "--output"}, description = "Output directory name to save the plink results",
            required = true, arity = 1)
    public String outDirname;

    // filter parameters
    @Parameter(names = {"--id"}, description = "Filter ID; comma separated list of IDs, e.g.:"
            + " \"rs312411,rs421225\"", arity = 1)
    public String ids;

    @Parameter(names = {"--id-file"}, description = "Filter ID that are stored in a file, one ID per line,"
            + " e.g.: rs312411", arity = 1)
    public String idFilename;

    @Parameter(names = {"--type"}, description = "Filter type; comma separated list of types, e.g.:"
            + " \"INDEL,SNP,SNV\"", arity = 1)
    public String types;

    @Parameter(names = {"--s", "--study"}, description = "Filter study; comma separated list of study names",
            arity = 1)
    public String studies;

    @Parameter(names = {"--biotype"}, description = "Filter biotype; comma separated list of biotype names,"
            + " e.g.: protein_coding, pseudogene", arity = 1)
    public String biotypes;

    @Parameter(names = {"-r", "--region"}, description = "Filter region; comma separated list of regions,"
            + " e.g.: 1:300000-400000000,15:343453463-8787665654", arity = 1)
    public String regions;

    @Parameter(names = {"--region-file"}, description = "Filter regions that are stored in a file, one region"
            + " per line, e.g.: 1:6700000-560000000", arity = 1)
    public String regionFilename;

    @Parameter(names = {"--maf"}, description = "Filter Minor Allele Frequency of a given study and"
            + " cohort. Use the following format enclosed with double quotes: \"study_name::cohort_name"
            + "[<|>|<=|>=|==|!=]value\", e.g.: \"1000g::all>0.4\"", arity = 1)
    public String maf;

    @Parameter(names = {"--mgf"}, description = "Filter Minor Genotype Frequency of a given study and"
            + " cohort. Use the following format enclosed with double quotes: \"study_name::cohort_name"
            + "[<|>|<=|>=|==|!=]value\", e.g.: \"1000g::all>0.18198\"", arity = 1)
    public String mgf;

    @Parameter(names = {"--ct", "--consequence-type"}, description = "Filter Sequence Ontology term names or"
            + " accession codes; comma separated (use double quotes if you provide term names), e.g.:"
            + " \"transgenic insertion,SO:32234,SO:00124\"", arity = 1)
    public String consequenceTypes;

    @Parameter(names = {"--gene"}, description = "Filter gene; comma separated list of gene names, e.g.:"
            + " \"BIN3,ZNF517\"", arity = 1)
    public String genes;

    @Parameter(names = {"--clinvar"}, description = "Filter clinvar (accession); comma separated list of"
            + " accessions", arity = 1)
    public String clinvar;

    @Parameter(names = {"--cosmic"}, description = "Filter cosmic (mutation ID); comma separated list of"
            + " mutations IDs", arity = 1)
    public String cosmic;

    @Parameter(names = {"--conservation"}, description = "Filter conservation scores (phastCons, phylop, gerp);"
            + " comma separated list of scores and enclosed with double quotes, e.g.: \"phylop<0.3,phastCons<0.1\"",
            arity = 1)
    public String conservScores;

    @Parameter(names = {"--ps", "--protein-substitution"}, description = "Filter protein substitution scores"
            + " (polyphen, sift); comma separated list of scores and enclosed with double quotes, e.g.:"
            + " \"polyphen>0.3,sift>0.6\"", arity = 1)
    public String substScores;

    @Parameter(names = {"--pf", "--population-frequency"}, description = "Filter alternate population"
            + " frequency of a given study: \"study_name::population_name[<|>|<=|>=|==|!=]frequency_value\", e.g.:"
            + " \"1000g::CEU<0.4\"",
            arity = 1)
    public String pf;

    @Parameter(names = {"--pmaf", "--population-maf"}, description = "Filter population minor allele frequency"
            + " of a given study: \"study_name::population_name[<|>|<=|>=|==|!=]frequency_value\", e.g.: "
            + "\"1000g::PJL<=0.25\"", arity = 1)
    public String pmaf;

    @Parameter(names = {"--sample"}, description = "Filter sample names; comma separated list of sample names",
            arity = 1)
    public String samples;
    // end of filter parameters
}

@Parameters(commandNames = {"association"}, commandDescription = "Execute association tests such as chi-square,"
+ " linear and logistic regressions for additive, dominant or recessive genetic models")
public class AssociationVariantCommandOptions {
Expand Down

0 comments on commit aab70ef

Please sign in to comment.