Skip to content

Commit

Permalink
align: create command line to align FastQ. #8
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed May 28, 2015
1 parent 21dc109 commit 7d7a37d
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,7 @@

package org.opencb.hpg.bigdata.app;

import org.opencb.hpg.bigdata.app.cli.AlignCommandExecutor;
import org.opencb.hpg.bigdata.app.cli.BamCommandExecutor;
import org.opencb.hpg.bigdata.app.cli.CliOptionsParser;
import org.opencb.hpg.bigdata.app.cli.CommandExecutor;
import org.opencb.hpg.bigdata.app.cli.FastqCommandExecutor;
import org.opencb.hpg.bigdata.app.cli.ConvertCommandExecutor;
import org.opencb.hpg.bigdata.app.cli.*;

import com.beust.jcommander.ParameterException;

Expand Down Expand Up @@ -67,6 +62,13 @@ public static void main(String[] args) {
commandExecutor = new FastqCommandExecutor(cliOptionsParser.getFastqCommandOptions());
}
break;
case "fastq-align":
if (cliOptionsParser.getFastqAlignCommandOptions().commonOptions.help) {
cliOptionsParser.printUsage();
} else {
commandExecutor = new FastqAlignCommandExecutor(cliOptionsParser.getFastqAlignCommandOptions());
}
break;
case "bam":
if (cliOptionsParser.getBamCommandOptions().commonOptions.help) {
cliOptionsParser.printUsage();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ public class CliOptionsParser {
private final CommonCommandOptions commonCommandOptions;

private FastqCommandOptions fastqCommandOptions;
private FastqAlignCommandOptions fastqAlignCommandOptions;

private BamCommandOptions bamCommandOptions;

private ConvertCommandOptions convertCommandOptions;
private AlignCommandOptions alignCommandOptions;

Expand All @@ -44,11 +47,16 @@ public CliOptionsParser(boolean hadoop) {
jcommander.addCommand(convertCommandOptions);
if (hadoop) {
jcommander.setProgramName("hpg-bigdata.sh");

fastqCommandOptions = new FastqCommandOptions();
fastqAlignCommandOptions = new FastqAlignCommandOptions();

bamCommandOptions = new BamCommandOptions();
alignCommandOptions = new AlignCommandOptions();

jcommander.addCommand(fastqCommandOptions);
jcommander.addCommand(fastqAlignCommandOptions);

jcommander.addCommand(bamCommandOptions);
jcommander.addCommand(alignCommandOptions);
} else { //local
Expand Down Expand Up @@ -105,7 +113,6 @@ public class FastqCommandOptions {
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;


@Parameter(names = {"-i", "--input"}, description = "HDFS input file (the FastQ file must be stored in GA4GH/Avro model)", required = true, arity = 1)
public String input = null;

Expand All @@ -120,9 +127,23 @@ public class FastqCommandOptions {

@Parameter(names = {"-k", "--kmers"}, description = "Compute k-mers (according to the indicated length)", required = false, arity = 1)
public Integer kmers = 0;

}

/**
 * Command line options of the 'fastq-align' command.
 *
 * Shares the common options through a delegate and adds the input file,
 * the output directory and the index to be used for the alignment.
 */
@Parameters(commandNames = {"fastq-align"}, commandDescription = "Description")
public class FastqAlignCommandOptions {

    /** Options shared by every command (help, log level, ...), taken from the enclosing parser. */
    @ParametersDelegate
    public CommonCommandOptions commonOptions = commonCommandOptions;

    /** Mandatory input file. */
    @Parameter(names = {"-i", "--input"}, description = "HDFS input file (the FastQ file must be stored in GA4GH/Avro model)", required = true, arity = 1)
    public String input = null;

    /**
     * Output directory. Declared optional here, so it is {@code null} when the
     * user does not provide it; consumers must be prepared for that.
     */
    @Parameter(names = {"-o", "--output"}, description = "HDFS output directory to save alignments stored in GA4GH/Avro model", required = false, arity = 1)
    public String output = null;

    /**
     * Mandatory index location. Only the long name is registered: the previous
     * declaration also listed an empty string as an option name, which is not a
     * usable switch and only polluted option parsing and the usage output.
     */
    @Parameter(names = {"--index"}, description = "HDFS index file", required = true, arity = 1)
    public String index = null;
}

@Parameters(commandNames = {"bam"}, commandDescription = "Description")
public class BamCommandOptions {
Expand Down Expand Up @@ -249,9 +270,9 @@ public GeneralOptions getGeneralOptions() {
return generalOptions;
}

/**
 * Gives access to the FastqCommandOptions instance held by this parser.
 *
 * @return the value of the fastqCommandOptions field
 */
public FastqCommandOptions getFastqCommandOptions() {
    return this.fastqCommandOptions;
}
/**
 * Gives access to the FastqCommandOptions instance held by this parser.
 *
 * @return the value of the fastqCommandOptions field
 */
public FastqCommandOptions getFastqCommandOptions() {
    return this.fastqCommandOptions;
}

/**
 * Gives access to the FastqAlignCommandOptions instance held by this parser.
 *
 * @return the value of the fastqAlignCommandOptions field
 */
public FastqAlignCommandOptions getFastqAlignCommandOptions() {
    return this.fastqAlignCommandOptions;
}

public BamCommandOptions getBamCommandOptions() {
return bamCommandOptions;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright 2015 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.hpg.bigdata.app.cli;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.opencb.hpg.bigdata.core.utils.PathUtils;
import org.opencb.hpg.bigdata.tools.stats.read.mr.ReadKmersMR;
import org.opencb.hpg.bigdata.tools.stats.read.mr.ReadStatsMR;

import java.io.IOException;
import java.util.Date;

/**
* Created by imedina on 03/02/15.
*/
public class FastqAlignCommandExecutor extends CommandExecutor {

private CliOptionsParser.FastqAlignCommandOptions fastqAlignCommandOptions;

public FastqAlignCommandExecutor(CliOptionsParser.FastqAlignCommandOptions fastqAlignCommandOptions) {
super(fastqAlignCommandOptions.commonOptions.logLevel, fastqAlignCommandOptions.commonOptions.verbose,
fastqAlignCommandOptions.commonOptions.conf);

this.fastqAlignCommandOptions = fastqAlignCommandOptions;
}

/**
* Parse specific 'fastq' command options
*/
public void execute() {
logger.info("Executing {} CLI options", "fastq");

// prepare the HDFS output folder
FileSystem fs = null;
Configuration conf = new Configuration();
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
e.printStackTrace();
}
String outHdfsDirname = new String("" + new Date().getTime());

// clean paths
String in = PathUtils.clean(fastqAlignCommandOptions.input);
String index = PathUtils.clean(fastqAlignCommandOptions.index);
String out = PathUtils.clean(fastqAlignCommandOptions.output);

if (!PathUtils.isHdfs(fastqAlignCommandOptions.input)) {
logger.error("To align fastq, the input FastQ file '{}' must be stored in the HDFS/Haddop. Use the command 'convert fastq2sa' to import your file.", fastqAlignCommandOptions.input);
System.exit(-1);
}

if (!PathUtils.isHdfs(fastqAlignCommandOptions.index)) {
logger.error("To align fastq, the index folder '{}' must be stored in the HDFS/Haddop.", fastqAlignCommandOptions.index);
System.exit(-1);
}

if (!PathUtils.isHdfs(fastqAlignCommandOptions.output)) {
logger.error("To align fastq, the output folder '{}' must be stored in the HDFS/Haddop.", fastqAlignCommandOptions.output);
System.exit(-1);
}

try {
System.out.println("input = " + in + ", index = " + index + ", out = " + out);
//AlignMR.run(in, out);
} catch (Exception e) {
e.printStackTrace();
}

/*
// post-processing
Path outFile = new Path(outHdfsDirname + "/part-r-00000");
try {
if (!fs.exists(outFile)) {
System.out.println("out file = " + outFile.getName() + " does not exist !!");
} else {
String outRawFileName = fastqCommandOptions.output + "/raw.json";
fs.copyToLocalFile(outFile, new Path(outRawFileName));
//Utils.parseStatsFile(outRawFileName, out);
}
fs.delete(new Path(outHdfsDirname), true);
} catch (IOException e) {
e.printStackTrace();
}
*/
}
}

0 comments on commit 7d7a37d

Please sign in to comment.