Skip to content

Commit

Permalink
Update to MuSE2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
jiyunmaths committed Oct 10, 2022
1 parent 0c1be9a commit 7f42988
Show file tree
Hide file tree
Showing 7 changed files with 485 additions and 289 deletions.
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ FROM ubuntu:20.04

ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y git g++ cmake autoconf libtool liblzma-dev zlib1g-dev libbz2-dev libcurl3-dev libssl-dev
RUN apt-get update && apt-get install -y --no-install-recommends \
git g++ cmake autoconf libtool liblzma-dev zlib1g-dev libbz2-dev libcurl3-dev libssl-dev \
ca-certificates cpp make libltdl-dev wget unzip \
&& apt-get clean && rm -rf /var/lib/apt/lists/*

RUN git clone --recursive https://github.com/wwylab/MuSE
RUN cd MuSE && ./install_muse.sh
Expand Down
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,17 @@ Options:
-G input generated from whole genome sequencing data
-E input generated from whole exome sequencing data
-O STR output file name (VCF format)
-n int number of cores specified (default=1)
-D FILE dbSNP vcf file that should be bgzip compressed,
tabix indexed and based on the same reference
genome used in 'MuSE call'
Example:
MuSE sump -I Output.Prefix.MuSE.txt -G -O Output.Prefix.vcf -D dbsnp.vcf.gz
WGS
MuSE sump -I Output.Prefix.MuSE.txt -O Output.Prefix.vcf -G -n 10 -D dbsnp.vcf.gz
or WES
MuSE sump -I Output.Prefix.MuSE.txt -O Output.Prefix.vcf -E -n 10 -D dbsnp.vcf.gz
```

## Output of MuSE
Expand Down
4 changes: 1 addition & 3 deletions inc/muse_const.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class mplp_conf_t{
minAltFraction = 0.005;
min_output_brlens = 1e-4;
flag = MPLP_NO_ORPHAN;
flag_mask = BAM_DEF_MASK;
flag_mask = BAM_DEF_MASK;

ref = new Reference();
ref->openFile(refName.c_str());
Expand All @@ -128,6 +128,4 @@ struct bam_pileup1_t_pb{
bool is_del, is_head, is_tail, is_refskip;
};



#endif
1 change: 1 addition & 0 deletions inc/muse_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class PBReader {
}
return mHeader_normal->target_name[i];
}

PBReader(std::string& tumorName, std::string& normalName, mplp_conf_t* conf_in): conf(conf_in){
ReadingDone.store(false);
curPtr = nullptr;
Expand Down
1 change: 1 addition & 0 deletions inc/statistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <sys/stat.h>
#include <math.h>
#include <float.h>
#include "omp.h"
#include "muse_const.h"

using namespace std;
Expand Down
45 changes: 40 additions & 5 deletions src/main_muse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ contributions of implementing accelerating techniques in the ‘MuSE call’ ste
#include "muse_reader.h"
#include "timer.h"
#include "tabix.h"
#include <omp.h>

using namespace std;

int tid_global = 0;
int64_t pos_global = -1;

void muse_sump(const char *inFile, const char *outFile, const char *dbsnpFile, bool isWGS, bool isWES, int argc, char *argv[]);
void muse_sump(const char *inFile, const char *outFile, const char *dbsnpFile, bool isWGS, bool isWES, int num_threads, int argc, char *argv[]);

void monitorFun(PBReader* reader, std::atomic<uint32_t>& processQSize, PileupSpscQ& writeQ, std::atomic<bool>& monitorFlag){
while(monitorFlag.load()){
Expand Down Expand Up @@ -208,21 +209,24 @@ void get_MuseCallOpts(int argc, char* argv[]){
}

// Parses the command-line options of the MuSE sump step, validates them,
// settles the OpenMP thread count and then hands everything to muse_sump().
//
// argc/argv are scanned with getopt() and are also passed through unchanged to
// muse_sump(). Every validation failure visible here ends the process through
// exit(EXIT_FAILURE); the function returns nothing.
//
// NOTE(review): this listing looks like a rendered diff. Rows starting with
// Expand mark elided code, and a few adjacent statements (the two int c
// declarations, the two getopt loops, the two muse_sump calls) appear to be the
// before and after versions of one line -- confirm against the real file.
void get_MuseSumpOpts(int argc, char *argv[]){
int c;
int c;
const char *outFile = NULL;
const char *inFile = NULL;
const char *dbsnpFile = NULL;
bool isWGS = false;
bool isWES = false;

// Thread count as given with -n; kept as text until it is converted with
// stoi() further down.
// NOTE(review): the default text is 0, which the threadNum < 1 check below
// rejects, while the usage message advertises default=1. Unless elided code
// changes it, omitting -n makes the program exit -- confirm the intended default.
const char *threadNum_c = "0";
int threadNum;
// command options
//

// In a getopt option string a trailing colon means the option takes an
// argument: I, O, D and n do, whereas G and E are plain flags.
while((c = getopt(argc, argv, "I:O:D:GE")) >= 0) {
while((c = getopt(argc, argv, "I:O:D:n:GE")) >= 0) {
switch(c) {
case 'I': inFile = optarg; break;
case 'O': outFile = optarg; break;
case 'D': dbsnpFile = optarg; break;
case 'n': threadNum_c = optarg; break;
case 'G': isWGS = true; break;
case 'E': isWES = true; break;
}
Expand All @@ -235,6 +239,7 @@ void get_MuseSumpOpts(int argc, char *argv[]){
// Usage text written to stderr (the condition guarding it is elided above).
fprintf(stderr, " -G input generated from whole genome sequencing data\n");
fprintf(stderr, " -E input generated from whole exome sequencing data\n");
fprintf(stderr, " -O STR output file name (VCF format)\n");
fprintf(stderr, " -n int number of cores specified (default=1)\n");
fprintf(stderr, " -D FILE dbSNP vcf file that should be bgzip compressed,\n");
fprintf(stderr, " tabix indexed and based on the same reference\n");
fprintf(stderr, " genome used in 'MuSE call'\n");
Expand Down Expand Up @@ -264,6 +269,19 @@ void get_MuseSumpOpts(int argc, char *argv[]){
exit(EXIT_FAILURE);
}

// Convert the -n text to an int. stoi() throws std::invalid_argument for
// non-numeric text and std::out_of_range for values that do not fit in an int;
// in both cases the exception message is printed and the process exits.
try{
threadNum = stoi (threadNum_c);
}
catch(const std::exception& e){
cerr << e.what() << endl;
exit(EXIT_FAILURE);
}

// A thread count below 1 is rejected outright.
if (threadNum < 1){
cerr << "Number of cores cannot be less than 1. Exiting..." << endl;
exit(EXIT_FAILURE);
}

// check if dbSNP file was bgzipped
//
if(dbsnpFile) {
Expand Down Expand Up @@ -298,7 +316,24 @@ void get_MuseSumpOpts(int argc, char *argv[]){
free(fnidx);
}

muse_sump(inFile, outFile, dbsnpFile, isWGS, isWES, argc, argv);
// Team size the OpenMP runtime uses for a default parallel region; it keeps
// the value 1 when the build has no OpenMP support.
int num_threads = 1;

#ifdef _OPENMP
#pragma omp parallel
{
// Only the master thread stores the team size, so num_threads has a single
// writer inside the parallel region.
#pragma omp master
num_threads = omp_get_num_threads();
}
#else
// Without OpenMP there is nothing to query.
#endif

// Never request more threads than the runtime reported.
if (threadNum > num_threads) threadNum = num_threads;

#ifdef _OPENMP
// Make the final count the team size for later parallel regions.
omp_set_num_threads(threadNum);
#endif

// Run the sump step with the collected options and the final thread count.
muse_sump(inFile, outFile, dbsnpFile, isWGS, isWES, threadNum, argc, argv);
}

//================================================================================================= Main
Expand Down Expand Up @@ -338,4 +373,4 @@ int main(int argc, char* argv[]){
return 1;
}
return 0;
}
}
Loading

0 comments on commit 7f42988

Please sign in to comment.