Update to MuSE2.1

wwylab · Oct 10, 2022 · 7f42988 · 7f42988
1 parent 0c1be9a
commit 7f42988
Show file tree

Hide file tree

Showing 7 changed files with 485 additions and 289 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -2,7 +2,10 @@ FROM ubuntu:20.04
 
 ARG DEBIAN_FRONTEND=noninteractive
 
-RUN apt-get update && apt-get install -y git g++ cmake autoconf libtool liblzma-dev zlib1g-dev libbz2-dev libcurl3-dev libssl-dev
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git g++ cmake autoconf libtool liblzma-dev zlib1g-dev libbz2-dev libcurl3-dev libssl-dev \
+    ca-certificates cpp make libltdl-dev wget unzip \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 RUN git clone --recursive https://github.com/wwylab/MuSE
 RUN cd MuSE && ./install_muse.sh

diff --git a/README.md b/README.md
@@ -62,12 +62,17 @@ Options:
          -G         input generated from whole genome sequencing data
          -E         input generated from whole exome sequencing data
          -O STR     output file name (VCF format)
+         -n int     number of cores specified (default=1)
          -D FILE    dbSNP vcf file that should be bgzip compressed,
                     tabix indexed and based on the same reference
                     genome used in 'MuSE call'
 
 Example:
-MuSE sump -I Output.Prefix.MuSE.txt -G -O Output.Prefix.vcf -D dbsnp.vcf.gz
+# WGS
+MuSE sump -I Output.Prefix.MuSE.txt -O Output.Prefix.vcf -G -n 10 -D dbsnp.vcf.gz
+
+# or WES
+MuSE sump -I Output.Prefix.MuSE.txt -O Output.Prefix.vcf -E -n 10 -D dbsnp.vcf.gz
 ```
 
 ## Output of MuSE

diff --git a/inc/muse_const.h b/inc/muse_const.h
@@ -103,7 +103,7 @@ class mplp_conf_t{
 		minAltFraction    = 0.005;
 		min_output_brlens = 1e-4;
 		flag              = MPLP_NO_ORPHAN;
-		flag_mask = BAM_DEF_MASK;
+		flag_mask 		  = BAM_DEF_MASK;
 
 		ref = new Reference();
 		ref->openFile(refName.c_str());
@@ -128,6 +128,4 @@ struct bam_pileup1_t_pb{
 	bool is_del, is_head, is_tail, is_refskip;
 };
 
-
-
 #endif
diff --git a/inc/muse_reader.h b/inc/muse_reader.h
@@ -50,6 +50,7 @@ class PBReader {
 		}
 		return mHeader_normal->target_name[i];
 	}
+
 	PBReader(std::string& tumorName, std::string& normalName, mplp_conf_t* conf_in): conf(conf_in){
 		ReadingDone.store(false);
 		curPtr = nullptr;   	

diff --git a/inc/statistics.h b/inc/statistics.h
@@ -4,6 +4,7 @@
 #include <sys/stat.h>
 #include <math.h>
 #include <float.h>
+#include "omp.h"
 #include "muse_const.h"
 
 using namespace std;

diff --git a/src/main_muse.cpp b/src/main_muse.cpp
@@ -14,13 +14,14 @@ contributions of implementing accelerating techniques in the ‘MuSE call’ ste
 #include "muse_reader.h"
 #include "timer.h"
 #include "tabix.h"
+#include <omp.h>
 
 using namespace std;
 
 int tid_global = 0;
 int64_t pos_global = -1;
 
-void muse_sump(const char *inFile, const char *outFile, const char *dbsnpFile, bool isWGS, bool isWES, int argc, char *argv[]);
+void muse_sump(const char *inFile, const char *outFile, const char *dbsnpFile, bool isWGS, bool isWES, int num_threads, int argc, char *argv[]);
 
 void monitorFun(PBReader* reader, std::atomic<uint32_t>& processQSize, PileupSpscQ& writeQ, std::atomic<bool>& monitorFlag){
 	while(monitorFlag.load()){
@@ -208,21 +209,24 @@ void get_MuseCallOpts(int argc, char* argv[]){
 }
 
 void get_MuseSumpOpts(int argc, char *argv[]){
-	int        c;
+    int        c;
     const char *outFile     = NULL;
     const char *inFile      = NULL;
     const char *dbsnpFile   = NULL;
     bool       isWGS        = false;
     bool       isWES        = false;
 
+    const char *threadNum_c = "0";
+    int threadNum;
     // command options
     //
 
-	while((c = getopt(argc, argv, "I:O:D:GE")) >= 0) {
+	while((c = getopt(argc, argv, "I:O:D:n:GE")) >= 0) {
         switch(c) {
         case 'I': inFile    = optarg; break;
         case 'O': outFile   = optarg; break;
         case 'D': dbsnpFile = optarg; break;
+	case 'n': threadNum_c  = optarg; break; 
         case 'G': isWGS     = true;   break;
         case 'E': isWES     = true;   break;
         }
@@ -235,6 +239,7 @@ void get_MuseSumpOpts(int argc, char *argv[]){
         fprintf(stderr, "         -G         input generated from whole genome sequencing data\n");
         fprintf(stderr, "         -E         input generated from whole exome sequencing data\n");
         fprintf(stderr, "         -O STR     output file name (VCF format)\n");
+	fprintf(stderr, "         -n int     number of cores specified (default=1)\n");
         fprintf(stderr, "         -D FILE    dbSNP vcf file that should be bgzip compressed,\n");
         fprintf(stderr, "                    tabix indexed and based on the same reference\n");
         fprintf(stderr, "                    genome used in 'MuSE call'\n");
@@ -264,6 +269,19 @@ void get_MuseSumpOpts(int argc, char *argv[]){
         exit(EXIT_FAILURE);
     }
 
+    try{
+	threadNum = stoi (threadNum_c);
+    }
+    catch(const std::exception& e){
+	cerr << e.what() << endl;
+	exit(EXIT_FAILURE);
+    }
+
+    if (threadNum < 1){
+	cerr << "Number of cores cannot be less than 1. Exiting..." << endl;
+	exit(EXIT_FAILURE);
+    } 
+
     // check if dbSNP file was bgzipped
     //
 	if(dbsnpFile) {
@@ -298,7 +316,24 @@ void get_MuseSumpOpts(int argc, char *argv[]){
         free(fnidx);
     }
 
-    muse_sump(inFile, outFile, dbsnpFile, isWGS, isWES, argc, argv);
+    int num_threads = 1;
+
+#ifdef _OPENMP
+#pragma omp parallel
+    {
+#pragma omp master
+        num_threads = omp_get_num_threads();
+    }
+#else
+#endif
+
+    if (threadNum > num_threads) threadNum = num_threads;
+
+#ifdef _OPENMP 
+    omp_set_num_threads(threadNum);
+#endif
+
+    muse_sump(inFile, outFile, dbsnpFile, isWGS, isWES, threadNum, argc, argv);
 }
 
 //================================================================================================= Main
@@ -338,4 +373,4 @@ int main(int argc, char* argv[]){
         return 1;
     }
 	return 0;
-}
+}