diff --git a/cgat/tools/bam2UniquePairs.py b/cgat/tools/bam2UniquePairs.py deleted file mode 100644 index 22f64f33e..000000000 --- a/cgat/tools/bam2UniquePairs.py +++ /dev/null @@ -1,149 +0,0 @@ -'''bam2UniquePairs.py - filter/report uniquely mapped read pairs from a (bwa!) bam-file -====================================================================================== - -:Tags: Genomics NGS - -Purpose -------- - -Utility script to report and/or filter out "uniquely mapped" properly -paired reads - -Reports: - -1. The percentage of properly mapped read pairs with at least one - uniquely mapped (XT=U) read - -2. The percentage of properly mapped read pairs with at least one best - mapped (X0-1) read - -3. The percentage of properly mapped read pairs with at least one - uniquely or best mapped (X0-1) read - -If outfile is specified, reads are emitted when they are properly -paired and the pair has at least one read that is either best or -uniquely mapped. - -Duplication is ignored. - -Only BWA is supported. - -TODO: cache and emit reads rather than iterating over the samfile twice... - -''' - -import sys -import cgatcore.experiment as E -import pysam - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-f", "--bam-file", "--filename", dest="filename", type=str, - help="bamfile") - - parser.add_argument("-a", "--aligner", dest="aligner", type=str, - help="bamfile", default="bwa") - - parser.add_argument("-r", "--output-report", type=str, dest="report", - help="bamfile", default="") - - parser.add_argument("-o", "--output-filename-bam", "--outfile", dest="outfile", type=str, - help="bamfile", default="") - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv, add_output_options=True) - - # Check the aligner is supported - if args.aligner != "bwa": - raise ValueError( - "Currently only bwa is supported as aligner specific flags are used") - - # Check that either a report or outfile name has been specified - if args.report == "" and args.outfile == "": - raise ValueError("Nothing to do") - - # Analyse the bamfile - samfile = pysam.AlignmentFile(args.filename, "rb") - uniq_map, best_map, uORb_map = {}, {}, {} - properly_paired = 0 - - for read in samfile.fetch(): - - if read.is_proper_pair: - tagd = dict(read.tags) - u, b, key = False, False, read.qname - - if tagd["XT"] == "U": - u = True - uniq_map[key] = 1 - - if "X0" in tagd: - if tagd["X0"] == 1: - b = True - best_map[key] = 1 - - if u is True or b is True: - uORb_map[key] = 1 - - properly_paired += 1 - - samfile.close() - - npp = properly_paired / 2 - - E.info("No proper pairs: %s" % npp) - - # Write a tabular report if report name given - if args.report != "": - - E.info("Writing report on no. proper pairs with unique/best reads") - - def _row(x, npp=npp): - name, d = x - n = len(list(d.keys())) - pc = float(n) / npp * 100 - line = "%s\t%i\t%.2f" % (name, n, pc) - return(line) - - header = "\t".join(["pair_criteria", "n_proper_pairs", - "percent_proper_pairs"]) - - with iotools.open_file(args.report, "w") as report: - report.write(header + "\n") - for x in [("unique", uniq_map), ("best", best_map), - ("unique_or_best", uORb_map)]: - report.write(_row(x) + "\n") - - # Create new bam containing uniquely mapping read pairs - # if outfile specified - if args.outfile != "": - - E.info("Writing proper pairs with unique or best read to %s" % - args.outfile) - - samfile = pysam.AlignmentFile(args.filename, "rb") - outbam = pysam.AlignmentFile(args.outfile, "wb", template=samfile) - - for read in samfile.fetch(): - if read.is_proper_pair: - if read.qname in uORb_map: - outbam.write(read) - samfile.close() - outbam.close() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/bam2libtype.py b/cgat/tools/bam2libtype.py deleted file mode 100644 index e90776b20..000000000 --- a/cgat/tools/bam2libtype.py +++ /dev/null @@ -1,180 +0,0 @@ -"""bam2libtype.py - determine the library type of a bam file -============================================================ - -Author: Adam Cribbs - -Purpose -------- - -This tool determines the library type of a BAM file. The naming -convention used is from the salmon documentation: -http://salmon.readthedocs.io/en/latest/library_type.html. - -BAM files need to have a corresponding index file i.e. example.bam -and example.bam.bai - -For single-end data - - Determining which read the strand is on is straightforward using pysam - function .is_reversed. - -For paired-end data - - The relative position of read1 and read2 needs to be determined including - orientation relative to each other. - - -Usage ------ - - cat example.bam | cgat bam2libtype > out.tsv - -options -------- - -There are no options for this script, just pass the script a bam file -as the stdin and an outfile as the stdout. - - -Type:: - - python bam2bed.py --help - -for command line help. - -Command line options --------------------- -""" - -import sys -import pysam -import cgatcore.experiment as E - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument( - "-i", "--max-iterations", type=int, - help="maximum number of iterations. Set to 0 to go through all reads " - ) - - parser.set_defaults( - max_iteratiors=10000 - ) - - (args) = E.start(parser, argv=argv) - - samfile = pysam.AlignmentFile(args.stdin, "rb") - outfile = args.stdout - - # initialise counts for each library type - MSR = 0 - MSF = 0 - ISF = 0 - ISR = 0 - OSF = 0 - OSR = 0 - SR = 0 - SF = 0 - - reads_processed = set() - - for iteration, read in enumerate(samfile): - - if args.max_iterations and iteration > int(args.max_iterations): - break - - if read.qname not in reads_processed: - reads_processed.add(read.qname) - else: - continue - - # to handle paired end reads: - if read.is_paired and read.is_proper_pair: - # get attributes of read - read_start = read.reference_start - read_end = read.reference_end - read_neg = read.is_reverse - - # specify which read is R1 and which is R2: - # specify which read is R1 and which is R2: - if read.is_read1 is True: - R1_is_reverse = read.is_reverse - R1_reference_start = read.reference_start - - R2_is_reverse = read.mate_is_reverse - R2_reference_start = read.next_reference_start - else: - R1_is_reverse = read.mate_is_reverse - R1_reference_start = read.next_reference_start - - R2_is_reverse = read.is_reverse - R2_reference_start = read.reference_start - - # Decision tree to specify strandness: - # potential to convert this to a machine learning - # decision tree algorithm in the future: - if R1_is_reverse is True: - - if R2_is_reverse is True: - - MSF += 1 - else: - if R2_reference_start - R1_reference_start >= 0: - OSR += 1 - else: - ISR += 1 - - else: - - if R2_is_reverse is True: - - if R1_reference_start - R2_reference_start >= 0: - - OSF += 1 - else: - ISF += 1 - else: - MSR += 1 - else: - if read.is_reverse: - SR += 1 - else: - SF += 1 - - total = MSR + ISR + OSR + ISF + MSF + OSF + SF + SR - - def total_percent(strand, total): - return float(strand)/float(total)*100 - - MSR_total = total_percent(MSR, total) - ISR_total = total_percent(ISR, total) - OSR_total = total_percent(OSR, total) - ISF_total = total_percent(ISF, total) - MSF_total = total_percent(MSF, total) - OSF_total = total_percent(OSF, total) - SF_total = total_percent(SF, total) - SR_total = total_percent(SR, total) - - outfile.write("MSR\tISR\tOSR\tISF\tMSF\tOSF\tSF\tSR\n") - outfile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % - (int(MSR_total), int(ISR_total), int(OSR_total), - int(ISF_total), int(MSF_total), - int(OSF_total), int(SF_total), int(SR_total))) - - E.stop() - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/bam2peakshape.py b/cgat/tools/bam2peakshape.py deleted file mode 100644 index 687483add..000000000 --- a/cgat/tools/bam2peakshape.py +++ /dev/null @@ -1,658 +0,0 @@ -'''bam2peakshape.py - compute peak shape features from a bam-file -============================================================== - -:Tags: Genomics NGS Intervals BAM BED Summary - -Purpose -------- - -This script takes a :term:`bed` formatted file with regions of -interest, for example binding intervals from a ChIP-Seq -experiment. Using a collection of aligned reads is a :term:`bam` -formatted file or :term:`bigwig` formatted file, the script outputs a -collection of features describing the peak shape. - -This script is designed with a slight emphasis on ChIP-Seq datasets. -The main reason that this script is better suited for ChIP-Seq is -that(1) it is able to center the counting window at the summit of -every individual peak; (2) it is also able to use the control ChIP-Seq -library to enable side-by-side comparison of treatment vs control;(3) -it can randomly shift the set of input regions to generate a -artificial set of regions, in the absence of real ChIP-Seq control -library, the random regions can provide a peaks profile that can be -used as the control. - -For example, given the peaks regions defined by analyzing some -ChIP-Seq dataset (e.g. by using MACS), and without the need to use any -additional genomic annotations (e.g. ENSEMBL, refseq), we can -visualise the binding profiles of transcriptionfactors ChIP-Seq data -relative to the center of each peak regions. - -The script outputs a tab-separated table on stdout containing features -for each interval. A peak is defined as the location of the highest -density in an interval. The width of the peak (peak_width) is defined -as the region around the peak in which the density does not drop below -a threshold of peak_heigt * 90%. - -Usage ------ - -Detailed usage example -++++++++++++++++++++++ - -The following command will generate the peak shape plot for the peak -regions defined in :file:`onepeak.bed`, using the reads stored in -:file:`small.bam`. The command will also create a profile for the -control library. The control library in this example is re-using the -same reads file :file:`small.bam`, however, in your actual experiment, -it should be a different library (the input library for this ChIP-Seq -experiment).:: - - python ./scripts/bam2peakshape.py \ - ./tests/bam2peakshape.py/small.bam \ - ./tests/bam2peakshape.py/onepeak.bed \ - --control-bam-file=./tests/bam2peakshape.py/small.bam \ - --use-interval \ - --normalize-transcript - - -Output files -++++++++++++ - -Among the features output are: - -+-------------------+---------------------------------------------------------+ -|*Column* |*Content* | -+-------------------+---------------------------------------------------------+ -|peak_height |number of reads at peak | -+-------------------+---------------------------------------------------------+ -|peak_median |median coverage compared to peak height | -+-------------------+---------------------------------------------------------+ -|interval_width |width of interval | -+-------------------+---------------------------------------------------------+ -|peak_width |width of peak | -+-------------------+---------------------------------------------------------+ -|bins |bins for a histogram of densities within the interval. | -+-------------------+---------------------------------------------------------+ -|npeaks |number of density peaks in interval. | -+-------------------+---------------------------------------------------------+ -|peak_center |point of highest density in interval | -+-------------------+---------------------------------------------------------+ -|peak_relative_pos |point of highest density in interval coordinates | -+-------------------+---------------------------------------------------------+ -|counts |counts for a histogram of densities within the interval | -+-------------------+---------------------------------------------------------+ -|furthest_half_heigh|Distance of peak center to furthest half-height position | -+-------------------+---------------------------------------------------------+ -|closest_half_height|Distance of peak center to closest half-height position | -+-------------------+---------------------------------------------------------+ - - -Additionally, the script outputs a set of matrixes with densities over -intervals that can be used for plotting. The default filenames are -``(matrix|control)_.tsv.gz``, The names can be controlled -with the ``--output-filename-pattern`` option. - - -Type:: - - python bam2peakshape.py --help - -for command line help. - - -Options -------- - -Option: Shift -+++++++++++++ - -shift the each read by a certain distance, because in a ChIP-Seq -experment, the read is always at the edge of an sonicated fragment, -the actual binding site is usually L/2 distance away from the read, -where L is the length of sonicated fragment (determined either -experimentally or computationally). - -This option is used only if the input reads are in :term:`bam` formatted file. -If input reads are :term:`bigwig` formatted file, this option is ignored. - -Option: Random shift -++++++++++++++++++++ - -randomly shift the set of input regions to generate a artificial set -of regions. In the absence of real ChIP-Seq control library, the -random regions can provide a peaks profile that can be used as the -control. - -Option: Centring method -+++++++++++++++++++++++ - -"reads" will output in the way that the summit of the peaks are -aligned. "middle" will output in the way that the middle of the input -bed intervals are aligned. - -Option: Only interval -+++++++++++++++++++++ - -Only count reads that are in the interval as defined by the input bed file. - -Option: normalization=sum -+++++++++++++++++++++++++ - -normalize counts such that the sum of all counts in all features are -exactly 1000000. - -The detail normalization algorithm as follows: norm = sum(all counts -in all features)/1000000.0 normalized count = normalized count / norm - -.. todo:: - - paired-endedness is not fully implemented. - -Command line options --------------------- - -''' - -import sys -import os -import re -import cgatcore.experiment as E -import cgatcore.iotools as iotools -import pysam -import cgat.Bed as Bed -import numpy -import collections -import pyBigWig - -import cgat.BamTools.peakshape as bam2peakshape - - -def buildArgumentParser(argv): - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-f", "--format", dest="format", type=str, - choices=("bam", "bigwig"), - help="format of genomic input files for densities " - ) - - parser.add_argument( - "-o", "--use-interval", dest="use_interval", action="store_true", - help="only count tags that are in interval given " - "in bed file. Otherwise, use a fixed width window (see --window-size) " - "around peak ") - - parser.add_argument( - "-w", "--window-size", dest="window_size", type=int, - help="window size in bp on either side of a peak used for getting " - "read densities. If ``--window-size`` is 1000, the actual window size" - "will be 2kb, 1kb on either side of the peak in an interval" - ) - - parser.add_argument( - "-b", "--bin-size", dest="bin_size", type=int, - help="bin-size in bp for computing read densities. " - "If ``--window-size`` is set to 1000 and ``--bin-size`` to 10, " - "there will be 100 bins on either side of a peak. " - ) - - parser.add_argument( - "--smooth-method", dest="smooth_method", type=str, - choices=("none", "sum", "sg"), - help="smooting method to apply to density data before sampling " - "according to ``bin-size``. sg=SavitzkyGolay, sum=sum density in bin, " - "none=no smoothing " - ) - - parser.add_argument("-s", "--sort-order", dest="sort_orders", - type=str, - action="append", - choices=("peak-height", "peak-width", "unsorted", - "interval-width", "interval-score"), - help="output sort order for matrices. " - ) - - parser.add_argument( - "-c", "--control-bam-file", "--control-bigwig-file", - action="append", - dest="control_files", - type=str, - help="control file. If given, two peakshapes are computed, " - "one for the primary data and one for the control data. " - "The control file is centered around the same " - "base as the primary file and output in the same " - "sort order as the primary profile to all side-by-side. " - "comparisons. Multiple control files can be given. The " - "control files should have the same format as the " - "principal input file " - ) - - parser.add_argument( - "-r", "--random-shift", dest="random_shift", action="store_true", - help="shift intervals in random direction up/downstream of interval " - ) - - parser.add_argument( - "-e", "--centring-method", dest="centring_method", type=str, - choices=("reads", "middle"), - help="centring method. Available are: " - "reads=use density to determine peak, " - "middle=use middle of interval " - ) - - parser.add_argument( - "-n", "--normalize-matrix", dest="normalization", type=str, - choices=("none", "sum"), - help="matrix normalisation to perform. " - ) - - parser.add_argument( - "--use-strand", dest="strand_specific", action="store_true", - help="use strand information in intervals. Intervals on the " - "negative strand are flipped " - ) - - parser.add_argument( - "-i", "--shift-size", dest="shift", type=int, - help="shift for reads. When processing bam files, " - "reads will be shifted upstream/downstream by this amount. " - ) - - parser.set_defaults( - bin_size=10, - shift=0, - window_size=1000, - sort_orders=[], - centring_method="reads", - control_files=[], - random_shift=False, - strand_specific=False, - format="bam", - report_step=100, - use_interval=False, - smooth_method=None, - ) - - return parser - - -IntervalData = collections.namedtuple( - "IntervalData", - "foreground interval controls shifted") - - -def outputFeatureTable(outfile, features_per_interval, bins): - '''ouput results from density profiles.''' - - outfile.write("\t".join( - ("contig", - "start", - "end", - "name", - "\t".join(bam2peakshape.PeakShapeResult._fields))) + "\n") - - # output principal table - n = 0 - for foreground, bed, controls, shifted in features_per_interval: - n += 1 - if "name" in bed: - name = bed.name - else: - name = str(n) - outfile.write("%s\t%i\t%i\t%s\t" % - (bed.contig, bed.start, bed.end, name)) - - outfile.write("\t".join(map(str, foreground[:-2]))) - bins, counts = foreground[-2], foreground[-1] - outfile.write("\t%s" % ",".join(map(str, bins))) - outfile.write("\t%s" % ",".join(map(str, counts))) - outfile.write("\n") - - -def writeMatricesForSortOrder(features_per_interval, - bins, - foreground_track, - control_tracks, - shifted, - sort_order): - '''output one or more matrices for each sort sorder. - - For each sort order output the forerground. If there - are additional controls and shifted section, output - these as well - - The files will named: - matrix__ - - ''' - if "name" in features_per_interval[0].interval: - names = [x.interval.name for x in features_per_interval] - else: - names = list(map(str, list(range(1, len(features_per_interval) + 1)))) - - bins = ["%i" % x for x in bins] - sort_order = re.sub("-", "_", sort_order) - - # write foreground - iotools.write_matrix( - E.open_output_file("matrix_%s_%s.gz" % (foreground_track, sort_order)), - [x.foreground.counts for x in features_per_interval], - row_headers=names, - col_headers=bins, - row_header="name") - - # write controls - for idx, track in enumerate(control_tracks): - iotools.write_matrix( - E.open_output_file("matrix_%s_%s.gz" % (track, sort_order)), - [x.controls[idx].counts for x in features_per_interval], - row_headers=names, - col_headers=bins, - row_header="name") - - # write shifted matrix - if shifted: - iotools.write_matrix( - E.open_output_file("matrix_shift_%s.gz" % (sort_order)), - [x.shifted.counts for x in features_per_interval], - row_headers=names, - col_headers=bins, - row_header="name") - - # output a combined matrix - if len(control_tracks) > 0 or shifted: - rows = [] - for row in features_per_interval: - l = [row.foreground.counts] - l.extend([row.controls[x].counts for x in - range(len(control_tracks))]) - if shifted: - l.append(row.shifted.counts) - rows.append(numpy.concatenate(l)) - - n = 1 + len(control_tracks) - if shifted: - n += 1 - - # make column names unique and make sure they can be sorted - # lexicographically - all_bins = [] - for x in range(n): - all_bins.extend(["%i:%s" % (x, b) for b in bins]) - - iotools.write_matrix( - E.open_output_file("matrix_sidebyside_%s.gz" % (sort_order)), - rows, - row_headers=names, - col_headers=all_bins, - row_header="name") - - -def outputMatrices(features_per_interval, - bins, - foreground_track, - control_tracks=None, - shifted=False, - sort_orders=None): - '''ouput matrices from density profiles - in one or more sort_orders. - ''' - - # output sorted matrices - if not sort_orders: - writeMatricesForSortOrder(features_per_interval, - bins, - foreground_track, - control_tracks, - shifted, - "unsorted") - - for sort_order in sort_orders: - - if sort_order == "peak-height": - features_per_interval.sort( - key=lambda x: x.foreground.peak_height) - - elif sort_order == "peak-width": - features_per_interval.sort( - key=lambda x: x.foreground.peak_width) - - elif sort_order == "interval-width": - features_per_interval.sort( - key=lambda x: x.interval.end - x.interval.start) - - elif sort_order == "interval-score": - try: - features_per_interval.sort( - key=lambda x: float(x.interval.score)) - except IndexError: - E.warn("score field not present - no output") - continue - except TypeError: - E.warn("score field not a valid number - no output") - continue - - writeMatricesForSortOrder(features_per_interval, - bins, - foreground_track, - control_tracks, - shifted, - sort_order) - - -def buildDensityMatrices(bedfile, - fg_file, - control_files, - counter, - window_size=1000, - bin_size=10, - strand_specific=False, - centring_method="reads", - use_interval=False, - random_shift=False, - smooth_method="none", - report_step=1000): - '''compute densities and peakshape parameters - in intervals given by *bedfile* using reads in *fg_file*. - - If *control_files* are given, densities are produced for - these as well. - - Returns a list of results for each interval in *bedfile* of - type IntervalData and an array of bin-values. - ''' - - if window_size: - # bins are centered at peak-center and then stretching outwards. - bins = numpy.arange(-window_size + bin_size // 2, - +window_size, - bin_size) - - result = [] - c = E.Counter() - c.input = 0 - - for bed in bedfile: - c.input += 1 - - # if bed.contig not in contigs: - # c.skipped += 1 - # continue - - if c.input % report_step == 0: - E.info("iteration: %i" % c.input) - - features = counter.countInInterval( - fg_file, - bed.contig, bed.start, bed.end, - window_size=window_size, - bins=bins, - use_interval=use_interval, - centring_method=centring_method) - - if features is None: - c.skipped += 1 - continue - - if control_files: - control = [] - for control_file in control_files: - control.append(counter.countAroundPos( - control_file, - bed.contig, - features.peak_center, - bins=features.bins)) - - else: - control = None - - if random_shift: - direction = numpy.random.randint(0, 2) - if direction: - pos = features.peak_center + 2 * bins[0] - else: - pos = features.peak_center + 2 * bins[-1] - shifted = counter.countAroundPos(fg_file, - bed.contig, - pos, - bins=features.bins) - else: - shifted = None - - if strand_specific and bed.strand == "-": - features._replace(hist=features.hist[::-1]) - if control: - for c in control: - c._replace(hist=c.hist[::-1]) - if shifted: - shifted._replace(hist=shifted.hist[::-1]) - - result.append(IntervalData._make((features, bed, control, shifted))) - c.added += 1 - - E.info("interval processing: %s" % c) - - return result, bins - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - parser = buildArgumentParser(argv) - - # add common options (-h/--help, ...) and parse command line - (args, unknown) = E.start(parser, argv=argv, add_output_options=True, unknowns=True) - - if len(unknown) != 2: - raise ValueError( - "please specify one bam- or wig-file and one bed file") - - if args.control_files: - E.info("using control files: %s" % ",".join(args.control_files)) - - infile, bedfile = unknown - control_files = [] - - if args.format == "bigwig": - fg_file = pyBigWig.open(infile) - for control_file in args.control_files: - control_files.append(pyBigWig.open(control_file)) - counter = bam2peakshape.CounterBigwig( - smooth_method=args.smooth_method) - - elif args.format == "bam": - fg_file = pysam.AlignmentFile(infile, "rb") - for control_file in args.control_files: - control_files.append(pysam.AlignmentFile(control_file, "rb")) - counter = bam2peakshape.CounterBam( - shift=args.shift, - smooth_method=args.smooth_method) - - features_per_interval, bins = buildDensityMatrices( - Bed.iterator(iotools.open_file(bedfile)), - fg_file, - control_files, - counter, - window_size=args.window_size, - bin_size=args.bin_size, - strand_specific=args.strand_specific, - centring_method=args.centring_method, - use_interval=args.use_interval, - random_shift=args.random_shift, - smooth_method=args.smooth_method, - report_step=args.report_step) - - if len(features_per_interval) == 0: - E.warn("no data - no output") - E.stop() - return - - outputFeatureTable(args.stdout, features_per_interval, bins) - - # apply normalization - # Note: does not normalize control? - # Needs reworking, currently it does not normalize across - # all samples nor does the work "sum" reflect the per million - # normalization. - if args.normalization == "sum": - E.info("starting sum normalization") - # get total counts across all intervals - norm = 0.0 - for foreground, bed, controls, shifted in features_per_interval: - norm += sum(foreground.counts) - # per million - norm /= float(1000000) - E.info("sum/million normalization with %f" % norm) - - # normalise - new_data = [] - for foreground, bed, controls, shifted in features_per_interval: - foreground = foreground._replace( - counts=numpy.array(foreground.counts, - dtype=numpy.float64) / norm) - new_controls = [] - for control in controls: - new_controls.append( - control._replace( - counts=numpy.array(control.counts, - dtype=numpy.float64) / norm)) - if shifted: - shifted = shifted._replace( - counts=numpy.array(shifted.counts, - dtype=numpy.float64) / norm) - new_data.append(IntervalData._make(( - foreground, bed, new_controls, shifted))) - features_per_interval = new_data - else: - E.info("no normalization performed") - - # center bins - out_bins = bins[:-1] + args.bin_size - - # build tracks - def _toTrack(filename): - return os.path.splitext(os.path.basename(filename))[0] - - outputMatrices(features_per_interval, - out_bins, - foreground_track=_toTrack(infile), - control_tracks=[_toTrack(x) for x in args.control_files], - shifted=args.random_shift, - sort_orders=args.sort_orders) - - # write footer and output benchmark information. - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/bed2annotator.py b/cgat/tools/bed2annotator.py deleted file mode 100644 index 452eaeaab..000000000 --- a/cgat/tools/bed2annotator.py +++ /dev/null @@ -1,183 +0,0 @@ -''' -bed2annotator.py - convert bed to annotator format -================================================== - -:Tags: Python - -Purpose -------- - -This script converts a bed file into annotator compatible regions. Depending on the option --section -this script will create: - - segments - a segments file - - annotations - a file with annotations. Each bed track is a separate annotation. - - workspace - a file with a workspace - -Usage ------ - -Example:: - - python bed2annotator2tsv.py --help - -Type:: - - python bed2annotator2tsv.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys -import re -import itertools -import collections - -import cgatcore.experiment as E -import cgat.Bed as Bed -import cgat.IndexedFasta as IndexedFasta - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("-g", "--genome-file", dest="genome_file", type=str, - help="filename with genome.") - - parser.add_argument("-f", "--features", dest="features", type=str, - help="feature to collect.") - - parser.add_argument("-i", "--files", dest="files", action="append", - help="use multiple annotations.") - - parser.add_argument("-a", "--annotations", dest="annotations", type=str, - help="aggregate name for annotations if only single file is provided from STDIN.") - - parser.add_argument("--map-tsv-file", dest="input_filename_map", type=str, - help="filename with a map of gene_ids to categories.") - - parser.add_argument("-l", "--max-length", dest="max_length", type=str, - help="maximum segment length.") - - parser.add_argument("-m", "--merge-overlapping", dest="merge", action="store_true", - help="merge overlapping bed segments.") - - parser.add_argument("-s", "--section", dest="section", type=str, - choices=("segments", "annotations", "workspace"), - help="annotator section.") - - parser.add_argument("--subset", dest="subsets", type=str, action="append", - help="add filenames to delimit subsets within the gff files. The syntax is filename.gff,label,filename.ids.") - - parser.set_defaults( - genome_file=None, - feature=None, - remove_random=True, - section="segments", - annotations="annotations", - max_length=100000, - files=[], - subsets=[], - input_filename_map=None, - merge=False, - ) - - (args, unknown) = E.start(parser, - unknowns=True) - - args.files += unknown - if len(args.files) == 0: - args.files.append("-") - args.files = list( - itertools.chain(*[re.split("[,; ]+", x) for x in args.files])) - - if args.subsets: - subsets = collections.defaultdict(list) - for s in args.subsets: - filename_gff, label, filename_ids = s.split(",") - subsets[filename_gff].append((label, filename_ids)) - args.subsets = subsets - - if args.genome_file: - fasta = IndexedFasta.IndexedFasta(args.genome_file) - else: - fasta = None - - if args.section == "segments": - prefix = "##Segs" - elif args.section == "annotations": - prefix = "##Id" - elif args.section == "workspace": - prefix = "##Work" - else: - raise ValueError("unknown section %s" % args.section) - - if args.max_length: - max_length = args.max_length - else: - max_length = 0 - - ninput, ntracks, ncontigs, nsegments, ndiscarded = 0, 0, 0, 0, 0 - - if args.section in ("annotations"): - contigs = set() - it = itertools.groupby( - Bed.iterator(args.stdin), key=lambda x: x.track["name"]) - - map_track2segments = {} - for track, beds in it: - ntracks += 1 - map_track2segments[track] = [] - first_segment = nsegments - - beds = list(beds) - - if args.merge: - beds = Bed.merge(beds) - - for bed in beds: - contig, start, end = bed.contig, bed.start, bed.end - - if args.remove_random and "random" in contig: - continue - - if max_length > 0 and end - start > max_length: - ndiscarded += 1 - continue - - contigs.add(contig) - map_track2segments[track].append(nsegments) - args.stdout.write( - "%s\t%i\t%s\t(%i,%i)\n" % (prefix, nsegments, contig, start, end)) - nsegments += 1 - - args.stdout.write("##Ann\t%s\t%s\n" % ( - track, "\t".join(["%i" % x for x in range(first_segment, nsegments)]))) - E.info("track %s: annotated with %i segments" % - (track, nsegments - first_segment)) - - ncontigs = len(contigs) - E.info("ninput=%i, ntracks=%i, ncontigs=%i, nsegments=%i, ndiscarded=%i" % - (ninput, ntracks, ncontigs, nsegments, ndiscarded)) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/bed2plot.py b/cgat/tools/bed2plot.py deleted file mode 100644 index d0cbd2e07..000000000 --- a/cgat/tools/bed2plot.py +++ /dev/null @@ -1,261 +0,0 @@ -''' -bed.plot.py - create genomic snapshots using the IGV Viewer -=========================================================== - -:Tags: Python - -Purpose -------- - -Create genomic plots in a set of intervals using -the IGV snapshot mechanism. - -The script can use a running instance of IGV identified -by host and port. Alternatively, it can start IGV and load -a pre-built session. - -Usage ------ - -Example:: - - python bed2plot.py < in.bed - -Type:: - - python script_template.py --help - -for command line help. - -Command line options --------------------- - -''' - -import os -import sys -import re -import socket -import pysam - - -import cgatcore.experiment as E - - -class IGV(object): - """based on IGV.py by Brent Petersen, see here: - https://github.com/brentp/bio-playground/blob/master/igv/igv.py - - (MIT licenced) - """ - - _socket = None - _path = None - - def __init__(self, host='127.0.0.1', port=60151, snapshot_dir='/tmp/igv'): - self.host = host - self.port = port - self.commands = [] - self.connect() - self.set_path(snapshot_dir) - - def connect(self): - if self._socket: - self._socket.close() - self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self._socket.connect((self.host, self.port)) - - def go(self, position): - return self.send('goto ' + position) - goto = go - - def genome(self, name): - return self.send('genome ' + name) - - def load(self, url): - return self.send('load ' + url) - - def region(self, contig, start, end): - return self.send(' '.join(map(str, ['region', contig, start, end]))) - - def sort(self, option='base'): - """ - options is one of: base, position, strand, quality, sample, and - readGroup. - """ - assert option in ("base", "position", "strand", "quality", "sample", - "readGroup") - return self.send('sort ' + option) - - def set_path(self, snapshot_dir): - if snapshot_dir == self._path: - return - if not os.path.exists(snapshot_dir): - os.makedirs(snapshot_dir) - - self.send('snapshotDirectory %s' % snapshot_dir) - self._path = snapshot_dir - - def expand(self, track=''): - self.send('expand %s' % track) - - def collapse(self, track=''): - self.send('collapse %s' % track) - - def clear(self): - self.send('clear') - - def send(self, cmd): - # socket in Python2 oprates with strings - if sys.version_info.major == 2: - self._socket.send(cmd + '\n') - return self._socket.recv(4096).rstrip('\n') - # while socket in Python3 requires bytes - else: - self.commands.append(cmd) - cmd = cmd + '\n' - self._socket.send(cmd.encode('utf-8')) - return self._socket.recv(4096).decode('utf-8').rstrip('\n') - - def save(self, path=None): - if path is not None: - # igv assumes the path is just a single filename, but - # we can set the snapshot dir. then just use the filename. - dirname = os.path.dirname(path) - if dirname: - self.set_path(dirname) - return self.send('snapshot ' + os.path.basename(path)) - else: - return self.send('snapshot') - snapshot = save - - -def main(argv=sys.argv): - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("-s", "--session", dest="session", - type=str, - help="load session before creating plots " - ) - - parser.add_argument("-d", "--snapshot-dir", dest="snapshotdir", - type=str, - help="directory to save snapshots in ") - - parser.add_argument("-f", "--format", dest="format", type=str, - choices=("png", "eps", "svg"), - help="output file format ") - - parser.add_argument("-o", "--host", dest="host", type=str, - help="host that IGV is running on ") - - parser.add_argument("-p", "--port", dest="port", type=int, - help="port that IGV listens at ") - - parser.add_argument("-e", "--extend", dest="extend", type=int, - help="extend each interval by a number of bases " - ) - - parser.add_argument("-x", "--expand", dest="expand", type=float, - help="expand each region by a certain factor " - ) - - parser.add_argument("--session-only", dest="session_only", - action="store_true", - help="plot session after opening, " - "ignore intervals " - ) - - parser.add_argument("-n", "--name", dest="name", type=str, - choices=("bed-name", "increment"), - help="name to use for snapshot " - ) - - parser.set_defaults( - command="igv.sh", - host='127.0.0.1', - port=61111, - snapshotdir=os.getcwd(), - extend=0, - format="png", - expand=1.0, - session=None, - session_only=False, - keep_open=False, - name="bed-name", - ) - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv, add_output_options=True) - - igv_process = None - if args.new_instance: - E.info("starting new IGV process") - igv_process = IGV.startIGV(command=args.command, - port=args.port) - E.info("new IGV process started") - - E.info("connection to process on %s:%s" % (args.host, args.port)) - E.info("saving images in %s" % args.snapshotdir) - igv = IGV(host=args.host, - port=args.port, - snapshot_dir=os.path.abspath(args.snapshotdir)) - - if args.session: - E.info('loading session from %s' % args.session) - igv.load(args.session) - E.info('loaded session') - - if args.session_only: - E.info('plotting session only ignoring any intervals') - fn = "%s.%s" % (os.path.basename(args.session), args.format) - E.info("writing snapshot to '%s'" % - os.path.join(args.snapshotdir, fn)) - igv.save(fn) - - else: - c = E.Counter() - for bed in pysam.tabix_iterator(args.stdin, - parser=pysam.asBed()): - - c.input += 1 - - # IGV can not deal with white-space in filenames - if args.name == "bed-name": - name = re.sub("\s", "_", bed.name) - elif args.name == "increment": - name = str(c.input) - - E.info("going to %s:%i-%i for %s" % - (bed.contig, bed.start, bed.end, name)) - - start, end = bed.start, bed.end - extend = args.extend - if args.expand: - d = end - start - extend = max(extend, (args.expand * d - d) // 2) - - start -= extend - end += extend - - igv.go("%s:%i-%i" % (bed.contig, start, end)) - - fn = E.get_output_file("%s.%s" % (name, args.format)) - E.info("writing snapshot to '%s'" % fn) - igv.save(fn) - - c.snapshots += 1 - - E.info(c) - - if igv_process is not None and not args.keep_open: - E.info('shutting down IGV') - igv_process.send_signal(signal.SIGKILL) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/cgat/tools/cat_tables.py b/cgat/tools/cat_tables.py deleted file mode 100644 index 8decce6bd..000000000 --- a/cgat/tools/cat_tables.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -cat_tables.py - concatenate tables -================================== - -:Tags: Python - -Purpose -------- - -concatenate tables. Headers of subsequent files are ignored. - -Usage ------ - -Type:: - - python .py --help - -for command line help. - -Command line options --------------------- - -""" - -import sys -import fileinput - -import cgatcore.experiment as E - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.set_defaults( - ) - - # add common options (-h/--help, ...) and parse command line - (args, unknown) = E.start(parser, - argv=argv, - unknowns=True) - - if len(unknown) == 0 or (len(unknown) == 1 and unknown[0] == "-"): - infile = args.stdin - else: - infile = fileinput.FileInput(args) - - # do sth - ninput, nskipped, noutput = 0, 0, 0 - - header = False - - for line in infile: - ninput += 1 - if line.startswith("#"): - pass - elif not header: - header = line - elif line == header: - nskipped += 1 - continue - - args.stdout.write(line) - noutput += 1 - - E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped)) - - # write footer and output benchmark information. - E.stop() - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/cgat_fasta2cDNA.py b/cgat/tools/cgat_fasta2cDNA.py deleted file mode 100644 index 6305f2dea..000000000 --- a/cgat/tools/cgat_fasta2cDNA.py +++ /dev/null @@ -1,76 +0,0 @@ -''' -cgat_fasta2cDNA.py - converting multi-fasta of exon features into a multi-fasta of spliced cDNAs/RNAs -====================================================================================================== - -:Tags: Python - -Purpose -------- - -Usage ------ - -.. Example use case - -Example:: - - python cgat_fasta2cDNA.py - -Type:: - - python cgat_fasta2cDNA.py --help - -for command line help. - -Command line options --------------------- - -''' - -import sys -import cgatcore.experiment as E -import cgatcore.iotools as iotools - - -def makeSplicedFasta(infile): - ''' - Merge fasta sequences together into a single - spliced transcript sequence - ''' - - fasta_dict = {} - with iotools.open_file(infile) as fafile: - for line in fafile.readlines(): - if line[0] == '>': - header = line.rstrip("\n") - fasta_dict[header] = '' - else: - fasta_dict[header] += line.rstrip("\n") - - for key, value in sorted(fasta_dict.items()): - yield "%s\n%s\n" % (key, value) - - -def main(argv=None): - """script main. - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv) - - infile = argv[-1] - for record in makeSplicedFasta(infile): - options.stdout.write(record) - - # write footer and output benchmark information. - E.stop() - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/chain2psl.py b/cgat/tools/chain2psl.py deleted file mode 100644 index 27487beaf..000000000 --- a/cgat/tools/chain2psl.py +++ /dev/null @@ -1,165 +0,0 @@ -"""chain2psl.py - convert a chain file to a psl file -================================================= - -:Tags: Genomics Intervals GenomeAlignment PSL CHAIN Conversion - -Purpose -------- - -convert a UCSC `chain -`_ -formatted file to a UCSC `psl -`_ formatted file. - -This tool is equivalent to the UCSC tool chainToPsl except that it -will not compute the number of matching, mismatching, etc. bases and -thus does not require the sequences. - -The nomenclature the UCSC uses for its chain files is -:file:`targetToQuery.chain` for mapping ``query`` to ``target`` -(reference). According to the UCSC documentation, ``target`` is the -first entry in ``chain`` files. - -We have been using the nomenclature ``QueryToTarget.psl``. In following -this convention, the correct way to converting a psl file is:: - - python chain2psl.py < targetToQuery.chain > QueryToTarget.psl - -If you would like to keep the TargetToQuery convention, you will need -to add a pslSwap:: - - python chain2psl.py < targetToQuery.chain | pslSwap stdin stdout > targetToQuery.psl - -Usage ------ - -For example:: - - cgat chain2psl.py < in.chain > out.psl - -Type:: - - cgat chain2psl.py --help - -for command line help. - -Command line options --------------------- - -""" - -import sys -import cgatcore.experiment as E -import cgat.Blat as Blat -import alignlib_lite - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv) - - # do sth - ninput, nskipped, noutput = 0, 0, 0 - - psl = None - - def chain_iterator(infile): - lines = [] - for line in args.stdin: - - if line.startswith("#"): - continue - if line.strip() == "": - continue - if line.startswith("chain"): - if lines: - yield lines - lines = [] - lines.append(line) - - yield lines - - for lines in chain_iterator(args.stdin): - - ninput += 1 - psl = Blat.Match() - - (_, - _, - psl.mSbjctId, - target_length, - target_strand, - target_start, - target_end, - psl.mQueryId, - query_length, - query_strand, - query_start, - query_end, - alignment_id) = lines[0][:-1].split() - - (psl.mQueryStart, psl.mQueryEnd, psl.mQueryLength, - psl.mSbjctStart, psl.mSbjctEnd, psl.mSbjctLength) = \ - [int(x) for x in - (query_start, - query_end, - query_length, - target_start, - target_end, - target_length)] - - map_query2target = alignlib_lite.py_makeAlignmentBlocks() - - qstart, tstart = psl.mQueryStart, psl.mSbjctStart - - for line in lines[1:-1]: - size, dt, dq = [int(x) for x in line[:-1].split()] - map_query2target.addDiagonal(qstart, - qstart + size, - tstart - qstart) - qstart += size + dq - tstart += size + dt - - size = int(lines[-1][:-1]) - - map_query2target.addDiagonal(qstart, - qstart + size, - tstart - qstart) - - psl.fromMap(map_query2target) - - # sort out strand - # target_strand is always positive - assert(target_strand == "+") - - # if query strand is negative - if query_strand == "-": - # invert both query and target - psl.switchTargetStrand() - # manually invert the query coordinates - psl.mQueryFrom, psl.mQueryTo = psl.mQueryLength - \ - psl.mQueryTo, psl.mQueryLength - psl.mQueryFrom - - args.stdout.write("%s\n" % psl) - noutput += 1 - - E.info("ninput=%i, noutput=%i, nskipped=%i" % (ninput, noutput, nskipped)) - - # write footer and output benchmark information. - E.stop() - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/combine_tables.py b/cgat/tools/combine_tables.py deleted file mode 100644 index 974a108c4..000000000 --- a/cgat/tools/combine_tables.py +++ /dev/null @@ -1,45 +0,0 @@ -'''combine_tables.py - join tables -=============================== - -:Tags: Python - -Purpose -------- - -This script reads several tab-separated tables and joins them into a -single one. - -.. todo:: - - * Rename to tables2table.py - * Use pandas dataframes for fast IO and merging/joining - -Usage ------ - -The option ``--header-names`` sets the column titles explicitely. Add -``--skip-titles`` if you want to avoid echoing the original title in -the input files. - - -Example:: - - python combine_tables.py --help - -Type:: - - python combine_tables.py --help - -for command line help. - -Command line options --------------------- - -''' - -import sys -from cgatcore.tables import main - - -if __name__ == '__main__': - sys.exit(main(sys.argv)) diff --git a/cgat/tools/csv_cut.py b/cgat/tools/csv_cut.py deleted file mode 100644 index 92edb3994..000000000 --- a/cgat/tools/csv_cut.py +++ /dev/null @@ -1,180 +0,0 @@ -'''csv_cut.py - select columns from a table -======================================== - -:Tags: Python - -Purpose -------- - -extract named columns from a csv formatted table - - -.. todo:: - - describe purpose of the script. - -Usage ------ - -Extract the two columns gene and length from a table in standard input:: - - python csv_cut.py gene length < stdin - -The script permits the use of patterns. For example, the command will -select the column gene and all columns that contain the part 'len':: - - python csv_cut.py gene %len% < stdin - -Type:: - - python csv_cut.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys -import re -import cgatcore.experiment as E -import csv -import six -import _csv -import hashlib -from cgatcore.csvutils import CommentStripper, DictReaderLarge - - -class UniqueBuffer: - mKeys = {} - - def __init__(self, outfile): - self.mOutfile = outfile - - def write(self, out): - key = hashlib.md5(out).digest() - if key not in self.mKeys: - self.mKeys[key] = True - self.mOutfile.write(out) - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-r", "--remove", dest="remove", action="store_true", - help="remove specified columns, keep all others.") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="output rows are uniq.") - - parser.add_argument("-l", "--large", dest="large", action="store_true", - help="large columns. Do not use native python csv module.") - - parser.add_argument("-f", "--filename-fields", dest="filename_fields", type=str, - help="filename with field information.") - - parser.set_defaults( - remove=False, - unique=False, - large=False, - filename_fields=None, - ) - - (args, unknown) = E.start(parser, - add_csv_options=True, - quiet=True, - unknowns=True) - - input_fields = unknown - - if args.filename_fields: - input_fields = [x[:-1].split("\t")[0] for x in [x for x in iotools.open_file(args.filename_fields, "r").readlines() if x[0] != "#"]] - - if args.unique: - outfile = UniqueBuffer(args.stdout) - else: - outfile = args.stdout - - while 1: - line = args.stdin.readline() - - if not line: - E.stop() - sys.exit(0) - - if line[0] == "#": - continue - - first_line = line - break - - old_fields = first_line[:-1].split("\t") - - fields = [] - for f in input_fields: - # do pattern search - if f[0] == "%" and f[-1] == "%": - pattern = re.compile(f[1:-1]) - for o in old_fields: - if pattern.search(o) and o not in fields: - fields.append(o) - else: - if f in old_fields: - fields.append(f) - - if args.remove: - fields = set(fields) - fields = [x for x in old_fields if x not in fields] - - if args.large: - reader = DictReaderLarge(CommentStripper(args.stdin), - fieldnames=old_fields, - dialect=args.csv_dialect) - else: - reader = csv.DictReader(CommentStripper(args.stdin), - fieldnames=old_fields, - dialect=args.csv_dialect) - - writer = csv.DictWriter(outfile, - fields, - dialect=args.csv_dialect, - lineterminator=args.csv_lineterminator, - extrasaction='ignore') - - print("\t".join(fields)) - - first_row = True - ninput, noutput, nerrors = 0, 0, 0 - - while 1: - ninput += 1 - try: - row = six.next(reader) - except _csv.Error as msg: - args.stderr.write("# error while parsing: %s\n" % (msg)) - nerrors += 1 - continue - except StopIteration: - break - if not row: - break - writer.writerow(row) - noutput += 1 - - E.info("ninput=%i, noutput=%i, nerrors=%i" % (ninput, noutput, nerrors)) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/csv_intersection.py b/cgat/tools/csv_intersection.py deleted file mode 100644 index becc5796a..000000000 --- a/cgat/tools/csv_intersection.py +++ /dev/null @@ -1,132 +0,0 @@ -''' -csv_intersection.py - intersect two tables -====================================================== - -:Tags: Python - -Purpose -------- - -.. todo:: - - describe purpose of the script. - -Usage ------ - -Example:: - - python csv_intersection.py --help - -Type:: - - python csv_intersection.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys -import cgatcore.experiment as E -import cgatcore.iotools as iotools -from cgatcore.csvutils import readTable -import csv -import hashlib - - -class UniqueBuffer: - mKeys = {} - - def __init__(self, outfile): - self.mOutfile = outfile - - def write(self, out): - key = hashlib.md5(out).digest() - if key not in self.mKeys: - self.mKeys[key] = True - self.mOutfile.write(out) - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="output rows are uniq.") - - parser.set_defaults( - remove=False, - unique=False, - ) - - (args, unknown) = E.start(parser, add_csv_options=True, unknowns=True) - - if len(args) != 2: - raise ValueError("please specify two files to join") - - args.filename1, args.filename2 = unknown - - table1 = readTable(iotools.open_file(args.filename1, "r")) - table2 = readTable(iotools.open_file(args.filename2, "r")) - - if args.unique: - outfile = UniqueBuffer(sys.stdout) - else: - outfile = args.stdout - - # build new field list - new_fields = [] - - for x in args.join_fields1: - new_fields.append(x) - - for x in fields1: - if x not in args.join_fields1: - new_fields.append(x) - if x not in args.join_fields2: - new_fields.append(x) - - writer = csv.DictWriter(outfile, - fields, - dialect=args.csv_dialect, - lineterminator=args.csv_lineterminator, - extrasaction='ignore') - - if len(lines) > 0: - - old_fields = lines[0][:-1].split("\t") - - if args.remove: - fields = [] - for x in old_fields: - if x not in input_fields: - fields.append(x) - else: - fields = input_fields - - reader = csv.DictReader(lines, - dialect=args.csv_dialect) - - print("\t".join(fields)) - - first_row = True - for row in reader: - row = iotools.convertDictionary(row) - writer.writerow(row) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/csv_rename.py b/cgat/tools/csv_rename.py deleted file mode 100644 index 544574eeb..000000000 --- a/cgat/tools/csv_rename.py +++ /dev/null @@ -1,104 +0,0 @@ -''' -csv_rename.py - rename columns in a table -========================================= - -:Tags: Python - -Purpose -------- - -rename columns in a csv file - -Usage ------ - -Example:: - - csv_rename.py gene=id < stdin - -Type:: - - python csv_rename.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys -import cgatcore.experiment as E - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-r", "--remove", dest="remove", action="store_true", - help="remove specified columns, keep all others.") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="output rows are uniq.") - - parser.add_argument("-f", "--filename-fields", dest="filename_fields", type=str, - help="filename with field information.") - - parser.set_defaults( - filename_fields=None, - ) - - (args) = E.start(parser, - add_csv_options=True) - mapper = {} - for x in args: - a, b = x.split("=") - mapper[a.strip()] = b.strip() - - while 1: - line = args.stdin.readline() - - if not line: - E.stop() - sys.exit(0) - - if line[0] == "#": - args.stdout.write(line) - continue - - break - - header = [] - nreplaced = 0 - for x in line[:-1].split(): - if x in mapper: - nreplaced += 1 - header.append(mapper[x]) - else: - header.append(x) - - args.stdout.write("\t".join(header) + "\n") - nlines = 0 - for line in args.stdin: - nlines += 1 - args.stdout.write(line) - - if args.loglevel >= 1: - ninput = len(header) - noutput = ninput - args.stdout.write("# ninput=%i, noutput=%i, nreplaced=%i, nlines=%i\n" % ( - ninput, noutput, nreplaced, nlines)) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/csv_select.py b/cgat/tools/csv_select.py deleted file mode 100644 index 75751eb56..000000000 --- a/cgat/tools/csv_select.py +++ /dev/null @@ -1,117 +0,0 @@ -''' -csv_select.py - select rows from a table -======================================== - -:Tags: Python - -Purpose -------- - -extract rows from a csv-formatted table. - -The select statement is a one-line, for example:: - - csv_select.py "int(r['mC-foetal-sal-R4']) > 0" < in > out - -Note the required variable name r for denoting field names. Please -also be aware than numeric values need to be converted first before -testing. - -Usage ------ - -Type:: - - python csv_select.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys -import csv -import _csv -import cgatcore.experiment as E -from cgatcore.csvutils import CommentStripper, DictReaderLarge - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("-r", "--remove", dest="remove", action="store_true", - help="remove specified columns, keep all others.") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="output rows are uniq.") - - parser.add_argument("-l", "--large", dest="large", action="store_true", - help="large columns. Do not use native python csv module [default=%default].") - - parser.add_argument("-f", "--filename-fields", dest="filename_fields", type=str, - help="filename with field information.") - - parser.set_defaults( - remove=False, - unique=False, - filename_fields=None, - ) - - (args, unknown) = E.start(parser, - add_csv_options=True, - quiet=True, - unknowns=True) - - statement = " ".join(unknown) - - if args.large: - reader = DictReaderLarge(CommentStripper(sys.stdin), - dialect=args.csv_dialect) - else: - reader = csv.DictReader(CommentStripper(sys.stdin), - dialect=args.csv_dialect) - - exec("f = lambda r: %s" % statement, globals()) - counter = E.Counter() - writer = csv.DictWriter(args.stdout, - reader.fieldnames, - dialect=args.csv_dialect, - lineterminator=args.csv_lineterminator) - - writer.writerow(dict((fn, fn) for fn in reader.fieldnames)) - while 1: - counter.input += 1 - try: - row = next(reader) - except _csv.Error as msg: - args.stderr.write("# error while parsing: %s\n" % (msg)) - counter.errors += 1 - continue - except StopIteration: - break - - if not row: - break - - if f(row): - writer.writerow(row) - counter.output += 1 - else: - counter.filtered += 1 - - E.info("%s" % counter) - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/csv_set.py b/cgat/tools/csv_set.py deleted file mode 100644 index 2975e34af..000000000 --- a/cgat/tools/csv_set.py +++ /dev/null @@ -1,159 +0,0 @@ -''' -csv_set.py - set operations on a table -====================================== - -:Tags: Python - -Purpose -------- - -.. todo:: - - describe purpose of the script. - -Usage ------ - -Example:: - - python csv_set.py --help - -Type:: - - python csv_set.py --help - -for command line help. - -Command line options --------------------- - -''' -import sys - -import cgatcore.experiment as E -from cgatcore.csvutils import readTable -import hashlib - - -class UniqueBuffer: - mKeys = {} - - def __init__(self, outfile): - self.mOutfile = outfile - - def write(self, out): - key = hashlib.md5(out).digest() - if key not in self.mKeys: - self.mKeys[key] = True - self.mOutfile.write(out) - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="output rows are uniq.") - - parser.add_argument("-1", "--join-fields1", dest="join_fields1", type=str, - help="join fields in first table.") - parser.add_argument("-2", "--join-fields2", dest="join_fields2", type=str, - help="join fields in second table.") - parser.add_argument("-m", "--method", dest="method", type=str, - help="set operation to perform.", choices=("intersection", "rest", "union")) - - parser.set_defaults( - remove=False, - unique=False, - join_fields1=None, - join_fields2=None, - method="intersection", - ) - - (args, unknown) = E.start(parser, - add_csv_options=True, - unknowns=True) - - if len(unknown) != 2: - raise ValueError("please specify two files to join") - - if not args.join_fields1 or not args.join_fields2: - raise ValueError("please specify at least one join field per table") - - args.join_fields1 = args.join_fields1.split(",") - args.join_fields2 = args.join_fields2.split(",") - - args.filename1, args.filename2 = unknown - - fields1, table1 = readTable(open(args.filename1, "r")) - fields2, table2 = readTable(open(args.filename2, "r")) - - if args.unique: - outfile = UniqueBuffer(sys.stdout) - else: - outfile = args.stdout - - nfields1 = [] - for x in range(len(fields1)): - if fields1[x] in args.join_fields1: - nfields1.append(x) - nfields2 = [] - for x in range(len(fields2)): - if fields2[x] in args.join_fields2: - nfields2.append(x) - - # calculate row indices: double keys are not taken care of here - keys = {} - for row1 in table1: - v = [row1[x] for x in nfields1] - key = hashlib.md5("".join(v)).digest() - keys[key] = row1 - - if args.method == "intersection": - # build new field list - take = list(range(len(fields1))) - c = len(take) - for x in fields2: - if x not in args.join_fields2: - take.append(c) - c += 1 - - t = fields1 + fields2 - - new_fields = [t[x] for x in take] - - print("\t".join(new_fields)) - - for row2 in table2: - v = [row2[x] for x in nfields2] - key = hashlib.md5("".join(v)).digest() - if key in keys: - new_row = keys[key] + row2 - outfile.write( - "\t".join([new_row[x] for x in take]) + "\n") - - elif args.method == "rest": - - new_fields = fields2 - print("\t".join(new_fields)) - - for row2 in table2: - v = [row2[x] for x in nfields2] - key = hashlib.md5("".join(v)).digest() - if key not in keys: - outfile.write("\t".join(row2) + "\n") - - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/medip_merge_intervals.py b/cgat/tools/medip_merge_intervals.py deleted file mode 100644 index 35375c72e..000000000 --- a/cgat/tools/medip_merge_intervals.py +++ /dev/null @@ -1,257 +0,0 @@ -''' -medip_merge_intervals.py - merge differentially methylated regions -================================================================== - -:Tags: Python - -Purpose -------- - -This script takes the output of DESeq or EdgeR and merges -adjacent intervals that show a similar expression change. - -Input is data like this:: - - contig start end treatment_name treatment_mean treatment_std control_name control_mean control_std pvalue qvalue l2fold fold significant status - chr1 10000 11000 CD14 32.9785173324 0 CD4 41.7117152603 0 0.199805206526 1.0 0.338926100945 1.26481475319 0 OK - chr1 14000 15000 CD14 9.32978709019 0 CD4 9.31489982941 0 1.0 1.0 -0.00230390372974 0.998404330063 0 OK - chr1 15000 16000 CD14 9.04603350905 0 CD4 9.01484414416 0 1.0 1.0 -0.00498279072069 0.996552150193 0 OK - chr1 16000 17000 CD14 0.457565479197 0 CD4 0.14910378845 0 0.677265200643 1.0 -1.61766129852 0.325863281276 0 OK - -The second and third window would be merged, as - -1. Their methylation levels are within 10% of each other. -2. They are both not differentially methylated. - -It aggregates the following: - -* mean values: average -* std values: max -* pvalue: max -* qvalue: max -* fold: min/max (depending on enrichment/depletion) -* l2fold: min/max (depending on enrichment/depletion) - -The analysis outputs bed files with intervals that are -potentially activated in one of the conditions. Windows -with a positive fold change are collected in the ``treatment``, -while windows with a negative fold change are collected in the -``control``. - -For methylation analysis, it might be more interesting -to report windows that are depleted (instead of enriched) -of signal. Thus, if the option ``--invert`` is given, -windows with a negative l2fold change are labeled ``treatment``. -Less methylation means that this region is "active" in the -``treatment`` condition. - -Note that the input is assumed to be sorted by coordinate. - -Usage ------ - -Example:: - - python cgat_script_template.py --help - -Type:: - - python cgat_script_template.py --help - -for command line help. - - -Command line options --------------------- - -''' - -import sys -import re -import collections - -import cgatcore.experiment as E -import cgatcore.iotools as iotools - -DATA = collections.namedtuple( - "DATA", - "test_id contig start end treatment_name treatment_mean treatment_std " - "control_name control_mean control_std pvalue qvalue " - "l2fold fold significant status nintervals") - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-o", "--min-overlap", dest="min_overlap", type=int, - help="minimum overlap") - - parser.add_argument( - "-w", "--pattern-window", - dest="pattern_window", type=str, - help="regular expression to extract window coordinates from " - "test id ") - - parser.add_argument( - "-i", "--invert", dest="invert", action="store_true", - help="invert direction of fold change ") - - parser.set_defaults(min_overlap=10, - invert=False, - pattern_window="(\S+):(\d+)-(\d+)"), - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv, add_output_options=True) - - outfiles = iotools.FilePool(args.output_filename_pattern) - - if args.invert: - test_f = lambda l2fold: l2fold < 0 - else: - test_f = lambda l2fold: l2fold > 0 - - def read(): - - rx_window = re.compile(args.pattern_window) - # filter any of the DESeq/EdgeR message that end up at the top of the - # output file - - for data in iotools.iterate(args.stdin): - - contig, start, end = rx_window.match(data.test_id).groups() - start, end = list(map(int, (start, end))) - - yield DATA._make((data.test_id, - contig, start, end, - data.treatment_name, - float(data.treatment_mean), - float(data.treatment_std), - data.control_name, - float(data.control_mean), - float(data.control_std), - float(data.pvalue), - float(data.qvalue), - float(data.l2fold), - float(data.fold), - int(data.significant), - data.status, - 0)) - - def grouper(data, distance=10): - - last = next(data) - entries = [last] - - while 1: - d = next(data) - if d is None: - break - if d.contig == last.contig and d.start < last.start: - raise ValueError("error not sorted by start") - - if ((d.contig != last.contig) or - (d.start - last.end > distance) or - (d.status != last.status) or - (d.significant != last.significant) or - (d.l2fold * last.l2fold < 0)): - yield entries - entries = [] - - entries.append(d) - last = d - - yield entries - - counter = E.Counter() - - args.stdout.write("\t".join(DATA._fields) + "\n") - - # set of all sample names - used to create empty files - samples = set() - - # need to sort by coordinate - all_data = list(read()) - all_data.sort(key=lambda x: (x.contig, x.start)) - - group_id = 0 - - for group in grouper(iter(all_data), distance=args.min_overlap): - group_id += 1 - - start, end = group[0].start, group[-1].end - assert start < end, 'start > end: %s' % str(group) - n = float(len(group)) - counter.input += n - - g = group[0] - - if g.l2fold < 0: - l2fold = max([x.l2fold for x in group]) - fold = max([x.fold for x in group]) - else: - l2fold = min([x.l2fold for x in group]) - fold = min([x.fold for x in group]) - - outdata = DATA._make(( - str(group_id), - g.contig, start, end, - g.treatment_name, - sum([x.treatment_mean for x in group]) / n, - max([x.treatment_std for x in group]), - g.control_name, - sum([x.control_mean for x in group]) / n, - max([x.control_std for x in group]), - max([x.pvalue for x in group]), - max([x.qvalue for x in group]), - l2fold, - fold, - g.significant, - g.status, - int(n))) - - samples.add(g.treatment_name) - samples.add(g.control_name) - if g.significant: - if test_f(g.l2fold): - # treatment lower methylation than control - outfiles.write( - g.treatment_name, "%s\t%i\t%i\t%i\t%f\n" % ( - g.contig, g.start, g.end, - group_id, - sum([x.treatment_mean for x in group]) / n)) - - else: - outfiles.write( - g.control_name, "%s\t%i\t%i\t%i\t%f\n" % ( - g.contig, g.start, g.end, - group_id, - sum([x.control_mean for x in group]) / n)) - - args.stdout.write("\t".join(map(str, outdata)) + "\n") - - counter.output += 1 - - # create empty files - for sample in samples: - outfiles.write(sample, "") - - outfiles.close() - E.info("%s" % counter) - - # write footer and output benchmark information. - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/randomize_lines.py b/cgat/tools/randomize_lines.py deleted file mode 100644 index 9cd8b045c..000000000 --- a/cgat/tools/randomize_lines.py +++ /dev/null @@ -1,72 +0,0 @@ -''' -randomize_lines.py - randomize lines from stdin -=============================================== - -:Tags: Python - -Purpose -------- - -This script reads lines from stdin and outputs them -in randomized order. - -Usage ------ - -Example:: - - cgat randomize-lines < in.lines > out.lines - -Command line options --------------------- - -''' - -import sys -import random -import cgatcore.experiment as E - - -def main(argv=None): - """script main. - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-k", "--keep-header", dest="keep_header", type=int, - help="randomize, but keep header in place ") - - parser.set_defaults(keep_header=0) - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv) - - inf = args.stdin - outf = args.stdout - c = E.Counter() - for x in range(args.keep_header): - c.header += 1 - outf.write(inf.readline()) - - lines = inf.readlines() - c.lines_input = len(lines) - random.shuffle(lines) - for line in lines: - outf.write(line) - c.lines_output = len(lines) - - E.info(c) - - # write footer and output benchmark information. - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/rnaseq_junction_bam2bam.py b/cgat/tools/rnaseq_junction_bam2bam.py deleted file mode 100644 index 8b71f8f4e..000000000 --- a/cgat/tools/rnaseq_junction_bam2bam.py +++ /dev/null @@ -1,215 +0,0 @@ -'''rnaseq_junction_bams2bam.py - convert mappings against junctions to genomic coordinates -======================================================================================== - -:Tags: Genomics NGS Genesets - -Purpose -------- - -This script takes as input a BAM file resulting from reads mapped against -a junction database and outputs a :term:`bam` formatted file in genomic -coordinates. - -The contigs should be of the format -||-|||. - - - 0-based coordinate of first base - - 0-based coordinate of last base in exon - - 0-based coordinate of first base in exon - - 0-based coordinate of base after last base - -Strand can be either ``fwd`` or ``rev``, though sequences in the database -and coordinates are all on the forward strand. - -For example ``chr1|1244933|1244982-1245060|1245110|GTAG|fwd`` translates to the -intron ``chr1:1244983-1245060`` in python coordinates. - -The input bam-file is supposed to be sorted by read. Only the best -matches are output for each read, were best is defined both in terms -of number of mismatches and number of colour mismatches. - -Usage ------ - -Example:: - - cat input.bam | python rnaseq_junction_bam2bam.py - --log=log > output.bam - -Type:: - - python rnaseq_junction_bam2bam.py --help - -for command line help. - -Command line options --------------------- - -''' - -import sys -import itertools - -import cgatcore.experiment as E -import cgatcore.iotools as iotools -import pysam - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if not argv: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument("-t", "--template-bam-file", dest="filename_genome_bam", type=str, - help="input bam file for header information ") - - parser.add_argument("-s", "--contigs-tsv-file", dest="filename_contigs", type=str, - help="filename with contig sizes ") - - parser.add_argument("-o", "--colour", dest="colour_mismatches", action="store_true", - help="mismatches will use colour differences (CM tag) ") - - parser.add_argument("-i", "--ignore-mismatches", dest="ignore_mismatches", action="store_true", - help="ignore mismatches ") - - parser.add_argument("-c", "--remove-contigs", dest="remove_contigs", type=str, - help="','-separated list of contigs to remove ") - - parser.add_argument("-f", "--force-output", dest="force", action="store_true", - help="force overwriting of existing files ") - - parser.add_argument("-u", "--unique", dest="unique", action="store_true", - help="remove reads not matching uniquely ") - - parser.set_defaults( - filename_genome_bam=None, - filename_gtf=None, - filename_mismapped=None, - remove_contigs=None, - force=False, - unique=False, - colour_mismatches=False, - ignore_mismatches=False, - ) - - # add common options (-h/--help, ...) and parse command line - (args) = E.start(parser, argv=argv) - - genomefile, referencenames, referencelengths = None, None, None - - if args.filename_genome_bam: - genomefile = pysam.AlignmentFile(args.filename_genome_bam, "rb") - elif args.filename_contigs: - contigs = iotools.ReadMap(iotools.open_file(args.filename_contigs)) - data = list(zip(*list(contigs.items()))) - referencenames, referencelengths = data[0], list(map(int, data[1])) - else: - raise ValueError( - "please provide either --template-bam-file or --contigs-tsv-file") - - infile = pysam.AlignmentFile("-", "rb") - outfile = pysam.AlignmentFile("-", "wb", template=genomefile, - referencenames=referencenames, - referencelengths=referencelengths) - - if args.colour_mismatches: - tag = "CM" - else: - tag = "NM" - - nambiguous = 0 - ninput = 0 - nunmapped = 0 - ncigar = 0 - nfull = 0 - noutput = 0 - - contig2tid = dict([(y, x) for x, y in enumerate(outfile.references)]) - - for qname, readgroup in itertools.groupby(infile, lambda x: x.qname): - ninput += 1 - reads = list(readgroup) - if reads[0].is_unmapped: - nunmapped += 1 - continue - - # filter for best match - best = min([x.opt(tag) for x in reads]) - reads = [x for x in reads if x.opt(tag) == best] - if len(reads) > 1: - nambiguous += 1 - continue - - read = reads[0] - - # reject complicated matches (indels, etc) - # to simplify calculations below. - if len(read.cigar) > 1: - ncigar += 1 - continue - - # set NH flag to latest count - t = dict(read.tags) - t['NH'] = 1 - read.tags = list(t.items()) - - sname = infile.getrname(read.tid) - - contig, first_exon_start, middle, last_exon_end, splice, strand = sname.split( - "|") - first_exon_end, last_exon_start = middle.split("-") - first_exon_start, first_exon_end, last_exon_start, last_exon_end = list(map(int, ( - first_exon_start, first_exon_end, last_exon_start, last_exon_end))) - first_exon_end += 1 - - total = first_exon_end - first_exon_start + \ - last_exon_end - last_exon_start - first_exon_length = first_exon_end - first_exon_start - - match1 = first_exon_length - read.pos - intron_length = last_exon_start - first_exon_end - match2 = read.qlen - match1 - - # match lies fully in one exon - ignore - if match1 <= 0 or match2 <= 0: - nfull += 1 - continue - - # increment pos - read.pos = first_exon_start + read.pos - read.tid = contig2tid[contig] - # 3 = BAM_CREF_SKIP - read.cigar = [(0, match1), (3, intron_length), (0, match2)] - - outfile.write(read) - - noutput += 1 - - outfile.close() - if genomefile: - genomefile.close() - - c = E.Counter() - c.input = ninput - c.output = noutput - c.full = nfull - c.cigar = ncigar - c.ambiguous = nambiguous - c.unmapped = nunmapped - - E.info("%s" % str(c)) - - # write footer and output benchmark information. - E.stop() - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/cgat/tools/transfac2transfac.py b/cgat/tools/transfac2transfac.py deleted file mode 100644 index e7859526e..000000000 --- a/cgat/tools/transfac2transfac.py +++ /dev/null @@ -1,130 +0,0 @@ -''' -transfac2transfac.py - filter transfac motif files -==================================================== - -:Tags: Python - -Purpose -------- - -Filter a transfac motif file. - -Usage ------ - -Example:: - - python cgat_script_template.py - -Type:: - - python cgat_script_template.py --help - -for command line help. - -Command line options --------------------- - -''' - - -import sys -import re -import cgatcore.experiment as E - - -def main(argv=None): - """script main. - - parses command line options in sys.argv, unless *argv* is given. - """ - - if argv is None: - argv = sys.argv - - # setup command line parser - parser = E.ArgumentParser(description=__doc__) - - parser.add_argument("--version", action='version', version="1.0") - - parser.add_argument( - "-f", "--filter-prefix", dest="filter_prefix", default=None, - help="ID prefix to filter on, eg. V for vertebrates") - - parser.add_argument( - "-p", "--pattern-identifier", dest="filter_pattern", default=None, - help="ID pattern to filter (filter is case insensitive) eg. pax6. " - "Multiple patterns should be specified as a comma separated list") - - (args) = E.start(parser) - - if args.filter_pattern: - patterns = [x.strip() for x in args.filter_pattern.split(",")] - E.info("Supplied patterns %s" % ", ".join(patterns)) - else: - patterns = False - - filtered_motifs = [] - n = 0 - - inmotif, tid, filter_emit, pattern_emit = False, False, False, False - - for line in args.stdin: - - # pick up motif start and ends. - if line.startswith("AC") and inmotif is False: - # print "in align" - inmotif = True - motif = line - continue - elif line.startswith("ID") and inmotif is True: - # print line - tid = line.split(" ")[1] - motif += line - continue - - elif line.startswith("//") and inmotif is True: - - motif += line - - if tid is False: - raise ValueError("matrix ID not determined") - - if args.filter_prefix: - if tid.startswith(args.filter_prefix): - filter_emit = True - else: - filter_emit = True - - if patterns is not False: - for pat in patterns: - match = re.search(pat, tid, re.IGNORECASE) - if match is not None: - pattern_emit = True - break - else: - pattern_emit = True - - if filter_emit is True and pattern_emit is True: - filtered_motifs.append(motif) - n += 1 - - inmotif, tid, filter_emit, pattern_emit = ( - False, False, False, False) - continue - - elif inmotif is True: - motif += line - - elif inmotif is False: - continue - - else: - raise ValueError("unknown parsing state") - - args.stdout.write("".join(filtered_motifs)) - - E.stop() - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/doc/CGATReference.rst b/doc/CGATReference.rst index f3974593c..565e3ef30 100644 --- a/doc/CGATReference.rst +++ b/doc/CGATReference.rst @@ -152,10 +152,6 @@ NGS data Save sequence and quality information from a :term:`bam` formatted file. -:doc:`scripts/bam2peakshape` - Compute read densities over a collection of intervals. Also - accepts :term:`bed` or :term:`bigwig` formatted files. - :doc:`scripts/bam2stats` Compute summary statistics of a :term:`bam` formatted file. diff --git a/doc/scripts.rst b/doc/scripts.rst index 8432351e2..42510e30d 100644 --- a/doc/scripts.rst +++ b/doc/scripts.rst @@ -49,7 +49,6 @@ Genomics scripts/combine_gff.rst scripts/quality2fasta.rst scripts/bam2wiggle.rst - scripts/bed2annotator.rst scripts/bed2graph.rst scripts/bed2psl.rst scripts/chain2psl.rst @@ -164,11 +163,9 @@ Unsorted :maxdepth: 1 scripts/add_random_reads_to_bam.rst - scripts/bam2UniquePairs.rst scripts/bam2bam.rst scripts/bam2bed.rst scripts/bam2fastq.rst - scripts/bam2peakshape.rst scripts/bam2stats.rst scripts/bam2transcriptContribution.rst scripts/barplotGo.rst @@ -219,12 +216,8 @@ Unsorted scripts/rnaseq_junction_bam2bam.rst scripts/split_gff.rst scripts/annotator_distance.rst - scripts/bam2bidirectionaltranscription.rst scripts/bam2profile.rst - scripts/bam2species_map.rst scripts/bams2bam.rst - scripts/bed2plot.rst - scripts/bigwig2hilbert.rst scripts/cgat.rst scripts/cgat2dot.rst scripts/cgat_add_preamble.rst diff --git a/doc/scripts/bam2UniquePairs.rst b/doc/scripts/bam2UniquePairs.rst deleted file mode 100644 index df77c794c..000000000 --- a/doc/scripts/bam2UniquePairs.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.bam2UniquePairs - -.. program-output:: cgat bam2UniquePairs -? - diff --git a/doc/scripts/cat_tables.rst b/doc/scripts/cat_tables.rst deleted file mode 100644 index cf1151d65..000000000 --- a/doc/scripts/cat_tables.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.cat_tables - -.. program-output:: cgat cat-tables -? - diff --git a/doc/scripts/chain2psl.rst b/doc/scripts/chain2psl.rst deleted file mode 100644 index 8c90699e6..000000000 --- a/doc/scripts/chain2psl.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.chain2psl - -.. program-output:: cgat chain2psl -? - diff --git a/doc/scripts/combine_files.rst b/doc/scripts/combine_files.rst deleted file mode 100644 index 0441be7d2..000000000 --- a/doc/scripts/combine_files.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.combine_files - -.. program-output:: cgat combine-files -? - diff --git a/doc/scripts/combine_gff.rst b/doc/scripts/combine_gff.rst deleted file mode 100644 index 06c00301e..000000000 --- a/doc/scripts/combine_gff.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.combine_gff - -.. program-output:: cgat combine-gff -? - diff --git a/doc/scripts/combine_histograms.rst b/doc/scripts/combine_histograms.rst deleted file mode 100644 index 8ae93600c..000000000 --- a/doc/scripts/combine_histograms.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.combine_histograms - -.. program-output:: cgat combine-histograms -? - diff --git a/doc/scripts/combine_tables.rst b/doc/scripts/combine_tables.rst deleted file mode 100644 index 611777964..000000000 --- a/doc/scripts/combine_tables.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.combine_tables - -.. program-output:: cgat combine-tables -? - diff --git a/doc/scripts/csv_cut.rst b/doc/scripts/csv_cut.rst deleted file mode 100644 index 1d644cc52..000000000 --- a/doc/scripts/csv_cut.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.csv_cut - -.. program-output:: cgat csv-cut -? - diff --git a/doc/scripts/csv_intersection.rst b/doc/scripts/csv_intersection.rst deleted file mode 100644 index 562b93472..000000000 --- a/doc/scripts/csv_intersection.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.csv_intersection - -.. program-output:: cgat csv-intersection -? - diff --git a/doc/scripts/csv_rename.rst b/doc/scripts/csv_rename.rst deleted file mode 100644 index 2f0af2b40..000000000 --- a/doc/scripts/csv_rename.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.csv_rename - -.. program-output:: cgat csv-rename -? - diff --git a/doc/scripts/csv_select.rst b/doc/scripts/csv_select.rst deleted file mode 100644 index 2e6c86417..000000000 --- a/doc/scripts/csv_select.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.csv_select - -.. program-output:: cgat csv-select -? - diff --git a/doc/scripts/csv_set.rst b/doc/scripts/csv_set.rst deleted file mode 100644 index 2f1c6d291..000000000 --- a/doc/scripts/csv_set.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.csv_set - -.. program-output:: cgat csv-set -? - diff --git a/doc/scripts/medip_merge_intervals.rst b/doc/scripts/medip_merge_intervals.rst deleted file mode 100644 index 1a8ba6167..000000000 --- a/doc/scripts/medip_merge_intervals.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.medip_merge_intervals - -.. program-output:: cgat medip-merge-intervals -? - diff --git a/doc/scripts/rnaseq_junction_bam2bam.rst b/doc/scripts/rnaseq_junction_bam2bam.rst deleted file mode 100644 index 79a173359..000000000 --- a/doc/scripts/rnaseq_junction_bam2bam.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.rnaseq_junction_bam2bam - -.. program-output:: cgat rnaseq-junction-bam2bam -? - diff --git a/doc/scripts/transfac2transfac.rst b/doc/scripts/transfac2transfac.rst deleted file mode 100644 index 33e0cf727..000000000 --- a/doc/scripts/transfac2transfac.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgat.tools.transfac2transfac - -.. program-output:: cgat transfac2transfac -? - diff --git a/tests/bam2UniquePairs.py/tests.yaml b/tests/bam2UniquePairs.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/bam2UniquePairs.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/bam2libtype.py/paired.bam b/tests/bam2libtype.py/paired.bam deleted file mode 120000 index 5cd26cf1e..000000000 --- a/tests/bam2libtype.py/paired.bam +++ /dev/null @@ -1 +0,0 @@ -../data/paired.bam \ No newline at end of file diff --git a/tests/bam2libtype.py/paired_IU.txt b/tests/bam2libtype.py/paired_IU.txt deleted file mode 100644 index 9d7c7cb79..000000000 --- a/tests/bam2libtype.py/paired_IU.txt +++ /dev/null @@ -1,20 +0,0 @@ -# 2018-06-08 11:25:27,748 INFO output generated by bam2libtype \ -# job started at Fri Jun 8 11:25:27 2018 on cgath2.anat.ox.ac.uk -- d44bac8d-92c0-4392-b401-73ec7ad9b5ff \ -# pid: 105304, system: Linux 2.6.32-696.20.1.el6.x86_64 #1 SMP Fri Jan 12 15:07:59 EST 2018 x86_64 -# 2018-06-08 11:25:27,748 INFO log_config_filename : None \ -# loglevel : 1 \ -# max_iterations : None \ -# max_iteratiors : 10000 \ -# random_seed : None \ -# short_help : None \ -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='ANSI_X3.4-1968'> \ -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# timeit_file : None \ -# timeit_header : None \ -# timeit_name : all \ -# tracing : None -MSR ISR OSR ISF MSF OSF SF SR -0 47 0 45 0 0 3 3 -# 2018-06-08 11:25:27,864 INFO job finished in 0 seconds at Fri Jun 8 11:25:27 2018 -- 0.44 0.12 0.00 0.00 -- d44bac8d-92c0-4392-b401-73ec7ad9b5ff diff --git a/tests/bam2libtype.py/single.bam b/tests/bam2libtype.py/single.bam deleted file mode 100644 index 046b6f1a6..000000000 Binary files a/tests/bam2libtype.py/single.bam and /dev/null differ diff --git a/tests/bam2libtype.py/single_U.txt b/tests/bam2libtype.py/single_U.txt deleted file mode 100644 index 15a5d87ea..000000000 --- a/tests/bam2libtype.py/single_U.txt +++ /dev/null @@ -1,20 +0,0 @@ -# 2018-06-08 11:25:28,539 INFO output generated by bam2libtype \ -# job started at Fri Jun 8 11:25:28 2018 on cgath2.anat.ox.ac.uk -- 372b9be2-ee36-4e31-ba41-7f1d279c0034 \ -# pid: 105310, system: Linux 2.6.32-696.20.1.el6.x86_64 #1 SMP Fri Jan 12 15:07:59 EST 2018 x86_64 -# 2018-06-08 11:25:28,539 INFO log_config_filename : None \ -# loglevel : 1 \ -# max_iterations : None \ -# max_iteratiors : 10000 \ -# random_seed : None \ -# short_help : None \ -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='ANSI_X3.4-1968'> \ -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> \ -# timeit_file : None \ -# timeit_header : None \ -# timeit_name : all \ -# tracing : None -MSR ISR OSR ISF MSF OSF SF SR -0 0 0 0 0 0 57 42 -# 2018-06-08 11:25:30,020 INFO job finished in 1 seconds at Fri Jun 8 11:25:30 2018 -- 1.77 0.15 0.00 0.00 -- 372b9be2-ee36-4e31-ba41-7f1d279c0034 diff --git a/tests/bam2libtype.py/tests.yaml b/tests/bam2libtype.py/tests.yaml deleted file mode 100644 index 10589a464..000000000 --- a/tests/bam2libtype.py/tests.yaml +++ /dev/null @@ -1,19 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version - -paired_lib: - stdin: paired.bam - outputs: [stdout] - references: [paired_IU.txt] - options: - -single_lib: - stdin: single.bam - outputs: [stdout] - references: [single_U.txt] - options: - diff --git a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads.tsv b/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads.tsv deleted file mode 100644 index dfe814304..000000000 --- a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads.tsv +++ /dev/null @@ -1,34 +0,0 @@ -# output generated by /ifs/devel/andreas/cgat-py3/scripts/bam2peakshape.py --force-output --use-interval --centring-method=reads --control-bam-file=/ifs/devel/andreas/cgat-py3/tests/bam2peakshape.py/control.bam /ifs/devel/andreas/cgat-py3/tests/bam2peakshape.py/small.bam /ifs/devel/andreas/cgat-py3/tests/bam2peakshape.py/onepeak.bed -# job started at Mon Apr 11 21:01:44 2016 on cgat150.anat.ox.ac.uk -- e73c7f82-d82b-4286-a035-d5f5b020b4f5 -# pid: 30442, system: Linux 2.6.32-573.18.1.el6.x86_64 #1 SMP Wed Jan 6 11:20:49 EST 2016 x86_64 -# bin_size : 10 -# centring_method : reads -# control_files : ['/ifs/devel/andreas/cgat-py3/tests/bam2peakshape.py/control.bam'] -# format : bam -# loglevel : 1 -# normalization : None -# output_filename_pattern : %s -# output_force : True -# random_seed : None -# random_shift : False -# report_step : 100 -# shift : 0 -# short_help : None -# smooth_method : None -# sort_orders : [] -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='ANSI_X3.4-1968'> -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# use_interval : True -# window_size : 1000 -## 2016-04-11 21:01:44,885 INFO using control files: /ifs/devel/andreas/cgat-py3/tests/bam2peakshape.py/control.bam -## 2016-04-11 21:01:44,901 INFO interval processing: added=1, input=1 -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -## 2016-04-11 21:01:44,903 INFO no normalization performed -# job finished in 0 seconds at Mon Apr 11 21:01:44 2016 -- 1.21 0.18 0.00 0.00 -- e73c7f82-d82b-4286-a035-d5f5b020b4f5 diff --git a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_control_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_control_unsorted.gz deleted file mode 100644 index caf97c605..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_matrix_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_matrix_unsorted.gz deleted file mode 100644 index f3f336c0e..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalCentringMethodReads_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalNormalization.tsv b/tests/bam2peakshape.py/BamOnlyIntervalNormalization.tsv deleted file mode 100644 index 06a25d4c8..000000000 --- a/tests/bam2peakshape.py/BamOnlyIntervalNormalization.tsv +++ /dev/null @@ -1,32 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --only-interval --normalization=sum small.bam onepeak.bed --control-file=small.bam -# job started at Tue Apr 29 21:32:48 2014 on fgu205.anat.ox.ac.uk -- a8056b64-d44e-468b-8d38-1ae7c9f67486 -# pid: 14142, system: Linux 2.6.32-431.11.2.el6.x86_64 #1 SMP Mon Mar 3 13:32:45 EST 2014 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : sum -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# report_step : 100 -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x7fc9fe8af270> -# stdin : \', mode \'r\' at 0x7fc9fe8af150> -# stdlog : \', mode \'w\' at 0x7fc9fe8af1e0> -# stdout : \', mode \'w\' at 0x7fc9fe8af1e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2014-04-29 21:32:48,240 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2014-04-29 21:32:48,300 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2014-04-29 21:32:48,303 INFO Starting sum normalization -# 2014-04-29 21:32:48,303 INFO norm = 0 -# job finished in 0 seconds at Tue Apr 29 21:32:48 2014 -- 0.19 0.12 0.00 0.01 -- a8056b64-d44e-468b-8d38-1ae7c9f67486 diff --git a/tests/bam2peakshape.py/BamOnlyIntervalNormalization_control_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalNormalization_control_unsorted.gz deleted file mode 100644 index cf504ddff..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalNormalization_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalNormalization_matrix_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalNormalization_matrix_unsorted.gz deleted file mode 100644 index 6088c51db..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalNormalization_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalShift.tsv b/tests/bam2peakshape.py/BamOnlyIntervalShift.tsv deleted file mode 100644 index 600e44fe8..000000000 --- a/tests/bam2peakshape.py/BamOnlyIntervalShift.tsv +++ /dev/null @@ -1,31 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --only-interval --shift=100 small.bam onepeak.bed --control-file=small.bam -# job started at Tue Apr 29 21:28:39 2014 on fgu205.anat.ox.ac.uk -- c24a27cd-447d-45fd-b749-a27022217249 -# pid: 14113, system: Linux 2.6.32-431.11.2.el6.x86_64 #1 SMP Mon Mar 3 13:32:45 EST 2014 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# report_step : 100 -# shift : 100 -# sort : [] -# stderr : \', mode \'w\' at 0x7fd6fb8ab270> -# stdin : \', mode \'r\' at 0x7fd6fb8ab150> -# stdlog : \', mode \'w\' at 0x7fd6fb8ab1e0> -# stdout : \', mode \'w\' at 0x7fd6fb8ab1e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2014-04-29 21:28:39,735 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2014-04-29 21:28:39,792 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 30 1195 29 19 1305 220 0.0 55 195 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,100,100,100,100,100,100,100,100,100,100,110,120,130,140,150,160,170,180,190,180,160,140,120,100,80,60,40,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,10,10,10,10,10,10,10,10 -# 2014-04-29 21:28:39,794 INFO No normalization performed -# job finished in 0 seconds at Tue Apr 29 21:28:39 2014 -- 0.19 0.12 0.00 0.00 -- c24a27cd-447d-45fd-b749-a27022217249 diff --git a/tests/bam2peakshape.py/BamOnlyIntervalShift_control_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalShift_control_unsorted.gz deleted file mode 100644 index fa8bdad16..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalShift_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalShift_matrix_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalShift_matrix_unsorted.gz deleted file mode 100644 index 4584e976a..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalShift_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand.tsv b/tests/bam2peakshape.py/BamOnlyIntervalUseStrand.tsv deleted file mode 100644 index 24de75d87..000000000 --- a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand.tsv +++ /dev/null @@ -1,31 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --only-interval --use-strand small.bam onepeak.bed --control-file=small.bam -# job started at Tue Apr 29 21:37:50 2014 on fgu205.anat.ox.ac.uk -- 775aa823-1442-4a66-a82b-d30d5ab28bda -# pid: 14191, system: Linux 2.6.32-431.11.2.el6.x86_64 #1 SMP Mon Mar 3 13:32:45 EST 2014 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# report_step : 100 -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x7f46e7f18270> -# stdin : \', mode \'r\' at 0x7f46e7f18150> -# stdlog : \', mode \'w\' at 0x7f46e7f181e0> -# stdout : \', mode \'w\' at 0x7f46e7f181e0> -# strand_specific : True -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2014-04-29 21:37:50,374 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2014-04-29 21:37:50,427 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2014-04-29 21:37:50,430 INFO No normalization performed -# job finished in 0 seconds at Tue Apr 29 21:37:50 2014 -- 0.17 0.13 0.00 0.01 -- 775aa823-1442-4a66-a82b-d30d5ab28bda diff --git a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_control_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_control_unsorted.gz deleted file mode 100644 index 8a2c0fd09..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_matrix_unsorted.gz b/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_matrix_unsorted.gz deleted file mode 100644 index df957887c..000000000 Binary files a/tests/bam2peakshape.py/BamOnlyIntervalUseStrand_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamSortByPeakHeight.tsv b/tests/bam2peakshape.py/BamSortByPeakHeight.tsv deleted file mode 100644 index f461ce1d8..000000000 --- a/tests/bam2peakshape.py/BamSortByPeakHeight.tsv +++ /dev/null @@ -1,31 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --sort=peak-height small.bam onepeak.bed --control-file=small.bam -# job started at Tue Apr 29 21:52:01 2014 on fgu205.anat.ox.ac.uk -- 88c9c365-9b46-43c2-a28d-0f9c18201291 -# pid: 14339, system: Linux 2.6.32-431.11.2.el6.x86_64 #1 SMP Mon Mar 3 13:32:45 EST 2014 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : None -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# report_step : 100 -# shift : 0 -# sort : [\'peak-height\'] -# stderr : \', mode \'w\' at 0x7f5b2d358270> -# stdin : \', mode \'r\' at 0x7f5b2d358150> -# stdlog : \', mode \'w\' at 0x7f5b2d3581e0> -# stdout : \', mode \'w\' at 0x7f5b2d3581e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2014-04-29 21:52:01,905 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2014-04-29 21:52:01,950 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,185,175,165,155,145,135 -# 2014-04-29 21:52:01,952 INFO No normalization performed -# job finished in 0 seconds at Tue Apr 29 21:52:02 2014 -- 0.19 0.10 0.00 0.01 -- 88c9c365-9b46-43c2-a28d-0f9c18201291 diff --git a/tests/bam2peakshape.py/BamSortByPeakHeight_control_peak_height.gz b/tests/bam2peakshape.py/BamSortByPeakHeight_control_peak_height.gz deleted file mode 100644 index 209f0511c..000000000 Binary files a/tests/bam2peakshape.py/BamSortByPeakHeight_control_peak_height.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamSortByPeakHeight_matrix_peak_height.gz b/tests/bam2peakshape.py/BamSortByPeakHeight_matrix_peak_height.gz deleted file mode 100644 index 59861c127..000000000 Binary files a/tests/bam2peakshape.py/BamSortByPeakHeight_matrix_peak_height.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamWindowSize.tsv b/tests/bam2peakshape.py/BamWindowSize.tsv deleted file mode 100644 index de8abdc3e..000000000 --- a/tests/bam2peakshape.py/BamWindowSize.tsv +++ /dev/null @@ -1,31 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --window-size=500 --centring-method=middle small.bam onepeak.bed --control-file=small.bam -# job started at Tue Apr 29 21:41:13 2014 on fgu205.anat.ox.ac.uk -- ac11dbe1-5968-47b3-b73f-2116d4cb2e43 -# pid: 14221, system: Linux 2.6.32-431.11.2.el6.x86_64 #1 SMP Mon Mar 3 13:32:45 EST 2014 x86_64 -# bin_size : 10 -# centring_method : middle -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : None -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# report_step : 100 -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x7f2c7e46b270> -# stdin : \', mode \'r\' at 0x7f2c7e46b150> -# stdlog : \', mode \'w\' at 0x7f2c7e46b1e0> -# stdout : \', mode \'w\' at 0x7f2c7e46b1e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 500 -# 2014-04-29 21:41:13,220 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2014-04-29 21:41:13,308 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 2500 99 23 0 134 18.0 1391 1500 -495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495 180,180,180,180,180,180,180,180,180,185,190,190,190,190,190,190,190,190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180 -# 2014-04-29 21:41:13,309 INFO No normalization performed -# job finished in 0 seconds at Tue Apr 29 21:41:13 2014 -- 0.19 0.12 0.00 0.01 -- ac11dbe1-5968-47b3-b73f-2116d4cb2e43 diff --git a/tests/bam2peakshape.py/BamWindowSize_control_unsorted.gz b/tests/bam2peakshape.py/BamWindowSize_control_unsorted.gz deleted file mode 100644 index 19e69aeff..000000000 Binary files a/tests/bam2peakshape.py/BamWindowSize_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/BamWindowSize_matrix_unsorted.gz b/tests/bam2peakshape.py/BamWindowSize_matrix_unsorted.gz deleted file mode 100644 index ec35d05ef..000000000 Binary files a/tests/bam2peakshape.py/BamWindowSize_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/bamOnlyInterval.tsv b/tests/bam2peakshape.py/bamOnlyInterval.tsv deleted file mode 100644 index 613572fdf..000000000 --- a/tests/bam2peakshape.py/bamOnlyInterval.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --only-interval small.bam onepeak.bed -# job started at Wed Aug 28 13:51:26 2013 on cgat150.anat.ox.ac.uk -- 178e9121-658b-4697-8ad3-3fdbb8ee1d0f -# pid: 16150, system: Linux 2.6.32-358.11.1.el6.x86_64 #1 SMP Wed May 15 10:48:38 EDT 2013 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : None -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x2ab0ea5fa270> -# stdin : \', mode \'r\' at 0x2ab0ea5fa150> -# stdlog : \', mode \'w\' at 0x2ab0ea5fa1e0> -# stdout : \', mode \'w\' at 0x2ab0ea5fa1e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2013-08-28 13:51:26,727 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2013-08-28 13:51:26,730 INFO No normalization performed -# job finished in 0 seconds at Wed Aug 28 13:51:26 2013 -- 0.32 0.17 0.00 0.01 -- 178e9121-658b-4697-8ad3-3fdbb8ee1d0f diff --git a/tests/bam2peakshape.py/bamOnlyIntervalWithControl.tsv b/tests/bam2peakshape.py/bamOnlyIntervalWithControl.tsv deleted file mode 100644 index 6fd4d5c4c..000000000 --- a/tests/bam2peakshape.py/bamOnlyIntervalWithControl.tsv +++ /dev/null @@ -1,30 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --control=small.bam --only-interval small.bam onepeak.bed -# job started at Wed Aug 28 13:51:14 2013 on cgat150.anat.ox.ac.uk -- 9013ad62-6f4e-43b9-a85a-3afe0f4299e2 -# pid: 16143, system: Linux 2.6.32-358.11.1.el6.x86_64 #1 SMP Wed May 15 10:48:38 EDT 2013 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bam -# format : bam -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x2ae2c8f26270> -# stdin : \', mode \'r\' at 0x2ae2c8f26150> -# stdlog : \', mode \'w\' at 0x2ae2c8f261e0> -# stdout : \', mode \'w\' at 0x2ae2c8f261e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2013-08-28 13:51:14,619 INFO using control file small.bam -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2013-08-28 13:51:14,633 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 100 3170 99 23 670 225 18.0 830 2170 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,185,195,205,215,225,230,230,230,230,230,225,215,205,195,185,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2013-08-28 13:51:14,636 INFO No normalization performed -# job finished in 0 seconds at Wed Aug 28 13:51:14 2013 -- 0.32 0.15 0.00 0.00 -- 9013ad62-6f4e-43b9-a85a-3afe0f4299e2 diff --git a/tests/bam2peakshape.py/bam_control_unsorted.gz b/tests/bam2peakshape.py/bam_control_unsorted.gz deleted file mode 100644 index 3677b2b0c..000000000 Binary files a/tests/bam2peakshape.py/bam_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/bam_matrix_unsorted.gz b/tests/bam2peakshape.py/bam_matrix_unsorted.gz deleted file mode 100644 index c31919446..000000000 Binary files a/tests/bam2peakshape.py/bam_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/control.bam b/tests/bam2peakshape.py/control.bam deleted file mode 120000 index d9f902665..000000000 --- a/tests/bam2peakshape.py/control.bam +++ /dev/null @@ -1 +0,0 @@ -../data/small.bam \ No newline at end of file diff --git a/tests/bam2peakshape.py/control.bam.bai b/tests/bam2peakshape.py/control.bam.bai deleted file mode 120000 index 9e31448fa..000000000 --- a/tests/bam2peakshape.py/control.bam.bai +++ /dev/null @@ -1 +0,0 @@ -../data/small.bam.bai \ No newline at end of file diff --git a/tests/bam2peakshape.py/control.bw b/tests/bam2peakshape.py/control.bw deleted file mode 120000 index 19512be86..000000000 --- a/tests/bam2peakshape.py/control.bw +++ /dev/null @@ -1 +0,0 @@ -../data/small.bw \ No newline at end of file diff --git a/tests/bam2peakshape.py/onepeak.bed b/tests/bam2peakshape.py/onepeak.bed deleted file mode 100644 index b8d56fb4f..000000000 --- a/tests/bam2peakshape.py/onepeak.bed +++ /dev/null @@ -1 +0,0 @@ -chr1 1000 4000 diff --git a/tests/bam2peakshape.py/small.bam b/tests/bam2peakshape.py/small.bam deleted file mode 120000 index d9f902665..000000000 --- a/tests/bam2peakshape.py/small.bam +++ /dev/null @@ -1 +0,0 @@ -../data/small.bam \ No newline at end of file diff --git a/tests/bam2peakshape.py/small.bam.bai b/tests/bam2peakshape.py/small.bam.bai deleted file mode 120000 index 9e31448fa..000000000 --- a/tests/bam2peakshape.py/small.bam.bai +++ /dev/null @@ -1 +0,0 @@ -../data/small.bam.bai \ No newline at end of file diff --git a/tests/bam2peakshape.py/small.bw b/tests/bam2peakshape.py/small.bw deleted file mode 120000 index 19512be86..000000000 --- a/tests/bam2peakshape.py/small.bw +++ /dev/null @@ -1 +0,0 @@ -../data/small.bw \ No newline at end of file diff --git a/tests/bam2peakshape.py/tests.yaml b/tests/bam2peakshape.py/tests.yaml deleted file mode 100644 index ec006030e..000000000 --- a/tests/bam2peakshape.py/tests.yaml +++ /dev/null @@ -1,137 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version - - -BamOnlyInterval: - stdin: null - options: > - --force-output --use-interval - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz ] - references: [bamOnlyInterval.tsv, - bam_matrix_unsorted.gz] - - -BamOnlyIntervalWithControlLibrary: - stdin: null - options: > - --force-output --use-interval - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [bamOnlyIntervalWithControl.tsv, - bam_matrix_unsorted.gz, - bam_control_unsorted.gz] - -WigOnlyInterval: - skip_python: "3" - stdin: null - options: > - --format=bigwig --force-output --use-interval - /small.bw /onepeak.bed - outputs: [stdout, matrix_small_unsorted.gz ] - references: [wigOnlyInterval.tsv, wig_matrix_unsorted.gz] - - -WigOnlyIntervalWithControlLibrary: - skip_python: "3" - stdin: null - options: > - --format=bigwig --force-output --use-interval - --control-bam-file=/control.bw - /small.bw /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [wigOnlyIntervalWithControl.tsv, wig_matrix_unsorted.gz, - wig_control_unsorted.gz] - - -BamOnlyIntervalShift: - stdin: null - options: > - --force-output --use-interval --shift-size=100 - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [BamOnlyIntervalShift.tsv, - BamOnlyIntervalShift_matrix_unsorted.gz, - BamOnlyIntervalShift_control_unsorted.gz] - - -BamOnlyIntervalNormalization: - stdin: null - options: > - --force-output --use-interval --normalize-matrix=sum - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [BamOnlyIntervalNormalization.tsv, - BamOnlyIntervalNormalization_matrix_unsorted.gz, - BamOnlyIntervalNormalization_control_unsorted.gz] - - -BamOnlyIntervalCentringMethodReads: - stdin: null - options: > - --force-output --use-interval --centring-method=reads - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [BamOnlyIntervalCentringMethodReads.tsv, - BamOnlyIntervalCentringMethodReads_matrix_unsorted.gz, - BamOnlyIntervalCentringMethodReads_control_unsorted.gz] - - -BamOnlyIntervalUseStrand: - stdin: null - options: > - --force-output --use-interval --use-strand - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, matrix_small_unsorted.gz, matrix_control_unsorted.gz] - references: [BamOnlyIntervalUseStrand.tsv, - BamOnlyIntervalUseStrand_matrix_unsorted.gz, - BamOnlyIntervalUseStrand_control_unsorted.gz] - - -BamWindowSize: - stdin: null - options: > - --force-output --window-size=500 --centring-method=middle - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_unsorted.gz, - matrix_control_unsorted.gz] - references: [BamWindowSize.tsv, - BamWindowSize_matrix_unsorted.gz, - BamWindowSize_control_unsorted.gz] - - -BamSortByPeakHeight: - stdin: null - options: > - --force-output --sort-order=peak-height - --control-bam-file=/control.bam - /small.bam /onepeak.bed - outputs: [stdout, - matrix_small_peak_height.gz, - matrix_control_peak_height.gz] - references: [BamSortByPeakHeight.tsv, - BamSortByPeakHeight_matrix_peak_height.gz, - BamSortByPeakHeight_control_peak_height.gz] - diff --git a/tests/bam2peakshape.py/wigOnlyInterval.tsv b/tests/bam2peakshape.py/wigOnlyInterval.tsv deleted file mode 100644 index 4755a3d48..000000000 --- a/tests/bam2peakshape.py/wigOnlyInterval.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --format=bigwig --only-interval small.bw onepeak.bed -# job started at Wed Aug 28 13:48:51 2013 on cgat150.anat.ox.ac.uk -- da23a423-917f-488f-9af7-cf799874d784 -# pid: 16024, system: Linux 2.6.32-358.11.1.el6.x86_64 #1 SMP Wed May 15 10:48:38 EDT 2013 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : None -# format : bigwig -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x2b46fe5a8270> -# stdin : \', mode \'r\' at 0x2b46fe5a8150> -# stdlog : \', mode \'w\' at 0x2b46fe5a81e0> -# stdout : \', mode \'w\' at 0x2b46fe5a81e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2013-08-28 13:48:58,578 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 95 3169 98 23 669 4958.0 18.0 30 2169 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,167,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,166,175,184,193,202,230,230,230,230,230,203,194,185,176,167,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2013-08-28 13:48:58,581 INFO No normalization performed -# job finished in 7 seconds at Wed Aug 28 13:48:58 2013 -- 7.68 0.16 0.00 0.00 -- da23a423-917f-488f-9af7-cf799874d784 diff --git a/tests/bam2peakshape.py/wigOnlyIntervalWithControl.tsv b/tests/bam2peakshape.py/wigOnlyIntervalWithControl.tsv deleted file mode 100644 index 9eebdb14f..000000000 --- a/tests/bam2peakshape.py/wigOnlyIntervalWithControl.tsv +++ /dev/null @@ -1,30 +0,0 @@ -# output generated by ../../scripts/bam2peakshape.py --force --format=bigwig --control=small.bw --only-interval small.bw onepeak.bed -# job started at Wed Aug 28 13:49:17 2013 on cgat150.anat.ox.ac.uk -- 5a13aa61-dbd5-4863-a237-b9e578b16317 -# pid: 16038, system: Linux 2.6.32-358.11.1.el6.x86_64 #1 SMP Wed May 15 10:48:38 EDT 2013 x86_64 -# bin_size : 10 -# centring_method : reads -# control_file : small.bw -# format : bigwig -# loglevel : 1 -# normalization : None -# only_interval : True -# output_filename_pattern : %s -# output_force : True -# random_shift : False -# shift : 0 -# sort : [] -# stderr : \', mode \'w\' at 0x2ab92c815270> -# stdin : \', mode \'r\' at 0x2ab92c815150> -# stdlog : \', mode \'w\' at 0x2ab92c8151e0> -# stdout : \', mode \'w\' at 0x2ab92c8151e0> -# strand_specific : False -# timeit_file : None -# timeit_header : None -# timeit_name : all -# window_size : 1000 -# 2013-08-28 13:49:17,454 INFO using control file small.bw -contig start end name interval_width npeaks peak_center peak_width peak_height peak_relative_pos nreads median closest_half_height furthest_halfheight bins counts -# 2013-08-28 13:49:25,928 INFO interval processing: input=1, added=1 -chr1 1000 4000 1 3000 95 3169 98 23 669 4958.0 18.0 30 2169 -995,-985,-975,-965,-955,-945,-935,-925,-915,-905,-895,-885,-875,-865,-855,-845,-835,-825,-815,-805,-795,-785,-775,-765,-755,-745,-735,-725,-715,-705,-695,-685,-675,-665,-655,-645,-635,-625,-615,-605,-595,-585,-575,-565,-555,-545,-535,-525,-515,-505,-495,-485,-475,-465,-455,-445,-435,-425,-415,-405,-395,-385,-375,-365,-355,-345,-335,-325,-315,-305,-295,-285,-275,-265,-255,-245,-235,-225,-215,-205,-195,-185,-175,-165,-155,-145,-135,-125,-115,-105,-95,-85,-75,-65,-55,-45,-35,-25,-15,-5,5,15,25,35,45,55,65,75,85,95,105,115,125,135,145,155,165,175,185,195,205,215,225,235,245,255,265,275,285,295,305,315,325,335,345,355,365,375,385,395,405,415,425,435,445,455,465,475,485,495,505,515,525,535,545,555,565,575,585,595,605,615,625,635,645,655,665,675,685,695,705,715,725,735,745,755,765,775,785,795,805,815,825,835,845,855,865,875,885,895,905,915,925,935,945,955,965,975,985,995 190,190,167,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,166,175,184,193,202,230,230,230,230,230,203,194,185,176,167,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -# 2013-08-28 13:49:25,931 INFO No normalization performed -# job finished in 8 seconds at Wed Aug 28 13:49:25 2013 -- 8.77 0.16 0.00 0.00 -- 5a13aa61-dbd5-4863-a237-b9e578b16317 diff --git a/tests/bam2peakshape.py/wig_control_unsorted.gz b/tests/bam2peakshape.py/wig_control_unsorted.gz deleted file mode 100644 index f476cca89..000000000 Binary files a/tests/bam2peakshape.py/wig_control_unsorted.gz and /dev/null differ diff --git a/tests/bam2peakshape.py/wig_matrix_unsorted.gz b/tests/bam2peakshape.py/wig_matrix_unsorted.gz deleted file mode 100644 index 4b4cfdfdd..000000000 Binary files a/tests/bam2peakshape.py/wig_matrix_unsorted.gz and /dev/null differ diff --git a/tests/bed2annotator.py/tests.yaml b/tests/bed2annotator.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/bed2annotator.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/cat_tables.py/tests.yaml b/tests/cat_tables.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/cat_tables.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/chain2psl.py/mm10ToMm9.over.chain.gz b/tests/chain2psl.py/mm10ToMm9.over.chain.gz deleted file mode 120000 index 7fa8edb5a..000000000 --- a/tests/chain2psl.py/mm10ToMm9.over.chain.gz +++ /dev/null @@ -1 +0,0 @@ -../data/mm10ToMm9.over.chain.gz \ No newline at end of file diff --git a/tests/chain2psl.py/out.psl b/tests/chain2psl.py/out.psl deleted file mode 100644 index 659ebde48..000000000 --- a/tests/chain2psl.py/out.psl +++ /dev/null @@ -1,15 +0,0 @@ -# output generated by ../../scripts/chain2psl.py -# job started at Fri Aug 23 15:54:13 2013 on cgat150.anat.ox.ac.uk -- c49de908-2f37-4115-9df3-851f9b7a0d62 -# pid: 36958, system: Linux 2.6.32-358.11.1.el6.x86_64 #1 SMP Wed May 15 10:48:38 EDT 2013 x86_64 -# loglevel : 1 -# stderr : \', mode \'w\' at 0x2aeb6c3c6270> -# stdin : \', mode \'r\' at 0x2aeb6c3c6150> -# stdlog : \', mode \'w\' at 0x2aeb6c3c61e0> -# stdout : \', mode \'w\' at 0x2aeb6c3c61e0> -# timeit_file : None -# timeit_header : None -# timeit_name : all -191466594 0 0 0 0 0 0 0 + chr1 197195432 3000000 197195432 chr1 195471971 3009919 195369238 36 19423349,2213283,22520320,14295619,32,607,277,1272,151,78,60,87,117,35,93,13533007,71033,10154514,2913567,124,2414944,52624216,12668739,133680,26689256,8447653,14433,2824,423,32511,82255,22625,501779,650,4300,2698681, 3000000,22473349,24736638,47256958,61552578,61552611,61553249,61553531,61554803,61555032,61555137,61555278,61555401,61555577,61555612,61569123,75118130,75189164,87343678,90257248,90257381,92672331,145297134,158098873,158232554,185327810,193775463,193789897,193792722,193793146,193875657,193957913,193987695,194490416,194492205,194496751, 3009919,22466510,24679793,47200114,61495734,61495767,61496405,61496687,61498171,61498400,61498505,61498646,61498981,61499157,61499404,61522549,75121556,75192589,85447103,88360673,88360806,90775754,143450004,156168743,156302423,183503931,191951585,191966018,191968842,191969265,192051776,192134031,192163813,192665592,192666257,192670557, -407460 0 0 0 0 0 0 0 + chr1_random 1231697 0 427279 chr1 195471971 183038090 183453931 48 72677,726,4963,24,20633,1065,1058,1352,766,53,18483,708,28,313,3770,10965,33,88,6413,6341,13,101,1520,7475,8328,2211,117,58,7,6,15258,5560,634,1404,28,172,287,29,3278,2757,9,2455,17481,120,7,30,7,187649, 0,72678,73405,78379,78815,99449,100675,101750,103804,104670,104724,123307,124016,124045,127457,131228,142194,142228,142317,149165,155663,155677,155779,157406,165340,173806,176018,176136,176194,176201,177416,192685,198345,210802,212207,212240,212539,212827,212857,216538,219395,219405,221875,239364,239584,239591,239622,239630, 183038090,183110767,183111493,183116467,183117164,183137797,183138879,183139954,183141749,183142564,183142618,183161170,183161879,183161908,183163481,183167251,183178217,183178251,183178339,183186310,183192729,183192742,183192844,183194649,183202410,183210937,183213149,183213267,183213326,183213334,183213607,183228874,183235352,183236018,183237422,183237454,183239101,183239389,183239419,183243035,183246083,183246093,183248561,183266048,183266236,183266244,183266274,183266282, -# 2013-08-23 15:54:16,540 INFO ninput=2, noutput=2, nskipped=0 -# job finished in 2 seconds at Fri Aug 23 15:54:16 2013 -- 3.13 0.14 0.00 0.00 -- c49de908-2f37-4115-9df3-851f9b7a0d62 diff --git a/tests/chain2psl.py/tests.yaml b/tests/chain2psl.py/tests.yaml deleted file mode 100644 index 9d9cda85d..000000000 --- a/tests/chain2psl.py/tests.yaml +++ /dev/null @@ -1,12 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version - -chain: - stdin: mm10ToMm9.over.chain.gz - outputs: [stdout] - references: [out.psl] - options: diff --git a/tests/combine_tables.py/Brain-F1-R1.exon.validation.tsv b/tests/combine_tables.py/Brain-F1-R1.exon.validation.tsv deleted file mode 100644 index c1fde1c97..000000000 --- a/tests/combine_tables.py/Brain-F1-R1.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 1192074 -mapped 1192074 -spliced 70209 -spliced_bothoverlap 21248 -spliced_exact 2280 -spliced_halfoverlap 7103 -spliced_ignored 43 -spliced_inexact 47319 -spliced_nooverlap 41815 -spliced_overrun 11785 -spliced_underrun 35534 -unmapped 0 -unspliced 1121865 -unspliced_nooverlap 844361 -unspliced_nooverrun 261354 -unspliced_overlap 277504 -unspliced_overrun 16150 diff --git a/tests/combine_tables.py/Brain-F1-R1.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/Brain-F1-R1.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index 2c1b138f5..000000000 --- a/tests/combine_tables.py/Brain-F1-R1.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/Brain-F1-R1.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/Brain-F1-R1.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:03 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 38.431682 -2 38.359101 -3 37.786103 -4 36.817173 -5 36.719593 -6 35.467565 -7 38.092814 -8 37.426394 -9 37.436355 -10 34.581035 -11 36.809462 -12 37.366635 -13 37.157685 -14 37.003179 -15 36.747924 -16 36.609361 -17 36.312623 -18 36.045169 -19 36.090275 -20 35.711856 -21 35.341332 -22 35.044161 -23 34.686123 -24 34.183453 -25 33.940641 -26 33.434012 -27 33.194524 -28 32.63614 -29 32.335688 -30 31.806513 -31 31.319239 -32 30.872834 -33 30.285672 -34 29.937992 -35 29.443948 - diff --git a/tests/combine_tables.py/Brain-F1-R2.exon.validation.tsv b/tests/combine_tables.py/Brain-F1-R2.exon.validation.tsv deleted file mode 100644 index a18ee9ccd..000000000 --- a/tests/combine_tables.py/Brain-F1-R2.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 1241349 -mapped 1241349 -spliced 74997 -spliced_bothoverlap 22957 -spliced_exact 3035 -spliced_halfoverlap 7767 -spliced_ignored 67 -spliced_inexact 50646 -spliced_nooverlap 44206 -spliced_overrun 12401 -spliced_underrun 38245 -unmapped 0 -unspliced 1166352 -unspliced_nooverlap 875800 -unspliced_nooverrun 273823 -unspliced_overlap 290552 -unspliced_overrun 16729 diff --git a/tests/combine_tables.py/Brain-F1-R2.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/Brain-F1-R2.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index f6bbc70de..000000000 --- a/tests/combine_tables.py/Brain-F1-R2.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/Brain-F1-R2.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/Brain-F1-R2.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:03 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 38.456286 -2 39.188868 -3 38.657553 -4 37.957105 -5 37.919865 -6 36.134969 -7 38.7074 -8 38.030236 -9 38.177917 -10 35.596811 -11 37.495758 -12 37.941189 -13 37.887334 -14 37.701731 -15 37.399855 -16 37.137005 -17 36.743277 -18 36.61135 -19 36.349215 -20 36.258035 -21 35.816852 -22 35.344138 -23 34.820308 -24 34.419653 -25 34.281176 -26 33.782293 -27 33.478677 -28 32.922052 -29 32.356289 -30 31.858269 -31 31.413711 -32 30.934453 -33 30.288899 -34 29.844867 -35 29.299181 - diff --git a/tests/combine_tables.py/Brain-F2-R1.exon.validation.tsv b/tests/combine_tables.py/Brain-F2-R1.exon.validation.tsv deleted file mode 100644 index f44d9d305..000000000 --- a/tests/combine_tables.py/Brain-F2-R1.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 1137384 -mapped 1137384 -spliced 73947 -spliced_bothoverlap 23171 -spliced_exact 3015 -spliced_halfoverlap 7801 -spliced_ignored 69 -spliced_inexact 51128 -spliced_nooverlap 42906 -spliced_overrun 12650 -spliced_underrun 38478 -unmapped 0 -unspliced 1063437 -unspliced_nooverlap 793946 -unspliced_nooverrun 253506 -unspliced_overlap 269491 -unspliced_overrun 15985 diff --git a/tests/combine_tables.py/Brain-F2-R1.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/Brain-F2-R1.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index 3713584c9..000000000 --- a/tests/combine_tables.py/Brain-F2-R1.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/Brain-F2-R1.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/Brain-F2-R1.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:19 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 39.071238 -2 39.229009 -3 38.912227 -4 39.133563 -5 39.1226 -6 39.08531 -7 39.196536 -8 38.974727 -9 38.979975 -10 38.78891 -11 38.830188 -12 38.973154 -13 38.951122 -14 38.963898 -15 38.856298 -16 38.924159 -17 38.919922 -18 38.922683 -19 38.686766 -20 38.858523 -21 38.597927 -22 38.471857 -23 38.747422 -24 38.51056 -25 38.654077 -26 38.440747 -27 38.093414 -28 38.266978 -29 38.249967 -30 38.067499 -31 37.944933 -32 37.632927 -33 37.75147 -34 37.670929 -35 37.34416 - diff --git a/tests/combine_tables.py/Brain-F2-R2.exon.validation.tsv b/tests/combine_tables.py/Brain-F2-R2.exon.validation.tsv deleted file mode 100644 index 93c4ce67b..000000000 --- a/tests/combine_tables.py/Brain-F2-R2.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 1256621 -mapped 1256621 -spliced 78998 -spliced_bothoverlap 24170 -spliced_exact 2626 -spliced_halfoverlap 8166 -spliced_ignored 82 -spliced_inexact 53880 -spliced_nooverlap 46580 -spliced_overrun 13375 -spliced_underrun 40505 -unmapped 0 -unspliced 1177623 -unspliced_nooverlap 878847 -unspliced_nooverrun 281541 -unspliced_overlap 298776 -unspliced_overrun 17235 diff --git a/tests/combine_tables.py/Brain-F2-R2.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/Brain-F2-R2.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index 11e63f093..000000000 --- a/tests/combine_tables.py/Brain-F2-R2.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/Brain-F2-R2.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/Brain-F2-R2.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:18 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 39.262413 -2 39.225667 -3 39.130035 -4 39.007157 -5 38.925765 -6 38.670488 -7 38.891177 -8 38.786911 -9 38.795467 -10 38.016508 -11 38.606532 -12 38.710793 -13 38.640963 -14 38.588047 -15 38.535051 -16 38.481949 -17 38.415626 -18 38.326127 -19 38.267224 -20 38.155954 -21 38.030757 -22 37.883728 -23 37.644065 -24 37.533436 -25 37.326208 -26 37.132533 -27 36.946584 -28 36.659302 -29 36.411508 -30 36.093223 -31 35.877106 -32 35.592824 -33 35.153221 -34 34.987149 -35 34.616187 - diff --git a/tests/combine_tables.py/UHR-F1-R1.exon.validation.tsv b/tests/combine_tables.py/UHR-F1-R1.exon.validation.tsv deleted file mode 100644 index 575d3a1f4..000000000 --- a/tests/combine_tables.py/UHR-F1-R1.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 806848 -mapped 806848 -spliced 48559 -spliced_bothoverlap 12531 -spliced_exact 1346 -spliced_halfoverlap 5613 -spliced_ignored 27 -spliced_inexact 29329 -spliced_nooverlap 30388 -spliced_overrun 6443 -spliced_underrun 22886 -unmapped 0 -unspliced 758289 -unspliced_nooverlap 552772 -unspliced_nooverrun 193325 -unspliced_overlap 205517 -unspliced_overrun 12192 diff --git a/tests/combine_tables.py/UHR-F1-R1.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/UHR-F1-R1.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index dadf1c6b5..000000000 --- a/tests/combine_tables.py/UHR-F1-R1.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/UHR-F1-R1.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/UHR-F1-R1.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:49 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 38.718499 -2 38.692835 -3 37.886879 -4 37.133697 -5 36.886926 -6 36.528932 -7 38.501957 -8 37.700383 -9 37.577311 -10 35.932376 -11 37.314929 -12 37.416167 -13 37.339353 -14 37.28759 -15 37.144223 -16 37.201106 -17 37.262631 -18 37.198938 -19 37.181037 -20 37.054708 -21 37.057731 -22 37.097481 -23 37.050893 -24 36.627497 -25 36.977044 -26 36.76404 -27 36.631694 -28 36.240537 -29 36.269671 -30 35.921543 -31 35.789091 -32 35.462085 -33 35.172174 -34 35.007217 -35 34.617801 - diff --git a/tests/combine_tables.py/UHR-F1-R2.exon.validation.tsv b/tests/combine_tables.py/UHR-F1-R2.exon.validation.tsv deleted file mode 100644 index c9f185244..000000000 --- a/tests/combine_tables.py/UHR-F1-R2.exon.validation.tsv +++ /dev/null @@ -1,18 +0,0 @@ -category counts -input 892148 -mapped 892148 -spliced 58209 -spliced_bothoverlap 15042 -spliced_exact 1715 -spliced_halfoverlap 6753 -spliced_ignored 46 -spliced_inexact 35122 -spliced_nooverlap 36368 -spliced_overrun 7763 -spliced_underrun 27359 -unmapped 0 -unspliced 833939 -unspliced_nooverlap 602853 -unspliced_nooverrun 217354 -unspliced_overlap 231086 -unspliced_overrun 13732 diff --git a/tests/combine_tables.py/UHR-F1-R2.picard_stats.quality_by_cycle_metrics b/tests/combine_tables.py/UHR-F1-R2.picard_stats.quality_by_cycle_metrics deleted file mode 100644 index 5b35fc832..000000000 --- a/tests/combine_tables.py/UHR-F1-R2.picard_stats.quality_by_cycle_metrics +++ /dev/null @@ -1,44 +0,0 @@ -## htsjdk.samtools.metrics.StringHeader -# CollectMultipleMetrics INPUT=BamFiles.dir/UHR-F1-R2.bam ASSUME_SORTED=true OUTPUT=Picard_stats.dir/UHR-F1-R2.picard_stats VALIDATION_STRINGENCY=SILENT REFERENCE_SEQUENCE=/ifs/mirror/genomes/index/hg38.fa STOP_AFTER=0 METRIC_ACCUMULATION_LEVEL=[ALL_READS] PROGRAM=[CollectAlignmentSummaryMetrics, CollectBaseDistributionByCycle, CollectInsertSizeMetrics, MeanQualityByCycle, QualityScoreDistribution] INCLUDE_UNPAIRED=false VERBOSITY=INFO QUIET=false COMPRESSION_LEVEL=5 MAX_RECORDS_IN_RAM=500000 CREATE_INDEX=false CREATE_MD5_FILE=false GA4GH_CLIENT_SECRETS=client_secrets.json USE_JDK_DEFLATER=false USE_JDK_INFLATER=false -## htsjdk.samtools.metrics.StringHeader -# Started on: Fri Aug 24 11:09:49 BST 2018 - - -## HISTOGRAM java.lang.Integer -CYCLE MEAN_QUALITY -1 39.182444 -2 39.23106 -3 38.934046 -4 38.601784 -5 38.596249 -6 38.308799 -7 38.854468 -8 38.616093 -9 38.517652 -10 37.766502 -11 38.348214 -12 38.443104 -13 38.236812 -14 38.174215 -15 38.136873 -16 38.151113 -17 38.139396 -18 38.122217 -19 38.049351 -20 37.952503 -21 37.767426 -22 37.722352 -23 37.448784 -24 37.228862 -25 37.11858 -26 36.91564 -27 36.697264 -28 36.36046 -29 36.165844 -30 35.825119 -31 35.533471 -32 35.249325 -33 34.832703 -34 34.546918 -35 34.151861 - diff --git a/tests/combine_tables.py/combine_1.out b/tests/combine_tables.py/combine_1.out deleted file mode 100644 index 73070c3f1..000000000 --- a/tests/combine_tables.py/combine_1.out +++ /dev/null @@ -1,85 +0,0 @@ -# 2018-08-24 17:36:44,169 INFO output generated by combine_tables --regex-start=## HISTOGRAM --missing-value=0 --take=2 Picard_stats.dir/Brain-F1-R1.picard_stats.quality_by_cycle_metrics Picard_stats.dir/Brain-F1-R2.picard_stats.quality_by_cycle_metrics Picard_stats.dir/Brain-F2-R1.picard_stats.quality_by_cycle_metrics Picard_stats.dir/Brain-F2-R2.picard_stats.quality_by_cycle_metrics Picard_stats.dir/UHR-F1-R1.picard_stats.quality_by_cycle_metrics Picard_stats.dir/UHR-F1-R2.picard_stats.quality_by_cycle_metrics \ -# job started at Fri Aug 24 17:36:44 2018 on cgath1.anat.ox.ac.uk -- cd69e441-6152-4edd-9f60-1fe08caf70e5 \ -# pid: 56411, system: Linux 2.6.32-754.el6.x86_64 #1 SMP Thu May 24 18:18:25 EDT 2018 x86_64 -# 2018-08-24 17:36:44,169 INFO add_file_prefix : False \ -# cat : None \ -# columns : 1 \ -# glob : None \ -# headers : None \ -# ignore_empty : True \ -# ignore_titles : None \ -# input_has_titles : True \ -# log_config_filename : None \ -# loglevel : 1 \ -# merge : False \ -# missing_value : 0 \ -# prefixes : None \ -# random_seed : None \ -# regex_end : None \ -# regex_filename : (.*) \ -# regex_start : ## HISTOGRAM \ -# short_help : None \ -# skip_titles : False \ -# sort : None \ -# sort_keys : False \ -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='UTF-8'> \ -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# take : ['2'] \ -# test : 0 \ -# timeit_file : None \ -# timeit_header : None \ -# timeit_name : all \ -# tracing : None \ -# use_file_prefix : False -# 2018-08-24 17:36:44,169 INFO combining 6 tables -# 2018-08-24 17:36:44,169 INFO processing Picard_stats.dir/Brain-F1-R1.picard_stats.quality_by_cycle_metrics (1/6) -# 2018-08-24 17:36:44,170 INFO reading table from line 6 -# 2018-08-24 17:36:44,170 INFO processing Picard_stats.dir/Brain-F1-R2.picard_stats.quality_by_cycle_metrics (2/6) -# 2018-08-24 17:36:44,170 INFO reading table from line 6 -# 2018-08-24 17:36:44,171 INFO processing Picard_stats.dir/Brain-F2-R1.picard_stats.quality_by_cycle_metrics (3/6) -# 2018-08-24 17:36:44,171 INFO reading table from line 6 -# 2018-08-24 17:36:44,171 INFO processing Picard_stats.dir/Brain-F2-R2.picard_stats.quality_by_cycle_metrics (4/6) -# 2018-08-24 17:36:44,171 INFO reading table from line 6 -# 2018-08-24 17:36:44,172 INFO processing Picard_stats.dir/UHR-F1-R1.picard_stats.quality_by_cycle_metrics (5/6) -# 2018-08-24 17:36:44,172 INFO reading table from line 6 -# 2018-08-24 17:36:44,172 INFO processing Picard_stats.dir/UHR-F1-R2.picard_stats.quality_by_cycle_metrics (6/6) -# 2018-08-24 17:36:44,172 INFO reading table from line 6 -CYCLE MEAN_QUALITY MEAN_QUALITY MEAN_QUALITY MEAN_QUALITY MEAN_QUALITY MEAN_QUALITY -1 38.431682 38.456286 39.071238 39.262413 38.718499 39.182444 -2 38.359101 39.188868 39.229009 39.225667 38.692835 39.23106 -3 37.786103 38.657553 38.912227 39.130035 37.886879 38.934046 -4 36.817173 37.957105 39.133563 39.007157 37.133697 38.601784 -5 36.719593 37.919865 39.1226 38.925765 36.886926 38.596249 -6 35.467565 36.134969 39.08531 38.670488 36.528932 38.308799 -7 38.092814 38.7074 39.196536 38.891177 38.501957 38.854468 -8 37.426394 38.030236 38.974727 38.786911 37.700383 38.616093 -9 37.436355 38.177917 38.979975 38.795467 37.577311 38.517652 -10 34.581035 35.596811 38.78891 38.016508 35.932376 37.766502 -11 36.809462 37.495758 38.830188 38.606532 37.314929 38.348214 -12 37.366635 37.941189 38.973154 38.710793 37.416167 38.443104 -13 37.157685 37.887334 38.951122 38.640963 37.339353 38.236812 -14 37.003179 37.701731 38.963898 38.588047 37.28759 38.174215 -15 36.747924 37.399855 38.856298 38.535051 37.144223 38.136873 -16 36.609361 37.137005 38.924159 38.481949 37.201106 38.151113 -17 36.312623 36.743277 38.919922 38.415626 37.262631 38.139396 -18 36.045169 36.61135 38.922683 38.326127 37.198938 38.122217 -19 36.090275 36.349215 38.686766 38.267224 37.181037 38.049351 -20 35.711856 36.258035 38.858523 38.155954 37.054708 37.952503 -21 35.341332 35.816852 38.597927 38.030757 37.057731 37.767426 -22 35.044161 35.344138 38.471857 37.883728 37.097481 37.722352 -23 34.686123 34.820308 38.747422 37.644065 37.050893 37.448784 -24 34.183453 34.419653 38.51056 37.533436 36.627497 37.228862 -25 33.940641 34.281176 38.654077 37.326208 36.977044 37.11858 -26 33.434012 33.782293 38.440747 37.132533 36.76404 36.91564 -27 33.194524 33.478677 38.093414 36.946584 36.631694 36.697264 -28 32.63614 32.922052 38.266978 36.659302 36.240537 36.36046 -29 32.335688 32.356289 38.249967 36.411508 36.269671 36.165844 -30 31.806513 31.858269 38.067499 36.093223 35.921543 35.825119 -31 31.319239 31.413711 37.944933 35.877106 35.789091 35.533471 -32 30.872834 30.934453 37.632927 35.592824 35.462085 35.249325 -33 30.285672 30.288899 37.75147 35.153221 35.172174 34.832703 -34 29.937992 29.844867 37.670929 34.987149 35.007217 34.546918 -35 29.443948 29.299181 37.34416 34.616187 34.617801 34.151861 -# 2018-08-24 17:36:44,173 INFO job finished in 0 seconds at Fri Aug 24 17:36:44 2018 -- 0.29 0.15 0.00 0.00 -- cd69e441-6152-4edd-9f60-1fe08caf70e5 diff --git a/tests/combine_tables.py/combine_2.out b/tests/combine_tables.py/combine_2.out deleted file mode 100644 index 7fad26319..000000000 --- a/tests/combine_tables.py/combine_2.out +++ /dev/null @@ -1,61 +0,0 @@ -# 2018-08-24 17:10:48,771 INFO output generated by /ifs/devel/sebastian/cgat-developers-v0/cgat-core/CGATCore/Tables.py --header-names=Brain-F1-R1,Brain-F1-R2,Brain-F2-R1,Brain-F2-R2,UHR-F1-R1,UHR-F1-R2 --skip-titles --missing-value=0 --ignore-empty Brain-F1-R1.exon.validation.tsv Brain-F1-R2.exon.validation.tsv Brain-F2-R1.exon.validation.tsv Brain-F2-R2.exon.validation.tsv UHR-F1-R1.exon.validation.tsv UHR-F1-R2.exon.validation.tsv \ -# job started at Fri Aug 24 17:10:48 2018 on cgath1.anat.ox.ac.uk -- 6632c70b-9fa8-4a24-a742-89535f3f1898 \ -# pid: 43102, system: Linux 2.6.32-754.el6.x86_64 #1 SMP Thu May 24 18:18:25 EDT 2018 x86_64 -# 2018-08-24 17:10:48,772 INFO add_file_prefix : False \ -# cat : None \ -# columns : 1 \ -# glob : None \ -# headers : Brain-F1-R1,Brain-F1-R2,Brain-F2-R1,Brain-F2-R2,UHR-F1-R1,UHR-F1-R2 \ -# ignore_empty : True \ -# ignore_titles : None \ -# input_has_titles : True \ -# log_config_filename : None \ -# loglevel : 1 \ -# merge : False \ -# missing_value : 0 \ -# prefixes : None \ -# random_seed : None \ -# regex_end : None \ -# regex_filename : (.*) \ -# regex_start : None \ -# short_help : None \ -# skip_titles : True \ -# sort : None \ -# sort_keys : False \ -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='UTF-8'> \ -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='UTF-8'> \ -# take : [] \ -# test : 0 \ -# timeit_file : None \ -# timeit_header : None \ -# timeit_name : all \ -# tracing : None \ -# use_file_prefix : False -# 2018-08-24 17:10:48,772 INFO combining 6 tables -# 2018-08-24 17:10:48,772 INFO processing Brain-F1-R1.exon.validation.tsv (1/6) -# 2018-08-24 17:10:48,773 INFO processing Brain-F1-R2.exon.validation.tsv (2/6) -# 2018-08-24 17:10:48,773 INFO processing Brain-F2-R1.exon.validation.tsv (3/6) -# 2018-08-24 17:10:48,774 INFO processing Brain-F2-R2.exon.validation.tsv (4/6) -# 2018-08-24 17:10:48,774 INFO processing UHR-F1-R1.exon.validation.tsv (5/6) -# 2018-08-24 17:10:48,775 INFO processing UHR-F1-R2.exon.validation.tsv (6/6) -bin Brain-F1-R1 Brain-F1-R2 Brain-F2-R1 Brain-F2-R2 UHR-F1-R1 UHR-F1-R2 -input 1192074 1241349 1137384 1256621 806848 892148 -mapped 1192074 1241349 1137384 1256621 806848 892148 -spliced 70209 74997 73947 78998 48559 58209 -spliced_bothoverlap 21248 22957 23171 24170 12531 15042 -spliced_exact 2280 3035 3015 2626 1346 1715 -spliced_halfoverlap 7103 7767 7801 8166 5613 6753 -spliced_ignored 43 67 69 82 27 46 -spliced_inexact 47319 50646 51128 53880 29329 35122 -spliced_nooverlap 41815 44206 42906 46580 30388 36368 -spliced_overrun 11785 12401 12650 13375 6443 7763 -spliced_underrun 35534 38245 38478 40505 22886 27359 -unmapped 0 0 0 0 0 0 -unspliced 1121865 1166352 1063437 1177623 758289 833939 -unspliced_nooverlap 844361 875800 793946 878847 552772 602853 -unspliced_nooverrun 261354 273823 253506 281541 193325 217354 -unspliced_overlap 277504 290552 269491 298776 205517 231086 -unspliced_overrun 16150 16729 15985 17235 12192 13732 -# 2018-08-24 17:10:48,776 INFO job finished in 0 seconds at Fri Aug 24 17:10:48 2018 -- 0.14 0.06 0.00 0.00 -- 6632c70b-9fa8-4a24-a742-89535f3f1898 diff --git a/tests/combine_tables.py/tests.yaml b/tests/combine_tables.py/tests.yaml deleted file mode 100644 index efbf5e960..000000000 --- a/tests/combine_tables.py/tests.yaml +++ /dev/null @@ -1,18 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version - -combine_1: - stdin: null - outputs: [stdout] - references: [combine_1.out] - options: --regex-start="## HISTOGRAM" --missing-value=0 --take=2 %DIR%/Brain-F1-R1.picard_stats.quality_by_cycle_metrics %DIR%/Brain-F1-R2.picard_stats.quality_by_cycle_metrics %DIR%/Brain-F2-R1.picard_stats.quality_by_cycle_metrics %DIR%/Brain-F2-R2.picard_stats.quality_by_cycle_metrics %DIR%/UHR-F1-R1.picard_stats.quality_by_cycle_metrics %DIR%/UHR-F1-R2.picard_stats.quality_by_cycle_metrics - -combine_2: - stdin: null - outputs: [stdout] - references: [combine_2.out] - options: --header-names=Brain-F1-R1,Brain-F1-R2,Brain-F2-R1,Brain-F2-R2,UHR-F1-R1,UHR-F1-R2 --skip-titles --missing-value=0 --ignore-empty %DIR%/Brain-F1-R1.exon.validation.tsv %DIR%/Brain-F1-R2.exon.validation.tsv %DIR%/Brain-F2-R1.exon.validation.tsv %DIR%/Brain-F2-R2.exon.validation.tsv %DIR%/UHR-F1-R1.exon.validation.tsv %DIR%/UHR-F1-R2.exon.validation.tsv diff --git a/tests/csv_cut.py/tests.yaml b/tests/csv_cut.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/csv_cut.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/csv_intersection.py/tests.yaml b/tests/csv_intersection.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/csv_intersection.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/csv_rename.py/tests.yaml b/tests/csv_rename.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/csv_rename.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/csv_set.py/tests.yaml b/tests/csv_set.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/csv_set.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/medip_merge_intervals.py/tests.yaml b/tests/medip_merge_intervals.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/medip_merge_intervals.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/randomize_lines.py/tests.yaml b/tests/randomize_lines.py/tests.yaml deleted file mode 100644 index 49df38a32..000000000 --- a/tests/randomize_lines.py/tests.yaml +++ /dev/null @@ -1,34 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version - -without_header_py2: - skip_python: 3 - stdin: ../data/design.tsv - outputs: [stdout] - references: [without_header_py2.tsv] - options: --random-seed=1 - -with_header_py2: - skip_python: 3 - stdin: ../data/design.tsv - outputs: [stdout] - references: [with_header_py2.tsv] - options: --random-seed=1 --keep-header=1 - -without_header: - skip_python: 2 - stdin: ../data/design.tsv - outputs: [stdout] - references: [without_header.tsv] - options: --random-seed=1 - -with_header: - skip_python: 2 - stdin: ../data/design.tsv - outputs: [stdout] - references: [with_header.tsv] - options: --random-seed=1 --keep-header=1 diff --git a/tests/randomize_lines.py/with_header.tsv b/tests/randomize_lines.py/with_header.tsv deleted file mode 100644 index 937246e4d..000000000 --- a/tests/randomize_lines.py/with_header.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by randomize_lines --random-seed=1 --keep-header=1 -# job started at Mon Dec 19 13:09:11 2016 on cgath1.anat.ox.ac.uk -- 2e5664ee-413a-4c81-915d-ac5dff2f20c7 -# pid: 39726, system: Linux 2.6.32-642.1.1.el6.x86_64 #1 SMP Fri May 6 14:54:05 EDT 2016 x86_64 -# keep_header : 1 -# loglevel : 1 -# random_seed : 1 -# short_help : None -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='ANSI_X3.4-1968'> -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# timeit_file : None -# timeit_header : None -# timeit_name : all -track include group pair treatment genotype replicate -delta-N-2 1 deltaN 1 N delta 2 -delta-P-3 1 deltaP 1 P delta 3 -wt-N-1 1 wtN 1 N wt 1 -delta-N-3 1 deltaN 1 N delta 3 -wt-P-3 1 wtP 1 P wt 3 -delta-N-1 1 deltaN 1 N delta 1 -wt-P-1 1 wtP 1 P wt 1 -delta-P-2 1 deltaP 1 P delta 2 -wt-P-2 1 wtP 1 P wt 2 -wt-N-2 1 wtN 1 N wt 2 -delta-P-1 1 deltaP 1 P delta 1 -wt-N-3 1 wtN 1 N wt 3 -## 2016-12-19 13:09:11,639 INFO header=1, lines_output=12, lines_input=12 -# job finished in 0 seconds at Mon Dec 19 13:09:11 2016 -- 0.20 0.05 0.00 0.00 -- 2e5664ee-413a-4c81-915d-ac5dff2f20c7 diff --git a/tests/randomize_lines.py/with_header_py2.tsv b/tests/randomize_lines.py/with_header_py2.tsv deleted file mode 100644 index 838345d53..000000000 --- a/tests/randomize_lines.py/with_header_py2.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by randomize_lines --random-seed=1 --keep-header=1 -# job started at Mon Dec 19 13:10:02 2016 on cgath1.anat.ox.ac.uk -- e24021c0-0e5e-4c31-8dbd-40cc81a0bd7f -# pid: 41084, system: Linux 2.6.32-642.1.1.el6.x86_64 #1 SMP Fri May 6 14:54:05 EDT 2016 x86_64 -# keep_header : 1 -# loglevel : 1 -# random_seed : 1 -# short_help : None -# stderr : ', mode 'w' at 0x2b58d9f8a1e0> -# stdin : ', mode 'r' at 0x2b58d9f8a0c0> -# stdlog : ', mode 'w' at 0x2b58d9f8a150> -# stdout : ', mode 'w' at 0x2b58d9f8a150> -# timeit_file : None -# timeit_header : None -# timeit_name : all -track include group pair treatment genotype replicate -delta-N-3 1 deltaN 1 N delta 3 -delta-P-3 1 deltaP 1 P delta 3 -wt-P-2 1 wtP 1 P wt 2 -wt-N-1 1 wtN 1 N wt 1 -wt-P-3 1 wtP 1 P wt 3 -delta-N-1 1 deltaN 1 N delta 1 -delta-P-2 1 deltaP 1 P delta 2 -wt-P-1 1 wtP 1 P wt 1 -wt-N-3 1 wtN 1 N wt 3 -delta-N-2 1 deltaN 1 N delta 2 -delta-P-1 1 deltaP 1 P delta 1 -wt-N-2 1 wtN 1 N wt 2 -## 2016-12-19 13:10:02,648 INFO header=1, lines_input=12, lines_output=12 -# job finished in 0 seconds at Mon Dec 19 13:10:02 2016 -- 0.58 5.31 0.00 0.01 -- e24021c0-0e5e-4c31-8dbd-40cc81a0bd7f diff --git a/tests/randomize_lines.py/without_header.tsv b/tests/randomize_lines.py/without_header.tsv deleted file mode 100644 index e10178143..000000000 --- a/tests/randomize_lines.py/without_header.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by randomize_lines --random-seed=1 -# job started at Mon Dec 19 13:08:07 2016 on cgath1.anat.ox.ac.uk -- a13aa617-b131-49d9-93f3-43b24d536a0a -# pid: 39377, system: Linux 2.6.32-642.1.1.el6.x86_64 #1 SMP Fri May 6 14:54:05 EDT 2016 x86_64 -# keep_header : 0 -# loglevel : 1 -# random_seed : 1 -# short_help : None -# stderr : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdin : <_io.TextIOWrapper name='' mode='r' encoding='ANSI_X3.4-1968'> -# stdlog : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# stdout : <_io.TextIOWrapper name='' mode='w' encoding='ANSI_X3.4-1968'> -# timeit_file : None -# timeit_header : None -# timeit_name : all -delta-N-2 1 deltaN 1 N delta 2 -delta-P-3 1 deltaP 1 P delta 3 -track include group pair treatment genotype replicate -delta-P-2 1 deltaP 1 P delta 2 -wt-P-2 1 wtP 1 P wt 2 -wt-N-3 1 wtN 1 N wt 3 -wt-P-3 1 wtP 1 P wt 3 -delta-N-1 1 deltaN 1 N delta 1 -delta-P-1 1 deltaP 1 P delta 1 -wt-P-1 1 wtP 1 P wt 1 -wt-N-1 1 wtN 1 N wt 1 -delta-N-3 1 deltaN 1 N delta 3 -wt-N-2 1 wtN 1 N wt 2 -## 2016-12-19 13:08:07,568 INFO lines_output=13, lines_input=13 -# job finished in 0 seconds at Mon Dec 19 13:08:07 2016 -- 0.19 0.06 0.00 0.00 -- a13aa617-b131-49d9-93f3-43b24d536a0a diff --git a/tests/randomize_lines.py/without_header_py2.tsv b/tests/randomize_lines.py/without_header_py2.tsv deleted file mode 100644 index 371b34f93..000000000 --- a/tests/randomize_lines.py/without_header_py2.tsv +++ /dev/null @@ -1,29 +0,0 @@ -# output generated by randomize_lines --random-seed=1 -# job started at Mon Dec 19 13:08:30 2016 on cgath1.anat.ox.ac.uk -- 2f125d43-d713-4f09-a4ed-345a0330a562 -# pid: 39511, system: Linux 2.6.32-642.1.1.el6.x86_64 #1 SMP Fri May 6 14:54:05 EDT 2016 x86_64 -# keep_header : 0 -# loglevel : 1 -# random_seed : 1 -# short_help : None -# stderr : ', mode 'w' at 0x2b9e215a71e0> -# stdin : ', mode 'r' at 0x2b9e215a70c0> -# stdlog : ', mode 'w' at 0x2b9e215a7150> -# stdout : ', mode 'w' at 0x2b9e215a7150> -# timeit_file : None -# timeit_header : None -# timeit_name : all -delta-P-3 1 deltaP 1 P delta 3 -delta-N-1 1 deltaN 1 N delta 1 -delta-N-3 1 deltaN 1 N delta 3 -wt-P-2 1 wtP 1 P wt 2 -track include group pair treatment genotype replicate -wt-P-3 1 wtP 1 P wt 3 -delta-P-2 1 deltaP 1 P delta 2 -wt-N-3 1 wtN 1 N wt 3 -wt-P-1 1 wtP 1 P wt 1 -wt-N-2 1 wtN 1 N wt 2 -delta-N-2 1 deltaN 1 N delta 2 -delta-P-1 1 deltaP 1 P delta 1 -wt-N-1 1 wtN 1 N wt 1 -## 2016-12-19 13:08:30,383 INFO lines_input=13, lines_output=13 -# job finished in 0 seconds at Mon Dec 19 13:08:30 2016 -- 0.58 5.29 0.00 0.01 -- 2f125d43-d713-4f09-a4ed-345a0330a562 diff --git a/tests/rnaseq_junction_bam2bam.py/tests.yaml b/tests/rnaseq_junction_bam2bam.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/rnaseq_junction_bam2bam.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version diff --git a/tests/split_file.py/tests.yaml b/tests/split_file.py/tests.yaml deleted file mode 100644 index 4de2ab029..000000000 --- a/tests/split_file.py/tests.yaml +++ /dev/null @@ -1,6 +0,0 @@ - -version: - stdin: null - outputs: [stdout] - references: [] - options: --version