Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
svigneau committed Feb 7, 2014
1 parent c44001c commit 207f729
Showing 1 changed file with 99 additions and 0 deletions.
99 changes: 99 additions & 0 deletions tab3col_to_bedgraph.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/perl

# Description: This script converts a 3 columns tabular format, where columns are chr, start, value, to bedGraph format. Input file may be compressed as .gz.
# Coordinates in both input and bedGraph output are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html).

# Usage: tab3col_to_bedgraph.pl --tab input.tsv --bedgraph output.bedgraph
# --tab : specify input file in 3 columns tabular format, where columns are chr, start, value.
# --bedgraph : specify output file in bedgraph format.

# Credits: This script was written by Sebastien Vigneau ([email protected]) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute).


use strict;
use warnings;
use Getopt::Long;

# Usage message appended to every command line error.
my $usage = "Usage: $0 --tab <infile.tsv> --bedgraph <outfile.bedgraph>";

# Parse command line arguments.
# Both --tab and --bedgraph take a string value and both are mandatory.

my $infile; # 3 columns input file name
my $outfile; # bedgraph output file name

GetOptions (
    "tab=s" => \$infile,
    "bedgraph=s" => \$outfile,
) or die ("Error in command line arguments!\n$usage\n");

# GetOptions only fails on malformed or unknown options; an option that is
# simply absent leaves its variable undefined. Check explicitly so that the
# user gets the usage message instead of "uninitialized value" warnings and a
# misleading "Could not open" error further down.
foreach my $required (["--tab", $infile], ["--bedgraph", $outfile]) {
    my ($option, $value) = @$required;
    unless (defined $value && length $value) {
        die ("Missing value for required option $option!\n$usage\n");
    }
}

# Open input file. If its name ends in .gz, read it through "gunzip -c" so
# that the rest of the script sees uncompressed text on IN.
#
# The command is passed to open() as a list (program followed by its
# arguments) so that it is executed directly instead of through a shell: a
# file name containing spaces, quotes or shell metacharacters is then handed
# to gunzip verbatim rather than being split or interpreted. The "--" marks
# the end of gunzip's options, so a file name starting with "-" is not
# mistaken for one.
# Note that a failure of gunzip itself (e.g. unreadable or corrupt archive) is
# not visible here; it is reported through the return value of close(IN).

if ($infile =~ /\.gz$/){
    open(IN, '-|', 'gunzip', '-c', '--', $infile) || die "Could not open $infile: $!\n";
} else {
    open(IN, '<', $infile) || die "Could not open $infile: $!\n";
}

# Open output file. An existing file of the same name is overwritten.

open(OUT, '>', $outfile) || die "Could not open $outfile: $!\n";


# Conversion to bedgraph starts here.
#
# Every input record (chr, start, value) read from IN becomes one bedGraph
# line (chr, start, end, value) written to OUT. The input carries no end
# coordinate, so it is computed as start + step, where step is the distance
# between the starts of two consecutive records on the same chromosome. A
# record can therefore only be written once the record following it has been
# read; until then it is held in the $prev_* variables.
#
# The last record of a chromosome has no successor on that chromosome, so it
# reuses the most recently computed step. If no step has been computed yet
# (input with a single record, or a first chromosome holding a single record),
# a step of 1 is used so that no zero-length interval is written.


# Declare variables.

my $step;            # distance between the two most recent same-chromosome starts

my $prev_chr;        # record that has been read but not yet written
my $prev_start;
my $prev_val;

my $line_number = 0; # current input line, used in diagnostics only


# Write one bedGraph line to OUT for the record ($chr, $start, $val).
# $step is the interval length; when it is undefined, 1 is used instead and a
# warning is issued. Dies if the line cannot be written.
sub print_bedgraph_line {
    my ($chr, $start, $val, $step) = @_;

    if (! defined $step) {
        warn "Warning: no step size known for $chr:$start; assuming 1.\n";
        $step = 1;
    }

    my $end = $start + $step;
    print OUT "$chr\t$start\t$end\t$val\n"
        or die "Could not write to output file: $!\n";
    return;
}


while (my $line = <IN>) {

    $line_number++;
    chomp $line;
    $line =~ s/\r$//; # tolerate Windows (CRLF) line endings

    # Skip comment lines and blank lines
    next if ($line =~ /^#/);
    next if ($line !~ /\S/);

    # Parse relevant information in current line
    # e.g: chr1 0 2
    my ($chr, $start, $val) = split(/\t/, $line);

    # A line with fewer than 3 columns cannot be converted; report and skip it
    # rather than writing a record with missing fields.
    if (! defined $val) {
        warn "Warning: skipping line $line_number (expected 3 tab-separated columns): $line\n";
        next;
    }

    # Now that its successor is known, write the record held from the
    # previous iteration (there is none while reading the first record).
    if (defined $prev_chr) {

        # Update step size if current line belongs to same chromosome as previous line.
        # Otherwise, keep step unchanged.
        if ($chr eq $prev_chr) {
            $step = $start - $prev_start;
        }

        print_bedgraph_line($prev_chr, $prev_start, $prev_val, $step);
    }

    # Hold current record until the next one has been read
    ($prev_chr, $prev_start, $prev_val) = ($chr, $start, $val);
}

# Print last record, which has no successor and reuses the last known step.
# If the input held no record at all, nothing is written.
if (defined $prev_chr) {
    print_bedgraph_line($prev_chr, $prev_start, $prev_val, $step);
} else {
    warn "Warning: no data lines found in input; output is empty.\n";
}

# For a piped input, close() returns false when the command exited with a
# non-zero status; in that case $! is 0 and the status is in $?.
close(IN) || die "Error while reading input"
    . ($! ? ": $!" : ": input command exited with status " . ($? >> 8)) . "\n";

# Buffered write errors (e.g. disk full) only surface when the file is closed.
close(OUT) || die "Could not close output file: $!\n";

exit(0);

0 comments on commit 207f729

Please sign in to comment.