From 207f729d60ee661ad7729a6e1ca31a64340068af Mon Sep 17 00:00:00 2001
From: Sebastien Vigneau <sebastien.vigneau@gmail.com>
Date: Fri, 7 Feb 2014 18:42:02 -0500
Subject: [PATCH] first commit

---
 tab3col_to_bedgraph.pl | 99 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100755 tab3col_to_bedgraph.pl
diff --git a/tab3col_to_bedgraph.pl b/tab3col_to_bedgraph.pl
new file mode 100755
index 0000000..3a43c51
--- /dev/null
+++ b/tab3col_to_bedgraph.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+# Description: This script converts a 3-column tab-separated file, whose columns are chr, start, value, to bedGraph format. The input file may be gzip-compressed (.gz).
+# Coordinates in both input and bedGraph output are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html).
+
+# Usage: tab3col_to_bedgraph.pl --tab input.tsv --bedgraph output.bedgraph
+# --tab : specify input file in 3 columns tabular format, where columns are chr, start, value.
+# --bedgraph : specify output file in bedgraph format.
+
+# Credits: This script was written by Sebastien Vigneau (sebastien.vigneau@gmail.com) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute).
+
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $usage = "Usage: $0 --tab <infile.tsv> --bedgraph <outfile.bedgraph>";
+
+# Parse command line arguments. Both --tab and --bedgraph are required.
+my $infile; # 3 columns input file name
+my $outfile; # bedgraph output file name
+
+GetOptions (
+  "tab=s" => \$infile,
+  "bedgraph=s" => \$outfile,
+) or die ("Error in command line arguments!\n$usage\n");
+
+# Without this check a missing option only surfaces later as an
+# "uninitialized value" warning followed by a confusing open() failure.
+(defined $infile && defined $outfile)
+  or die ("Both --tab and --bedgraph are required!\n$usage\n");
+
+# Open input file. If it is gzip-compressed (.gz), read it through gunzip.
+# The list form of open bypasses the shell, so file names containing spaces
+# or shell metacharacters are passed to gunzip verbatim.
+my $in;
+if ($infile =~ /\.gz$/) {
+  open($in, '-|', 'gunzip', '-c', $infile) or die "Could not open $infile: $!\n";
+} else {
+  open($in, '<', $infile) or die "Could not open $infile: $!\n";
+}
+
+# Open output file.
+open(my $out, '>', $outfile) or die "Could not open $outfile: $!\n";
+
+# Conversion to bedgraph starts here.
+#
+# Each record is printed one line late: its end coordinate is the start of
+# the next record on the same chromosome. When no such record exists (last
+# record of a chromosome or of the file), the most recent step size is reused.
+
+my ($chr, $start, $val); # fields of the current record
+my ($prev_chr, $prev_start, $prev_val); # fields of the record awaiting output
+my $step; # distance between consecutive starts; undef until first measured
+
+# Print one bedGraph line (chr, start, start + step, value) to the output.
+# If no step has been measured yet (e.g. a single-record input), fall back
+# to a 1-base interval and warn, instead of emitting a zero-length interval.
+sub print_record {
+  my ($rec_chr, $rec_start, $rec_val) = @_;
+  my $span = $step;
+  if (! defined $span) {
+    warn "Unknown step size at ${rec_chr}:${rec_start}, assuming 1\n";
+    $span = 1;
+  }
+  print $out join("\t", $rec_chr, $rec_start, $rec_start + $span, $rec_val), "\n";
+}
+
+while (<$in>) {
+  chomp;
+
+  # Skip comment lines and blank lines (a blank line has no fields to parse)
+  next if (/^#/ || /^\s*$/);
+
+  # Save previous line information
+  ($prev_chr, $prev_start, $prev_val) = ($chr, $start, $val);
+
+  # Parse relevant information in current line
+  # e.g: chr1 0 2
+  ($chr, $start, $val) = split(/\t/);
+
+  # Continue to next line if first line of file
+  next if (! defined $prev_chr);
+
+  # Update step size if current line belongs to same chromosome as previous line.
+  # Otherwise, keep step unchanged.
+  $step = $start - $prev_start if ($chr eq $prev_chr);
+
+  # Print information for previous line
+  print_record($prev_chr, $prev_start, $prev_val);
+}
+
+# Print last line, unless the input contained no records at all
+print_record($chr, $start, $val) if (defined $chr);
+
+# Closing a pipe also reports a failed gunzip (via $?), and buffered write
+# errors only surface when the output handle is closed.
+close($in) or die "Error reading $infile: " . ($! || "gunzip failed, wait status $?") . "\n";
+close($out) or die "Could not write $outfile: $!\n";
+exit(0);