From 448ebcd4f71fc53013ea4ee42082b1bf65cac7c8 Mon Sep 17 00:00:00 2001 From: Sebastien Vigneau Date: Fri, 10 Jan 2014 15:14:49 -0500 Subject: [PATCH] first commit --- bedgraph_to_wig.pl | 130 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100755 bedgraph_to_wig.pl diff --git a/bedgraph_to_wig.pl b/bedgraph_to_wig.pl new file mode 100755 index 0000000..1e20fd4 --- /dev/null +++ b/bedgraph_to_wig.pl @@ -0,0 +1,130 @@ +#!/usr/bin/perl + +# Description: This script converts bedGraph to fixedStep wig format with defined step size. Input file may be compressed as .gz. +# Coordinates in bedGraph input are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html). +# Coordinates in wig output are 1-based (http://genome.ucsc.edu/goldenPath/help/wiggle.html). + +# Usage: bedgraph_to_wig.pl --bedgraph input.bedgraph --wig output.wig --step step_size [--compact] +# --bedgraph : specify input file in bedGraph format. +# --wig : specify output file in fixedStep format. +# --step : specify step size. Note that span is set to be identical to step. +# --compact : if selected, steps with value equal to 0 will not be printed. This saves space but was not allowed in original wig format, thus some scripts using wig file as input may not understand it. + +# Credits: This script was written by Sebastien Vigneau (sebastien.vigneau@gmail.com) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute). The inspiration for this script comes from Dave Tang's own version (http://davetang.org/wiki/tiki-index.php?page=wig). + + +use strict; +use warnings; +use Getopt::Long; +use List::Util qw[min max]; + +my $usage = "Usage: $0 --bedgraph --wig --step [--compact]\n"; + +# Parse command line arguments + +my $infile; # bedGraph input file name +my $outfile; # wig output file name +my $step; # wig step size +my $compact; # if selected, steps with value equal to 0 will not be printed + +GetOptions ( + "bedgraph=s" => \$infile, + "wig=s" => \$outfile, + "step=i" => \$step, + "compact" => \$compact, +) or die ("Error in command line arguments!\n$usage\n"); + +# Open input file. If it is compressed with gunzip, uncompress it. + +if ($infile =~ /\.gz$/){ + open(IN,'-|',"gunzip -c $infile") || die "Could not open $infile: $!\n"; +} else { + open(IN,'<',$infile) || die "Could not open $infile: $!\n"; +} + +# Open output file. + +open(OUT,'>',$outfile) || die "Could not open $outfile: $!\n"; + + +# bedGraph to wig conversion starts here. + +# Print main header. + +print OUT "track type=wiggle_0 name=\"$infile\" description=\"$infile\" visibility=full\n"; + +# Initialize variables. + +my $cur_chr = 0; # chromosome being processed +my $cur_pos = 0; # position of current step +my $next_pos = $cur_pos + $step; # position of next step +my $exp_pos = 0; # expected position if no step was skipped; used with --compact option +my $cur_val = 0; # value for current step + +while () { + + chomp; + + # Skip comment lines + next if (/^track/); + next if (/^#/); + + # Parse relevant information in current line + # e.g: chr1 3000400 3000500 2 + my ($chr, $start, $end, $val) = split(/\t/); + + # Print header for new chromosome and initialize variables. + if ($chr ne $cur_chr) { + $cur_chr = $chr; + $cur_pos = 0; + $next_pos = $cur_pos + $step; + $cur_val = 0; + if (!$compact) { # If --compact option selected, header will be printed immediately before non-null value. + print OUT "fixedStep chrom=$chr start=", $cur_pos + 1, " step=$step span=$step\n"; + # +1 was added to convert from 0-based to 1-based coordinates. + } + } + + # Print values when gap in bedGraph file is greater than step. + while ($start >= $next_pos) { + print_wig_line($cur_chr, \$cur_pos, \$next_pos, \$exp_pos, \$cur_val, $chr, $start, $end, $val, $step); + } + + # Print values when step overlaps with bedGraph interval and bedGraph interval is longer than step. + while ($end >= $next_pos) { + $cur_val += $val * ($next_pos - max($cur_pos, $start)); + print_wig_line($cur_chr, \$cur_pos, \$next_pos, \$exp_pos, \$cur_val, $chr, $start, $end, $val, $step); + } + + # Update value when end of bedGraph interval is contained within step. + if ($end < $next_pos) { + $cur_val += $val * ($end - max($cur_pos, $start)); + } + +} + +close(IN); +close(OUT); + +exit(0); + + +# Print or skip line in wig file depending on --compact option and value; update variables for next step. +sub print_wig_line { + my ($cur_chr, $cur_pos, $next_pos, $exp_pos, $cur_val, $chr, $start, $end, $val, $step) = @_; + if (!$compact) { # Always print if --compact option was not selected. + my $cur_ave_val = $$cur_val / $step; + print OUT "$cur_ave_val\n"; + } elsif ($$cur_val != 0) { # Skips printing if --compact option selected and value is null. + if ($$cur_pos != $$exp_pos) { # Adds header if previous step had null value and was skipped in print out. + print OUT "fixedStep chrom=$chr start=", $$cur_pos + 1, " step=$step span=$step\n"; + # +1 was added to convert from 0-based to 1-based coordinates. + } + my $cur_ave_val = $$cur_val / $step; + print OUT "$cur_ave_val\n"; + $$exp_pos = $$next_pos; + } + $$cur_pos = $$next_pos; + $$next_pos = $$cur_pos + $step; + $$cur_val = 0; +}