#!/usr/bin/env perl

=pod

=head1 NAME

convertlinks - convert 2-line link file to new 1-line format

=head1 SYNOPSIS

  cat links.txt | convertlinks [-nooptions] > links.new.txt

  convertlinks -links links.txt [-nooptions] > links.new.txt

=head1 DESCRIPTION

Converts from the 2-line link format

  segdup00001 hs1 465 30596
  segdup00001 hs2 114046768 114076456
  segdup00002 hs1 486 76975
  segdup00002 hs15 100263879 100338121
  segdup00003 hs1 486 30596
  segdup00003 hs9 844 30515

to the 1-line format

  hs1 465 30596 hs2 114046768 114076456
  hs1 486 76975 hs15 100263879 100338121
  hs1 486 30596 hs9 844 30515

There is no script to do the reverse. The two line format is a bad idea.

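Options whose values differ between the two lines, or which appear on only one of the lines, are suffixed with "1" or "2" to indicate the line they came from. Options with the same value on both lines are written once, without a suffix. For example,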

  segdup00001 hs1 465 30596 id=abc,color=blue,thickness=1
  segdup00001 hs2 114046768 114076456 id=def,fill_color=red,thickness=1

generates

  hs1 465 30596 hs2 114046768 114076456 color1=blue,fill_color2=red,id1=abc,id2=def,thickness=1

=head1 OPTIONS

=over

=item * -nooptions

Strip options.

=back

=head1 HISTORY

=over

=item * 1 Aug 2012 v0.01

Started and versioned.

=back 

=head1 BUGS

=head1 AUTHOR

Martin Krzywinski

=head1 CONTACT

  Martin Krzywinski
  Genome Sciences Centre
  Vancouver BC Canada
  www.bcgsc.ca
  martink@bcgsc.ca

=cut

################################################################
#
# Copyright 2002-2012 Martin Krzywinski
#
# This file is part of the Genome Sciences Centre Perl code base.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this script; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
################################################################

use strict;
use Config::General;
use Data::Dumper;
use File::Basename;
use FindBin;
use Getopt::Long;
use IO::File;
use Math::VecStat qw(sum min max average);
use Memoize;
#use Devel::DProf;
use Pod::Usage;
use Set::IntSpan;
use Time::HiRes qw(gettimeofday tv_interval);
use lib "$FindBin::RealBin";
use lib "$FindBin::RealBin/../lib";
use lib "$FindBin::RealBin/lib";
use vars qw(%OPT %CONF);

################################################################
#
# *** YOUR MODULE IMPORTS HERE
#
################################################################

# Command-line interface. Parsed values are stored in %OPT.
my @option_spec = (
  "links=s",       # link file to convert; STDIN is read when not given
  "options!",      # negatable: -options / -nooptions
  "configfile=s",  # explicit configuration file
  "help",
  "man",
  "debug+",        # repeatable, each occurrence raises the debug level
);
GetOptions(\%OPT,@option_spec);

# Usage requests are answered before any configuration is touched.
if($OPT{help}) {
  pod2usage();
}
if($OPT{man}) {
  pod2usage(-verbose=>2);
}

# Build %CONF: configuration file first, then command-line values on top,
# then defaults for anything still undefined.
loadconfiguration($OPT{configfile});
populateconfiguration();
validateconfiguration();

# At debug level 2 and above, dump the final configuration.
if($CONF{debug} > 1) {
  ($Data::Dumper::Pad,
   $Data::Dumper::Indent,
   $Data::Dumper::Quotekeys,
   $Data::Dumper::Terse) = ("debug parameters",1,0,1);
  print Dumper(\%CONF);
}

# Select the input stream: the file named by -links when one was given,
# otherwise STDIN.
my $inputhandle;
if(my $file = $CONF{links}) {
  die "No such file $file" unless -e $file;
  # Three-argument open on a lexical handle: the file name can never be
  # interpreted as a mode or a pipe, and a failed open (e.g. an unreadable
  # file) is reported instead of silently producing an empty conversion.
  open(my $linkhandle,"<",$file) or die "Cannot open file $file for reading: $!";
  $inputhandle = $linkhandle;
} else {
  $inputhandle = \*STDIN;
}

# Links still waiting for their partner line, keyed by link id. Each value is
# an array reference holding one record (chr, start, end, options) per input
# line seen so far for that id.
my $links = {};
while(my $line = <$inputhandle>) {
	chomp $line;
	# Whitespace-only lines carry no link id. Without this guard they would be
	# collected under an empty id and two of them would be emitted as a link.
	next unless $line =~ /\S/;
	my ($id,$chr,$start,$end,$options) = split(" ",$line);
	push @{$links->{$id}}, {chr=>$chr,
													start=>$start,
													end=>$end,
													options=>$options};
	printdebug(1,$id,@{$links->{$id}});
	# The second line of an id completes the link; report_link() prints it and
	# removes the id from $links.
	if(@{$links->{$id}} == 2) {
		report_link($id);
	}
}

# Anything still stored here never received its second line. Say so on STDERR
# (STDOUT carries the converted links) rather than dropping it silently.
for my $unpaired (sort keys %{$links}) {
	printerr("warning: link",$unpaired,"has no partner line and was not converted");
}

# Print the completed link stored under the given id in the 1-line format
#
#   chr1 start1 end1 chr2 start2 end2 [options]
#
# and remove the id from $links.
#
# The merged option string is appended only when options are enabled in
# %CONF and the merge is not empty, so links without options do not end in a
# trailing space.
#
# Parameters: $id - link id whose two line records are stored in $links.
sub report_link {
	my $id = shift;
	my ($first,$second) = @{$links->{$id}}[0,1];
	my $line = sprintf("%s %d %d %s %d %d",
										 @{$first}{qw(chr start end)},
										 @{$second}{qw(chr start end)});
	if($CONF{options}) {
		my $merged = options($first->{options},$second->{options});
		$line .= " " . $merged if length $merged;
	}
	printinfo($line);
	delete $links->{$id};
}

# Merge the option strings of the two lines of a link into one option string.
#
# An option that has the same (defined) value on both lines is written once
# under its own name. Any other option is written under its name followed by
# the number of the line it came from (1 or 2). The result is a comma
# separated list of key=value pairs sorted by key.
#
# Parameters: $first_str, $second_str - option strings of line 1 and line 2.
# Returns:    the merged option string.
sub options {
	my ($first_str,$second_str) = @_;
	my %first  = parse_options($first_str);
	my %second = parse_options($second_str);
	my %merged;
	# Two passes: line 1 checked against line 2, then line 2 against line 1.
	my @passes = ({own=>\%first,  other=>\%second, suffix=>1},
								{own=>\%second, other=>\%first,  suffix=>2});
	for my $pass (@passes) {
		my ($own,$other,$suffix) = @{$pass}{qw(own other suffix)};
		for my $name (sort keys %$own) {
			my $shared = defined $other->{$name} && $other->{$name} eq $own->{$name};
			my $outname = $shared ? $name : $name.$suffix;
			$merged{$outname} = $own->{$name};
		}
	}
	return join(",", map { join("=",$_,$merged{$_}) } sort keys %merged);
}

# Parse a comma separated list of key=value pairs into a hash.
#
# Each item is split on its first "=" only, so a value may itself contain "="
# characters. An item without "=" is stored with an empty string as its value.
# Both cases previously produced an odd number of list elements and shifted
# every following key/value pair. Empty items and items with an empty key are
# ignored. When a key occurs more than once the last occurrence wins.
#
# Parameters: $str - option string, e.g. "id=abc,color=blue"; may be undef.
# Returns:    hash (as a flat list) of option name => option value.
sub parse_options {
	my $str = shift;
	my %opt;
	return %opt unless defined $str;
	for my $item (split(",",$str)) {
		next unless length $item;
		# limit of 2: everything after the first "=" belongs to the value
		my ($key,$value) = split(/=/,$item,2);
		next unless defined $key && length $key;
		$opt{$key} = defined $value ? $value : "";
	}
	return %opt;
}

# Fill in defaults for configuration values that are still undefined:
# option reporting is on unless it was explicitly set (e.g. by -nooptions).
sub validateconfiguration {
	unless(defined $CONF{options}) {
		$CONF{options} = 1;
	}
}

################################################################
#
# *** DO NOT EDIT BELOW THIS LINE ***
#
################################################################

# Copy the parsed command-line options into the configuration hash and expand
# embedded __XXX__ expressions in the configuration values.
#
# Takes no arguments. Reads %OPT, reads and writes %CONF.
sub populateconfiguration {
  # Command-line values overwrite values already stored under the same key.
  foreach my $key (keys %OPT) {
    $CONF{$key} = $OPT{$key};
  }

  # any configuration fields of the form __XXX__ are parsed and replaced with eval(XXX). The configuration
  # can therefore depend on itself.
  #
  # flag = 10
  # note = __2*$CONF{flag}__ # would become 2*10 = 20
  #
  # The pattern requires XXX to be at least two characters long and not to
  # start with an underscore.
  #
  # NOTE(review): XXX is passed to string eval, so whatever text sits between
  # the double underscores is run as Perl code. Use only with trusted
  # configuration files and command lines.
  #
  # NOTE(review): $value is modified inside the //g loop. Perl resets the
  # match position when the target string changes, so scanning appears to
  # restart from the beginning after every replacement - confirm that an
  # expression evaluating to text containing __XXX__ cannot loop forever.

  for my $key (keys %CONF) {
    my $value = $CONF{$key};
    while($value =~ /__([^_].+?)__/g) {
      my $source = "__" . $1 . "__";
      my $target = eval $1;
      # \Q..\E: the field is replaced as literal text, not as a pattern
      $value =~ s/\Q$source\E/$target/g;
      #printinfo($source,$target,$value);
    }
    $CONF{$key} = $value;
  }

}

# Locate a configuration file and load it into %CONF.
#
# The file passed in is used when it exists and is readable. Otherwise the
# first existing, readable file of the following is used, where NAME is the
# base name of the running script:
#
#   /home/$LOGNAME/.NAME.conf
#   <script dir>/NAME.conf
#   <script dir>/etc/NAME.conf
#   <script dir>/../etc/NAME.conf
#
# Returns undef without touching %OPT or %CONF when none of them qualifies.
# Otherwise records the chosen file in $OPT{configfile} and replaces the
# contents of %CONF with the parsed configuration.
#
# Parameters: $file - path of the requested configuration file; may be undef.
sub loadconfiguration {
  my $file = shift;
  my ($scriptname) = fileparse($0);
  unless(-e $file && -r _) {
    my $confname = "$scriptname.conf";
    my @fallbacks = ("/home/$ENV{LOGNAME}/.$confname",
                     "$FindBin::RealBin/$confname",
                     "$FindBin::RealBin/etc/$confname",
                     "$FindBin::RealBin/../etc/$confname");
    my $found;
    for my $candidate (@fallbacks) {
      if(-e $candidate && -r _) {
        $found = $candidate;
        last;
      }
    }
    return undef unless defined $found;
    $file = $found;
  }
  $OPT{configfile} = $file;
  my $conf = Config::General->new(-ConfigFile=>$file,
                                  -AllowMultiOptions=>"yes",
                                  -LowerCaseNames=>1,
                                  -AutoTrue=>1);
  %CONF = $conf->getall;
}

# Print the arguments on one line, prefixed with the word "debug", when the
# debug configuration value is true; print nothing otherwise.
sub printdebug {
  if($CONF{debug}) {
    printinfo("debug",@_);
  }
}

# Print the arguments, separated by single spaces and followed by a newline,
# to the currently selected output handle.
sub printinfo {
  my $text = join(" ",@_);
  printf("%s\n",$text);
}

# Print a Data::Dumper dump of every argument to the currently selected
# output handle.
sub printdumper {
    my @dumped = Dumper(@_);
    print @dumped;
}

# Print the arguments, separated by single spaces and followed by a newline,
# to STDERR.
sub printerr {
  my $text = join(" ",@_);
  printf STDERR ("%s\n",$text);
}
