#!/usr/bin/env perl
use Env;
$seq_reformat="t_coffee -other_pg seq_reformat ";
$VersionTag="1.00";
$step=1;
$unset="";
$scoreT1=$scoreT2=$nseqT=$dp_limit=$unset;
@tl=();
chomp($tc_version=`t_coffee -version`);$tc_version=~s/PROGRAM: //;


print STDERR "\n*****************************************************************";
print STDERR "\n*           HIGH LEVEL PROGRAM: T-COFFEE_DPA Version $VersionTag";
print STDERR "\n*           LOW  LEVEL PROGRAM: $tc_version ";
print STDERR "\n*****************************************************************";

if (!@ARGV)
  {
    print "t_coffee_dpa accepts every t_coffee_flag.\nType t_coffee to obtain a list\n";
    print "Requires $TC_VERSION\n";
    print "Requires ";
    print "t_coffee_dpa specific flags:\n";
    print "\t-dpa_master_aln....................Master alignment: provided OR computed\n";
    print "\t-dpa_master_aln....................By default, Computed with t_coffee -very_fast\n";
    print "\t-dpa_master_aln=<file>.............Use file, (must be an aln in Fasta or ClustalW\n";
    print "\t-dpa_master_aln=<program>..........Compute aln with pg -in seq -out aln`\n";
    print "\t-dpa_maxnseq.......................Maximum number of sequences in subgroups\n";
    print "\t-dpa_min_score1....................Minimum Id for two sequences to be grouped in ref_aln\n";
    print "\t-dpa_min_score2....................Minimum Id within a subgroup\n";
    print "\t-dpa_debug.........................Keep Tmp File (for debug purpose)\n\n";
    
    exit (0);
  }
foreach $arg (@ARGV)
  {
    $arg_list.=" $arg";
  }
$arg_list=~s/[=,;]/ /g;


($seq0, $arg_list)=&extract_val_from_arg_list("^",$arg_list, "SPLICE","unset");
($seq1, $arg_list)=&extract_val_from_arg_list("-seq",$arg_list, "SPLICE","unset");
($seq2, $arg_list)=&extract_val_from_arg_list("-in",$arg_list, "KEEP","unset");
($seq3, $arg_list)=&extract_val_from_arg_list("-infile",$arg_list, "SPLICE","unset");
($prf,  $arg_list)=&extract_val_from_arg_list("-profile",$arg_list, "SPLICE","unset");

$gl{'Seq'}=$seq=&vtmpnam();#file containing all the sequences

   #1-remove sequences from -in
if ( $arg_list =~/\-in\b/)
  {
    my $save, $name;
    while($arg_list=~/\-in\b[^-]+(\bS[\w.]+)/)
      {
	$name=$1;$name=~s/^.//;
	if ( !-e $name){$save.=" S$name ";}

	$arg_list=~s/S$name/ /;
      }
    $arg_list=~s/\-in\b/\-in $save /;
  }
   #2-prepare 

if (!($arg_list=~/\-outorder/))
  {
    
    $output_cl .=" -outorder=$seq";
  }
@output_flag=("-output","-outfile", "-run_name", "-outorder"); 
foreach $v1 (@output_flag)
  {
    ($v2, $arg_list)=&extract_val_from_arg_list($v1,$arg_list, "SPLICE","unset");
    if ($v2 ne "")
      {

	if ($v1 eq "-run_name"){$run_name=$v2;$output_cl .=" $v1 $v2 ";}
	elsif ( $v1 eq "-outorder")
	  {
	    if ( $v2 eq "input"){$v2=$seq;}
	    $outorder=$v2;$output_cl .=" $v1 $v2 ";
	  }
	else
	  {
	    $output_cl .=" $v1 $v2 ";
	  }
      }
 }


($dpa_master_aln, $arg_list)  =&extract_val_from_arg_list("-dpa_master_aln",$arg_list, "SPLICE", "t_coffee");
$dpa_master_aln=~s/\s//g;
($nseqT, $arg_list)           =&extract_val_from_arg_list("-dpa_maxnseq",$arg_list, "SPLICE", 30);
($scoreT1, $arg_list)         =&extract_val_from_arg_list("-dpa_min_score1",$arg_list, "SPLICE", 80);
($scoreT2, $arg_list)         =&extract_val_from_arg_list("-dpa_min_score2"    ,$arg_list, "SPLICE", 30);
($dpa_limit, $arg_list)       =&extract_val_from_arg_list("-dpa_limit"        ,$arg_list, "SPLICE", 0);
($dpa_delta_id, $arg_list)    =&extract_val_from_arg_list("-dpa_delta_id"        ,$arg_list, "SPLICE", 1);
($dpa_debug, $arg_list)       =&extract_val_from_arg_list("-dpa_debug"           ,$arg_list, "SPLICE", 0);


$in_seq=$seq0." ".$seq1." ".$seq2." ".$seq3;
$in_prf=(($prf ne $unset)?"$prf ":"");
&exit_dpa (($in_seq eq "" && $in_prf eq "")?1:0, "ERROR: You did not Provide any sequences. Use the -seq flag [FATAL: t_coffee_dpa]\n", EXIT_FAILURE);


print STDERR "\nSTART DPA COMPUTATION";



if ($in_seq=~/\S+/)
  {
    
    print STDERR "\n Step $step: Gather all the sequences into the tmp file: [$seq]";$step++;	
    &my_system ("t_coffee $in_seq -convert -quiet -output fasta_seq -outfile=$seq -maxnseq 0");
  }

if ( !-e $seq){$seq="";}

if ($in_prf=~/\S+/)
  {
    $seq_in_type="profile"; 
    $seq.= $in_prf; 
  }
if ($seq eq ""){ &exit_dpa (1, "\nERROR: No Sequence FOund. Provide Sequences with the -seq flag [FATAL: t_coffee_dpa]", EXIT_FAILURE);}

 

if ( $run_name)
  {
    $suffix=$run_name;
  }
elsif ($in_seq=~/\b(S[\w.]+\b)/)
  {
    my $suffix1, $sufffix2;
    $suffix1=$suffix2=$1;
    $suffix2=~s/^S//;
    if ( -e $suffix1){$suffix=$suffix1;}
    elsif ( -e $suffix2){$suffix=$suffix2;}
    else
      {
	$suffix=&vtmpnam();	
      }
    $suffix=~s/\.\w+//;
  }

else
  {
    $suffix=&vtmpnam();
  }


if (!$run_name){$output_cl.=" -run_name $suffix ";}


$gl{'Tree'}=&seq2dpa_tree ($seq, "$suffix.dpadnd");

print STDERR "\n Step $step: Prepare guide tree: $gl{'Tree'}";$step++;

print STDERR "\n Step $step: Identify and Align Closely Related Groups";$step++;
%gl=&make_one_pass (0, $scoreT1,"Align",%gl);

print STDERR "\n Step $step: Make Multiple Group Alignment";$step++;
while (!%gl ||$gl{'Ng'}>$nseqT)
  {
    %gl=&make_one_pass ($nseqT, $scoreT2,"t_coffee",%gl);
    if ( $gl{'Newgroups'}==0){$scoreT2--;}    
  }
print STDERR "\n Step $step: Make The Final Alignment";$step++;


$arg_list .=$output_cl;


%gl=&tree2group (0,0, %gl);
$gl{$gl{'0'}{'File'}}{'Output'}="";
$a=0;
&align_groups ("t_coffee",'0', $arg_list, " ", %gl);



if ( !$dpa_keep_tmpfile){&clean_tmp_file (@tl);}



sub seq2dpa_tree 
  {
    my $seq=@_[0];
    my $newtree=@_[1];
    my $aln=&vtmpnam ();

    &my_system ("t_coffee -special_mode quickaln -in $seq -outfile $aln -quiet");
    &my_system ("$seq_reformat -in $aln -action +aln2tree +tree2dpatree -output newick >$newtree");
    return $newtree;
  }	
sub seq2dpa_tree_old 
  {
    my $aln=@_[0];
    my $newtree=@_[1];
    
    
    &my_system("$seq_reformat -in $aln -action +seq2dpatree -output newick > $newtree");
    return $newtree;
  }
sub aln2dpa_tree 
  {
    my $aln=@_[0];
    my $newtree=&vtmpnam();
    
    &my_system("$seq_reformat -in $aln -action +aln2tree +tree2dpatree -output newick > $newtree");
    return $newtree;
  }
sub group_file2ngroups
  {
    my $file=@_[0];
    my $n;
    
    open ( F, $file);
    while (<F>)
      {
	$n+=/\>/;
      }
    close (F);
    return $n;
  }

sub make_one_pass
  {
    my ($N, $ID,$pg, %gl)=@_;
    my $a;

    %gl=&tree2group ($N,$ID,%gl);
    if (!$gl{'Newgroups'}){return %gl;}
    else
      {
	for ( $a=0; $a< $ng; $a++)
	  {
	    if ($gl{$gl{$a}{'File'}}{'Ng'}>1){&display_group($a, %gl);}
	    &align_groups ($pg, $a, $arg_list, " -quiet=quiet ", %gl);
	  }
	return %gl;
      }
  }

sub tree2group 
  {
    my ($N, $ID, %gl)=@_;
    my $prefix=&vtmpnam();
    my $group_file=&vtmpnam();
    my $file;
    my $oldtree=&vtmpnam();
    my $n;
    my $tree;


    if ( $gl{'Ng'}==1){return %gl;}
    $tree=$gl{'Tree'}; 
    
    #1 extract the groups
    &my_system ("$seq_reformat -in $tree -action +tree2group $N $ID $prefix > $group_file");
    $n=group_file2ngroups($group_file);
    
    
    $gl{'Newgroups'}=1;
    if ( $n==$gl{'Ng'})
      {
	$gl{'Newgroups'}=0;
	return %gl;
      }
    $gl{'Iteration'}++;
    $gl{'MaxNseq'}=$N;$gl{'MinID'}=$ID;
    $gl{'GroupFile'}=$group_file;$gl{'Ng'}=$ng=0;
    #2 Process the group list into the hash
    open (F, $group_file);
    while (<F>)
      {
	$gl{'File'}.=$_;
	if (/\>/)
	  {
	    $line=$_;
	    $line=~s/\>//;
	    @list=($line=~/(\S+)/g);
	    $file=$gl{$ng}{'File'}=shift @list;
	    $gl{$file}{'Output'}=$file;
	    
	    $gl{$file}{'Ng'}=$#list+1;
	    if ($gl{$file}{'Ng'}>1){ $gl{$file}{'Tlist'}=$gl{$file}{'Alist'}="(";}
	    foreach $l (@list)
	      {
	
		$gl{$file}{'List'}.=" $l ";
		
		if (!$gl{$l}{'Tlist'})
		  {
		    $gl{$l}{'Tlist'}="$l";
		    $gl{$l}{'Alist'}="$l";
		    $gl{$l}{'Nseq'}=1;
		    $gl{$l}{'Ng'}=1;
		  }
		$gl{$file}{'Tlist'}.="$gl{$l}{'Tlist'},";
		$gl{$file}{'Alist'}.="$gl{$l}{'Tlist'}|";
		$gl{$file}{'Nseq'}+=$gl{$l}{'Nseq'};
	      }
	    

	    chop($gl{$file}{'Tlist'});chop($gl{$file}{'Alist'});
	    if ($gl{$file}{'Ng'}>1){$gl{$file}{'Tlist'}.=")"; $gl{$file}{'Alist'}.=");";}
	    $ng++;
	  }	
      }
    $gl{'Ng'}=$ng;
    close (F);
    
    #3 Update the old tree with the new groups
    $gl{'Tree'}=&vtmpnam();
    &my_system ("$seq_reformat -in $tree -action +collapse_tree $group_file -output newick > $gl{'Tree'}");
    
    return %gl;
  }

sub display_group 
  {
    my ($g,%gl)=@_;
    my $f;
    
    if ( $g==-1)
      {
	print STDERR "\nIteration $gl{'Iteration'} [MaxN=$gl{'MaxNseq'}][MinID=$gl{'MinID'}]";
      }
    else
      {

	$f=$gl{$g}{'File'};
	$action=($gl{$f}{'Ng'}==1 || $gl{'Iteration'}==1)?"KEEP  ":"ALIGN ";
        print STDERR "\n\t[$action][MaxN=$gl{'MaxNseq'}][MinID=$gl{'MinID'}][File $f][Nseq=$gl{$f}{'Nseq'}][Ngroups=$gl{$f}{'Ng'}][$gl{$f}{'Alist'}]";
      }
  }
      


sub align_groups
  {
    my ($pg, $g, $arg, $extra_arg,%gl)=@_;
    my $f;
    my $Output,$Outflag;
    
    
    $f=$gl{$g}{'File'};
    $Output=($gl{$f}{'Output'});
    
    if ( $pg eq "Align")
      {
	if ( !-e $f)
	  {
	    $command="$seq_reformat -in $gl{'Seq'}  -action +extract_aln $gl{'GroupFile'}";
	    if ($gl{$f}{'Ng'}>1)
	      {
		&my_system ($command);
		$command="t_coffee -special_mode quick_aln  S$f -outfile=$Output -quiet";
	      }
	  }
	else 
	  {$command="";}
      }
    elsif ( -e $f)
      {	
	$Outflag=($Output)?"-outfile=$Output":"";
	$command="$pg -infile $f $Outflag -quiet stdout $arg $extra_arg -maxnseq 0 -convert -quiet stdout";
      }
    elsif ( $gl{$f}{'Ng'}==1)
      {
	$action=($dpa_debug)?"cp":"mv";
	$command="$action $gl{$f}{'List'} $Output";
      }
    else
      {
	$Outflag=($Output)?"-outfile=$Output":"";
	$command="$pg -profile $gl{$f}{'List'} $Outflag $arg $extra_arg -maxnseq 0";
      }
    
    &my_system ($command);
    return $outfile;
  }
    
sub my_system 
  {
    my $command=@_[0];
    my $force=@_[1];
    my $status;

    if ( $dpa_debug) {print STDERR "\nCOMMAND: $command";}
    $status=system ($command);

    if (!$force)
       {
	 &exit_dpa (($status==1), "Failed in Command:\n$command\n[FATAL: t_coffee_dpa]\n", EXIT_FAILURE);
       }
    
    return $status;
  }

sub vtmpnam
  {
    my $prefix=@_[0];
    my $tmp_file_name;

    $tmp_prefix=($prefix)?$prefix:"dpa_tmp_file_$$";
   
    $tmp_count++;
    $tmp_file_name="$tmp_prefix"."$tmp_count";
    $tl[$#tl+1]=$tmp_file_name;
    return $tmp_file_name;
  }

sub clean_tmp_file
  {

    my $list;
    my $file;
    
    if ($dpa_debug){return;}
    $list=vtmpnam();
    `ls -1 | grep $tmp_prefix>$list`;
    
    open (F,$list);
    while ( <F>)
      {
	$file=$_;
	chop $file;
	if ( -e $file){unlink $file;}
      }
    close (F);
    unlink $list;
  }


sub exit_dpa
  {
  my $condition=@_[0];
  my $error_msg=@_[1];
  my $exit_value=@_[2];
  if ( $condition)
    {
      print "$error_msg\n";
      exit ($exit_value);
    }
  else
    {
      return;
    }
  
}
sub extract_val_from_arg_list
  {
    my $arg=@_[0];
    my $arg_list=@_[1];
    my $keep_flag=@_[2];
    my $default_value=@_[3];
    my $val="";
    
    #protect
    $arg_list=~s/\s-/ \@/g;
    $arg=~s/-/\@/g;
    
    #search
    if ($arg eq "^")
      {
	$arg_list=~/^([^@]*)/;
	$val=$1;
      }
    else
      {$arg_list=~/$arg ([^@]*)/;$val=$1;}
    
    #remove trailing spaces
    $val=~s/\s*$//;
    
    #remove the parsed sequence if needed
    if (($val ne "") && $keep_flag ne "KEEP")
      {
	if ( $arg eq "^"){$arg_list=~s/$val/ /;}
	else {$arg_list=~s/($arg [^@]*)/ /;}
      }
	
    #unprotect
    $arg_list=~s/\@/-/g;
    $arg=~s/\@/-/g;
    
    if (($val eq "") && $default_value ne "unset"){$val=$default_value;}
    
    return $val, $arg_list;
  }
$program="T-COFFEE (Version_11.00)";\n

