#!/bin/bash

# Author: Maria Nattestad
# github.com/marianattestad/assemblytics

USAGE="Assemblytics delta output_prefix unique_length_required min_size max_size"

# Report a missing positional argument on stderr, print the usage line and
# abort with a non-zero status so that callers can detect the failure.
#   $1 - description of what is missing
missing_argument() {
    {
        echo "ERROR in Assemblytics: $1"
        echo "Usage:"
        echo "$USAGE"
    } >&2
    exit 1
}

# All five positional arguments are mandatory and must be non-empty.
[ -n "$1" ] || missing_argument "No delta file given"
[ -n "$2" ] || missing_argument "No output prefix given"
[ -n "$3" ] || missing_argument "No unique length requirement parameter given"
[ -n "$4" ] || missing_argument "No minimum size parameter given"
[ -n "$5" ] || missing_argument "No maximum size parameter given"

# Bind the positional arguments to named variables. The ${N?message} form
# aborts the script with the usage text if an argument is unset.
DELTA=${1?"$USAGE"}
OUTPUT_PREFIX=${2?"$USAGE"}
UNIQUE_LENGTH=${3?"$USAGE"}
MINIMUM_SIZE=${4?"$USAGE"}
MAXIMUM_SIZE=${5?"$USAGE"}

# Echo the effective parameters on stderr. The values are quoted so that
# whitespace or glob characters in them are printed verbatim.
>&2 echo "Input delta file: $DELTA"
>&2 echo "Output prefix: $OUTPUT_PREFIX"
>&2 echo "Unique anchor length: $UNIQUE_LENGTH"
>&2 echo "Minimum variant size to call: $MINIMUM_SIZE"
>&2 echo "Maximum variant size to call: $MAXIMUM_SIZE"


# Create the directory part of the output prefix (everything up to the last
# '/'). NOTE: when OUTPUT_PREFIX contains no '/', ${OUTPUT_PREFIX%/*} expands
# to the prefix itself, so a directory named after the prefix is created and
# progress.log is written inside it.
if ! mkdir -p -- "${OUTPUT_PREFIX%/*}/"; then
    >&2 echo "Error: could not create output directory ${OUTPUT_PREFIX%/*}/"
    exit 1
fi

# In the web application, this log file is used to show status updates in the GUI
LOG_FILE="${OUTPUT_PREFIX%/*}/progress.log"
>&2 echo "Logging progress updates in $LOG_FILE"

# Directory containing this script; the helper scripts are invoked from it.
SCRIPT_PATH=$(dirname -- "${BASH_SOURCE[0]}")
echo 'script path:' "$SCRIPT_PATH"

# Check that Rscript is available (needed by R scripts).
# `command -v` is the portable lookup; the script aborts with a non-zero
# status so that callers can detect the missing dependency.
if ! command -v Rscript > /dev/null 2>&1; then
    >&2 echo "Error: No Rscript executable available in PATH: $PATH."
    exit 1
fi


# Append the basename of the output prefix to the progress log, followed by
# the first "STAGE,STATUS,message" record.
printf '%s\n' "${OUTPUT_PREFIX##*/}" >> "$LOG_FILE"

printf '%s\n' "STARTING,DONE,Starting unique anchor filtering." >> "$LOG_FILE"

# Main pipeline. Each step appends a "STAGE,STATUS,message" record to
# $LOG_FILE; the first failing step records a FAIL entry and exits with
# status 1, so later steps only run when all earlier ones succeeded.

# Append one record to the progress log.
#   $1 - the complete record text
log_progress() {
    printf '%s\n' "$1" >> "$LOG_FILE"
}

# Record a failure in the progress log and abort the script.
#   $1 - the complete FAIL record text
fail_step() {
    log_progress "$1"
    exit 1
}

# Files produced and consumed by the steps below.
filtered_delta="$OUTPUT_PREFIX.Assemblytics.unique_length_filtered_l$UNIQUE_LENGTH.delta.gz"
between_bed="$OUTPUT_PREFIX.variants_between_alignments.bed"
within_bed="$OUTPUT_PREFIX.variants_within_alignments.bed"
combined_bed="$OUTPUT_PREFIX.Assemblytics_structural_variants.bed"
summary_file="$OUTPUT_PREFIX.Assemblytics_structural_variants.summary"

# --- Step 1: unique anchor filtering ---------------------------------------
>&2 echo "1. Filter delta file"
"$SCRIPT_PATH/Assemblytics_uniq_anchor.py" --delta "$DELTA" --unique-length "$UNIQUE_LENGTH" --out "$OUTPUT_PREFIX" --keep-small-uniques
if [ ! -e "$filtered_delta" ]; then
    fail_step "UNIQFILTER,FAIL,Step 1: Assemblytics_uniq_anchor.py failed: Possible problem with Python or Python packages on server."
fi
log_progress "UNIQFILTER,DONE,Step 1: Assemblytics_uniq_anchor.py completed successfully. Now finding variants between alignments."

# --- Step 2: variants between alignments -----------------------------------
>&2 echo "2. Finding variants between alignments"
"$SCRIPT_PATH/Assemblytics_between_alignments.pl" "$OUTPUT_PREFIX.coords.tab" "$MINIMUM_SIZE" "$MAXIMUM_SIZE" all-chromosomes exclude-longrange bed > "$between_bed"
between_status=$?
# The redirection creates the output file even when the command fails, so
# the exit status has to be checked in addition to the file's existence.
if [ "$between_status" -ne 0 ] || [ ! -e "$between_bed" ]; then
    fail_step "BETWEEN,FAIL,Step 2: Assemblytics_between_alignments.pl failed: Possible problem with Perl or show-coords on server."
fi
log_progress "BETWEEN,DONE,Step 2: Assemblytics_between_alignments.pl completed successfully. Now finding variants within alignments."

# --- Step 3: variants within alignments ------------------------------------
>&2 echo "3. Finding variants within alignments"
"$SCRIPT_PATH/Assemblytics_within_alignment.py" --delta "$filtered_delta" --min "$MINIMUM_SIZE" --output "$within_bed"
if [ ! -e "$within_bed" ]; then
    fail_step "WITHIN,FAIL,Step 3: Assemblytics_within_alignment.py failed: Possible problem before this step or with Python on server."
fi
log_progress "WITHIN,DONE,Step 3: Assemblytics_within_alignment.py completed successfully. Now combining the two sets of variants together."

# --- Step 4: combine both variant sets under a single header ---------------
>&2 echo "4. Combine variants between and within alignments"
{
    printf '#reference\tref_start\tref_stop\tID\tsize\tstrand\ttype\tref_gap_size\tquery_gap_size\tquery_coordinates\tmethod\n'
    cat "$within_bed" "$between_bed"
} > "$combined_bed"
combine_status=$?
# As in step 2, the redirection alone would make the existence test pass.
if [ "$combine_status" -ne 0 ] || [ ! -e "$combined_bed" ]; then
    fail_step "COMBINE,FAIL,Step 4: combining variants failed"
fi
log_progress "COMBINE,DONE,Step 4: Variants combined successfully. Now generating figures and summary statistics."

# --- Step 5: figures, index, summary statistics ----------------------------
# The exit statuses of the chart/index helpers are not checked; only the
# summary file is inspected at the end.
"$SCRIPT_PATH/Assemblytics_variant_charts.R" "$OUTPUT_PREFIX" "$MINIMUM_SIZE" "$MAXIMUM_SIZE"

"$SCRIPT_PATH/Assemblytics_index.py" -coords "$OUTPUT_PREFIX.coords.csv" -out "$OUTPUT_PREFIX"

"$SCRIPT_PATH/Assemblytics_dotplot.R" "$OUTPUT_PREFIX"

# Unique (columns 7,5) and (columns 8,6) pairs from coords.tab, sorted by the
# second output column in descending numeric order.
awk -v OFS='\t' '{print $7,$5}' < "$OUTPUT_PREFIX.coords.tab" | sort | uniq | sort -k2,2nr > "$OUTPUT_PREFIX.coords.ref.genome"
awk -v OFS='\t' '{print $8,$6}' < "$OUTPUT_PREFIX.coords.tab" | sort | uniq | sort -k2,2nr > "$OUTPUT_PREFIX.coords.query.genome"

"$SCRIPT_PATH/Assemblytics_Nchart.R" "$OUTPUT_PREFIX"

"$SCRIPT_PATH/Assemblytics_summary.py" -i "$combined_bed" -min "$MINIMUM_SIZE" -max "$MAXIMUM_SIZE" > "$summary_file"

# zip up results for quick download of all results
# (the glob is left unquoted on purpose; only the prefix is quoted)
zip "$OUTPUT_PREFIX.Assemblytics_results.zip" "$OUTPUT_PREFIX".Assemblytics*

# create a small preview file of the variants for the GUI
head "$combined_bed" | column -t > "$OUTPUT_PREFIX.variant_preview.txt"

# The summary step counts as successful when its output contains "Total".
if ! grep -q "Total" "$summary_file"; then
    fail_step "SUMMARY,FAIL,Step 5: Assemblytics_summary.py failed"
fi
log_progress "SUMMARY,DONE,Step 5: Assemblytics_summary.py completed successfully"
