#!/usr/bin/perl -w
#blastn2qrnadepth.pl

use strict;

use vars qw ($opt_d $opt_e $opt_g $opt_i $opt_j $opt_l $opt_o $opt_r $opt_s $opt_v $opt_w $opt_x);
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';

getopts ('d:e:g:i:j:l:o:rs:vw:x:');     

# With no blast file on the command line, print the usage summary and stop.
# Every switch accepted by getopts() is listed here, including -v.
if (!@ARGV) { 
    print "usage: blastn2qrnadepth.pl [options] <blastfile>\n";
    print "options:\n";
    print "-d <depth>     :  max number of alignment coverage per position  [ default depth = 1       ]\n";
    print "-e <max_eval>  :  maximum evalue   of blast hits allowed         [ default max_eval = 0.01 ] \n";
    print "-g <org_name>  :  name of the blasting organism                  [ default 'org'           ]\n";
    print "-i <min_id>    :  minimum identity of blast hits allowed         [ default min_id = 0      ]\n";
    print "-j <max_id>    :  maximum identity of blast hits allowed         [ default max_id = 100    ]\n";
    print "-l <min_len>   :  minimum length   of blast hits allowed         [ default min_len = 1     ]\n";
    print "-o <outfile>   :  output qfile                                   [ default = 'blastfile.q' ]\n";
    print "-r             :  calculate depth respect to the database instead of the query \n";
    print "-s <shift>     :  position shift when calculating depth          [ default shift = 1       ] \n";
    print "-v             :  verbose: write a depth-profile plot file per query and display it (gnuplot, ghostview)\n";
    print "-w <which>     :  criteria to pick alignments                    [ default which = 'SC'    ] \n";
    print "                                    ID -- best \% identity\n";
    print "                                    SC -- best score\n";
    print "-x <name>          : ignore given name, use this one for gff outputs\n\n";
    exit;
}
# Split the blast file argument into a directory prefix (kept with its
# trailing slash, or empty when the argument has no slash) and a file name.
my $filename = shift;
my ($dir, $file) = ("", $filename);

if ($filename =~ /^(\S+)\/([^\/]+)$/) {
    ($dir, $file) = ("$1/", $2);
}

print "$dir\n $file\n";

#
#options
#
# Options that take a value are tested with defined() rather than for truth,
# so that an explicit "0" (for instance "-e 0" or "-j 0") is honoured instead
# of being silently replaced by the default.

# -d: how many alignments may cover one position.  Anything that is not a
# positive number falls back to the default of 1.
my $depth = (defined($opt_d) && $opt_d > 0) ? $opt_d : 1;

# -e: largest evalue a hit may have.
my $max_eval = defined($opt_e) ? $opt_e : 0.01;

# -g: organism name.
my $org = (defined($opt_g) && length($opt_g)) ? $opt_g : "org";

# -i / -j: accepted range of percent identity.
my $min_id = defined($opt_i) ? $opt_i : 0;
my $max_id = defined($opt_j) ? $opt_j : 100;

# -l: shortest alignment accepted.
my $min_len = defined($opt_l) ? $opt_l : 1;

# -o: name of the qrna-ready output file.
my $outfile = (defined($opt_o) && length($opt_o)) ? $opt_o
                                                  : "$file.E$max_eval.D$depth.q";

# -r: compute depth relative to the database sequences.
my $reverse = $opt_r;

# -s: step between the positions at which depth is evaluated.  It must be
# positive, otherwise the position loop in depile_blastn() never advances.
my $shift = (defined($opt_s) && $opt_s > 0) ? $opt_s : 1;

# -v: also write and display per-query depth plots.
my $verbose = $opt_v;

# -w: criterion used to rank alignments, best score (SC) or best identity (ID).
my $which = defined($opt_w) ? $opt_w : "SC";
if ($which ne "SC" && $which ne "ID") {
    die "wrong option -w '$which': must be 'SC' or 'ID'\n";
}

# -x: name to use in the gff output instead of the one found in the file.
# Left undefined when -x is not given.
my $usename = $opt_x;

my $filea   = "$file.a";      # here I dump the results of using len, evalue and identity cutoffs
my $fileb   = "$file.b";      # here I order the aligments for any query according to %id or score
my $filec   = "$file.c";      # final list of alignments in pseudoblastn format
my $fileq   = "$outfile";     # qrna-ready file
my $gfffile = "$outfile.gff";
my $report  = "$outfile.rep";
my $filerev = "$file.rev";    # reverse the blast output from query to sbjct

# Longest alignment accepted by prune_blastn().
my $max_len = 300000000;

# Length of each query, read by prune_blastn() from the "(N letters)" lines.
my %len_query;

# Global counters filled in by the processing stages and printed by
# write_report():
#   *_total : every alignment found in the blast file
#   *_one   : alignments that pass the length/evalue/identity cutoffs
#   *_two   : alignments that also survive the depth selection
my $n_query = 0;
my $n_ali_total = 0;
my $n_ali_one = 0;
my $n_ali_two = 0;

my %n_ali_per_query_total;
my %n_ali_per_query_one;
my %n_ali_per_query_two;
my %worse_evalue;

# These accumulate summed alignment lengths; write_report() divides them by
# the matching alignment count to obtain the averages.
my $ave_len_ali_total = 0;
my $ave_len_ali_one   = 0;
my $ave_len_ali_two   = 0;

# All processing is done on a scratch copy of the blast file.
my $blastfile = "$dir$file"."_copy";

# External commands are run in list form so that file names containing spaces
# or shell metacharacters are passed through untouched, and their exit status
# is checked.
system ('cp', '--', "$dir$file", $blastfile) == 0
    or die "cannot copy $dir$file to $blastfile\n";

if ($reverse) {
    print " Reverse blast\n";
    reverse_blast ($blastfile, $filerev);
    system ('mv', '--', $filerev, $blastfile) == 0
	or die "cannot move $filerev to $blastfile\n";
}

prune_blastn ($blastfile, $filea);
if ($n_ali_one > 0) {
    order_blastn  ($filea, $fileb);
    unlink ($filea) or warn "cannot remove $filea: $!\n";

    depile_blastn ($fileb, $filec);
    unlink ($fileb) or warn "cannot remove $fileb: $!\n";

    blastn_2_qrna ($filec, $fileq);
    unlink ($filec) or warn "cannot remove $filec: $!\n";
}
else {
    # nothing passed the cutoffs: only the (empty) first-stage file exists
    unlink ($filea) or warn "cannot remove $filea: $!\n";
}

write_report ($report);

unlink ($blastfile) or warn "cannot remove $blastfile: $!\n";

##
## SUBROUTINES
##

# blastn_2_qrna ()
#
# Turn a pseudo-blastn file into the qrna-ready file $fileq and write one GFF
# "similarity" line per alignment to the file-level $gfffile.
#
# The input is a list of alignments, each introduced by two '>' header lines
# (first sequence, then second sequence) and followed by "Query:" and
# "Sbjct:" lines.  For every alignment two records are written to $fileq:
#
#     >name-start>end-annotation     (start<end when the first coordinate
#                                     is not smaller than the last one)
#     <tab>aligned chunk             (one line per Query:/Sbjct: line)
#
# Arguments:
#     $file   input file  (pseudo-blastn)
#     $fileq  output file (qrna input)
#
# Reads the file-level $gfffile, $opt_x and $usename.  Dies when a file
# cannot be opened or an output file cannot be closed.  An input without any
# alignment produces two empty output files.
sub blastn_2_qrna {

    my ($file, $fileq) = @_;

    # number of '>' headers seen so far for the first / second sequence
    my $nseq1 = 0;
    my $nseq2 = 0;

    # number of Query: / Sbjct: lines collected for the current alignment
    my $line1 = 0;
    my $line2 = 0;

    my ($name1, $name2);
    my ($annotation1, $annotation2);

    # 0: the next '>' header names the first sequence, 1: the second one
    my $seq = 0;

    # per-line left coordinate, sequence chunk and right coordinate
    my (@numl1, @seq1, @numr1);
    my (@numl2, @seq2, @numr2);

    # The bareword handles are kept on purpose: re-opening OUT implicitly
    # closes (and flushes) whatever an earlier stage may have left open on it.
    open (GFF, '>', $gfffile) || die "cannot write $gfffile: $!\n";
    open (OUT, '>', $fileq)   || die "cannot write $fileq: $!\n";

    open (SBLAST, '<', $file) || die "cannot read $file: $!\n";

    while (<SBLAST>) {
	if (/^>(.+)/) {
	    # The name is stored backslash-escaped; a second whitespace
	    # separated word, if any, becomes the annotation.
	    my $name       = quotemeta $1;
	    my $annotation = "";
	    if ($name =~ /^(\S+)\s+(\S+)/) { $name = $1; $annotation = $2; }

	    if ($seq == 0) {
		# A new alignment starts: flush the first sequence of the
		# previous one.
		if ($nseq1 > 0) {
		    _qrna_print_record ($name1, $annotation1, \@numl1, \@numr1, \@seq1, $line1);
		    _qrna_print_gff    ($name1, \@numl1, \@numr1, $line1);
		}
		$line1       = 0;
		$name1       = $name;
		$annotation1 = $annotation;

		$seq = 1;
		$nseq1++;
	    }
	    else {
		# Second header of the pair: flush the second sequence of the
		# previous alignment.
		if ($nseq2 > 0) {
		    _qrna_print_record ($name2, $annotation2, \@numl2, \@numr2, \@seq2, $line2);
		}
		$line2       = 0;
		$name2       = $name;
		$annotation2 = $annotation;

		$seq = 0;
		$nseq2++;
	    }
	}
	elsif (/^Query:\s+(\S+)\s+(\S+)\s(\S+)/) { 
	    $numl1[$line1] = $1; $seq1[$line1] = $2; $numr1[$line1] = $3; $line1++; }
	elsif (/^Sbjct:\s+(\S+)\s+(\S+)\s(\S+)/) { 
	    $numl2[$line2] = $1; $seq2[$line2] = $2; $numr2[$line2] = $3; $line2++; }
    }

    close (SBLAST);

    # Flush the last alignment, if there was one at all.
    if ($nseq1 > 0) {
	_qrna_print_record ($name1, $annotation1, \@numl1, \@numr1, \@seq1, $line1);
	_qrna_print_gff    ($name1, \@numl1, \@numr1, $line1);
    }
    if ($nseq2 > 0) {
	_qrna_print_record ($name2, $annotation2, \@numl2, \@numr2, \@seq2, $line2);
    }

    close (OUT) || die "error closing $fileq: $!\n";
    close (GFF) || die "error closing $gfffile: $!\n";
}

# _qrna_span ()
#
# Order a pair of coordinates.  Returns (start, end, strand mark used in the
# qrna header, strand sign used in the gff line).
sub _qrna_span {
    my ($first_left, $last_right) = @_;

    if ($first_left < $last_right) { return ($first_left, $last_right, ">", "+"); }
    return ($last_right, $first_left, "<", "-");
}

# _qrna_print_record ()
#
# Write one sequence of an alignment to OUT: the header line followed by the
# first $nlines sequence chunks, each preceded by a tab.
sub _qrna_print_record {
    my ($name, $annotation, $numl, $numr, $chunks, $nlines) = @_;

    my ($start, $end, $strand) = _qrna_span ($numl->[0], $numr->[$nlines-1]);

    print OUT ">", $name, "-", $start.$strand.$end, "-", $annotation, "\n";
    for (my $i = 0; $i < $nlines; $i++) {
	print OUT "\t", $chunks->[$i], "\n";
    }
}

# _qrna_print_gff ()
#
# Write the GFF line for the first sequence of an alignment.  Offsets encoded
# in the (backslash-escaped) name are added to the coordinates:
#     name/fragN<from>-<to>  adds <from>-1
#     name[/-:]<a>-<b>       adds min(<a>,<b>)-1
# and the name is reduced to the part in front of them.  When -x was given
# that name replaces the one taken from the file.
sub _qrna_print_gff {
    my ($name, $numl, $numr, $nlines) = @_;

    my ($start, $end, undef, $pmstrand) = _qrna_span ($numl->[0], $numr->[$nlines-1]);

    my $abs = 0;

    if ($name =~ /^(\S+)\/frag\d+(.+)$/) {
	$name = $1; 
	my $rest = $2;
	$name =~ s/\\//g;

	if ($rest =~ /(\S+)\-\S+/) {
	    $rest = $1;
	    $rest =~ s/\///g; $rest =~ s/\\//g; 
	    $abs = $rest-1;
	}
    }
    $name =~ s/\\//g;
    if ($name =~ /(\S+)[\/\-\:](\d+)\-(\d+)/) { 
	my ($base, $from, $to) = ($1, $2, $3);
	$name = $base;
	$abs += ($from < $to) ? $from-1 : $to-1;
    }
    $start += $abs;
    $end   += $abs;

    my $label = $name;
    if (defined($opt_x)) { $label = defined($usename) ? $usename : $opt_x; }

    print GFF "$label\tBLASTN\tsimilarity\t$start\t$end\t.\t$pmstrand\t.\n";
}

# depile_blastn ()
#
# Limit the alignment coverage of every query to the file-level $depth.
#
# First pass over $fileb: for every query position count how many alignments
# cover it and remember which ones, in file order.  At every $shift-th
# position the first min(coverage, $depth) alignments are marked as selected.
# Second pass: copy the selected alignments to $filec, each introduced by two
# header lines, ">query" and ">sbjct".
#
# Arguments:
#     $fileb  input file, alignments already ordered best first
#     $filec  output file (pseudo-blastn)
#
# Reads the file-level $reverse, $verbose, $depth, $shift, %len_query and
# %n_ali_per_query_one; updates %n_ali_per_query_two, $n_ali_two and
# $ave_len_ali_two.  Dies on unreadable/unwritable files, on coordinates
# outside the query and when the internal counts disagree.
sub depile_blastn {

    my ($fileb, $filec) = @_;

    my $keyquery;

    my @howmany;   # per position: number of alignments covering it
    my @which;     # per position: "i-j-k-" list of covering alignments

    my %bit;       # per query: one '0'/'1' flag per alignment

    my $count_query = 0;
    my $count_ali_per_query = 0;

    # NOTE(review): with -r the test below accepts the "Query:" lines as well
    # as the "Sbjct:" lines, so both coordinate sets are counted.  Confirm
    # that this is intended.
    open (SBLAST, '<', $fileb) || die "cannot read $fileb: $!\n";
    while (<SBLAST>) {
	if (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/)) {
	    my $keyquery_new = $1;

	    # the previous query is complete: select its alignments
	    if ($count_query > 0) {
		_depile_select ($keyquery, \@howmany, \@which, \%bit);
	    }

	    $count_query ++;
	    $count_ali_per_query = 0;
	    $keyquery = $keyquery_new;

	    for (my $l = 0; $l < $len_query{$keyquery}; $l++) { $howmany[$l] = 0; $which[$l] = ""; }
	}
	elsif (/^\s+Identities/) {
	    $count_ali_per_query ++;
	}
	elsif (/^Query:\s+(\S+)\s+\S+\s+(\S+)/ || ($reverse && /^Sbjct:\s+(\S+)\s+\S+\s+(\S+)/)) {
	    my ($numl, $numr) = ($1, $2);

	    # zero-based, ordered limits of this alignment line
	    my ($start, $end) = ($numl < $numr) ? ($numl-1, $numr-1) : ($numr-1, $numl-1);

	    if ($start < 0) { die "wrong blast limits: start= $start\n"; }

	    if ($end >= $len_query{$keyquery}) { die "wrong blast limits: end=$end len = $len_query{$keyquery}\n"; }

	    my $num = $count_ali_per_query - 1;
	    for (my $l = $start; $l <= $end; $l++) {
		$howmany[$l] ++;
		$which[$l] .= $num."-";
	    }
	}
    }
    close (SBLAST);

    # the last query has not been handled inside the loop
    if (defined($keyquery)) {
	_depile_select ($keyquery, \@howmany, \@which, \%bit);
    }

    # now, print the selected alignments

    my $flag = 0;        # true while inside a selected alignment
    my $count = 0;       # alignments seen for the current query
    my $count_two = 0;   # alignments printed for the current query

    my @qbit;
    my $query;

    open (OUT, '>', $filec) || die "cannot write $filec: $!\n";

    open (SBLAST, '<', $fileb) || die "cannot read $fileb: $!\n";
    while (<SBLAST>) {
	if (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/)) {
	    $keyquery = $1;

	    # drop the "N>" prefix that prune_blastn() puts in front of the name
	    $query = ($keyquery =~ /^\d+>(.+)$/) ? $1 : $keyquery;

	    # The flags are (re)loaded for every query.  Testing the flag
	    # string for truth is not enough: the string "0", one alignment
	    # that was not selected, is false in Perl, and the flags of the
	    # previous query would be used instead.
	    my $mask    = defined($bit{$keyquery}) ? $bit{$keyquery} : "";
	    my $covered = ($mask =~ tr/1//);
	    my $total   = length($mask);

	    $n_ali_per_query_two{$keyquery}  = $covered;
	    $n_ali_two                      += $covered;

	    $count     = 0;
	    $count_two = 0;
	    $flag      = 0;

	    @qbit = split(//, $mask);

	    #paranoia
	    if ($total != $n_ali_per_query_one{$keyquery}) {
		die "how many at first at $keyquery? $total or $n_ali_per_query_one{$keyquery}?\n";
	    }
	}
	elsif (/^>(.+)/) {
	    my $sbjct = $1;
	    $flag = $qbit[$count];
	    $count ++;

	    if ($flag) {
		$count_two ++;

		print OUT ">$query\n";
		print OUT ">$sbjct\n";
	    }
	}
	elsif ($flag && /^\s+Identities = \S+\/(\S+)/) {
	    $ave_len_ali_two += $1;
	    print OUT $_;
	}
	elsif ($flag) { print OUT $_; }
    }

    close (SBLAST);
    close (OUT) || die "error closing $filec: $!\n";

    if (defined($keyquery)) {
	#paranoia
	if ($count     != $n_ali_per_query_one{$keyquery}) { die "how many at first for sure at $keyquery? $count or $n_ali_per_query_one{$keyquery}?\n"; }
	if ($count_two != $n_ali_per_query_two{$keyquery}) { die "how many at last for sure at $keyquery? $count_two or $n_ali_per_query_two{$keyquery}?\n"; }
    }
}

# _depile_select ()
#
# Build the selection flags of one query: $bit->{$keyquery} gets one
# character per alignment that passed the first trimming, '1' when the
# alignment is among the first $depth alignments covering at least one of the
# sampled positions, '0' otherwise.  With -v the coverage profile is written
# to "<number>_<name>.plot" and handed to plot().
sub _depile_select {
    my ($keyquery, $howmany, $which, $bit) = @_;

    $bit->{$keyquery} = "0" x $n_ali_per_query_one{$keyquery};

    my $queryplot;
    if ($verbose) {
	my ($qnum, $qname) = $keyquery =~ /^(\d+)>(\S+)/;
	$queryplot = "${qnum}_$qname.plot";
	open (PLOT, '>', $queryplot) || die "cannot write $queryplot: $!\n";
    }

    # a step that is not positive would never leave the loop
    my $step = ($shift > 0) ? $shift : 1;

    for (my $pos = 0; $pos < $len_query{$keyquery}; $pos += $step) {

	my $pile = ($howmany->[$pos] > $depth) ? $depth : $howmany->[$pos];

	if ($verbose) { print PLOT "$pos $howmany->[$pos] $pile\n"; }

	my @array = split(/-/, $which->[$pos]);

	#paranoia
	if (scalar(@array) != $howmany->[$pos]) {
	    die "how many alignments at position $pos? ", scalar(@array), " or $howmany->[$pos]?\n";
	}

	# alignments are listed best first, so the first $pile are kept
	for (my $num = 0; $num < $pile; $num++) {
	    substr($bit->{$keyquery}, $array[$num], 1) = "1";
	}
    }

    if ($verbose) { close (PLOT); plot ($queryplot); }
}



# order_blastn ()
#
# Rewrite the alignments of every query in decreasing order of the chosen
# criterion: the number following "Score =" when the file-level $which is
# 'SC', the percent identity when it is 'ID'.
#
# Arguments:
#     $filea  input file written by prune_blastn()
#     $fileb  output file, same layout, alignments reordered
#
# Every query is introduced by the same kind of header line, "Sbjct=" with
# -r and "Query=" otherwise.  Dies when a file cannot be opened or the output
# cannot be closed.
sub order_blastn {

    my ($filea, $fileb) = @_;

    my $keyquery;
    my $count_qr = 0;

    my $n_ali = -1;   # index of the alignment being read, -1: none yet
    my @ali;          # text of every alignment of the current query
    my @sc;           # ranking value of every alignment of the current query

    open (OUT, '>', $fileb) || die "cannot write $fileb: $!\n";

    open (SBLAST, '<', $filea) || die "cannot read $filea: $!\n";

    # Sort and print the alignments collected for the current query.  The
    # original exchange sort is kept as it is, so that alignments with equal
    # values come out in the same order as before.
    my $flush = sub {
	for (my $i = 0; $i <= $n_ali; $i++) {
	    for (my $j = $i+1; $j <= $n_ali; $j++) {
		if ($sc[$i] < $sc[$j]) {
		    @sc[$i, $j]  = @sc[$j, $i];
		    @ali[$i, $j] = @ali[$j, $i];
		}
	    }
	}

	if ($reverse) { print OUT "Sbjct=  $keyquery\n"; }
	else          { print OUT "Query=  $keyquery\n"; }

	for (my $i = 0; $i <= $n_ali; $i++) {
	    print OUT $ali[$i];
	}
    };

    while (<SBLAST>) {
	if (/^\s*Query=\s+(.+)/ || ($reverse && /^\s*Sbjct=\s+(.+)/)) {
	    my $keyquery_new = $1;

	    if ($count_qr > 0) { $flush->(); }

	    # start collecting from scratch; stale values must not leak into
	    # the next query
	    $n_ali = -1;
	    @ali = ();
	    @sc  = ();
	    $keyquery = $keyquery_new;
	    $count_qr ++;
	}
	elsif (/^>(.+)/) {
	    $n_ali ++;
	    $ali[$n_ali] .= $_;
	}
	elsif ($which =~ /^ID$/ && /^\s+Identities\s+=\s+\S+\/\S+\s\((\S+)%\)/) {
	    $sc[$n_ali] = $1;
	    $ali[$n_ali] .= $_;
	}
	elsif ($which =~ /^SC$/ && /^\s+Score\s+=\s+(\S+)/) {
	    $sc[$n_ali] = $1;
	    $ali[$n_ali] .= $_;
	}
	elsif ($n_ali > -1) { $ali[$n_ali] .= $_; }
    }

    #last case
    if ($count_qr > 0) { $flush->(); }

    close (SBLAST);
    close (OUT) || die "error closing $fileb: $!\n";
}


# plot ()
#
# Feed gnuplot the commands that draw columns 2 and 3 of $plotfile against
# column 1 into the postscript file "$plotfile.ps", then start ghostview on
# that file in the background.
sub plot {

    my $plotfile = $_[0];

    my @commands = (
	"set terminal postscript color\n",
	"set output '$plotfile.ps'\n",
	"set title 'Density of BLASTN alignments'\n",
	"set xlabel 'Position'\n",
	"set ylabel 'Number of blastn hits'\n",
	"plot '$plotfile' using 1:2 with lines,  '$plotfile' using 1:3 with lines\n",
    );

    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";
    foreach my $command (@commands) {
	print GP $command;
    }
    close GP;

    system ("ghostview -landscape -magstep -2 $plotfile.ps&\n");
}

# prune_blastn ()
#
# First trimming: copy to $filea only the alignments of $file whose length,
# percent identity and evalue lie within the file-level cutoffs ($min_len,
# $max_len, $min_id, $max_id, $max_eval).
#
# Every query is renamed "<n>><name>", <n> being its order of appearance,
# and written as a "Query=" line ("Sbjct=" with -r).  Every alignment kept is
# written as ">sbjct", its score line, its identities line and the lines
# that follow it.
#
# Arguments:
#     $file   blast output to read
#     $filea  file to write
#
# Fills %len_query, %worse_evalue and the *_total / *_one counters.  Dies
# when a file cannot be opened or the output cannot be closed.
sub prune_blastn {

    my ($file, $filea) = @_;

    my $keyquery;
    my $sbjct;

    my $evalue;
    my $scoreline;

    my $flag = 0;   # 1 while the lines read belong to an alignment kept

    open (OUT, '>', $filea) || die "cannot write $filea: $!\n";

    open (BLAST, '<', $file) || die "cannot read $file: $!\n";
    while (<BLAST>) {

	if (/^\s*Query=(.+)/ || ($reverse && /^\s*Sbjct=(.+)/)) {
	    $n_query ++;
	    my $query = $1;

	    # keep the first word only
	    if (length($query) == 0) { $query = "Query"; }

	    if ($query =~ /^\s+(\S+)/) { $query = $1; }
	    if ($query =~ /^(\S+)\s+/) { $query = $1; }

	    $keyquery = $n_query.">".$query;

	    if (!$reverse) { print OUT "Query=  $keyquery\n"; }
	    else           { print OUT "Sbjct=  $keyquery\n"; }

	    #set counters
	    $n_ali_per_query_total{$keyquery} = 0;
	    $n_ali_per_query_one{$keyquery}   = 0;
	    $n_ali_per_query_two{$keyquery}   = 0;

	    $worse_evalue{$keyquery} = 1e-300;

	    # whatever follows the header is not part of a kept alignment
	    $flag = 0;
	}
	elsif (/^\s+\((\S+) letters/) {
	    $len_query{$keyquery} = $1;  $len_query{$keyquery} =~ s/,//g;
	}
	elsif (/^>(.+)/) {
	    $flag = 0;
	    $sbjct = $1; $sbjct =~ s/ //g; $sbjct =~ s/[\(\)\|\+\_\/]/-/g;
	}
	elsif (/Expect(?:\(\d+\))?\s+=\s+(\S+)/) {
	    # "Expect(2) = ..." is recognised too; otherwise such an alignment
	    # would be judged, and printed, with the values of the one before.
	    $evalue = $1;

	    # dealing with different representation of evalues
	    if ($evalue =~ /^(\S+),/)  { $evalue = $1;         }   # trailing comma
	    if ($evalue =~ /^e-\d+/)   { $evalue = "1$evalue"; }   # "e-100" is 1e-100

	    # Let perl read the number itself: going through
	    # exp(-n*log(10)) adds a rounding error that can push a hit
	    # sitting exactly on the cutoff to the wrong side of it.
	    if ($evalue =~ /^\S+e-\d+/) { $evalue += 0; }

	    if ($evalue > $worse_evalue{$keyquery}) { $worse_evalue{$keyquery} = $evalue; }

	    $scoreline = $_;
	}
	elsif (/^\s+Identities\s+=\s+\S+\/(\S+)\s\((\S+)%\)/) {
	    $flag = 0;

	    my $length       = $1;
	    my $identity     = $2;
	    my $identityline = $_;

	    $n_ali_total  ++;
	    $n_ali_per_query_total{$keyquery}  ++;

	    $ave_len_ali_total += $length;

	    if ($length   >= $min_len && $length   <= $max_len &&
		$identity >= $min_id  && $identity <= $max_id  &&
		$evalue   <= $max_eval) {
		$n_ali_per_query_one{$keyquery} ++; $n_ali_one ++;
		$flag = 1;

		$ave_len_ali_one += $length;

		print OUT ">", $sbjct, "\n";
		print OUT $scoreline;
		print OUT $identityline;
	    }
	}
	elsif (/Parameters:/) { $flag = 0; }
	elsif ($flag == 1) { print OUT $_; }

    }
    close (BLAST);
    close (OUT) || die "error closing $filea: $!\n";
}


# write_report ()
#
# Write the summary of the run to $report: the cutoffs in use, the alignment
# counts of every query (listed in order of appearance) and the overall
# totals with the average alignment length after each trimming.
#
# Arguments:
#     $report  name of the report file
#
# Reads the file-level option values and the counters filled in by the
# other stages.  Dies when the report cannot be opened or closed.
sub write_report {
    my ($report) = @_;

    my $n_query_eval = 0;

    open (REP, '>', $report) || die "cannot write $report: $!\n";

    print REP "FILE: \t$file\n";
    # $dir carries its own trailing slash and is empty for the current directory
    print REP "DIR:  \t", ($dir ne "" ? $dir : "./"), "\n";


    print REP "\nFIRST TRIMMING\n";
    print REP "Minimum length = $min_len\n";
    print REP "Maximum Evalue = $max_eval\n";
    print REP "Minimum %id    = $min_id\n";
    print REP "Maximum %id    = $max_id\n";

    print REP "\nSECOND TRIMMING\n";
    print REP "Alignments culled by = $which\n";
    print REP "Depth of alignments  = $depth\n";
    print REP "shift                = $shift\n\n";

    # The keys look like "<n>><name>"; sorting on <n> lists the queries in the
    # order in which they were read instead of in hash order.
    my @queries = map  { $_->[1] }
                  sort { $a->[0] <=> $b->[0] }
                  map  { [ (/^(\d+)>/ ? $1 : 0), $_ ] }
                  keys(%n_ali_per_query_total);

    foreach my $r (@queries) {

	my ($num, $query) = $r =~ /^(\d+)>(.+)$/;
	print REP "$num-QUERY: $query \n";
	print REP "\tTotal \# alignments: $n_ali_per_query_total{$r}\t After First trimming: $n_ali_per_query_one{$r}\t After Second trimming: $n_ali_per_query_two{$r} \n";
	print REP "\tWorse evalue: $worse_evalue{$r}\n";

	if ($worse_evalue{$r} > $max_eval) { $n_query_eval ++; }
    }

    printf (REP "\nTotal \#Queries     \t%d\n", $n_query);
    printf (REP "Total \#Alignments    \t%d\t", $n_ali_total);
    if ($n_ali_total > 0.0) { printf (REP " ave_len = %.1f\n", $ave_len_ali_total/$n_ali_total); }
    else { printf (REP " ave_len = 0\n"); }

    printf (REP "After first trimming  \t%d\t", $n_ali_one);
    if ($n_ali_one > 0.0) { printf (REP " ave_len = %.1f\n", $ave_len_ali_one/$n_ali_one); }
    else { printf (REP " ave_len = 0\n"); }

    printf (REP "After second trimming \t%d\t", $n_ali_two);
    if ($n_ali_two > 0.0) {  printf (REP " ave_len = %.1f\n", $ave_len_ali_two/$n_ali_two); }
    else { printf (REP " ave_len = 0\n"); }

    # %g keeps small cutoffs readable (%f prints 1e-10 as 0.000000)
    printf (REP "\# queries with worse evalue > %g\t%d\n", $max_eval, $n_query_eval);

    close (REP) || die "error closing $report: $!\n";
}


# reverse_blast ()
#
# Regroup a blast output by database sequence.  For every distinct subject
# found in $file, in order of first appearance, $filerev gets
#
#     Sbjct= <subject name>
#               (<len> letters)     only when the name ends in -<from>-<to>
#     ><query name>                 once per alignment against that subject
#     ...the lines of the alignment...
#
# The characters ( ) | + _ / of a subject name are replaced by '-'.
#
# Arguments:
#     $file     blast output to read
#     $filerev  file to write
#
# The input is read once per subject.  Dies when a file cannot be opened or
# the output cannot be closed.
sub reverse_blast
{
    my ($file, $filerev) = @_;

    # subjects in order of first appearance
    my @sbjcts;
    my %seen;

    open (SBLAST, '<', $file) || die "cannot read $file: $!\n";
    while (<SBLAST>) {
	if (/^>(.+)$/) {
	    my $keysbjct = $1; $keysbjct =~ s/[\(\)\|\+\_\/]/-/g;

	    if (!$seen{$keysbjct}) {
		$seen{$keysbjct} = 1;
		push (@sbjcts, $keysbjct);
	    }
	}
    }
    close (SBLAST);

    open (OUT, '>', $filerev) || die "cannot write $filerev: $!\n";

    foreach my $sbjct (@sbjcts) {

	my $get = 0;    # 1 while the lines read belong to this subject
	my $keyquery;

	print OUT "Sbjct= $sbjct\n";

	if ($sbjct =~ /^.+\-(\d+)\-(\d+)$/) {

	    # NOTE(review): if <from> and <to> are both part of the sequence
	    # its length is to-from+1; confirm before relying on this value.
	    my $len = $2 - $1;

	    print OUT "          ($len letters)\n";
	}

	open (SBLAST, '<', $file) || die "cannot read $file: $!\n";
	while (<SBLAST>) {
	    if (/^\s*Query=\s+(.+)/) {
		$keyquery = $1;

		# the header lines of a new query belong to no alignment
		$get = 0;
	    }
	    elsif (/^>(.+)$/) {
		$get = 0;
		my $keysbjct = $1; $keysbjct =~ s/[\(\)\|\+\_\/]/-/g;

		# plain string comparison: names may contain characters that
		# are special inside a regular expression
		if ($keysbjct eq $sbjct) {
		    $get = 1;
		    print OUT ">$keyquery\n";
		}
	    }
	    elsif (/Parameters:/) { $get = 0; }
	    elsif ($get == 1) { print OUT $_; }
	}
	close (SBLAST);
    }

    # The caller moves the file right away, so it must be complete on disk.
    close (OUT) || die "error closing $filerev: $!\n";
}

