#!/usr/bin/perl -w

#phase_count_fast.pl

use strict;
use vars qw ($opt_c $opt_i $opt_g $opt_l $opt_o $opt_q $opt_s $opt_t $opt_u $opt_w $opt_x);  # required if strict used
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';

getopts ('ti:c:g:l:o:q:s:u:w:x:');     # ('aci:p:o:') means 'ac' are flags, 'i:p:o:' gets following scalar.


# Print a helpful message if the user provides no input file.
if (!@ARGV) { 
        print "usage:  phase_count_fast.pl [options] file.qrna org1 org2\n\n";
	print "options:\n";
        print "-c <case>          :  cases (default is case = 1)\n";
        print "                        possible cases are:\n";
        print "                        0=GLOBAL\n";  
        print "                        1=LOCAL_DIAG_VITERBI 2=LOCAL_DIAG_FORWARD\n";
        print "                        3=LOCAL_SEMI_VITERBI 4=LOCAL_SEMI_FORWARD\n";
        print "                        5=LOCAL_FULL_VITERBI 6=LOCAL_FULL_FORWARD\n";
	print "-i <id_max>        :  max identity of alignments analysed (default is id_max = 100)\n";
	print "-l <loci_overlap>  :  minimun overlap required to build loci (default is loci_overlap = -1)\n";
	print "-g <typetarget>    :  which type of loci you want to analyze (default is all three)\n";
        print "                        possible types of loci are:\n";
        print "                        OTH | COD | RNA \n";
	print "-o <output>        : output file [default = ]\n";
	print "-q <file.q>        : include qfile to check if all the alignments were analysed\n";
        print "-s <type_of_score> : type of score (sigmoidal | simple)       [default = sigmoidal]\n";
        print "-t                 : towhomness -- obtains corresponding loci in the other organism\n";
	print "-u <cutoff>        : default is cutoff = 5\n";
	print "-w <whichorg>      : default is whichorg = 1  (use 1-for-org1 2-for-org2 12-for-both)\n";
	print "-x <name>          : ignore given name, use this one for gff outputs\n\n";
       exit;
}
my $file    = shift;
my $org1    = shift;
my $org2    = shift;
my $tag;
my $type;

my $dir;
my $filename;

if ($file =~ /^(\S+)\/([^\/]+)$/) {
    $dir  = $1;
    $filename = $2;
}
else {
    $dir  = "";
    $filename = $file;
}
#print "file: $filename\n";
#print "dir:  $dir/\n";

my $output   = "$dir";

# examples of how a non-flag option might be used 
my $towhomness = $opt_t;

my $n_in_ali = 0;
my $file_q     = $opt_q;
if ($file_q) { check_with_qfile(); }


my $typetarget;
if ($opt_g) { $typetarget = $opt_g; }
else        { $typetarget = "all";  }

my $overlap; #minimum overlap required when calculating loci
if (defined($opt_l)) { $overlap = $opt_l; }
else                 { $overlap = -1;     }

my $whichorg ;
if ($opt_w) { $whichorg = $opt_w; }
else        { $whichorg = 1;      }
if ($whichorg != 1 && $whichorg != 2 && $whichorg != 12 ) { print "organism has to be '1' or '2' or '12'\n"; die }

my $fix_cutoff;
if (defined($opt_u)) { $fix_cutoff = $opt_u; }
else                 { $fix_cutoff = 5;     }

my $id_max;
if (defined($opt_i)) { $id_max = $opt_i; }
else                 { $id_max = 100;    }

my $outputfile;
if ($opt_o) { $outputfile = $opt_o; }
else        { $outputfile = "$file.$typetarget"."loci".".CUTOFF$fix_cutoff";  }

my $usename;
if ($opt_x) { $usename = $opt_x; }

my $outputfile1gff = "$outputfile.$org1.gff";
my $outputfile2gff = "$outputfile.$org2.gff";

system("rm $outputfile.$org1.gff\n");
system("rm $outputfile.$org2.gff\n");
system("touch $outputfile.$org1.gff\n");
system("touch $outputfile.$org2.gff\n");

my $type_of_score;
if ($opt_s) { $type_of_score = $opt_s;      }
else        { $type_of_score = "sigmoidal"; }
if ($type_of_score =~ /^simple$/ || $type_of_score =~ /^sigmoidal$/) { ;}
else { print "wrong type of score. options are: 'simple' or 'sigmoidal'"; die; }

my $case;
if ($opt_c) { $case = $opt_c; }
else        { $case = 1;      }

if   ($case==0) { $tag = "GLOBAL";        }
elsif($case==1) { $tag = "LOCAL_DIAG_VITERBI"; }
elsif($case==2) { $tag = "LOCAL_DIAG_FORWARD"; }
elsif($case==3) { $tag = "LOCAL_SEMI_VITERBI"; }
elsif($case==4) { $tag = "LOCAL_SEMI_FORWARD"; }
elsif($case==5) { $tag = "LOCAL_FULL_VITERBI"; }
elsif($case==6) { $tag = "LOCAL_FULL_FORWARD"; }

my $sqrt2 = sqrt(2.0);

open (OUT, ">$outputfile") || die;

if ($typetarget =~ /^all$/) {
    phase_count_target ("RNA");
    phase_count_target ("COD");
    phase_count_target ("OTH"); 
}
else {
    phase_count_target ($typetarget);
}

close (OUT);


#######################################


sub phase_count_target {

    my ($target) = @_;

    my $num = 0;
    
    my $coor1;
    my $coor2;
    my $coor1_new;
    my $coor2_new;
    
    my $othsc;
    my $rnasc;
    my $codsc;
    
    my $rnalod;
    my $codlod;
    
    my $othlodsigm;
    my $rnalodsigm;
    my $codlodsigm;
    
    my $rna;
    my $cod;
    my $oth;
    
    my $name1;
    my $name2;
    my $name1_new;
    my $name2_new;
    
    my @name1;
    my @name2;
    my $rest;
    
    my @lloci1;
    my @lloci2;
    
    my @rloci1;
    my @rloci2;
    
    my @type1;
    my @type2;
    
    my @howmany1;
    my @howmany2;
    
    my @othsc1;
    my @othsc2;
    
    my @codsc1;
    my @codsc2;
    
    my @rnasc1;
    my @rnasc2;
    
    my @towhom1;
    my @towhom2;
    
    my $startblast1;
    my $startblast2;
    my $startblast1_new;
    my $startblast2_new;

    my $startfrag1;
    my $startfrag2;
    my $startfrag1_new;
    my $startfrag2_new;
        
    my $nnamesseq = 0;   # total number of blast hits 
    my $ntseq = 0;       # total number of windows
    my $nloci1 = 0;      # total number of independent loci
    my $nloci2 = 0;      # total number of independent loci
    
    my $nrcseq  = 0; # number of seq in transition RNA/COD
    my $nroseq  = 0; # number of seq in transition RNA/OTH
    my $ncoseq  = 0; # number of seq in transition COD/OTH
    my $n3seq   = 0; # number of seq in transition RNA/COD/OTH
    my $nundseq;     # number of seqs in transitions
    # $nundseq = nrcseq + $nroseq + $ncoseq + $n3seq
    
    my $nrseq     = 0; # number of seq in RNA phase
    my $ncseq     = 0; # number of seq in COD phase
    my $noseq     = 0; # number of seq in OTH phase
    my $nphaseseq;     # number of seq that belong to full phases
    # $nphaseseq = $nundseq + $nrseq + $ncseq + $noseq
    
    my $codpos = 0; # number of windows with cod_lodsc > 0
    my $rnapos = 0; # number of windows with rna_lodsc > 0
    
    my $codcut = 0; # number of windows with cod_lodsc > cutoff
    my $rnacut = 0; # number of windows with rna_lodsc > cutoff
    
    my $seq = 0;
    
    my $realtype; 
    
    my $idx = 0;
    
    my $len;
    my $len_new;
    my $id;
    my $id_new;
    
    my $time = -1;
    
    my $Nid = 100;
    my $kid = 1/1;
    
    my $cutoff;
    
    my $oth_lend;
    my $oth_rend;
    
    my $cod_lend;
    my $cod_rend;
    
    my $rna_lend;
    my $rna_rend;
    
    my $winner_lend;
    my $winner_rend;
    
    open (FILE,"$file") || die;
    while (<FILE>) {
	
	if (/^Divergence time \(\S+\):\s+(\S+)/) {
	    $time = $1;
	}
	elsif (/^posX: (.+)$/ ) { 
	    $coor1_new = $1;
	} 
	
	elsif (/^posY: (.+)$/) { 
	    $coor2_new = $1;
	    
	    if ($ntseq > 0 && $id <= $id_max) {
		
		#
		#Identify type of hit, and add them up to generate some statistics at the end.
		#
		if ($type_of_score =~  /^simple$/) { 
		    is_above_cutoff($cutoff, \$realtype, $codlod, $rnalod, \$noseq, \$ncseq, \$nrseq, \$ncoseq, \$nroseq, \$nrcseq, \$n3seq);                                
		    $oth = 0; $cod = $codlod; $rna = $rnalod;    
		}
		else                               { 
		    is_above_cutoff_sigmoidal($cutoff, \$realtype, $othlodsigm, $codlodsigm, $rnalodsigm, \$noseq, \$ncseq, \$nrseq); 
		    $oth = $othlodsigm; $cod = $codlodsigm; $rna = $rnalodsigm; 
		}
		
		if ($cod > 0.0) { $codpos++; }
		if ($rna > 0.0) { $rnapos++; }
		if ($cod > $cutoff) { $codcut++; }
		if ($rna > $cutoff) { $rnacut++; }
		
		#
		#Identify independent loci for the window just before.
		#
		$nloci1 = identify_loci($target, $ntseq, $overlap, $startblast1, $startfrag1, $name1, $coor1, 
					$startblast2, $startfrag2, $name2, $coor2, $realtype, $oth, $cod, $rna,
					\$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
		$nloci2 = identify_loci($target, $ntseq, $overlap, $startblast2, $startfrag2, $name2, $coor2, 
					$startblast1, $startfrag1, $name1, $coor1, $realtype, $oth, $cod, $rna,
					\$nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
 	    }
	    
	    if ($nnamesseq > $idx) 
	    { 
		
		$name1 = $name1_new; 
		$name2 = $name2_new; 
		
		$len = $len_new; 
		$id  = $id_new; 
		if ($id <= $id_max) { $ntseq ++; }
		
		$startblast1 = $startblast1_new;
		$startblast2 = $startblast2_new;

		$startfrag1 = $startfrag1_new;
		$startfrag2 = $startfrag2_new;
		
		#$idx = $nnamesseq; 
	    }
	    
	    $coor1 = $coor1_new;
	    $coor2 = $coor2_new;
	    
	} 
	
	elsif (/^length alignment:\s+(\S+) \(id=(\d+\.\d+)\)/) { 
	    $len_new = $1;
	    $id_new  = $2;
	}
	
	elsif (/^>(\S+)[\-\:](\d+[><]\d+)\-(.+)$/ && $seq == 0) { 
	    $name1_new = $1;
	    $startblast1_new = $2; 
	    $rest = $3;
	    
	    $startfrag1_new = 0;
	    if ($name1_new =~ /^(\S+)\/frag\d+(.+)$/) {
		$name1_new  = $1; 
		$startfrag1_new = $2;
		$name1_new  =~ s/\\//g;
		
		if ($startfrag1_new =~ /(\S+)\-\S+/) {
		    $startfrag1_new = $1;
		    $startfrag1_new =~ s/\///g; $startfrag1_new =~ s/\\//g; 
		    $startfrag1_new --;
		    
		}
	    }
	    if ($rest =~ /^(\S+)/) { $name1_new .= "\-$1"; }
	    
	    $seq = 1; 
	    $nnamesseq++; 
	}
	
	elsif (/^>(\S+)[\-\:](\d+[><]\d+)\-(.+)$/ && $seq == 1) { 
	    $name2_new = $1;
	    $startblast2_new = $2;
	    $rest = $3;
	    
	    $startfrag2_new = 0;
	    if ($name2_new =~ /^(\S+)\/frag\d+(.+)$/) {
		$name2_new  = $1; 
		$startfrag2_new = $2;
		$name2_new  =~ s/\\//g;
		
		if ($startfrag2_new =~ /(\S+)\-\S+/) {
		    $startfrag2_new = $1;
		    $startfrag2_new=~ s/\///g; $startfrag2_new =~ s/\\//g; 
		    $startfrag2_new --;
		    
		}
	    }
	    if ($rest =~ /^(\S+)/) { $name2_new .= "\-$1"; }
	    
	    $seq = 0; 
	    $nnamesseq++; 
	} 
	
	elsif (/^$tag/) { $num = 1; }
	
	elsif (/^OTH ends \*\([\-\+]\)\s+=\s+\((\d+)\.\.\[\d+\]\.\.(\d+)\)/) { 
	    $oth_lend = $1; 
	    $oth_rend = $2; 
	}

	elsif (/^COD ends \*\([\-\+]\)\s+=\s+\((\d+)\.\.\[\d+\]\.\.(\d+)\)/) { 
	    $cod_lend = $1; 
	    $cod_rend = $2; 
	}

	elsif (/^RNA ends \*\([\-\+]\)\s+=\s+\((\d+)\.\.\[\d+\]\.\.(\d+)\)/) { 
	    $rna_lend = $1; 
	    $rna_rend = $2; 
	}

	elsif (/winner = (\S+)/) { 
	    $type = $1; 

	    if    ($type =~ /^OTH$/) { $winner_lend = $oth_lend; $winner_rend = $oth_rend; }
	    elsif ($type =~ /^COD$/) { $winner_lend = $cod_lend; $winner_rend = $cod_rend; }
	    elsif ($type =~ /^RNA$/) { $winner_lend = $rna_lend; $winner_rend = $rna_rend; }
	    else                     { print "what ind of winner you have! $type\n"; die;  }
	}
	
	elsif (/^\s+ OTH = \s+(\S+)\s+ COD = \s+(\S+)\s+ RNA = \s+(\S+)/ && $num == 1) { 
	    $othsc = $1; 
	    $codsc = $2; 
	    $rnasc = $3; 
	    
	    $codlod = $codsc - $othsc;
	    $rnalod = $rnasc - $othsc;
	    
	    $othlodsigm = - log(exp(log(2.0)*($codsc-$othsc)) + exp(log(2.0)*($rnasc-$othsc)))/log(2.0);
	    $codlodsigm = - log(exp(log(2.0)*($othsc-$codsc)) + exp(log(2.0)*($rnasc-$codsc)))/log(2.0);
	    $rnalodsigm = - log(exp(log(2.0)*($othsc-$rnasc)) + exp(log(2.0)*($codsc-$rnasc)))/log(2.0);
	    
	    $cutoff = $fix_cutoff;                                  
	    
	    $num = 0;
	}
	
	else  { next; }
	
    }
    
    close (FILE);
    
    if ($type_of_score =~  /^simple$/) { 
	is_above_cutoff($cutoff, \$realtype, $codlod, $rnalod, \$noseq, \$ncseq, \$nrseq, \$ncoseq, \$nroseq, \$nrcseq, \$n3seq);                                
	$oth = 0; $cod = $codlod; $rna = $rnalod;    
    }
    else                               { 
	is_above_cutoff_sigmoidal($cutoff, \$realtype, $othlodsigm, $codlodsigm, $rnalodsigm, \$noseq, \$ncseq, \$nrseq); 
	$oth = $othlodsigm; $cod = $codlodsigm; $rna = $rnalodsigm; 
    }
    
    $nloci1 = identify_loci($target, $ntseq, $overlap, $startblast1, $startfrag1, $name1, $coor1, $startblast2, $startfrag2, $name2, $coor2, $realtype, $oth, $cod, $rna, 
			    \$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
    $nloci2 = identify_loci($target, $ntseq, $overlap, $startblast2, $startfrag2, $name2, $coor2, $startblast1, $startfrag1, $name1, $coor1, $realtype, $oth, $cod, $rna, 
			    \$nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
    
    
    print OUT "\n---------------Some General Statistics-------------------\n";
    print OUT "FILE:            \t", "$file\n";
    print OUT "method:          \t", "$tag\n";
    if ($fix_cutoff > -1) { print OUT "Cutoff:          \t", "$fix_cutoff\n\n"; }
    else                  { print OUT "Cutoff:          \t", "cutoff variable\n\n"; }
    print OUT "max id:          \t", "$id_max\n\n";
    if ($file_q) { print OUT "# inblastn hits: \t",  $n_in_ali ,"\n"; }
    print OUT "# blastn hits:   \t",  $nnamesseq/2.0 ,"\n";
    print OUT "# windows:       \t", "$ntseq\n";
    print OUT "---------------------------------------------------------\n";
    
    print OUT "\n---------------Statistics by Windows---------------------\n";
    print OUT "# windows:            \t", "$ntseq\n";
    print OUT "\nRNA>0:           \t", $rnapos,    "/", "$ntseq\n";
    print OUT "RNA>cutoff:       \t", $rnacut,    "/", "$ntseq\n\n";
    
    print OUT "COD>0:            \t", $codpos,    "/", "$ntseq\n";
    print OUT "COD>cutoff:       \t", $codcut,    "/", "$ntseq\n\n";
    
    $nphaseseq = $nrseq + $ncseq + $noseq;
    print OUT " in phases:        \t", $nphaseseq, "/", "$ntseq\n";
    print OUT "\tRNA:             \t", $nrseq,     "/", "$nphaseseq\n";
    print OUT "\tCOD:             \t", $ncseq,     "/", "$nphaseseq\n";
    print OUT "\tOTH:             \t", $noseq,     "/", "$nphaseseq\n\n";
    
    $nundseq   = $nrcseq + $nroseq + $ncoseq + $n3seq;
    print OUT " in transitions:   \t", $nundseq, "/", "$ntseq\n";
    print OUT "\tRNA/COD:         \t", $nrcseq,  "/", "$nundseq\n";
    print OUT "\tRNA/OTH:         \t", $nroseq,  "/", "$nundseq\n";
    print OUT "\tCOD/OTH:         \t", $ncoseq,  "/", "$nundseq\n";
    print OUT "\tRNA/COD/OTH:     \t", $n3seq,   "/", "$nundseq\n";
    print OUT "---------------------------------------------------------\n";
    
#
#for QUERY
#
    if ($whichorg == 1 || $whichorg == 12 || $towhomness) {
	if ($nloci1 > 0) {
	    arrange_loci($org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	    $nloci1 = recheck_loci(\$nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	}
	
    }
#
#for SUBJ
#
    if ($whichorg == 2 || $whichorg == 12 || $towhomness) {
	if ($nloci2 > 0) {
	    arrange_loci($org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
	    $nloci2 = recheck_loci(\$nloci2, \@name2, \@lloci2, \@rloci2, \@type2,\ @towhom2, \@howmany2, \\@othsc2, @codsc2, \@rnasc2);
	}
    }
    
#clean up towhom after both sets of loci have been obtained
#
    if ($towhomness) 
    {
	if ($whichorg == 1 || $whichorg == 12) { cleanup_towhom($nloci1, \@towhom1, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2); }
	if ($whichorg == 2 || $whichorg == 12) { cleanup_towhom($nloci2, \@towhom2, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1); }
    }
    
    if ($whichorg == 1) 
    {
	class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	class_stat("$outputfile2gff", $target, 0,           $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
    }
    elsif ($whichorg == 2) 
    {
	class_stat("$outputfile1gff", $target, 0,           $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
    }
    elsif ($whichorg == 12) 
    {
	class_stat("$outputfile1gff", $target, $towhomness, $org1, $nloci1, \@name1, \@lloci1, \@rloci1, \@type1, \@towhom1, \@howmany1, \@othsc1, \@codsc1, \@rnasc1);
	class_stat("$outputfile2gff", $target, $towhomness, $org2, $nloci2, \@name2, \@lloci2, \@rloci2, \@type2, \@towhom2, \@howmany2, \@othsc2, \@codsc2, \@rnasc2);
    }
    
}

sub arrange_loci {
    my ($org, $nloci, $name_ref, $lloci_ref, $rloci_ref, $type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $n = 0;
    my $k;
    my $lend;
    my $lstart = 0;
    my $lstart_n;
    my $cur_name;
    my $cur_name_quote;
    my $name_ref_quote;
    my @list;
    my @newlist;
    my %rend;
    my %type;
    my %towhom;
    my %howmany;
    my %othsc;
    my %codsc;
    my %rnasc;
    my $howmany_tot_bf = 0;
    my $howmany_tot_af = 0;
    my $count = 0;

    $cur_name       =           $name_ref->[0];
    $cur_name_quote = quotemeta $name_ref->[0];

    for ($l = 0; $l < $nloci; $l++) { $howmany_tot_bf += $howmany_ref->[$l]; }

    for ($l = 1; $l < $nloci; $l++) { 
	$name_ref_quote = quotemeta $name_ref->[$l];

   	if ($cur_name_quote eq $name_ref_quote) {
	    $n++;
	}
	else { $cur_name = $name_ref->[$l]; $cur_name_quote = quotemeta $name_ref->[$l]; $n = 0; $lstart_n = $l; }
	if ($n == 0) { 
	    undef @list; 
	    undef @newlist;
	    undef %rend; 
	    undef %type; 
	    undef %towhom; 
	    undef %howmany; 
	    undef %othsc; 
	    undef %codsc; 
	    undef %rnasc; 
	    for ($k = 0; $k < $lstart_n - $lstart; $k++) { 
		$list[$k] = $lloci_ref->[$k+$lstart]."-".$rloci_ref->[$k+$lstart].$towhom_ref->[$k+$lstart]; 
		$lend = $list[$k];

		$rend{$lend}    = $rloci_ref->[$k+$lstart];
		$type{$lend}    = $type_ref->[$k+$lstart];
		$towhom{$lend}  = $towhom_ref->[$k+$lstart];
		$howmany{$lend} = $howmany_ref->[$k+$lstart];
		$othsc{$lend}   = $othsc_ref->[$k+$lstart];
		$codsc{$lend}   = $codsc_ref->[$k+$lstart];
		$rnasc{$lend}   = $rnasc_ref->[$k+$lstart];
		
	    }
	    
	    @newlist = sort loci_left_to_right @list;

	    for ($k = 0; $k < $lstart_n - $lstart; $k++) {
		$lend = $newlist[$k];
		if ($lend) {
		    $count++;
		    if ($lend =~ /^(\d+)-/) { $lloci_ref->[$k+$lstart] = $1; }
		    $rloci_ref->[$k+$lstart]   = $rend{$lend};
		    $type_ref->[$k+$lstart]    = $type{$lend};
		    $towhom_ref->[$k+$lstart]  = $towhom{$lend};
		    $howmany_ref->[$k+$lstart] = $howmany{$lend};
		    $othsc_ref->[$k+$lstart]   = $othsc{$lend};
		    $codsc_ref->[$k+$lstart]   = $codsc{$lend};
		    $rnasc_ref->[$k+$lstart]   = $rnasc{$lend};
		}
	    }
	    $lstart = $lstart_n;
	}
    }
    
#last case
    $lstart_n = $lstart + $n + 1;
    undef @list;
    undef @newlist;
    undef %rend;
    undef %type;
    undef %towhom;
    undef %howmany;
    undef %othsc;
    undef %codsc;
    undef %rnasc;
    
   for ($k = 0; $k < $lstart_n - $lstart; $k++) {
 	$lend = $lloci_ref->[$k+$lstart]."-".$rloci_ref->[$k+$lstart].$towhom_ref->[$k+$lstart];
	
	$list[$k] = $lend;
	$rend{$lend}    = $rloci_ref->[$k+$lstart];
	$type{$lend}    = $type_ref->[$k+$lstart];
	$towhom{$lend}  = $towhom_ref->[$k+$lstart];
	$howmany{$lend} = $howmany_ref->[$k+$lstart];
	$othsc{$lend}   = $othsc_ref->[$k+$lstart];
	$codsc{$lend}   = $codsc_ref->[$k+$lstart];
	$rnasc{$lend}   = $rnasc_ref->[$k+$lstart];
    }
    
    @newlist = sort loci_left_to_right  @list;

    for ($k = 0; $k < $lstart_n - $lstart; $k++) {
	$lend = $newlist[$k];
	
	if ($lend) {
	    $count++;
	    
	    if ($lend =~ /^(\d+)-/) { $lloci_ref->[$k+$lstart] = $1; }
	    $rloci_ref->[$k+$lstart]   = $rend{$lend};
	    $type_ref->[$k+$lstart]    = $type{$lend};
	    $towhom_ref->[$k+$lstart]  = $towhom{$lend};
	    $howmany_ref->[$k+$lstart] = $howmany{$lend};
	    $othsc_ref->[$k+$lstart]   = $othsc{$lend};
	    $codsc_ref->[$k+$lstart]   = $codsc{$lend};
	    $rnasc_ref->[$k+$lstart]   = $rnasc{$lend};

	}
    }
    
    for ($l = 0; $l < $nloci; $l++) { $howmany_tot_af += $howmany_ref->[$l]; }
    
    #paranoia
    if ($count != $nloci) { die "bad loci count in $org: $count $nloci"; }
    if ($howmany_tot_bf != $howmany_tot_af) { die "bad blasthit count in $org: $howmany_tot_bf $howmany_tot_af"; }
    
}

sub by_mostly_numeric {
    ($a <=> $b) || ($a cmp $b);
}

sub check_with_qfile {
    my $ali = 0;

    open (INFILE,"$file_q") || die;
    while (<INFILE>) {
	if (/^>(\S+)-(\d+[><]\d+)-/ && $ali == 0) { 
	    $n_in_ali ++;
	    $ali = 1;
	}
	elsif (/^>(\S+)-(\d+[><]\d+)-/ && $ali == 1) { 
	    $ali = 0;
	}
    }
    close (INFILE);
}



sub class_stat {
    my ($outgff, $target, $towhomness, $org, $nloci, $name_ref, $lloci_ref, $rloci_ref, $type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $n = 0;
    my $ave_length = 0;
    my $nloci_type = 0;
    
    open (GFF,">>$outgff") || die;

    if ($target !~ /NULL/) {
	
	print OUT "\n---------------Statistics for $target loci ($org):-------------------\n";
	
	for ($l = 0; $l < $nloci; $l++) { 
	    if($type_ref->[$l] =~ /^$target$/) { 
		$nloci_type ++; 
		$ave_length += $rloci_ref->[$l] - $lloci_ref->[$l]; 
	    }
	}
	

	if ($nloci_type == 0) { print OUT "sorry no loci of type $target\n"; }
	else {
	    print OUT "# loci:\t", "$nloci_type\n";
	    printf OUT "ave_length:\t %.2f\n\n", $ave_length / $nloci_type;
	    
	    for ($l = 0; $l < $nloci; $l++) {
		if($type_ref->[$l] =~ /^$target$/) { 
		    $n ++; 
		    print OUT "$n-loci $name_ref->[$l] $lloci_ref->[$l] $rloci_ref->[$l] (", 
		    $rloci_ref->[$l]-$lloci_ref->[$l]+1,") $howmany_ref->[$l] $type_ref->[$l] "; 
		    printf OUT "%.2f %.2f\n", $codsc_ref->[$l], $rnasc_ref->[$l]; 

		    my $score;
		    if    ($target =~ /^RNA$/) { $score = $rnasc_ref->[$l]; }
		    elsif ($target =~ /^COD$/) { $score = $codsc_ref->[$l]; }
		    elsif ($target =~ /^OTH$/) { $score = $othsc_ref->[$l]; }
		    else                        { print "wrong type\n"; die; }

		    if (defined($opt_x)) { 
			print GFF "$usename\tQRNA_loci\t$type_ref->[$l]\t$lloci_ref->[$l]\t$rloci_ref->[$l]\t$score\t\.\t\.\tgene \"$name_ref->[$l]\"\n";        }
		    else {
			print GFF "$name_ref->[$l]\tQRNA_loci\t$type_ref->[$l]\t$lloci_ref->[$l]\t$rloci_ref->[$l]\t$score\t\.\t\.\tgene \"$name_ref->[$l]\"\n"; }


		    if ($towhomness) {print OUT "$towhom_ref->[$l]"; }
		}
	    }
	}
    }
    close (GFF);
}

sub cleanup_towhom {
    my ($nloci, $towhom_ref, $nlocib, $name_loci_ref, $lloci_ref, $rloci_ref, $type_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;

    my $k;
    my $l;
    my $name;
    my $lend;
    my $rend;
    my @towhom;
    my $towhom;
    my $bit_total;

    $bit_total = ""; for ($k = 0; $k < $nlocib; $k++) { $bit_total .= "0"; }

    for ($l = 0; $l < $nloci; $l++) { 
	
	@towhom = split (/:/, $towhom_ref->[$l]);
	
	$towhom_ref->[$l] = "";
	
	my $bit = ""; for ($k = 0; $k < $nlocib; $k++) { $bit .= "0"; }
	    
	foreach $towhom (@towhom) {
	    $towhom =~ /^(\S+)\-(\d+)\-(\d+)$/;
	    
	    $name = $1;
	    $lend = $2;
	    $rend = $3;
	    for ($k = 0; $k < $nlocib; $k++) { 
		
		if (is_hit_in_locus($name, $lend, $rend, $name_loci_ref->[$k], $lloci_ref->[$k], $rloci_ref->[$k])) 
		{  
		    if (substr($bit, $k, 1) == 0) 
		    { 
			$towhom_ref->[$l] .= "\t\[".($k+1)."\] ".$name_loci_ref->[$k]." ".$lloci_ref->[$k]." ".$rloci_ref->[$k]." \(".($rloci_ref->[$k]-$lloci_ref->[$k]+1);
			$towhom_ref->[$l] .= "\) ".$howmany_ref->[$k]." ".$type_ref->[$k]." ".(int($codsc_ref->[$k]*100)/100)." ".(int($rnasc_ref->[$k]*100)/100)."\n"; 
			
			substr($bit, $k, 1) = "1";       # well, now, it is.
			substr($bit_total, $k, 1) = "1"; # for total count
		    }		   
		}
	    }
	}
    }
    #paranoia
    my $total = ($bit_total =~ tr/1/1/);
    if ($total != $nlocib) { print "you did not account for all loci ($total $nlocib)\n"; die; }
    
}

sub cutoff_curve {
    my ($param_curve, $param_file, $N, $k) = @_;

    my $m1;
    my $m1_err;
    my $m2;
    my $m2_err;

    my $a1_exp;
    my $a1_err;
    my $a2_exp;
    my $a2_err;

    my $keye1;    
    my $keye2;    
    
    # do exp fit
    #
    #
    my $abs_id  = 100;
    my $max_id  = 99;
    my $half_id = 55;   # change of fitting curve point
    my $min_id  = 0;

    my $asyntote_id  = 25;   #change into an asyntote

    my $abs_lod  =   8.0;
    my $max_lod  =  15.0;

    my $id;
    my $lod;

    my $lod_half_id = fit_half_exp ("$param_file", \$a1_exp, \$a1_err, \$m1, \$m1_err, $max_id,  $half_id, $max_lod);
    my $lod_min_id  = fit_half_exp ("$param_file", \$a2_exp, \$a2_err, \$m2, \$m2_err, $half_id, $min_id,  $lod_half_id);

    my $lod_asyntote_id = $lod_half_id - $a2_exp * ($half_id - $asyntote_id) ** $m2;

    $keye1 = "fit\[$max_lod - $a1_exp$a1_err * ($max_id -x) ^ $m1$m1_err\]"; 
    $keye2 = "fit\[$lod_half_id - $a2_exp$a2_err * ($half_id-x) ^ $m2$m2_err\]"; 
    
    open (PARAM,">$param_curve") || die;
    print PARAM "#file: $param_curve\n";
    print PARAM "#fit1: $keye1\n";
    print PARAM "#fit2: $keye2\n";
    
    my $fun;
    my $dim = $N*$k;
    my $x;
    for (my $i = 0; $i <= $dim; $i++) {
	$x = $i/$k;
	 
	if    ($x >= $max_id)                        { $fun = $abs_lod;                                        print PARAM "$x $fun\n"; }
	elsif ($x <  $max_id  && $x >= $half_id)     { $fun = $max_lod     - $a1_exp * ($max_id  - $x) ** $m1; print PARAM "$x $fun\n"; }
	elsif ($x <  $half_id && $x >= $asyntote_id) { $fun = $lod_half_id - $a2_exp * ($half_id - $x) ** $m2; print PARAM "$x $fun\n"; }
	else                                         { $fun = $lod_asyntote_id;                                print PARAM "$x $fun\n"; }
	
    }
    close(PARAM);


}

sub cutoff_for_id {

    my ($param_curve, $id, $Nid, $kid) = @_;
    
    my $cutoff;
    my $x;
    my $y;

    open (PARAM, "$param_curve") || die;
    while(<PARAM>) {
	if (/^\#/) {
	    next;
	}
	if (/^(\S+)\s+(\S+)/) {
	    
	    $x = $1;
	    $y = $2;
	    
	    if ($id/$kid <= $x && $x < ($id+1)/$kid) { $cutoff = $y; last; }

	}
    }
    close(PARAM);
    

    return $cutoff;

}

sub extract_exp_fit_info {

    my ($fitlog, $plotfile, $m_ref, $a_ref) = @_;

    my $read = 0;

    open (FIT,"$fitlog") || die;
    while (<FIT>) {
	
	if (/^FIT:.+$plotfile/) { $read = 1; }
	elsif (/^FIT:/)         { $read = 0; }

	if    (/^m\s+=\s+(\S+)\s+\+\/\-\s+\S+\s+(\(\S+\%\))/)  { if ($read == 1) { $$m_ref  = $1.$2; } }
	elsif (/^a\s+=\s+(\S+)\s+\+\/\-\s+\S+\s+(\(\S+\%\))/)  { if ($read == 1) { $$a_ref  = $1.$2; } }
    }
    close (FIT);
 
}



sub  fit_half_exp {

    my ($param_file, $a_exp_ref, $a_err_ref, $m_ref, $m_err_ref, $max_x, $min_x, $max_y) = @_;
    
    my $m;
    my $m_err;
    my $a;
    my $a_err;

    my $x;
    my $y;
    
    my $y_min_x;

    open (FO,">foo") || die;
    open (FILE,"$param_file") || die;
    while(<FILE>) {
	
	if (/^(\S+)\s+(\S+)/) {
	    $x = $1;
	    $y = $2;
	    
	    if ($y <= $max_y && $x < $max_x && $x >= $min_x) {
		printf(FO "%.4f %.4f\n", log($max_x-$x), log($max_y-$y));
	    }
	    
	}
    }
    close (FILE);
    close(FO);
    
    open(GP,'|'.GNUPLOT) || die "Gnuplot: $!";
    print GP "fe(x) = m*x + a\n"; 
    print GP "fit fe(x) 'foo' using 1:2  via m,a \n";
    close (GP);
    
    extract_exp_fit_info ("fit.log", "foo", \$m, \$a);
    
    $m =~ /(\S+)(\(\S+\%\))/;  $m = $1; $m_err = $2;
    $a =~ /(\S+)(\(\S+\%\))/;  $a = $1; $a_err = $2;
    
    my $a_exp = exp ($a);
    
    $a_exp *= 1000;
    $a_exp = int $a_exp;
    $a_exp /= 1000;

    $m *= 1000;
    $m = int $m;
    $m /= 1000;

    $$a_exp_ref = $a_exp;
    $$a_err_ref = $a_err;

    $$m_ref = $m;
    $$m_err_ref = $m_err;

    system ("rm 'fit.log'\n"); 
    system("rm foo\n");

    $y_min_x = $max_y - $a_exp*($max_x-$min_x)**$m;
    $y_min_x *= 1000;
    $y_min_x = int $y_min_x;
    $y_min_x /= 1000;
     
   return $y_min_x
}


sub identify_loci {
    my ($target, $ntseq, $overlap, $startblast1, $startfrag1, $name1, $coor1,  $startblast2, $startfrag2, $name2, $coor2, 
	$type, $oth, $cod, $rna, $nloci_ref, $name_ref, $lloci_ref, $rloci_ref, 
	$type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref)  = @_;
    my $l;
    my $coor1l;
    my $coor1r;
    my $coor2l;
    my $coor2r;
    my $startwin1;
    my $startwin2;
    my $endwin1;
    my $endwin2;
    my $new = 1;
    my $nloci;
    my $abs;
    my $name1_quote;
    my $name2_quote;
    my $rest;
    
    $nloci = $$nloci_ref;
    
   if ($coor1 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) {

	#remember conventions for qrna output:
	#
	#      posX: 0-62 [0-59](60) 
	#
	# is an alignment of 63 positions with 3 gaps. 
	# So the actual positions are from 0 to 59 not to 62.
	#
	#
	$coor1l = $1;
	$coor1r = $2;
    }

    if ($coor2 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) {

	#remember conventions for qrna output:
	#
	#      posX: 0-62 [0-59](60) 
	#
	# is an alignment of 63 positions with 3 gaps. 
	# So the actual positions are from 0 to 59 not to 62.
	#
	#
	$coor2l = $1;
	$coor2r = $2;
    }

    #get the ENDS depending on the strand
    #

    if ($startblast1 =~ /(\d+)>(\d+)/) {
	$startwin1 = $1 + $coor1l;
	$endwin1   = $1 + $coor1r;
    }
    elsif ($startblast1 =~ /(\d+)<(\d+)/) {
	$startwin1 = $2 - $coor1r;
	$endwin1   = $2 - $coor1l;
    }
    else { print "identify_loci(): fasta name has to indicate strand (1): $startblast1\n"; die; }

    if ($startblast2 =~ /(\d+)>(\d+)/) {
	$startwin2 = $1 + $coor2l;
	$endwin2   = $1 + $coor2r;
    }
    elsif ($startblast2 =~ /(\d+)<(\d+)/) {
	$startwin2 = $2 - $coor2r;
	$endwin2   = $2 - $coor2l;
    }
    else { print "identify_loci(): fasta name has to indicate strand (2): $startblast2\n"; die; }

    $startwin1 += $startfrag1;
    $startwin2 += $startfrag2;
    $endwin1   += $startfrag1;
    $endwin2   += $startfrag2;

   #paranoia
    if ( ($startwin1 > $endwin1) || $startwin1 < 0 || $endwin1 < 0 ) { print "got ends of the window wrong\n"; die; }
    if ( ($startwin2 > $endwin2) || $startwin2 < 0 || $endwin2 < 0 ) { print "got ends of the window wrong\n"; die; }
    
    # regex metacharacters: \ | ( ) [ { ^ $ * + ? .
    #
    $name1_quote = quotemeta $name1;
    $name2_quote = quotemeta $name2;
   
    $name1 =~ s/\\//g;
    if ($name1 =~ /(\S+)[\-\:](\d+)\-(\d+)/ || $name1 =~ /(\S+)\/frag.+\/(\d+)\-(\d+)/) { 
	$name1 = $1;
	if ($2 < $3) { $abs = $2-1; }
	else         { $abs = $3-1; }
	$startwin1 += $abs; $endwin1 += $abs;

    }
    
    $name2 =~ s/\\//g;
    if ($name2 =~ /(\S+)[\-\:](\d+)\-(\d+)/ || $name2 =~ /(\S+)\/frag.+\/(\d+)\-(\d+)/) { 
	$name2 = $1;
	if ($2 < $3) { $abs = $2-1; }
	else         { $abs = $3-1; }
	$startwin2 += $abs; $endwin2 += $abs;

    }
    
   #start the first locus
    if ($nloci == 0 && $type =~ /^$target$/) {
	$name_ref->[$nloci]    = $name1;
	$lloci_ref->[$nloci]   = $startwin1;
	$rloci_ref->[$nloci]   = $endwin1;
	$type_ref->[$nloci]    = $type;
	$towhom_ref->[$nloci]  = $name2."-".$startwin2."-".$endwin2.":";
	$howmany_ref->[$nloci] = 1;
	$othsc_ref->[$nloci]   = $oth;
	$codsc_ref->[$nloci]   = $cod;
	$rnasc_ref->[$nloci]   = $rna;
	$nloci++;
    }
    
    elsif ($nloci > 0) {

         #identify if this is a new locus
	for ($l = 0; $l < $nloci; $l++) {
	    my $name_ref_quote = quotemeta $name_ref->[$l];

	    #an already existing locus, modify the ends if convenient  ---  
	    if (
		$name1_quote eq $name_ref_quote             &&
		$type       =~ /^$type_ref->[$l]$/          && 
		overlap ($lloci_ref->[$l], $rloci_ref->[$l], $startwin1, $endwin1, $overlap) == 1)
	    {
		#modify ends
		if($startwin1 < $lloci_ref->[$l]) { $lloci_ref->[$l] = $startwin1; }
		if($endwin1   > $rloci_ref->[$l]) { $rloci_ref->[$l] = $endwin1;   }
		
		$towhom_ref->[$l]  .= $name2."-".$startwin2."-".$endwin2.":";
		$howmany_ref->[$l] ++;
		$othsc_ref->[$l]   += $oth;
		$codsc_ref->[$l]   += $cod;
		$rnasc_ref->[$l]   += $rna;
		$new = 0;
		last;
	    }

	}
	
	if ($new == 1 && $type =~ /^$target$/) {
	    $name_ref->[$nloci]    = $name1;
	    $lloci_ref->[$nloci]   = $startwin1;
	    $rloci_ref->[$nloci]   = $endwin1;
	    $type_ref->[$nloci]    = $type;
	    $towhom_ref->[$nloci]  = $name2."-".$startwin2."-".$endwin2.":";
	    $howmany_ref->[$nloci] = 1;
	    $othsc_ref->[$nloci]   = $oth;
	    $codsc_ref->[$nloci]   = $cod;
	    $rnasc_ref->[$nloci]   = $rna;

	    $nloci++;
	}
    }

    return $nloci;
}

sub is_above_cutoff {
    my ($cutoff, $type_ref, $codlod, $rnalod, $noseq_ref, $ncseq_ref, $nrseq_ref, $ncoseq_ref, $nroseq_ref, $nrcseq_ref, $n3seq_ref) = @_;

    my $above;
    my $sqrt2 = sqrt(2.0);
    my $log2 = 0.69314718056;

   if ($codlod <= $cutoff && $codlod >= -$cutoff &&
	$rnalod <= $cutoff && $rnalod >= -$cutoff) { 
	$$n3seq_ref++; $$type_ref = 'RNA/COD/OTH'; $above = 0; 
    }
    elsif ($codlod < -$cutoff &&
	   $rnalod <= $cutoff && $rnalod >= -$cutoff) { 
	$$nroseq_ref++; $$type_ref = 'RNA/OTH'; $above = 0; 
    }
    elsif ($rnalod < -$cutoff &&
	   $codlod <= $cutoff && $codlod >= -$cutoff) { 
	$$ncoseq_ref++; $$type_ref = 'COD/OTH'; $above = 0; 
    }
    elsif ($rnalod <= $codlod+$sqrt2*$cutoff && $rnalod >= $codlod-$sqrt2*$cutoff) { 
	$$nrcseq_ref++; $$type_ref = 'RNA/COD'; $above = 0; 
    }
    elsif ($rnalod < -$cutoff && $codlod < -$cutoff) { 
	$$noseq_ref++; $$type_ref = 'OTH';     $above = 1; 
    } 
    elsif ($codlod > $cutoff && $rnalod < $codlod-$sqrt2*$cutoff) { 
	$$ncseq_ref++; $$type_ref = 'COD';     $above = 1; 
    } 
    elsif ($rnalod > $cutoff && $codlod < $rnalod-$sqrt2*$cutoff) { 
	$$nrseq_ref++; $$type_ref = 'RNA';     $above = 1; 
    } 
    else { print "unknown type for (rnasc, codsc) = (", $rnalod, ", $codlod)\n"; }

    return $above;
}

sub is_above_cutoff_sigmoidal {
    my ($cutoff, $type_ref, $othlodsigm, $codlodsigm, $rnalodsigm, $noseq_ref, $ncseq_ref, $nrseq_ref) = @_;

    my $above;
  
    if    ($codlodsigm >= $cutoff) { $$ncseq_ref++; $$type_ref = 'COD'; $above = 1; }
    elsif ($rnalodsigm >= $cutoff) { $$nrseq_ref++; $$type_ref = 'RNA'; $above = 1; }
    elsif ($othlodsigm >= $cutoff) { $$noseq_ref++; $$type_ref = 'OTH'; $above = 1; }
    else                           { $$type_ref = '';    $above = 0; }

    return $above;
}

sub is_hit_in_locus {
    my ($name_hit, $start_hit, $end_hit, $name_loci, $start_loci, $end_loci) = @_;

    my $is = 0;

    if ($name_hit  eq $name_loci  && 
	$start_hit >= $start_loci && 
	$end_hit   <= $end_loci     )        { $is = 1; }
    
    return $is;
}

sub loci_left_to_right {

    my $la;
    my $lb;
    my $ra;
    my $rb;
    my $ta;
    my $tb;

    $a =~ /(\d+)\-(\d+)(\S+)/;
    $la = $1; $ra = $2; $ta = $3;

    $b =~ /(\d+)\-(\d+)(\S+)/;
    $lb = $1; $rb = $2; $tb = $3;

    ($la <=> $lb) || ($ra <=> $rb) || ($ta cmp $tb);
}



# the reason for this subroutine is that ones we have identified the independent loci,
# we have to go back to check that those loci (which may have expanded their limits after being defined)
# do not overlap.

sub recheck_loci {
    my ($nloci_ref, $name_ref, $lloci_ref, $rloci_ref, $type_ref, $towhom_ref, $howmany_ref, $othsc_ref, $codsc_ref, $rnasc_ref) = @_;
    my $l;
    my $k;
    my $sh;
    my $name;
    my $name_ref_quote;
    my $startloci;
    my $endloci;
    my $type;
    my $towhom;
    my $howmany;
    my $othsc;
    my $codsc;
    my $rnasc;
    my $nloci;
    
    $nloci = $$nloci_ref;
    
    for ($k = 0; $k < $nloci; $k++) {
	$name_ref_quote = quotemeta $name_ref->[$k];
	
	for ($l = $k+1; $l < $nloci; $l++) {
	    $name      = quotemeta $name_ref->[$l];
	    $startloci = $lloci_ref->[$l];
	    $endloci   = $rloci_ref->[$l];
	    $type      = $type_ref->[$l];
	    $towhom    = $towhom_ref->[$l];
	    $howmany   = $howmany_ref->[$l];
	    $othsc     = $othsc_ref->[$l];
	    $codsc     = $codsc_ref->[$l];
	    $rnasc     = $rnasc_ref->[$l];
	    
	    #an already existing locus, modify the ends if convenient
	    if ($name      eq $name_ref_quote            && 
		$type      =~ /^$type_ref->[$k]$/        &&		
		overlap ($lloci_ref->[$k], $rloci_ref->[$k], $startloci, $endloci, $overlap) == 1)
	    {
		#modify ends
		if($startloci < $lloci_ref->[$k]) { $lloci_ref->[$k] = $startloci; }
		if($endloci   > $rloci_ref->[$k]) { $rloci_ref->[$k] = $endloci;   }
		
		$towhom_ref->[$k]  .= $towhom;
		$howmany_ref->[$k] += $howmany;
		$othsc_ref->[$k]   += $othsc;
		$codsc_ref->[$k]   += $codsc;
		$rnasc_ref->[$k]   += $rnasc;
		
		for ($sh = $l; $sh < $nloci-1; $sh++) {
		    
		    $name_ref->[$sh]    = $name_ref->[$sh+1];
		    $lloci_ref->[$sh]   = $lloci_ref->[$sh+1];
		    $rloci_ref->[$sh]   = $rloci_ref->[$sh+1];
		    $type_ref->[$sh]    = $type_ref->[$sh+1];
		    $towhom_ref->[$sh]  = $towhom_ref->[$sh+1];
		    $howmany_ref->[$sh] = $howmany_ref->[$sh+1];
		    $othsc_ref->[$sh]   = $othsc_ref->[$sh+1];
		    $codsc_ref->[$sh]   = $codsc_ref->[$sh+1];
		    $rnasc_ref->[$sh]   = $rnasc_ref->[$sh+1];
		}
		
		$nloci--;
		$k --;
	    }
	}
    }
    
    #finallly, normalize mean scores and calculate types.
    for ($l = 0; $l < $nloci; $l++) {	
	$codsc_ref->[$l] /= $howmany_ref->[$l];
	$rnasc_ref->[$l] /= $howmany_ref->[$l];

    }
    

    return $nloci;
}

sub overlap {
    my ($lend_loci, $rend_loci, $lend, $rend, $overlap)  = @_;

    my $is_same_loci = 0;

    if ($lend >= $lend_loci && $rend <= $rend_loci) { $is_same_loci = 1; } #is included
    if ($lend <= $lend_loci && $rend >= $rend_loci) { $is_same_loci = 1; } #extends over

    if ($rend < $rend_loci && $rend >= $lend_loci+$overlap)  { $is_same_loci = 1; } #left-end overlap
    if ($lend > $lend_loci && $lend <= $rend_loci-$overlap)  { $is_same_loci = 1; } #right-end overlap


    return $is_same_loci;

}
