#!/usr/bin/perl -w

#qrna2gff.pl

use strict;
use vars qw ($opt_c $opt_i $opt_g $opt_s $opt_u $opt_w $opt_x);  # required if strict used
use Getopt::Std;
use constant GNUPLOT => '/usr/bin/gnuplot';

getopts ('i:c:g:s:u:w:x:');     # ('aci:p:o:') means 'ac' are flags, 'i:p:o:' gets following scalar.


# Print a helpful message if the user provides no input file.
if (!@ARGV) { 
        print "usage:  qrna2gff.pl [options] file.qrna\n\n";
	print "options:\n";
        print "-c <case>          :  cases (default is case = 1)\n";
        print "                        possible cases are:\n";
        print "                        0=GLOBAL\n";  
        print "                        1=LOCAL_DIAG_VITERBI 2=LOCAL_DIAG_FORWARD\n";
        print "                        3=LOCAL_SEMI_VITERBI 4=LOCAL_SEMI_FORWARD\n";
        print "                        5=LOCAL_FULL_VITERBI 6=LOCAL_FULL_FORWARD\n";
	print "-i <id_max>        :  max identity of alignments analysed (default is id_max = 100)\n";
	print "-g <typetarget>    :  which type of loci you want to analyze (default is all)\n";
        print "                        possible types of loci are:\n";
        print "                        OTH | COD | RNA \n";
        print "-s <type_of_score> : type of score (sigmoidal | simple)       [default = sigmoidal]\n";
	print "-u <cutoff>        : default is cutoff = 0\n";
	print "-w <whichorg>      : default is whichorg = 1  (use 1-for-org1 2-for-org2 12-for-both)\n";
	print "-x <name>          : ignore given name, use this one for gff outputs\n\n";
       exit;
}
my $verbose;
undef $verbose;

my $file    = shift;
my $tag;
my $type;

my $dir;
my $filename;

if ($file =~ /^(\S+)\/([^\/]+)$/) {
    $dir  = $1;
    $filename = $2;
}
else {
    $dir  = "";
    $filename = $file;
}
print "file: $filename\n";
print "dir:  $dir/\n";

my $output   = "$dir";


my $n_in_ali = 0; 

my $typetarget; 
if ($opt_g) { $typetarget = $opt_g; } 
else        { $typetarget = "all";  }
$verbose = $typetarget;


my $whichorg ;
if ($opt_w) { $whichorg = $opt_w; }
else        { $whichorg = 1;      }
if ($whichorg != 1 && $whichorg != 2 && $whichorg != 12 ) { print "organism has to be '1' or '2' or '12'\n"; die }

my $usename;
if ($opt_x) { $usename = $opt_x; }

my $cutoff;
if (defined($opt_u)) { $cutoff = $opt_u; }
else                 { $cutoff = 0;     }

my $id_max;
if (defined($opt_i)) { $id_max = $opt_i; }
else                 { $id_max = 100;    }

my $type_of_score;
if ($opt_s) { $type_of_score = $opt_s;      }
else        { $type_of_score = "sigmoidal"; }
if ($type_of_score =~ /^simple$/ || $type_of_score =~ /^sigmoidal$/) { ;}
else { print "wrong type of score. options are: 'simple' or 'sigmoidal'"; die; }

my $case;
if ($opt_c) { $case = $opt_c; }
else        { $case = 1;      }

if   ($case==0) { $tag = "GLOBAL";        }
elsif($case==1) { $tag = "LOCAL_DIAG_VITERBI"; }
elsif($case==2) { $tag = "LOCAL_DIAG_FORWARD"; }
elsif($case==3) { $tag = "LOCAL_SEMI_VITERBI"; }
elsif($case==4) { $tag = "LOCAL_SEMI_FORWARD"; }
elsif($case==5) { $tag = "LOCAL_FULL_VITERBI"; }
elsif($case==6) { $tag = "LOCAL_FULL_FORWARD"; }

my $sqrt2 = sqrt(2.0);

my $gff_file = "$file.$typetarget.CUTOFF$cutoff.gff";

my $num = 0;

my $coor1;
my $coor2;

my $othsc;
my $rnasc;
my $codsc;

my $rnalod;
my $codlod;

my $othlodsigm;
my $rnalodsigm;
my $codlodsigm;

my $rna;
my $cod;

my $name1;
my $name2;

my @name1;
my @name2;
my $rest;

my @lloci1;
my @lloci2;

my @rloci1;
my @rloci2;

my @type1;
my @type2;

my @codsc1;
my @codsc2;

my @rnasc1;
my @rnasc2;

my $startblast1;
my $startblast2;

my $seq = 0;

my $idx = 0;

my $len;
my $len_new;
my $id;
my $id_new;

my $score;

my $shuffle = 0;

my $time;

my $coor1l;
my $coor1r;
my $coor2l;
my $coor2r;
my $startwin1;
my $startwin2;
my $endwin1;
my $endwin2;
my $new = 1;

my $abs1;
my $abs2;
my $name1_quote;
my $name2_quote;

my $nnamesseq = 0;

my $qrna;

open (GFF,">$gff_file") || die;

open (FILE,"$file") || die;
while (<FILE>) {
    
    if (/^Divergence time \(\S+\):\s+(\S+)/) {
	$time = $1;
    }
    elsif (/^\#.+shuffled/) { 
	$shuffle = 1; 
    }
    elsif (/^>(\S+)[\/\-\:](\d+[><]\d+)\-(.+)$/ && $seq == 0) { 
	$name1 = $1;
	$startblast1 = $2; 
	$rest = $3;
	
	# regex metacharacters: \ | ( ) [ { ^ $ * + ? .
	#
	$name1_quote = quotemeta $name1;
	
	$abs1 =0;	

	if ($name1 =~ /^(\S+)\/frag\d+(.+)$/) {
	    $name1 = $1; 
	    $rest  = $2;
	    $name1 =~ s/\\//g;
	    
	    if ($rest =~ /(\S+)\-\S+/) {
		$rest = $1;
		$rest =~ s/\///g; $rest =~ s/\\//g; 
		$abs1 = $rest-1;
		
	    }
	}
	$name1 =~ s/\\//g;
	if ($name1 =~ /^(\S+)[\/\-\:](\d+)\-(\d+)/) { 
	    $name1 = $1;
	    $rest  = $4;
	    if ($2 < $3) { $abs1 += $2-1; }
	    else         { $abs1 += $3-1; }
	}
 
	$seq = 1; 
	$nnamesseq++; 
    }
    
    elsif (/^>(\S+)[\/\-\:](\d+[><]\d+)\-(.+)$/ && $seq == 1) { 
	$name2 = $1;
	$startblast2 = $2;
	$rest = $3;
	
	# regex metacharacters: \ | ( ) [ { ^ $ * + ? .
	#
	$name2_quote = quotemeta $name2;
	
	$abs2 = 0;
	if ($name2 =~ /^(\S+)\/frag\d+(.+)$/) {
	    $name2 = $1; 
	    $rest  = $2;
	    $name2 =~ s/\\//g;
	    
	    if ($rest =~ /(\S+)\-\S+/) {
		$rest = $1;
		$rest =~ s/\///g; $rest =~ s/\\//g; 
		$abs2 = $rest-1;
		
	    }
	}
	$name2 =~ s/\\//g;
	if ($name2 =~ /^(\S+)[\-\:](\d+)\-(\d+)/) { 
	    $name2 = $1;
	    $rest = $4;
	    if ($2 < $3) { $abs2 += $2-1; }
	    else         { $abs2 += $3-1; }
	    
	}	    	    
		
	$seq = 0; 
	$nnamesseq++; 
    } 
    
    elsif (/^length alignment:\s+(\S+) \(id=(\d+\.\d+)\)/) { 
	$len = $1;
	$id  = $2;
    }
    
    elsif (/^posX: (.+)$/ ) { 
	$coor1 = $1;
	
 	if ($coor1 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) {
	    
	    #remember conventions for qrna output:
	    #
	    #      posX: 0-62 [0-59](60) 
	    #
	    # is an alignment of 63 positions with 3 gaps. 
	    # So the actual positions are from 0 to 59 not to 62.
	    #
	    #
	    $coor1l = $1;
	    $coor1r = $2;
	}

	#get the ENDS depending on the strand
	#	
	if ($startblast1 =~ /(\d+)>(\d+)/) {
	    $startwin1 = $1 + $coor1l;
	    $endwin1   = $1 + $coor1r;
	}
	elsif ($startblast1 =~ /(\d+)<(\d+)/) {
	    $startwin1 = $2 - $coor1r;
	    $endwin1   = $2 - $coor1l;
	}
	else { print "identify_loci(): fasta name has to indicate strand (1): $startblast1\n"; die; }

	$startwin1 += $abs1; 
	$endwin1   += $abs1;

	#paranoia
	if ( ($startwin1 > $endwin1) || $startwin1 < 0 || $endwin1 < 0 ) { print "got ends of the window wrong\n"; die; }
	

    }
    
    elsif (/^posY: (.+)$/) { 
	$coor2 = $1;
	
	if ($coor2 =~ /^\d+-\d+\s+\[(\d+)-(\d+)\]\((\d+)\)/) {
	    
	    #remember conventions for qrna output:
	    #
	    #      posX: 0-62 [0-59](60) 
	    #
	    # is an alignment of 63 positions with 3 gaps. 
	    # So the actual positions are from 0 to 59 not to 62.
	    #
	    #
	    $coor2l = $1;
	    $coor2r = $2;
	}
	if ($startblast2 =~ /(\d+)>(\d+)/) {
	    $startwin2 = $1 + $coor2l;
	    $endwin2   = $1 + $coor2r;
	}
	elsif ($startblast2 =~ /(\d+)<(\d+)/) {
	    $startwin2 = $2 - $coor2r;
	    $endwin2   = $2 - $coor2l;
	}
	else { print "identify_loci(): fasta name has to indicate strand (2): $startblast2\n"; die; }
	
	$startwin2 += $abs2; 
	$endwin2   += $abs2;

	#paranoia
	if ( ($startwin2 > $endwin2) || $startwin2 < 0 || $endwin2 < 0 ) { print "got ends of the window wrong\n"; die; }
		
    } 
    
    elsif (/^$tag/) { $num = 1; }
    
    elsif (/winner = (\S+)/) { $type = $1; }
    
    elsif (/^\s+ OTH = \s+(\S+)\s+ COD = \s+(\S+)\s+ RNA = \s+(\S+)/ && $num == 1) { 
	$othsc = $1; 
	$codsc = $2; 
	$rnasc = $3; 
	
	$codlod = $codsc - $othsc;
	$rnalod = $rnasc - $othsc;
	
	$othlodsigm = - log(exp(log(2.0)*($codsc-$othsc)) + exp(log(2.0)*($rnasc-$othsc)))/log(2.0);
	$codlodsigm = - log(exp(log(2.0)*($othsc-$codsc)) + exp(log(2.0)*($rnasc-$codsc)))/log(2.0);
	$rnalodsigm = - log(exp(log(2.0)*($othsc-$rnasc)) + exp(log(2.0)*($codsc-$rnasc)))/log(2.0);
	
	undef($score);
	if    ($type =~/^RNA$/) { $score = $rnalodsigm; }
	elsif ($type =~/^COD$/) { $score = $codlodsigm; }
	elsif ($type =~/^OTH$/) { $score = $othlodsigm; }
	else                    { print "wrong type ($type)\n"; die; }
	
	$qrna = "QRNA";
	if ($shuffle == 1) { $type = "sh$type"; $qrna = "shQRNA"; }
	
	$num = 0;
	
	if ($score) {

	    if ($typetarget =~ /^all$/ || $type =~ /$typetarget$/) 
	    {
		if ($score >= $cutoff) {
		    if ($whichorg == 1) {
			if (defined($opt_x)) { print GFF "$usename\t$qrna\t$type\t$startwin1\t$endwin1\t$score\t.\t\.\tgene '$name1' id '$id'\n"; }
			else                 { print GFF "$name1\t$qrna\t$type\t$startwin1\t$endwin1\t$score\t.\t\.\tgene '$name1' id '$id'\n";   }
		    }
		    if ($whichorg == 2) {
			if (defined($opt_x)) { print GFF "$usename\t$qrna\t$type\t$startwin2\t$endwin2\t$score\t.\t\.\tgene '$name2' id '$id'\n"; }
			else                 { print GFF "$name2\t$qrna\t$type\t$startwin2\t$endwin2\t$score\t.\t\.\tgene '$name2' id '$id'\n";   } 
		    }
		}
	    }
	    
	}
    }
    
    else  { next; }
    
}
close (FILE);


