/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* qrnaversion.c
 *
 * contains the functions score_() and score_scan() that decide the qrna mode.
 *
 * E. Rivas [St. Louis]
 * 
 * Sun Oct 13 13:50:38 CDT 2002
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <time.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"
#include "version.h"

/* Function: IndexForwardWindow() 
 *
 * date:     Mon Dec  3 11:47:25 CST 2001
 *
 * Purpose:  For a given position j, and windows of length win that slide
 *           off positions at a time, calculate dfmax, the maximum length 
 *           of the interval to consider at j when scanning forward.
 *
 *           win >= off:
 *               j <  win   dfmax = j
 *               j >= win   dfmax = (win-off) + (j-win) % off
 *                          (cycles through win-off .. win-1)
 *
 *           win <  off:
 *               j % off >= win   dfmax = -1  
 *                                (presumably j falls between two consecutive windows)
 *               j <  win         dfmax = j
 *               otherwise        dfmax = j % off
 *
 * Args:     L     - length of the alignment (not used by this function)
 *           win   - length of the window
 *           off   - number of positions the window slides; has to be positive
 *                   whenever a remainder by off is required
 *           j     - position in the alignment
 *
 * Return:   returns dfmax (always smaller than win; may be -1).
 *           Dies if off is not positive and the remainder j % off is needed,
 *           or if the calculated dfmax is not smaller than win.
 */
int
IndexForwardWindow(int L, int win, int off, int j)
{
  int jfoff;
  int dfmax = -1;  /* defined value even on the paths that end in Die() */
  
  if (win >= off) 
    {
      if (j < win) dfmax = j;
      else 
	{
	  /* off == 0 used to reach the remainder below (division by zero) 
	   * instead of the error the function was meant to report.
	   */
	  if (off <= 0) Die("you have to slide at least one nt at the time!");
	  
	  dfmax = (win-off) + (j-win) % off;
	}
    }

  else if (off > 0)   /* here win < off */
    {
      jfoff = j % off;

      if      (jfoff >= win) dfmax = -1;
      else if (j < win)      dfmax = j;
      else                   dfmax = jfoff;
    }

  else Die("you have to slide at least one nt at the time!");

  if (dfmax >= win) Die("Bad assignment of dfmax. IndexForwardWindow() j = %d dfmax = %d", j, dfmax);
 
  return dfmax;
}

/* Function: IndexBackwardWindow() 
 *
 * Purpose:  Counterpart of IndexForwardWindow() in which the remainders are
 *           taken on the distance of j to the last position of the alignment, 
 *           (L-1)-j. For a given position j, calculate dbmax, the maximum 
 *           length of the interval to consider at j.
 *
 *           win >= off:
 *               j <  win   dbmax = j
 *               j >= win   dbmax = (win-1) - ((L-1)-j) % off
 *
 *           win <  off:
 *               ((L-1)-j) % off >= win   dbmax = -1
 *               j <  win                 dbmax = j
 *               otherwise                dbmax = (win-1) - ((L-1)-j) % off
 *
 * Args:     L     - length of the alignment
 *           win   - length of the window
 *           off   - number of positions the window slides; has to be positive
 *                   whenever a remainder by off is required
 *           j     - position in the alignment
 *
 * Return:   returns dbmax (always smaller than win; may be -1).
 *           Dies if off is not positive and a remainder by off is needed,
 *           or if the calculated dbmax is not smaller than win.
 */
int
IndexBackwardWindow(int L, int win, int off, int j)
{
  int jboff;
  int dbmax = -1;  /* defined value even on the paths that end in Die() */
  
  if (win >= off) 
    {
      if (j < win) dbmax = j;
      else 
	{
	  /* off == 0 used to reach the remainder below (division by zero) 
	   * instead of the error the function was meant to report.
	   */
	  if (off <= 0) Die("you have to slide at least one nt at the time!");

	  dbmax = (win-1) - ((L-1)-j) % off;
	}
    }

  else if (off > 0)   /* here win < off */
    {
      jboff = ((L-1) - j) % off;

      if      (jboff >= win) dbmax = -1;
      else if (j < win)      dbmax = j;
      else                   dbmax = (win-1) - jboff;
    }

  else Die("you have to slide at least one nt at the time!");

  if (dbmax >= win) Die("Bad assignment of dbmax. IndexBackwardWindow() j = %d dbmax = %d", j, dbmax);
  
  return dbmax;
}

/* Function: IndexWindow() 
 *
 * date:     Mon Mar  3 11:23:36 CST 2003
 *
 * Purpose:  For a given j corresponding to a full window, calculate the index of the corresponding window.
 *
 *           For the reverse strand, j is first converted to 
 *           lookj = min( (L-1-j)+(win-1), L-1 ); for the forward strand lookj = j.
 *           A window with lookj == L-1 is the last one (index num_win-1); 
 *           otherwise the index is (lookj-(win-1))/off.
 *
 * Args:     j         - position in the alignment
 *           L         - length of the alignment
 *           win       - length of the window
 *           off       - number of positions the window slides
 *           revstrand - TRUE if j refers to the reverse strand
 *
 * Return:   returns the index of the window, or -1 if IsFullWindow() does
 *           not report a full window for j (an indeterminate value used
 *           to be returned in that case).
 *           Dies if lookj is larger than L-1, or if off is not positive
 *           when the division by off is required.
 */
int
IndexWindow(int j, int L, int win, int off, int revstrand)
{
  int lookj;
  int idx_win = -1;   /* "not a full window" */
  int num_win;

  num_win = NumberScanningWindows(L, win, off);
  
  if (IsFullWindow(revstrand, L, win, off, j)) {
    
    if (revstrand) lookj = ((L-1-j)+(win-1) < L-1)? (L-1-j)+(win-1) : L-1;
    else           lookj = j;

    if (lookj == L-1) 
      idx_win = num_win - 1; 

    else if (lookj < L-1) 
      {
	/* never divide by a zero (or negative) slide */
	if (off <= 0) Die("you have to slide at least one nt at the time!");

	idx_win = (lookj-(win-1))/off;
      }
  
    else Die ("IndexWindow(): index j (%d) cannot be larger than alignment lenght (%d)\n", lookj, L);
 
  }

  return idx_win;
}


/* Function: NumberScanningWindows() 
 *
 * date:     Mon Mar  3 10:46:38 CST 2003
 *
 * Purpose:  For a given alignment of length L, calculate the number
 *           of scanning windows for a given set (win, off).
 *
 *           There is always one window. One more window is counted for
 *           every i in [0, L/off) such that i*off + win < L.
 *
 * Args:     L     - length of the alignment
 *           win   - length of the window
 *           off   - number of positions the window slides
 *
 * Return:   returns the number of windows (>= 1). 
 *           If off is not positive the window cannot slide, and 1 is
 *           returned (off == 0 used to divide by zero).
 */
int
NumberScanningWindows(int L, int win, int off)
{
  int num_win = 1;
  int idx;
  int i;

  if (off <= 0) return num_win;
  
  idx = L/off;
  
  /* For off > 0, i*off + win grows with i: once the test fails it
   * fails for every larger i, so we can stop there.
   */
  for (i = 0; i < idx && i*off+win < L; i++) 
    num_win ++;

  return num_win;
}


/* Function: RNAbanner() 
 *
 * Purpose:  Print the one-line tag that names the SCFG algorithm:
 *           "[CYK SCFG]" if cyk is nonzero, "[Inside SCFG]" otherwise.
 *
 * Args:     ofp   - output stream
 *           cyk   - nonzero to print the CYK tag
 *
 * Return:   void               
 */
void 
RNAbanner (FILE *ofp, int cyk)
{
  const char *tag;

  tag = (cyk)? "[CYK SCFG]\n" : "[Inside SCFG]\n";

  fprintf(ofp, "%s", tag);
}

/* Function: PrintScanBanner() 
 *
 * Purpose:  Print three lines that describe a scanning window:
 *           the length of the alignment (end-start+1) together with id, mut
 *           and gap, and then for each of the two sequences the alignment
 *           coordinates, the coordinates [fst, fst+len-1], len, and the first 
 *           four entries of its frequency array.
 *
 * Args:     ofp          - output stream
 *           start, end   - first and last position of the window in the alignment
 *           lenX, lenY   - lengths printed for seqX and seqY
 *           fstX, fstY   - first positions printed for seqX and seqY
 *           freqX, freqY - frequency arrays; entries [0..3] are printed
 *           id, gap, mut - values printed with two decimals
 *
 * Return:   void               
 */
void
PrintScanBanner (FILE *ofp, 
		 int start, int end, 
		 int lenX, int lenY, int fstX, int fstY, 
		 double *freqX, double *freqY, 
		 double id, double gap, double mut)
{
  const int span  = end - start + 1;   /* number of alignment positions */
  const int lastX = fstX + lenX - 1;
  const int lastY = fstY + lenY - 1;

  fprintf(ofp, "length alignment: %d (id=%.2f) (mut=%.2f) (gap=%.2f) \n", 
	  span, id, mut, gap);

  fprintf(ofp, "posX: %d-%d [%d-%d](%d) -- (%.2f %.2f %.2f %.2f) \n", 
	  start, end, fstX, lastX, lenX, 
	  freqX[0], freqX[1], freqX[2], freqX[3]);

  fprintf(ofp, "posY: %d-%d [%d-%d](%d) -- (%.2f %.2f %.2f %.2f) \n", 
	  start, end, fstY, lastY, lenY, 
	  freqY[0], freqY[1], freqY[2], freqY[3]);
}


/* Function: ScoreScanFast()
 *
 * Date:     ER,  Sat Oct 12 16:54:07 CDT 2002 [St. Louis]
 *
 * Purpose:  This is the scanning version, similar to the one implemented in the failed "ncranscan" program.
 *
 *           For each position (j) we keep the score of the window [j-win+1,j]
 * 
 *           If the original algorithm is order L^n,
 *
 *           this version becomes           L * w ^ {n-1} -- the gain with respect to the traditional scoring of a window is ~ w/x.
 *
 *           The difference with score_scan() is that here everything is going to be performed in the (j,d) coordinate
 *           system. Before, the HMM parts were done in the (start,i) system, which is clumsy and not efficient.
 *
 *           While the original HMM parts were L^3 in time, with this new system they are reduced to L^2,
 *           therefore the corresponding scanning version goes from Lw^2 to Lw.
 *
 *           Steps, as performed below:
 *             1. optionally work on a shuffled copy of the whole alignment (shuffle, sre_shuffle);
 *             2. unless "ones", build the reverse complement of the alignment;
 *             3. clamp win to leg and construct the models once (ConstructModels_phase2());
 *             4. for every position j call ScoreWithModelsScanFast() for l = 0..lfmax (forward)
 *                and, unless "ones", for l = 0..lbmax (reverse complement);
 *             5. for every j that IndexForwardWindow() marks as the end of a window, print the
 *                banner, optionally the alignment, the posterior scores, and the regression info.
 *
 * Args:     ofp                    - output stream
 *           regressfp              - stream for the regression-test output
 *           regressionfile         - if not NULL, regression info is written to regressfp
 *           sqinfoX, sqinfoY       - sequence info, passed on to the scoring and printing functions
 *           isegX, isegY           - the two aligned sequences (integer form) 
 *           freqX, freqY           - base frequencies; passed to ConstructModels_phase2() and later
 *                                    handed to BaseComp() for every reported window
 *           aliss                  - passed on to ScoreWithModelsScanFast()
 *           leg                    - length of the alignment
 *           win, slide             - window length and number of positions the window slides
 *           riboprob ... tfactor   - passed on to ConstructModels_phase2()
 *           model, dpdscan, mx, sc, ali, scanfast - passed on to the scoring functions
 *           shuffle, sre_shuffle   - if either is set, the whole alignment is shuffled first
 *           ones                   - if set, the reverse complement is not scored
 *           alignment              - if set, the alignment of each reported window is printed
 *           cyk                    - selects the banner printed by RNAbanner(); also passed on
 *
 *           format, iseqX, iseqY, Lw and dos are not referenced in this function.
 *
 * Returns:  void. 
 *           Dies if the forward and backward passes do not return the same total.
 */
void
ScoreScanFast(FILE *ofp, 
	      FILE *regressfp, char *regressionfile, 
	      int format, 
	      SQINFO sqinfoX, int *isegX, int *iseqX, double *freqX,
	      SQINFO sqinfoY, int *isegY, int *iseqY, double *freqY,
	      char *aliss,
	      int Lw, int leg, int win, int slide,
	      fullmat_t           *riboprob, 
	      double            ***cfg_node,
	      double             **hexa,
	      double              *pair5prob,
	      double              *codon_joint,
	      int                **pam,
	      double               scale,
	      struct dos_s             dos, 
	      struct model_s           *model, 
	      struct dpdscanfast_s     *dpdscan, 
	      struct rnascfgscanfast_s *mx, 
	      struct scores_s          *sc, 
	      struct ali_s             *ali,
	      struct scanfast_s        *scanfast,
	      int add_codon, int add_hexamer, 
	      int alignment, int cyk, int changefreq, 
	      int fastintloop, int logodds, int pedantic, int shuffle, int sre_shuffle, double tfactor, 
	      int traceback, int verbose, int ones, int parse, int rnass, int doends)
{
  int      j, jmod;             /* position in the alignment, and j mod win             */
  int      dfmax, dbmax;        /* values of IndexForwardWindow()/IndexBackwardWindow() */
  int      l, lfmax, lbmax;     /* l runs from 0 to lfmax (= dfmax+1) or lbmax (= dbmax+1) */
  int      start, end;          /* first and last position of analysis                  */
  int      lenX, lenY;          /* len of seq's  without gaps                           */
  int      fstX, fstY;          /* mapping of startX and startY to the seq without gaps */
  int     *segX, *segY;         /* the sequences actually scored (input or shuffled)    */
  int     *segrvX, *segrvY;     /* reverse complements; allocated only if !ones         */
  int     *segshX, *segshY;     /* shuffled copies; allocated only if shuffling         */
  int      fwindows = 0;        /* sum of the values returned by the forward  calls     */
  int      bwindows = 0;        /* sum of the values returned by the backward calls     */
  double   id, gap, mut;

  /* Remember that in this scanning version, the only possible shuffle is of the
   * whole alignment. Otherwise the stepwise calculation fails.
   *
   * To do a careful window-by-window shuffling revert to the original qrna (--noscan)
   */
  if (shuffle || sre_shuffle) 
    {
      AllocIntSeqs(leg, &segshX, &segshY);

      DupIntSeq(isegX, segshX, leg-1, leg-1);
      DupIntSeq(isegY, segshY, leg-1, leg-1);  
      
      /* if both flags are set, both shuffles are applied, one after the other */
      if (shuffle)     Shuffle2IntSequences(segshX, segshY, leg, leg-1, leg-1, verbose);
      if (sre_shuffle) QRNAIntShuffle(segshX, segshY, leg);
      
      segX = segshX;
      segY = segshY;
    }
  else 
    {
      segX = isegX;
      segY = isegY;
    }
  
  /* Reverse-complement the whole alignment (of the shuffled copy, if shuffling)
   */
  if (!ones) {
    AllocIntSeqs(leg, &segrvX, &segrvY);
    
    RevComp(segrvX, segX, leg);
    RevComp(segrvY, segY, leg);
  }
  
  if (leg < win) win = leg;  /* if alignment is smaller than window, score the whole alignment at once*/
  

   /* Use the base-composition, length and time to construct the models
   */
  ConstructModels_phase2(ofp, win, riboprob, cfg_node, pam, scale, hexa, pair5prob, codon_joint, freqX, freqY, model, tfactor, 
			 add_codon, add_hexamer, changefreq, logodds, pedantic, verbose);

  /*
   *  For every position j = 0 to leg-1, and for l = 0 to lmax (inclusive),
   *  call the scoring function; lmax = dmax + 1 is given separately for the 
   *  forward (lfmax) and backward (lbmax) strands.
   *   
   */
  for (j = 0; j < leg; j ++) {
    
   jmod = j % win;

   dfmax = IndexForwardWindow (leg, win, slide, j);
   dbmax = IndexBackwardWindow(leg, win, slide, j);
   
   /* dmax can be -1, in which case lmax is 0 and the loops below still run once (l = 0) */
   lfmax = dfmax + 1;
   lbmax = dbmax + 1;
   
   /* 
    *  FORWARD strand 
    */
   for (l = 0; l <= lfmax; l++) 
     fwindows += ScoreWithModelsScanFast(ofp, sqinfoX, segX, sqinfoY, segY, aliss, leg, win, slide, j, jmod, l, lfmax, model, dpdscan,
					 mx, sc, ali, scanfast, alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, FALSE, 
					 shuffle, traceback, verbose);   
  
   /* 
    *  BACKWARD strand: same call on the reverse complement, with the strand flag set to TRUE
    */
   if (!ones) 
     for (l = 0; l <= lbmax; l++) 
       bwindows += ScoreWithModelsScanFast(ofp, sqinfoX, segrvX, sqinfoY, segrvY, aliss, leg, win, slide, j, jmod, l, lbmax, model, dpdscan,
					   mx, sc, ali, scanfast, alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, TRUE, 
					   shuffle, traceback, verbose);  
   
  }/* for j < leg */
  
  /* Check that we calculated the same number of scores Forwards and Backwards
   */
  if (!ones && fwindows != bwindows) Die ("ScoreScanFast(): wrong calculation of full windows [forward=%d, backwards=%d]\n", fwindows, bwindows); 

  /* For each full scanning window, give some statistics 
   *
   * and calculate POSTERIORS for the three functions
   */
  for (j = win-1; j < leg; j ++) {

    dfmax = IndexForwardWindow (leg, win, slide, j);

    /* report j if it ends a window of full length (dfmax == win-1), 
     * or if it is the last position of the alignment and dfmax is not -1 
     */
    if (dfmax == win - 1 || (j == leg-1 && dfmax >= 0))
      {	
	end   = j;
	start = end - dfmax;
	
	PercIdSeqs(segX, segY, end, dfmax, &id, &gap, &mut);
	
	/* NOTE(review): ScoreWindow() zeroes freqX/freqY before calling BaseComp();
	 * here they are not reset -- confirm whether BaseComp() resets them itself.
	 */
	BaseComp(ofp, segX, end, dfmax, freqX);
	BaseComp(ofp, segY, end, dfmax, freqY);
	
	lenX = LenNoGaps(segX, end, dfmax); /* len of seqX without gaps */
	lenY = LenNoGaps(segY, end, dfmax); /* len of seqY without gaps */
	
	fstX = PosNoGaps(segX, start);
	fstY = PosNoGaps(segY, start);
	
	/* print scan banner 
	 */
	PrintScanBanner(ofp, start, end, lenX, lenY, fstX, fstY, freqX, freqY, id, gap, mut);
	
	/* print alignment if asked for it
	 */
	if (alignment) {
	  FillAliStruct(segX, segY, end, dfmax, ali);
	  PrintAlign(ofp, sqinfoX, sqinfoY, 0, dfmax+1, ali);
	}
	
	/* Compare F(seq)[j] with F(seqrv)[j] and calculate POSTERIORS 
	 */
	fprintf(ofp, "LOCAL_DIAG_VITERBI -- ");
	RNAbanner(ofp, cyk);
	PosteriorScoresScanFast(ofp, scanfast, leg, j, win, slide, doends, ones); 

	/* Regression test info: the banner again, then the three forward ("+>")
	 * and the three reverse ("->") scores stored at position j.
	 */
	if (regressionfile != NULL) {
	  PrintScanBanner(regressfp, start, end, lenX, lenY, fstX, fstY, freqX, freqY, id, gap, mut);
	  fprintf(regressfp, "+> %f %f %f\n", scanfast->sc->oth[j],    scanfast->sc->cod[j],    scanfast->sc->rna[j]);
	  fprintf(regressfp, "-> %f %f %f\n", scanfast->sc->othrev[j], scanfast->sc->codrev[j], scanfast->sc->rnarev[j]);
	}
      } 
  }
  
  /* release only what was allocated above (same conditions as the allocations) */
  if (!ones)                  { free(segrvX); free(segrvY); }
  if (shuffle || sre_shuffle) { free(segshX); free(segshY); }
 
}

/* Function: ScoreWindow()
 *
 * Date:     ER,  Fri Nov 30 13:17:18 CST 2001 [St. Louis]
 *
 * Purpose:  Traditional (non-scanning) qrna scoring: a window of length w (win) 
 *           is moved along the alignment x (slide) positions at a time, and 
 *           ScoreWithModels() is called once for each window [pos, pos+dis-1].
 *           Each scoring window is assigned a 2D score and a winner model.
 *
 *           The number of scoring windows is  (L-w)/x, for an alignment of length L.
 * 
 *           If the original algorithm is order L^n,
 *
 *           this version becomes           w^n * (L-w)/x 
 *
 *           The models are constructed once, before the first window, unless
 *           changefreqwin or twindow is set. In that case they are constructed 
 *           again for every window, from the base composition of the window
 *           and, if twindow is set, from a divergence time derived from the
 *           identity of the window. ConstructModels_phase2() is always given win, 
 *           also for a last window that is shorter (dis < win).
 *
 *           If shtoo is set and shuffle is not, every window is scored a
 *           second time with the shuffle flag set to TRUE.
 *
 *           NOTE(review): pos advances by slide; a slide that is not positive 
 *           would never reach the end of the alignment. Presumably this is 
 *           validated by the caller -- confirm.
 *
 * Returns: void
 */
void
ScoreWindow(FILE *ofp, 
	    FILE *regressfp, char *regressionfile, 
	    int format, 
	    SQINFO sqinfoX, int *isegX, int *iseqX, double *freqX,
	    SQINFO sqinfoY, int *isegY, int *iseqY, double *freqY,
	    char *aliss,
	    int Lw, int leg, int win, int slide,
	    fullmat_t           *riboprob, 
	    double            ***cfg_node,
	    double             **hexa,
	    double              *pair5prob,
	    double              *codon_joint,
	    int                **pam,
	    double               scale,
	    struct dos_s         d, 
	    struct model_s      *model, 
	    struct dpd_s        *dpd, 
	    struct dpf_s        *dpf, 
	    struct rnascfg_s    *mx, 
	    struct scores_s     *sc, 
	    struct ali_s        *ali,
	    int add_codon, int add_hexamer, int alignment, int cyk, int changefreq, int changefreqwin, 
	    int fastintloop, int logodds, int pedantic, int shuffle, int sre_shuffle, int sweep, double tfactor, 
	    int traceback, int verbose, int ones, int parse, int rnass, int doends, int shtoo, int twindow)
{
  double  id, gap, mut;
  int     per_window;     /* TRUE if the models are constructed for every window */
  int     is_last;        /* TRUE if the current window reaches the end          */
  int     pos;            /* first position of the current window                */
  int     dis;            /* length of the current window                        */
  int     x;
  
  per_window = (changefreqwin || twindow);

  /* One set of models for the whole alignment
   */
  if (!per_window)
    ConstructModels_phase2(ofp, win, riboprob, cfg_node, pam, scale, hexa, pair5prob, codon_joint, freqX, freqY, model,  tfactor, 
			   add_codon, add_hexamer, changefreq, logodds, pedantic, verbose);
  
  for (pos = 0; pos < leg; ) {
    
    /* a full window if at least win positions are left after pos, otherwise whatever is left
     */
    is_last = !(pos < leg-win);

    if (is_last) dis = leg - pos;
    else         dis = win;
    
    if (per_window) 
      {
	/* single-nt frequencies of the window, starting from zero
	 */
	for (x = 0; x < 4; x++) 
	  {
	    freqX[x] = 0.0;   
	    freqY[x] = 0.0;   
	  }
	BaseComp(ofp, isegX, pos+dis-1, dis-1, freqX);
	BaseComp(ofp, isegY, pos+dis-1, dis-1, freqY);
	
	/* divergence time of the window from its identity
	 */
	if (twindow) 
	  {
	    PercIdSeqs(isegX+pos, isegY+pos, dis-1, dis-1, &id, &gap, &mut);
	    
	    tfactor = TimeIdCorrelation(othdiv, id); 
	    fprintf(ofp, "Divergence time (variable by window): %.6f\n", tfactor); 
	  }
	
	ConstructModels_phase2(ofp, win, riboprob, cfg_node, pam, scale, hexa, pair5prob, codon_joint, freqX, freqY, model,  tfactor, 
			       add_codon, add_hexamer, changefreqwin, logodds, pedantic, verbose);
      }
    
    /* in verbose mode, show the "given" RNA secondary structure (with gaps), if the input carries one
     */
    if (verbose && (format == kSquid || format == kSelex) && (sqinfoX.flags & SQINFO_SS))
      PrintCtSeqs(ofp, &sqinfoX, isegX, &sqinfoY, isegY, pos, dis, aliss);
    
    /* score the window
     */
    ScoreWithModels(ofp, regressfp, regressionfile, sqinfoX, isegX, iseqX, sqinfoY, isegY, iseqY, aliss, pos, 
		    dis, Lw, d, model, dpd, dpf,mx, sc, ali, 
		    alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, shuffle, sre_shuffle, sweep, traceback, verbose);
    
    /* and once more, with the shuffle flag set, if asked for it
     */
    if (shtoo && !shuffle)
      ScoreWithModels(ofp, regressfp, regressionfile, sqinfoX, isegX, iseqX, sqinfoY, isegY, iseqY, aliss, pos, 
		      dis, Lw, d, model, dpd, dpf, mx, sc, ali, 
		      alignment, cyk, doends, fastintloop, logodds, ones, parse, rnass, TRUE, FALSE, sweep, traceback, verbose);

    /* next window, or done
     */
    if (is_last) pos = leg;
    else         pos += slide;
  }

}



