/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* misc.c
 * Functions with no home.
 * ER, Wed Oct  6 14:06:47 CDT 1999 [STL]
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <time.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"


#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif

  
/* Function: AdjustAliSS()
 * Date:     ER, Thu Dec  5 11:17:23 CST 2002 [St. Louis]
 *
 * Purpose:  Reconcile an alignment secondary-structure string with the
 *           ct arrays of the two sequences. Every position that aliss
 *           marks with '>' or '<' but for which ctX or ctY holds -1
 *           is rewritten as '.'.
 *
 * Args:     ofp    - not used by this function
 *           sqinfo - only sqinfo.len is read (number of positions scanned)
 *           aliss  - structure string, modified in place
 *           ctX    - ct array of the first sequence
 *           ctY    - ct array of the second sequence
 *
 * Returns:  void.
 */
void
AdjustAliSS(FILE *ofp, SQINFO sqinfo, char *aliss, int *ctX, int *ctY)
{
  int npos = sqinfo.len;
  int pos;

  for (pos = 0; pos < npos; pos++) {
    /* both sequences carry a ct entry here: leave the annotation alone */
    if (ctX[pos] != -1 && ctY[pos] != -1) continue;

    /* only pair symbols are rewritten */
    if (aliss[pos] != '>' && aliss[pos] != '<') continue;

    aliss[pos] = '.';
  }
}

/* Function: AllocAli()
 * Date:     ER, Sun Oct 10 13:56:21 CDT 1999 [St. Louis]
 *
 * Purpose:  Allocates an alignment structure. The two character arrays
 *           are obtained from AllocCharSeqs(), and their first L positions
 *           are set to '.'.
 *
 * Args:     L       - number of positions to initialize
 *           ret_ali - RETURN: the newly allocated alignment
 *
 * Returns:  void. *ret_ali is allocated here.
 */
void
AllocAli(int L, struct ali_s **ret_ali)
{
  struct ali_s *fresh;
  int           pos;

  fresh = (struct ali_s *) MallocOrDie (sizeof(*fresh));
  AllocCharSeqs(L, &fresh->charX, &fresh->charY);

  pos = 0;
  while (pos < L) {
    fresh->charX[pos] = '.';
    fresh->charY[pos] = '.';
    pos++;
  }

  *ret_ali = fresh;
}

/* Function: AllocDpDiagMtx()
 *
 * Purpose:  Allocates a dpd_s container and fills its oth, cod and rna
 *           members from AllocDpDiagOTH(), AllocDpDiagCOD() and
 *           AllocDpDiagRNA(), each called with L.
 *
 * Returns:  pointer to the new container.
 */
struct dpd_s *
AllocDpDiagMtx(int L)
{
  struct dpd_s *mtx = (struct dpd_s *) MallocOrDie (sizeof(*mtx));

  mtx->oth = AllocDpDiagOTH(L);
  mtx->cod = AllocDpDiagCOD(L);
  mtx->rna = AllocDpDiagRNA(L);

  return mtx;
}

/* Function: AllocDpDiagScanFastMtx()
 *
 * Purpose:  Allocates a dpdscanfast_s container and fills its othscan2,
 *           codscan2 and rnascan2 members from AllocDpDiagScanFast2OTH(),
 *           AllocDpDiagScanFast2COD() and AllocDpDiagScanFast2RNA(),
 *           each called with L.
 *
 * Returns:  pointer to the new container.
 */
struct dpdscanfast_s *
AllocDpDiagScanFastMtx(int L)
{
  struct dpdscanfast_s *mtx = (struct dpdscanfast_s *) MallocOrDie (sizeof(*mtx));

  mtx->othscan2 = AllocDpDiagScanFast2OTH(L);
  mtx->codscan2 = AllocDpDiagScanFast2COD(L);
  mtx->rnascan2 = AllocDpDiagScanFast2RNA(L);

  return mtx;
}


/* Function: AllocDpSemiMtx()
 *
 * Purpose:  Allocates a dpf_s container whose oth member comes from
 *           AllocDpFullOTH(L, L) while cod and rna come from the diagonal
 *           allocators AllocDpDiagCOD(L) and AllocDpDiagRNA(L).
 *
 * Returns:  pointer to the new container.
 */
struct dpf_s *
AllocDpSemiMtx(int L)
{
  struct dpf_s *mtx = (struct dpf_s *) MallocOrDie (sizeof(*mtx));

  mtx->oth = AllocDpFullOTH(L, L);
  mtx->cod = AllocDpDiagCOD(L);
  mtx->rna = AllocDpDiagRNA(L);

  return mtx;
}

/* Function: AllocDpFullMtx()
 *
 * Purpose:  Allocates a dpf_s container and fills it from the full
 *           allocators: AllocDpFullOTH(Lx, Ly), AllocDpFullCOD(Lx, Ly)
 *           and AllocDpFullRNA(Lx).
 *
 * Returns:  pointer to the new container.
 */
struct dpf_s *
AllocDpFullMtx(int Lx, int Ly)
{
  struct dpf_s *mtx = (struct dpf_s *) MallocOrDie (sizeof(*mtx));

  mtx->oth = AllocDpFullOTH(Lx, Ly);
  mtx->cod = AllocDpFullCOD(Lx, Ly);
  mtx->rna = AllocDpFullRNA(Lx);

  return mtx;
}

/* Function: AllocDpDiagScores()
 * Date:     ER, Fri Oct  1 11:43:42 CDT 1999 [St. Louis]
 *
 * Purpose:  Allocates a dpsc3_s whose oth, cod and rna arrays each hold
 *           2*L doubles, all set to zero.
 *
 * Returns:  pointer to the new score structure.
 */
struct dpsc3_s *
AllocDpDiagScores(int L)
{
  struct dpsc3_s *scores;
  int             nslots = 2*L;
  int             k;

  scores      = (struct dpsc3_s *) MallocOrDie (sizeof(*scores));
  scores->oth = (double *) MallocOrDie (sizeof(double) * 2 * L);
  scores->cod = (double *) MallocOrDie (sizeof(double) * 2 * L);
  scores->rna = (double *) MallocOrDie (sizeof(double) * 2 * L);

  k = 0;
  while (k < nslots) {
    scores->oth[k] = 0.;
    scores->cod[k] = 0.;
    scores->rna[k] = 0.;
    k++;
  }

  return scores;
}

/* Function: AllocDpScores()
 *
 * Purpose:  Allocates a dpsc3_s in which oth is a flattened
 *           (L+1) x (L+1) array of doubles while cod and rna each hold
 *           2*L doubles. Everything is set to zero.
 *
 * Returns:  pointer to the new score structure.
 */
struct dpsc3_s *
AllocDpScores(int L)
{
  struct dpsc3_s *scores;
  double         *row;
  int             r, c;
  int             k;

  scores      = (struct dpsc3_s *) MallocOrDie (sizeof(*scores));
  scores->oth = (double *) MallocOrDie (sizeof(double) * (L+1) * (L+1));
  scores->cod = (double *) MallocOrDie (sizeof(double) * 2 * L);
  scores->rna = (double *) MallocOrDie (sizeof(double) * 2 * L);

  for (k = 0; k < 2*L; k++) {
    scores->cod[k] = 0.;
    scores->rna[k] = 0.;
  }

  /* oth is addressed as oth[r*(L+1) + c] */
  for (r = 0; r <= L; r++) {
    row = scores->oth + r*(L+1);
    for (c = 0; c <= L; c++)
      row[c] = 0.;
  }

  return scores;
}

/* Function: AllocScanEnds()
 * Date:     ER, Mon Nov 25 11:57:13 CST 2002 [St. Louis]
 *
 * Purpose:  Allocates a scanends_s holding two arrays (fwd and rev) of L
 *           pointers; every slot receives its own AllocEnd3ScanFast()
 *           structure.
 *
 * Returns:  pointer to the new structure.
 */
struct scanends_s *
AllocScanEnds(int L)
{
  struct scanends_s *ends;
  int                w;

  ends      = (struct scanends_s *) MallocOrDie (sizeof(*ends));
  ends->fwd = (struct end3scan_s **) MallocOrDie (sizeof(struct end3scan_s *) * L);
  ends->rev = (struct end3scan_s **) MallocOrDie (sizeof(struct end3scan_s *) * L);

  w = 0;
  while (w < L) {
    ends->fwd[w] = AllocEnd3ScanFast();
    ends->rev[w] = AllocEnd3ScanFast();
    w++;
  }

  return ends;
}

/* Function: AllocScanSc()
 * Date:     ER, Tue Dec  4 08:53:33 CST 2001 [St. Louis]
 *
 * Purpose:  Allocates a scansc_s with six arrays of L doubles
 *           (oth, cod, rna and othrev, codrev, rnarev), all set to zero.
 *
 * Returns:  pointer to the new structure.
 */
struct scansc_s *
AllocScanSc(int L)
{
  struct scansc_s *sc;
  int              w;

  sc = (struct scansc_s *) MallocOrDie (sizeof(*sc));

  /* forward-direction arrays */
  sc->oth = (double *) MallocOrDie (sizeof(double) * L);
  sc->cod = (double *) MallocOrDie (sizeof(double) * L);
  sc->rna = (double *) MallocOrDie (sizeof(double) * L);

  /* "rev" arrays */
  sc->othrev = (double *) MallocOrDie (sizeof(double) * L);
  sc->codrev = (double *) MallocOrDie (sizeof(double) * L);
  sc->rnarev = (double *) MallocOrDie (sizeof(double) * L);

  w = 0;
  while (w < L) {
    sc->oth[w]    = 0.;
    sc->cod[w]    = 0.;
    sc->rna[w]    = 0.;
    sc->othrev[w] = 0.;
    sc->codrev[w] = 0.;
    sc->rnarev[w] = 0.;
    w++;
  }

  return sc;
}

/* Function: AllocScanFast()
 *
 * Purpose:  Allocates a scanfast_s. Its score and end arrays are sized by
 *           NumberScanningWindows(L, win, off).
 *
 * Returns:  pointer to the new structure.
 */
struct scanfast_s *
AllocScanFast(int L, int win, int off)
{
  struct scanfast_s *sf;
  int                nwindows;

  sf       = (struct scanfast_s *) MallocOrDie (sizeof(*sf));
  nwindows = NumberScanningWindows(L, win, off);

  sf->sc   = AllocScanSc(nwindows);
  sf->ends = AllocScanEnds(nwindows);

  return sf;
}

/* Function: AllocModels()
 *
 * Purpose:  Allocates a model_s container together with its null, oth,
 *           cod and rna sub-models (AllocNullModel(), AllocOTHModel(),
 *           AllocCODModel(), AllocRNAModel()).
 *
 * Returns:  void. *ret_model is allocated here.
 */
void
AllocModels(struct model_s **ret_model)
{
  struct model_s *all = (struct model_s *) MallocOrDie (sizeof(*all));

  all->null = AllocNullModel();
  all->oth  = AllocOTHModel();
  all->cod  = AllocCODModel();
  all->rna  = AllocRNAModel();

  *ret_model = all;
}

/* Function: ConstructModels()
 * Date:     ER, Wed Oct  6 15:36:12 CDT 1999  [St. Louis]
 *
 * Purpose:  Reads the parameter files, derives the mutation and PAM
 *           probabilities for time tfactor, and allocates and fills the
 *           null, COD, OTH and RNA models.
 *
 * Args:     ofp        - not used by this function
 *           codonfile  - codon-codon joint frequency file, or NULL
 *           hexapfile  - hexamer frequency file, or NULL
 *           pamfile    - PAM matrix file; opened as given, else via
 *                        EnvFileOpen() with QRNADB
 *           cfgfile    - SCFG file, or NULL to use "mix_tied_linux.cfg"
 *           ribofile   - RIBOPROB file, or NULL to use "RIBOPROB85-60.mat"
 *           ret_model  - RETURN: the constructed models
 *           win        - window length handed to the model constructors
 *           tfactor    - time parameter; negative values are fatal
 *           changefreq - if TRUE, a frequency vector is built from singlep
 *                        and handed to the constructors; otherwise NULL is
 *                        passed
 *           logodds    - if TRUE, models are converted with ModelLog2ToOdds()
 *           pedantic   - passed through to the constructors
 *           verbose    - if TRUE, intermediate tables are printed to stdout
 *
 * Returns:  (void)
 *           *ret_model is allocated here; the caller frees it.
 *           Any failure to open or parse a file ends in Die().
 */
void
ConstructModels(FILE *ofp, char *codonfile, char *hexapfile, char *pamfile, char *cfgfile, char *ribofile,
		struct model_s **ret_model, int win, double tfactor, int changefreq, int logodds, int pedantic, int verbose)
{
  struct model_s    *model;
  char              *codonjfile   = NULL;
  char              *hexamerfile  = NULL;
  char              *default_cfg  = NULL;   /* default SCFG name, only if we had to build one     */
  char              *default_ribo = NULL;   /* default RIBOPROB name, only if we had to build one */
  FILE              *cfgfp;	            /* open RNA model file                                    */
  FILE              *codonfp;               /* open Codon-Codon model file                            */
  FILE              *hexapfp;               /* open Hexamer model file                                */
  FILE              *pamfp;                 /* open PAM model file                                    */
  FILE              *ribofp;                /* open RIBOPROB model file                               */
  int              **pam;
  fullmat_t        *riboprob;               /* The RIBOPROB matrix */
  double            *codon_joint;
  double           **hexa;
  float              scale;
  double          ***cfg_node;              /* RNA grammar probs by nodes                            +*/
  double           **cfg_emit;              /* RNA grammar probs by emission                         +*/
  double           **cfg;                   /* RNA grammar probs                                      */
  double            *pair5prob;             /* RNA pair probabilities                                +*/
  double           **pammodel;              /* 64x64 condon-codon correlation  probabilities          */
  double            *mutpxy;                /* 16 joint mutation  probabilities                       */
  double            *mut5pxy;               /* 25 joint mutation  probabilities                       */
  double            *freq = NULL;           /* stays NULL unless changefreq is set                    */
  int                add_codon;
  int                add_hexamer;
  
  /* Load a PAM substitution matrix
   */
  if ((pamfp = fopen(pamfile, "r")) == NULL &&
      (pamfp = EnvFileOpen(pamfile, "QRNADB")) == NULL)
    Die("Failed to open PAM scoring matrix file %s", pamfile);

  if (! ParsePAMFile(pamfp, &pam, &scale))
    Die("Failed to parse PAM file");
  fclose(pamfp);

  /* Load a codon-codon joint frequencies in log2 form 
   */
  if (codonfile) {
    codonjfile = FileConcat("aa", codonfile);
    if ((codonfp = fopen(codonjfile, "r")) == NULL)
      Die("Failed to open Codon-Codon file %s", codonfile);

    add_codon = TRUE;
  }
  else {
    add_codon = FALSE;
    codonfp   = NULL;
  }
    
  if (! ParseCodonFile(codonfp, &codon_joint))
    Die("Failed to parse Codon-Codon file");

  if (add_codon) {
    fclose(codonfp);
    free(codonjfile);
  }
  
  /* Load a Hexamer joint frequencies in log2 form 
   */
  if (hexapfile) {
    hexamerfile = FileConcat("aa", hexapfile);
    if ((hexapfp = fopen(hexamerfile, "r")) == NULL)
      Die("Failed to open Hexamer file %s", hexapfile);

    add_hexamer = TRUE;
  }
  else {
    add_hexamer = FALSE;
    hexapfp     = NULL;
  }
    
  if (! ParseHexamerFile(hexapfp, &hexa))
    Die("Failed to parse Hexamer file");

  if (add_hexamer) {
    fclose(hexapfp);
    free(hexamerfile);
  }
  
  /* Load a RNA cfg grammar
   */
  if (cfgfile == NULL) { /* SCFG is not provided */
    default_cfg = FileConcat("", "mix_tied_linux.cfg");
    cfgfile     = default_cfg;
  }

  if ((cfgfp = fopen(cfgfile, "r")) == NULL&&
      (cfgfp = EnvFileOpen(cfgfile, "QRNADB")) == NULL)
    Die("Failed to open SCFG save file %s", cfgfile);
  if (! ReadSCFG(cfgfp, &cfg))
    Die("Failed to read SCFG from file %s", cfgfile);
  fclose(cfgfp);
  free(default_cfg);                /* the name is not needed past this point */
  
  Pairs5SCFG(cfg, &pair5prob);      /* counts for base-pairs */
  DNorm(pair5prob, 25);
  if (verbose) {
    fprintf(stdout, "Ppair5(i,j, t) probabilities\n");
    PrintProbs(stdout, pair5prob, 5);
  }

  /* log2 prob         
   */
  Log2ProbSCFG(cfg);
  
  TieProbs(cfg, &cfg_node, &cfg_emit);

  /* Load a RIBOPROB matrix
   */
  if (ribofile == NULL) { /* RIBOPROB is not provided */
    default_ribo = FileConcat("", "RIBOPROB85-60.mat");
    ribofile     = default_ribo;
  }
  if ((ribofp = fopen(ribofile, "r")) == NULL&&
      (ribofp = EnvFileOpen(ribofile, "QRNADB")) == NULL)
    Die("Failed to open RIBOPROB file %s", ribofile);
  if (! (riboprob = ReadRIBOPROBMatrix(ribofp)))
    Die ("Failed to read RIBOPROB matrix file \n");
  fclose(ribofp);                   /* closed by the caller, like the other parameter files */
  free(default_ribo);

  if (changefreq) {
    freq = (double *) MallocOrDie (sizeof(double) * 5);

    freq[0] = singlep.pa;
    freq[1] = singlep.pc;
    freq[2] = singlep.pg;
    freq[3] = 1.0 - singlep.pa - singlep.pc - singlep.pg;
    freq[4] = 0.0;
  }

  /* Calculate the PAM model:  P(x1 x2 x3,y1 y2 y3)
   * and mutation probabilities:
   * P(x,y) = 1/3 \sum_{x1,x2,y1,y2} [ P(x  x1 x2, y  y1 y2) + 
   *                                   P(x1 x  x2, y1 y  y2) + 
   *                                   P(x1 x2 x , y1 y2 y )   ]
   */
  if (tfactor < 0.0) Die("positive times required, or you won't be here!");

  ConstructPAMModel(pam, scale, codon_joint, &pammodel, add_codon, FALSE, verbose);
  ConstructTiedProbs(pammodel, tfactor, &mutpxy, &mut5pxy, freq, changefreq, pedantic, verbose);
  ConstructTimeDependentPAMModel(pam, scale, tfactor, codon_joint, pammodel, freq, add_codon, changefreq, pedantic, verbose);
  if (verbose) PrintPAMModel (pammodel, mutpxy); 

  /* allocate the models 
   */
  AllocModels(&model);
  PatternModels(model);

  ConstructNullModel(mutpxy, model->null, Nullparam, Nullparam, Nullparam, 1.0);   /* we may think of other null models though */

  ConstructCODModel (mutpxy, pammodel, hexa, win, tfactor, model->cod, add_hexamer, pedantic, verbose);
  ConstructOTHModel (mutpxy, OTHparam, OTHparam_zero, OTHparam_infty, win, win, win, tfactor, model->oth, pedantic, verbose);
  ConstructRNAModel (mutpxy, mut5pxy, cfg_node, pair5prob, riboprob, win, tfactor, model->rna, model->null, 
		     freq, changefreq, pedantic, verbose);  
  
  if (verbose) {
    PrintMutProbs (mutpxy,  model->null->xem);
    PrintMut5Probs(mut5pxy, model->null->xem);
    
    PrintNullModel(model->null);
    PrintCODModel (model->cod);
    PrintOTHModel (model->oth);
    PrintRNAModel (model->rna);

    PrintTrProbs (model); 
  }
  
  /* release everything that was only needed to build the models */
  free(codon_joint);
  free(mutpxy);
  free(mut5pxy);
  free(pammodel[0]);
  free(pammodel);
  Free2DArray(pam,27);
  FreeFullMatrix(riboprob);
  FreeSCFG(cfg);
  FreeSCFG(cfg_emit);
  FreeSCFGNode(cfg_node);
  free(pair5prob);

  if (add_hexamer) free(hexa);
  free(freq);                       /* NULL when changefreq is off */

  /* Log2Odds form of the models 
   */
  if (logodds) ModelLog2ToOdds(model);

  *ret_model = model;
  
}

/* Function: ConstructModels_phase1()
 * Date:     ER, Fri Sep 22 13:51:32 CDT 2000  [St. Louis]
 *
 * Purpose:  First half of model construction: reads every parameter file
 *           once and allocates an empty (patterned) model_s. The parsed
 *           tables are handed back to the caller.
 *
 * Args:     ofp             - not used by this function
 *           codonfile       - codon-codon joint frequency file, or NULL
 *           hexapfile       - hexamer frequency file, or NULL
 *           pamfile         - PAM matrix file; opened as given, else via
 *                             EnvFileOpen() with QRNADB
 *           cfgfile         - SCFG file, or NULL to use "mix_tied_linux.cfg"
 *           ribofile        - RIBOPROB file, or NULL to use "RIBOPROB85-60.mat"
 *           ret_model       - RETURN: allocated and patterned models
 *           ret_cfg_node    - RETURN: SCFG probabilities by node (from TieProbs())
 *           ret_hexa        - RETURN: table filled by ParseHexamerFile()
 *           ret_pair5prob   - RETURN: normalized 5x5 pair probabilities
 *           ret_codon_joint - RETURN: table filled by ParseCodonFile()
 *           ret_pam         - RETURN: PAM matrix
 *           ret_scale       - RETURN: PAM scale, widened to double
 *           ret_riboprob    - RETURN: RIBOPROB matrix
 *           add_codon, add_hexamer, logodds, pedantic
 *                           - not consulted here; file handles and names are
 *                             released whenever they were actually acquired
 *           verbose         - if TRUE, print the pair probabilities and the
 *                             RIBOPROB matrix to stdout
 *
 * Returns:  (void)
 *           Everything returned through the ret_ arguments is allocated
 *           here and owned by the caller. Any failure to open or parse a
 *           file ends in Die().
 */
void
ConstructModels_phase1(FILE *ofp, char *codonfile, char *hexapfile, char *pamfile, char *cfgfile, char *ribofile, 
		       struct model_s **ret_model, 
		       double ****ret_cfg_node, double ***ret_hexa, double **ret_pair5prob, double **ret_codon_joint, 
		       int ***ret_pam, double *ret_scale, fullmat_t **ret_riboprob,
		       int add_codon, int add_hexamer, int logodds, int pedantic, int verbose)
{
  struct model_s    *model;
  FILE              *cfgfp;	            /* open RNA      model file                                */
  FILE              *codonfp      = NULL;   /* open Codon-Codon model file                             */
  FILE              *hexapfp      = NULL;   /* open Hexamer  model file                                */
  FILE              *pamfp;                 /* open PAM      model file                                */
  FILE              *ribofp;                /* open RIBOPROB model file                                */
  char              *codonjfile   = NULL;
  char              *hexamerfile  = NULL;
  char              *default_cfg  = NULL;   /* default SCFG name, only if we had to build one          */
  char              *default_ribo = NULL;   /* default RIBOPROB name, only if we had to build one      */
  int              **pam;
  fullmat_t        *riboprob;               /* The RIBOPROB matrix */
  double            *codon_joint;
  double           **hexa;
  double           **cfg;                   /* RNA grammar probs                                      */
  double          ***cfg_node;              /* RNA grammar probs by nodes                            +*/
  double           **cfg_emit;              /* RNA grammar probs by emission                         +*/
  double            *pair5prob;             /* RNA pair probabilities                                +*/
  float              scale;

  /* Load a PAM substitution matrix
   */
  if ((pamfp = fopen(pamfile, "r")) == NULL &&
      (pamfp = EnvFileOpen(pamfile, "QRNADB")) == NULL)
    Die("Failed to open PAM scoring matrix file %s", pamfile);

  if (! ParsePAMFile(pamfp, &pam, &scale))
    Die("Failed to parse PAM file");
  fclose(pamfp);

  /* Load a codon-codon joint frequencies in log2 form 
   */
  if (codonfile) {
    codonjfile = FileConcat("aa", codonfile);
    if ((codonfp = fopen(codonjfile, "r")) == NULL)
      Die("Failed to open Codon-Codon file %s", codonfile);
  }
    
  if (! ParseCodonFile(codonfp, &codon_joint))
    Die("Failed to parse Codon-Codon file");

  /* release exactly what was acquired above, whatever add_codon says */
  if (codonfp != NULL) fclose(codonfp);
  free(codonjfile);
  
  /* Load a Hexamer joint frequencies in log2 form 
   * NOTE(review): this directory is a hard-coded absolute path, whereas
   * ConstructModels() uses "aa" -- confirm which one is intended.
   */
  if (hexapfile) {
    hexamerfile = FileConcat("/nfs/wol2/people/elena/db/hexamer", hexapfile);
    if ((hexapfp = fopen(hexamerfile, "r")) == NULL)
      Die("Failed to open Hexamer file %s", hexapfile);
  }
    
  if (! ParseHexamerFile(hexapfp, &hexa))
    Die("Failed to parse Hexamer file");

  if (hexapfp != NULL) fclose(hexapfp);
  free(hexamerfile);
  
  /* Load a RNA cfg grammar
   */
  if (cfgfile == NULL) { /* SCFG is not provided */
    default_cfg = FileConcat("", "mix_tied_linux.cfg");
    cfgfile     = default_cfg;
  }

  if ((cfgfp = fopen(cfgfile, "r")) == NULL&&
      (cfgfp = EnvFileOpen(cfgfile, "QRNADB")) == NULL)
    Die("Failed to open SCFG save file %s", cfgfile);
  if (! ReadSCFG(cfgfp, &cfg))
    Die("Failed to read SCFG from file %s", cfgfile);
  
  fclose(cfgfp);
  free(default_cfg);                /* the name is not needed past this point */

  Pairs5SCFG(cfg, &pair5prob);      /* counts for base-pairs */
  DNorm(pair5prob, 25);

  /* verbose only prints; the tables loaded above are never read a second time */
  if (verbose) {
    fprintf(stdout, "Ppair5(i,j, t) probabilities\n");
    PrintProbs(stdout, pair5prob,5);
  }

  /* log2 prob         
   */
  Log2ProbSCFG(cfg);

  /* Separate the cfg frequencies into emission and transitions
   */
  TieProbs(cfg, &cfg_node, &cfg_emit);

  /* Load a RIBOPROB matrix
   */
  if (ribofile == NULL) { /* RIBOPROB is not provided */
    default_ribo = FileConcat("", "RIBOPROB85-60.mat");
    ribofile     = default_ribo;
  }
  if ((ribofp = fopen(ribofile, "r")) == NULL&&
      (ribofp = EnvFileOpen(ribofile, "QRNADB")) == NULL)
    Die("Failed to open RIBOPROB file %s", ribofile);
  if (! (riboprob = ReadRIBOPROBMatrix(ribofp)))
    Die ("Failed to read RIBOPROB matrix file \n");  
  fclose(ribofp);                   /* closed by the caller, like the other parameter files */
  free(default_ribo);

  if (verbose) PrintFullMatrix (stdout, riboprob);

  /* allocate the models 
   */
  AllocModels(&model);
  PatternModels(model);

  *ret_model       = model;
  *ret_cfg_node    = cfg_node;
  *ret_codon_joint = codon_joint;
  *ret_hexa        = hexa;
  *ret_pair5prob   = pair5prob;
  *ret_pam         = pam;
  *ret_riboprob    = riboprob;
  *ret_scale       = (double)scale;

  /* free memory: only the node form of the grammar is handed back */
  FreeSCFG(cfg);
  FreeSCFG(cfg_emit);  

}

/* Function: ConstructModels_phase2()
 * Date:     ER, Wed Fri Sep 22 14:44:10 CDT 2000  [St. Louis]
 *
 * Purpose:  Second half of model construction: given tables that were
 *           already loaded, fills an already allocated model_s for the
 *           given time.
 *
 * Args:     ofp         - not used by this function
 *           win         - window length handed to the model constructors
 *           riboprob    - RIBOPROB matrix
 *           cfg_node    - SCFG probabilities by node
 *           pam, scale  - PAM matrix and its scale
 *           hexa        - hexamer table
 *           pair5prob   - 5x5 pair probabilities
 *           codon_joint - codon-codon joint frequencies
 *           freqX,freqY - per-sequence frequencies; entries 0..3 are averaged
 *                         into the target frequencies
 *           model       - models to fill (allocated by the caller)
 *           time        - time parameter; negative values are fatal
 *           add_codon, add_hexamer, changefreq, pedantic
 *                       - passed through to the constructors
 *           logodds     - if TRUE, models are converted with ModelLog2ToOdds()
 *           verbose     - if TRUE, print the intermediate tables and the models
 *
 * Returns:  (void)
 *           model is filled in place; all temporaries are freed here.
 */
void
ConstructModels_phase2(FILE *ofp, int win, fullmat_t *riboprob, double ***cfg_node, int **pam, double scale, double **hexa, 
		       double *pair5prob, double *codon_joint, double *freqX, double *freqY, 
		       struct model_s *model, double time, 
		       int add_codon, int add_hexamer, int changefreq, int logodds, int pedantic, int verbose)
{
  double   *mut4;          /* 16    joint mutation    probabilities */
  double   *mut5;          /* 25    joint mutation    probabilities */
  double  **codonpam;      /* 64x64 joint codon-codon probabilities */
  double   *target;        /* average of freqX and freqY            */
  int       nt;

  if (time < 0.0) Die("positive times required, or you won't be here!");

  target = (double *) MallocOrDie (sizeof(double) * 4);
  nt = 0;
  while (nt < 4) {
    target[nt] = 0.5*(freqX[nt]+freqY[nt]);
    nt++;
  }

  /* PAM model P(x1 x2 x3,y1 y2 y3) and the mutation probabilities derived from it */
  ConstructPAMModel(pam, scale, codon_joint, &codonpam, add_codon, FALSE, verbose);
  ConstructTiedProbs(codonpam, time, &mut4, &mut5, target, changefreq, pedantic, verbose);
  ConstructTimeDependentPAMModel(pam, scale, time, codon_joint, codonpam, target, add_codon, changefreq, pedantic, verbose);
  if (verbose) PrintPAMModel (codonpam, mut4); 

  PatternModels(model);
  
  ConstructNullModel(mut4, model->null, Nullparam, Nullparam, Nullparam, 1.0);   /* we may think of other null models though */
  ConstructCODModel (mut4, codonpam, hexa, win, time, model->cod, add_hexamer, pedantic, verbose);
  ConstructOTHModel (mut4, OTHparam, OTHparam_zero, OTHparam_infty, win, win, win, time, model->oth, pedantic, verbose);

  /* compile-time switch: the _201 constructor is the one in use */
  if (1) {
    ConstructRNAModel_201 (mut4, mut5, cfg_node, pair5prob, riboprob, win, time, model->rna, model->null, 
			   target, changefreq, pedantic, verbose);  
  }
  else {
    ConstructRNAModel (mut4, mut5, cfg_node, pair5prob, riboprob, win, time, model->rna, model->null, 
		       target, changefreq, pedantic, verbose);  
  }
  
  if (verbose) {
    PrintMutProbs (mut4, model->null->xem);
    PrintMut5Probs(mut5, model->null->xem);
    
    PrintNullModel(model->null);
    PrintCODModel (model->cod);
    PrintOTHModel (model->oth);
    PrintRNAModel (model->rna);

    PrintTrProbs (model); 
  }
   
  /* Logodds form of the models 
   */
  if (logodds) ModelLog2ToOdds(model);

  /* temporaries */
  free(mut4);
  free(mut5);
  free(codonpam[0]);
  free(codonpam);
  free(target);
}


/* Function: ConstructTiedProbs()
 * Date:     ER, Tue Jun 13 13:18:30 CDT 2000 [St. Louis]
 *
 * Purpose:  Given a PAM model, marginalize over the three codon
 *           positions to obtain the 4x4 joint table and the 5x5 joint
 *           table. The extra row/column of the 5x5 table is filled from
 *           INDL times the 4x4 marginals, its corner from INDL2INDL, and
 *           the table is then normalized. The 4x4 table is finally passed
 *           through Joint2Joint() with the given time.
 *
 * Args:     pammodel    - 64x64 AAA..UUUxAAA..UUU joint prob matrix (prealloc)
 *           time        - time handed to Joint2Joint()
 *           ret_mutpxy  - RETURN: 16 joint probabilities
 *           ret_mut5pxy - RETURN: 25 joint probabilities
 *           targetfreq  - not used by this function
 *           changefreq  - not used by this function
 *           pedantic    - passed to Joint2Joint()
 *           verbose     - if TRUE, print the tables to stdout
 *
 * Returns:  (void)
 *           *ret_mutpxy and *ret_mut5pxy are allocated here, freed by caller.
 */
void
ConstructTiedProbs(double **pammodel, double time, double **ret_mutpxy, double **ret_mut5pxy, 
		   double *targetfreq, int changefreq, int pedantic, int verbose)
{
  double *joint4;
  double *joint5;
  double *ident;
  double *margx, *margy;
  double  total;
  double  pcodon;
  int     a1, a2, a3;
  int     b1, b2, b3;
  int     k;
 
  joint4 = (double *) MallocOrDie (sizeof(double) * 16);
  joint5 = (double *) MallocOrDie (sizeof(double) * 25);
  ident  = Cal_Id(4);                       /* identity matrix */
  margx  = (double *) MallocOrDie (sizeof(double) * 4);
  margy  = (double *) MallocOrDie (sizeof(double) * 4);

  /* start everything from zero */
  for (a1 = 0; a1 < 4; a1++) 
    for (b1 = 0; b1 < 4; b1++) 
      joint4[idx(a1,b1)] = 0.0;

  for (a1 = 0; a1 < 5; a1++) 
    for (b1 = 0; b1 < 5; b1++) 
      joint5[idx5(a1,b1)] = 0.0;

  for (k = 0; k < 4; k++) {
    margx[k] = 0.;
    margy[k] = 0.;
  }

  /* every codon pair contributes a third of its probability to each of
   * its three aligned nucleotide pairs
   */
  for (a1 = 0; a1 < 4; a1++)
    for (a2 = 0; a2 < 4; a2++)
      for (a3 = 0; a3 < 4; a3++)
	for (b1 = 0; b1 < 4; b1++)
	  for (b2 = 0; b2 < 4; b2++)
	    for (b3 = 0; b3 < 4; b3++)
	      {
		pcodon = pammodel[CODON(a1,a2,a3)][CODON(b1,b2,b3)];

		joint4[idx(a1,b1)] += pcodon / 3.0;
		joint4[idx(a2,b2)] += pcodon / 3.0;
		joint4[idx(a3,b3)] += pcodon / 3.0;

		joint5[idx5(a1,b1)] += pcodon / 3.0;
		joint5[idx5(a2,b2)] += pcodon / 3.0;
		joint5[idx5(a3,b3)] += pcodon / 3.0;
	      }
  CheckSingleProb(joint4, 16);

  if (verbose) {
    fprintf(stdout, "PAM(i,j, t*) probabilities\n");
    PrintProbs2D(stdout, pammodel, 64, 64);
    fprintf(stdout, "P(i,j, t*) probabilities\n");
    PrintProbs(stdout, joint4, 4);
  }

  /* row and column sums of the 4x4 table */
  for (a1 = 0; a1 < 4; a1++)
    for (a2 = 0; a2 < 4; a2++) {
      margx[a1] += joint4[idx(a1,a2)];
      margy[a1] += joint4[idx(a2,a1)];
    }

  if (verbose) {
    fprintf(stdout, "marg probabilities\n");
    for (k = 0; k < 4; k++)
      fprintf(stdout, "%.5f %.5f\n", margx[k], margy[k]);
  }

  /* fifth row and column of the 5x5 table */
  for (k = 0; k < 4; k++) {
    joint5[idx5(k,4)] = INDL * margx[k];
    joint5[idx5(4,k)] = INDL * margy[k];
  }
  joint5[idx5(4,4)] = INDL2INDL; /* this is fix to have a "decent" model of evolution (14 april 2000) */
   
  /* normalize the 5x5 table */
  total = 0.;
  for (k = 0; k < 25; k++)
    total += joint5[k];
  for (k = 0; k < 25; k++) 
    joint5[k] /= total;
  CheckSingleProb(joint5, 25);

  /* Evolve only the 4x4 joint probs here -- the 5x5 ones are left for rnamodel.c
   */
  Joint2Joint(joint4, ident, ident, 4, time, margx, FALSE, FALSE, FALSE, pedantic, FALSE);

  /* paranoia
   */
  CheckSingleProb(joint4, 16);
  CheckSingleProb(joint5, 25);

  if (verbose) {
    fprintf(stdout, "P(i,j, t=%.4f) probabilities\n", time);
    PrintProbs(stdout, joint4, 4);
    
    fprintf(stdout, "P5(i,j, t*) probabilities\n");
    PrintProbs(stdout, joint5, 5);
  }

  *ret_mutpxy  = joint4;
  *ret_mut5pxy = joint5;
  
  free(margx);
  free(margy);
  free(ident);
}

/* Function: FormatSeqs()
 * Date:     ER, Thu Dec  2 11:28:54 CST 1999  [St. Louis]
 *
 * Purpose:  Prepare the two sequences to compare:
 *
 *           (1) ArrangeGapSequence() is run on each sequence; it also
 *               produces the ct array of that sequence.
 *
 *           (2) PatternIntSeqs() resets isegX and isegY over Lmax, then
 *               IntizeGapAsequence() writes the "int form" of each
 *               sequence:
 *                              seqX[] --> isegX[] 
 *                              seqY[] --> isegY[]
 *
 *           (3) if verbose, both int sequences are printed to ofp.
 *
 * Args:     ofp               - output stream
 *           Lmax              - length handed to PatternIntSeqs()
 *           format            - passed to ArrangeGapSequence()
 *           seqX, seqY        - character sequences
 *           sqinfoX, sqinfoY  - sequence info; ->len gives each length
 *           ret_ctX, ret_ctY  - RETURN: ct arrays from ArrangeGapSequence()
 *           isegX, isegY      - int sequences (allocated by the caller)
 *           allow_pseudoknots - passed to ArrangeGapSequence()
 *           verbose           - print the int sequences
 *
 * Returns:  (void)
 */
void
FormatSeqs(FILE *ofp, int Lmax, int format, char *seqX, char *seqY, SQINFO *sqinfoX, SQINFO *sqinfoY, 
	   int **ret_ctX, int **ret_ctY, int *isegX, int *isegY, int allow_pseudoknots, int verbose)
{
  int *pairsX;
  int *pairsY;

  /* (1) */
  ArrangeGapSequence(ofp, sqinfoX, format, seqX, &pairsX, allow_pseudoknots, verbose);
  ArrangeGapSequence(ofp, sqinfoY, format, seqY, &pairsY, allow_pseudoknots, verbose);
  
  /* (2) */
  PatternIntSeqs(Lmax, isegX, isegY);
  IntizeGapAsequence(seqX, 0, sqinfoX->len, isegX, verbose);
  IntizeGapAsequence(seqY, 0, sqinfoY->len, isegY, verbose);
  
  /* (3) */
  if (verbose) { 
    PrintIntSeq(ofp, sqinfoX, sqinfoX->len, isegX);
    PrintIntSeq(ofp, sqinfoY, sqinfoY->len, isegY);
  }	

  *ret_ctX = pairsX;
  *ret_ctY = pairsY;
}

/* Function: FreeAli()
 *
 * Purpose:  Frees an alignment structure: both character arrays, then
 *           the structure itself.
 */
void
FreeAli(struct ali_s *ali)
{
  free(ali->charY);
  free(ali->charX);

  free(ali);
}

/* Function: FreeDpScores()
 *
 * Purpose:  Frees a dpsc3_s: its oth, cod and rna arrays, then the
 *           structure itself.
 */
void
FreeDpScores(struct dpsc3_s *sc)
{
  free(sc->oth);
  free(sc->cod);
  free(sc->rna);

  free(sc);
}

/* Function: FreeScanEnds()
 *
 * Purpose:  Frees a scanends_s: the first n entries of fwd and rev are
 *           released with FreeEnd3ScanFast(), then both pointer arrays
 *           and the structure itself are freed.
 *
 * Args:     scanends - structure to free
 *           n        - number of entries in fwd and rev to release
 */
void
FreeScanEnds(struct scanends_s *scanends, int n)
{
  int w = 0;

  while (w < n) {
    FreeEnd3ScanFast(scanends->fwd[w]);
    FreeEnd3ScanFast(scanends->rev[w]);
    w++;
  }

  free(scanends->fwd);
  free(scanends->rev);
  free(scanends);
}

/* Function: FreeScanSc()
 *
 * Purpose:  Frees a scansc_s: its six score arrays, then the structure
 *           itself.
 */
void
FreeScanSc(struct scansc_s *scansc)
{
  /* forward arrays */
  free(scansc->oth);
  free(scansc->cod);
  free(scansc->rna);

  /* "rev" arrays */
  free(scansc->othrev);
  free(scansc->codrev);
  free(scansc->rnarev);

  free(scansc);
}
/* Function: FreeScanFast()
 *
 * Purpose:  Frees a scanfast_s. The number of end entries to release is
 *           recomputed with NumberScanningWindows(L, win, off).
 */
void
FreeScanFast(struct scanfast_s *scanfast, int L, int win, int off)
{
  int nwindows = NumberScanningWindows(L, win, off);

  FreeScanEnds(scanfast->ends, nwindows);
  FreeScanSc(scanfast->sc);
  free(scanfast);
}

/* Function: FreeDpDiagMtx()
 *
 * Purpose:  Frees a dpd_s: its members go through FreeDpDiagOTH(),
 *           FreeDpCOD() and FreeDpRNA(), then the container is freed.
 */
void
FreeDpDiagMtx(struct dpd_s *dp)
{
  FreeDpDiagOTH(dp->oth);
  FreeDpCOD    (dp->cod);
  FreeDpRNA    (dp->rna);
  free(dp);
}

/* Function: FreeDpDiagScanFastMtx()
 *
 * Purpose:  Frees a dpdscanfast_s: its members go through
 *           FreeDpDiagScanFast2OTH(), FreeDpScanFast2COD() and
 *           FreeDpScanFast2RNA(), then the container is freed.
 */
void
FreeDpDiagScanFastMtx(struct dpdscanfast_s *dp)
{
  FreeDpDiagScanFast2OTH(dp->othscan2);
  FreeDpScanFast2COD    (dp->codscan2);
  FreeDpScanFast2RNA    (dp->rnascan2);
  free(dp);
}

/* Function: FreeDpSemiMtx()
 *
 * Purpose:  Frees a dpf_s: its members go through FreeDpFullOTH(),
 *           FreeDpCOD() and FreeDpRNA(), then the container is freed.
 */
void
FreeDpSemiMtx(struct dpf_s *dp)
{
  FreeDpFullOTH(dp->oth);
  FreeDpCOD    (dp->cod);
  FreeDpRNA    (dp->rna);
  free(dp);
}

/* Function: FreeDpFullMtx()
 *
 * Purpose:  Frees a dpf_s: its members go through FreeDpFullOTH(),
 *           FreeDpCOD() and FreeDpRNA(), then the container is freed.
 */
void
FreeDpFullMtx(struct dpf_s *dp)
{
  FreeDpFullOTH(dp->oth);
  FreeDpCOD    (dp->cod);
  FreeDpRNA    (dp->rna);
  free(dp);
}

/* Function: FreeModels()
 *
 * Purpose:  Frees a model_s: the null, cod, oth and rna sub-models
 *           through their own Free...Model() functions, then the
 *           container.
 */
void
FreeModels(struct model_s *model)
{
  FreeNullModel(model->null);
  FreeCODModel(model->cod);
  FreeOTHModel(model->oth);
  FreeRNAModel(model->rna);

  free(model);
}

/* Function: ModelLog2ToOdds()
 * Date:     ER, Wed Oct  6 16:23:26 CDT 1999 [St. Louis]
 *
 * Purpose:  Converts the cod, oth and rna models of a model_s from log2
 *           to log2-odds form. Each conversion function receives
 *           model->null as its second argument; the null model itself is
 *           not passed as a conversion target.
 *
 * Args:     model - the structure for a model_s 
 *
 * Returns:  void. 
 */
void
ModelLog2ToOdds(struct model_s *model)
{
  CODLog2ToOdds (model->cod, model->null);
  OTHLog2ToOdds (model->oth, model->null);
  RNALog2ToOdds (model->rna, model->null);
}

/* Function: PatternAli()
 * Date:     ER, Sun Oct 17 17:10:10 CDT 1999 [St. Louis]
 *
 * Purpose:  Resets the first L positions of both alignment strings
 *           to '.'.
 *
 * Args:     L   - number of positions to reset
 *           ali - alignment whose charX and charY are overwritten
 *
 * Returns:  void
 */
void
PatternAli(int L, struct ali_s *ali)
{
  int pos = 0;

  while (pos < L) {
    ali->charX[pos] = '.';
    ali->charY[pos] = '.';
    pos++;
  }
}

/* Function: PatternDpDiag()
 *
 * Purpose:  Runs PatternDpDiagOTH(), PatternDpDiagCOD() and
 *           PatternDpRNA() on the three members of a dpd_s, each with
 *           length L.
 */
void
PatternDpDiag(int L, struct dpd_s *dp)
{
  PatternDpDiagOTH(L, dp->oth);
  PatternDpDiagCOD(L, dp->cod);
  PatternDpRNA(L, dp->rna);
}

/* Function: PatternDpFull()
 *
 * Purpose:  Runs the pattern functions on the three members of a dpf_s:
 *           PatternDpFullOTH() receives both Lx and Ly, whereas
 *           PatternDpDiagCOD() and PatternDpRNA() receive Lx only.
 */
void
PatternDpFull(int Lx, int Ly, struct dpf_s *dp)
{
  PatternDpFullOTH(Lx, Ly, dp->oth);
  PatternDpDiagCOD(Lx, dp->cod);
  PatternDpRNA(Lx, dp->rna);
}

/* Function: PatternModels()
 *
 * Purpose:  Runs the pattern function of each sub-model (null, oth,
 *           cod, rna) of a model_s, in that order.
 */
void
PatternModels(struct model_s *model)
{
  PatternNullModel(model->null);
  PatternOTHModel(model->oth);
  PatternCODModel(model->cod);
  PatternRNAModel(model->rna);
}

/* Function: PrintTrProbs()
 * Date:     ER, Tue Nov 23 17:57:48 CST 1999 [St. Louis]
 *
 * Purpose:  Print the transition probabilities of all the models to
 *           stdout. The null model's eta and meta are printed after
 *           EXP2(); the OTH, COD and RNA models are delegated to
 *           PrintOTHTrProbs(), PrintCODTrProbs() and PrintRNATrProbs().
 *
 * Args:     model -- the models to print
 *
 * Returns:  void.
 */
void
PrintTrProbs(struct model_s *model)
{
  fprintf(stdout, "\nNull MODEL -- Transition probabilities\n");
  fprintf(stdout, "eta   = %f\n", EXP2(model->null->eta)); 
  fprintf(stdout, "1-eta = %f\n", EXP2(model->null->meta)); 
  
  fprintf(stdout, "\nOTH MODEL \n");
  PrintOTHTrProbs(model->oth);

  fprintf(stdout, "\nCOD MODEL \n");
  PrintCODTrProbs(model->cod);

  fprintf(stdout, "\nRNA MODEL \n");
  PrintRNATrProbs(model->rna);

  fprintf(stdout, "\n");
}

/* Function: PrintProbs()
 * Date:     ER, Tue Jun 13 13:42:27 CDT 2000 [St. Louis]
 *
 * Purpose:  Print a flattened L x L table, one row per line. Values are
 *           written as given with format "%.4f ", so every line ends
 *           with a blank before the newline.
 *
 * Args:     ofp  -- output stream
 *           prob -- table; element (x,y) is prob[x*L+y]
 *           L    -- number of rows and of columns
 *
 * Returns:  void.
 */
void
PrintProbs(FILE *ofp, double *prob, int L) 
{
  double *cells;
  int     row, col;

  for (row = 0; row < L; row++) {
    cells = prob + row*L;

    for (col = 0; col < L; col++)
      fprintf(ofp, "%.4f ", cells[col]);

    fputc('\n', ofp);
  }
}

/* Function: PrintProbs2D()
 *
 * Purpose:  Print an Lx x Ly table stored as an array of rows, one row
 *           per line, each value with format "%.4f ".
 *
 * Args:     ofp  -- output stream
 *           prob -- table; element (x,y) is prob[x][y]
 *           Lx   -- number of rows
 *           Ly   -- number of columns
 *
 * Returns:  void.
 */
void
PrintProbs2D(FILE *ofp, double **prob, int Lx, int Ly) 
{
  int row, col;

  for (row = 0; row < Lx; row++) {
    col = 0;
    while (col < Ly) {
      fprintf(ofp, "%.4f ", prob[row][col]);
      col++;
    }
    fputc('\n', ofp);
  }
}

/* Function: PrintVectorProbs()
 *
 * Purpose:  Print the L values of a vector on a single line, each with
 *           format "%.4f ", followed by a newline.
 *
 * Args:     ofp  -- output stream
 *           prob -- vector of L values
 *           L    -- number of values
 *
 * Returns:  void.
 */
void
PrintVectorProbs(FILE *ofp, double *prob, int L) 
{
  int k = 0;

  while (k < L) {
    fprintf(ofp, "%.4f ", prob[k]);
    k++;
  }

  fputc('\n', ofp);
}

