/*
 *
 * Implementation of Viterbi's algorithm for DNA sequences.
 * This only computes the probability of the most probable sequence of
 * hidden states. Traceback of the optimal sequence itself is not implemneted
 *
 * The test main gets the number of states for the HMM and a filename containing
 * a DNA sequence (as text containing only ACGT), randomizes a model, runs 
 * viterbi and reports the time.
 *
 * Shay Mozes, 2007
 *
 */
#include <stdio.h>
#include <string>
#include <iostream>
#include <fstream>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <assert.h>

using namespace std;

#define ALPH_SIZE 4

double realViterbi(char* seq, const int len, double** e , double** t, int k) {

  double* v = new double[k];
  double *tmpv = new double[k];
  double* tmp;

  int i;
  for (i=0; i<k;i++) v[i] = 0;

  for (i=0;i<len;i++){

    for (int j=0;j<k;j++){
      tmpv[j] = v[0] + e[0][seq[i]] + t[0][j];
      for (int l=1;l<k;l++){
	if (tmpv[j] < v[l] + e[l][seq[i]] + t[l][j]){
	  tmpv[j] = v[l] + e[l][seq[i]] + t[l][j];
	}
      }   
    } 

    tmp = v;
    v = tmpv;
    tmpv = tmp;
    ///      for (int j=0;j<k;j++) printf("%5.3g ",v[j]);
      ///cout << endl;
  }

  double res = v[0];
  for(int j=1;j<k;j++){
    if (res < v[j]) res = v[j];
  }

  return(res);
    
}


char* get_seq(const char* filename, int *len){

  ifstream file(filename);

  char stam[1000];

  //skip first line
  //  file.getline((char*) &stam,1000);

  //find size of file
  int begin = (int) file.tellg();
  file.seekg (0, ios::end);
  int end = (int) file.tellg();
  
  //allocate and read seq
  char* seq = new (nothrow) char[end-begin+1000];
  assert(seq!=NULL);
  cout << "length is: " << end-begin+1000 << endl;

  int cnt=0;
  *len = end-begin+999;
  file.seekg(begin);
  while (! file.eof()) {    
    file.getline(seq + cnt, *len-cnt);
    ///    cout << seq+cnt << endl;
    cnt += strlen(seq+cnt);
    //    cout << cnt << endl;
  }

  *len = (unsigned int) cnt;

  char* iseq = new (nothrow) char[cnt];
  assert(iseq!=NULL);

  int l=0;
  for (unsigned int i=0;i<cnt;i++) {
    if (seq[i]=='A' || seq[i]=='a') iseq[l++]=0;
    else if (seq[i]=='C' || seq[i]=='c') iseq[l++]=1;
    else if (seq[i]=='G' || seq[i]=='g') iseq[l++]=2;
    else if (seq[i]=='T' || seq[i]=='t') iseq[l++]=3;
    else cout << "garble at position " << i << endl;
  }
  delete[] seq;

  *len = (int) l;
  

  return iseq;
}



int main(int argc, char* argv[] ){

  cout << "just regular viterbi" << endl;

  
  int len;
  char* seq =  get_seq(argv[2],&len);
  cout << "read " << len << " bases." << endl;


  time_t tmp;
  time(&tmp); 
  srand48(tmp); 
  
  int k=atoi(argv[1]);
  cout << "k = " << k << endl;
  double **ee = new double*[k];
  double **ttt = new double*[k];

  for (int i=0; i<k; i++) {
    ee[i] = new double[ALPH_SIZE];
    ttt[i] = new double[k];

    for (int j=0;j<4;j++){
      //      ee[i][j] = (double) ((int)(100*drand48()))/128.0;
      ee[i][j] = -1.0*drand48();
    }
    for (int j=0;j<k;j++){
      //      ttt[i][j] = (double) ((int)(100*drand48()))/128.0;
      ttt[i][j] = -1.0*drand48();
    }
  }

  clock_t cl1 = clock();
  double realvit = realViterbi(seq,len,ee,ttt,k);
  clock_t cl2 = clock();

  cout << "real viterbi returned: " << realvit << endl;
  cout << "real viterbi time: " << cl2-cl1 << endl;

}


