//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Tue Jun  5 00:28:50 PDT 2007
// Last Modified: Sat Jun 23 19:36:30 PDT 2007 (added random noise)
// Filename:      ...sig/examples/all/scaperank.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/scaperank/scaperank.cpp
// Syntax:        C++; museinfo
//
// Description:   Generate a comparison between multiple sets of 
//                number sequences.  
//

#include "Array.h"
#include "Options.h"
#include <math.h>
#include <string.h>

#ifndef OLDCPP
   using namespace std;
   #include <fstream>
#else
   #include <fstream.h>
#endif

#include <stdlib.h>             /* for qsort and bsearch functions */
#include <time.h>               /* for random number seeding */


// function declarations:
void     checkOptions(Options& opts, int argc, char** argv);
void     example(void);
void     usage(const char* command);
void     readData(Array<Array<double> >& data, 
                                  const char* hfile);
void     printData(Array<Array<double> >& data);
double   pearsonCorrelation(int size, double* a, double* b);
double   getMean(int size, double* a);
void     calculateRank0(Array<Array<double> >& data, int reference,
                                  Array<double>& rank0values, 
				  Array<int>& rank0rank);
void     calculateRank1(Array<Array<double> >& data, 
                                  Array<Array<Array<double> > >& correlationset,
                                  int reference, Array<double>& rank0values, 
				  Array<int>& rank0rank);
void     calculateRank2(Array<Array<double> >& data, 
                                  Array<Array<Array<double> > >& correlationset,
                                  int reference, Array<double>& rank2values, 
                                  Array<int>& rank2rank);
void     calculateRank3(Array<Array<double> >& data, 
                                  Array<Array<Array<double> > >& correlationset,
                                  int reference,
				  Array<double>& rank2values, 
                                  Array<int>& rank2rank,
				  Array<double>& rank3values, 
                                  Array<int>& rank3rank);
void     sortRanks(Array<double>& values, Array<int>& ranks);
int      numbersort(const void* A, const void* B);
void     calculateCorrelationSet(Array<Array<Array<double> > >& correlationset,
                                  Array<Array<double> >& data, int reference);
void     calculateCorrelations(Array<Array<double> >& correlations,
                                  Array<double>& a, Array<double>& b);
int      getBestIndex(Array<Array<Array<double> > >& correlations, 
                                  int i, int j, Array<int>& mask);
void     findBestArea(Array<Array<Array<double> > >& correlationset,
                                  Array<int>& mask, int& bestmatch, 
				  double& bestarea);
void     fillWithDummy(Array<Array<Array<double> > >& correlationset,
                                  int index, double value = -1.0);
int      getArea(Array<Array<Array<double> > >& correlationset,
                                  Array<int>& mask, Array<double>& areas);
void     printRange(int count);
void     unsmoothSequence(Array<double>& sequence, double gain);
void     smoothSequence(Array<double>& sequence, double gain);
void     applySmoothing(Array<Array<double> >& data, double smooth);
double   getRandomDelta(double value, double fraction);


Options   options;

int    rank0q     = 0;           // used with -0 option
int    rank1q     = 0;           // used with -1 option
int    rank2q     = 0;           // used with -2 option
int    lowlimit   = 3;
int    rank3q     = 0;           // used with -3 option
int    reference  = 0;           // used with -r option
int    datacountQ = 0;           // used with -n option
double smooth     = 0.0;         // used with -s option
int    randomseed = 0;           // used with -S option
double randomamt  = 0.0;         // used with -R option
int    printrandom = 1;          // used with --random

char pids[50000] = {0};
char labels[50000] = {0};
Array<const char*> pidindex;
Array<const char*> labelindex;
const char null[2] = ".";

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {
   // process the command-line options
   checkOptions(options, argc, argv);

   Array<Array<double> > data;

   const char* filename = "";
   if (options.getArgCount() <= 0) {
      filename = "<STDIN>";
   } else {
      filename = options.getArg(1);
   }
   readData(data, filename);

   if (datacountQ) {
      // report the number of data sequences and then exit
      cout << data.getSize() << endl;
      exit(0);
   }
   if (options.getBoolean("range")) {
      printRange(data.getSize());
      exit(0);
   }

   if (reference < 0) {
      reference = 0;
   }
   if (reference >= data.getSize()) {
      reference = data.getSize()-1;
   }

   if (randomseed < 0) {
      randomseed = time(NULL);   // time in seconds
   }
   srand48(randomseed);
   // srand(randomseed);   // for non-unix compiling

   int i;
   Array<double> randomaddition;
   randomaddition.setSize(data[0].getSize());
   randomaddition.allowGrowth(0);
   if (randomamt > 0.0) {
      data.setSize(data.getSize()+1);
      data[data.getSize()-1].setSize(data[0].getSize());
      for (i=0; i<data[reference].getSize(); i++) {
        randomaddition[i] = getRandomDelta(data[reference][i], randomamt);
        data[data.getSize()-1][i] = randomaddition[i] + data[reference][i];
      }
      pidindex.setSize(pidindex.getSize()+1);
      labelindex.setSize(labelindex.getSize()+1);
      pidindex[pidindex.getSize()-1] = "randomized";
      labelindex[labelindex.getSize()-1] = labelindex[reference];
   } else {
      randomaddition.setAll(0);
   } 

   applySmoothing(data, smooth);

   // printData(data);


   Array<double> rank0values;
   Array<int>    rank0rank;
   calculateRank0(data, reference, rank0values, rank0rank);

   Array<Array<Array<double> > > correlationset;
   calculateCorrelationSet(correlationset, data, reference);

   Array<double> rank1values;
   Array<int>    rank1rank;
   calculateRank1(data, correlationset, reference, rank1values, rank1rank);

   Array<double> rank2values;
   Array<int>    rank2rank;
   calculateRank2(data, correlationset, reference, rank2values, rank2rank);

   Array<double> rank3values;
   Array<int>    rank3rank;
   calculateRank3(data, correlationset, reference, 
         rank2values, rank2rank, rank3values, rank3rank);

   cout << "**label\t" << "**pid\t"
	<< "**rank0\t" << "**score0\t"
	<< "**rank1\t" << "**score1\t"
	<< "**rank2\t" << "**score2\t"
	<< "**rank3\t" << "**score3\n";

   for (i=0; i<rank0values.getSize(); i++) {
      if (reference == i) {
         cout << labelindex[i]  << "\t" 
              << pidindex[i]    << "\t" 
              << "target"       << "\t" 
              << "target"       << "\t"
              << "target"       << "\t" 
              << "target"       << "\t" 
              << "target"       << "\t" 
              << "target"       << "\t" 
              << "target"       << "\t" 
              << "target"       << endl;
      } else {
         cout << labelindex[i]  << "\t" 
              << pidindex[i]    << "\t" 
              << rank0rank[i]   << "\t"
              << rank0values[i] << "\t" 
              << rank1rank[i]   << "\t"
              << rank1values[i] << "\t" 
              << rank2rank[i]   << "\t"
              << rank2values[i] << "\t"
              << rank3rank[i]   << "\t"
              << rank3values[i] << endl;
      }
   }

   cout << "*-\t" << "*-\t"
	<< "*-\t" << "*-\t"
	<< "*-\t" << "*-\t"
	<< "*-\t" << "*-\t"
	<< "*-\t" << "*-\n";

   if (printrandom && randomamt > 0.0) {
      cout << "!!!srand:\t" << randomseed << endl;
      cout << "**rand" << endl;
      for (i=0; i<randomaddition.getSize(); i++) {
         cout << randomaddition[i] << "\n";
      }
      cout << "*-" << endl;
   }

 
   return 0;

}

//////////////////////////////////////////////////////////////////////////


///////////////////////////////
//
// applySmoothing --
//

void applySmoothing(Array >& data, double smooth) {
   if (smooth >= 1.0 || smooth <= -1.0) {
      return;
   }
   int i;
   if (smooth > 0.0) {
      for (i=0; i<data.getSize(); i++) {
         smoothSequence(data[i], smooth);
      }
   } else {
      for (i=0; i<data.getSize(); i++) {
         unsmoothSequence(data[i], -smooth);
      }
   }
}



///////////////////////////////
//
// printRange --
//

void printRange(int count) {
   int i;
   for (i=1; i<=count; i++) {
      //if (count >= 1000 && i < 1000) { cout << "0"; }
      //if (count >= 100 && i < 100)   { cout << "0"; }
      //if (count >= 10 && i < 10)     { cout << "0"; }
      cout << i;
      if (i < count) { cout << " "; }
   }
}


//////////////////////////////
//
// calculateRank0 -- calculate the Pearson correlation of all sequences
//     compared to the reference sequence.
//

void calculateRank0(Array<Array<double> >& data, int reference,
      Array<double>& rank0values, Array<int>& rank0rank) {
   if (reference < 0 || reference >= data.getSize()) {
      cerr << "Error: invalid reference number: " << reference << "\n";
      cerr << "Expected value in range from 0 to " << data.getSize()-1;
      cerr << endl;
      exit(1);
   }

   int i;
   rank0values.setSize(data.getSize());
   rank0values.allowGrowth(0);
   rank0rank.setSize(data.getSize());
   rank0rank.allowGrowth(0);

   for (i=0; i<data.getSize(); i++) {
      rank0rank[i] = i;
      if (reference == i) {
         rank0values[i] = 1.0;
         continue;
      }

      rank0values[i] = pearsonCorrelation(data[reference].getSize(), 
                          data[reference].getBase(), data[i].getBase());
   }

   sortRanks(rank0values, rank0rank);

}



//////////////////////////////
//
// sortRanks -- sort rank values from highest to lowest
//

void sortRanks(Array& values, Array& ranks) {
   int i;
   Array<double*> presort;
   Array<double>  sortdata;
   presort.setSize(values.getSize());
   sortdata.setSize(values.getSize());
   for (i=0; i<values.getSize(); i++) {
      sortdata[i] = values[i];
      presort[i]  = &(sortdata[i]);
   }

   qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersort);

   for (i=0; i<values.getSize(); i++) {
      *presort[i] = i;
   }

   for (i=0; i<values.getSize(); i++) {
      ranks[i] = (int)sortdata[i];
   }
}



//////////////////////////////
//
// numbersort -- sort items largest first
//

int numbersort(const void* A, const void* B) {
   double valuea = **((double**)A);
   double valueb = **((double**)B);
   if (valuea > valueb) {
      return -1;
   } else if (valuea < valueb) {
      return +1;
   } else {
      return 0;
   }
}


//////////////////////////////
//
// getMean --
//

double getMean(int size, double* a) {
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += a[i];
   }

   return sum / size;
}



//////////////////////////////
//
// pearsonCorrelation --
//

double pearsonCorrelation(int size, double* a, double* b) {
   double meana = getMean(size, a);
   double meanb = getMean(size, b);

   double topsum     = 0.0;
   double bottomasum = 0.0;
   double bottombsum = 0.0;

   int i;
   for (i=0; i<size; i++) {
      topsum += (a[i] - meana) * (b[i] - meanb);
      bottomasum += (a[i] - meana) * (a[i] - meana);
      bottombsum += (b[i] - meanb) * (b[i] - meanb);
   }

   if (bottomasum == 0.0 || bottombsum == 0.0) {
      return 0.0;
   }
   return topsum / sqrt(bottomasum * bottombsum);
}



//////////////////////////////
//
// checkOptions --
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("r|reference=i:0", "reference sequence");
   opts.define("S|seed=i:-1", "random seed");
   opts.define("R|random=d:0.0", "random fractional amount");
   opts.define("s|smooth=d:0.0", "smoothing of input data");
   opts.define("n|number=b", "return a count of the number of input sequences");
   opts.define("range=b", "print numbers from 1 to data input count");

   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, June 2007" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: June 2007" << endl;
      cout << "compiled: " << __DATE__ << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   smooth     = opts.getDouble("smooth");
   datacountQ = opts.getBoolean("number");
   reference  = opts.getInteger("reference") - 1; // user index from 1
   if (reference < 0) {
      reference = 0;
   }


   randomseed = opts.getInteger("seed");
   randomamt  = opts.getDouble("random");
}



//////////////////////////////
//
// printData -- print the input data (for debugging purposes)
//

void printData(Array >& data) {
   int i, j;
   for (i=0; i<data.getSize(); i++) {
      cout << "\n\n";
      cout << i << ":\t";
      for (j=0; j<data[i].getSize(); j++) {
         cout << data[i][j];
         if (j < data[i].getSize()-1) {
            cout << ",";
         }
      }
      cout << "\n";
   }
}



//////////////////////////////
//
// readData -- read the input data array.
//

void readData(Array >& data, const char* filename) {
   data.setSize(500);
   data.setSize(0);
   data.setGrowth(500);
   data.allowGrowth(1);

   ifstream infile;
   #ifndef OLDCPP
      infile.open(filename, ios::in);
   #else
      infile.open(filename, ios::in | ios::nocreate);
   #endif

   if (!infile.is_open()) {
      cerr << "Cannot open file for reading: " << filename << endl;
      exit(1);
   }

   int inputsize = 0;
   double inputvalues[500];

   int ii;
   char *ptr;
   double value;
   const char* blanks = " \t,:";
   int foundlabels = 0;
   char templine[100000];
   while (!infile.eof()) {
      infile.getline(templine, 90000, '\n');
      if (!foundlabels) {
	 if (!((strncmp(templine, "pid",  3) == 0) ||
	       (strncmp(templine, "!!",   2) == 0) ||
	       (strncmp(templine, "*",    1) == 0) ||
	       (strncmp(templine, "!pid", 4) == 0))) {
            foundlabels = 1;
            strcpy(labels, templine);
            continue;
	 }
      }
      if (infile.eof() && (strcmp(templine, "") == 0)) {
         break;
      } else if (isdigit(templine[0]) || templine[0] == '-' ||
                  templine[0] == '+') {
         ptr = strtok(templine, blanks);
         inputsize = 0;
         while (ptr != NULL && sscanf(ptr, "%lf", &value) == 1) {
            inputvalues[inputsize++] = value;
            if (inputsize >= 100) {
               break;
            }
            ptr = strtok(NULL, blanks);
         }
         if (inputsize > 0 && data.getSize() == 0) {
            data.setSize(inputsize);
            for (ii=0; ii<data.getSize(); ii++) {
               data[ii].setSize(5000);
               data[ii].setGrowth(5000);
               data[ii].allowGrowth(1);
               data[ii].setSize(0);
            }
         }
         for (ii=0; ii<data.getSize() && ii < inputsize; ii++) {
            data[ii].append(inputvalues[ii]);
         }
      } else if (strncmp("!pid", templine, 4) == 0) {
         strcpy(pids, templine);
      } else if (strncmp("pid", templine, 3) == 0) {
         strcpy(pids, templine);
      }
   }


   pidindex.setSize(data.getSize());
   pidindex.allowGrowth(0);
   int i;
   int length = strlen(pids);

   for (i=0; i<pidindex.getSize(); i++) {
      pidindex[i] = null;
   }

   int counter = 0;
   for (i=0; i<length; i++) {
      if (strncmp(&(pids[i]), "pid", 3) == 0) {
         pidindex[counter++] = &(pids[i]);
         i += 3;
         continue;
      }
      if (pids[i] == '\t') {
         pids[i] = '\0';
      }
      if (pids[i] == ' ') {  // probably not necessary
         pids[i] = '\0';
      }
   }


   labelindex.setSize(data.getSize());
   labelindex.allowGrowth(0);
   length = strlen(labels);

   for (i=0; i<labelindex.getSize(); i++) {
      labelindex[i] = null;
   }

   counter = 0;
   labelindex[counter++] = &(labels[0]);
   for (i=0; i<length; i++) {
      if (labels[i] == '\t') {
         labels[i] = '\0';
         if (counter < labelindex.getSize()) {
            labelindex[counter++] = &(labels[i+1]);
         }
         i += 1;
         continue;
      }
   }


   for (i=0; i<pidindex.getSize(); i++) {
      if (pidindex[i][0] == '!') {
         pidindex[i] = &(pidindex[i][1]);
      }
   }

   for (i=0; i<labelindex.getSize(); i++) {
      if (labelindex[i][0] == '!') {
         labelindex[i] = &(labelindex[i][1]);
      }
   }
   

}



//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}



//////////////////////////////
//
// calculateCorrelationSet --
//

void calculateCorrelationSet(Array<Array<Array<double> > >& correlationset,
   Array<Array<double> >& data, int reference) {

   correlationset.setSize(data.getSize());
   int i;
   for (i=0; i<correlationset.getSize(); i++) {
      correlationset[i].setSize(0);
      if (reference == i) {
         fillWithDummy(correlationset, reference, 1.0);
      } else {
         calculateCorrelations(correlationset[i], data[reference], data[i]);
      }
      correlationset[i].allowGrowth(0);
	        
   }
   

}


//////////////////////////////
//
// calculateRank1 --
//

void calculateRank1(Array<Array<double> >& data, 
   Array<Array<Array<double> > >& correlationset, int reference,
   Array<double>& rank1values, Array<int>& rank1rank) {

   int setcount = correlationset.getSize();
   int mastercount = 0;
   Array<int> localcount;
   localcount.allowGrowth(0);
   localcount.setSize(setcount);
   localcount.setAll(0);


   Array<int> mask;
   mask.setSize(setcount);
   mask.setAll(1);
   mask[reference] = 0;

   int marker = 0;
   if (correlationset[0].getSize() == 0) {
      marker = 1;
   }

   int length = correlationset[marker].getSize();
   int i,j;
   int datawidth;
   int bestindex;
   for (i=0; i<length-1-lowlimit; i++) {
      datawidth = correlationset[marker][i].getSize();
      for (j=0; j<datawidth; j++) {
         bestindex = getBestIndex(correlationset, i, j, mask);
         mastercount++;
         localcount[bestindex]++;
      }
   }

   rank1values.setSize(data.getSize());
   rank1rank.setSize(data.getSize());
   rank1values.allowGrowth(0);
   rank1rank.allowGrowth(0);

   for (i=0; i<data.getSize(); i++) {
      if (reference == i) {
         rank1values[i] = 1.0;
      } else {
         rank1values[i] = (double)localcount[i] / mastercount;
      }
   }

   sortRanks(rank1values, rank1rank);
}


//////////////////////////////
// 
// getArea --
//

int getArea(Array<Array<Array<double> > >& correlationset,
      Array<int>& mask, Array<double>& areas) {

   areas.setSize(correlationset.getSize());
   areas.setAll(0.0);

   int marker = 0;
   if (correlationset[0].getSize() == 0) {
      marker = 1;
   }   

   int length = correlationset[marker].getSize();
   int i,j;
   int mastercount = 0;
   int datawidth;
   int bestindex;
   for (i=0; i<length-1-lowlimit; i++) {
      datawidth = correlationset[marker][i].getSize();
      for (j=0; j<datawidth; j++) {
         bestindex = getBestIndex(correlationset, i, j, mask);
         mastercount++;
         areas[bestindex] += 1.0;
      }
   }

   int maxx = 0;
   for (i=0; i<areas.getSize(); i++) {
      if (areas[i] > maxx) {
         maxx = i;
      }
      areas[i] = areas[i] / mastercount;
      
   }

   return maxx;  // return the index of the largest area
}



//////////////////////////////
//
// calculateRank3 --
//

void calculateRank3(Array<Array<double> >& data, 
      Array<Array<Array<double> > >& correlationset,
      int reference, Array<double>& rank2values, Array<int>& rank2rank,
      Array<double>& rank3values, Array<int>& rank3rank) {

   rank3rank.setSize(data.getSize());
   rank3rank.setAll(0);
   rank3rank.allowGrowth(0);

   rank3values.setSize(data.getSize());
   rank3values.setAll(0);
   rank3values.allowGrowth(0);

   int noisecut = data.getSize() / 2;
   Array<int> mask;
   mask.setSize(data.getSize());
   mask.allowGrowth(0);
   mask.setAll(0);

   int i;
   // turn on all of the noise layers:
   for (i=0; i<rank2rank.getSize(); i++) {
      if (rank2rank[i] >= noisecut) {
         mask[i] = 1;
      }
      rank3values[i] = rank2values[i];
   }


   // place each non-noise performance into the comparison
   Array<double> area;
   for (i=0; i<correlationset.getSize(); i++) {
      if (mask[i] == 1) { // don't process noise layers
         continue;
      }

      if (i == reference) {
         rank3values[i] = 1.0;
         continue;
      }

      mask[i] = 1;
      getArea(correlationset, mask, area);
      rank3values[i] = area[i];
      mask[i] = 0;
    
   }

   sortRanks(rank3values, rank3rank);

}



//////////////////////////////
//
// calculateRank2 --
//

void calculateRank2(Array<Array<double> >& data, 
      Array<Array<Array<double> > >& correlationset,
      int reference, Array<double>& rank2values, Array<int>& rank2rank) {

   int i;
   int iterations = data.getSize() - 2;

   rank2rank.setSize(data.getSize());
   rank2rank.allowGrowth(0);
   rank2rank[reference] = 0;

   rank2values.setSize(data.getSize());
   rank2values.allowGrowth(0);
   rank2values[reference] = 1.0;

   Array<int> mask;
   mask.setSize(data.getSize());
   mask.allowGrowth(0);
   mask.setAll(1);
   mask[reference] = 0;

   int iter;
   int bestmatch = reference;
   double bestarea = 0.0;
   int rankcounter = 1;

   for (iter=0; iter<iterations; iter++) {
      findBestArea(correlationset, mask, bestmatch, bestarea);
      if (mask[bestmatch] != 1) {
         cerr << "ERROR in iteration " << iter 
              << " in Rank 2 calculation" << endl;
         exit(1);
      }
      mask[bestmatch] = 0;
      rank2rank[bestmatch] = rankcounter++;
      rank2values[bestmatch] = bestarea;
      // fillWithDummy(correlationset, bestmatch);
   }

   for (i=0; i<mask.getSize(); i++) {
      if (mask[i] == 1) {
         mask[i] = 0;
         rank2values[i] = 1.0;
         rank2rank[i] = data.getSize() - 1;
      }
   }


   // scale rank2values by their best match value to minimize tail scores
   double dcount = data.getSize();
   for (i=0; i<rank2values.getSize(); i++) {
      rank2values[i] = rank2values[i] * (dcount-rank2rank[i])/dcount;
   }
   

   // no need to sort since the best matches were found sequentially.

}



//////////////////////////////
//
// fillWithDummy --  default value for value is -1.0
//

void fillWithDummy(Array<Array<Array<double> > >& correlationset,
   int index, double value) {

   int i;
   for (i=0; i<correlationset[index].getSize(); i++) {
      correlationset[index][i].setAll(value);
   }

}



//////////////////////////////
//
// findBestArea --
//

void findBestArea(Array<Array<Array<double> > >& correlationset, 
      Array<int>& mask, int& bestmatch, double& bestarea) {

   bestmatch = -1;
   bestarea = -1;

   Array<int> counts;
   counts.setSize(correlationset.getSize());
   counts.allowGrowth(0);
   counts.setAll(0);
   int mastercount = 0;

   int marker = 0;
   if (correlationset[0].getSize() == 0) {
      marker = 1;
   }
   int length = correlationset[marker].getSize();

   int i, j;
   int datawidth;
   int bestindex;
   for (i=0; i<length-1-lowlimit; i++) {
      datawidth = correlationset[marker][i].getSize();

      for (j=0; j<datawidth; j++) {
         bestindex = getBestIndex(correlationset, i, j, mask);
	 
         mastercount++;
         counts[bestindex]++;
      }
   }

   bestindex = 0;
   for (i=1; i<counts.getSize(); i++) {
      if (counts[i] > counts[bestindex]) {
         bestindex = i;
      }
   }

   bestmatch = bestindex;
   bestarea = (double)counts[bestmatch]/mastercount;
}



//////////////////////////////
//
// calculateCorrelations --
//

void calculateCorrelations(Array<Array<double> >& correlations,
      Array<double>& a, Array<double>& b) {

   int i, j;
   correlations.setSize(a.getSize());
   for (i=0; i<a.getSize(); i++) {
      correlations[i].setSize(i+1);
      correlations[i].allowGrowth(0);
   }

   double *aa;
   double *bb;
   int rowsize;
   int corelsize;

   for (i=0; i<correlations.getSize()-1; i++) {
      rowsize = correlations[i].getSize();
      corelsize = a.getSize() - i;
      for (j=0; j<rowsize; j++) {
         aa = a.getBase() + j;
         bb = b.getBase() + j;
         correlations[i][j] = pearsonCorrelation(corelsize, aa, bb);
      }
   }

   // set the bottom row to 1.0 correlations
   i = correlations.getSize()-1;
   for (j=0; j<correlations[i].getSize(); j++) {
      correlations[i][j] = 1.0;
   }

}



//////////////////////////////
//
// getBestIndex -- return the position of the highest correlation.
//   return -1 if there are more than one highest correlation.
//

int getBestIndex(Array<Array<Array<double> > >& correlations, int i,
      int j, Array<int>& mask) {

   int firstindex = -1;
   int lastindex = -1;
   int bestindex = -1;
   int ii;

   for (ii=0; ii<mask.getSize(); ii++) {
      if (mask[ii] > 0) {
         firstindex = ii;
         break;
      }
   }

   for (ii=mask.getSize()-1; ii>=0; ii--) {
      if (mask[ii] > 0) {
         lastindex = ii;
         break;
      }
   }

   bestindex = firstindex;

   if (bestindex < 0) {
      cerr << "Error: no data to measure best correlation from" << endl;
      exit(1);
   }

   for (ii=firstindex; ii<=lastindex; ii++) {
      if (mask[ii] == 0) {
         continue;
      }
      if (correlations[ii][i][j] > correlations[bestindex][i][j]) {
         bestindex = ii;
      }
   }

   return bestindex;
}



//////////////////////////////
//
// smoothSequence -- smooth the sequence with a
//    symmetric exponential smoothing filter (applied in the forward
//    and reverse directions with the specified input gain.
//
//    Difference equation for smoothing: y[n] = k * x[n] + (1-k) * y[n-1]
//

void smoothSequence(Array& sequence, double gain) {
   double oneminusgain = 1.0 - gain;
   int i;
   int ssize = sequence.getSize();

   // reverse filtering first
   for (i=ssize-2; i>=0; i--) {
      sequence[i] = gain*sequence[i] + oneminusgain*sequence[i+1];
   }

   // then forward filtering
   for (i=1; i<ssize; i++) {
      sequence[i] = gain*sequence[i] + oneminusgain*sequence[i-1];
   }

}



//////////////////////////////
//
// unsmoothSequence -- removed smoothed sequence from original sequence.
//

void unsmoothSequence(Array& sequence, double gain) {
   Array<double> smoothed = sequence;
   smoothSequence(smoothed, gain);
   int i;
   for (i=0; i<sequence.getSize(); i++) {
      sequence[i] -= smoothed[i];
   }
}



//////////////////////////////
//
// getRandomDelta -- return a delta value based on the fraction
//     range of the input value.  For example, if the value is
//     50 and the fraction of 0.1, then the output value will be
//     a number in the range from -5.0 to +5.0.
//

double getRandomDelta(double value, double fraction) {
   double output = value * fraction * 2;
   output = output * (drand48() - 0.5);
   return output;
}



// md5sum: 6972252a73abc956948e90ba77043ba8 scaperank.cpp [20100722]