//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Sun Dec 21 05:13:38 PST 2008
// Filename:      ...sig/examples/all/cherry.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/cherry.cpp
// Syntax:        C++; museinfo
//
// Description:   Performance correlation comparison
//

#include "humdrum.h"
#include "MidiFile.h"

#include <math.h>
#include <iomanip>

// function declarations:
// define and parse the command-line options; the --author, --version,
// --help and --example options each end the program after being handled
void     checkOptions(Options& opts, int argc, char** argv);
// called for --example; its body is empty in this file
void     example(void);
// called for --help; its body is empty in this file
void     usage(const char* command);
// fill a and b from the first two fields of every data line of infile
void     getSequences(Array<double>& a, Array<double>& b, 
                                 HumdrumFile& infile);
// return the index whose removal gives the largest leave-one-out
// correlation; ind and len are only used to label the printed entry
int      compareSequences(Array<double>& a, Array<double>& b, 
		                 int ind, int len);
// Pearson correlation of the first "size" elements of x and y
double   pearsonCorrelation(int size, double* x, double* y);
// same, but with the element at index "ignore" meant to be left out
double   pearsonCorrelationHole(int size, double* x, double* y, int ignore);
// arithmetic mean of data (0.0 for an empty array)
double   getMean(Array<double>& data);
// standard deviation of data about the given mean, dividing by size-1
double   getSampleSD(double mean, Array<double>& data);
// delete element "best" from both arrays, shrinking each by one
void     removeIndex(Array<double>& a, Array<double>& b, int best);

// User interface variables:
// NOTE(review): the checkOptions() visible in this file never assigns
// verboseQ, mmaQ or exclude, so the initial values below are what the
// program runs with.
// command-line option parser, filled in by checkOptions()
Options   options;
// nonzero: compareSequences() prints the base correlation and a table of
// per-index correlations and z-scores
int       verboseQ = 0;
// nonzero: results are printed as a "data = { {k/N, r}, ... };" list
// (presumably Mathematica list syntax -- confirm)
int       mmaQ = 1;
// main() performs (number of data points - exclude) removal rounds, so
// this many points are left when the loop stops
int       exclude = 5;

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {

   // parse the command line; informational options exit in checkOptions()
   checkOptions(options, argc, argv);

   // load the Humdrum file named by the first command-line argument
   HumdrumFile infile;
   infile.read(options.getArg(1));

   // a and b receive the two numeric sequences stored in the file
   Array<double> a;
   Array<double> b;
   getSequences(a, b, infile);

   // original number of data points, used as the denominator of each label
   const int len = a.getSize();

   if (mmaQ) {
      // list opening plus entry 0: the correlation of the complete data set
      cout << "data = {\n";
      cout.setf(ios::fixed);
      cout << "{0/" << a.getSize() << ", ";
      cout << setprecision(12);
      cout << pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
      cout << "},\n";
   }

   // each round drops the point whose removal yields the highest
   // correlation, until only "exclude" points remain
   const int iterations = a.getSize() - exclude;
   for (int step=1; step<=iterations; step++) {
      const int best = compareSequences(a, b, step, len);
      removeIndex(a, b, best);
      // separate list entries, but not after the final one
      if (mmaQ && (step < iterations)) {
         cout << ",\n";
      }
   }

   if (mmaQ) {
      cout << "};\n";
   }

   return 0;
}

//////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// removeIndex --
//

void removeIndex(Array& a, Array& b, int best) {
   int i;
   int size = a.getSize();
   for (i=best; i<size-1; i++) {
      a[i] = a[i+1];
      b[i] = b[i+1];
   }
   a.setSize(size-1);
   b.setSize(size-1);
}



//////////////////////////////
//
// getSequences --
//

void getSequences(Array& a, Array& b, HumdrumFile& infile) {
   a.setSize(infile.getNumLines());
   b.setSize(infile.getNumLines());
   a.setSize(0);
   b.setSize(0);
   double value;
   int i;

   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }
      value = 0;
      sscanf(infile[i][0], "%lf", &value);
      a.append(value);

      value = 0;
      sscanf(infile[i][1], "%lf", &value);
      b.append(value);
   }
}



//////////////////////////////
//
// compareSequences --
//

int compareSequences(Array& a, Array& b, int ind, int len) {
   int i;
   double basecorr;
   double corr;

   Array<double> corrlist;
   corrlist.setSize(a.getSize());
   corrlist.setSize(0);
   Array<int> index;
   index.setSize(a.getSize());
   index.setSize(0);

   basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   if (verboseQ) {
      cout << "base" << "\t" << basecorr << "\n";
   }

   for (i=0; i<a.getSize(); i++) {
      corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
      corrlist.append(corr);
      index.append(i);
   }

   double mean = getMean(corrlist);
   double sd = getSampleSD(mean, corrlist);


   Array<double> zscores;
   zscores.setSize(corrlist.getSize());
   int asize = corrlist.getSize();
   for (i=0; i<asize; i++)  {
      zscores[i] = (corrlist[i] - mean) / sd;
   }

   int maxi = index[0];
   for (i=1; i<asize; i++) {
      if (zscores[i] > zscores[maxi]) {
         maxi = i;
      }
   }

   if (mmaQ) {
      cout << "{" << ind << "/" << len << ", " << corrlist[maxi] << "}";
   }

   if (verboseQ) {
      cout << "max\t" << maxi << "\t";
      cout << "mean\t" << mean << "\t";
      cout << "sd\t"   << sd   << "\t";
      cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
      for (i=0; i<asize; i++) {
         cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
      }
   }

   return maxi;
}



//////////////////////////////
//
// getSampleSD --
//

double getSampleSD(double mean, Array& data) {
   int size = data.getSize();
   double sum = 0.0;
   double value;
   int i;
   for (i=0; i<size; i++) {
      value = data[i] - mean;
      sum += value * value;
   }

   return sqrt(sum / (size - 1.0));
}



//////////////////////////////
//
// getMean --
//

double getMean(Array& data) {
   int size = data.getSize();
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += data[i];
   }

   return sum / size;
}


//////////////////////////////
//
// ranksort -- qsort-style comparison which orders counts from largest
//     to smallest.  A and B each point to an element that is itself a
//     pointer to an int; the pointed-to ints are compared.
//

int ranksort(const void* A, const void* B) {
   const int first  = **static_cast<int* const*>(A);
   const int second = **static_cast<int* const*>(B);

   // the larger count sorts earlier, so it compares as "less"
   if (first > second) {
      return -1;
   }
   if (first < second) {
      return +1;
   }
   return 0;
}



//////////////////////////////
//
// pearsonCorrelationHole -- Pearson correlation of the first "size"
//     elements of x and y with the element at index "ignore" left out.
//     The sums are accumulated in one pass with running means.  The
//     weights of each update depend on how many points have been
//     accumulated so far, which is one less than the array index once
//     the ignored element has been passed, so the points are counted
//     explicitly instead of using the index.
//
//     If "ignore" is outside 0..size-1 nothing is skipped and the result
//     is the correlation of the complete sequences.  The result is NaN
//     (under IEEE arithmetic) when fewer than two points remain or when
//     either remaining sequence has zero variance.
//

double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {

   double sumx  = 0.0;   // running sum of squared deviations of x
   double sumy  = 0.0;   // running sum of squared deviations of y
   double sumco = 0.0;   // running sum of co-deviations of x and y

   double meanx = 0.0;
   double meany = 0.0;

   double sweep;
   double deltax;
   double deltay;

   int count = 0;        // number of points accumulated so far
   int i;

   for (i=0; i<size; i++) {
      if (i == ignore) {
         continue;
      }
      // for the first point sweep is 0, so it adds nothing to the sums
      // and only sets the means to its own values
      sweep = count / (count + 1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      count++;
      meanx += deltax / count;
      meany += deltay / count;
   }

   double popsdx = sqrt(sumx / count);
   double popsdy = sqrt(sumy / count);
   double covxy  = sumco / count;

   return covxy / (popsdx * popsdy);
}




//////////////////////////////
//
// checkOptions -- Define the command-line options, parse the command
//     line, and serve the informational options.  At most one of
//     --author, --version, --help and --example is handled (checked in
//     that order), and handling one ends the program with status 0.
//

void checkOptions(Options& opts, int argc, char* argv[]) {

   // register the boolean switches, then parse the command line
   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // every branch below exits, so the tests are mutually exclusive
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, ";
      cout << "craig@ccrma.stanford.edu, Jan 2008" << endl;
      exit(0);
   }

   if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 30 Jan 2008" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   }

   if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   }

   if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

}



//////////////////////////////
//
// pearsonCorrelation -- Pearson correlation coefficient of the first
//     "size" elements of x and y, accumulated in a single pass: the
//     running means start at the first sample and every later sample
//     updates the sums of squared deviations and co-deviations.
//

double pearsonCorrelation(int size, double* x, double* y) {

   // running means, seeded with the first sample
   double meanx = x[0];
   double meany = y[0];

   // running sums of squared deviations and of co-deviations
   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;

   for (int k=1; k<size; k++) {
      // count = number of samples once x[k], y[k] has been included
      const int    count  = k + 1;
      const double sweep  = (count - 1.0) / count;
      const double deltax = x[k] - meanx;
      const double deltay = y[k] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / count;
      meany += deltay / count;
   }

   const double popsdx = sqrt(sumx / size);
   const double popsdy = sqrt(sumy / size);
   const double covxy  = sumco / size;

   return covxy / (popsdx * popsdy);
}



//////////////////////////////
//
// example -- Called for the --example option.  No example text has
//     been written yet, so nothing is printed.
//

void example(void) {
   // intentionally empty
}



//////////////////////////////
//
// usage -- Called for the --help option with the command name.  No
//     usage text has been written yet, so nothing is printed.
//

void usage(const char* command) {
   // intentionally empty; the command name is not used yet
   (void)command;
}



// md5sum: f57d74c6d20dfb57c59d7d2d01e174d9 cherry.cpp [20081203]