//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Sun Dec 21 05:13:38 PST 2008
// Filename:      ...sig/examples/all/cherryb.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/cherryb.cpp
// Syntax:        C++; museinfo
//
// Description:   Performance correlation comparison
//

#include "humdrum.h"
#include "MidiFile.h"

#include <math.h>
#include <iomanip>

// function declarations:
void     checkOptions(Options& opts, int argc, char** argv);
void     example(void);
void     usage(const char* command);
void     getSequences(Array<double>& a, Array<double>& b, 
                                 HumdrumFile& infile);
int      compareSequences(Array<double>& a, Array<double>& b, 
		                 int ind, int len);
double   pearsonCorrelation(int size, double* x, double* y);
double   pearsonCorrelationHole(int size, double* x, double* y, int ignore);
double   getMean(Array<double>& data);
double   getSampleSD(double mean, Array<double>& data);
void     removeIndex(Array<double>& a, Array<double>& b, int best);
double   model(double x);
double   getDifference(Array<double>& data, Array<double>& models, 
                                 int preexclude, int exclude, double parameter);
double   getCorrected(Array<double>& data, Array<double>& models, 
                                 int preexclude, int exclude);
double   getParabolicMinimum(double x1, double y1, double x2, double y2,
                                 double x3, double y3);

// User interface variables:
Options   options;
int       verboseQ   = 0;
int       mmaQ       = 1;
int       exclude    = 5;
int       preexclude = 5;

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {

   // process the command-line options
   checkOptions(options, argc, argv);

   HumdrumFile infile;
   infile.read(options.getArg(1));

   Array<double> a;
   Array<double> b;

   getSequences(a, b, infile);
   int best;
   int len = a.getSize();

   //if (mmaQ) {
   //   cout << "data = {\n";
   //   cout.setf(ios::fixed);
   //   cout << "{0/" << a.getSize() << ", " 
   //        << setprecision(12)
   //	   << pearsonCorrelation(a.getSize(), a.getBase(), b.getBase())
   //           << "},\n";
   //   }


   Array<double> data;
   data.setSize(a.getSize());
   data.setSize(0);

   Array<double> models;
   models.setSize(a.getSize());
   models.setAll(0.0);
   models.allowGrowth(0);

   int i;
   for (i=0; i<models.getSize(); i++) {
      models[i] = model((double)i/models.getSize());
   }

   double corr;
   corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   data.append(corr);

   int iterations = a.getSize()-exclude;
   for (i=0; i<iterations; i++) {
      best = compareSequences(a, b, i+1, len);
      removeIndex(a, b, best);
      corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
      data.append(corr);
      //if (mmaQ) {
      //   if (i < iterations-1) {
      //      cout << ",\n";
      //   }
      //}
   }

   //if (mmaQ) {
   //   cout << "};\n";
   //}

   int count = 1000;
   Array<double> value;
   value.setSize(count);
   value.allowGrowth(0);

   Array<double> parameter;
   parameter.setSize(count);
   parameter.allowGrowth(0);

   for (i=0; i<count; i++) {
      parameter[i] = (double)i/count;
      value[i] = getDifference(data, models, preexclude, exclude, parameter[i]);
   }

   cout.setf(ios::fixed);
   cout << "data = {\n";
   for (i=0; i<value.getSize(); i++) {
      cout << "{" << parameter[i] << ", " << value[i] << "}";
      if (i < value.getSize()-1) {
         cout << ",";
      }
      cout << "\n";
   }
   cout << "};\n";

   cout << "\nmodel = {\n";
   for (i=0; i<models.getSize(); i++) {
      cout << "\t" << models[i];
      if (i < models.getSize()-1) {
         cout << ",";
      }
      cout << "\n";
   }
   cout << "};\n";

   cout << "\nraw = {\n";
   for (i=0; i<data.getSize(); i++) {
      cout << "\t" << data[i];
      if (i < data.getSize()-1) {
         cout << ",";
      }
      cout << "\n";
   }
   cout << "};\n";

   double corrected = getCorrected(data, models, preexclude, exclude);
   cout << "original = " << data[0] << ";\n";
   cout << "revised = " << corrected << ";\n";

   return 0;
}

//////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// getCorrected --
//

double getCorrected(Array<double>& data, Array<double>& models, 
      int preexclude, int exclude) {
   double target = data[0];
   double targethi = (1.0 - target)/2 + target;
   double targetlo = target/2;
   
   double value   = getDifference(data, models, preexclude, exclude, target);
   double valuehi = getDifference(data, models, preexclude, exclude, targethi);
   double valuelo = getDifference(data, models, preexclude, exclude, targetlo);

   double minimum = getParabolicMinimum(targetlo, valuelo, target, value, 
         targethi, valuehi);

   return minimum;
}



//////////////////////////////
//
// getParabolicMinimum -- or maximum if inflection is negative...
//

double getParabolicMinimum(double x1, double y1, double x2, double y2,
      double x3, double y3) {
   double a = (x3*(y2-y1)+x2*(y1-y3)+x1*(y3-y2))/((x1-x2)*(x1-x3)*(x2-x3));
   double b = (x3*x3*(y1-y2)+x1*x1*(y2-y3)+x2*x2*(y3-y1))/
                    ((x1-x2)*(x1-x3)*(x2-x3));
   //double c = (x1*x3*(x3-x1)*y2 + x2*x2*(x3*y1-x1*y3)+x2(x1*x1*y3-x3*x3*y1))/
   //                 ((x1-x2)*(x1-x3)*(x2-x3));

   return -b/(2.0*a);
}



//////////////////////////////
//
// getDifference --
//

double getDifference(Array<double>& data, Array<double>& models, 
      int preexclude, int exclude, double parameter) {
   int i;
   double sum = 0.0;
   double value;
   for (i=preexclude; i<models.getSize() - exclude; i++) {
      value = data[i] - (models[i] * (1.0 - parameter) + parameter);
      sum += value * value;
   }
   return sum;
}



//////////////////////////////
//
// removeIndex --
//

void removeIndex(Array& a, Array& b, int best) {
   int i;
   int size = a.getSize();
   for (i=best; i<size-1; i++) {
      a[i] = a[i+1];
      b[i] = b[i+1];
   }
   a.setSize(size-1);
   b.setSize(size-1);
}



//////////////////////////////
//
// getSequences --
//

void getSequences(Array& a, Array& b, HumdrumFile& infile) {
   a.setSize(infile.getNumLines());
   b.setSize(infile.getNumLines());
   a.setSize(0);
   b.setSize(0);
   double value;
   int i;

   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }
      value = 0;
      sscanf(infile[i][0], "%lf", &value);
      a.append(value);

      value = 0;
      sscanf(infile[i][1], "%lf", &value);
      b.append(value);
   }
}



//////////////////////////////
//
// compareSequences --
//

int compareSequences(Array& a, Array& b, int ind, int len) {
   int i;
   double basecorr;
   double corr;

   Array<double> corrlist;
   corrlist.setSize(a.getSize());
   corrlist.setSize(0);
   Array<int> index;
   index.setSize(a.getSize());
   index.setSize(0);

   basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
   if (verboseQ) {
      cout << "base" << "\t" << basecorr << "\n";
   }

   for (i=0; i<a.getSize(); i++) {
      corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
      corrlist.append(corr);
      index.append(i);
   }

   double mean = getMean(corrlist);
   double sd = getSampleSD(mean, corrlist);


   Array<double> zscores;
   zscores.setSize(corrlist.getSize());
   int asize = corrlist.getSize();
   for (i=0; i<asize; i++)  {
      zscores[i] = (corrlist[i] - mean) / sd;
   }

   int maxi = index[0];
   for (i=1; i<asize; i++) {
      if (zscores[i] > zscores[maxi]) {
         maxi = i;
      }
   }

   //if (mmaQ) {
   //   cout << "{" << ind << "/" << len << ", " << corrlist[maxi] << "}";
   //}

   if (verboseQ) {
      cout << "max\t" << maxi << "\t";
      cout << "mean\t" << mean << "\t";
      cout << "sd\t"   << sd   << "\t";
      cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
      for (i=0; i<asize; i++) {
         cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
      }
   }

   return maxi;
}



//////////////////////////////
//
// getSampleSD --
//

double getSampleSD(double mean, Array& data) {
   int size = data.getSize();
   double sum = 0.0;
   double value;
   int i;
   for (i=0; i<size; i++) {
      value = data[i] - mean;
      sum += value * value;
   }

   return sqrt(sum / (size - 1.0));
}



//////////////////////////////
//
// getMean --
//

double getMean(Array& data) {
   int size = data.getSize();
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += data[i];
   }

   return sum / size;
}


//////////////////////////////
//
// ranksort -- sort counts by largest first
//

int ranksort(const void* A, const void* B) {
   int& a = *(*((int**)A));
   int& b = *(*((int**)B));
   if (a < b) {
      return +1;
   } else if (a > b) {
      return -1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// pearsonCorrelationHole --
//

double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;

   double meanx = x[0];
   double meany = y[0];

   int starti = 1;

   if (ignore == 0) {
      meanx = x[1];
      meany = y[1];
      starti = 2;
   }

   double sweep;
   double deltax;
   double deltay;

   int i;

   for (i=starti; i<ignore; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   for (i=ignore+1; i<size; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   double popsdx = sqrt(sumx / (size-1));
   double popsdy = sqrt(sumy / (size-1));
   double covxy  = sumco / (size-1);

   return covxy / (popsdx * popsdy);
}




//////////////////////////////
//
// checkOptions --
//

void checkOptions(Options& opts, int argc, char* argv[]) {

   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, Jan 2008" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 30 Jan 2008" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

}



//////////////////////////////
//
// pearsonCorrelation --
//

double pearsonCorrelation(int size, double* x, double* y) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;
   double meanx = x[0];
   double meany = y[0];
   double sweep;
   double deltax;
   double deltay;

   int i;
   for (i=2; i<=size; i++) {
      sweep = (i-1.0) / i;
      deltax = x[i-1] - meanx;
      deltay = y[i-1] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / i;
      meany += deltay / i;
   }

   double popsdx = sqrt(sumx / size);
   double popsdy = sqrt(sumy / size);
   double covxy  = sumco / size;

   return covxy / (popsdx * popsdy);
}



//////////////////////////////
//
// model --
//

double model(double x) {
   double y;
   y =   0.516818 * pow(x,5.0) 
       - 1.708827 * pow(x,4.0) 
       + 2.771050 * pow(x,3.0) 
       - 3.489990 * pow(x,2.0) 
       + 2.910390 * x;
   return y;
}



//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}



// md5sum: f57d74c6d20dfb57c59d7d2d01e174d9 cherry.cpp [20081203]