//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Tue Dec 23 12:42:23 PST 2008
// Filename:      ...sig/examples/all/cherry.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/cherry.cpp
// Syntax:        C++; museinfo
//
// Description:   Create a dissimilarity plot between two sequences.
//

#include "humdrum.h"
#include "MidiFile.h"

#include <math.h>

#include <iomanip>


// function declarations:
void     checkOptions(Options& opts, int argc, char** argv);
void     example(void);
void     usage(const char* command);
void     getSequences(Array<double>& a, Array<double>& b, 
                                 HumdrumFile& infile);
int      compareSequences(Array<double>& a, Array<double>& b);
double   pearsonCorrelation(int size, double* x, double* y);
double   pearsonCorrelationHole(int size, double* x, double* y, int ignore);
double   getMean(Array<double>& data);
double   getSampleSD(double mean, Array<double>& data);
void     printData(Array<int>& ranking, Array<double>& data, 
		                 Array<double>& a, Array<double>& b);
void     getDissimilarityData(Array<int>& ranking, Array<double>& data, 
                                 Array<double>& a, Array<double>& b);

template<class type>
void     removeIndex(Array<type>& a, int index);
// User interface variables:
Options   options;

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {

   // process the command-line options
   checkOptions(options, argc, argv);

   HumdrumFile infile;
   infile.read(options.getArg(1));

   Array<double> a;
   Array<double> b;

   getSequences(a, b, infile);

   Array<double> data;
   Array<int> ranking;
   getDissimilarityData(ranking, data, a, b);
   printData(ranking, data, a, b);

   return 0;
}

//////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// printData --
//

void printData(Array& ranking, Array& data, Array& a, Array& b) {
   int i;
   cout << "data = {\n";
   for (i=0; i<data.getSize(); i++) {
      cout << "\t{" << i << ", " 
	   << setiosflags(ios::fixed) << setprecision(6) 
	   << data[i] << ", " 
	   << ranking[i] << ", " 
           << a[i] << ", " << b[i] << "}";
      if (i < data.getSize() - 1) {
         cout << ",";
      }
      cout << "\n";
   }
   cout << "};\n";
}



//////////////////////////////
//
// getDissimilarityData --
//

void getDissimilarityData(Array<int>& ranking, Array<double>& data, 
      Array<double>& a, Array<double>& b) {

   data.setSize(a.getSize());
   data.allowGrowth(0);
   data.setAll(0);

   ranking.setSize(a.getSize());
   ranking.allowGrowth(0);
   ranking.setAll(-1);

   Array<double> aa;
   Array<double> bb;
   Array<int> index;
   index.setSize(a.getSize());
   int i;
   for (i=0; i<a.getSize(); i++) {
      index[i] = i;
   }
   aa = a;
   bb = b;

   int best;
   int iterations = a.getSize()-15;

   for (i=0; i<iterations; i++) {
      best = compareSequences(aa, bb);
      removeIndex(aa, best);
      removeIndex(bb, best);
      ranking[index[best]] = i+1;
      data[index[best]] = 1.0 - pearsonCorrelation(aa.getSize(), 
                                                   aa.getBase(), 
                                                   bb.getBase());
      removeIndex(index, best);
   }
}



//////////////////////////////
//
// removeIndex --
//

template<class type>
void removeIndex(Array& a, int index) {
   int i;
   int size = a.getSize();
   for (i=index; i<size-1; i++) {
      a[i] = a[i+1];
   }
   a.setSize(size-1);
}



//////////////////////////////
//
// getSequences --
//

void getSequences(Array& a, Array& b, HumdrumFile& infile) {
   a.setSize(infile.getNumLines());
   b.setSize(infile.getNumLines());
   a.setSize(0);
   b.setSize(0);
   double value;
   int i;

   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }
      value = 0;
      sscanf(infile[i][0], "%lf", &value);
      a.append(value);

      value = 0;
      sscanf(infile[i][1], "%lf", &value);
      b.append(value);
   }
}



//////////////////////////////
//
// compareSequences -- returns the index of the element which, when
//    removed from the sequence, improves the correlation measurement
//    between the two sequences.
//

int compareSequences(Array& a, Array& b) {
   int i;
   double basecorr;
   double corr;

   Array<double> corrlist;
   corrlist.setSize(a.getSize());
   corrlist.setSize(0);
   Array<int> index;
   index.setSize(a.getSize());
   index.setSize(0);

   basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
//   cout << "base" << "\t" << basecorr << "\n";

   for (i=0; i<a.getSize(); i++) {
      corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
      corrlist.append(corr);
      index.append(i);
   }

   double mean = getMean(corrlist);
   double sd = getSampleSD(mean, corrlist);


   Array<double> zscores;
   zscores.setSize(corrlist.getSize());
   int asize = corrlist.getSize();
   for (i=0; i<asize; i++)  {
      zscores[i] = (corrlist[i] - mean) / sd;
   }

   int maxi = index[0];
   for (i=1; i<asize; i++) {
      if (zscores[i] > zscores[maxi]) {
         maxi = i;
      }
   }
//   cout << "max\t" << maxi << "\t";
//   cout << "mean\t" << mean << "\t";
//   cout << "sd\t"   << sd   << "\t";
//   cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
//
//   for (i=0; i<asize; i++) {
//      cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
//   }

   return maxi;
}



//////////////////////////////
//
// getSampleSD --
//

double getSampleSD(double mean, Array& data) {
   int size = data.getSize();
   double sum = 0.0;
   double value;
   int i;
   for (i=0; i<size; i++) {
      value = data[i] - mean;
      sum += value * value;
   }

   return sqrt(sum / (size - 1.0));
}



//////////////////////////////
//
// getMean --
//

double getMean(Array& data) {
   int size = data.getSize();
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += data[i];
   }

   return sum / size;
}


//////////////////////////////
//
// ranksort -- sort counts by largest first
//

int ranksort(const void* A, const void* B) {
   int& a = *(*((int**)A));
   int& b = *(*((int**)B));
   if (a < b) {
      return +1;
   } else if (a > b) {
      return -1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// pearsonCorrelationHole --
//

double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;

   double meanx = x[0];
   double meany = y[0];

   int starti = 1;

   if (ignore == 0) {
      meanx = x[1];
      meany = y[1];
      starti = 2;
   }

   double sweep;
   double deltax;
   double deltay;

   int i;

   for (i=starti; i<ignore; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   for (i=ignore+1; i<size; i++) {
      sweep = i / (i+1.0);
      deltax = x[i] - meanx;
      deltay = y[i] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / (i+1);
      meany += deltay / (i+1);
   }

   double popsdx = sqrt(sumx / (size-1));
   double popsdy = sqrt(sumy / (size-1));
   double covxy  = sumco / (size-1);

   return covxy / (popsdx * popsdy);
}




//////////////////////////////
//
// checkOptions --
//

void checkOptions(Options& opts, int argc, char* argv[]) {

   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, Jan 2008" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 30 Jan 2008" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

}



//////////////////////////////
//
// pearsonCorrelation --
//

double pearsonCorrelation(int size, double* x, double* y) {

   double sumx  = 0.0;
   double sumy  = 0.0;
   double sumco = 0.0;
   double meanx = x[0];
   double meany = y[0];
   double sweep;
   double deltax;
   double deltay;

   int i;
   for (i=2; i<=size; i++) {
      sweep = (i-1.0) / i;
      deltax = x[i-1] - meanx;
      deltay = y[i-1] - meany;
      sumx  += deltax * deltax * sweep;
      sumy  += deltay * deltay * sweep;
      sumco += deltax * deltay * sweep;
      meanx += deltax / i;
      meany += deltay / i;
   }

   double popsdx = sqrt(sumx / size);
   double popsdy = sqrt(sumy / size);
   double covxy  = sumco / size;

   return covxy / (popsdx * popsdy);
}



//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}



// md5sum: 67d7640232ce34596f03e06a4602ba01 dissim.cpp [20081225]