//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Fri Nov 23 07:36:56 PST 2007
// Last Modified: Fri Nov 23 07:37:01 PST 2007
// Filename:      ...sig/examples/all/ned.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/ned/ned.cpp
// Syntax:        C++; museinfo
//
// Description:   Noise Equivalent Distance metric.
//

#include "humdrum.h"
#include <math.h>

// function declarations:
void     checkOptions(Options& opts, int argc, char** argv);
void     example(void);
void     usage(const char* command);
double   pearsonCorrelation(int size, double* a, double* b);
double   getMean(int size, double* a);
void     fillArray(HumdrumFile& infile, Array<double>& targarray, 
		                int targ);
double   getNed(HumdrumFile& infile, int ref, int targ, 
                                double& sd, double& correl);
void     addNoise(double amp, double* output, double* input, 
                                int siz);
double   getStandardDeviation(double mean, double* data, int siz);
void     normalizeSequence(double* seq, int size);
void     printRankData(HumdrumFile& infile, int reference);
int      getSeqCount(HumdrumFile& infile);
int      getNameIndex(HumdrumFile& infile);
int      getPidIndex(HumdrumFile& infile);
void     sortRanks(Array<double>& values, Array<int>& ranks);
int      numbersort(const void* A, const void* B);
int      numbersortmin(const void* A, const void* B);
void     sortRanksReverse(Array<double>& values, Array<int>& ranks);

#define NOISE_UNKNOWN  0
#define NOISE_CONSTANT 1
#define NOISE_PROPORTIONAL 2

// User interface variables:
Options  options;
int      reference = 0;
int      target    = 1;
int      avgcount  = 300;
double   tolerance = 0.001;
double   decay     = 0.95;
int      normalQ   = 0;
int      noisetype = NOISE_PROPORTIONAL;
int      rankQ     = 0;
int      countcols = 0;

//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {
   // process the command-line options
   checkOptions(options, argc, argv);

   srand48(time(NULL));

   HumdrumFile infile;

   if (options.getArgCount() <= 0) {
      infile.read(cin);
   } else {
      infile.read(options.getArg(1));
   }

   if (countcols) {
      cout << getSeqCount(infile) << endl;
   }
   else if (rankQ) {
      printRankData(infile, reference);
   } else {
      double fsd = 0.0;
      double rsd = 0.0;
      double corel = 0.0;

      double fned = getNed(infile, reference, target, fsd, corel);
      double rned = getNed(infile, target, reference, rsd, corel);

      cout << "Forward NED:\t" << corel << "\t" << fned << "\t" << fsd << endl;
      cout << "Reverse NED:\t" << corel << "\t" << rned << "\t" << rsd << endl;
   }

   return 0;
}


//////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// printRankData -- 
//
//**label	**pid	**rank0	**score0	**rank1	**score1	**rank2	**score2	**rank3	**score3// Ashkenazy 1981	pid9058-19	target	target	target	target	target	target	target	target

void printRankData(HumdrumFile& infile, int reference) {
   int seqcount = getSeqCount(infile);

   Array<double> Corel;
   Array<double> Ned;
   Array<double> Sd;
   Array<int>    CorelRank;
   Array<int>    NedRank;

   Corel.setSize(seqcount);
   Ned.setSize(seqcount);
   Sd.setSize(seqcount);
   CorelRank.setSize(seqcount);
   NedRank.setSize(seqcount);

   Corel.allowGrowth(0);
   Ned.allowGrowth(0);
   Sd.allowGrowth(0);
   CorelRank.allowGrowth(0);
   NedRank.allowGrowth(0);

   int namei = getNameIndex(infile);
   int pidi  = getPidIndex(infile);

   double fsd   = 0.0;
   double fned  = 0.0;
   double corel = 0.0;
   int i;
   for (i=0; i<seqcount; i++) {
      if (reference == i) {
         // if (namei >= 0) {
         //    cout << infile[namei][i]+1 << "\ttarget\ttarget" << endl;
         // }
	 Ned[i] = 0.0;
	 Sd[i] = 0.0;
	 Corel[i] = 1.0;
	 continue;
      } 
      fned = getNed(infile, reference, i, fsd, corel);
      Ned[i]    = fned;
      Sd[i]     = fsd;
      Corel[i]  = corel;

//      if (namei >= 0) {
//         cout << infile[namei][i]+1 << "\t" << corel << "\t" 
//              << fned << "\t" << fsd << endl;
//      }

   }

   
   sortRanksReverse(Ned, NedRank);
   sortRanks(Corel, CorelRank);

   cout << "**label\t**pid\t**rank0\t**score0\t**rankN\t**NED\n";
   for (i=0; i<seqcount; i++) {
      if (namei >= 0) {
         cout << infile[namei][i]+1 << "\t";
      } else {
         cout << ".\t";
      }
      if (pidi >= 0 && strlen(infile[pidi][i]+1) > 0) {
         cout << infile[pidi][i]+1 << "\t";
      } else if (strlen(infile[pidi][i]+1) == 0) {
         cout << ".\t";
      } else {
         cout << ".\t";
      }	   
      if (reference == i) {
         cout << "target" << "\t" 
              << "target" << "\t" 
              << "target" << "\t" 
              << "target" << "\t" 
              << "target" << endl;
      } else {
         cout << CorelRank[i] << "\t" 
              << Corel[i]     << "\t" 
              << NedRank[i]   << "\t" 
              << Ned[i]       << "\t" 
              << Sd[i]        << endl;
      }
   }
   cout << "*-\t*-\t*-\t*-\t*-\t*-\n";

}



//////////////////////////////
//
// getNameIndex --
//

int getNameIndex(HumdrumFile& infile) {
   int i;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_local_comment) {
         if (isupper(infile[i][0][1])) {
            return i;
         }
      }
   }

   return -1;
}



//////////////////////////////
//
// getPidIndex --
//

int getPidIndex(HumdrumFile& infile) {
   int i;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_local_comment) {
         if (infile[i][0][1] == 'p' &&
             infile[i][0][2] == 'i' &&
             infile[i][0][3] == 'd') {
            return i;
         }
      }
   }

   return -1;
}



//////////////////////////////
//
// getSeqCount -- return the number of columns in the data
//

int getSeqCount(HumdrumFile& infile) {
   int i;
   int output = 0;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_data) {
         output = infile[i].getFieldCount();
         break;
      }
   }

   return output;
}



//////////////////////////////
//
// getNed -- measure the Ned of a sequence
//

double getNed(HumdrumFile& infile, int ref, int targ, double& sd, 
      double& correl) {
   Array<double> refarray;
   Array<double> targarray;

   fillArray(infile, refarray, ref);
   fillArray(infile, targarray, targ);

   if (normalQ) {
      normalizeSequence(refarray.getBase(), refarray.getSize());
      normalizeSequence(targarray.getBase(), targarray.getSize());
   }

   if (refarray.getSize() != targarray.getSize()) {
      cerr << "Error: data lengths do not match" << endl;
      exit(1);
   }

   // calculate the target correlation
   correl = pearsonCorrelation(refarray.getSize(), 
         refarray.getBase(), targarray.getBase());

   if (correl < 0.0) {
      sd = 0.0;
      return 100.0;
   }

   // cout << "Correl = " << correl << endl;

   double amp = 0.0;
   double increment = 0.1;

   double currcore = 1.0;
   double tempdata[refarray.getSize()];
   int    direction = 1;
   int    lastdirection = 1;
   int    tcount = avgcount;

   double sdvals[tcount];   

   double counter = 0;
   int    i;
   while (fabs(currcore - correl) > tolerance) {
      counter++;
      if (currcore > correl) {
         amp += increment;
	 if (direction < 0) { 
            increment *= decay;
	 }
	 if (lastdirection > 0) {
	    direction = 1;
         }
      } else if (currcore < correl ) {
         amp -= increment;
	 if (direction < 0) {
            increment *= decay;
         }
	 if (lastdirection > 0) {
	    direction = -1;
         }
      }
      if (direction < 0) {
         direction--;
      }  else {
         direction++;
      }
      lastdirection = direction;

//      if (abs(direction) > 4) {
//         increment /= decay;
//	   cout <<"DIR = " << direction << "\t" << increment << endl;
//      }
        
      currcore = 0.0;
      for (i=0; i<tcount; i++) { 
         addNoise(amp, tempdata, refarray.getBase(), refarray.getSize());
         sdvals[i] = pearsonCorrelation(refarray.getSize(), tempdata, 
            refarray.getBase());
         currcore += sdvals[i];
      }
      currcore = currcore / tcount;

      if (counter > 1000) {
         break;
      }

      // cout << correl << "\t" << currcore << "\t" 
      //      << amp << "\t" << increment << "\t" << direction << endl;

   }

   sd = getStandardDeviation(currcore, sdvals, tcount);

   return amp;

}



//////////////////////////////
//
// addNoise --
//

void addNoise(double amp, double* output, double* input, int siz) {
   int i;
   for (i=0; i<siz; i++) {
      if (noisetype == NOISE_CONSTANT) {
         output[i] = input[i] + (drand48() * 2.0 - 1.0) * amp;
      } else { // proportional
         output[i] = input[i] + (drand48() * 2.0 - 1.0) * amp * input[i];
      }
   }
}



//////////////////////////////
//
// fillArray --
//

void fillArray(HumdrumFile& infile, Array& targarray, int targ) {
   int i;
   targarray.setSize(infile.getNumLines());
   targarray.setSize(0);

   double value;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_data) {
         if (targ >= infile[i].getFieldCount()) {
            cerr << "Error on line " << i+1 << " of input data." << endl;
            exit(1);
         }
         value = strtod(infile[i][targ], NULL);
         targarray.append(value);
      }
   }
}



//////////////////////////////
//
// pearsonCorrelation --
//

double pearsonCorrelation(int size, double* a, double* b) {
   double meana = getMean(size, a);
   double meanb = getMean(size, b);

   double topsum     = 0.0;
   double bottomasum = 0.0;
   double bottombsum = 0.0;

   int i;
   for (i=0; i<size; i++) {
      topsum += (a[i] - meana) * (b[i] - meanb);
      bottomasum += (a[i] - meana) * (a[i] - meana);
      bottombsum += (b[i] - meanb) * (b[i] - meanb);
   }

   if (bottomasum == 0.0 || bottombsum == 0.0) {
      return 0.0;
   }
   return topsum / sqrt(bottomasum * bottombsum);
}



//////////////////////////////
//
// getMean --
//

double getMean(int size, double* a) {
   if (size <= 0) {
      return 0.0;
   }

   int i;
   double sum = 0.0;
   for (i=0; i<size; i++) {
      sum += a[i];
   }

   return sum / size;
}



//////////////////////////////
//
// checkOptions -- 
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("N|normal=b",        "normalize sequences before calculations"); 
   opts.define("R|rank=b",            "rank calculations using NED"); 
   opts.define("C|constant=b",        "constant noise amplitude"); 
   opts.define("t|target=i:1",        "target sequence column"); 
   opts.define("r|reference=i:0",     "reference sequence column"); 
   opts.define("c|count=i:300",       "Count of trials to average"); 
   opts.define("T|tolerance=d:0.001", "Correlation match tolerance"); 
   opts.define("D|decay=d:0.95",      "Correlation match decay"); 
   opts.define("m|max=b",             "Display column count in data file");

   opts.define("author=b",  "author of program"); 
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");   
   opts.define("help=b",  "short description");
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, Nov 2007" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 23 Nov 2007" << endl;
      cout << "compiled: " << __DATE__ << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   avgcount  = opts.getInteger("count");
   tolerance = opts.getDouble("tolerance");
   decay     = opts.getDouble("decay");
   normalQ   = opts.getBoolean("normal");
   target    = opts.getInteger("target");
   reference = opts.getInteger("reference");
   rankQ     = opts.getBoolean("rank");
   countcols = opts.getBoolean("max");

   if (opts.getBoolean("constant")) {
      noisetype = NOISE_CONSTANT;
   }
}



//////////////////////////////
//
// getStandardDeviation --
//

double getStandardDeviation(double mean, double* data, int siz) {
   double sum = 0.0;
   double value;
   int i;
   for (i=0; i<siz; i++) {
      value = data[i] - mean;
      sum += value * value;
   }
   
   return sqrt(sum/siz);
}


//////////////////////////////
//
// normalizeSequence --
//

void normalizeSequence(double* seq, int size) {
   double mean = getMean(size, seq);
   double sd   = getStandardDeviation(mean, seq, size);

   int i;
   for (i=0; i<size; i++) {
      seq[i] = (seq[i] - mean) / sd;
   }
}
	


//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}



//////////////////////////////
//
// sortRanks -- sort rank values from highest to lowest
//

void sortRanks(Array& values, Array& ranks) {
   int i;
   Array<double*> presort;
   Array<double>  sortdata;
   presort.setSize(values.getSize());
   sortdata.setSize(values.getSize());
   for (i=0; i<values.getSize(); i++) {
      sortdata[i] = values[i];
      presort[i]  = &(sortdata[i]);
   }

   qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersort);

   for (i=0; i<values.getSize(); i++) {
      *presort[i] = i;
   }

   for (i=0; i<values.getSize(); i++) {
      ranks[i] = (int)sortdata[i];
   }
}



//////////////////////////////
//
// sortRanksReverse -- sort rank values from highest to lowest
//

void sortRanksReverse(Array& values, Array& ranks) {
   int i;
   Array<double*> presort;
   Array<double>  sortdata;
   presort.setSize(values.getSize());
   sortdata.setSize(values.getSize());
   for (i=0; i<values.getSize(); i++) {
      sortdata[i] = values[i];
      presort[i]  = &(sortdata[i]);
   }

   qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersortmin);

   for (i=0; i<values.getSize(); i++) {
      *presort[i] = i;
   }

   for (i=0; i<values.getSize(); i++) {
      ranks[i] = (int)sortdata[i];
   }
}



//////////////////////////////
//
// numbersort -- sort items largest first
//

int numbersort(const void* A, const void* B) {
   double valuea = **((double**)A);
   double valueb = **((double**)B);
   if (valuea > valueb) {
      return -1;
   } else if (valuea < valueb) {
      return +1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// numbersortmin -- sort items largest first
//

int numbersortmin(const void* A, const void* B) {
   double valuea = **((double**)A);
   double valueb = **((double**)B);
   if (valuea < valueb) {
      return -1;
   } else if (valuea > valueb) {
      return +1;
   } else {
      return 0;
   }
}



// md5sum: 37e2b51bce0d7e5ff2e231e679e44c82 ned.cpp [20080518]