//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Wed Apr 21 12:39:06 PDT 2004
// Last Modified: Wed Apr 21 12:39:09 PDT 2004
// Filename:      ...sig/examples/all/dbnovelty.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/dbnovelty.cpp
// Syntax:        C++; museinfo
//
// Description:   Generate a novelty curve for a melody based on other
//                melodies in a database.
//
// OTL: Query Novelty Profiles
//      -- Measure commonness of features in database
// Applications
//      -- Quantify the probability of the match being junk.
//      -- Identify potential errors in the database
//      -- Identify potential errors in the query
//

#include "humdrum.h"
#include "Matrix.h"

#include <string.h>
#include <ctype.h>
#include <math.h>

#ifndef OLDCPP
   #include <sstream>
   #define SSTREAM stringstream
   #define CSTRING str().c_str()
   using namespace std;
#else
   #ifdef VISUAL
      #include <strstrea.h>     /* for windows 95 */
   #else
      #include <strstream.h>
   #endif
   #define SSTREAM strstream
   #define CSTRING str()
#endif
   

///////////////////////////////////////////////////////////////////////////


class Feature {
   public:
      Feature(void) { clear(); }
      Feature(Feature& aFeature) { copyFeature(aFeature); }
      Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); }
      Feature& copyFeature(Feature& aFeature) {
         if (this == &aFeature) {
            return *this;
         }
         strcpy(istn, aFeature.istn);
         line = aFeature.line;
         int i, j;
         feature.setSize(aFeature.feature.getSize());
         for (i=0; i<feature.getSize(); i++) {
            feature[i].setSize(aFeature.feature[i].getSize());
            for (j=0; j<feature[i].getSize(); j++) {
               feature[i][j] = aFeature.feature[i][j];
            }
         }
         return *this;
      }
      void clear(void) { line = -1; istn[0] = '\0'; feature.setSize(0); }
      int is_valid(void) { return istn[0] != '\0' ? 1 : 0; }
       
      // data fields:
      char istn[256];                  // unique serial number/filename
      Array< Array<int> > feature;     // feature vector
      int line;                        // line number in original file
};


///////////////////////////////////////////////////////////////////////////

// function declarations
void      checkOptions(Options& opts, int argc, char* argv[]);
void      example(void);
void      usage(const char* command);
int       buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile);
int       getFeature(Feature& tempfeature, HumdrumRecord& record);
void      sortFeatures(Array<Feature>& features);
int       unanchoredmatch(Feature& query, Feature& datum);
void      buildMelodyMatchData(Array<Feature>& analysis, 
                               Array<Feature>& features, int melody);
int       getMatchCountUnanchored(Feature& query, Array<Feature>& features);
void      buildQuery(Feature& query, Feature& feature, 
                               int start, int length);
void      printAnalysisDataUnanchored(Array<Feature>& analysis, 
                               Array<Feature>& features, int melody);
void      printAnchorType(int index, int size);
void      buildNoveltyCurve(Array<double>& noveltycurve, 
                               Array<Feature>& analysis, int order);
void      printBaseMatrix(double* expected, int size);
void      printMatrix(Matrix<double>& matrix);


// global variables
Options   options;            // database for command-line arguments
int       debugQ = 0;         // used with the --debug option
int       numinputs = 0;
int       depth = -1;         // used with -d option
int       totalcount = 0;     // total number of features for all elements
int       appendQ = 0;        // used with -a option
int       eprofileQ = 0;      // used with -e option
int       mprofileQ = 1;      // used with -P option 
int       melody = 1;         // used with -m option
int       matrixQ = 0;        // print search matrix -x option
int       basematrixQ = 0;    // print search base matrix -X option
int       uniqueQ = 0;        // used with -u option
int       orderQ = 0;         // used with -o option
Array<Feature> features;


///////////////////////////////////////////////////////////////////////////

int main(int argc, char* argv[]) {
   HumdrumFile infile;

   // process the command-line options
   checkOptions(options, argc, argv);

   infile.clear();
   // if no command-line arguments read data file from standard input
   if (numinputs < 1) {
      infile.read(cin);
   } else {
      infile.read(options.getArg(1));
   }

   Array<int> sortlines;
   totalcount = buildFeaturesArray(features, infile);
   melody = melody - 1;
   if (melody < 0) {
      cout << "!! Melody index reset to 1" << endl;
      melody = 0;
   } else if (melody >= totalcount) {
      melody = totalcount - 1;
      cout << "!! Melody index reset to " << melody + 1 << endl;
   }

   Array<Feature> analysis;

   buildMelodyMatchData(analysis, features, melody);
   printAnalysisDataUnanchored(analysis, features, melody);

   return 0;
}

int       sortQ = 0;          // used with -o option

///////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// printMatrix -- for debugging of calculations.
//

void printMatrix(Matrix& matrix) {
   int i;
   int j;
   int rcount = matrix.getRowCount();
   int ccount = matrix.getColumnCount();
   cout << "{";
   for (i=0; i<rcount; i++) {
      cout << "{";
      for (j=0; j<ccount; j++) {
         cout << matrix.cell(i, j);
         if (j < ccount-1) {
            cout << ", ";
         }
      }
      if (i < rcount-1) {
         cout << "},\n";
      } 
   }
   cout << "}};\n";
   cout << "\n";
}



//////////////////////////////
//
// buildNoveltyCurve --
//

void buildNoveltyCurve(Array<double>& noveltycurve, Array<Feature>& analysis,
      int order) {
   int size = analysis[0].feature.getSize();

   Array<int> colmin;
   colmin.setSize(size);

   Array<int> colmax;
   colmax.setSize(size);

   int i, j;
   for (i=0; i<colmin.getSize(); i++) {
      colmin[i] = analysis[0].feature[0][i];
      colmax[i] = analysis[0].feature[0][i];
   }
   for(i=1; ifor (j=0; j<analysis[0].feature[i].getSize(); j++) {
         if (colmin[j] > analysis[0].feature[i][j]) {
            colmin[j] = analysis[0].feature[i][j];
         }
         if (colmax[j] < analysis[0].feature[i][j]) {
            colmax[j] = analysis[0].feature[i][j];
         }
      }
   }

   int uniquestop = 0;
   for(i=1; iif (colmax[size-i-1] != colmax[size-1]) {
         uniquestop = size-i;
         break;
      }
   }

   static int uniquecount = 0;
   if (uniqueQ && (uniquecount == 0)) {
      uniquecount++;
      cout << "!! Unique stop point = " << uniquestop << endl;
   }

/*   cout << "!! Min values for each position in colmin:" << endl;
   for (i=0; i<colmin.getSize(); i++) {
      cout << colmin[i];
      if (i<colmin.getSize()-1) {
         cout << " ";
      }
   }
   cout << endl;
*/

/*   cout << "!! Max values for each position in colmax:" << endl;
   for (i=0; i<colmax.getSize(); i++) {
      cout << colmax[i];
      if (i<colmax.getSize()-1) {
         cout << " ";
      }
   }
   cout << endl;
*/

   // generate normalized matrix of search results:
   Matrix<double> matrix;
   matrix.setSize(size, size);
   matrix.zero();
   for(i=0; ifor (j=0; j<size-i; j++) {
         // if (j >= uniquestop) {
         //    matrix.cell(i, i+j) = analysis[0].feature[i][j];
         //    break;
         // }
         if (analysis[0].feature[i][j] > 0) {
            // normalized matrix:
            // matrix.cell(i, i+j) = ((double)(colmin[j]))/analysis[0].feature[i][j];
            // un-normalized matrix:
            matrix.cell(i, i+j) = analysis[0].feature[i][j];
         } else {
            matrix.cell(i, i+j) = 0.0;
         }
      }
   }

   static int matrixcount = 0;
   if(matrixQ && matrixcount == 0) {
      matrixcount++;
      cout << "searchmatrix = ";
      printMatrix(matrix);
   }

   noveltycurve.setSize(size);
   noveltycurve.setAll(0);

   double expected[] = {7194.33, 3179.93, 883.09, 221.86, 55.75, 15.46, 5.61, 2.58, 1.77, 1.49,
1.39, 1.33, 1.3, 1.27, 1.25, 1.23, 1.22, 1.21, 1.21, 1.2, 1.19, 1.19,
1.18, 1.17, 1.17, 1.17, 1.16, 1.16, 1.15, 1.15, 1.15, 1.14, 1.14, 1.13,
1.13, 1.13, 1.12, 1.12, 1.12, 1.12, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11,
1.1, 1.1, 1.1, 1.09, 1.09, 1.09, 1.09, 1.08, 1.08, 1.08, 1.08, 1.08,
1.08, 1.08, 1.08, 1.08, 1.08, 1.08, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07,
1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07,
1.07, 1.07, 1.07, 1.08, 1.08, 1.07, 1.08, 1.07, 1.07, 1.07, 1.07, 1.06,
1.07, 1.07, 1.07, 1.07, 1.08, 1.07, 1.07, 1.07, 1.07, 1.07, 1.08, 1.07,
1.07, 1.07, 1.08, 1.08, 1.08, 1.07, 1.07, 1.08, 1.08, 1.08, 1.08, 1.08,
1.09, 1.09, 1.09, 1.09, 1.1, 1.1, 1.09, 1.1, 1.1, 1.1, 1.1, 1.1, 1.11,
1.1, 1.09, 1.09, 1.09, 1.08, 1.08, 1.08, 1.09, 1.09, 1.09, 1.09, 1.1,
1.1, 1.1, 1.1, 1.1, 1.11, 1.11, 1.11, 1.1, 1.1, 1.1, 1.1, 1.11, 1.09,
1.1, 1.1, 1.1, 1.1, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.1,
1.1, 1.1, 1.1, 1.1, 1.02, 1.02, 1.02, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03,
1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.04, 1.04,
1.04, 1.04, 1.04, 1.04, 1.04, 1.04, 1.04, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
   };


   static int basematrixcount = 0;
   if (basematrixQ && (basematrixcount == 0)) {
      basematrixcount++;
      printBaseMatrix(expected, size);
   }

   Array<double> tempvalues(size);
   tempvalues.setAll(0);

   for(i=0; i for(i=0; ifor (j=0; j<=order; j++) {
         if (i+j >= size) {
            break;
         }
         count++;
         noveltycurve[i+j] += tempvalues[i];
      }
      noveltycurve[i] /= (double)count;
   }

}



//////////////////////////////
//
// printBaseMatrix --
//

void printBaseMatrix(double* expected, int size) {
   Matrix<double> basematrix;
   basematrix.setSize(size, size);
   basematrix.zero();
   int i, j;
   for(i=0; ifor (j=i; j<size; j++) {
         if (j-i < 200) {
            basematrix.cell(i,j) = expected[j-i];
         } else {
            basematrix.cell(i,j) = 1.0;
         }
      }
   }
   cout << "basematrix = ";
   printMatrix(basematrix);

}



//////////////////////////////
//
// printVariable --
//

void printVariable(char* string) {
   int length = strlen(string);
   int i;
   for(i=0; iif (string[i] == '/') {
         cout << 'D';
      } else if (string[i] == '.') {
         cout << 'X';
      } else if (string[i] == '-') {
         cout << 'H';
      } else {
         cout << string[i];
      }
   }
}



//////////////////////////////
//
// printAnalysisDataUnanchored --
//

void printAnalysisDataUnanchored(Array<Feature>& analysis, 
      Array<Feature>& features, int melody) {

   Array<double> noveltycurve;

#define RANGE 8

   int ii;
   Array<double> noveltysum;
   noveltysum.setSize(analysis[0].feature[0].getSize());
   noveltysum.setAll(0);

   int count = 0;
   int i;
   for(ii=0; iiif (orderQ) {
         cout << "ORDER" << ii << " = {";
      }
      for (i=0; i<noveltycurve.getSize(); i++) {
         noveltysum[i] += noveltycurve[i];
         if (orderQ) {
            cout << noveltycurve[i];
            if (i<noveltycurve.getSize()-1) {
               cout << ", ";
            }
         }
      }
      if (orderQ) {
         cout << "};\n";
      }
   }

   printVariable(analysis[0].istn);
   cout << "= {";
   for (i=0; i<noveltysum.getSize(); i++) {
      cout << noveltysum[i]/(double)count;
      if (i<noveltysum.getSize()-1) {
         cout << ", ";
      }
   }
   cout << "};\n";

}



//////////////////////////////
//
// printAnchorType --
//

void printAnchorType(int index, int size) {
   index = index+1;
   int maxdigits = ((int)log10((double)size))+1;
   int digits = ((int)log10((double)index))+1;
   int i;
   for(i=0; i//////////////////////////////
//
// buildMelodyMatchData -- 
//

void buildMelodyMatchData(Array<Feature>& analysis, 
      Array<Feature>& features, int index) {
   analysis.setSize(1);
   analysis.allowGrowth(0);
   int j, k;
   int searchsize = 0;
   Feature query;
   strcpy(analysis[0].istn, features[index].istn);
   analysis[0].line = features[index].line;

   analysis[0].feature.setSize(features[index].feature[0].getSize());

   for (j=0; j<analysis[0].feature.getSize(); j++) {
      analysis[0].feature[j].setSize(analysis[0].feature.getSize()-j);
      analysis[0].feature[j].setAll(0);
      searchsize = analysis[0].feature[j].getSize();
      if ((depth > 0) && (searchsize > depth)) {
         searchsize = depth;
      }
      for (k=0; k<searchsize; k++) {
         buildQuery(query, features[index], j, k+1);
         analysis[0].feature[j][k] = getMatchCountUnanchored(query,features);
      }
   }
}



////////////////////
//
// buildQuery --
//

void buildQuery(Feature& query, Feature& feature, int start, int length) {
   query.feature.setSize(1);
   query.feature[0].setSize(length);
   int i;
   for(i=0; i//////////////////////////////
//
// getMatchCountUnanchored --
//

int getMatchCountUnanchored(Feature& query, Array<Feature>& features) {
   int count = 0;
   int i;
   for (i=0; i<features.getSize(); i++) {
      count += unanchoredmatch(query, features[i]);
   }

   return count;
}



//////////////////////////////
//
// unanchoredmatch --
//

int unanchoredmatch(Feature& query, Feature& datum) {
   int i, j;

   int qsize = query.feature[0].getSize();
   int dsize = datum.feature[0].getSize();
   int match = 0;
   for (i=0; i<=(dsize-qsize); i++) {
      if (query.feature[0][0] != datum.feature[0][i]) {
         continue;
      }

      // the first item of both strings matches, try the rest
      // and continue if not an exact match.
      match = 1;
      for (j=1; j<qsize; j++) {
         if (query.feature[0][j] != datum.feature[0][i+j]) {
            match = 0;
            break;
         }
      }
      if (match) {
         break;
      }
   }

// cout << "RESULT      " << match << endl;
   if(match) {
      return 1;
   } else {
      return 0;
   }   
}



//////////////////////////////
//
// buildFeaturesArray -- returns the total number of features
//

int buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile) {
   int i;
   Feature tempfeature;
   features.setSize(infile.getNumLines());
   features.setSize(0);
   int totalfeatures = 0;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_data) {
         totalfeatures += getFeature(tempfeature, infile[i]);
         if (tempfeature.is_valid()) {
            tempfeature.line = i;
            features.append(tempfeature);       
         }
      }
   }

   return totalfeatures;
}



//////////////////////////////
//
// getFeature -- returns the total number of features found on the line
//

int getFeature(Feature& tempfeature, HumdrumRecord& record) {
   tempfeature.clear();
   int i;
   int featurecount = 0;
   char buffer2[1024] = {0};

   // find the ISTN value
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**istn") == 0) {
         strcpy(buffer2, record[i]);
         if (debugQ) {
            cout << "found istn: " << buffer2 << endl;
         }
      } else if (strcmp(record.getExInterp(i), "**feature") == 0) {
         featurecount++;
      }
   }
   if ((featurecount == 0) || (buffer2[0] == '\0')) {
      // no valid data
      return 0;
   } 

   strcpy(tempfeature.istn, buffer2);

   tempfeature.feature.setSize(1);
   tempfeature.feature[0].setSize(100);
   tempfeature.feature[0].setGrowth(1000);
   tempfeature.feature[0].setSize(0);

   char buffer[100000] = {0};
   int code = 0;
   char* ptr = NULL;
   int total = 0;
   // find the first feature (second dimension to be added later)
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**feature") == 0) {
         strncpy(buffer, record[i], 100000);
         ptr = strtok(buffer, " ");
         while (ptr != NULL) {
            if (sscanf(ptr, "%d", &code) > 0) {
               tempfeature.feature[0].append(code);
               total++;
            }
            ptr = strtok(NULL, " ");
         }
      }
   }

   // do generalized features here.

   return total;
}



//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("m|melody-number=i:1", "set the melody index in the database");
   opts.define("d|depth=i:-1",        "set the maximum search depth");
   opts.define("a|append=b",          "append original data to the analysis");
   opts.define("S|no-state=b",        "do not print state distributions");
   opts.define("x|matrix=b",          "print search matrix");
   opts.define("X|basematrix=b",      "print predicted matrix");
   opts.define("u|unique=b",          "print unique limit");
   opts.define("o|order=b",           "print intermediate novelty curves");
   opts.define("e|entropy-profile=b", "print entropy profile");

   opts.define("debug=b");           // determine bad input line num
   opts.define("author=b");          // author of program
   opts.define("version=b");         // compilation info
   opts.define("example=b");         // example usages
   opts.define("h|help=b");          // short description
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, April 2004" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 29 April 2004" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   eprofileQ   = opts.getBoolean("entropy-profile");
   numinputs   = opts.getArgCount();
   debugQ      = opts.getBoolean("debug");
   depth       = opts.getInteger("depth");
   appendQ     = opts.getBoolean("append");
   melody      = opts.getInteger("melody-number");
   matrixQ     = opts.getBoolean("matrix");
   basematrixQ = opts.getBoolean("basematrix");
   uniqueQ     = opts.getBoolean("unique");
   orderQ      = opts.getBoolean("order");

}
  


//////////////////////////////
//
// example -- example usage of the program
//

void example(void) {
   cout <<
   "                                                                         \n"
   << endl;
}



//////////////////////////////
//
// usage -- gives the usage statement for the program
//

void usage(const char* command) {
   cout <<
   "                                                                         \n"
   << endl;
}



// md5sum: 841fdd011e26576c6d53c62895155473 dbnovelty.cpp [20080518]