//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Tue Apr 27 10:40:50 PDT 2004
// Last Modified: Tue Apr 27 10:40:54 PDT 2004
// Filename:      ...sig/examples/all/jointfeature.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/jointfeature.cpp
// Syntax:        C++; museinfo
//
// Description:   
//

#include "humdrum.h"

#include <string.h>
#include <ctype.h>

#ifndef OLDCPP
   #include <sstream>
   #define SSTREAM stringstream
   #define CSTRING str().c_str()
   using namespace std;
#else
   #ifdef VISUAL
      #include <strstrea.h>     /* for windows 95 */
   #else
      #include <strstream.h>
   #endif
   #define SSTREAM strstream
   #define CSTRING str()
#endif
   
#include <stdlib.h>             /* for qsort and bsearch functions */

///////////////////////////////////////////////////////////////////////////

#define MAXPRIME 169
int primes[MAXPRIME] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271,
277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359,
367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443,
449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619,
631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719,
727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821,
823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911,
919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009};

///////////////////////////////////////////////////////////////////////////

class Feature {
   public:
      Feature(void) { clear(); }
      Feature(Feature& aFeature) { copyFeature(aFeature); }
      Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); }
      Feature& copyFeature(Feature& aFeature) {
         if (this == &aFeature) {
            return *this;
         }
         strcpy(istn, aFeature.istn);
         line = aFeature.line;
         int i, j;
         feature.setSize(aFeature.feature.getSize());
         for (i=0; i<feature.getSize(); i++) {
            feature[i].setSize(aFeature.feature[i].getSize());
            for (j=0; j<feature[i].getSize(); j++) {
               feature[i][j] = aFeature.feature[i][j];
            }
         }
         return *this;
      }
      void clear(void) { line = -1; istn[0] = '\0'; feature.setSize(0); }
      int is_valid(void) { return istn[0] != '\0' ? 1 : 0; }
       
      // data fields:
      char istn[256];                  // unique serial number/fileaname
      Array< Array<int> > feature;     // feature vector
      int line;                        // line number in original file
};


///////////////////////////////////////////////////////////////////////////

// function declarations
void      checkOptions(Options& opts, int argc, char* argv[]);
void      example(void);
void      usage(const char* command);
int       buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile);
int       getFeature(Feature& tempfeature, HumdrumRecord& record);
void      buildAnalysisData(Array<Feature>& analysis, 
                              Array<Feature>& features);
void      getFeatureStateList(Array<int>& output, Array<Feature>& features,
                              int v);
int       getPrime(int value, Array<int>& states, int offset);
void      printFeature(int index, Array<Feature>& features1, 
                              Array<Feature>& features2, 
                              Array<int>& states1, Array<int>& states2);
int       findmatch(int index1, Array<Feature>& features1, 
                              Array<Feature>& features2);


// global variables
Options   options;            // database for command-line arguments
int       debugQ = 0;         // used with the --debug option
int       totalcount = 0;     // total number of features for all elements


///////////////////////////////////////////////////////////////////////////

int main(int argc, char* argv[]) {

   // process the command-line options
   checkOptions(options, argc, argv);

   cout << "!! Joint features for the following files" << endl;
   cout << "!!!FILE1:\t" << options.getArg(1) << endl;
   cout << "!!!FILE2:\t" << options.getArg(2) << endl;

   HumdrumFile infile1;
   HumdrumFile infile2;
   Array<int> states1;
   Array<int> states2;
   Array<Feature> features1;
   Array<Feature> features2;
   Array<int> translate1;
   Array<int> translate2;

   infile1.read(options.getArg(1));
   infile2.read(options.getArg(2));

   int symbolcount1 = buildFeaturesArray(features1, infile1);
   int symbolcount2 = buildFeaturesArray(features2, infile2);

   cout << "!!!SYMBOL_COUNT_1:\t" << symbolcount1 << endl;
   cout << "!!!SYMBOL_COUNT_2:\t" << symbolcount2 << endl;

   getFeatureStateList(states1, features1, 1);
   getFeatureStateList(states2, features2, 2);

   cout << "!!!STATE_COUNT_1:\t" << states1.getSize() << endl;
   cout << "!!!STATE_COUNT_2:\t" << states1.getSize() << endl;

   if (states1.getSize() + states2.getSize() >= MAXPRIME) {
      cout << "Error: too many states to process correctly\n";
      exit(1);
   }

   // print symbol translations:
   int i;
   for (i=0; i<states1.getSize(); i++) {
      cout << "!!!STATE_" << states1[i] << "_1:\t" << primes[i] << endl;
   }
   for (i=0; i<states2.getSize(); i++) {
      cout << "!!!STATE_" << states2[i] << "_2:\t" 
           << primes[i + states1.getSize()] << endl;
   }

   // now it is time to reassemble the feature data:


   cout << "**istn\t**feature\n";
   for (i=0; i<features1.getSize(); i++) {
      printFeature(i, features1, features2, states1, states2);
   }
   cout << "*-\t*-\n";

   return 0;
}


///////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// findmatch --
//

int findmatch(int index1, Array& features1, Array& features2) {
   int high;
   int low;
   int i;
   for (i=0; i<features2.getSize(); i++) {
      high = index1 + i;
      low = index1 - i;
      if (low >= 0) {
         if (strcmp(features1[index1].istn, features2[low].istn) == 0) {
            return low;
         }
      }
      if (high < features2.getSize()) {
         if (strcmp(features1[index1].istn, features2[high].istn) == 0) {
            return high;
         }
      }
   }

   return -1;
}



//////////////////////////////
//
// printFeature --
//

void printFeature(int index, Array<Feature>& features1, 
      Array<Feature>& features2, Array<int>& states1, Array<int>& states2) {
   int index1 = index;
   int index2 = index;

   if (index < features2.getSize()-1) {
      if (strcmp(features1[index].istn, features2[index].istn) != 0) {
         index2 = findmatch(index1, features1, features2);
      }
      if (index2 < 0) {
         return;  // can not print the features because no match in second set.
      }
   } else {
      return;
   }

   cout << features1[index].istn;
   cout << "\t";

   int max1 = features1[index1].feature[0].getSize();
   int max2 = features2[index2].feature[0].getSize();
   int max = max1 < max2 ? max1 : max2;

   int i;
   int value1; 
   int value2;
   for (i=0; i<max; i++) {
      value1 = getPrime(features1[index1].feature[0][i], states1, 0);
      value2 = getPrime(features2[index2].feature[0][i], states2, 
                        states1.getSize());
      cout << value1 * value2;
      if (i<max-1) {
         cout << " ";
      }
   }
   cout << "\n";

}



///////////////////////////////
//
// getPrime --
//

int getPrime(int value, Array& states, int offset) {
   int i;
   int index = -1;
   for (i=0; i<states.getSize(); i++) {
      if (states[i] == value) {
         index = i;
         break;
      }
   }
   if (index < 0) {
      cout << "Error: could not find an expected state" << endl;
      exit(1);
   }

   if (index + offset > MAXPRIME) {
      cout << "Error: exceeded the list of primes available for assignment" 
           << endl;
   }

   return primes[index + offset];
} 



//////////////////////////////
//
// getFeatureStateList -- 
//

void getFeatureStateList(Array& output, Array& features, int v) {
#define MAXSTATE 200000
   int maxstate = 0;
   Array<int> states;
   states.setSize(MAXSTATE);
   states.setAll(0);
   int warnnegative = 0;
   int warnmaximum = 0;
   int index;
   maxstate = 0;
   int i, j;
   for (i=0; i<features.getSize(); i++) {
      for (j=0; j<features[i].feature[0].getSize(); j++) {
         index = features[i].feature[0][j];
         if (index < 0) {
            warnnegative++;
            continue;
         } else if (index >= MAXSTATE) {
            warnmaximum++;
         }
         states[index]++;
         if (index > maxstate) {
            maxstate = index;
         }
      }
   }

   if (warnnegative) {
      cout << "!! Warning: there were " << warnnegative 
           << " uncounted negative states in the data set\n";
   }
   if (warnmaximum) {
      cout << "!! Warning: there were " << warnmaximum
           << " uncounted states exceeding 99,999 in the data set\n";
   }

   int count = 0;
   for (i=0; i<=maxstate; i++) {
      if (states[i]) {
         count++;
      }
   }

   output.setSize(maxstate+1);
   output.setSize(0);
   for (i=0; i<maxstate+1; i++) {
      if (states[i] > 0) {
         output.append(i);
         cout << "!!!SYM-COUNT-" << i << "_" << v << ":\t" << states[i] << endl;
      }
   }

}



//////////////////////////////
//
// featuresort -- sort items by feature space elements.
//

int featuresort(const void* A, const void* B) {
   Feature& a = *((Feature*)A);
   Feature& b = *((Feature*)B);
   int i;
   int sizea = a.feature[0].getSize();
   int sizeb = b.feature[0].getSize();
   int minsize = sizea < sizeb ? sizea : sizeb;
   for (i=0; i<minsize; i++) {
      if (a.feature[0][i] < b.feature[0][i]) {
         return -1;
      }
      if (a.feature[0][i] > b.feature[0][i]) {
         return +1;
      }
   }
 
   if (sizea > sizeb) {
      return +1;
   } else if (sizea < sizeb) {
      return -1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// featuresort2 -- sort for count matches  limit to the size of the first
//   element
//

int featuresort2(const void* A, const void* B) {
   Feature& a = *((Feature*)A);
   Feature& b = *((Feature*)B);
   int i;
   int sizea = a.feature[0].getSize();
   int sizeb = b.feature[0].getSize();
   int minsize = sizea < sizeb ? sizea : sizeb;
   for (i=0; i<minsize; i++) {
      if (a.feature[0][i] < b.feature[0][i]) {
         return -1;
      }
      if (a.feature[0][i] > b.feature[0][i]) {
         return +1;
      }
   }

   // all of the elements in the search range are equal
   return 0;
 
}



//////////////////////////////
//
// buildFeaturesArray -- returns the total number of features
//

int buildFeaturesArray(Array& features, HumdrumFile& infile) {
   int i;
   Feature tempfeature;
   features.setSize(infile.getNumLines());
   features.setSize(0);
   int totalfeatures = 0;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_data) {
         totalfeatures += getFeature(tempfeature, infile[i]);
         if (tempfeature.is_valid()) {
            tempfeature.line = i;
            features.append(tempfeature);       
         }
      }
   }

   return totalfeatures;
}



//////////////////////////////
//
// getFeature -- returns the total number of features found on the line
//

int getFeature(Feature& tempfeature, HumdrumRecord& record) {
   tempfeature.clear();
   int i;
   int featurecount = 0;
   char buffer2[1024] = {0};

   // find the ISTN value
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**istn") == 0) {
         strcpy(buffer2, record[i]);
         if (debugQ) {
            cout << "found istn: " << buffer2 << endl;
         }
      } else if (strcmp(record.getExInterp(i), "**feature") == 0) {
         featurecount++;
      }
   }
   if ((featurecount == 0) || (buffer2[0] == '\0')) {
      // no valid data
      return 0;
   } 

   strcpy(tempfeature.istn, buffer2);

   tempfeature.feature.setSize(1);
   tempfeature.feature[0].setSize(100);
   tempfeature.feature[0].setGrowth(1000);
   tempfeature.feature[0].setSize(0);

   char buffer[100000] = {0};
   int code = 0;
   char* ptr = NULL;
   int total = 0;
   // find the first feature (second dimension to be added later)
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**feature") == 0) {
         strncpy(buffer, record[i], 100000);
         ptr = strtok(buffer, " ");
         while (ptr != NULL) {
            if (sscanf(ptr, "%d", &code) > 0) {
               tempfeature.feature[0].append(code);
               total++;
            }
            ptr = strtok(NULL, " ");
         }
      }
   }

   // do generalized features here.

   return total;
}



//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("debug=b");           // determine bad input line num
   opts.define("author=b");          // author of program
   opts.define("version=b");         // compilation info
   opts.define("example=b");         // example usages
   opts.define("h|help=b");          // short description
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, April 2004" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 15 April 2004" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   if (opts.getArgCount() != 2) {
      cout << "Usage: " << opts.getCommand() << " file1 file2" << endl;
      exit(1);
   }
}
  


//////////////////////////////
//
// example -- example usage of the program
//

void example(void) {
   cout <<
   "                                                                         \n"
   << endl;
}



//////////////////////////////
//
// usage -- gives the usage statement for the program
//

void usage(const char* command) {
   cout <<
   "                                                                         \n"
   << endl;
}



// md5sum: f8a32e21a8a2eb9b3c88bdd3e1da11be jointfeature.cpp [20050403]