//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Thu Apr  8 17:06:42 PDT 2004
// Last Modified: Fri Apr  9 00:01:00 PDT 2004
// Filename:      ...sig/examples/all/ttuprofile.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/ttuprofile.cpp
// Syntax:        C++; museinfo
//
// Description:   
//

#include "humdrum.h"

#include <string.h>
#include <ctype.h>
#include <math.h>

#ifndef OLDCPP
   #include <sstream>
   #define SSTREAM stringstream
   #define CSTRING str().c_str()
   using namespace std;
#else
   #ifdef VISUAL
      #include <strstrea.h>     /* for windows 95 */
   #else
      #include <strstream.h>
   #endif
   #define SSTREAM strstream
   #define CSTRING str()
#endif
   
#include <stdlib.h>             /* for qsort and bsearch functions */

///////////////////////////////////////////////////////////////////////////


class Feature {
   public:
      Feature(void) { clear(); }
      Feature(Feature& aFeature) { copyFeature(aFeature); }
      Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); }
      Feature& copyFeature(Feature& aFeature) {
         if (this == &aFeature) {
            return *this;
         }
         strcpy(istn, aFeature.istn);
         line = aFeature.line;
         int i, j;
         feature.setSize(aFeature.feature.getSize());
         for (i=0; i<feature.getSize(); i++) {
            feature[i].setSize(aFeature.feature[i].getSize());
            for (j=0; j<feature[i].getSize(); j++) {
               feature[i][j] = aFeature.feature[i][j];
            }
         }
         return *this;
      }
      void clear(void) { line = -1; istn[0] = '\0'; feature.setSize(0); }
      int is_valid(void) { return istn[0] != '\0' ? 1 : 0; }
       
      // data fields:
      char istn[256];                  // unique serial number/fileaname
      Array< Array<int> > feature;     // feature vector
      int line;                        // line number in original file
};


///////////////////////////////////////////////////////////////////////////

// function declarations
void      checkOptions(Options& opts, int argc, char* argv[]);
void      example(void);
void      usage(const char* command);
int       buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile);
int       getFeature(Feature& tempfeature, HumdrumRecord& record);
void      buildAnalysisData(Array<Feature>& analysis, 
                              Array<Feature>& features);
void      printAnalysisData(Array<Feature>& analysis, Array<int>& ttu,
                              Array<int>& tts, Array<Feature>& features,
                              Array<int>& sortlines);
void      sortFeatures(Array<Feature>& features);
int       featuresort(const void* A, const void* B);
int       featuresort2(const void* A, const void* B);
int       getMatchCount(Array<Feature>& features, int index, int level);
void      generateTTData(Array<Feature>& analysis, Array<int>& ttu, 
                              Array<int>& tts);
void      sortLines(Array<int>& sortlines);
int       linesort(const void* A, const void* B);
int       unanchoredmatch(Feature& query, Feature& datum);
void      buildAnalysisDataUnanchored(Array<Feature>& analysis, 
                              Array<Feature>& features);
int       getMatchCountUnanchored(Feature& query, Array<Feature>& features);
void      buildQuery(Feature& query, Feature& feature, 
                              int start, int length);
void      printAnalysisDataUnanchored(Array<Feature>& analysis, 
                              Array<Feature>& features);
void      printAnchorType(int index, int size);
int       getFeatureStateCount(int& maxstate, Array<int>& states, 
                               Array<Feature>& features);


// global variables
Options   options;            // database for command-line arguments
int       debugQ = 0;         // used with the --debug option
int       numinputs = 0;
int       depth = -1;         // used with -d option
int       sufficient = 10;    // used with -s option
int       totalcount = 0;     // total number of features for all elements
int       sortQ = 0;          // used with -o option
int       appendQ = 0;        // used with -a option
int       anchorQ = 1;        // used with -A option
int       stateQ = 1;         // used with -S option
int       eprofileQ = 0;      // used with -e option
int       mprofileQ = 1;      // used with -P option 
int       unanchoredQ = 0;    // used with -u option
Array<Feature> features;


///////////////////////////////////////////////////////////////////////////

int main(int argc, char* argv[]) {
   HumdrumFile infile;

   // process the command-line options
   checkOptions(options, argc, argv);

   infile.clear();
   // if no command-line arguments read data file from standard input
   if (numinputs < 1) {
      infile.read(cin);
   } else {
      infile.read(options.getArg(1));
   }

   Array<int> sortlines;
   totalcount = buildFeaturesArray(features, infile);
   sortlines.setSize(features.getSize());
   int i;
   for (i=0; i<sortlines.getSize(); i++) {
      sortlines[i] = i;
   }

   if (debugQ) {
      cout << "There are " << features.getSize() 
           << " data elements in the dataset" << endl;
      cout << "There are " << totalcount << " features in the dataset" << endl;
      cout << "Average features per element: " 
           << (double)totalcount/features.getSize() << endl;
   }

   Array<Feature> analysis;
   Array<int> ttu;
   Array<int> tts;

   if (anchorQ) {
      sortFeatures(features);
      sortLines(sortlines);
      buildAnalysisData(analysis, features);
      generateTTData(analysis, ttu, tts);
      printAnalysisData(analysis, ttu, tts, features, sortlines);
   } else {
      buildAnalysisDataUnanchored(analysis, features);
      printAnalysisDataUnanchored(analysis, features);
   }

   return 0;
}


///////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// generateTTData -- ttu = time to unique; tts = time to sufficient
//   negative tts value means sufficiency was not reached.
//

void generateTTData(Array<Feature>& analysis, Array<int>& ttu, 
      Array<int>& tts) {

   ttu.setSize(analysis.getSize());
   tts.setSize(analysis.getSize());

   ttu.setAll(0);
   tts.setAll(0);

   int i, j;
   int isize = 0;
   int testvalue = 0;
   int tempttu = 0;
   int temptts = 0;

   // find the time-to-uniqueness (ttu) value:

   for (i=0; i<analysis.getSize(); i++) {
      isize = analysis[i].feature[0].getSize();
      if (isize == 0) {
         continue;
      }
      testvalue = analysis[i].feature[0][isize-1];
      tempttu = isize-1;
      for (j=isize-2; j>=0; j--) {
         if (analysis[i].feature[0][j] != testvalue) {
            tempttu = j+1;
            break;
         }
         tempttu = j;
      }

      if (tempttu == isize - 1) {
         if (analysis[i].feature[0][isize-1] == 1) {
            // found uniq element on the last try...
            ttu[i] = isize;
         } else {
            // unsure if a uniq element was found
            ttu[i] = -(tempttu + 1);
         }
      } else {
         ttu[i] = tempttu+1;
      }

   }


   // find the time-to-sufficiency (tts) value:

   for (i=0; i<analysis.getSize(); i++) {
      isize = analysis[i].feature[0].getSize();
      if (isize == 0) {
         continue;
      }
      testvalue = analysis[i].feature[0][isize-1];
      if (testvalue > sufficient) {
         tts[i] = -isize;
         continue;
      }
      temptts = isize-1;
      j = isize-2;
      while (j>=0) {
         if (analysis[i].feature[0][j] <= sufficient) {
           j--;
         } else {
            temptts = j+1;
            break;
         }
      }
      if (j < 0) {
         temptts = 0;
      } 

      tts[i] = temptts+1;
   }

}



//////////////////////////////
//
// printAnchorType --
//

void printAnchorType(int index, int size) {
   index = index+1;
   int maxdigits = ((int)log10((double)size))+1;
   int digits = ((int)log10((double)index))+1;
   int i;
   for (i=0; i<maxdigits-digits; i++) {
      cout << '0';
   }
   cout << index;
}


//////////////////////////////
//
// printAnalysisDataUnanchored --
//

void printAnalysisDataUnanchored(Array<Feature>& analysis, 
      Array<Feature>& features) {
   int i, j, k;

   cout << "**istn\t**anchor\t**length";
   if (mprofileQ) {
      cout << "\t**mprofile";
   }
   if (eprofileQ) {
      cout << "\t**eprofile";
   }
   if (appendQ) {
      cout << "\t**feature";
   }
   cout << "\n";

   double entropy = 0.0;
   double entropysum = 0.0;

   for (i=0; i<analysis.getSize(); i++) {
      for (j=0; j<analysis[i].feature.getSize(); j++) {
         cout << analysis[i].istn;
         cout << "-";
         printAnchorType(j, analysis[i].feature.getSize());
         cout << "\t";
         cout << j+1;
         cout << "\t";      
         cout << analysis[i].feature.getSize() - j;
         cout << "\t";      
   
         if (mprofileQ) {
            for (k=0; k<analysis[i].feature[j].getSize(); k++) {
               cout << analysis[i].feature[j][k];
               if (k < analysis[i].feature[j].getSize()-1) {
                  cout << " ";
               }
            }
         }

         if (eprofileQ) {
            cout << "\t";
            entropy = 0.0;
            entropysum = 0.0;
            for (k=0; k<analysis[i].feature[j].getSize(); k++) {
               entropy = log((double)analysis.getSize()/
                             analysis[i].feature[j][k])/log(2.0);
               cout << entropy - entropysum;
               entropysum = entropy;
               if (k<analysis[i].feature[j].getSize() - 1) {
                  cout << " ";
               }
            }
         }

         if (appendQ) {
            cout << "\t";
            for (k=0; k<analysis[i].feature[j].getSize(); k++) {
               cout << features[i].feature[0][j+k];
               // next section might need debugging based on max search length:
               if (k < features[i].feature[0].getSize()-1) {
                  cout << " ";
               }
            }
         }
         cout << "\n";
      }

   }
   cout << "*-\t*-\t*-";
   if (mprofileQ) {
      cout << "\t*-";
   }
   if (eprofileQ) {
      cout << "\t*-";
   }
   if (appendQ) {
      cout << "\t*-";
   }
   cout << "\n";
}



//////////////////////////////
//
// getFeatureStateCount -- return the number of states that any one feature
//    could posses.  E.g., for Gross contour, there are 3 states:
//    up, down and same.  States can be any integer in the range
//    from 0 to MAXSTATE-1.
//

int getFeatureStateCount(int& maxstate, Array<int>& states, 
      Array<Feature>& features) {
#define MAXSTATE 500000
   states.setSize(MAXSTATE);
   states.setAll(0);
   int warnnegative = 0;
   int warnmaximum = 0;
   int index;
   maxstate = 0;
   int i, j;
   for (i=0; i<features.getSize(); i++) {
      for (j=0; j<features[i].feature[0].getSize(); j++) {
         index = features[i].feature[0][j];
         if (index < 0) {
            warnnegative++;
            continue;
         } else if (index >= MAXSTATE) {
            warnmaximum++;
         }
         states[index]++;
         if (index > maxstate) {
            maxstate = index;
         }
      }
   }

   if (warnnegative) {
      cout << "!! Warning: there were " << warnnegative 
           << " uncounted negative states in the data set\n";
   }
   if (warnmaximum) {
      cout << "!! Warning: there were " << warnmaximum
           << " uncounted states exceeding 99,999 in the data set\n";
   }

   int count = 0;
   for (i=0; i<=maxstate; i++) {
      if (states[i]) {
         count++;
      }
   }

   return count;
}



//////////////////////////////
//
// printAnalysisData --
//

void printAnalysisData(Array<Feature>& analysis, Array<int>& ttu, 
      Array<int>& tts, Array<Feature>& features, Array<int>& sortlines) {
   int i, j;
   int sorti;
   int ttufail = 0;
   int ttsfail = 0;
   int ttusum = 0;
   int ttssum = 0;
   int ttumax = 0;
   int ttumin = 0;
   int ttsmax = 0;
   int ttsmin = 0;

   int minfeaturecount = features[0].feature[0].getSize();
   int maxfeaturecount = features[0].feature[0].getSize();
   for (i=1; i<features.getSize(); i++) {
      if (minfeaturecount > features[i].feature[0].getSize()) {
         minfeaturecount = features[i].feature[0].getSize();
      }
      if (maxfeaturecount < features[i].feature[0].getSize()) {
         maxfeaturecount = features[i].feature[0].getSize();
      }
   }
   
   for (i=0; i<ttu.getSize(); i++) {
      if (ttu[i] <= 0) { ttufail++; }
      if (tts[i] <= 0) { ttsfail++; }
      if (ttu[i] <= 0) { continue; }
      if (tts[i] <= 0) { continue; }
      if (ttu[i] > 0)  { ttusum+= ttu[i]; }
      if (tts[i] > 0)  { ttssum+= tts[i]; }
      if (ttumax == 0) { ttumax = ttu[i]; }
      if (ttumin == 0) { ttumin = ttu[i]; }
      if (ttsmax == 0) { ttsmax = tts[i]; }
      if (ttsmin == 0) { ttsmin = tts[i]; }
      if (ttumax < ttu[i]) { ttumax = ttu[i]; }
      if (ttumin > ttu[i]) { ttumin = ttu[i]; }
      if (ttsmax < tts[i]) { ttsmax = tts[i]; }
      if (ttsmin > tts[i]) { ttsmin = tts[i]; }
   }
   double ttuaverage = (double)ttusum / (ttu.getSize() - ttufail);
   double ttsaverage = (double)ttssum / (tts.getSize() - ttsfail);

   double entropy = 0.0;    // for entropy profile
   double entropysum = 0.0; // for entropy profile

   Array<int> states;
   int maxstate;
   int statecount = getFeatureStateCount(maxstate, states, features);

   cout << "!!!theme_count:\t\t\t" << analysis.getSize() << " themes\n";
   cout << "!!!symbol_count:\t\t"  << totalcount << " notes\n";
   cout << "!!!state_count:\t\t\t" << statecount << " states/symbol\n";
   if (stateQ) {
      // print state distribution statistics
      for (i=0; i<=maxstate; i++) {
         if (states[i]) {
            cout << "!!!state_" << i << "_count:\t\t"
                 << states[i] << " symbols (" 
                 << (double)states[i]/totalcount*100.0 
                 << "% of total symbols)\n";
         }
      }
   }
   cout << "!!!average_symbols_per_theme:\t" 
        << (double)totalcount/analysis.getSize() << " symbols\n";
   cout << "!!!average_entropy_rate:\t" 
        << log((double)analysis.getSize())/log(2.0)/ttuaverage 
        << " bits/symbol\n";
   cout << "!!!max_theoretical_entropy_rt:\t" 
        << log((double)statecount)/log(2.0) 
        << " bits/symbol\n";
   cout << "!!!min_theme_symbol_count:\t" << minfeaturecount << " symbols\n";
   cout << "!!!max_theme_symbol_count:\t" << maxfeaturecount << " symbols\n";
   cout << "!!!search_type:\t\t\t";
   if (anchorQ) {
      if (unanchoredQ) {
         cout << "unanchored";
      } else {
         cout << "anchored";
      }
   } else {
      cout << "doubly unanchored";
   }
   cout << "\n";
   cout << "!!!sufficiency_level:\t\t" << sufficient << " matches\n";
   cout << "!!!ttu_failure_count:\t\t" << ttufail 
        << "\t(" << (double)ttufail/ttu.getSize() * 100 << "%)" << "\n";
   cout << "!!!tts_failure_count:\t\t" << ttsfail 
        << "\t(" << (double)ttsfail/tts.getSize() * 100 << "%)" << "\n";
   cout << "!!!ttu_mean:\t\t\t" << ttuaverage << " symbols\n";
   cout << "!!!ttu_min:\t\t\t" << ttumin << " symbols\n";
   cout << "!!!ttu_max:\t\t\t" << ttumax << " symbols\n";
   cout << "!!!tts_mean:\t\t\t" << ttsaverage << " symbols\n";
   cout << "!!!tts_min:\t\t\t" << ttsmin << " symbols\n";
   cout << "!!!tts_max:\t\t\t" << ttsmax << " symbols\n";
   cout << "!!\n";
   cout << "!! The music for each theme can be viewed in Themefinder by\n";
   cout << "!! changing xxxxxxxxxxxx to a ISTN value given below in this web address:\n";
   cout << "!!    http://themefinder.org/cgi-bin/themeinfo?istn=xxxxxxxxxxxx\n";
   cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
   cout << "\n";
   cout << "**istn\t**length\t**ttu\t**tts";
   if (mprofileQ) {
      cout << "\t**mprofile";
   }
   if (eprofileQ) {
      cout << "\t**eprofile";
   }
   if (appendQ) {
      cout << "\t**feature";
   }
   cout << "\n";

   for (i=0; i<analysis.getSize(); i++) {
      if (sortQ) {
         sorti = i;
      } else {
         sorti = sortlines[i];
      }
      cout << analysis[sorti].istn;
      cout << "\t";      
      cout << analysis[sorti].feature[0].getSize();
      cout << "\t";      
      cout << ttu[sorti];
      cout << "\t";      
      cout << tts[sorti];

      if (mprofileQ) {
         cout << "\t";      
         for (j=0; j<analysis[sorti].feature[0].getSize(); j++) {
            cout << analysis[sorti].feature[0][j];
            if (j < analysis[sorti].feature[0].getSize()-1) {
               cout << " ";
            }
         }
      }

      if (eprofileQ) {
         cout << "\t";
         entropy = 0.0;
         entropysum = 0.0;
         for (j=0; j<analysis[sorti].feature[0].getSize(); j++) {
            entropy = log((double)analysis.getSize()/
                          analysis[sorti].feature[0][j])/log(2.0);
            cout << entropy - entropysum;
            entropysum = entropy;
            if (j<analysis[sorti].feature[0].getSize() - 1) {
               cout << " ";
            }
         }
      }

      if (appendQ) {
         cout << "\t";
         for (j=0; j<features[sorti].feature[0].getSize(); j++) {
            cout << features[sorti].feature[0][j];
            if (j < features[sorti].feature[0].getSize()-1) {
               cout << " ";
            }
         }
      }

      cout << "\n";
   }
   cout << "*-\t*-\t*-\t*-\t*-";
   if (appendQ) {
      cout << "\t*-";
   }
   cout << "\n";
}



//////////////////////////////
//
// buildAnalysisDataUnanchored -- features MUST be sorted before sending
//    them into this function.
//

void buildAnalysisDataUnanchored(Array<Feature>& analysis, 
      Array<Feature>& features) {
   analysis.setSize(features.getSize());
   analysis.allowGrowth(0);
   int i, j, k;
   int searchsize = 0;
   Feature query;
   for (i=0; i<analysis.getSize(); i++) {
      strcpy(analysis[i].istn, features[i].istn);
      analysis[i].line = features[i].line;

      analysis[i].feature.setSize(features[i].feature[0].getSize());

      for (j=0; j<analysis[i].feature.getSize(); j++) {
         analysis[i].feature[j].setSize(analysis[i].feature.getSize()-j);
         analysis[i].feature[j].setAll(0);
         searchsize = analysis[i].feature[j].getSize();
         if ((depth > 0) && (searchsize > depth)) {
            searchsize = depth;
         }
         for (k=0; k<searchsize; k++) {
            buildQuery(query, features[i], j, k+1);
            analysis[i].feature[j][k] = getMatchCountUnanchored(query,features);
         }
      }
   }
}



////////////////////
//
// buildQuery --
//

void buildQuery(Feature& query, Feature& feature, int start, int length) {
   query.feature.setSize(1);
   query.feature[0].setSize(length);
   int i;
   for (i=0; i<length; i++) {
      query.feature[0][i] = feature.feature[0][start+i];
   }
}



//////////////////////////////
//
// getMatchCountUnanchored --
//

int getMatchCountUnanchored(Feature& query, Array& features) {
   int count = 0;
   int i;
   for (i=0; i<features.getSize(); i++) {
      count += unanchoredmatch(query, features[i]);
   }

   return count;
}



//////////////////////////////
//
// unanchoredmatch --
//

int unanchoredmatch(Feature& query, Feature& datum) {
   int i, j;

/*cout << "SEARCHING FOR ";
for (i=0; i<query.feature[0].getSize(); i++) {
cout << query.feature[0][i] << " ";
}
cout << endl;
cout << "IN            ";
for (i=0; i<datum.feature[0].getSize(); i++) {
cout << datum.feature[0][i] << " ";
}
cout << endl;
*/


   int qsize = query.feature[0].getSize();
   int dsize = datum.feature[0].getSize();
   int match = 0;
   for (i=0; i<=(dsize-qsize); i++) {
      if (query.feature[0][0] != datum.feature[0][i]) {
         continue;
      }

      // the first item of both strings matches, try the rest
      // and continue if not an exact match.
      match = 1;
      for (j=1; j<qsize; j++) {
         if (query.feature[0][j] != datum.feature[0][i+j]) {
            match = 0;
            break;
         }
      }
      if (match) {
         break;
      }
   }

// cout << "RESULT      " << match << endl;
   if (match) {
      return 1;
   } else {
      return 0;
   }   
}



//////////////////////////////
//
// buildAnalysisData -- features MUST be sorted before sending
//    them into this function.
//

void buildAnalysisData(Array& analysis, Array& features) {
   analysis.setSize(features.getSize());
   analysis.allowGrowth(0);
   int i, j;
   int searchsize = 0;
   Feature query;
   query.feature.setSize(1);
   query.feature[0].setSize(0);

   for (i=0; i<analysis.getSize(); i++) {
      strcpy(analysis[i].istn, features[i].istn);
      analysis[i].feature.setSize(1);
      analysis[i].line = features[i].line;

      searchsize = features[i].feature[0].getSize();

      if ((depth > 0) && (searchsize > depth)) {
         searchsize = depth;
      }

      analysis[i].feature[0].setSize(searchsize);
      
      query.feature[0].setSize(features[i].feature[0].getSize());
      query.feature[0].setSize(0);

      for (j=0; j<searchsize; j++) {
         if (unanchoredQ) {  // unanchored search
            query.feature[0].append(features[i].feature[0][j]);
            analysis[i].feature[0][j] = getMatchCountUnanchored(query,features);
         } else { // anchord search: take advantage of presorting to search
            analysis[i].feature[0][j] = getMatchCount(features, i, j);
         }
      }
   }
}



//////////////////////////////
//
// getMatchCount --
//

int getMatchCount(Array& features, int index, int level) {
   Feature* ptr = NULL;
   Feature searchfeature = features[index];
   if (searchfeature.feature[0].getSize() > level+1) {
      searchfeature.feature[0].setSize(level+1);
   }

   ///////////////////////////////////////////////

   ptr = &(features[index]);
/*   ptr = (Feature*)bsearch(&searchfeature, 
                 features.getBase(), 
                 features.getSize(),
                 sizeof(Feature), 
                 featuresort2);

   if (ptr == NULL) {
      return 0;
   }
*/

   ///////////////////////////////////////////////

   // count the number of exact matches

   int matchindex = ((int)ptr - (int)(features.getBase()))/sizeof(Feature);
   int forward = matchindex;
   int backward = matchindex;

   backward--;
   while ((backward >= 0) && (featuresort2((void*)&searchfeature, 
        (void*)(&(features[backward]))) == 0)) {
      backward--;
   }
   backward++;

   forward++;
   while ((forward < features.getSize()) && (featuresort2((void*)&searchfeature, 
        (void*)(&(features[forward]))) == 0)) {
      forward++;
   }
   forward--;

   return forward - backward + 1;
}



//////////////////////////////
//
// sortFeatures --
//

void sortFeatures(Array& features) {
   qsort(features.getBase(), features.getSize(), sizeof(Feature), featuresort);
}



//////////////////////////////
//
// sortLines --
//

void sortLines(Array& sortlines) {
   qsort(sortlines.getBase(), sortlines.getSize(), sizeof(int), linesort);
}



//////////////////////////////
//
// linesort -- sort items by feature space elements.
//

int linesort(const void* A, const void* B) {
   int indexa = *((int*)A);
   int indexb = *((int*)B);
   if (features[indexa].line < features[indexb].line) {
      return -1;
   } else if (features[indexa].line > features[indexb].line) {
      return +1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// featuresort -- sort items by feature space elements.
//

int featuresort(const void* A, const void* B) {
   Feature& a = *((Feature*)A);
   Feature& b = *((Feature*)B);
   int i;
   int sizea = a.feature[0].getSize();
   int sizeb = b.feature[0].getSize();
   int minsize = sizea < sizeb ? sizea : sizeb;
   for (i=0; i<minsize; i++) {
      if (a.feature[0][i] < b.feature[0][i]) {
         return -1;
      }
      if (a.feature[0][i] > b.feature[0][i]) {
         return +1;
      }
   }
 
   if (sizea > sizeb) {
      return +1;
   } else if (sizea < sizeb) {
      return -1;
   } else {
      return 0;
   }
}



//////////////////////////////
//
// featuresort2 -- sort for count matches  limit to the size of the first
//   element
//

int featuresort2(const void* A, const void* B) {
   Feature& a = *((Feature*)A);
   Feature& b = *((Feature*)B);
   int i;
   int sizea = a.feature[0].getSize();
   int sizeb = b.feature[0].getSize();
   int minsize = sizea < sizeb ? sizea : sizeb;
   for (i=0; i<minsize; i++) {
      if (a.feature[0][i] < b.feature[0][i]) {
         return -1;
      }
      if (a.feature[0][i] > b.feature[0][i]) {
         return +1;
      }
   }

   // all of the elements in the search range are equal
   return 0;
 
}



//////////////////////////////
//
// buildFeaturesArray -- returns the total number of features
//

int buildFeaturesArray(Array& features, HumdrumFile& infile) {
   int i;
   Feature tempfeature;
   features.setSize(infile.getNumLines());
   features.setSize(0);
   int totalfeatures = 0;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() == E_humrec_data) {
         totalfeatures += getFeature(tempfeature, infile[i]);
         if (tempfeature.is_valid()) {
            tempfeature.line = i;
            features.append(tempfeature);       
         }
      }
   }

   return totalfeatures;
}



//////////////////////////////
//
// getFeature -- returns the total number of features found on the line
//

int getFeature(Feature& tempfeature, HumdrumRecord& record) {
   tempfeature.clear();
   int i;
   int featurecount = 0;
   char buffer2[1024] = {0};

   // find the ISTN value
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**istn") == 0) {
         strcpy(buffer2, record[i]);
         if (debugQ) {
            cout << "found istn: " << buffer2 << endl;
         }
      } else if (strcmp(record.getExInterp(i), "**feature") == 0) {
         featurecount++;
      }
   }
   if ((featurecount == 0) || (buffer2[0] == '\0')) {
      // no valid data
      return 0;
   } 

   strcpy(tempfeature.istn, buffer2);

   tempfeature.feature.setSize(1);
   tempfeature.feature[0].setSize(100);
   tempfeature.feature[0].setGrowth(1000);
   tempfeature.feature[0].setSize(0);

   char buffer[100000] = {0};
   int code = 0;
   char* ptr = NULL;
   int total = 0;
   // find the first feature (second dimension to be added later)
   for (i=0; i<record.getFieldCount(); i++) {
      if (strcmp(record.getExInterp(i), "**feature") == 0) {
         strncpy(buffer, record[i], 100000);
         ptr = strtok(buffer, " ");
         while (ptr != NULL) {
            if (sscanf(ptr, "%d", &code) > 0) {
               tempfeature.feature[0].append(code);
               total++;
            }
            ptr = strtok(NULL, " ");
         }
      }
   }

   // do generalized features here.

   return total;
}



//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("d|depth=i:-1",      "set the maximum search depth");
   opts.define("a|append=b",        "append original data to the analysis");
   opts.define("s|sufficient=i:10", "set the sufficiency size");
   opts.define("S|no-state=b",      "do not print state distributions");
   opts.define("e|entropy-profile=b", "print entropy profile");
   opts.define("o|order|sort=b",    "sort the output by features");
   opts.define("P|no-match-profile=b", "do not output match profile");
   opts.define("u|unanchored=b",    "search for feature anywhere in sequences");
   opts.define("p|permutations=b",  "search for feature anywhere in sequences");

   opts.define("debug=b");           // determine bad input line num
   opts.define("author=b");          // author of program
   opts.define("version=b");         // compilation info
   opts.define("example=b");         // example usages
   opts.define("h|help=b");          // short description
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, April 2004" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 15 April 2004" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   eprofileQ   = opts.getBoolean("entropy-profile");
   numinputs   = opts.getArgCount();
   debugQ      = opts.getBoolean("debug");
   depth       = opts.getInteger("depth");
   sufficient  = opts.getInteger("sufficient");
   sortQ       = opts.getBoolean("order");
   appendQ     = opts.getBoolean("append");
   anchorQ     = !opts.getBoolean("permutations");
   unanchoredQ =  opts.getBoolean("unanchored");
   stateQ      = !opts.getBoolean("no-state");
   mprofileQ   = !opts.getBoolean("no-match-profile");

   if (sufficient <= 0) {
      sufficient = 1;
   }
}
  


//////////////////////////////
//
// example -- example usage of the program
//

void example(void) {
   cout <<
   "                                                                         \n"
   << endl;
}



//////////////////////////////
//
// usage -- gives the usage statement for the program
//

void usage(const char* command) {
   cout <<
   "                                                                         \n"
   << endl;
}



// md5sum: 7183f56fc7bdbe435b46742ef5ca4397 ttuprofile.cpp [20080518]