//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Tue Jan 11 14:21:06 PST 2005
// Last Modified: Tue Jan 11 14:21:09 PST 2005
// Filename:      ...sig/examples/all/tinfo.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/tinfo.cpp
// Syntax:        C++; museinfo
//
// Description:   Basic information theory measurements on **kern
// 

#include "humdrum.h"
#include "CircularBuffer.h"
#include <math.h>

// function declarations
void   checkOptions(Options& opts, int argc, char* argv[]);
void   example(void);
void   usage(const char* command);
int    storesymbol(Array<const char*>& table, const char* token);
int    buildtable(HumdrumFile& infile, Array<const char*>& symboltable,
                                 Array<int>& frequency);
void   printTable(Array<const char*>& symboltable, 
                                 Array<int>& frequency, int totalcount);

double entropy(Array<int>& frequency, int totalcount);
int    findsymbol(Array<const char*>& table, const char* token);
void   printAnalysis(HumdrumFile& infile, 
                                 Array<const char*>& symboltable, 
                                 Array<int>& frequency, int totalcount);
int    buildnthorder(HumdrumFile& infile, 
                                 Array<Array<int> >& nthsymboltable, 
                                 Array<int>& nthfrequency, 
                                 Array<const char*> symboltable, int order);
int    storenthsymbol(Array<Array<int> >& nthsymboltable, 
                                 CircularBuffer<int>& buffer);
void   printNthOrder(Array<int>& sequence, 
                                 Array<const char*> lookup);
void   printnthTable(Array<const char*>& symboltable, 
                                 Array<Array<int> >& nthsymboltable, 
                                 Array<int>& nthfrequency, int nthtotalcount);
void   extractSequence(HumdrumFile& infile, Array<int>& sequence, 
                                 Array<const char*>& symboltable);
int    findNthSymbol(Array<Array<int> >& nthsymboltable, 
                                 CircularBuffer<int>& buffer);
void   fillRow(Array<double>& row, Array<int>& sequence, 
                                 Array<const char*>& symboltable, int order,
                                 HumdrumFile& infile);
void   printPicture(Array<const char*>& symboltable, 
                                 HumdrumFile& infile);
void   printNthAnalysis(HumdrumFile& infile, 
                                 Array<const char*>& symboltable, 
                                 Array<Array<int> >& nthsymboltable, 
                                 Array<int>& nthfrequency, 
                                 int nthtotalcount, int order);


// global variables
Options      options;            // database for command-line arguments
int          debugQ     = 0;     // used with the --debug option
int          appendQ    = 0;     // used with the -a option
int          maxcount   = 50000; // used with the --max option
int          tableQ     = 0;     // used with the -t option
int          order      = 1;     // used with the -n option
int          bgval      = 255;   // used with the -b option
int          fileQ      = 0;     // used with the -n option

///////////////////////////////////////////////////////////////////////////

int main(int argc, char* argv[]) {
   HumdrumFile infile;

   // process the command-line options
   checkOptions(options, argc, argv);

   // if no command-line arguments read data file from standard input
   int numinputs = options.getArgCount();
   if (numinputs < 1) {
      infile.read(cin);
   } else {
      infile.read(options.getArg(1));
   }

   int          nthtotalcount = 0;

   Array<const char*> symboltable;
   Array<int>   frequency;
   int          totalcount = 0;

   totalcount = buildtable(infile, symboltable, frequency);

   Array<Array<int> > nthsymboltable;
   Array<int> nthfrequency;
   nthtotalcount = buildnthorder(infile, nthsymboltable, nthfrequency, 
      symboltable, order);

   if (tableQ) {
      if (order == 1) {
         printTable(symboltable, frequency, totalcount);
      } else {
         printnthTable(symboltable, nthsymboltable, nthfrequency, 
                       nthtotalcount);
      }
   } else if (fileQ) {
      // printAnalysis(infile, symboltable, frequency, totalcount);
      printNthAnalysis(infile, symboltable, nthsymboltable, nthfrequency, 
                       nthtotalcount, order);
   } else {
      printPicture(symboltable, infile);
   }

   return 0;
}


///////////////////////////////////////////////////////////////////////////


//////////////////////////////
//
// printNthAnalysis -- 
//

void printNthAnalysis(HumdrumFile& infile, Array<const char*>& symboltable, 
      Array<Array<int> >& nthsymboltable, Array<int>& nthfrequency, 
      int nthtotalcount, int order) {

   CircularBuffer<int> buffer;
   buffer.setSize(order * 2);
   buffer.reset();

   int i;
   double freqnorm;
   int sindex = 0;
   int nthindex;

   for (i=0; i<infile.getNumLines(); i++) {
      cout << infile[i];
      if (infile[i].getType() == E_humrec_interpretation) {
         cout << "\t";
         if (strcmp(infile[i][0], "*-") == 0) {
            cout << "*-";
         } else if (strncmp(infile[i][0], "**", 2) == 0) {
            cout << "**info";
         } else {
            cout << "*";
         }
      } else if (infile[i].getType() == E_humrec_data_measure) {
         cout << "\t" << infile[i][0];
      } else if (infile[i].getType() == E_humrec_data) {
         cout << "\t";
         sindex = findsymbol(symboltable, infile[i][0]);
         buffer.insert(sindex);
         if (buffer.getCount() >= order) {
            nthindex = findNthSymbol(nthsymboltable, buffer);
            freqnorm = (double)nthfrequency[nthindex]/nthtotalcount;
            cout << -log(freqnorm)/log(2.0);
            buffer.extract();
         } else {
            cout << ".";
         }
      } else if (infile[i].getType() == E_humrec_data_comment) {
         cout << "\t!";
      }
      cout << "\n";
   }

}



//////////////////////////////
//
// printAnalysis -- 
//

void printAnalysis(HumdrumFile& infile, Array<const char*>& symboltable, 
      Array<int>& frequency, int totalcount) {

   int i;
   double freqnorm;
   int sindex = 0;

   for (i=0; i<infile.getNumLines(); i++) {
      cout << infile[i];
      if (infile[i].getType() == E_humrec_interpretation) {
         cout << "\t";
         if (strcmp(infile[i][0], "*-") == 0) {
            cout << "*-";
         } else if (strncmp(infile[i][0], "**", 2) == 0) {
            cout << "**info";
         } else {
            cout << "*";
         }
      } else if (infile[i].getType() == E_humrec_data_measure) {
         cout << "\t" << infile[i][0];
      } else if (infile[i].getType() == E_humrec_data) {
         cout << "\t";
         sindex = findsymbol(symboltable, infile[i][0]);
         freqnorm = (double)frequency[sindex]/totalcount;
         cout << -log(freqnorm)/log(2.0);
      } else if (infile[i].getType() == E_humrec_data_comment) {
         cout << "\t!";
      }
      cout << "\n";
   }

}



//////////////////////////////
//
// entropy -- 
//

double entropy(Array& frequency, int totalcount) {
   double sum = 0.0;
   double value = 0.0;
   int i;
   for (i=0; i<frequency.getSize(); i++) {
      value = (double)frequency[i]/totalcount;
      sum += value * log(value)/log(2.0);
   }

   return -sum;
}

//////////////////////////////
//
// printnthTable -- 
//

void printnthTable(Array<const char*>& symboltable, 
      Array<Array<int> >& nthsymboltable, Array<int>& nthfrequency,
      int nthtotalcount) {

   double freqnorm;
   int i;
   cout << "!! tokencount       = " << nthtotalcount << "\n";
   cout << "!! weighted_entropy = " << entropy(nthfrequency, nthtotalcount) 
                                    << "\n";
   cout << "!! unique_states    = " << nthfrequency.getSize() << "\n";
   cout << "**idx\t**sym\t**freq\t**norm\t**info\n";
   for (i=0; i<nthsymboltable.getSize(); i++) {
      freqnorm = (double)nthfrequency[i]/nthtotalcount;
      cout << i << "\t";
      printNthOrder(nthsymboltable[i], symboltable);
      cout << "\t" << nthfrequency[i]
           << "\t" << freqnorm  << "\t";
      cout << -log(freqnorm)/log(2.0);
      cout << "\n";
   }
   cout << "*-\t*-\t*-\t*-\t*-\n";

}



//////////////////////////////
//
// printNthOrder --
//

void printNthOrder(Array& sequence, Array lookup) {
   int i;
   for (i=0; i<sequence.getSize(); i++) {
      cout << lookup[sequence[i]];
      if (i < sequence.getSize() - 1) {
         cout << " ";
      }
   }
}



//////////////////////////////
//
// printTable -- 
//

void printTable(Array<const char*>& symboltable, Array<int>& frequency, 
      int totalcount) {

   double freqnorm;
   int i;
   cout << "!! tokencount       = " << totalcount << "\n";
   cout << "!! weighted_entropy = " << entropy(frequency, totalcount) << "\n";
   cout << "!! unique_states    = " << frequency.getSize() << "\n";
   cout << "**idx\t**sym\t**freq\t**norm\t**info\n";
   for (i=0; i<symboltable.getSize(); i++) {
      freqnorm = (double)frequency[i]/totalcount;
      cout << i << "\t" << symboltable[i] << "\t" << frequency[i]
           << "\t" << freqnorm  << "\t";
      cout << -log(freqnorm)/log(2.0);
      cout << "\n";
   }
   cout << "*-\t*-\t*-\t*-\t*-\n";

}



//////////////////////////////
//
// buildtable -- 
//

int buildtable(HumdrumFile& infile, Array<const char*>& symboltable,
      Array<int>& frequency) {

   symboltable.setSize(maxcount);
   symboltable.setGrowth(maxcount);
   symboltable.setSize(0);

   frequency.setSize(maxcount);
   frequency.setGrowth(maxcount);
   frequency.setSize(0);

   int i=0;
   int total = 0;
   int zero=0;
   int hindex = 0;
   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }

      if (strcmp(infile[i][0], ".") != 0) {       // ignore null tokens
         hindex = storesymbol(symboltable, infile[i][0]);  // second index = spine #
         if (hindex == frequency.getSize()) {   // add a new location in ftab.
            frequency.append(zero);
         }
         frequency[hindex]++;
         total++;
      }

   }

   return total;
}



//////////////////////////////
//
// buildnthorder --
//
int buildnthorder(HumdrumFile& infile, Array<Array<int> >& nthsymboltable, 
      Array<int>& nthfrequency, Array<const char*> symboltable, int order) {

   nthsymboltable.setSize(maxcount);
   nthsymboltable.setGrowth(maxcount);
   nthsymboltable.setSize(0);

   nthfrequency.setSize(maxcount);
   nthfrequency.setGrowth(maxcount);
   nthfrequency.setSize(0);

   int i=0;
   int total = 0;
   int zero=0;
   int hindex = 0;
   CircularBuffer<int> buffer;
   buffer.setSize(order*2);
   buffer.reset();
   int sindex;

   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }

      sindex = findsymbol(symboltable, infile[i][0]);
      buffer.insert(sindex);
      if ((buffer.getCount() >= order) && (strcmp(infile[i][0], ".") != 0)) {    
         hindex = storenthsymbol(nthsymboltable, buffer);
         if (hindex == nthfrequency.getSize()) {   
            nthfrequency.append(zero);
         }
         buffer.read();
         nthfrequency[hindex]++;
         total++;
      }

   }

   return total;
}



///////////////////////////////
//
// storenthsymbol --
//

int storenthsymbol(Array<Array<int> >& nthsymboltable, 
      CircularBuffer<int>& buffer) {

   int i;
   int j;
   int found = 0;

   for (i=0; i<nthsymboltable.getSize(); i++) {
      found = 1;
      for (j=0; j<order; j++) {
         if (buffer[j] != nthsymboltable[i][j]) {
            found = 0;
         }
      }
      if (found == 1) {
         return i;
      }
   }

   // symbol not found: add it to the table
   int size = nthsymboltable.getSize();
   nthsymboltable.setSize(size+1);
   nthsymboltable[size].setSize(order);
   for (i=0; i<order; i++) {
      nthsymboltable[size][i] = buffer[i];
   }

   return size;
}



//////////////////////////////
//
// storesymbol -- store a symbol in the table (if not there already) and
//     then return its index.

int storesymbol(Array& table, const char* token) {
   int i;
   for (i=0; i<table.getSize(); i++) {
      if (strcmp(table[i], token) == 0) {
         return i;
      }
   }

   table.append(token);
   return table.getSize()-1;
}



//////////////////////////////
//
// findsymbol -- store a symbol in the table (if not there already) and
//     then return its index.

int findsymbol(Array& table, const char* token) {
   int i;
   for (i=0; i<table.getSize(); i++) {
      if (strcmp(table[i], token) == 0) {
         return i;
      }
   }

   return -1;
}







//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   opts.define("a|append=b",         "append analysis to data in output");   
   opts.define("max=i:50000",        "maximum expected unique states");   
   opts.define("t|table=b",          "display table of statistics");
   opts.define("n|order=i:1",        "context order");
   opts.define("b|background=i:255", "background color");

   opts.define("debug=b",       "trace input parsing");   
   opts.define("author=b",      "author of the program");   
   opts.define("version=b",     "compilation information"); 
   opts.define("example=b",     "example usage"); 
   opts.define("h|help=b",      "short description"); 
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
           << "craig@ccrma.stanford.edu, Dec 2000" << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 6 Dec 2000" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }

   debugQ   = opts.getBoolean("debug");
   appendQ  = opts.getBoolean("append");
   maxcount = opts.getInteger("max");
   tableQ   = opts.getBoolean("table");
   order    = opts.getInteger("order");
   fileQ    = opts.getBoolean("order");
   bgval    = opts.getInteger("background");
   if (bgval < 0) bgval = 0;
   if (bgval > 255) bgval = 255;
}



//////////////////////////////
//
// example -- example usage of the maxent program
//

void example(void) {
   cout <<
   "                                                                        \n"
   << endl;
}



//////////////////////////////
//
// usage -- gives the usage statement for the quality program
//

void usage(const char* command) {
   cout <<
   "                                                                        \n"
   << endl;
}



//////////////////////////////
//
// fillRow --
//

void fillRow(Array<double>& row, Array<int>& sequence, 
      Array<const char*>& symboltable, int order,
      HumdrumFile& infile) {

   row.setSize(0);
   double value;

   Array<Array<int> > nthsymboltable;
   Array<int> nthfrequency;
   int nthtotalcount;

   nthtotalcount = buildnthorder(infile, nthsymboltable, nthfrequency, 
                                 symboltable, order);
   int i;
   int nthindex;
   CircularBuffer<int> buffer;
   buffer.setSize(order*2);
   buffer.reset();

   for (i=0; i<sequence.getSize(); i++) {
      buffer.insert(sequence[i]);
      if (buffer.getCount() >= order) {
         nthindex = findNthSymbol(nthsymboltable, buffer);
         value = (double)nthfrequency[nthindex] / nthtotalcount;
         value = -log(value)/log(2.0);
         row.append(value);
         buffer.extract();
      }
   };

if (!debugQ) {
   double mmin = 0;
   double mmax = 0;

   mmin = row[0];
   mmax = row[0];
   for (i=0; i<row.getSize(); i++) {
      if (row[i] < mmin) mmin = row[i];
      if (row[i] > mmax) mmax = row[i];
   }

   if (mmax == mmin) {
      for (i=0; i<row.getSize(); i++) {
         row[i] = 128.0;
      }
   } else {
      for (i=0; i<row.getSize(); i++) {
         row[i] = (int)((row[i] - mmin)/(mmax-mmin) * 255);
      }
   }
}

}




//////////////////////////////
//
// printPicture --
//

void printPicture(Array& symboltable, HumdrumFile& infile) {
   Array<int> sequence;
   extractSequence(infile, sequence, symboltable);

   Array<double> row;
   row.setSize(sequence.getSize());  // changes in fillRow

   cout << "P3\n";
   cout << sequence.getSize() << " " << sequence.getSize() << "\n";
   cout << "255\n";

   int i;
   int j;
   int pre;
   int post;

   for (i=0; i<sequence.getSize(); i++) {
      for (pre=0; pre<i/2; pre++) {
         if (debugQ) {
            cout << bgval << " ";
         } else {
            cout << bgval << " " << bgval << " " << bgval << " ";
         }
      }
      
      fillRow(row, sequence, symboltable, i+1, infile);
      for (j=0; j<row.getSize(); j++) {
         // cout << row[j] << " " << row[j] << " " << row[j] << " ";
         if (debugQ) {
            cout << row[j] << " ";
         } else {
            cout <<(int)row[j]<< " " <<(int)row[j]<< " " <<(int)row[j]<< " ";
         }
      }

      for (post=0; post<i/2+(i%2); post++) {
         if (debugQ) {
            cout << bgval << " ";
         } else {
            cout << bgval << " " << bgval << " " << bgval << " ";
         }
      }
      
      cout << "\n";
   }
}




//////////////////////////////
//
// extractSequence -- 
//

void extractSequence(HumdrumFile& infile, Array<int>& sequence, 
      Array<const char*>& symboltable) {
   sequence.setSize(infile.getNumLines());
   sequence.setSize(0);

   int i;
   int sindex = 0;

   for (i=0; i<infile.getNumLines(); i++) {
      if (infile[i].getType() != E_humrec_data) {
         continue;
      }
      sindex = findsymbol(symboltable, infile[i][0]);
      sequence.append(sindex);
   }
}



///////////////////////////////
//
// findNthSymbol --
//

int findNthSymbol(Array<Array<int> >& nthsymboltable, 
      CircularBuffer<int>& buffer) {

   int i;
   int j;
   int found = 0;

   for (i=0; i<nthsymboltable.getSize(); i++) {
      found = 1;
      for (j=0; j<order; j++) {
         if (buffer[j] != nthsymboltable[i][j]) {
            found = 0;
         }
      }
      if (found == 1) {
         return i;
      }
   }

   return -1;
}


// md5sum: aa828fbc777274d5739ca43a542a618e tinfo.cpp [20050403]