//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Tue Jan 11 14:21:06 PST 2005
// Last Modified: Tue Jan 11 14:21:09 PST 2005
// Filename: ...sig/examples/all/tinfo.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/tinfo.cpp
// Syntax: C++; museinfo
//
// Description: Basic information theory measurements on **kern
//
#include "humdrum.h"
#include "CircularBuffer.h"
#include <math.h>
// function declarations
void checkOptions(Options& opts, int argc, char* argv[]);
void example(void);
void usage(const char* command);
int storesymbol(Array<const char*>& table, const char* token);
int buildtable(HumdrumFile& infile, Array<const char*>& symboltable,
Array<int>& frequency);
void printTable(Array<const char*>& symboltable,
Array<int>& frequency, int totalcount);
double entropy(Array<int>& frequency, int totalcount);
int findsymbol(Array<const char*>& table, const char* token);
void printAnalysis(HumdrumFile& infile,
Array<const char*>& symboltable,
Array<int>& frequency, int totalcount);
int buildnthorder(HumdrumFile& infile,
Array<Array<int> >& nthsymboltable,
Array<int>& nthfrequency,
Array<const char*> symboltable, int order);
int storenthsymbol(Array<Array<int> >& nthsymboltable,
CircularBuffer<int>& buffer);
void printNthOrder(Array<int>& sequence,
Array<const char*> lookup);
void printnthTable(Array<const char*>& symboltable,
Array<Array<int> >& nthsymboltable,
Array<int>& nthfrequency, int nthtotalcount);
void extractSequence(HumdrumFile& infile, Array<int>& sequence,
Array<const char*>& symboltable);
int findNthSymbol(Array<Array<int> >& nthsymboltable,
CircularBuffer<int>& buffer);
void fillRow(Array<double>& row, Array<int>& sequence,
Array<const char*>& symboltable, int order,
HumdrumFile& infile);
void printPicture(Array<const char*>& symboltable,
HumdrumFile& infile);
void printNthAnalysis(HumdrumFile& infile,
Array<const char*>& symboltable,
Array<Array<int> >& nthsymboltable,
Array<int>& nthfrequency,
int nthtotalcount, int order);
// global variables
Options options; // database for command-line arguments
int debugQ = 0; // used with the --debug option
int appendQ = 0; // used with the -a option
int maxcount = 50000; // used with the --max option
int tableQ = 0; // used with the -t option
int order = 1; // used with the -n option
int bgval = 255; // used with the -b option
int fileQ = 0; // used with the -n option
///////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[]) {
HumdrumFile infile;
// process the command-line options
checkOptions(options, argc, argv);
// if no command-line arguments read data file from standard input
int numinputs = options.getArgCount();
if (numinputs < 1) {
infile.read(cin);
} else {
infile.read(options.getArg(1));
}
int nthtotalcount = 0;
Array<const char*> symboltable;
Array<int> frequency;
int totalcount = 0;
totalcount = buildtable(infile, symboltable, frequency);
Array<Array<int> > nthsymboltable;
Array<int> nthfrequency;
nthtotalcount = buildnthorder(infile, nthsymboltable, nthfrequency,
symboltable, order);
if (tableQ) {
if (order == 1) {
printTable(symboltable, frequency, totalcount);
} else {
printnthTable(symboltable, nthsymboltable, nthfrequency,
nthtotalcount);
}
} else if (fileQ) {
// printAnalysis(infile, symboltable, frequency, totalcount);
printNthAnalysis(infile, symboltable, nthsymboltable, nthfrequency,
nthtotalcount, order);
} else {
printPicture(symboltable, infile);
}
return 0;
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printNthAnalysis --
//
void printNthAnalysis(HumdrumFile& infile, Array<const char*>& symboltable,
Array<Array<int> >& nthsymboltable, Array<int>& nthfrequency,
int nthtotalcount, int order) {
CircularBuffer<int> buffer;
buffer.setSize(order * 2);
buffer.reset();
int i;
double freqnorm;
int sindex = 0;
int nthindex;
for (i=0; i<infile.getNumLines(); i++) {
cout << infile[i];
if (infile[i].getType() == E_humrec_interpretation) {
cout << "\t";
if (strcmp(infile[i][0], "*-") == 0) {
cout << "*-";
} else if (strncmp(infile[i][0], "**", 2) == 0) {
cout << "**info";
} else {
cout << "*";
}
} else if (infile[i].getType() == E_humrec_data_measure) {
cout << "\t" << infile[i][0];
} else if (infile[i].getType() == E_humrec_data) {
cout << "\t";
sindex = findsymbol(symboltable, infile[i][0]);
buffer.insert(sindex);
if (buffer.getCount() >= order) {
nthindex = findNthSymbol(nthsymboltable, buffer);
freqnorm = (double)nthfrequency[nthindex]/nthtotalcount;
cout << -log(freqnorm)/log(2.0);
buffer.extract();
} else {
cout << ".";
}
} else if (infile[i].getType() == E_humrec_data_comment) {
cout << "\t!";
}
cout << "\n";
}
}
//////////////////////////////
//
// printAnalysis --
//
void printAnalysis(HumdrumFile& infile, Array<const char*>& symboltable,
Array<int>& frequency, int totalcount) {
int i;
double freqnorm;
int sindex = 0;
for (i=0; i<infile.getNumLines(); i++) {
cout << infile[i];
if (infile[i].getType() == E_humrec_interpretation) {
cout << "\t";
if (strcmp(infile[i][0], "*-") == 0) {
cout << "*-";
} else if (strncmp(infile[i][0], "**", 2) == 0) {
cout << "**info";
} else {
cout << "*";
}
} else if (infile[i].getType() == E_humrec_data_measure) {
cout << "\t" << infile[i][0];
} else if (infile[i].getType() == E_humrec_data) {
cout << "\t";
sindex = findsymbol(symboltable, infile[i][0]);
freqnorm = (double)frequency[sindex]/totalcount;
cout << -log(freqnorm)/log(2.0);
} else if (infile[i].getType() == E_humrec_data_comment) {
cout << "\t!";
}
cout << "\n";
}
}
//////////////////////////////
//
// entropy --
//
double entropy(Array& frequency, int totalcount) {
double sum = 0.0;
double value = 0.0;
int i;
for (i=0; i<frequency.getSize(); i++) {
value = (double)frequency[i]/totalcount;
sum += value * log(value)/log(2.0);
}
return -sum;
}
//////////////////////////////
//
// printnthTable --
//
void printnthTable(Array<const char*>& symboltable,
Array<Array<int> >& nthsymboltable, Array<int>& nthfrequency,
int nthtotalcount) {
double freqnorm;
int i;
cout << "!! tokencount = " << nthtotalcount << "\n";
cout << "!! weighted_entropy = " << entropy(nthfrequency, nthtotalcount)
<< "\n";
cout << "!! unique_states = " << nthfrequency.getSize() << "\n";
cout << "**idx\t**sym\t**freq\t**norm\t**info\n";
for (i=0; i<nthsymboltable.getSize(); i++) {
freqnorm = (double)nthfrequency[i]/nthtotalcount;
cout << i << "\t";
printNthOrder(nthsymboltable[i], symboltable);
cout << "\t" << nthfrequency[i]
<< "\t" << freqnorm << "\t";
cout << -log(freqnorm)/log(2.0);
cout << "\n";
}
cout << "*-\t*-\t*-\t*-\t*-\n";
}
//////////////////////////////
//
// printNthOrder --
//
void printNthOrder(Array& sequence, Array lookup) {
int i;
for (i=0; i<sequence.getSize(); i++) {
cout << lookup[sequence[i]];
if (i < sequence.getSize() - 1) {
cout << " ";
}
}
}
//////////////////////////////
//
// printTable --
//
void printTable(Array<const char*>& symboltable, Array<int>& frequency,
int totalcount) {
double freqnorm;
int i;
cout << "!! tokencount = " << totalcount << "\n";
cout << "!! weighted_entropy = " << entropy(frequency, totalcount) << "\n";
cout << "!! unique_states = " << frequency.getSize() << "\n";
cout << "**idx\t**sym\t**freq\t**norm\t**info\n";
for (i=0; i<symboltable.getSize(); i++) {
freqnorm = (double)frequency[i]/totalcount;
cout << i << "\t" << symboltable[i] << "\t" << frequency[i]
<< "\t" << freqnorm << "\t";
cout << -log(freqnorm)/log(2.0);
cout << "\n";
}
cout << "*-\t*-\t*-\t*-\t*-\n";
}
//////////////////////////////
//
// buildtable --
//
int buildtable(HumdrumFile& infile, Array<const char*>& symboltable,
Array<int>& frequency) {
symboltable.setSize(maxcount);
symboltable.setGrowth(maxcount);
symboltable.setSize(0);
frequency.setSize(maxcount);
frequency.setGrowth(maxcount);
frequency.setSize(0);
int i=0;
int total = 0;
int zero=0;
int hindex = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() != E_humrec_data) {
continue;
}
if (strcmp(infile[i][0], ".") != 0) { // ignore null tokens
hindex = storesymbol(symboltable, infile[i][0]); // second index = spine #
if (hindex == frequency.getSize()) { // add a new location in ftab.
frequency.append(zero);
}
frequency[hindex]++;
total++;
}
}
return total;
}
//////////////////////////////
//
// buildnthorder --
//
int buildnthorder(HumdrumFile& infile, Array<Array<int> >& nthsymboltable,
Array<int>& nthfrequency, Array<const char*> symboltable, int order) {
nthsymboltable.setSize(maxcount);
nthsymboltable.setGrowth(maxcount);
nthsymboltable.setSize(0);
nthfrequency.setSize(maxcount);
nthfrequency.setGrowth(maxcount);
nthfrequency.setSize(0);
int i=0;
int total = 0;
int zero=0;
int hindex = 0;
CircularBuffer<int> buffer;
buffer.setSize(order*2);
buffer.reset();
int sindex;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() != E_humrec_data) {
continue;
}
sindex = findsymbol(symboltable, infile[i][0]);
buffer.insert(sindex);
if ((buffer.getCount() >= order) && (strcmp(infile[i][0], ".") != 0)) {
hindex = storenthsymbol(nthsymboltable, buffer);
if (hindex == nthfrequency.getSize()) {
nthfrequency.append(zero);
}
buffer.read();
nthfrequency[hindex]++;
total++;
}
}
return total;
}
///////////////////////////////
//
// storenthsymbol --
//
int storenthsymbol(Array<Array<int> >& nthsymboltable,
CircularBuffer<int>& buffer) {
int i;
int j;
int found = 0;
for (i=0; i<nthsymboltable.getSize(); i++) {
found = 1;
for (j=0; j<order; j++) {
if (buffer[j] != nthsymboltable[i][j]) {
found = 0;
}
}
if (found == 1) {
return i;
}
}
// symbol not found: add it to the table
int size = nthsymboltable.getSize();
nthsymboltable.setSize(size+1);
nthsymboltable[size].setSize(order);
for (i=0; i<order; i++) {
nthsymboltable[size][i] = buffer[i];
}
return size;
}
//////////////////////////////
//
// storesymbol -- store a symbol in the table (if not there already) and
// then return its index.
int storesymbol(Array& table, const char* token) {
int i;
for (i=0; i<table.getSize(); i++) {
if (strcmp(table[i], token) == 0) {
return i;
}
}
table.append(token);
return table.getSize()-1;
}
//////////////////////////////
//
// findsymbol -- store a symbol in the table (if not there already) and
// then return its index.
int findsymbol(Array& table, const char* token) {
int i;
for (i=0; i<table.getSize(); i++) {
if (strcmp(table[i], token) == 0) {
return i;
}
}
return -1;
}
//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("a|append=b", "append analysis to data in output");
opts.define("max=i:50000", "maximum expected unique states");
opts.define("t|table=b", "display table of statistics");
opts.define("n|order=i:1", "context order");
opts.define("b|background=i:255", "background color");
opts.define("debug=b", "trace input parsing");
opts.define("author=b", "author of the program");
opts.define("version=b", "compilation information");
opts.define("example=b", "example usage");
opts.define("h|help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, Dec 2000" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 6 Dec 2000" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
debugQ = opts.getBoolean("debug");
appendQ = opts.getBoolean("append");
maxcount = opts.getInteger("max");
tableQ = opts.getBoolean("table");
order = opts.getInteger("order");
fileQ = opts.getBoolean("order");
bgval = opts.getInteger("background");
if (bgval < 0) bgval = 0;
if (bgval > 255) bgval = 255;
}
//////////////////////////////
//
// example -- example usage of the maxent program
//
void example(void) {
cout <<
" \n"
<< endl;
}
//////////////////////////////
//
// usage -- gives the usage statement for the quality program
//
void usage(const char* command) {
cout <<
" \n"
<< endl;
}
//////////////////////////////
//
// fillRow --
//
void fillRow(Array<double>& row, Array<int>& sequence,
Array<const char*>& symboltable, int order,
HumdrumFile& infile) {
row.setSize(0);
double value;
Array<Array<int> > nthsymboltable;
Array<int> nthfrequency;
int nthtotalcount;
nthtotalcount = buildnthorder(infile, nthsymboltable, nthfrequency,
symboltable, order);
int i;
int nthindex;
CircularBuffer<int> buffer;
buffer.setSize(order*2);
buffer.reset();
for (i=0; i<sequence.getSize(); i++) {
buffer.insert(sequence[i]);
if (buffer.getCount() >= order) {
nthindex = findNthSymbol(nthsymboltable, buffer);
value = (double)nthfrequency[nthindex] / nthtotalcount;
value = -log(value)/log(2.0);
row.append(value);
buffer.extract();
}
};
if (!debugQ) {
double mmin = 0;
double mmax = 0;
mmin = row[0];
mmax = row[0];
for (i=0; i<row.getSize(); i++) {
if (row[i] < mmin) mmin = row[i];
if (row[i] > mmax) mmax = row[i];
}
if (mmax == mmin) {
for (i=0; i<row.getSize(); i++) {
row[i] = 128.0;
}
} else {
for (i=0; i<row.getSize(); i++) {
row[i] = (int)((row[i] - mmin)/(mmax-mmin) * 255);
}
}
}
}
//////////////////////////////
//
// printPicture --
//
void printPicture(Array& symboltable, HumdrumFile& infile) {
Array<int> sequence;
extractSequence(infile, sequence, symboltable);
Array<double> row;
row.setSize(sequence.getSize()); // changes in fillRow
cout << "P3\n";
cout << sequence.getSize() << " " << sequence.getSize() << "\n";
cout << "255\n";
int i;
int j;
int pre;
int post;
for (i=0; i<sequence.getSize(); i++) {
for (pre=0; pre<i/2; pre++) {
if (debugQ) {
cout << bgval << " ";
} else {
cout << bgval << " " << bgval << " " << bgval << " ";
}
}
fillRow(row, sequence, symboltable, i+1, infile);
for (j=0; j<row.getSize(); j++) {
// cout << row[j] << " " << row[j] << " " << row[j] << " ";
if (debugQ) {
cout << row[j] << " ";
} else {
cout <<(int)row[j]<< " " <<(int)row[j]<< " " <<(int)row[j]<< " ";
}
}
for (post=0; post<i/2+(i%2); post++) {
if (debugQ) {
cout << bgval << " ";
} else {
cout << bgval << " " << bgval << " " << bgval << " ";
}
}
cout << "\n";
}
}
//////////////////////////////
//
// extractSequence --
//
void extractSequence(HumdrumFile& infile, Array<int>& sequence,
Array<const char*>& symboltable) {
sequence.setSize(infile.getNumLines());
sequence.setSize(0);
int i;
int sindex = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() != E_humrec_data) {
continue;
}
sindex = findsymbol(symboltable, infile[i][0]);
sequence.append(sindex);
}
}
///////////////////////////////
//
// findNthSymbol --
//
int findNthSymbol(Array<Array<int> >& nthsymboltable,
CircularBuffer<int>& buffer) {
int i;
int j;
int found = 0;
for (i=0; i<nthsymboltable.getSize(); i++) {
found = 1;
for (j=0; j<order; j++) {
if (buffer[j] != nthsymboltable[i][j]) {
found = 0;
}
}
if (found == 1) {
return i;
}
}
return -1;
}
// md5sum: aa828fbc777274d5739ca43a542a618e tinfo.cpp [20050403]