//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Wed Apr 21 12:39:06 PDT 2004
// Last Modified: Wed Apr 21 12:39:09 PDT 2004
// Filename: ...sig/examples/all/dbnovelty.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/dbnovelty.cpp
// Syntax: C++; museinfo
//
// Description: Generate a novelty curve for a melody based on other
// melodies in a database.
//
// OTL: Query Novelty Profiles
// -- Measure commonness of features in database
// Applications
// -- Quantify the probability of the match being junk.
// -- Identify potential errors in the database
// -- Identify potential errors in the query
//
#include "humdrum.h"
#include "Matrix.h"
#include <string.h>
#include <ctype.h>
#include <math.h>
#ifndef OLDCPP
#include <sstream>
#define SSTREAM stringstream
#define CSTRING str().c_str()
using namespace std;
#else
#ifdef VISUAL
#include <strstrea.h> /* for windows 95 */
#else
#include <strstream.h>
#endif
#define SSTREAM strstream
#define CSTRING str()
#endif
///////////////////////////////////////////////////////////////////////////
class Feature {
public:
Feature(void) { clear(); }
Feature(Feature& aFeature) { copyFeature(aFeature); }
Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); }
Feature& copyFeature(Feature& aFeature) {
if (this == &aFeature) {
return *this;
}
strcpy(istn, aFeature.istn);
line = aFeature.line;
int i, j;
feature.setSize(aFeature.feature.getSize());
for (i=0; i<feature.getSize(); i++) {
feature[i].setSize(aFeature.feature[i].getSize());
for (j=0; j<feature[i].getSize(); j++) {
feature[i][j] = aFeature.feature[i][j];
}
}
return *this;
}
void clear(void) { line = -1; istn[0] = '\0'; feature.setSize(0); }
int is_valid(void) { return istn[0] != '\0' ? 1 : 0; }
// data fields:
char istn[256]; // unique serial number/filename
Array< Array<int> > feature; // feature vector
int line; // line number in original file
};
///////////////////////////////////////////////////////////////////////////
// function declarations
void checkOptions(Options& opts, int argc, char* argv[]);
void example(void);
void usage(const char* command);
int buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile);
int getFeature(Feature& tempfeature, HumdrumRecord& record);
void sortFeatures(Array<Feature>& features);
int unanchoredmatch(Feature& query, Feature& datum);
void buildMelodyMatchData(Array<Feature>& analysis,
Array<Feature>& features, int melody);
int getMatchCountUnanchored(Feature& query, Array<Feature>& features);
void buildQuery(Feature& query, Feature& feature,
int start, int length);
void printAnalysisDataUnanchored(Array<Feature>& analysis,
Array<Feature>& features, int melody);
void printAnchorType(int index, int size);
void buildNoveltyCurve(Array<double>& noveltycurve,
Array<Feature>& analysis, int order);
void printBaseMatrix(double* expected, int size);
void printMatrix(Matrix<double>& matrix);
// global variables
Options options; // database for command-line arguments
int debugQ = 0; // used with the --debug option
int numinputs = 0;
int depth = -1; // used with -d option
int totalcount = 0; // total number of features for all elements
int appendQ = 0; // used with -a option
int eprofileQ = 0; // used with -e option
int mprofileQ = 1; // used with -P option
int melody = 1; // used with -m option
int matrixQ = 0; // print search matrix -x option
int basematrixQ = 0; // print search base matrix -X option
int uniqueQ = 0; // used with -u option
int orderQ = 0; // used with -o option
Array<Feature> features;
///////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[]) {
HumdrumFile infile;
// process the command-line options
checkOptions(options, argc, argv);
infile.clear();
// if no command-line arguments read data file from standard input
if (numinputs < 1) {
infile.read(cin);
} else {
infile.read(options.getArg(1));
}
Array<int> sortlines;
totalcount = buildFeaturesArray(features, infile);
melody = melody - 1;
if (melody < 0) {
cout << "!! Melody index reset to 1" << endl;
melody = 0;
} else if (melody >= totalcount) {
melody = totalcount - 1;
cout << "!! Melody index reset to " << melody + 1 << endl;
}
Array<Feature> analysis;
buildMelodyMatchData(analysis, features, melody);
printAnalysisDataUnanchored(analysis, features, melody);
return 0;
}
int sortQ = 0; // used with -o option
///////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printMatrix -- for debugging of calculations.
//
void printMatrix(Matrix& matrix) {
int i;
int j;
int rcount = matrix.getRowCount();
int ccount = matrix.getColumnCount();
cout << "{";
for (i=0; i<rcount; i++) {
cout << "{";
for (j=0; j<ccount; j++) {
cout << matrix.cell(i, j);
if (j < ccount-1) {
cout << ", ";
}
}
if (i < rcount-1) {
cout << "},\n";
}
}
cout << "}};\n";
cout << "\n";
}
//////////////////////////////
//
// buildNoveltyCurve --
//
void buildNoveltyCurve(Array<double>& noveltycurve, Array<Feature>& analysis,
int order) {
int size = analysis[0].feature.getSize();
Array<int> colmin;
colmin.setSize(size);
Array<int> colmax;
colmax.setSize(size);
int i, j;
for (i=0; i<colmin.getSize(); i++) {
colmin[i] = analysis[0].feature[0][i];
colmax[i] = analysis[0].feature[0][i];
}
for(i=1; ifor (j=0; j<analysis[0].feature[i].getSize(); j++) {
if (colmin[j] > analysis[0].feature[i][j]) {
colmin[j] = analysis[0].feature[i][j];
}
if (colmax[j] < analysis[0].feature[i][j]) {
colmax[j] = analysis[0].feature[i][j];
}
}
}
int uniquestop = 0;
for(i=1; iif (colmax[size-i-1] != colmax[size-1]) {
uniquestop = size-i;
break;
}
}
static int uniquecount = 0;
if (uniqueQ && (uniquecount == 0)) {
uniquecount++;
cout << "!! Unique stop point = " << uniquestop << endl;
}
/* cout << "!! Min values for each position in colmin:" << endl;
for (i=0; i<colmin.getSize(); i++) {
cout << colmin[i];
if (i<colmin.getSize()-1) {
cout << " ";
}
}
cout << endl;
*/
/* cout << "!! Max values for each position in colmax:" << endl;
for (i=0; i<colmax.getSize(); i++) {
cout << colmax[i];
if (i<colmax.getSize()-1) {
cout << " ";
}
}
cout << endl;
*/
// generate normalized matrix of search results:
Matrix<double> matrix;
matrix.setSize(size, size);
matrix.zero();
for(i=0; ifor (j=0; j<size-i; j++) {
// if (j >= uniquestop) {
// matrix.cell(i, i+j) = analysis[0].feature[i][j];
// break;
// }
if (analysis[0].feature[i][j] > 0) {
// normalized matrix:
// matrix.cell(i, i+j) = ((double)(colmin[j]))/analysis[0].feature[i][j];
// un-normalized matrix:
matrix.cell(i, i+j) = analysis[0].feature[i][j];
} else {
matrix.cell(i, i+j) = 0.0;
}
}
}
static int matrixcount = 0;
if(matrixQ && matrixcount == 0) {
matrixcount++;
cout << "searchmatrix = ";
printMatrix(matrix);
}
noveltycurve.setSize(size);
noveltycurve.setAll(0);
double expected[] = {7194.33, 3179.93, 883.09, 221.86, 55.75, 15.46, 5.61, 2.58, 1.77, 1.49,
1.39, 1.33, 1.3, 1.27, 1.25, 1.23, 1.22, 1.21, 1.21, 1.2, 1.19, 1.19,
1.18, 1.17, 1.17, 1.17, 1.16, 1.16, 1.15, 1.15, 1.15, 1.14, 1.14, 1.13,
1.13, 1.13, 1.12, 1.12, 1.12, 1.12, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11,
1.1, 1.1, 1.1, 1.09, 1.09, 1.09, 1.09, 1.08, 1.08, 1.08, 1.08, 1.08,
1.08, 1.08, 1.08, 1.08, 1.08, 1.08, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07,
1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07, 1.07,
1.07, 1.07, 1.07, 1.08, 1.08, 1.07, 1.08, 1.07, 1.07, 1.07, 1.07, 1.06,
1.07, 1.07, 1.07, 1.07, 1.08, 1.07, 1.07, 1.07, 1.07, 1.07, 1.08, 1.07,
1.07, 1.07, 1.08, 1.08, 1.08, 1.07, 1.07, 1.08, 1.08, 1.08, 1.08, 1.08,
1.09, 1.09, 1.09, 1.09, 1.1, 1.1, 1.09, 1.1, 1.1, 1.1, 1.1, 1.1, 1.11,
1.1, 1.09, 1.09, 1.09, 1.08, 1.08, 1.08, 1.09, 1.09, 1.09, 1.09, 1.1,
1.1, 1.1, 1.1, 1.1, 1.11, 1.11, 1.11, 1.1, 1.1, 1.1, 1.1, 1.11, 1.09,
1.1, 1.1, 1.1, 1.1, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.11, 1.1,
1.1, 1.1, 1.1, 1.1, 1.02, 1.02, 1.02, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03,
1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.03, 1.04, 1.04,
1.04, 1.04, 1.04, 1.04, 1.04, 1.04, 1.04, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
static int basematrixcount = 0;
if (basematrixQ && (basematrixcount == 0)) {
basematrixcount++;
printBaseMatrix(expected, size);
}
Array<double> tempvalues(size);
tempvalues.setAll(0);
for(i=0; i for(i=0; ifor (j=0; j<=order; j++) {
if (i+j >= size) {
break;
}
count++;
noveltycurve[i+j] += tempvalues[i];
}
noveltycurve[i] /= (double)count;
}
}
//////////////////////////////
//
// printBaseMatrix --
//
void printBaseMatrix(double* expected, int size) {
Matrix<double> basematrix;
basematrix.setSize(size, size);
basematrix.zero();
int i, j;
for(i=0; ifor (j=i; j<size; j++) {
if (j-i < 200) {
basematrix.cell(i,j) = expected[j-i];
} else {
basematrix.cell(i,j) = 1.0;
}
}
}
cout << "basematrix = ";
printMatrix(basematrix);
}
//////////////////////////////
//
// printVariable --
//
void printVariable(char* string) {
int length = strlen(string);
int i;
for(i=0; iif (string[i] == '/') {
cout << 'D';
} else if (string[i] == '.') {
cout << 'X';
} else if (string[i] == '-') {
cout << 'H';
} else {
cout << string[i];
}
}
}
//////////////////////////////
//
// printAnalysisDataUnanchored --
//
void printAnalysisDataUnanchored(Array<Feature>& analysis,
Array<Feature>& features, int melody) {
Array<double> noveltycurve;
#define RANGE 8
int ii;
Array<double> noveltysum;
noveltysum.setSize(analysis[0].feature[0].getSize());
noveltysum.setAll(0);
int count = 0;
int i;
for(ii=0; iiif (orderQ) {
cout << "ORDER" << ii << " = {";
}
for (i=0; i<noveltycurve.getSize(); i++) {
noveltysum[i] += noveltycurve[i];
if (orderQ) {
cout << noveltycurve[i];
if (i<noveltycurve.getSize()-1) {
cout << ", ";
}
}
}
if (orderQ) {
cout << "};\n";
}
}
printVariable(analysis[0].istn);
cout << "= {";
for (i=0; i<noveltysum.getSize(); i++) {
cout << noveltysum[i]/(double)count;
if (i<noveltysum.getSize()-1) {
cout << ", ";
}
}
cout << "};\n";
}
//////////////////////////////
//
// printAnchorType --
//
void printAnchorType(int index, int size) {
index = index+1;
int maxdigits = ((int)log10((double)size))+1;
int digits = ((int)log10((double)index))+1;
int i;
for(i=0; i//////////////////////////////
//
// buildMelodyMatchData --
//
void buildMelodyMatchData(Array<Feature>& analysis,
Array<Feature>& features, int index) {
analysis.setSize(1);
analysis.allowGrowth(0);
int j, k;
int searchsize = 0;
Feature query;
strcpy(analysis[0].istn, features[index].istn);
analysis[0].line = features[index].line;
analysis[0].feature.setSize(features[index].feature[0].getSize());
for (j=0; j<analysis[0].feature.getSize(); j++) {
analysis[0].feature[j].setSize(analysis[0].feature.getSize()-j);
analysis[0].feature[j].setAll(0);
searchsize = analysis[0].feature[j].getSize();
if ((depth > 0) && (searchsize > depth)) {
searchsize = depth;
}
for (k=0; k<searchsize; k++) {
buildQuery(query, features[index], j, k+1);
analysis[0].feature[j][k] = getMatchCountUnanchored(query,features);
}
}
}
////////////////////
//
// buildQuery --
//
void buildQuery(Feature& query, Feature& feature, int start, int length) {
query.feature.setSize(1);
query.feature[0].setSize(length);
int i;
for(i=0; i//////////////////////////////
//
// getMatchCountUnanchored --
//
int getMatchCountUnanchored(Feature& query, Array<Feature>& features) {
int count = 0;
int i;
for (i=0; i<features.getSize(); i++) {
count += unanchoredmatch(query, features[i]);
}
return count;
}
//////////////////////////////
//
// unanchoredmatch --
//
int unanchoredmatch(Feature& query, Feature& datum) {
int i, j;
int qsize = query.feature[0].getSize();
int dsize = datum.feature[0].getSize();
int match = 0;
for (i=0; i<=(dsize-qsize); i++) {
if (query.feature[0][0] != datum.feature[0][i]) {
continue;
}
// the first item of both strings matches, try the rest
// and continue if not an exact match.
match = 1;
for (j=1; j<qsize; j++) {
if (query.feature[0][j] != datum.feature[0][i+j]) {
match = 0;
break;
}
}
if (match) {
break;
}
}
// cout << "RESULT " << match << endl;
if(match) {
return 1;
} else {
return 0;
}
}
//////////////////////////////
//
// buildFeaturesArray -- returns the total number of features
//
int buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile) {
int i;
Feature tempfeature;
features.setSize(infile.getNumLines());
features.setSize(0);
int totalfeatures = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_data) {
totalfeatures += getFeature(tempfeature, infile[i]);
if (tempfeature.is_valid()) {
tempfeature.line = i;
features.append(tempfeature);
}
}
}
return totalfeatures;
}
//////////////////////////////
//
// getFeature -- returns the total number of features found on the line
//
int getFeature(Feature& tempfeature, HumdrumRecord& record) {
tempfeature.clear();
int i;
int featurecount = 0;
char buffer2[1024] = {0};
// find the ISTN value
for (i=0; i<record.getFieldCount(); i++) {
if (strcmp(record.getExInterp(i), "**istn") == 0) {
strcpy(buffer2, record[i]);
if (debugQ) {
cout << "found istn: " << buffer2 << endl;
}
} else if (strcmp(record.getExInterp(i), "**feature") == 0) {
featurecount++;
}
}
if ((featurecount == 0) || (buffer2[0] == '\0')) {
// no valid data
return 0;
}
strcpy(tempfeature.istn, buffer2);
tempfeature.feature.setSize(1);
tempfeature.feature[0].setSize(100);
tempfeature.feature[0].setGrowth(1000);
tempfeature.feature[0].setSize(0);
char buffer[100000] = {0};
int code = 0;
char* ptr = NULL;
int total = 0;
// find the first feature (second dimension to be added later)
for (i=0; i<record.getFieldCount(); i++) {
if (strcmp(record.getExInterp(i), "**feature") == 0) {
strncpy(buffer, record[i], 100000);
ptr = strtok(buffer, " ");
while (ptr != NULL) {
if (sscanf(ptr, "%d", &code) > 0) {
tempfeature.feature[0].append(code);
total++;
}
ptr = strtok(NULL, " ");
}
}
}
// do generalized features here.
return total;
}
//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("m|melody-number=i:1", "set the melody index in the database");
opts.define("d|depth=i:-1", "set the maximum search depth");
opts.define("a|append=b", "append original data to the analysis");
opts.define("S|no-state=b", "do not print state distributions");
opts.define("x|matrix=b", "print search matrix");
opts.define("X|basematrix=b", "print predicted matrix");
opts.define("u|unique=b", "print unique limit");
opts.define("o|order=b", "print intermediate novelty curves");
opts.define("e|entropy-profile=b", "print entropy profile");
opts.define("debug=b"); // determine bad input line num
opts.define("author=b"); // author of program
opts.define("version=b"); // compilation info
opts.define("example=b"); // example usages
opts.define("h|help=b"); // short description
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, April 2004" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 29 April 2004" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
eprofileQ = opts.getBoolean("entropy-profile");
numinputs = opts.getArgCount();
debugQ = opts.getBoolean("debug");
depth = opts.getInteger("depth");
appendQ = opts.getBoolean("append");
melody = opts.getInteger("melody-number");
matrixQ = opts.getBoolean("matrix");
basematrixQ = opts.getBoolean("basematrix");
uniqueQ = opts.getBoolean("unique");
orderQ = opts.getBoolean("order");
}
//////////////////////////////
//
// example -- example usage of the program
//
void example(void) {
cout <<
" \n"
<< endl;
}
//////////////////////////////
//
// usage -- gives the usage statement for the program
//
void usage(const char* command) {
cout <<
" \n"
<< endl;
}
// md5sum: 841fdd011e26576c6d53c62895155473 dbnovelty.cpp [20080518]