//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Mon May 18 11:42:45 PDT 2009
// Filename: ...sig/examples/all/cherryc.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/cherryc.cpp
// Syntax: C++; museinfo
//
// Description: Performance correlation comparison
//
#include "humdrum.h"
#include "MidiFile.h"
#include <math.h>
#include <iomanip>
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
void getSequences(Array<Array<double> >& performances,
Array<Array<char> >& names,
HumdrumFile& infile);
int compareSequences(Array<double>& a, Array<double>& b,
int ind, int len);
double pearsonCorrelation(int size, double* x, double* y);
double pearsonCorrelationHole(int size, double* x, double* y, int ignore);
double getMean(Array<double>& data);
double getSampleSD(double mean, Array<double>& data);
void removeIndex(Array<double>& a, Array<double>& b, int best);
double model(double x);
double modelOld(double x);
double getDifference(Array<double>& data, Array<double>& models,
int preexclude, int exclude, double parameter);
double getCorrected(Array<double>& data, Array<double>& models,
int preexclude, int exclude);
double getParabolicMinimum(double x1, double y1, double x2, double y2,
double x3, double y3);
void extractNames(Array<Array<char> >& names,
HumdrumRecord& record);
double adjustedCorrelation(Array<double> a, Array<double> b);
double adjustedCorrelationNew(Array<double> a, Array<double> b, int count);
void printCorrelations(Array<Array<double> >& corr,
Array<Array<char> >& names);
void fixLocations(Array<int>& locations);
double adjustedCorrelationSigmoid(Array<double> a, Array<double> b);
double sigmoid(double value);
// User interface variables:
Options options;
int verboseQ = 0;
int mmaQ = 1;
int exclude = 5;
int preexclude = 5;
int printSig = 0; // used with -s option
double sigsens = 1.0;
int printAdj = 0; // used with -a option
int printNew = 0; // used with -n option
int newcount = 20; // used with -n option
//////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
HumdrumFile infile;
infile.read(options.getArg(1));
Array<Array<char> > names; // name/year of the performer
Array<Array<double> > performances; // data for each performer
getSequences(performances, names, infile);
Array<Array<double> > rawcorr;
Array<Array<double> > adjcorr;
rawcorr.setSize(performances.getSize());
adjcorr.setSize(performances.getSize());
int i, j;
for (i=0; i<performances.getSize(); i++) {
for (j=0; j<performances.getSize(); j++) {
rawcorr[i].setSize(performances.getSize());
adjcorr[i].setSize(performances.getSize());
rawcorr[i].setAll(-1.0);
adjcorr[i].setAll(-1.0);
}
}
double corr;
if (printSig) {
// calculate sigmoid-scaled correlations
for (i=0; i<performances.getSize(); i++) {
adjcorr[i][i] = 1.0;
for (j=i+1; j<performances.getSize(); j++) {
corr = adjustedCorrelationSigmoid(performances[i], performances[j]);
adjcorr[i][j] = corr;
adjcorr[j][i] = corr;
}
}
cout << "!! SIGMOID CORRELATIONS:" << endl;
printCorrelations(adjcorr, names);
} else if (printAdj) {
// calculate adjusted correlations
for (i=0; i<performances.getSize(); i++) {
adjcorr[i][i] = 1.0;
for (j=i+1; j<performances.getSize(); j++) {
cerr << "Calculating " << i << "," << j << "..." << endl;
corr = adjustedCorrelation(performances[i], performances[j]);
adjcorr[i][j] = corr;
adjcorr[j][i] = corr;
}
}
cout << "!! ADJUSTED CORRELATIONS:" << endl;
printCorrelations(adjcorr, names);
} else if (printNew) {
// calculate adjusted correlations, new model
for (i=0; i<performances.getSize(); i++) {
adjcorr[i][i] = 1.0;
for (j=i+1; j<performances.getSize(); j++) {
cerr << "Calculating " << i << "," << j << "..." << endl;
corr = adjustedCorrelationNew(performances[i],
performances[j], newcount);
adjcorr[i][j] = corr;
adjcorr[j][i] = corr;
}
}
cout << "!! ADJUSTED CORRELATIONS:" << endl;
printCorrelations(adjcorr, names);
} else {
// calculate raw correlations
for (i=0; i<performances.getSize(); i++) {
rawcorr[i][i] = 1.0;
for (j=i+1; j<performances.getSize(); j++) {
corr = pearsonCorrelation(performances[i].getSize(),
performances[i].getBase(), performances[j].getBase());
rawcorr[i][j] = corr;
rawcorr[j][i] = corr;
}
}
cout << "!! RAW CORRELATIONS:" << endl;
printCorrelations(rawcorr, names);
}
return 0;
}
//////////////////////////////
//
// printCorrelations --
//
void printCorrelations(Array<Array<double> >& corr,
Array<Array<char> >& names) {
int i, j;
cout << "**name";
for (i=0; i<names.getSize(); i++) {
cout << "\t**corr";
}
cout << endl;
cout << "!";
for (i=0; i<names.getSize(); i++) {
cout << "\t!" << names[i].getBase();
}
cout << endl;
for (i=0; i<corr.getSize(); i++) {
cout << names[i].getBase();
for (j=0; j<corr[i].getSize(); j++) {
cout << "\t" << corr[i][j];
}
cout << endl;
}
cout << "*-";
for (i=0; i<names.getSize(); i++) {
cout << "\t*-";
}
cout << endl;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// adjustedCorrelation --
//
double adjustedCorrelation(Array a, Array b) {
int i, best;
int len = a.getSize();
Array<double> data;
data.setSize(a.getSize());
data.setSize(0);
Array<double> models;
models.setSize(a.getSize());
models.setAll(0.0);
models.allowGrowth(0);
for (i=0; i<models.getSize(); i++) {
models[i] = model((double)i/models.getSize());
}
double corr;
corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
data.append(corr);
int iterations = a.getSize()-exclude;
for (i=0; i<iterations; i++) {
best = compareSequences(a, b, i+1, len);
removeIndex(a, b, best);
corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
data.append(corr);
//if (mmaQ) {
// if (i < iterations-1) {
// cout << ",\n";
// }
//}
}
//if (mmaQ) {
// cout << "};\n";
//}
int count = 1000;
Array<double> value;
value.setSize(count);
value.allowGrowth(0);
Array<double> parameter;
parameter.setSize(count);
parameter.allowGrowth(0);
for (i=0; i<count; i++) {
parameter[i] = (double)i/count;
value[i] = getDifference(data, models, preexclude, exclude, parameter[i]);
}
// cout.setf(ios::fixed);
// cout << "data = {\n";
// for (i=0; i<value.getSize(); i++) {
// cout << "{" << parameter[i] << ", " << value[i] << "}";
// if (i < value.getSize()-1) {
// cout << ",";
// }
// cout << "\n";
// }
// cout << "};\n";
//
// cout << "\nmodel = {\n";
// for (i=0; i<models.getSize(); i++) {
// cout << "\t" << models[i];
// if (i < models.getSize()-1) {
// cout << ",";
// }
// cout << "\n";
// }
// cout << "};\n";
//
// cout << "\nraw = {\n";
// for (i=0; i<data.getSize(); i++) {
// cout << "\t" << data[i];
// if (i < data.getSize()-1) {
// cout << ",";
// }
// cout << "\n";
// }
// cout << "};\n";
double corrected = getCorrected(data, models, preexclude, exclude);
// cout << "original = " << data[0] << ";\n";
// cout << "revised = " << corrected << ";\n";
return corrected;
}
//////////////////////////////
//
// adjustedCorrelationNew -- attenuate bad points
//
double adjustedCorrelationNew(Array a, Array b, int ncount) {
int i, best;
int len = a.getSize();
//Array<double> data;
//data.setSize(a.getSize());
//data.setSize(0);
Array<double> models;
models.setSize(a.getSize());
models.setAll(0.0);
models.allowGrowth(0);
// first adjust the values so that they are z-scores:
// dividing by the sd is not necessary but the mean subtraction is.
double amean, bmean;
double asd, bsd;
amean = getMean(a);
bmean = getMean(b);
asd = getSampleSD(amean, a);
bsd = getSampleSD(bmean, b);
Array<double> afinale(a.getSize());
Array<double> bfinale(b.getSize());
for (i=0; i<a.getSize(); i++) {
a[i] = (a[i] - amean) / asd;
b[i] = (b[i] - bmean) / bsd;
afinale[i] = a[i];
bfinale[i] = b[i];
}
for (i=0; i<models.getSize(); i++) {
models[i] = model((double)i/models.getSize());
}
double corr;
//corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
//data.append(corr);
int iterations = ncount;
Array<int> locations;
locations.setSize(iterations);
locations.setAll(0);
// int iterations = a.getSize()-exclude;
for (i=0; i<iterations; i++) {
best = compareSequences(a, b, i+1, len);
locations[i] = best;
removeIndex(a, b, best);
corr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
//data.append(corr);
//if (mmaQ) {
// if (i < iterations-1) {
// cout << ",\n";
// }
//}
}
fixLocations(locations);
Array<double> window;
window.setSize(locations.getSize());
window.allowGrowth(0);
for (i=0; i<window.getSize(); i++) {
window[i] = pow(sin(M_PI / 2.0 * (i + 0.5) / window.getSize()), 0.5);
afinale[locations[i]] *= window[i];
bfinale[locations[i]] *= window[i];
}
return pearsonCorrelation(afinale.getSize(),
afinale.getBase(), bfinale.getBase());
}
//////////////////////////////
//
// adjustedCorrelationSigmoid --
//
double adjustedCorrelationSigmoid(Array a, Array b) {
// first adjust the values so that they are z-scores:
// dividing by the sd is not necessary but the mean subtraction is.
double amean, bmean;
double asd, bsd;
amean = getMean(a);
bmean = getMean(b);
asd = getSampleSD(amean, a);
bsd = getSampleSD(bmean, b);
int i;
for (i=0; i<a.getSize(); i++) {
a[i] = (a[i] - amean) / asd;
b[i] = (b[i] - bmean) / bsd;
}
// now apply a sigmoid scaling to the results:
for (i=0; i<a.getSize(); i++) {
a[i] = sigmoid(a[i] * sigsens);
b[i] = sigmoid(b[i]);
}
return pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
}
///////////////////////////////
//
// sigmoid --
//
double sigmoid(double value) {
double output = 1.0 / (1.0 + exp(-value));
return 2 * (output - 0.5);
}
///////////////////////////////
//
// fixLocations --
//
void fixLocations(Array& locations) {
int i, j;
for (i=0; i<locations.getSize(); i++) {
for (j=i+1; j<locations.getSize(); j++) {
if (locations[j] >= locations[i]) {
locations[j]++;
}
}
}
}
//////////////////////////////
//
// getCorrected --
//
double getCorrected(Array<double>& data, Array<double>& models,
int preexclude, int exclude) {
double target = data[0];
double targethi = (1.0 - target)/2 + target;
double targetlo = target/2;
double value = getDifference(data, models, preexclude, exclude, target);
double valuehi = getDifference(data, models, preexclude, exclude, targethi);
double valuelo = getDifference(data, models, preexclude, exclude, targetlo);
double minimum = getParabolicMinimum(targetlo, valuelo, target, value,
targethi, valuehi);
return minimum;
}
//////////////////////////////
//
// getParabolicMinimum -- or maximum if inflection is negative...
//
double getParabolicMinimum(double x1, double y1, double x2, double y2,
double x3, double y3) {
double a = (x3*(y2-y1)+x2*(y1-y3)+x1*(y3-y2))/((x1-x2)*(x1-x3)*(x2-x3));
double b = (x3*x3*(y1-y2)+x1*x1*(y2-y3)+x2*x2*(y3-y1))/
((x1-x2)*(x1-x3)*(x2-x3));
//double c = (x1*x3*(x3-x1)*y2 + x2*x2*(x3*y1-x1*y3)+x2(x1*x1*y3-x3*x3*y1))/
// ((x1-x2)*(x1-x3)*(x2-x3));
return -b/(2.0*a);
}
//////////////////////////////
//
// getDifference --
//
double getDifference(Array<double>& data, Array<double>& models,
int preexclude, int exclude, double parameter) {
int i;
double sum = 0.0;
double value;
for (i=preexclude; i<models.getSize() - exclude; i++) {
value = data[i] - (models[i] * (1.0 - parameter) + parameter);
sum += value * value;
}
return sum;
}
//////////////////////////////
//
// removeIndex --
//
void removeIndex(Array& a, Array& b, int best) {
int i;
int size = a.getSize();
for (i=best; i<size-1; i++) {
a[i] = a[i+1];
b[i] = b[i+1];
}
a.setSize(size-1);
b.setSize(size-1);
}
//////////////////////////////
//
// getSequences --
//
void getSequences(Array<Array<double> >& performances,
Array<Array<char> >& names, HumdrumFile& infile) {
int i, j;
double value;
int pcount = infile.getMaxTracks();
int rcount = infile.getNumLines();
int foundnames = 0;
performances.setSize(pcount);
names.setSize(pcount);
performances.allowGrowth(0);
names.allowGrowth(0);
for (i=0; i<pcount; i++) {
performances[i].setSize(rcount);
performances[i].setSize(0);
names[i].setSize(1);
names[0] = '\0';
names[i].setSize(0);
}
for (i=0; i<infile.getNumLines(); i++) {
if ((!foundnames) && infile[i].isLocalComment()) {
if (strstr(infile[i].getLine(), "pid") == NULL) {
extractNames(names, infile[i]);
foundnames = 1;
}
continue;
}
if (infile[i].getType() != E_humrec_data) {
continue;
}
for (j=0; j<infile[i].getFieldCount(); j++) {
value = 0;
sscanf(infile[i][j], "%lf", &value);
performances[j].append(value);
}
}
}
//////////////////////////////
//
// extractNames --
//
void extractNames(Array >& names, HumdrumRecord& record) {
int i, len;
for (i=0; i<record.getFieldCount(); i++) {
len = strlen(record[i]);
names[i].setSize(len); // no need for +1 here, see next
strcpy(names[i].getBase(), record[i]+1); // +1 to skip initial "!"
}
}
//////////////////////////////
//
// compareSequences --
//
int compareSequences(Array& a, Array& b, int ind, int len) {
int i;
double basecorr;
double corr;
Array<double> corrlist;
corrlist.setSize(a.getSize());
corrlist.setSize(0);
Array<int> index;
index.setSize(a.getSize());
index.setSize(0);
basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
if (verboseQ) {
cout << "base" << "\t" << basecorr << "\n";
}
for (i=0; i<a.getSize(); i++) {
corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
corrlist.append(corr);
index.append(i);
}
double mean = getMean(corrlist);
double sd = getSampleSD(mean, corrlist);
Array<double> zscores;
zscores.setSize(corrlist.getSize());
int asize = corrlist.getSize();
for (i=0; i<asize; i++) {
zscores[i] = (corrlist[i] - mean) / sd;
}
int maxi = index[0];
for (i=1; i<asize; i++) {
if (zscores[i] > zscores[maxi]) {
maxi = i;
}
}
//if (mmaQ) {
// cout << "{" << ind << "/" << len << ", " << corrlist[maxi] << "}";
//}
if (verboseQ) {
cout << "max\t" << maxi << "\t";
cout << "mean\t" << mean << "\t";
cout << "sd\t" << sd << "\t";
cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
for (i=0; i<asize; i++) {
cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
}
}
return maxi;
}
//////////////////////////////
//
// getSampleSD --
//
double getSampleSD(double mean, Array& data) {
int size = data.getSize();
double sum = 0.0;
double value;
int i;
for (i=0; i<size; i++) {
value = data[i] - mean;
sum += value * value;
}
return sqrt(sum / (size - 1.0));
}
//////////////////////////////
//
// getMean --
//
double getMean(Array& data) {
int size = data.getSize();
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += data[i];
}
return sum / size;
}
//////////////////////////////
//
// ranksort -- sort counts by largest first
//
int ranksort(const void* A, const void* B) {
int& a = *(*((int**)A));
int& b = *(*((int**)B));
if (a < b) {
return +1;
} else if (a > b) {
return -1;
} else {
return 0;
}
}
//////////////////////////////
//
// pearsonCorrelationHole --
//
double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {
double sumx = 0.0;
double sumy = 0.0;
double sumco = 0.0;
double meanx = x[0];
double meany = y[0];
int starti = 1;
if (ignore == 0) {
meanx = x[1];
meany = y[1];
starti = 2;
}
double sweep;
double deltax;
double deltay;
int i;
for (i=starti; i<ignore; i++) {
sweep = i / (i+1.0);
deltax = x[i] - meanx;
deltay = y[i] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / (i+1);
meany += deltay / (i+1);
}
for (i=ignore+1; i<size; i++) {
sweep = i / (i+1.0);
deltax = x[i] - meanx;
deltay = y[i] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / (i+1);
meany += deltay / (i+1);
}
double popsdx = sqrt(sumx / (size-1));
double popsdy = sqrt(sumy / (size-1));
double covxy = sumco / (size-1);
return covxy / (popsdx * popsdy);
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("a|adjusted=b", "print adjusted correlation values");
opts.define("n|new-model=i:20", "print adjusted correlation values");
opts.define("s|sigmoid=d:1.0", "print sigmoid-scaled correlation values");
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, Jan 2008" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 30 Jan 2008" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
printAdj = opts.getBoolean("adjusted");
printSig = opts.getBoolean("sigmoid");
sigsens = opts.getDouble("sigmoid");
printNew = opts.getBoolean("new-model");
newcount = opts.getInteger("new-model");
}
//////////////////////////////
//
// pearsonCorrelation --
//
double pearsonCorrelation(int size, double* x, double* y) {
double sumx = 0.0;
double sumy = 0.0;
double sumco = 0.0;
double meanx = x[0];
double meany = y[0];
double sweep;
double deltax;
double deltay;
int i;
for (i=2; i<=size; i++) {
sweep = (i-1.0) / i;
deltax = x[i-1] - meanx;
deltay = y[i-1] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / i;
meany += deltay / i;
}
double popsdx = sqrt(sumx / size);
double popsdy = sqrt(sumy / size);
double covxy = sumco / size;
return covxy / (popsdx * popsdy);
}
//////////////////////////////
//
// model -- modeled after a whitenoise sequence compared to
// another whitenoise sequence added to the original whitenoise sequence.
//
double model(double x) {
double y;
y = 0.923588 * pow(x,5.0)
- 3.385430 * pow(x,4.0)
+ 5.339920 * pow(x,3.0)
- 5.237760 * pow(x,2.0)
+ 3.359680 * x;
return y;
}
//////////////////////////////
//
// modelOld -- modeled after the curve generated by two whitenoise
// sequences.
//
double modelOld(double x) {
double y;
y = 0.516818 * pow(x,5.0)
- 1.708827 * pow(x,4.0)
+ 2.771050 * pow(x,3.0)
- 3.489990 * pow(x,2.0)
+ 2.910390 * x;
return y;
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
// md5sum: 34e03c86fea5cec9cbd74461161faace cherryc.cpp [20090525]