//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Dec 21 05:13:34 PST 2008
// Last Modified: Tue Dec 23 12:42:23 PST 2008
// Filename: ...sig/examples/all/cherry.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/cherry.cpp
// Syntax: C++; museinfo
//
// Description: Create a dissimilarity plot between two sequences.
//
#include "humdrum.h"
#include "MidiFile.h"
#include <math.h>
#include <iomanip>
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
void getSequences(Array<double>& a, Array<double>& b,
HumdrumFile& infile);
int compareSequences(Array<double>& a, Array<double>& b);
double pearsonCorrelation(int size, double* x, double* y);
double pearsonCorrelationHole(int size, double* x, double* y, int ignore);
double getMean(Array<double>& data);
double getSampleSD(double mean, Array<double>& data);
void printData(Array<int>& ranking, Array<double>& data,
Array<double>& a, Array<double>& b);
void getDissimilarityData(Array<int>& ranking, Array<double>& data,
Array<double>& a, Array<double>& b);
template<class type>
void removeIndex(Array<type>& a, int index);
// User interface variables:
Options options;
//////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
HumdrumFile infile;
infile.read(options.getArg(1));
Array<double> a;
Array<double> b;
getSequences(a, b, infile);
Array<double> data;
Array<int> ranking;
getDissimilarityData(ranking, data, a, b);
printData(ranking, data, a, b);
return 0;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printData --
//
void printData(Array& ranking, Array& data, Array& a, Array& b) {
int i;
cout << "data = {\n";
for (i=0; i<data.getSize(); i++) {
cout << "\t{" << i << ", "
<< setiosflags(ios::fixed) << setprecision(6)
<< data[i] << ", "
<< ranking[i] << ", "
<< a[i] << ", " << b[i] << "}";
if (i < data.getSize() - 1) {
cout << ",";
}
cout << "\n";
}
cout << "};\n";
}
//////////////////////////////
//
// getDissimilarityData --
//
void getDissimilarityData(Array<int>& ranking, Array<double>& data,
Array<double>& a, Array<double>& b) {
data.setSize(a.getSize());
data.allowGrowth(0);
data.setAll(0);
ranking.setSize(a.getSize());
ranking.allowGrowth(0);
ranking.setAll(-1);
Array<double> aa;
Array<double> bb;
Array<int> index;
index.setSize(a.getSize());
int i;
for (i=0; i<a.getSize(); i++) {
index[i] = i;
}
aa = a;
bb = b;
int best;
int iterations = a.getSize()-15;
for (i=0; i<iterations; i++) {
best = compareSequences(aa, bb);
removeIndex(aa, best);
removeIndex(bb, best);
ranking[index[best]] = i+1;
data[index[best]] = 1.0 - pearsonCorrelation(aa.getSize(),
aa.getBase(),
bb.getBase());
removeIndex(index, best);
}
}
//////////////////////////////
//
// removeIndex --
//
template<class type>
void removeIndex(Array& a, int index) {
int i;
int size = a.getSize();
for (i=index; i<size-1; i++) {
a[i] = a[i+1];
}
a.setSize(size-1);
}
//////////////////////////////
//
// getSequences --
//
void getSequences(Array& a, Array& b, HumdrumFile& infile) {
a.setSize(infile.getNumLines());
b.setSize(infile.getNumLines());
a.setSize(0);
b.setSize(0);
double value;
int i;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() != E_humrec_data) {
continue;
}
value = 0;
sscanf(infile[i][0], "%lf", &value);
a.append(value);
value = 0;
sscanf(infile[i][1], "%lf", &value);
b.append(value);
}
}
//////////////////////////////
//
// compareSequences -- returns the index of the element which, when
// removed from the sequence, improves the correlation measurement
// between the two sequences.
//
int compareSequences(Array& a, Array& b) {
int i;
double basecorr;
double corr;
Array<double> corrlist;
corrlist.setSize(a.getSize());
corrlist.setSize(0);
Array<int> index;
index.setSize(a.getSize());
index.setSize(0);
basecorr = pearsonCorrelation(a.getSize(), a.getBase(), b.getBase());
// cout << "base" << "\t" << basecorr << "\n";
for (i=0; i<a.getSize(); i++) {
corr = pearsonCorrelationHole(a.getSize(), a.getBase(), b.getBase(), i);
corrlist.append(corr);
index.append(i);
}
double mean = getMean(corrlist);
double sd = getSampleSD(mean, corrlist);
Array<double> zscores;
zscores.setSize(corrlist.getSize());
int asize = corrlist.getSize();
for (i=0; i<asize; i++) {
zscores[i] = (corrlist[i] - mean) / sd;
}
int maxi = index[0];
for (i=1; i<asize; i++) {
if (zscores[i] > zscores[maxi]) {
maxi = i;
}
}
// cout << "max\t" << maxi << "\t";
// cout << "mean\t" << mean << "\t";
// cout << "sd\t" << sd << "\t";
// cout << corrlist[maxi] << "\t" << zscores[maxi] << "\n";
//
// for (i=0; i<asize; i++) {
// cout << i << "\t" << corrlist[i] << "\t" << zscores[i] << "\n";
// }
return maxi;
}
//////////////////////////////
//
// getSampleSD --
//
double getSampleSD(double mean, Array& data) {
int size = data.getSize();
double sum = 0.0;
double value;
int i;
for (i=0; i<size; i++) {
value = data[i] - mean;
sum += value * value;
}
return sqrt(sum / (size - 1.0));
}
//////////////////////////////
//
// getMean --
//
double getMean(Array& data) {
int size = data.getSize();
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += data[i];
}
return sum / size;
}
//////////////////////////////
//
// ranksort -- sort counts by largest first
//
int ranksort(const void* A, const void* B) {
int& a = *(*((int**)A));
int& b = *(*((int**)B));
if (a < b) {
return +1;
} else if (a > b) {
return -1;
} else {
return 0;
}
}
//////////////////////////////
//
// pearsonCorrelationHole --
//
double pearsonCorrelationHole(int size, double* x, double* y, int ignore) {
double sumx = 0.0;
double sumy = 0.0;
double sumco = 0.0;
double meanx = x[0];
double meany = y[0];
int starti = 1;
if (ignore == 0) {
meanx = x[1];
meany = y[1];
starti = 2;
}
double sweep;
double deltax;
double deltay;
int i;
for (i=starti; i<ignore; i++) {
sweep = i / (i+1.0);
deltax = x[i] - meanx;
deltay = y[i] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / (i+1);
meany += deltay / (i+1);
}
for (i=ignore+1; i<size; i++) {
sweep = i / (i+1.0);
deltax = x[i] - meanx;
deltay = y[i] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / (i+1);
meany += deltay / (i+1);
}
double popsdx = sqrt(sumx / (size-1));
double popsdy = sqrt(sumy / (size-1));
double covxy = sumco / (size-1);
return covxy / (popsdx * popsdy);
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, Jan 2008" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 30 Jan 2008" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
}
//////////////////////////////
//
// pearsonCorrelation --
//
double pearsonCorrelation(int size, double* x, double* y) {
double sumx = 0.0;
double sumy = 0.0;
double sumco = 0.0;
double meanx = x[0];
double meany = y[0];
double sweep;
double deltax;
double deltay;
int i;
for (i=2; i<=size; i++) {
sweep = (i-1.0) / i;
deltax = x[i-1] - meanx;
deltay = y[i-1] - meany;
sumx += deltax * deltax * sweep;
sumy += deltay * deltay * sweep;
sumco += deltax * deltay * sweep;
meanx += deltax / i;
meany += deltay / i;
}
double popsdx = sqrt(sumx / size);
double popsdy = sqrt(sumy / size);
double covxy = sumco / size;
return covxy / (popsdx * popsdy);
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
// md5sum: 67d7640232ce34596f03e06a4602ba01 dissim.cpp [20081225]