//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sat Oct 4 05:50:39 PDT 2008 (extended version of scaperank)
// Last Modified: Sun Oct 5 16:40:18 PDT 2008
// Last Modified: Mon Sep 10 22:09:53 PDT 2012 copied from correlation.cpp
// Filename: ...sig/examples/all/correlation.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/correlation/correlation.cpp
// Syntax: C++; museinfo
//
// Description: Generate a comparison between multiple sets of
// number sequences from S0 through S4.
//
#include "Array.h"
#include "Options.h"
#include <math.h>
#ifndef OLDCPP
using namespace std;
#include <fstream>
#else
#include <fstream.h>
#endif
#include <stdlib.h> /* for qsort and bsearch functions */
#include <time.h> /* for random number seeding */
#include "museinfo.h" /* for humdrum file class */
///////////////////////////////////////////////////////////////////////////
class Performance {
public:
Performance(void);
~Performance();
int spine; // original spine from input
int process; // process = 0 means do not process in analysis
int random; // random = 1 => random testing sequence
int average; // average = 1 => average performance data
const char* pid; // performance id string, if given in input
const char* name; // performance name
Array<double> data; // array of performance data
};
Performance::Performance(void) {
spine = process = random = average = -1;
pid = "";
name = "";
data.setSize(1000);
data.setSize(0);
data.setGrowth(1000);
}
Performance::~Performance() {
data.setSize(0);
pid = "";
name = "";
spine = process = random = average = -1;
}
class Scape {
public:
Scape (void);
~Scape (void);
void setScapeSize (int aSize);
Array<Array<float> > data;
};
Scape::Scape(void) {
data.setSize(0);
}
Scape::~Scape() {
// do nothing;
}
void Scape::setScapeSize(int aSize) {
//cout << "SCAPE SIZE: " << aSize << endl;
data.setSize(aSize-1);
data.allowGrowth(0);
int i;
for (i=0; i<data.getSize(); i++) {
data[i].setSize(i+1);
data[i].allowGrowth(0);
data[i].setAll(-1000.0);
}
}
///////////////////////////////////////////////////////////////////////////
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
void readData(Array<Performance>& performances,
HumdrumFile& infile);
void printNumberDecimal(ofstream& outfile, double number,
int decimals);
void printScore(const char* filename, HumdrumFile& infile,
Array<Performance>& performances,
Array<Array<double> >& score0data,
int decimals);
void sortRanks(Array<int>& ranks, Array<double>& values);
double pearsonCorrelation(int size, double* a, double* b);
void calculateScore0Array(Array<Array<double> >& dataarray,
Array<Performance>& performances);
void calculateRankArray(Array<Array<int> >& rankarray,
Array<Array<double> >& scorearray);
double getMean(int size, double* a);
int numbersort(const void* A, const void* B);
void getRange(double& minn, double& maxx,
Array<double> data);
int performerNameQ(const char* string);
void printRank(const char* filename, HumdrumFile& infile,
Array<Performance>& performances,
Array<Array<int> >& rank0data);
void calculateAllCorrelations(Array<Array<Scape> >& correlations,
Array<Performance>& performances);
void calculateCorrelationLayer(Scape& scape, Performance& a,
Performance& b);
void calculateType1Scores(Array<Array<double> >& score1data,
Array<Array<Scape> >& correlations,
Array<Performance>& perfs);
void calculateType1ScoreSingle(Array<double>& performancedata,
Array<Array<Scape> >& correlations,
int reference,
Array<int>& masklayer);
int getMaxCorrIndex(Array<Array<Scape> >& correlations,
int reference, int scaperow, int scapecol,
Array<int>& masklayer);
void calculateScore0Test(Array<Array<double> >& score0data,
Array<Array<Scape> >& correlations);
double getCorrelation(int row, int col, int scaperow, int scapecol,
Array<Array<Scape> >& correlations);
void findBestMatch(Array<Array<Scape> >& correlations,
Array<int>& masklayer, int& bestmatch,
double& bestscore, int reference);
void removeBetterHalfFromMaskLayer(Array<int>& masklayer,
Array<int>& rank2data);
void calculateType3ScoreSingle(Array<double>& score3data,
Array<int>& masklayer,
Array<Array<Scape> >& correlations,
int reference);
void printCorrelationRankings(const char* reference,
Array<Array<double> >& score0data,
Array<Array<int> >& rank0data,
Array<Performance>& performances);
// User interface variables:
Options options;
int score0q = 0; // used with --s0 (correlation score)
int rank0q = 0; // used with --r0 (correlation score)
int referenceQ = 0; // used with -r option
const char* reference = ""; // used with -r option
const char* filebase = "data"; // used with -b: output file basename
int level = 0;
///////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
HumdrumFile infile;
Array<Performance> performances;
const char* hum_filename = "";
if (options.getArgCount() <= 0) {
hum_filename = "<STDIN>";
infile.read(cin);
} else {
hum_filename = options.getArg(1);
infile.read(hum_filename);
}
readData(performances, infile);
if (level < 0) { // finished calculating everything requested
std::cout << "Warning: no calculations done" << std::endl;
exit(0);
}
// calculate score 0 data: /////////////////////////////////////////
Array<Array<double> > score0data;
Array<Array<int> > rank0data;
char filename[1024] = {0};
calculateScore0Array(score0data, performances);
calculateRankArray(rank0data, score0data);
if (referenceQ) {
printCorrelationRankings(reference, score0data, rank0data, performances);
} else {
if (score0q) {
strcpy(filename, filebase);
strcat(filename, "-s0.txt");
cout << "Storing score-0 data in " << filename << endl;
printScore(filename, infile, performances, score0data, 3);
}
if (rank0q) {
strcpy(filename, filebase);
strcat(filename, "-r0.txt");
cout << "Storing score-0 ranks in " << filename << endl;
printRank(filename, infile, performances, rank0data);
}
}
return 0;
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printCorrelationRankings --
//
void printCorrelationRankings(const char* reference,
Array<Array<double> >& score0data, Array<Array<int> >& rank0data,
Array<Performance>& performances) {
// cout << "PERFORMER = " << reference << endl;
int i;
int index = -1;
for (i=0; i<performances.getSize(); i++) {
if (strcmp(reference, performances[i].name) == 0) {
index = i;
break;
}
}
// cout << "INDEX = " << index << endl;
if (index < 0) {
cout << "ERROR: could not find performance: " << reference << endl;
cout << "REFERENCE PERFORMER: " << reference << endl;
for (i=0; i<performances.getSize(); i++) {
cout << "PERFORMER: " << performances[i].name << endl;
}
exit(1);
}
for (i=0; i<performances.getSize(); i++) {
cout << rank0data[index][i];
cout << "\t";
cout << score0data[index][i];
cout << "\t";
cout << performances[i].name;
cout << "\n";
}
}
//////////////////////////////
//
// findBestMatch --
//
void findBestMatch(Array<Array<Scape> >& correlations, Array<int>& masklayer,
int& bestmatch, double& bestscore, int reference) {
Array<double> score1data;
score1data.setSize(masklayer.getSize());
score1data.allowGrowth(0);
score1data.setAll(-1000);
calculateType1ScoreSingle(score1data, correlations, reference, masklayer);
bestmatch = -1000;
bestscore = -1000.0;
int i;
for (i=1; i<score1data.getSize(); i++) {
if (masklayer[i] == 0) {
continue;
}
if (bestscore < score1data[i]) {
bestscore = score1data[i];
bestmatch = i;
}
}
}
//////////////////////////////
//
// calculateScore0Test --
//
void calculateScore0Test(Array<Array<double> >& score0data,
Array<Array<Scape> >& correlations) {
int i, j;
for (i=0; i<score0data.getSize(); i++) {
for (j=0; j<score0data.getSize(); j++) {
score0data[i][j] = getCorrelation(i, j, 0, 0, correlations);
}
}
}
//////////////////////////////
//
// getCorrelation --
//
double getCorrelation(int row, int col, int scaperow, int scapecol,
Array<Array<Scape> >& correlations) {
int a;
int b;
if (row < col) {
a = col;
b = row;
} else if (row > col) {
a = row;
b = col;
} else {
return 1.0;
}
//cout << "correlation[" << row << "][" << col << "][" << scaperow
// << "][" << scapecol << "] = "
// << correlations[a-1][b].data[scaperow][scapecol]
// << endl;
return correlations[a-1][b].data[scaperow][scapecol];
}
//////////////////////////////
//
// calculateType1Scores -- calculate the index of the best correlations
// for each subsequence.
//
void calculateType1Scores(Array<Array<double> >& score1data,
Array<Array<Scape> >& correlations, Array<Performance>& perfs) {
Array<Array<int> > masklayer;
int arraysize = score1data.getSize();
int i;
masklayer.setSize(arraysize);
masklayer.allowGrowth(0);
for (i=0; i<arraysize; i++) {
masklayer[i].setSize(arraysize);
masklayer[i].allowGrowth(0);
masklayer[i].setAll(1);
masklayer[i][i] = 0; // exclude self-matching from analysis
}
for (i=0; i<score1data.getSize(); i++) {
cout << "\t";
if (strcmp(perfs[i].name, "") == 0) {
cout << i;
} else {
cout << perfs[i].name;
}
cout << endl;
calculateType1ScoreSingle(score1data[i], correlations, i,
masklayer[i]);
}
}
//////////////////////////////
//
// calculateType1ScoreSingle --
//
void calculateType1ScoreSingle(Array<double>& performancedata,
Array<Array<Scape> >& correlations, int reference,
Array<int>& masklayer) {
Array<int> sums;
sums.setSize(performancedata.getSize());
sums.allowGrowth(0);
sums.setAll(0);
int i, j;
int maxindex;
int tsum = 0;
int scaperows = correlations[0][0].data.getSize();
int scapecols;
for (i=0; i<scaperows; i++) {
scapecols = i + 1;
for (j=0; j<scapecols; j++) {
maxindex = getMaxCorrIndex(correlations, reference, i, j,
masklayer);
sums[maxindex]++;
tsum++;
}
}
for (i=0; i<sums.getSize(); i++) {
if (reference == i) {
performancedata[i] = 1.0;
} else {
performancedata[i] = (double)sums[i] / (double)tsum;
}
}
}
//////////////////////////////
//
// getMaxCorrIndex -- find the best match at a given scape location.
//
int getMaxCorrIndex(Array<Array<Scape> >& correlations, int reference,
int scaperow, int scapecol, Array<int>& masklayer) {
int maxindex = -2;
double maxval = -2.0;
double testval;
int i;
int rowsize = correlations.getSize()+1;
for (i=0; i<rowsize; i++) {
//if (i == reference) {
if (!masklayer[i]) {
continue;
}
testval = getCorrelation(reference, i, scaperow, scapecol, correlations);
if (maxval < testval) {
maxval = testval;
maxindex = i;
}
}
return maxindex;
}
//////////////////////////////
//
// calculateAllCorrelations -- calculate a lot of correlations
//
void calculateAllCorrelations(Array<Array<Scape> >& correlations,
Array<Performance>& performances) {
int i, j;
// subtract one for top row since diagonal always one
correlations.setSize(performances.getSize()-1);
for (i=1; i<correlations.getSize() + 1; i++) {
correlations[i-1].setSize(i);
for (j=0; j<i; j++) {
correlations[i-1][j].setScapeSize(performances[0].data.getSize());
calculateCorrelationLayer(correlations[i-1][j], performances[i],
performances[j]);
}
}
}
//////////////////////////////
//
// calculateCorrelationLayer --
//
void calculateCorrelationLayer(Scape& scape, Performance& a, Performance& b) {
int i, j;
double *aa;
double *bb;
int rowsize;
int corelsize;
for (i=0; i<scape.data.getSize(); i++) {
rowsize = scape.data[i].getSize();
corelsize = a.data.getSize() - i;
for (j=0; j<rowsize; j++) {
aa = a.data.getBase() + j;
bb = b.data.getBase() + j;
scape.data[i][j] = pearsonCorrelation(corelsize, aa, bb);
}
}
}
//////////////////////////////
//
// readData -- read the input data array from humdrum.
// All spines must be data. Spines cannot split/join/exchange
// or terminate at other times than the rest of the data.
//
void readData(Array& performances, HumdrumFile& infile) {
int dataspines = infile.getMaxTracks();
performances.setSize(dataspines);
performances.allowGrowth(0);
// pre-allocate storage space for data
int i, j;
for (i=0; i<performances.getSize(); i++) {
performances[i].data.setSize(infile.getNumLines());
performances[i].data.setSize(0);
performances[i].spine = i;
performances[i].process = 1;
performances[i].average = 0;
performances[i].random = 0;
}
int flag;
double value;
// i = line of input
// j = spine of input; performance number
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_data) {
for (j=0; j<infile[i].getFieldCount(); j++) {
flag = sscanf(infile[i][j], "%lf", &value);
if (flag <= 0) {
std::cout << "Error: line " << i+1
<< " spine " << j+1 << " is invalid:\n";
std::cout << infile[i] << std::endl;
exit(1);
}
performances[j].data.append(value);
}
} else if (infile[i].getType() == E_humrec_local_comment) {
if (performerNameQ(infile[i][0])) {
for (j=0; j<infile[i].getFieldCount(); j++) {
performances[j].name = &(infile[i][j][1]);
performances[j].pid = &(infile[i][j][1]);
}
}
}
}
// identify random and average spines
double minn;
double maxx;
for (i=0; i<performances.getSize(); i++) {
if (strncmp(performances[i].name, "Average", 7) == 0) {
performances[i].average = 1;
}
getRange(minn, maxx, performances[i].data);
if ((minn <= 1.0) && (minn >= 0.0) && (maxx >= 0.0) && (maxx <= 1.0)) {
performances[i].random = 1;
}
}
}
//////////////////////////////
//
// performerNameQ --
//
int performerNameQ(const char* string) {
int digitcount = 0;
int spacecount = 0;
int coloncount = 0;
int equalscount = 0;
int len = strlen(string);
int i;
for (i=0; i<len; i++) {
if (string[i] == ' ') {
spacecount++;
}
if (string[i] == '=') {
equalscount++;
}
if (string[i] == ':') {
coloncount++;
}
if (isdigit(string[i])) {
digitcount++;
}
}
if (spacecount+coloncount+equalscount != 0) {
return 0;
}
if (digitcount != 4) {
return 0;
}
return 1;
}
//////////////////////////////
//
// getRange --
//
void getRange(double& minn, double& maxx, Array data) {
int i;
minn = data[0];
maxx = data[0];
for (i=1; i<data.getSize(); i++) {
if (minn > data[i]) {
minn = data[i];
}
if (maxx < data[i]) {
maxx = data[i];
}
}
}
//////////////////////////////
//
// calculateScore0Array --
//
void calculateScore0Array(Array<Array<double> >& dataarray,
Array<Performance>& performances) {
dataarray.setSize(performances.getSize());
int i, j;
for (i=0; i<dataarray.getSize(); i++) {
dataarray[i].setSize(performances.getSize());
dataarray[i][i] = 1.0;
}
int plen = performances[0].data.getSize();
for (i=0; i<dataarray.getSize(); i++) {
for (j=0; j<i; j++) {
dataarray[i][j] = pearsonCorrelation(plen,
performances[i].data.getBase(),
performances[j].data.getBase());
dataarray[j][i] = dataarray[i][j];
}
}
}
//////////////////////////////
//
// calculateRankArray --
//
void calculateRankArray(Array<Array<int> >& rankarray,
Array<Array<double> >& scorearray) {
rankarray.setSize(scorearray.getSize());
int i;
for (i=0; i<scorearray.getSize(); i++) {
rankarray[i].setSize(scorearray.getSize());
sortRanks(rankarray[i], scorearray[i]);
}
}
////////////////////
//
// sortRanks --
//
void sortRanks(Array& ranks, Array& values) {
int i;
Array<double*> presort;
Array<double> sortdata;
presort.setSize(values.getSize());
sortdata.setSize(values.getSize());
for (i=0; i<values.getSize(); i++) {
sortdata[i] = values[i];
presort[i] = &(sortdata[i]);
}
qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersort);
for (i=0; i<values.getSize(); i++) {
*presort[i] = i;
}
for (i=0; i<values.getSize(); i++) {
ranks[i] = (int)sortdata[i];
}
}
//////////////////////////////
//
// printRank --
//
void printRank(const char* filename, HumdrumFile& infile,
Array<Performance>& performances, Array<Array<int> >& rank0data) {
int i, j;
ofstream outfile;
#ifndef OLDCPP
outfile.open(filename);
#else
outfile.open(filename, ios::noreplace);
#endif
if (!outfile.is_open()) {
std::cout << "Error: could not open " << filename
<< " for writing" << std::endl;
exit(1);
}
// print bibliographic information:
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_bibliography) {
outfile << infile[i] << "\n";
}
}
// print start of data markers
outfile << "**text";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t**data";
}
outfile << "\n";
// print pid information
outfile << "!";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t!" << performances[i].pid;
}
outfile << "\n";
// print performer information
outfile << "!";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t!" << performances[i].name;
}
outfile << "\n";
// print data
if (performances.getSize() != rank0data.getSize()) {
std::cout << "Error: matrix is not square" << std::endl;
exit(1);
}
if (rank0data[0].getSize() != rank0data.getSize()) {
std::cout << "Error: matrix is not square" << std::endl;
exit(1);
}
for (i=0; i<performances.getSize(); i++) {
if (strcmp(performances[i].name, "") == 0) {
outfile << ".";
} else {
outfile << performances[i].name;
}
for (j=0; j<rank0data.getSize(); j++) {
// indices have to be reversed here since performance
// data is printed in the column not the row:
outfile << "\t" << rank0data[j][i];
}
outfile << "\n";
}
// print end of data markers
outfile << "*-";
for (i=0; i<rank0data.getSize(); i++) {
outfile << "\t*-";
}
outfile << "\n";
}
//////////////////////////////
//
// printScore --
//
void printScore(const char* filename, HumdrumFile& infile,
Array<Performance>& performances, Array<Array<double> >& score0data,
int decimals) {
int i, j;
ofstream outfile;
#ifndef OLDCPP
outfile.open(filename);
#else
outfile.open(filename, ios::noreplace);
#endif
if (!outfile.is_open()) {
std::cout << "Error: could not open " << filename
<< " for writing" << std::endl;
exit(1);
}
// print bibliographic information:
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_bibliography) {
outfile << infile[i] << "\n";
}
}
// print start of data markers
outfile << "**text";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t**data";
}
outfile << "\n";
// print pid information
outfile << "!";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t!" << performances[i].pid;
}
outfile << "\n";
// print performer information
outfile << "!";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t!" << performances[i].name;
}
outfile << "\n";
if (performances.getSize() != score0data.getSize()) {
std::cout << "Error: matrix is not square" << std::endl;
exit(1);
}
if (score0data[0].getSize() != score0data.getSize()) {
std::cout << "Error: matrix is not square" << std::endl;
exit(1);
}
// print data
for (i=0; i<performances.getSize(); i++) {
if (strcmp(performances[i].name, "") == 0) {
outfile << ".";
} else {
outfile << performances[i].name;
}
for (j=0; j<performances.getSize(); j++) {
outfile << "\t";
// indices have to be reversed here since performance
// data is printed in the column not the row:
printNumberDecimal(outfile, score0data[j][i], decimals);
}
outfile << "\n";
}
// print end of data markers
outfile << "*-";
for (i=0; i<performances.getSize(); i++) {
outfile << "\t*-";
}
outfile << "\n";
}
//////////////////////////////
//
// printNumberDecimal --
//
void printNumberDecimal(ofstream& outfile, double number, int decimals) {
number = int(number * pow(10.0, (double)decimals) + 0.5);
number = number / pow(10.0, (double)decimals);
char string[32];
char format[32];
strcpy(format, "%.");
sprintf(string, "%d", decimals);
strcat(format, string);
strcat(format, "lf");
sprintf(string, format, number);
int len = strlen(string);
int i;
int deccount = 0;
int decarea = 0;
for (i=0; i<len; i++) {
if (string[i] == '.') {
decarea = 1;
continue;
}
if (decarea) {
deccount++;
}
}
if (deccount > decimals) {
std::cout << "Error printing number " << number << std::endl;
std::cout << "deccount = " << deccount << std::endl;
std::cout << "decimals = " << decimals << std::endl;
std::cout << "string = >>" << string << "<<" << std::endl;
exit(1);
}
outfile << string;
if (decarea == 0) {
outfile << '.';
}
for (i=0; i<decimals-deccount; i++) {
outfile << '0';
}
}
//////////////////////////////
//
// pearsonCorrelation --
//
double pearsonCorrelation(int size, double* a, double* b) {
double meana = getMean(size, a);
double meanb = getMean(size, b);
double topsum = 0.0;
double bottomasum = 0.0;
double bottombsum = 0.0;
int i;
for (i=0; i<size; i++) {
topsum += (a[i] - meana) * (b[i] - meanb);
bottomasum += (a[i] - meana) * (a[i] - meana);
bottombsum += (b[i] - meanb) * (b[i] - meanb);
}
if (bottomasum == 0.0 || bottombsum == 0.0) {
return 0.0;
}
return topsum / sqrt(bottomasum * bottombsum);
}
//////////////////////////////
//
// getMean --
//
double getMean(int size, double* a) {
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += a[i];
}
return sum / size;
}
//////////////////////////////
//
// numbersort -- sort items largest first
//
int numbersort(const void* A, const void* B) {
double valuea = **((double**)A);
double valueb = **((double**)B);
if (valuea > valueb) {
return -1;
} else if (valuea < valueb) {
return +1;
} else {
return 0;
}
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("s0=b", "create score-0 data file");
opts.define("r0=b", "create rank-0 data file");
opts.define("r|reference=s:", "reference performance");
opts.define("0=b", "create rank-0 and score-0 data file");
opts.define("a|all=b", "create all score and rank files");
opts.define("b|base|filebase=s:data", "basename for generating filenames");
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, October 2008" << std::endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: October 2008" << std::endl;
cout << "compiled: " << __DATE__ << std::endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
if (opts.getBoolean("s0")) {
score0q = 1;
}
if (opts.getBoolean("r0")) {
rank0q = 1;
}
if (opts.getBoolean("0")) {
score0q = rank0q = 1;
}
referenceQ = opts.getBoolean("reference");
reference = opts.getString("reference");
if (referenceQ) {
score0q = rank0q = 1;
}
if (opts.getBoolean("all")) {
score0q = rank0q = 1;
level = 0;
}
filebase = opts.getString("filebase");
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
// md5sum: 47a5dff89985fe51a295f5c4ae0f6bf5 correlation.cpp [20121016]