//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Jan 13 08:14:22 PST 2008
// Last Modified: Sun Jan 13 13:40:14 PST 2008
// Filename: ...sig/examples/all/zscores.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/zscores.cpp
// Syntax: C++; museinfo
//
// Description: Calculate normalized scores (z-scores) for specified spine.
//
#include <math.h>
#include "humdrum.h"
#ifndef OLDCPP
#include <iostream>
#else
#include <iostream.h>
#endif
class Datum {
public:
double data;
int line;
int spine;
int token;
};
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
int findBestField(HumdrumFile& infile);
void printRawNumbers(Array<Datum> data, int style);
void extractNumbers(HumdrumFile& infile, int field,
Array<Datum>& data);
double getMean(Array<Datum>& data);
double getStandardDeviation(double mean, Array<Datum>& data);
double getSampleSD(double mean, Array<Datum>& data);
void adjustData(Array<Datum>& data, double mean, double sd,
int reverse);
void printDataSingle(Array<Datum>& data, int field,
HumdrumFile& infile, double mean, double sd);
void printDataReplace(Array<Datum>& data, int field,
HumdrumFile& infile, double mean, double sd);
void printDataPrepend(Array<Datum>& data, int field,
HumdrumFile& infile, double mean, double sd);
void printDataAppend(Array<Datum>& data, int field,
HumdrumFile& infile, double mean, double sd);
// User interface variables:
Options options;
int field = 0; // used with -f option
int inputQ = 0; // used with -i option
int fullQ = 0; // used with --full option
int statQ = 0; // used with -s option
int rawQ = 0; // used with -r option
int appendQ = 0; // used with -a option
int prependQ = 0; // used with -p option
int replaceQ = 0; // used with --replace option
int suppressQ = 0; // used with -S option
int reverseQ = 0; // used with --reverse option
int meanQ = 0; // used with -m option
int sampleQ = 0; // used with --sample
int sdQ = 0; // used with -d option
double myMean = 0; // used with -m option
double mySd = 0; // used with -d option
//////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
HumdrumFile infile;
if (options.getArgCount() < 1) {
infile.read(cin);
} else {
infile.read(options.getArg(1));
}
if (field <= 0) { // field counter starts at 1
field = findBestField(infile);
}
if (field > infile.getMaxTracks()) {
field = infile.getMaxTracks();
}
Array<Datum> data;
extractNumbers(infile, field, data);
if (inputQ) {
printRawNumbers(data, fullQ);
}
double mean;
double sd;
if (meanQ) {
mean = myMean;
} else {
mean = getMean(data);
}
if (sdQ) {
sd = mySd;
} else {
if (sampleQ) {
sd = getSampleSD(mean, data);
} else {
sd = getStandardDeviation(mean, data);
}
}
if (statQ) {
cout << "Mean:\t" << mean << "\n";
cout << "SD:\t" << sd << "\n";
}
if (inputQ) {
exit(0);
}
if (statQ && !rawQ) {
exit(0);
}
adjustData(data, mean, sd, reverseQ);
if (rawQ) {
printRawNumbers(data, fullQ);
exit(0);
} else if (replaceQ) {
printDataReplace(data, field, infile, mean, sd);
} else if (prependQ) {
printDataPrepend(data, field, infile, mean, sd);
} else if (appendQ) {
printDataAppend(data, field, infile, mean, sd);
} else {
printDataSingle(data, field, infile, mean, sd);
}
return 0;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printDataReplace --
//
void printDataReplace(Array<Datum>& data, int field, HumdrumFile& infile,
double mean, double sd) {
int z = 0;
int scount;
int i, j, k;
int exinterp;
for (i=0; i<infile.getNumLines(); i++) {
switch (infile[i].getType()) {
case E_humrec_none:
case E_humrec_empty:
case E_humrec_global_comment:
case E_humrec_bibliography:
case E_humrec_data_comment:
case E_humrec_data_kern_measure:
cout << infile[i];
break;
case E_humrec_interpretation:
exinterp = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (j > 0) {
cout << '\t';
}
cout << infile[i][j];
if (infile[i].getPrimaryTrack(j) == field) {
if (!suppressQ && (strncmp(infile[i][j], "**", 2) == 0)) {
exinterp = 1;
}
}
}
if (exinterp && !suppressQ) {
cout << '\n';
for (j=0; j<infile[i].getFieldCount(); j++) {
if (j > 0) {
cout << '\t';
}
if (infile[i].getPrimaryTrack(j) == field) {
cout << "!shift=" << mean;
} else {
cout << '!';
}
}
cout << '\n';
for (j=0; j<infile[i].getFieldCount(); j++) {
if (j > 0) {
cout << '\t';
}
if (infile[i].getPrimaryTrack(j) == field) {
cout << "!scale=" << sd;
} else {
cout << '!';
}
}
}
break;
case E_humrec_data:
for (j=0; j<infile[i].getFieldCount(); j++) {
if (j > 0) {
cout << '\t';
}
if (infile[i].getPrimaryTrack(j) == field) {
if (strcmp(infile[i][j], ".") == 0) {
cout << ".";
} else {
scount = infile[i].getTokenCount(j);
for (k=0; k<scount; k++) {
if ((data[z].line != i) ||
(data[z].spine != j) ||
(data[z].token != k) ) {
cerr << "STRANGE ERROR on line " << i+1;
cerr << " in spine " << j+1;
cerr << " at token " << k+1;
cerr << endl;
exit(1);
}
cout << data[z++].data;
if (k < scount - 1) {
cout << " ";
}
}
}
} else {
cout << infile[i][j];
}
}
break;
default:
cout << infile[i];
break;
}
cout << '\n';
}
}
//////////////////////////////
//
// printDataAppend --
//
void printDataAppend(Array<Datum>& data, int field, HumdrumFile& infile,
double mean, double sd) {
int z = 0;
int scount;
int i, j, k;
int exinterp = 0;
int collapse = 0;
int counter = 0;
int counter2 = 0;
for (i=0; i<infile.getNumLines(); i++) {
switch (infile[i].getType()) {
case E_humrec_none:
case E_humrec_empty:
case E_humrec_global_comment:
case E_humrec_bibliography:
cout << infile[i];
break;
case E_humrec_interpretation:
counter = 0;
exinterp = 0;
collapse = 0;
counter2 = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
counter2++;
if (strcmp(infile[i][j], "*v") == 0) {
collapse++;
break;
}
}
}
if (collapse) {
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "*\t";
}
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << infile[i][j];
}
}
cout << "\n";
////////////////////////////////////////////
cout << infile[i] << "\t";
counter = 0;
for (j=0; j<counter2 - collapse + 1; j++) {
if (j > 0) {
cout << "\t";
}
cout << "*";
}
} else {
cout << infile[i];
cout << "\t";
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << infile[i][j];
if (!suppressQ && (strncmp(infile[i][j], "**", 2) == 0)) {
exinterp = 1;
}
}
}
if (exinterp && !suppressQ) {
counter = 0;
cout << "\n";
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "!\t";
}
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << "!shift=" << mean;
}
}
cout << '\n';
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "!\t";
}
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << "!scale=" << sd;
}
}
} else {
// cout << "\t" << infile[i];
}
}
break;
case E_humrec_data_kern_measure:
counter = 0;
cout << infile[i] << "\t";
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
cout << infile[i][j];
}
}
break;
case E_humrec_data:
cout << infile[i] << "\t";
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
if (strcmp(infile[i][j], ".") == 0) {
cout << ".";
} else {
scount = infile[i].getTokenCount(j);
for (k=0; k<scount; k++) {
if ((data[z].line != i) ||
(data[z].spine != j) ||
(data[z].token != k) ) {
cerr << "STRANGE ERROR on line " << i+1;
cerr << " in spine " << j+1;
cerr << " at token " << k+1;
cerr << endl;
exit(1);
}
cout << data[z++].data;
if (k < scount - 1) {
cout << " ";
}
}
}
}
}
break;
default:
cout << infile[i];
break;
}
cout << '\n';
}
}
//////////////////////////////
//
// printDataPrepend --
//
void printDataPrepend(Array<Datum>& data, int field, HumdrumFile& infile,
double mean, double sd) {
int z = 0;
int scount;
int i, j, k;
int exinterp = 0;
int collapse = 0;
int counter = 0;
int counter2 = 0;
for (i=0; i<infile.getNumLines(); i++) {
switch (infile[i].getType()) {
case E_humrec_none:
case E_humrec_empty:
case E_humrec_global_comment:
case E_humrec_bibliography:
cout << infile[i];
break;
case E_humrec_interpretation:
counter = 0;
exinterp = 0;
collapse = 0;
counter2 = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
counter2++;
if (strcmp(infile[i][j], "*v") == 0) {
collapse++;
break;
}
}
}
if (collapse) {
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << infile[i][j];
}
}
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "\t*";
}
cout << "\n";
////////////////////////////////////////////
counter = 0;
for (j=0; j<counter2 - collapse + 1; j++) {
if (j > 0) {
cout << "\t";
}
cout << "*";
}
} else {
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << infile[i][j];
if (!suppressQ && (strncmp(infile[i][j], "**", 2) == 0)) {
exinterp = 1;
}
}
}
if (exinterp && !suppressQ) {
cout << '\t';
cout << infile[i];
cout << '\n';
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << "!shift=" << mean;
}
}
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "\t!";
}
cout << '\n';
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter++ > 0) {
cout << '\t';
}
cout << "!scale=" << sd;
}
}
for (j=0; j<infile[i].getFieldCount(); j++) {
cout << "\t!";
}
} else {
//cout << "\t" << infile[i];
}
}
break;
case E_humrec_data_kern_measure:
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
cout << infile[i][j];
}
}
break;
case E_humrec_data:
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
if (strcmp(infile[i][j], ".") == 0) {
cout << ".";
} else {
scount = infile[i].getTokenCount(j);
for (k=0; k<scount; k++) {
if ((data[z].line != i) ||
(data[z].spine != j) ||
(data[z].token != k) ) {
cerr << "STRANGE ERROR on line " << i+1;
cerr << " in spine " << j+1;
cerr << " at token " << k+1;
cerr << endl;
exit(1);
}
cout << data[z++].data;
if (k < scount - 1) {
cout << " ";
}
}
}
}
}
break;
default:
cout << infile[i];
break;
}
if (!exinterp) {
cout << '\t' << infile[i];
} else {
exinterp = 0;
}
cout << '\n';
}
}
//////////////////////////////
//
// printDataSingle --
//
void printDataSingle(Array<Datum>& data, int field, HumdrumFile& infile,
double mean, double sd) {
int z = 0;
int scount;
int i, j, k;
int interp;
int counter = 0;
for (i=0; i<infile.getNumLines(); i++) {
switch (infile[i].getType()) {
case E_humrec_none:
case E_humrec_empty:
case E_humrec_global_comment:
case E_humrec_bibliography:
cout << infile[i];
break;
case E_humrec_interpretation:
interp = 0;
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
cout << infile[i][j];
if (!suppressQ && (strncmp(infile[i][j], "**", 2) == 0)) {
cout << "\n!shift=" << mean << '\n';
cout << "!scale=" << sd;
}
}
}
break;
case E_humrec_data_comment:
case E_humrec_data_kern_measure:
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
cout << infile[i][j];
}
}
break;
case E_humrec_data:
counter = 0;
for (j=0; j<infile[i].getFieldCount(); j++) {
if (infile[i].getPrimaryTrack(j) == field) {
if (counter > 0) {
cout << '\t';
}
counter++;
if (strcmp(infile[i][j], ".") == 0) {
cout << ".";
} else {
scount = infile[i].getTokenCount(j);
for (k=0; k<scount; k++) {
if ((data[z].line != i) ||
(data[z].spine != j) ||
(data[z].token != k) ) {
cerr << "STRANGE ERROR on line " << i+1;
cerr << " in spine " << j+1;
cerr << " at token " << k+1;
cerr << endl;
exit(1);
}
cout << data[z++].data;
if (k < scount - 1) {
cout << " ";
}
}
}
}
}
break;
default:
cout << infile[i];
break;
}
cout << '\n';
}
}
//////////////////////////////
//
// adjustData -- apply mean shift and standard deviation scaling.
//
void adjustData(Array& data, double mean, double sd, int reverse) {
int i;
int size = data.getSize();
if (reverse) {
for (i=0; i<size; i++) {
data[i].data = data[i].data * sd + mean;
}
} else {
for (i=0; i<size; i++) {
data[i].data = (data[i].data - mean) / sd;
}
}
}
//////////////////////////////
//
// getMean --
//
double getMean(Array& data) {
int size = data.getSize();
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += data[i].data;
}
return sum / size;
}
//////////////////////////////
//
// getStandardDeviation --
//
double getStandardDeviation(double mean, Array& data) {
int size = data.getSize();
double sum = 0.0;
double value;
int i;
for (i=0; i<size; i++) {
value = data[i].data - mean;
sum += value * value;
}
return sqrt(sum / size);
}
//////////////////////////////
//
// getSampleSD --
//
double getSampleSD(double mean, Array& data) {
int size = data.getSize();
double sum = 0.0;
double value;
int i;
for (i=0; i<size; i++) {
value = data[i].data - mean;
sum += value * value;
}
return sqrt(sum / (size - 1.0));
}
//////////////////////////////
//
// printRawNumbers --
//
void printRawNumbers(Array data, int style) {
int i;
for (i=0; i<data.getSize(); i++) {
cout << data[i].data;
if (style) {
cout << '\t' << data[i].line;
cout << '\t' << data[i].spine;
cout << '\t' << data[i].token;
}
cout << '\n';
}
}
//////////////////////////////
//
// extractNumbers --
//
void extractNumbers(HumdrumFile& infile, int field, Array& data) {
data.setSize(infile.getNumLines()*2);
data.setGrowth(infile.getNumLines()*2);
data.setSize(0);
Datum datum;
double value;
int i, j, k;
int scount;
char buffer[10000] = {0};
for (i=0; i<infile.getNumLines(); i++) {
// cout << endl << " --> LINE " << i+1 << ": ";
if (infile[i].getType() != E_humrec_data) {
continue;
}
for (j=0; j<infile[i].getFieldCount(); j++) {
// cout << j << ",";
if (field == infile[i].getPrimaryTrack(j)) {
if (strcmp(infile[i][j], ".") == 0) {
continue; // ignore null tokens
}
scount = infile[i].getTokenCount(j);
for (k=0; k<scount; k++) {
infile[i].getToken(buffer, j, k);
if (strcmp(buffer, ".") == 0) {
continue; // ignore null subtokens
}
if (sscanf(buffer, "%lf", &value) == 1) {
// cout << endl << "FOUND " << value << " at " << i << "," << j << "," << k << endl;
datum.data = value;
datum.line = i;
datum.spine = j;
datum.token = k;
data.append(datum);
}
}
}
}
}
data.allowGrowth(0);
}
//////////////////////////////
//
// findBestField -- skip over the first **kern columns
//
int findBestField(HumdrumFile& infile) {
int output = 0;
int i, j;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() != E_humrec_interpretation) {
continue;
}
if (strncmp(infile[i][0], "**", 2) != 0) {
continue; // data file is bad if this case occurs
}
for (j=0; j<infile[i].getFieldCount(); j++) {
if (strcmp(infile[i][j], "**kern") == 0) {
continue;
}
output = j;
break;
}
break;
}
return output + 1; // index from 1 to N
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("m|mean=d:0.0", "use this mean rather than calculated one");
opts.define("d|sd=d:0.0", "use this SD rather than calculated one");
opts.define("a|append=b", "append output data to input");
opts.define("p|prepend=b", "prepend output data to input");
opts.define("r|raw=b", "raw output display");
opts.define("sample=b", "use sample rather than population SD");
opts.define("replace=b", "raw output display");
opts.define("s|stat=b", "display analyzed statistics");
opts.define("S|suppress=b", "suppress display of statistics in output");
opts.define("reverse=b", "reverse z-score calculation process");
opts.define("f|field=i:0", "spine number to process");
opts.define("i|input=b", "display only extract values");
opts.define("full=b", "display only extract values with locations");
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, Jan 2008" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 12 Jan 2002" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
field = opts.getInteger("field");
inputQ = opts.getBoolean("input");
fullQ = opts.getBoolean("full");
statQ = opts.getBoolean("stat");
rawQ = opts.getBoolean("raw");
appendQ = opts.getBoolean("append");
prependQ = opts.getBoolean("prepend");
replaceQ = opts.getBoolean("replace");
suppressQ = opts.getBoolean("suppress");
reverseQ = opts.getBoolean("reverse");
sampleQ = opts.getBoolean("sample");
meanQ = opts.getBoolean("mean");
myMean = opts.getDouble("mean");
sdQ = opts.getBoolean("sd");
mySd = opts.getDouble("sd");
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
// md5sum: b17f9fee811c0f042b1d011ef0b71bfa zscores.cpp [20080202]