//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Fri Nov 23 07:36:56 PST 2007
// Last Modified: Fri Nov 23 07:37:01 PST 2007
// Filename: ...sig/examples/all/ned.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/ned/ned.cpp
// Syntax: C++; museinfo
//
// Description: Noise Equivalent Distance metric.
//
#include "humdrum.h"
#include <math.h>
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
double pearsonCorrelation(int size, double* a, double* b);
double getMean(int size, double* a);
void fillArray(HumdrumFile& infile, Array<double>& targarray,
int targ);
double getNed(HumdrumFile& infile, int ref, int targ,
double& sd, double& correl);
void addNoise(double amp, double* output, double* input,
int siz);
double getStandardDeviation(double mean, double* data, int siz);
void normalizeSequence(double* seq, int size);
void printRankData(HumdrumFile& infile, int reference);
int getSeqCount(HumdrumFile& infile);
int getNameIndex(HumdrumFile& infile);
int getPidIndex(HumdrumFile& infile);
void sortRanks(Array<double>& values, Array<int>& ranks);
int numbersort(const void* A, const void* B);
int numbersortmin(const void* A, const void* B);
void sortRanksReverse(Array<double>& values, Array<int>& ranks);
#define NOISE_UNKNOWN 0
#define NOISE_CONSTANT 1
#define NOISE_PROPORTIONAL 2
// User interface variables:
Options options;
int reference = 0;
int target = 1;
int avgcount = 300;
double tolerance = 0.001;
double decay = 0.95;
int normalQ = 0;
int noisetype = NOISE_PROPORTIONAL;
int rankQ = 0;
int countcols = 0;
//////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
srand48(time(NULL));
HumdrumFile infile;
if (options.getArgCount() <= 0) {
infile.read(cin);
} else {
infile.read(options.getArg(1));
}
if (countcols) {
cout << getSeqCount(infile) << endl;
}
else if (rankQ) {
printRankData(infile, reference);
} else {
double fsd = 0.0;
double rsd = 0.0;
double corel = 0.0;
double fned = getNed(infile, reference, target, fsd, corel);
double rned = getNed(infile, target, reference, rsd, corel);
cout << "Forward NED:\t" << corel << "\t" << fned << "\t" << fsd << endl;
cout << "Reverse NED:\t" << corel << "\t" << rned << "\t" << rsd << endl;
}
return 0;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// printRankData --
//
//**label **pid **rank0 **score0 **rank1 **score1 **rank2 **score2 **rank3 **score3// Ashkenazy 1981 pid9058-19 target target target target target target target target
void printRankData(HumdrumFile& infile, int reference) {
int seqcount = getSeqCount(infile);
Array<double> Corel;
Array<double> Ned;
Array<double> Sd;
Array<int> CorelRank;
Array<int> NedRank;
Corel.setSize(seqcount);
Ned.setSize(seqcount);
Sd.setSize(seqcount);
CorelRank.setSize(seqcount);
NedRank.setSize(seqcount);
Corel.allowGrowth(0);
Ned.allowGrowth(0);
Sd.allowGrowth(0);
CorelRank.allowGrowth(0);
NedRank.allowGrowth(0);
int namei = getNameIndex(infile);
int pidi = getPidIndex(infile);
double fsd = 0.0;
double fned = 0.0;
double corel = 0.0;
int i;
for (i=0; i<seqcount; i++) {
if (reference == i) {
// if (namei >= 0) {
// cout << infile[namei][i]+1 << "\ttarget\ttarget" << endl;
// }
Ned[i] = 0.0;
Sd[i] = 0.0;
Corel[i] = 1.0;
continue;
}
fned = getNed(infile, reference, i, fsd, corel);
Ned[i] = fned;
Sd[i] = fsd;
Corel[i] = corel;
// if (namei >= 0) {
// cout << infile[namei][i]+1 << "\t" << corel << "\t"
// << fned << "\t" << fsd << endl;
// }
}
sortRanksReverse(Ned, NedRank);
sortRanks(Corel, CorelRank);
cout << "**label\t**pid\t**rank0\t**score0\t**rankN\t**NED\n";
for (i=0; i<seqcount; i++) {
if (namei >= 0) {
cout << infile[namei][i]+1 << "\t";
} else {
cout << ".\t";
}
if (pidi >= 0 && strlen(infile[pidi][i]+1) > 0) {
cout << infile[pidi][i]+1 << "\t";
} else if (strlen(infile[pidi][i]+1) == 0) {
cout << ".\t";
} else {
cout << ".\t";
}
if (reference == i) {
cout << "target" << "\t"
<< "target" << "\t"
<< "target" << "\t"
<< "target" << "\t"
<< "target" << endl;
} else {
cout << CorelRank[i] << "\t"
<< Corel[i] << "\t"
<< NedRank[i] << "\t"
<< Ned[i] << "\t"
<< Sd[i] << endl;
}
}
cout << "*-\t*-\t*-\t*-\t*-\t*-\n";
}
//////////////////////////////
//
// getNameIndex --
//
int getNameIndex(HumdrumFile& infile) {
int i;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_local_comment) {
if (isupper(infile[i][0][1])) {
return i;
}
}
}
return -1;
}
//////////////////////////////
//
// getPidIndex --
//
int getPidIndex(HumdrumFile& infile) {
int i;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_local_comment) {
if (infile[i][0][1] == 'p' &&
infile[i][0][2] == 'i' &&
infile[i][0][3] == 'd') {
return i;
}
}
}
return -1;
}
//////////////////////////////
//
// getSeqCount -- return the number of columns in the data
//
int getSeqCount(HumdrumFile& infile) {
int i;
int output = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_data) {
output = infile[i].getFieldCount();
break;
}
}
return output;
}
//////////////////////////////
//
// getNed -- measure the Ned of a sequence
//
double getNed(HumdrumFile& infile, int ref, int targ, double& sd,
double& correl) {
Array<double> refarray;
Array<double> targarray;
fillArray(infile, refarray, ref);
fillArray(infile, targarray, targ);
if (normalQ) {
normalizeSequence(refarray.getBase(), refarray.getSize());
normalizeSequence(targarray.getBase(), targarray.getSize());
}
if (refarray.getSize() != targarray.getSize()) {
cerr << "Error: data lengths do not match" << endl;
exit(1);
}
// calculate the target correlation
correl = pearsonCorrelation(refarray.getSize(),
refarray.getBase(), targarray.getBase());
if (correl < 0.0) {
sd = 0.0;
return 100.0;
}
// cout << "Correl = " << correl << endl;
double amp = 0.0;
double increment = 0.1;
double currcore = 1.0;
double tempdata[refarray.getSize()];
int direction = 1;
int lastdirection = 1;
int tcount = avgcount;
double sdvals[tcount];
double counter = 0;
int i;
while (fabs(currcore - correl) > tolerance) {
counter++;
if (currcore > correl) {
amp += increment;
if (direction < 0) {
increment *= decay;
}
if (lastdirection > 0) {
direction = 1;
}
} else if (currcore < correl ) {
amp -= increment;
if (direction < 0) {
increment *= decay;
}
if (lastdirection > 0) {
direction = -1;
}
}
if (direction < 0) {
direction--;
} else {
direction++;
}
lastdirection = direction;
// if (abs(direction) > 4) {
// increment /= decay;
// cout <<"DIR = " << direction << "\t" << increment << endl;
// }
currcore = 0.0;
for (i=0; i<tcount; i++) {
addNoise(amp, tempdata, refarray.getBase(), refarray.getSize());
sdvals[i] = pearsonCorrelation(refarray.getSize(), tempdata,
refarray.getBase());
currcore += sdvals[i];
}
currcore = currcore / tcount;
if (counter > 1000) {
break;
}
// cout << correl << "\t" << currcore << "\t"
// << amp << "\t" << increment << "\t" << direction << endl;
}
sd = getStandardDeviation(currcore, sdvals, tcount);
return amp;
}
//////////////////////////////
//
// addNoise --
//
void addNoise(double amp, double* output, double* input, int siz) {
int i;
for (i=0; i<siz; i++) {
if (noisetype == NOISE_CONSTANT) {
output[i] = input[i] + (drand48() * 2.0 - 1.0) * amp;
} else { // proportional
output[i] = input[i] + (drand48() * 2.0 - 1.0) * amp * input[i];
}
}
}
//////////////////////////////
//
// fillArray --
//
void fillArray(HumdrumFile& infile, Array& targarray, int targ) {
int i;
targarray.setSize(infile.getNumLines());
targarray.setSize(0);
double value;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_data) {
if (targ >= infile[i].getFieldCount()) {
cerr << "Error on line " << i+1 << " of input data." << endl;
exit(1);
}
value = strtod(infile[i][targ], NULL);
targarray.append(value);
}
}
}
//////////////////////////////
//
// pearsonCorrelation --
//
double pearsonCorrelation(int size, double* a, double* b) {
double meana = getMean(size, a);
double meanb = getMean(size, b);
double topsum = 0.0;
double bottomasum = 0.0;
double bottombsum = 0.0;
int i;
for (i=0; i<size; i++) {
topsum += (a[i] - meana) * (b[i] - meanb);
bottomasum += (a[i] - meana) * (a[i] - meana);
bottombsum += (b[i] - meanb) * (b[i] - meanb);
}
if (bottomasum == 0.0 || bottombsum == 0.0) {
return 0.0;
}
return topsum / sqrt(bottomasum * bottombsum);
}
//////////////////////////////
//
// getMean --
//
double getMean(int size, double* a) {
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += a[i];
}
return sum / size;
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("N|normal=b", "normalize sequences before calculations");
opts.define("R|rank=b", "rank calculations using NED");
opts.define("C|constant=b", "constant noise amplitude");
opts.define("t|target=i:1", "target sequence column");
opts.define("r|reference=i:0", "reference sequence column");
opts.define("c|count=i:300", "Count of trials to average");
opts.define("T|tolerance=d:0.001", "Correlation match tolerance");
opts.define("D|decay=d:0.95", "Correlation match decay");
opts.define("m|max=b", "Display column count in data file");
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, Nov 2007" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 23 Nov 2007" << endl;
cout << "compiled: " << __DATE__ << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
avgcount = opts.getInteger("count");
tolerance = opts.getDouble("tolerance");
decay = opts.getDouble("decay");
normalQ = opts.getBoolean("normal");
target = opts.getInteger("target");
reference = opts.getInteger("reference");
rankQ = opts.getBoolean("rank");
countcols = opts.getBoolean("max");
if (opts.getBoolean("constant")) {
noisetype = NOISE_CONSTANT;
}
}
//////////////////////////////
//
// getStandardDeviation --
//
double getStandardDeviation(double mean, double* data, int siz) {
double sum = 0.0;
double value;
int i;
for (i=0; i<siz; i++) {
value = data[i] - mean;
sum += value * value;
}
return sqrt(sum/siz);
}
//////////////////////////////
//
// normalizeSequence --
//
void normalizeSequence(double* seq, int size) {
double mean = getMean(size, seq);
double sd = getStandardDeviation(mean, seq, size);
int i;
for (i=0; i<size; i++) {
seq[i] = (seq[i] - mean) / sd;
}
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
//////////////////////////////
//
// sortRanks -- sort rank values from highest to lowest
//
void sortRanks(Array& values, Array& ranks) {
int i;
Array<double*> presort;
Array<double> sortdata;
presort.setSize(values.getSize());
sortdata.setSize(values.getSize());
for (i=0; i<values.getSize(); i++) {
sortdata[i] = values[i];
presort[i] = &(sortdata[i]);
}
qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersort);
for (i=0; i<values.getSize(); i++) {
*presort[i] = i;
}
for (i=0; i<values.getSize(); i++) {
ranks[i] = (int)sortdata[i];
}
}
//////////////////////////////
//
// sortRanksReverse -- sort rank values from highest to lowest
//
void sortRanksReverse(Array& values, Array& ranks) {
int i;
Array<double*> presort;
Array<double> sortdata;
presort.setSize(values.getSize());
sortdata.setSize(values.getSize());
for (i=0; i<values.getSize(); i++) {
sortdata[i] = values[i];
presort[i] = &(sortdata[i]);
}
qsort(presort.getBase(), presort.getSize(), sizeof(double*), numbersortmin);
for (i=0; i<values.getSize(); i++) {
*presort[i] = i;
}
for (i=0; i<values.getSize(); i++) {
ranks[i] = (int)sortdata[i];
}
}
//////////////////////////////
//
// numbersort -- sort items largest first
//
int numbersort(const void* A, const void* B) {
double valuea = **((double**)A);
double valueb = **((double**)B);
if (valuea > valueb) {
return -1;
} else if (valuea < valueb) {
return +1;
} else {
return 0;
}
}
//////////////////////////////
//
// numbersortmin -- sort items largest first
//
int numbersortmin(const void* A, const void* B) {
double valuea = **((double**)A);
double valueb = **((double**)B);
if (valuea < valueb) {
return -1;
} else if (valuea > valueb) {
return +1;
} else {
return 0;
}
}
// md5sum: 37e2b51bce0d7e5ff2e231e679e44c82 ned.cpp [20080518]