#include <string.h> #include <fstream> #include <iostream> #include <math.h> #include <Array.h> void storedata(Array<Array<double> >& data, const char* file); double pearsonCorrelation(int size, double* a, double* b); double getMean(int size, double* a); void printCorrelations(Array<Array<double> >& dataa, Array<Array<double> >& datar); double dixonDistance(Array<Array<double> >& dataa, Array<Array<double> >& datar, int i, int j); void getDifference(Array<double>& output, Array<double>& newer, Array<double>& older); using namespace std; int main(int argc, char** argv) { if (argc < 3) { cout << "Error: not enough files" << endl; exit(1); } Array<Array<double> > dataa; Array<Array<double> > datar; const char* filea = argv[1]; const char* filer = argv[2]; storedata(dataa, filea); storedata(datar, filer); printCorrelations(dataa, datar); return 0; } void printCorrelations(Array<Array<double> >& dataa, Array<Array<double> >& datar) { int i; int j; double correlation; for (i=0; i<dataa.getSize(); i++) { for (j=0; j<datar.getSize(); j++) { //correlation = pearsonCorrelation(dataa[i].getSize(), // dataa[i].getBase(), datar[j].getBase()); //correlation = (int)(correlation * 100 + 0.5) / 100.0; correlation = dixonDistance(dataa, datar, i, j); cout << correlation; if (j < datar.getSize()-1) { cout << "\t"; } } cout << "\n"; } } void storedata(Array>& data, const char* file) { ifstream reader; reader.open(file); if (!reader.is_open()) { cout << "ERROR reading " << file << endl; exit(1); } data.setSize(100000); data.setSize(0); Array<double> temparray; temparray.setSize(100000); temparray.setSize(0); double value; int i; int location; char buffer[1024 * 1024] = {0}; while (!reader.eof()) { temparray.setSize(0); reader.getline(buffer, 1024*1024); char* ptr = strtok(buffer, " \t\n,"); while (ptr != NULL) { if (sscanf(ptr, "%lf", &value)) { temparray.append(value); } ptr = strtok(NULL, " \t\n,"); } // append the new row to the data output location = data.getSize(); data.setSize(data.getSize()+1); data[location].setSize(temparray.getSize()); data[location].allowGrowth(0); for (i=0; i<temparray.getSize(); i++) { data[location][i] = temparray[i]; } } } ////////////////////////////// // // pearsonCorrelation -- // double pearsonCorrelation(int size, double* a, double* b) { double meana = getMean(size, a); double meanb = getMean(size, b); double topsum = 0.0; double bottomasum = 0.0; double bottombsum = 0.0; int i; for (i=0; i<size; i++) { topsum += (a[i] - meana) * (b[i] - meanb); bottomasum += (a[i] - meana) * (a[i] - meana); bottombsum += (b[i] - meanb) * (b[i] - meanb); } if (bottomasum == 0.0 || bottombsum == 0.0) { return 0.0; } return topsum / sqrt(bottomasum * bottombsum); } ////////////////////////////// // // getMean -- // double getMean(int size, double* a) { if (size <= 0) { return 0.0; } int i; double sum = 0.0; for (i=0; i<size; i++) { sum += a[i]; } return sum / size; } ////////////////////////////// // // dixonDistance // double dixonDistance(Array<Array<double> >& dataa, Array<Array<double> >& datar, int i, int j) { if ((i==0) || (j==0)) { return 0.0; } Array<double> differenceA; Array<double> differenceR; getDifference(differenceA, dataa[i], dataa[i-1]); getDifference(differenceR, datar[j], datar[j-1]); if (differenceA.getSize() == 0) { return 0.0; } if (differenceR.getSize() == 0) { return 0.0; } double sum = 0.0; double value; for (i=0; i<differenceA.getSize(); i++){ value = differenceA[i] - differenceR[i]; sum += value * value; } return sqrt(sum); } ////////////////////////////// // // getDifference -- half wave rectified spectrum values // negative differences get set to 0 // void getDifference(Array<double>& output, Array<double>& newer, Array<double>& older) { int i; output.setSize(newer.getSize()); output.allowGrowth(0); for (i=0; i<output.getSize(); i++) { output[i] = newer[i] - older[i]; if (output[i] < 0.0) { output[i] = 0.0; } } } // md5sum: d34b62b21f3320f6e6ca6da134b4087e cormatrix.cpp [20100722]