#include <string.h>
#include <fstream>
#include <iostream>
#include <math.h>
#include <Array.h>
void storedata(Array<Array<double> >& data, const char* file);
double pearsonCorrelation(int size, double* a, double* b);
double getMean(int size, double* a);
void printCorrelations(Array<Array<double> >& dataa,
Array<Array<double> >& datar);
double dixonDistance(Array<Array<double> >& dataa,
Array<Array<double> >& datar, int i, int j);
void getDifference(Array<double>& output, Array<double>& newer,
Array<double>& older);
using namespace std;
int main(int argc, char** argv) {
if (argc < 3) {
cout << "Error: not enough files" << endl;
exit(1);
}
Array<Array<double> > dataa;
Array<Array<double> > datar;
const char* filea = argv[1];
const char* filer = argv[2];
storedata(dataa, filea);
storedata(datar, filer);
printCorrelations(dataa, datar);
return 0;
}
void printCorrelations(Array<Array<double> >& dataa,
Array<Array<double> >& datar) {
int i;
int j;
double correlation;
for (i=0; i<dataa.getSize(); i++) {
for (j=0; j<datar.getSize(); j++) {
//correlation = pearsonCorrelation(dataa[i].getSize(),
// dataa[i].getBase(), datar[j].getBase());
//correlation = (int)(correlation * 100 + 0.5) / 100.0;
correlation = dixonDistance(dataa, datar, i, j);
cout << correlation;
if (j < datar.getSize()-1) {
cout << "\t";
}
}
cout << "\n";
}
}
void storedata(Array >& data, const char* file) {
ifstream reader;
reader.open(file);
if (!reader.is_open()) {
cout << "ERROR reading " << file << endl;
exit(1);
}
data.setSize(100000);
data.setSize(0);
Array<double> temparray;
temparray.setSize(100000);
temparray.setSize(0);
double value;
int i;
int location;
char buffer[1024 * 1024] = {0};
while (!reader.eof()) {
temparray.setSize(0);
reader.getline(buffer, 1024*1024);
char* ptr = strtok(buffer, " \t\n,");
while (ptr != NULL) {
if (sscanf(ptr, "%lf", &value)) {
temparray.append(value);
}
ptr = strtok(NULL, " \t\n,");
}
// append the new row to the data output
location = data.getSize();
data.setSize(data.getSize()+1);
data[location].setSize(temparray.getSize());
data[location].allowGrowth(0);
for (i=0; i<temparray.getSize(); i++) {
data[location][i] = temparray[i];
}
}
}
//////////////////////////////
//
// pearsonCorrelation --
//
double pearsonCorrelation(int size, double* a, double* b) {
double meana = getMean(size, a);
double meanb = getMean(size, b);
double topsum = 0.0;
double bottomasum = 0.0;
double bottombsum = 0.0;
int i;
for (i=0; i<size; i++) {
topsum += (a[i] - meana) * (b[i] - meanb);
bottomasum += (a[i] - meana) * (a[i] - meana);
bottombsum += (b[i] - meanb) * (b[i] - meanb);
}
if (bottomasum == 0.0 || bottombsum == 0.0) {
return 0.0;
}
return topsum / sqrt(bottomasum * bottombsum);
}
//////////////////////////////
//
// getMean --
//
double getMean(int size, double* a) {
if (size <= 0) {
return 0.0;
}
int i;
double sum = 0.0;
for (i=0; i<size; i++) {
sum += a[i];
}
return sum / size;
}
//////////////////////////////
//
// dixonDistance
//
double dixonDistance(Array<Array<double> >& dataa,
Array<Array<double> >& datar, int i, int j) {
if ((i==0) || (j==0)) {
return 0.0;
}
Array<double> differenceA;
Array<double> differenceR;
getDifference(differenceA, dataa[i], dataa[i-1]);
getDifference(differenceR, datar[j], datar[j-1]);
if (differenceA.getSize() == 0) {
return 0.0;
}
if (differenceR.getSize() == 0) {
return 0.0;
}
double sum = 0.0;
double value;
for (i=0; i<differenceA.getSize(); i++){
value = differenceA[i] - differenceR[i];
sum += value * value;
}
return sqrt(sum);
}
//////////////////////////////
//
// getDifference -- half wave rectified spectrum values
// negative differences get set to 0
//
void getDifference(Array<double>& output, Array<double>& newer,
Array<double>& older) {
int i;
output.setSize(newer.getSize());
output.allowGrowth(0);
for (i=0; i<output.getSize(); i++) {
output[i] = newer[i] - older[i];
if (output[i] < 0.0) {
output[i] = 0.0;
}
}
}
// md5sum: d34b62b21f3320f6e6ca6da134b4087e cormatrix.cpp [20100722]