//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Tue Apr 27 10:40:50 PDT 2004
// Last Modified: Tue Apr 27 10:40:54 PDT 2004
// Filename: ...sig/examples/all/jointfeature.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/jointfeature.cpp
// Syntax: C++; museinfo
//
// Description:
//
#include "humdrum.h"
#include <string.h>
#include <ctype.h>
#ifndef OLDCPP
#include <sstream>
#define SSTREAM stringstream
#define CSTRING str().c_str()
using namespace std;
#else
#ifdef VISUAL
#include <strstrea.h> /* for windows 95 */
#else
#include <strstream.h>
#endif
#define SSTREAM strstream
#define CSTRING str()
#endif
#include <stdlib.h> /* for qsort and bsearch functions */
///////////////////////////////////////////////////////////////////////////
#define MAXPRIME 169
int primes[MAXPRIME] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271,
277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359,
367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443,
449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619,
631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719,
727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821,
823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911,
919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009};
///////////////////////////////////////////////////////////////////////////
class Feature {
public:
Feature(void) { clear(); }
Feature(Feature& aFeature) { copyFeature(aFeature); }
Feature& operator=(Feature& aFeature) { return copyFeature(aFeature); }
Feature& copyFeature(Feature& aFeature) {
if (this == &aFeature) {
return *this;
}
strcpy(istn, aFeature.istn);
line = aFeature.line;
int i, j;
feature.setSize(aFeature.feature.getSize());
for (i=0; i<feature.getSize(); i++) {
feature[i].setSize(aFeature.feature[i].getSize());
for (j=0; j<feature[i].getSize(); j++) {
feature[i][j] = aFeature.feature[i][j];
}
}
return *this;
}
void clear(void) { line = -1; istn[0] = '\0'; feature.setSize(0); }
int is_valid(void) { return istn[0] != '\0' ? 1 : 0; }
// data fields:
char istn[256]; // unique serial number/fileaname
Array< Array<int> > feature; // feature vector
int line; // line number in original file
};
///////////////////////////////////////////////////////////////////////////
// function declarations
void checkOptions(Options& opts, int argc, char* argv[]);
void example(void);
void usage(const char* command);
int buildFeaturesArray(Array<Feature>& features, HumdrumFile& infile);
int getFeature(Feature& tempfeature, HumdrumRecord& record);
void buildAnalysisData(Array<Feature>& analysis,
Array<Feature>& features);
void getFeatureStateList(Array<int>& output, Array<Feature>& features,
int v);
int getPrime(int value, Array<int>& states, int offset);
void printFeature(int index, Array<Feature>& features1,
Array<Feature>& features2,
Array<int>& states1, Array<int>& states2);
int findmatch(int index1, Array<Feature>& features1,
Array<Feature>& features2);
// global variables
Options options; // database for command-line arguments
int debugQ = 0; // used with the --debug option
int totalcount = 0; // total number of features for all elements
///////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[]) {
// process the command-line options
checkOptions(options, argc, argv);
cout << "!! Joint features for the following files" << endl;
cout << "!!!FILE1:\t" << options.getArg(1) << endl;
cout << "!!!FILE2:\t" << options.getArg(2) << endl;
HumdrumFile infile1;
HumdrumFile infile2;
Array<int> states1;
Array<int> states2;
Array<Feature> features1;
Array<Feature> features2;
Array<int> translate1;
Array<int> translate2;
infile1.read(options.getArg(1));
infile2.read(options.getArg(2));
int symbolcount1 = buildFeaturesArray(features1, infile1);
int symbolcount2 = buildFeaturesArray(features2, infile2);
cout << "!!!SYMBOL_COUNT_1:\t" << symbolcount1 << endl;
cout << "!!!SYMBOL_COUNT_2:\t" << symbolcount2 << endl;
getFeatureStateList(states1, features1, 1);
getFeatureStateList(states2, features2, 2);
cout << "!!!STATE_COUNT_1:\t" << states1.getSize() << endl;
cout << "!!!STATE_COUNT_2:\t" << states1.getSize() << endl;
if (states1.getSize() + states2.getSize() >= MAXPRIME) {
cout << "Error: too many states to process correctly\n";
exit(1);
}
// print symbol translations:
int i;
for (i=0; i<states1.getSize(); i++) {
cout << "!!!STATE_" << states1[i] << "_1:\t" << primes[i] << endl;
}
for (i=0; i<states2.getSize(); i++) {
cout << "!!!STATE_" << states2[i] << "_2:\t"
<< primes[i + states1.getSize()] << endl;
}
// now it is time to reassemble the feature data:
cout << "**istn\t**feature\n";
for (i=0; i<features1.getSize(); i++) {
printFeature(i, features1, features2, states1, states2);
}
cout << "*-\t*-\n";
return 0;
}
///////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// findmatch --
//
int findmatch(int index1, Array& features1, Array& features2) {
int high;
int low;
int i;
for (i=0; i<features2.getSize(); i++) {
high = index1 + i;
low = index1 - i;
if (low >= 0) {
if (strcmp(features1[index1].istn, features2[low].istn) == 0) {
return low;
}
}
if (high < features2.getSize()) {
if (strcmp(features1[index1].istn, features2[high].istn) == 0) {
return high;
}
}
}
return -1;
}
//////////////////////////////
//
// printFeature --
//
void printFeature(int index, Array<Feature>& features1,
Array<Feature>& features2, Array<int>& states1, Array<int>& states2) {
int index1 = index;
int index2 = index;
if (index < features2.getSize()-1) {
if (strcmp(features1[index].istn, features2[index].istn) != 0) {
index2 = findmatch(index1, features1, features2);
}
if (index2 < 0) {
return; // can not print the features because no match in second set.
}
} else {
return;
}
cout << features1[index].istn;
cout << "\t";
int max1 = features1[index1].feature[0].getSize();
int max2 = features2[index2].feature[0].getSize();
int max = max1 < max2 ? max1 : max2;
int i;
int value1;
int value2;
for (i=0; i<max; i++) {
value1 = getPrime(features1[index1].feature[0][i], states1, 0);
value2 = getPrime(features2[index2].feature[0][i], states2,
states1.getSize());
cout << value1 * value2;
if (i<max-1) {
cout << " ";
}
}
cout << "\n";
}
///////////////////////////////
//
// getPrime --
//
int getPrime(int value, Array& states, int offset) {
int i;
int index = -1;
for (i=0; i<states.getSize(); i++) {
if (states[i] == value) {
index = i;
break;
}
}
if (index < 0) {
cout << "Error: could not find an expected state" << endl;
exit(1);
}
if (index + offset > MAXPRIME) {
cout << "Error: exceeded the list of primes available for assignment"
<< endl;
}
return primes[index + offset];
}
//////////////////////////////
//
// getFeatureStateList --
//
void getFeatureStateList(Array& output, Array& features, int v) {
#define MAXSTATE 200000
int maxstate = 0;
Array<int> states;
states.setSize(MAXSTATE);
states.setAll(0);
int warnnegative = 0;
int warnmaximum = 0;
int index;
maxstate = 0;
int i, j;
for (i=0; i<features.getSize(); i++) {
for (j=0; j<features[i].feature[0].getSize(); j++) {
index = features[i].feature[0][j];
if (index < 0) {
warnnegative++;
continue;
} else if (index >= MAXSTATE) {
warnmaximum++;
}
states[index]++;
if (index > maxstate) {
maxstate = index;
}
}
}
if (warnnegative) {
cout << "!! Warning: there were " << warnnegative
<< " uncounted negative states in the data set\n";
}
if (warnmaximum) {
cout << "!! Warning: there were " << warnmaximum
<< " uncounted states exceeding 99,999 in the data set\n";
}
int count = 0;
for (i=0; i<=maxstate; i++) {
if (states[i]) {
count++;
}
}
output.setSize(maxstate+1);
output.setSize(0);
for (i=0; i<maxstate+1; i++) {
if (states[i] > 0) {
output.append(i);
cout << "!!!SYM-COUNT-" << i << "_" << v << ":\t" << states[i] << endl;
}
}
}
//////////////////////////////
//
// featuresort -- sort items by feature space elements.
//
int featuresort(const void* A, const void* B) {
Feature& a = *((Feature*)A);
Feature& b = *((Feature*)B);
int i;
int sizea = a.feature[0].getSize();
int sizeb = b.feature[0].getSize();
int minsize = sizea < sizeb ? sizea : sizeb;
for (i=0; i<minsize; i++) {
if (a.feature[0][i] < b.feature[0][i]) {
return -1;
}
if (a.feature[0][i] > b.feature[0][i]) {
return +1;
}
}
if (sizea > sizeb) {
return +1;
} else if (sizea < sizeb) {
return -1;
} else {
return 0;
}
}
//////////////////////////////
//
// featuresort2 -- sort for count matches limit to the size of the first
// element
//
int featuresort2(const void* A, const void* B) {
Feature& a = *((Feature*)A);
Feature& b = *((Feature*)B);
int i;
int sizea = a.feature[0].getSize();
int sizeb = b.feature[0].getSize();
int minsize = sizea < sizeb ? sizea : sizeb;
for (i=0; i<minsize; i++) {
if (a.feature[0][i] < b.feature[0][i]) {
return -1;
}
if (a.feature[0][i] > b.feature[0][i]) {
return +1;
}
}
// all of the elements in the search range are equal
return 0;
}
//////////////////////////////
//
// buildFeaturesArray -- returns the total number of features
//
int buildFeaturesArray(Array& features, HumdrumFile& infile) {
int i;
Feature tempfeature;
features.setSize(infile.getNumLines());
features.setSize(0);
int totalfeatures = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (infile[i].getType() == E_humrec_data) {
totalfeatures += getFeature(tempfeature, infile[i]);
if (tempfeature.is_valid()) {
tempfeature.line = i;
features.append(tempfeature);
}
}
}
return totalfeatures;
}
//////////////////////////////
//
// getFeature -- returns the total number of features found on the line
//
int getFeature(Feature& tempfeature, HumdrumRecord& record) {
tempfeature.clear();
int i;
int featurecount = 0;
char buffer2[1024] = {0};
// find the ISTN value
for (i=0; i<record.getFieldCount(); i++) {
if (strcmp(record.getExInterp(i), "**istn") == 0) {
strcpy(buffer2, record[i]);
if (debugQ) {
cout << "found istn: " << buffer2 << endl;
}
} else if (strcmp(record.getExInterp(i), "**feature") == 0) {
featurecount++;
}
}
if ((featurecount == 0) || (buffer2[0] == '\0')) {
// no valid data
return 0;
}
strcpy(tempfeature.istn, buffer2);
tempfeature.feature.setSize(1);
tempfeature.feature[0].setSize(100);
tempfeature.feature[0].setGrowth(1000);
tempfeature.feature[0].setSize(0);
char buffer[100000] = {0};
int code = 0;
char* ptr = NULL;
int total = 0;
// find the first feature (second dimension to be added later)
for (i=0; i<record.getFieldCount(); i++) {
if (strcmp(record.getExInterp(i), "**feature") == 0) {
strncpy(buffer, record[i], 100000);
ptr = strtok(buffer, " ");
while (ptr != NULL) {
if (sscanf(ptr, "%d", &code) > 0) {
tempfeature.feature[0].append(code);
total++;
}
ptr = strtok(NULL, " ");
}
}
}
// do generalized features here.
return total;
}
//////////////////////////////
//
// checkOptions -- validate and process command-line options.
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("debug=b"); // determine bad input line num
opts.define("author=b"); // author of program
opts.define("version=b"); // compilation info
opts.define("example=b"); // example usages
opts.define("h|help=b"); // short description
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, April 2004" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 15 April 2004" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
if (opts.getArgCount() != 2) {
cout << "Usage: " << opts.getCommand() << " file1 file2" << endl;
exit(1);
}
}
//////////////////////////////
//
// example -- example usage of the program
//
void example(void) {
cout <<
" \n"
<< endl;
}
//////////////////////////////
//
// usage -- gives the usage statement for the program
//
void usage(const char* command) {
cout <<
" \n"
<< endl;
}
// md5sum: f8a32e21a8a2eb9b3c88bdd3e1da11be jointfeature.cpp [20050403]