//
// Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Sun Apr 19 20:03:05 PDT 2009
// Last Modified: Tue May 4 23:11:52 PDT 2009 Added -t, -c, -B, --sep options
// Last Modified: Tue May 5 09:54:01 PDT 2009 Added -p, -P, --and options
// Last Modified: Mon May 11 20:56:43 PDT 2009 Fix initial beat for comments
// Last Modified: Wed Apr 28 18:49:29 PDT 2010 Added -T and -D options
// Last Modified: Wed Sep 14 10:40:48 PDT 2011 Added -F option
// Last Modified: Sat Apr 6 01:16:22 PDT 2013 Enabled multiple segment input
// Filename: ...sig/examples/all/hgrep.cpp
// Web Address: http://sig.sapp.org/examples/museinfo/humdrum/hgrep.cpp
// Syntax: C++; museinfo
//
// Description: Grep with Humdrum intelligence built into it.
//
#include "humdrum.h"
#include <string.h>
#include <math.h>
#include <regex.h>
#ifndef OLDCPP
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
#else
#include <iostream.h>
#include <fstream.h>
#include <sstream.h>
#endif
//////////////////////////////////////////////////////////////////////////
// function declarations:
void checkOptions(Options& opts, int argc, char** argv);
void example(void);
void usage(const char* command);
void doSearch(const char* searchstring, HumdrumFile& infile,
const char* filename);
void printPreInfo(const char* filename, HumdrumFile& infile,
double measure, int line, int spine = -1);
char* searchAndReplace(char* buffer, const char* searchstring,
const char* replacestring,
const char* datastring);
void displayFraction(double fraction);
void fillAndSearches(Array<regex_t>& relist,
Array<Array<char> >& exlist,
const char* string);
int tokenSearch(int& column, HumdrumFile& infile, int line,
regex_t& re);
int singleTokenSearch(int& column, HumdrumFile& infile, int line,
regex_t& re, const char* exstring);
void destroyAndSearches(Array<regex_t>& relist);
double getBeatOfNextData(HumdrumFile& infile, int line);
void printDitto(HumdrumFile& infile, int line);
void markKernNotes(HumdrumFile& infile, int line);
// User interface variables:
Options options;
const char* searchstring = ""; // first argument
int kernQ = 0; // used with -k option
int dataQ = 0; // used with -d option
int tandemQ = 0; // used with -t option
int commentQ = 0; // used with -C option
int primaryQ = 0; // used with -p option
int nonprimaryQ = 0; // used with -P option
int bibliographicQ = 0; // used with -B option
int absbeatQ = 0; // used with -a option
int fracQ = 0; // used with -f option
int spineQ = 0; // used with -s option
int beatQ = 0; // used with -b option
int measureQ = 0; // used with -m option
int nullQ = 0; // used with -n option
int parenQ = 1; // used with -N,-n option
int quietQ = 0; // used with -q option
int exinterpQ = 0; // used with -x option
int formQ = 0; // used with -F option
int tokenizeQ = 0; // used with -T option
int datastopQ = 0; // used with -D option
int markQ = 0; // used with --mark option
const char* exinterps = ""; // used with -x option
char separator[1024] = {0}; // used with --sep option
Array<regex_t> Andlist; // used with --and option
Array<Array<char> > Andexinterp; // used with --and option
int MarkerCount = 0;
int MarkerMatchCount= 0;
// standard grep option emulations:
int fileQ = 0; // used with -H option
int matchfilesQ = 0; // used with -l option
int nomatchfilesQ = 0; // used with -L option
int ignorecaseQ = 0; // used with -i option
int basicQ = 0; // used with -G option
int lineQ = 0; // used with -n option
int invertQ = 0; // used with -v option
//////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv) {
// process the command-line options
checkOptions(options, argc, argv);
HumdrumFileSet infiles;
int numinputs = options.getArgCount() - 1;
int i;
if (numinputs < 1) {
infiles.read(cin);
} else {
for (i=1; i<numinputs+1; i++) {
infiles.readAppend(options.getArg(i+1));
}
}
for (i=0; i<infiles.getCount(); i++) {
if (absbeatQ || beatQ || measureQ || fracQ) {
// need to do this for measureQ because of pickup information
infiles[i].analyzeRhythm("4");
}
doSearch(searchstring, infiles[i], infiles[i].getFilename());
if (markQ) {
cout << infiles[i];
if (MarkerCount) {
cout << "!!!RDF**kern: @ = marked note ("
<< MarkerCount << " marks in " << MarkerMatchCount
<< " matches)" << endl;
MarkerCount = 0;
MarkerMatchCount = 0;
}
}
}
destroyAndSearches(Andlist);
return 0;
}
//////////////////////////////////////////////////////////////////////////
//////////////////////////////
//
// doSearch --
//
void doSearch(const char* searchstring, HumdrumFile& infile,
const char* filename) {
regex_t re;
int flags = 0;
double measure = 1;
if (infile.getPickupDur() != 0.0) {
measure = 0;
}
if (!basicQ) {
flags = flags | REG_EXTENDED;
}
if (ignorecaseQ) {
flags = flags | REG_ICASE;
}
int status;
status = regcomp(&re, searchstring, flags);
if (status != 0) {
char errstring[1024] = {0};
regerror(status, &re, errstring, 1000);
cerr << errstring << endl;
exit(1);
}
int i;
int matchcount = 0;
for (i=0; i<infile.getNumLines(); i++) {
if (datastopQ && infile[i].isData()) {
break;
}
if (formQ && !infile[i].isData()) {
if (!invertQ) {
if (nullQ) {
printDitto(infile, i);
cout << endl;
} else {
cout << infile[i] << endl;
}
} else {
if (strcmp(infile[i][0], "*-") == 0) {
// handled at marker xyga
} else if (strncmp(infile[i][0], "**", 2) == 0) {
// handled at marker xyga
} else {
if (nullQ) {
printDitto(infile, i);
cout << endl;
} else {
cout << infile[i] << endl;
}
}
}
}
if (infile[i].isEmpty()) { continue; }
if (infile[i].isMeasure()) {
sscanf(infile[i][0],"=%lf", &measure);
}
if (dataQ && tandemQ) {
if (!(infile[i].isData() || infile[i].isTandem())) { continue; }
} else {
if (dataQ && (!infile[i].isData())) { continue; }
if (tandemQ && (!infile[i].isTandem())) { continue; }
}
if (commentQ && (!infile[i].isComment())) { continue; }
if (bibliographicQ && (!infile[i].isBibliographic())) { continue; }
if (exinterpQ || kernQ) {
if (!(infile[i].isData() || infile[i].isInterpretation() ||
infile[i].isMeasure() || infile[i].isLocalComment())) {
continue;
}
int column = -1;
status = tokenSearch(column, infile, i, re);
// status == 0 means a match was found
// status != 0 means a match was not found
if (markQ && !status) {
markKernNotes(infile, i);
continue;
}
if (invertQ) {
status = !status;
}
if (status == 0) {
matchcount++;
if (matchfilesQ) {
cout << filename << endl;
return;
}
if (nomatchfilesQ) {
continue;
}
printPreInfo(filename, infile, measure, i, column);
if (!quietQ) {
// marker xyga
cout << infile[i];
if (nullQ) {
printDitto(infile, i);
} else {
cout << infile[i];
}
}
cout << endl;
}
} else { // search entire line as a single unit
if (tokenizeQ) {
status = 0;
for (int ii=0; ii<infile[i].getFieldCount(); ii++) {
status = tokenSearch(ii, infile, i, re);
if (markQ && !status) {
markKernNotes(infile, i);
continue;
}
if (status == 0) {
break;
}
}
} else {
status = regexec(&re, infile[i].getLine(), 0, NULL, 0);
if (markQ && !status) {
markKernNotes(infile, i);
continue;
}
}
if (Andlist.getSize() > 0) {
for (int aa=0; aa<Andlist.getSize(); aa++) {
int newstatus = regexec(&Andlist[aa], infile[i].getLine(),
0, NULL, 0);
if (newstatus != 0) {
status = 1;
}
}
}
if (invertQ) {
status = !status;
}
if (status == 0) {
matchcount++;
if (matchfilesQ) {
cout << filename << endl;
return;
}
if (nomatchfilesQ) {
continue;
}
printPreInfo(filename, infile, measure, i, -1);
if (!quietQ) {
if (nullQ) {
printDitto(infile, i);
} else {
cout << infile[i];
}
}
cout << endl;
}
}
}
regfree(&re);
if (nomatchfilesQ && matchcount == 0) {
cout << filename << endl;
}
}
//////////////////////////////
//
// markKernNotes -- Read across the line and mark all notes with '@' which
// occur on that line, or resolve the null tokens to mark previous notes
// which are currently sounding.
//
void markKernNotes(HumdrumFile& infile, int line) {
int j;
if (!infile[line].isData()) {
return;
}
MarkerMatchCount++;
char buffer[1024] = {0};
HumdrumFileAddress add;
for (j=0; j<infile[line].getFieldCount(); j++) {
if (!infile[line].isExInterp(j, "**kern")) {
continue;
}
add.setLineField(line, j);
infile.getNonNullAddress(add);
if (strchr(infile[add], 'r') != NULL) {
continue;
}
if (strchr(infile[add], '@') != NULL) {
// don't duplicate @ marker in a token
continue;
}
MarkerCount++;
strcpy(buffer, infile[add]);
strcat(buffer, "@");
infile.changeField(add, buffer);
}
}
//////////////////////////////
//
// printDitto -- fill in null tokens with/without parentheses
//
void printDitto(HumdrumFile& infile, int line) {
int j, ii, jj;
if (!infile[line].isData()) {
cout << infile[line] << endl;
}
int count = infile[line].getFieldCount();
int null = 0;
for (j=0; j<count; j++) {
ii = line;
jj = j;
null = 0;
if (strcmp(infile[ii][jj], ".") == 0) {
ii = infile[line].getDotLine(j);
jj = infile[line].getDotSpine(j);
null = 1;
}
if (null && parenQ) {
cout << "(" << infile[ii][jj] << ")";
} else {
cout << infile[ii][jj];
}
if (j<count-1) {
cout << '\t';
}
}
}
//////////////////////////////
//
// singleTokenSearch -- used in tokenSearch, but only doing one search
// on a line at a time. Returns 0 if a match was found, otherwise
// returns 1 (or a non-zero) if no match.
//
int singleTokenSearch(int& column, HumdrumFile& infile, int line, regex_t& re,
const char* exstring) {
int exlen = strlen(exstring);
int status;
for (int j=0; j<infile[line].getFieldCount(); j++) {
if (exlen == 0) {
// don't filter out based on exclusive interpretation types
} else if (strstr(exstring, infile[line].getExInterp(j)) == NULL) {
continue;
}
if ((strchr(infile[line].getSpineInfo(j), 'b') != NULL) ||
(strstr(infile[line].getSpineInfo(j), "((") != NULL)) {
if (primaryQ) {
continue;
}
} else {
if (nonprimaryQ) {
continue;
}
}
status = regexec(&re, infile[line][j], 0, NULL, 0);
if (status == 0) {
//cout << "AND MATCH FOUND IN TOKEN: " << infile[line][j] << endl;
//cout << "USING REGEX " << (int)(&re) << endl;
return status;
}
}
return 1; // no match was found on the line
}
//////////////////////////////
//
// tokenSearch -- returns 0 if a match was found, otherwise returns 1
// if no match was found.
//
//
int tokenSearch(int& column, HumdrumFile& infile, int line, regex_t& re) {
int matchfound = 0;
if (kernQ) {
matchfound = singleTokenSearch(column, infile, line, re, "**kern");
} else {
matchfound = singleTokenSearch(column, infile, line, re, exinterps);
}
if (matchfound != 0) {
return matchfound;
}
if (Andlist.getSize() == 0) {
return matchfound;
}
//cout << "CHECKING AND DATA" << endl;
for (int i=0; i<Andlist.getSize(); i++) {
if (kernQ && (strcmp(Andexinterp[i].getBase(), "") == 0)) {
matchfound = singleTokenSearch(column, infile, line,
Andlist[i], "**kern");
} else {
matchfound = singleTokenSearch(column, infile, line,
Andlist[i], Andexinterp[i].getBase());
}
if (matchfound != 0) {
column = -1; // don't identify spine for anded searches
return 1;
}
}
column = -1; // don't identify spine for anded searches
return 0; // all matches were satisfied
}
//////////////////////////////
//
// searchAndReplace --
//
char* searchAndReplace(char* buffer, const char* searchstring,
const char* replacestring, const char* datastring) {
buffer[0] = '\0';
regex_t re;
regmatch_t match;
int compflags = 0;
if (!basicQ) {
compflags = compflags | REG_EXTENDED;
}
if (ignorecaseQ) {
compflags = compflags | REG_ICASE;
}
int status = regcomp(&re, searchstring, compflags);
if (status !=0) {
regerror(status, &re, buffer, 1024);
cerr << buffer << endl;
exit(1);
}
status = regexec(&re, datastring, 1, &match, 0);
while (status == 0) {
strncat(buffer, datastring, match.rm_so);
strcat(buffer, replacestring);
datastring += match.rm_eo;
status = regexec(&re, datastring, 1, &match, REG_NOTBOL);
}
strcat(buffer, datastring);
regfree(&re);
return buffer;
}
//////////////////////////////
//
// printPreInfo --
// default value: spine = -1
//
void printPreInfo(const char* filename, HumdrumFile& infile, double measure,
int line, int spine) {
if (fileQ) {
cout << filename << separator;
}
if (lineQ) {
cout << "line " << line+1 << separator;
}
if (spineQ && (spine >= 0)) {
cout << "spine " << spine+1 << separator;
} /* else if (spineQ) { cout << "spine " << 1 << separator;
} */
if (measureQ) {
cout << "measure " << measure << separator;
}
if (beatQ) {
if (infile[line].getBeat() == 0.0) {
cout << "beat " << getBeatOfNextData(infile, line) << separator;
} else {
cout << "beat " << infile[line].getBeat() << separator;
}
}
if (absbeatQ) {
cout << "absbeat " << infile[line].getAbsBeat() << separator;
}
if (fracQ) {
cout << "frac ";
displayFraction(infile[line].getAbsBeat()/infile.getTotalDuration());
cout << separator;
}
}
//////////////////////////////
//
// getBeatOfNextData -- used to fix the beat position of comments
// at the start of measures which would be labeled as
// beat 0 instead of beat 1.
//
double getBeatOfNextData(HumdrumFile& infile, int line) {
int i;
double output = infile[line].getBeat();
for (i=line; i<infile.getNumLines(); i++) {
if ((!infile[i].isData()) && (!infile[i].isMeasure())) {
continue;
}
output = infile[i].getBeat();
break;
}
return output;
}
//////////////////////////////
//
// displayFraction
//
void displayFraction(double fraction) {
int value;
if (fraction == 0.0) {
cout << "0.000";
} else if (fraction == 1.0) {
cout << "1.000";
} else if (fraction > 0.0 && fraction < 1.0) {
value = int(fraction * 1000.0 + 0.5);
cout << "0.";
if (value < 100) { cout << "0"; }
if (value < 10) { cout << "0"; }
cout << value;
} else {
cout << fraction;
}
}
//////////////////////////////
//
// checkOptions --
//
void checkOptions(Options& opts, int argc, char* argv[]) {
opts.define("a|absbeat=b", "display the absolute beat postion of a match");
opts.define("b|beat=b", "display the metrical beat postion of a match");
opts.define("B|bibliographic=b", "search only bibliographic records");
opts.define("C|comment=b", "search only comment records");
opts.define("d|data=b", "search only data records");
opts.define("f|fraction|frac=b", "disp. pos. as fraction of total length");
opts.define("F|form=b", "preserve Humdrum file formattting");
opts.define("k|kern=b", "search only **kern records");
opts.define("p|primary=b", "search only primary spines");
opts.define("P|nonprimary=b", "search only non-primary spines");
opts.define("q|quiet=b", "don't display matching lines (just pre markers)");
opts.define("t|tandem=b", "search only data records");
opts.define("s|spine=b", "display spine number of match");
opts.define("m|measure=b", "display measure number of match");
opts.define("N|null|ditto=b", "resolve null tokens like ditto command");
opts.define("x|exinterp=s", "search only listed exinterps");
opts.define("T|tokenize=b", "search tokens independently");
opts.define("mark=b", "mark notes on data lines that match");
opts.define("D|data-stop=b", "stop search at first data record");
opts.define("sep|separator=s::", "data separator string");
opts.define("no-paren=b", "don't display null parentheses");
opts.define("and=s:", "anded search strings");
// options which mimic regular grep program:
opts.define("G|basic-regexp=b", "use basic regular expression syntax");
opts.define("H|with-filename=b", "display filename at start match line");
opts.define("h|no-filename=b","do not display filename on match line");
opts.define("L|files-without-match=b", "list files without match");
opts.define("l|files-with-match=b", "list files with match(es)");
opts.define("i|ignore-case=b", "ignore case in matches");
opts.define("n|line-number=b", "display line number of match");
opts.define("v|invert-match=b", "invert the matching criteria");
opts.define("author=b", "author of program");
opts.define("version=b", "compilation info");
opts.define("example=b", "example usages");
opts.define("help=b", "short description");
opts.process(argc, argv);
// handle basic options:
if (opts.getBoolean("author")) {
cout << "Written by Craig Stuart Sapp, "
<< "craig@ccrma.stanford.edu, April 2009" << endl;
exit(0);
} else if (opts.getBoolean("version")) {
cout << argv[0] << ", version: 5 May 2009" << endl;
cout << "compiled: " << __DATE__ << endl;
cout << MUSEINFO_VERSION << endl;
exit(0);
} else if (opts.getBoolean("help")) {
usage(opts.getCommand());
exit(0);
} else if (opts.getBoolean("example")) {
example();
exit(0);
}
if (opts.getArgCount() < 1) {
cerr << "Error: you must supply a search string\n";
exit(1);
}
searchstring = opts.getArg(1);
basicQ = opts.getBoolean("basic-regexp");
fileQ = opts.getBoolean("with-filename");
matchfilesQ = opts.getBoolean("files-with-match");
nomatchfilesQ = opts.getBoolean("files-without-match");
ignorecaseQ = opts.getBoolean("ignore-case");
lineQ = opts.getBoolean("line-number");
invertQ = opts.getBoolean("invert-match");
absbeatQ = opts.getBoolean("absbeat");
beatQ = opts.getBoolean("beat");
quietQ = opts.getBoolean("quiet");
kernQ = opts.getBoolean("kern");
fracQ = opts.getBoolean("fraction");
dataQ = opts.getBoolean("data");
tandemQ = opts.getBoolean("tandem");
primaryQ = opts.getBoolean("primary");
commentQ = opts.getBoolean("comment");
nonprimaryQ = opts.getBoolean("nonprimary");
bibliographicQ= opts.getBoolean("bibliographic");
spineQ = opts.getBoolean("spine");
formQ = opts.getBoolean("form");
measureQ = opts.getBoolean("measure");
nullQ = opts.getBoolean("ditto");
parenQ = !opts.getBoolean("no-paren");
tokenizeQ = opts.getBoolean("tokenize");
datastopQ = opts.getBoolean("data-stop");
markQ = opts.getBoolean("mark");
exinterpQ = opts.getBoolean("exinterp");
exinterps = opts.getString("exinterp");
char tempbuffer[1024] = {0};
searchAndReplace(tempbuffer, "[\\]t", " ", opts.getString("separator"));
searchAndReplace(separator, "[\\]n", "\n", tempbuffer);
if (matchfilesQ && nomatchfilesQ) {
nomatchfilesQ = 0;
}
Andlist.setSize(0);
Andexinterp.setSize(0);
if (opts.getBoolean("and")) {
fillAndSearches(Andlist, Andexinterp, opts.getString("and"));
}
}
//////////////////////////////
//
// fillAndSearches -- exinterp strings are sticky.
//
void fillAndSearches(Array<regex_t>& relist, Array<Array<char> >& exlist,
const char* string) {
int len = strlen(string);
Array<char> mybuffer(len*2+128);
mybuffer.setAll('\0');
char *buffer = mybuffer.getBase();
relist.setSize(100);
relist.setSize(0);
char exbuff[1024] = {0};
int status;
int compflags = 0;
if (!basicQ) {
compflags = compflags | REG_EXTENDED;
}
if (ignorecaseQ) {
compflags = compflags | REG_ICASE;
}
searchAndReplace(buffer, "[\\]n", "\n", string);
char* ptr = strtok(buffer, "\n");
while (ptr != NULL) {
if (strncmp("**", ptr, 2) == 0) {
strcpy(exbuff, ptr);
ptr = strtok(NULL, "\n");
continue;
}
relist.setSize(relist.getSize()+1);
//cout << "SEARCH STRING ADDED: " << ptr << endl;
//cout << "ADDRESS IS " << (int)(&(relist[relist.getSize()-1])) << endl;
status = regcomp(&(relist[relist.getSize()-1]), ptr, compflags);
if (status !=0) {
regerror(status, &(relist[relist.getSize()-1]), buffer, 1024);
cerr << buffer << endl;
exit(1);
}
exlist.setSize(exlist.getSize()+1);
exlist[exlist.getSize()-1].setSize(strlen(exbuff)+1);
strcpy(exlist[exlist.getSize()-1].getBase(), exbuff);
ptr = strtok(NULL, "\n");
}
}
//////////////////////////////
//
// destroyAndSearches --
//
void destroyAndSearches(Array& relist) {
for (int i=0; i<relist.getSize(); i++) {
regfree(&(relist[i]));
}
relist.setSize(0);
}
//////////////////////////////
//
// example --
//
void example(void) {
}
//////////////////////////////
//
// usage --
//
void usage(const char* command) {
}
// md5sum: d6f6e9e66f043f55df4bf5d25d7d8952 hgrep.cpp [20130420]