// // Programmer: Craig Stuart Sapp <craig@ccrma.stanford.edu> // Creation Date: Thu Jun 2 13:29:07 PDT 2011 // Last Modified: Mon Jun 6 14:16:16 PDT 2011 // Filename: ...sig/examples/all/xmlparse.cpp // Web Address: http://sig.sapp.org/examples/museinfo/xml/xmlparse.cpp // Syntax: C++; museinfo // // Description: Very simple parse of the contents of an XML file this parsing // does not use a class. See xmlparse2 for same technique // for parsing, but using the XmlFileBasic class. // #include <math.h> #include "humdrum.h" #ifndef OLDCPP #include <iostream> #include <fstream> #else #include <iostream.h> #include <fstream.h> using namespace std; #endif #include "SigCollection.h" // function declarations: void checkOptions(Options& opts, int argc, char** argv); void example(void); void usage(const char* command); void parseXmlFile(const char* filename); void parseXmlStream(istream& input); void extractTag(istream& input); void extractWhiteSpace(istream& input); void extractText(istream& input); // User interface variables: Options options; ////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // process the command-line options checkOptions(options, argc, argv); int i; if (options.getArgCount() >= 1) { for (i=1; i<=options.getArgCount(); i++) { parseXmlFile(options.getArg(i)); } } else { parseXmlStream(cin); } return 0; } ////////////////////////////////////////////////////////////////////////// ////////////////////////////// // // extractText -- // void extractText(istream& input) { static Array<char> whitespace; whitespace.setSize(128); whitespace.setSize(0); whitespace.setGrowth(1123123); cout << "TEXT:"; int i; int ch; ch = input.get(); char cch; while ((!input.eof()) && ((char)ch != '<')) { cch = (char)ch; if (isspace(cch)) { whitespace.append(cch); } else { for (i=0; i<whitespace.getSize(); i++) { cout << (char)(*(whitespace.getBase()+i)); } whitespace.setSize(0); cout << cch; } ch = input.get(); } if ((char)ch == '<') { input.putback((char)ch); } cout << endl; if (whitespace.getSize() > 0) { cout << "WHITESPACE:>>"; for (i=0; i<whitespace.getSize(); i++) { cout << (char)(*(whitespace.getBase()+i)); } cout << "<<" << endl; whitespace.setSize(0); } } ////////////////////////////// // // parseXmlFile -- // void parseXmlFile(const char* filename) { fstream input; input.open(filename, ios::in); parseXmlStream(input); } ////////////////////////////// // // parseXmlStream -- // void parseXmlStream(istream& input) { int ch; while (!input.eof()) { ch = input.peek(); if (ch < 0) { // end of data stream break; } if (isspace((char)ch)) { extractWhiteSpace(input); } else if ((char)ch == '<') { extractTag(input); } else { extractText(input); } } } ////////////////////////////// // // extractTag -- assumes first character in stream at this point is "<"; // // read through until a ">" is found. Whenever an "=" is found, switch // to an attribute-reading mode, looking for paired double or single quotes // void extractTag(istream& input) { int attributeMode = 0; int parenState = 'x'; int finishedQ = 0; int commentQ = 0; int secondchar = -1; int thirdchar = -1; int fourthchar = -1; int charcount = 0; int lastchar = -1; int lastlastchar = -1; cout << "TAG:"; int ch = -1; lastlastchar = lastchar; lastchar = ch; ch = input.get(); charcount++; while ((!input.eof()) && (!finishedQ)) { if (charcount == 2) { secondchar = ch; } else if (charcount == 3) { thirdchar = ch; } else if (charcount == 4) { fourthchar = ch; if ((secondchar == '!') && (thirdchar == '-') && (fourthchar == '-')) { commentQ = 1; } } switch (attributeMode) { case 0: // in tag but outside of an attribute value if ((int)ch == '=') { if (!commentQ) { // attributeMode = 1; } } else if ((int)ch == '>') { if (commentQ) { if ((lastlastchar == '-') && (lastchar == '-')) { finishedQ = 1; } } else { finishedQ = 1; } } cout << (char)ch; break; case 1: // start of attribute value but not inside of parens if (isspace((char)ch)) { cout << (char)ch; } else if ((char)ch == '\'') { parenState = '\''; attributeMode = 2; } else if ((char)ch == '"') { parenState = '"'; attributeMode = 2; } else { cerr << "ERROR READING ATTRIBUTE VALUE\n"; exit(1); } if (commentQ) { attributeMode = 0; parenState = 'x'; } case 2: // in attribute value within parens. if ((char)ch == parenState) { attributeMode = 0; } if ((char)ch == '>') { cout << "ERROR: '>' cannot occur within an attribute value\n"; exit(1); } if ((char)ch == '<') { cout << "ERROR: '<' cannot occur within an attribute value\n"; exit(1); } cout << (char)ch; } if (finishedQ) { break; } lastlastchar = lastchar; lastchar = ch; ch = input.get(); charcount++; } cout << endl; } ////////////////////////////// // // extractWhiteSpace -- // void extractWhiteSpace(istream& input) { cout << "WHITESPACE:>>"; int ch; ch = input.get(); while (!input.eof() && isspace((char)ch)) { cout << (char)ch; ch = input.get(); } if (!input.eof()) { input.putback((char)ch); } cout << "<< " << endl; } ////////////////////////////// // // checkOptions -- // void checkOptions(Options& opts, int argc, char* argv[]) { opts.define("author=b", "author of program"); opts.define("version=b", "compilation info"); opts.define("example=b", "example usages"); opts.define("help=b", "short description"); opts.process(argc, argv); // handle basic options: if (opts.getBoolean("author")) { cout << "Written by Craig Stuart Sapp, " << "craig@ccrma.stanford.edu, June 2011" << endl; exit(0); } else if (opts.getBoolean("version")) { cout << argv[0] << ", version: 2 Jan 2011" << endl; cout << "compiled: " << __DATE__ << endl; cout << MUSEINFO_VERSION << endl; exit(0); } else if (opts.getBoolean("help")) { usage(opts.getCommand()); exit(0); } else if (opts.getBoolean("example")) { example(); exit(0); } } ////////////////////////////// // // example -- // void example(void) { } ////////////////////////////// // // usage -- // void usage(const char* command) { } // md5sum: 00471a1743462c431825fdcf34037f50 xmlparse.cpp [20110711]