//
// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Creation Date: Thu Jun  2 13:29:07 PDT 2011
// Last Modified: Mon Jun  6 14:16:16 PDT 2011
// Filename:      ...sig/examples/all/xmlparse.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/xml/xmlparse.cpp
// Syntax:        C++; museinfo
//
// Description:   Very simple parse of the contents of an XML file.  This
//                parsing does not use a class.  See xmlparse2 for the same
//                parsing technique implemented with the XmlFileBasic class.
//

#include <math.h>
#include <cctype>
#include <string>
#include "humdrum.h"

#ifndef OLDCPP
   #include <iostream>
   #include <fstream>
#else
   #include <iostream.h>
   #include <fstream.h>
   using namespace std;
#endif

#include "SigCollection.h"

// function declarations:
// command-line handling:
void      checkOptions(Options& opts, int argc, char** argv);
void      example(void);
void      usage(const char* command);

// input drivers: parse a named file, or an already-open stream:
void      parseXmlFile(const char* filename);
void      parseXmlStream(istream& input);

// readers called by parseXmlStream(); each one consumes a single piece of
// the input and prints it to standard output:
void      extractTag(istream& input);
void      extractWhiteSpace(istream& input);
void      extractText(istream& input);

// User interface variables:
// (passed to checkOptions() at the start of main(), then queried there for
// the list of input files)
Options   options;



//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {
   // Define and read the command-line options.  The informational options
   // (author, version, help, example) are answered inside checkOptions().
   checkOptions(options, argc, argv);

   if (options.getArgCount() < 1) {
      // No file arguments were given: parse standard input instead.
      parseXmlStream(cin);
      return 0;
   }

   // File arguments are fetched with indices 1 through getArgCount().
   for (int fileIndex = 1; fileIndex <= options.getArgCount(); fileIndex++) {
      parseXmlFile(options.getArg(fileIndex));
   }

   return 0;
}

//////////////////////////////////////////////////////////////////////////



//////////////////////////////
//
// extractText -- Copy character data from the input to standard output,
//    prefixed with "TEXT:" and followed by a newline.  Reading stops in
//    front of the next "<" (which is left in the stream for the caller)
//    or at the end of the input.
//
//    Whitespace is held back until a later non-space character shows that
//    it lies inside the text.  Whitespace that turns out to be trailing
//    (directly before the "<" or the end of input) is not part of the
//    TEXT line; it is reported afterwards on its own line as
//    "WHITESPACE:>>...<<".
//

void extractText(std::istream& input) {
   // Whitespace seen since the last character that was printed.
   std::string pending;

   std::cout << "TEXT:";

   int ch = input.get();
   while (input.good() && (ch != '<')) {
      // A successful get() yields a byte value in the range 0..255, which
      // is the domain isspace() is defined for.  Narrowing to a (possibly
      // signed) char first would hand it negative values for bytes >= 0x80.
      if (std::isspace(ch)) {
         pending += static_cast<char>(ch);
      } else {
         // The held-back whitespace was interior to the text after all.
         std::cout << pending << static_cast<char>(ch);
         pending.clear();
      }
      ch = input.get();
   }
   if (ch == '<') {
      // Leave the start of the following tag for the caller to read.
      input.putback(static_cast<char>(ch));
   }
   std::cout << std::endl;

   if (!pending.empty()) {
      std::cout << "WHITESPACE:>>" << pending << "<<" << std::endl;
   }
}



//////////////////////////////
//
// parseXmlFile -- Open the named file for reading and hand it to
//    parseXmlStream().  A file that cannot be opened is reported on
//    standard error and skipped, so that any remaining files are still
//    processed by the caller.
//

void parseXmlFile(const char* filename) {
   std::ifstream input(filename);
   if (!input.is_open()) {
      std::cerr << "Error: cannot open file " << filename
                << " for reading" << std::endl;
      return;
   }
   parseXmlStream(input);
}



//////////////////////////////
//
// parseXmlStream --
//

void parseXmlStream(istream& input) {
   int ch;
   while (!input.eof()) {
      ch = input.peek();
      if (ch < 0) {
         // end of data stream
         break;
      }
      if (isspace((char)ch)) {
         extractWhiteSpace(input);
      } else if ((char)ch == '<') {
         extractTag(input);
      } else {
         extractText(input);
      }
   }
}



//////////////////////////////
//
// extractTag -- Copy one piece of markup from the input to standard
//    output, prefixed with "TAG:" and followed by a newline.  The caller
//    is expected to have positioned the stream on the opening "<"; that
//    first character is echoed but not checked.
//
//    Markup normally ends at the first ">".  Markup whose second, third
//    and fourth characters are "!--" is treated as a comment, which ends
//    only at a ">" directly preceded by "--"; a ">" inside the comment
//    text does not end it.  Reading also stops at the end of the input.
//
//    Attribute values are not interpreted, so a ">" inside a quoted
//    attribute value ends the tag early.
//

void extractTag(std::istream& input) {
   std::cout << "TAG:";

   bool commentQ     = false;  // set once the markup is known to be a comment
   int  position     = 0;      // 1-based index of the current character
   int  secondchar   = -1;
   int  thirdchar    = -1;
   int  lastchar     = -1;     // character before the current one
   int  lastlastchar = -1;     // character two before the current one

   for (int ch = input.get(); input.good(); ch = input.get()) {
      position++;
      if (position == 2) {
         secondchar = ch;
      } else if (position == 3) {
         thirdchar = ch;
      } else if (position == 4) {
         commentQ = (secondchar == '!') && (thirdchar == '-') && (ch == '-');
      }

      std::cout << static_cast<char>(ch);

      if (ch == '>') {
         const bool closesComment = (lastlastchar == '-') && (lastchar == '-');
         if (!commentQ || closesComment) {
            // The closing ">" has been echoed; leave the rest of the
            // stream for the caller.
            break;
         }
      }

      lastlastchar = lastchar;
      lastchar     = ch;
   }

   std::cout << std::endl;
}



//////////////////////////////
//
// extractWhiteSpace -- Copy a run of whitespace characters from the
//    input to standard output, bracketed as "WHITESPACE:>>...<< " and
//    followed by a newline.  The first non-space character is put back
//    into the stream for the caller.
//

void extractWhiteSpace(std::istream& input) {
   std::cout << "WHITESPACE:>>";

   int ch = input.get();
   // A successful get() yields a byte value in 0..255, the domain that
   // isspace() is defined for; narrowing it to a (possibly signed) char
   // first would produce negative arguments for bytes >= 0x80.
   while (input.good() && std::isspace(ch)) {
      std::cout << static_cast<char>(ch);
      ch = input.get();
   }
   if (input.good()) {
      // ch is the character that ended the run; it belongs to the caller.
      input.putback(static_cast<char>(ch));
   }

   std::cout << "<< " << std::endl;
}



//////////////////////////////
//
// checkOptions -- Register the program's command-line options, process
//    the argument list, and answer the informational options.  Each of
//    author, version, help and example prints its output (if any) and
//    then ends the program with exit status 0.
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   // NOTE(review): the "=b" suffix presumably declares a boolean option;
   // confirm against the Options class documentation.
   opts.define("author=b",  "author of program");
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");
   opts.define("help=b",  "short description");
   opts.process(argc, argv);

   // Every informational option terminates the program, so the checks
   // can be written as independent guards, tested in this order.
   if (opts.getBoolean("author")) {
      cout << "Written by Craig Stuart Sapp, "
              "craig@ccrma.stanford.edu, June 2011" << endl;
      exit(0);
   }

   if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 2 Jan 2011" << endl
           << "compiled: " << __DATE__ << endl
           << MUSEINFO_VERSION << endl;
      exit(0);
   }

   if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   }

   if (opts.getBoolean("example")) {
      example();
      exit(0);
   }
}



//////////////////////////////
//
// example -- Print sample command lines to standard output.  Called for
//    the "example" command-line option.
//

void example(void) {
   std::cout
      << "# list the tags, text and whitespace in one or more XML files:\n"
      << "#    xmlparse file1.xml file2.xml\n"
      << "#\n"
      << "# do the same for XML arriving on standard input:\n"
      << "#    xmlparse < file.xml\n";
}



//////////////////////////////
//
// usage -- Print a short description of the command line to standard
//    output.  Called for the "help" command-line option.
//
//    command = name the program was started with, shown in the synopsis.
//

void usage(const char* command) {
   // Never stream a null pointer; fall back to the program's usual name.
   const char* name = command ? command : "xmlparse";

   std::cout
      << "Usage: " << name << " [options] [file.xml ...]\n"
      << "\n"
      << "Splits XML into tags, text and whitespace, printing each piece on\n"
      << "standard output with a TAG:, TEXT: or WHITESPACE: label.  Standard\n"
      << "input is read when no files are named.\n"
      << "\n"
      << "Options:\n"
      << "   --author    author of program\n"
      << "   --version   compilation info\n"
      << "   --example   example usages\n"
      << "   --help      short description\n";
}



// md5sum: 00471a1743462c431825fdcf34037f50 xmlparse.cpp [20110711]