// // Programmer: Craig Stuart Sapp // Creation Date: Mon May 3 21:54:58 PDT 2010 // Last Modified: Thu May 6 23:16:19 PDT 2010 // Last Modified Mon Feb 7 17:55:04 PST 2011 (fixed md5sum calculation) // Filename: ...sig/examples/all/make64.cpp // Web Address: http://sig.sapp.org/examples/museinfo/humdrum/humpdf.cpp // Syntax: C++; museinfo // // Description: Embed a Humdrum file into a PDF file as an attachment. // // Links: // PDF 1.7 reference (ISO 32000.1 2008): // http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf // http://blogs.adobe.com/pdfdevjunkie/PDF_Inside_and_Out.pdf // PDF 1.4 reference: // http://www.adobe.com/devnet/pdf/pdfs/PDFReference.pdf // section 3.10.3 Embedded File Streams, page 123 // #include "humdrum.h" #include "PerlRegularExpression.h" #include "PDFFile.h" #include "CheckSum.h" #include #include #include #include #include #include using namespace std; #include #include #include void checkOptions (Options& opts, int argc, char* argv[]); void example (void); void usage (const char* command); void printMimeEncoding (ostream& out, int count, char char1, char char2, char char3); void createStreamData (ostream& out, stringstream& datatoencode, const char* filename); int printStreamObject (ostream& out, int objnum, stringstream& datatoencode, const char* filename, Array& objectindex, Array& offsetindex, int initialoffset); int createFileEntry (stringstream& out, HumdrumFile& infile, const char* filename, int nextobject, Array& objectindex, Array& offsetindex, int initialoffset); int generateNewXref (stringstream& out, Array& objectindex, Array& offsetindex, int filesize); void printPdfDate (ostream& out, struct tm* date); void addTrailerPrev (Array& trailerstring, int newprevoffset); int linkToRootObject (ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, Array trailerstring, int xrefoffset, istream& file, int nextobject, PDFFile& pdffile); void getObject (ostream& out, istream& file, int offset); int 
updateNamesObject (ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, ostream& file, int nextobject, int ndoffset); int updateRootObject (ostream& out, int rootobjnum, int initialoffset, PDFFile& pdffile, Array& rootstring, Array& objectindex, Array& offsetindex, int embedcount, int nextobject); void addDictionaryEntry (Array& objectstring, Array& entry); int getSequentialObjectCount(Array& list, int starti); // global variables: Options options; string pdffilename = ""; // used with -p option int footerQ = 0; // used with -A option int keepdirQ = 0; // used with -D option int hiddenQ = 0; // used with --hidden option (not active) int debugQ = 0; // used with --debug option int prefixQ = 0; // used with -P option const char* prefix = ""; // used with -P option /////////////////////////////////////////////////////////////////////////// int main(int argc, char** argv) { // process the command-line options checkOptions(options, argc, argv); istream *file; ifstream filestream; if (pdffilename == "") { // read standard input if no -p option given file = &cin; } else { filestream.open(pdffilename, ios::in | ios::binary); file = &filestream; if (!filestream.is_open()) { cerr << "ERROR: cannot open file: " << pdffilename << "\n"; exit(1); } } int i; int initialoffset = 0; HumdrumFile infile; PDFFile pdffile; pdffile.process(*file); // reads structural information from PDF file. // A pointer to ifstream file is stored in // pdffile, so don't close file while still // extracting data using pdffile. 
int filesize = pdffile.getFileSize(); int xrefoffset = pdffile.getXrefOffset(0); int nextobject = pdffile.getObjectCount(); if (xrefoffset <= 5) { cerr << "ERROR: no xref offset found in file " << pdffilename << endl; exit(1); } // figure out the number of input files to process int numinputs = options.getArgCount(); Array filesegments; // temporary storage for embedded files if (numinputs <= 0) { filesegments.setSize(1); } else { filesegments.setSize(numinputs); } filesegments.allowGrowth(0); for (i=0; i objectindex(1000); Array offsetindex(1000); objectindex.setGrowth(10000); offsetindex.setGrowth(10000); objectindex.setSize(0); offsetindex.setSize(0); const char* filename = ""; int fcounter = 0; for (i=0; istr().length(); fcounter++; } if (!footerQ) { // print initial contents of input PDF pdffile.print(cout); cout << flush; } for (i=0; istr() << flush; } Array trailerstring; pdffile.getTrailerString(0, trailerstring); if (!hiddenQ) { // If "hidden", the file will disappear if Save As... is used to save, // because the document root does not know about it. 
stringstream rootlink; nextobject = linkToRootObject(rootlink, objectindex, offsetindex, initialoffset, trailerstring, xrefoffset, *file, nextobject, pdffile); cout << rootlink.str() << flush; initialoffset += rootlink.str().length(); } // update the object count (/Size) in the trailer: char replacement[128] = {0}; sprintf(replacement, "/Size %d", nextobject); PerlRegularExpression pre; pre.sar(trailerstring, "\\/Size\\s+\\d+", replacement); // remove any /Prev entry in trailer, and replace with // new one addTrailerPrev(trailerstring, xrefoffset); stringstream xrefstream; int newxrefoffset = generateNewXref(xrefstream, objectindex, offsetindex, filesize); newxrefoffset += initialoffset; cout << xrefstream.str(); cout << trailerstring.getBase() << endl; cout << "startxref\n"; // byte location of new xref goes here: cout << newxrefoffset + filesize << endl; cout << "%%EOF" << endl; for (i=0; i> // The value for /Names entry is an array of two items: // (A) the name of the object in UTF-16, usually called // "_Untitled Object" // (B) An indirect object reference point to an object which // contains the /Type /FileSpec in the dictionary // (which contains the true filesystem's filename, and // an indirect object reference to a stream object which // contains the actual contents of a file. // // General Map of attaching the embedded file to the Root object: // Root object --> Names Dictionary --> Embedded List --> Embedded File // Specification --> Embedded File Stream // // * Root object has a /Names entry in its dictionary which points to // an indirect object that gives the Names dictionary of the Root object. // (create a /Names entry if it does not already exist and update // the Root object in the PDF file; otherwise, leave the original // Root entry unchanged, and go to the Name Dictionary object for // further processing. 
//      Example Root object:
//         1 0 obj << /Type /Catalog /Pages 2 0 R /Names 14 0 R >> endobj
//      or without a /Names entry in the dictionary:
//         1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj
//    * Names Dictionary should have a dictionary entry called /EmbeddedFiles
//      which points to an indirect object which will list the embedded files.
//      Example Names Dictionary:
//         14 0 obj << /EmbeddedFiles 15 0 R >>
//      This entry says that the list of embedded files is found in indirect
//      object 15.  If the Names Dictionary does not contain an /EmbeddedFiles
//      entry, then update the object to add one.
//    * Embedded List: A dictionary with a /Names entry which contains an
//      array of pairs of entries which list the embedded files.  The first
//      of the pair is a UTF-16 string giving the embedded name of the
//      data (not the filename, and not really used for anything that I can
//      figure out).  The typical name is "_Untitled Object".  The second
//      value of the pair is a reference to an indirect object.  In the example
//      below, the indirect object is #13.  This is a link to the /Filespec
//      entry for the embedded file.
//         15 0 obj
//         << /Names [(_^@U^@n^@t^@i^@t^@l^@e^@d^@ ^@O^@b^@j^@e^@c^@t) 13 0 R] >>
//         endobj
//    * Embedded File Specification:
//         13 0 obj
//         << /Type /Filespec
//            /F (file.krn)
//            /EF << /F 12 0 R >>
//            /Desc (Short Description of File)
//         >>
//         endobj
//      The Embedded File Specification lists the name of the file in the /F
//      entry; the embedded content stream is listed as an indirect object
//      in the /F entry in the dictionary of the /EF entry (in this case
//      object #12).
// * Embedded File Stream: // Contains the actual contents of the embedded file plus some // file content information: // // 12 0 obj // << // /Type /EmbeddedFile // /SubType /application#2fx-humdrum // /Length 34 // /Params // << // /CreationDate (D:20100510042439-08'00') // /ModDate (D:20100510042439-08'00') // /EmbedDate (D:19991214040506-08'00') // /Size 34 // /CheckSum // >> // >> // stream // **kern // *M4/4 // *k[] // *c: // =- // 1c // == // *- // endstream // endobj // // The /Length field is required and gives the number of bytes // between the string "stream\n" and endstream. "stream" should // have the newline 0x0a or "0x0d 0x0a" after it (but not 0x0d alone). // An optional newline before "endstream" is allowed, and will not be // considered part of the data inside of the stream. // // int linkToRootObject(ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, Array trailerstring, int xrefoffset, istream& file, int nextobject, PDFFile& pdffile) { // when this function is called, only embedded files have been // added to the PDF. 
There are two indirect objects for each // embedded file (the /FileSpec entry and the actual contents, // So embedcount is the number of files which have been included: int embedcount = objectindex.getSize() / 2; int rootindex = pdffile.getRootIndex(); int rootoffset = pdffile.getObjectOffset(rootindex); //// Now go to root object and check to see if there is a /Names entry int i; if (debugQ) { cerr << ">>> Indirect object byte offset table:" << endl; for (i=0; i>> " << i << ":\t" << pdffile.getObjectOffset(i) << endl; } } stringstream rootstream; getObject(rootstream, file, rootoffset); Array rootstring; rootstring.setSize(rootstream.str().length()+1); for (i=0; i<(int)rootstream.str().length(); i++) { rootstring[i] = rootstream.str()[i]; } rootstring[rootstring.getSize()-1] = '\0'; //// if there is a /Names entry in dictionary, then don't bother updating //// the Root entry and instead go directly to the /Names object and modify. //// If there is not a /Names entry, then add one as indirect object //// and also insert a revised Root object. PerlRegularExpression pre; if (pre.search(rootstring.getBase(), "/Names\\s+(\\d+)\\s+(\\d)\\s+R", "")) { // int namesobj = atol(pre.getSubmatch(1)); // int namesver = atol(pre.getSubmatch(2)); // ggg // nextobject = updateNamesObject(out, objectindex, offsetindex, // initialoffset, file, nextobject, objectoffsets[namesobj]); } else { // update Root dictionary to add /Names entry, create Names dictionary // and create the list of Embedded files in another object. 
stringstream newroot; nextobject = updateRootObject(newroot, rootindex, initialoffset, pdffile, rootstring, objectindex, offsetindex, embedcount, nextobject); initialoffset += newroot.str().length(); out << newroot.str() << flush; } return nextobject; } ////////////////////////////// // // updateRootObject -- // newroot == Output data stream for revised root object // rootobjnum == Root indirect object number (second value presumed 0) // initialoffset == Byte offset from the start of the file to start // of newroot stream. // pdffile == Storage for byte offset data. // rootstring == The original Root object entry which needs to have // an added name dictionary, etc. // objectindex == List of indirect objects created by this program // (index in order of creation). // offsetindex == List of byte offsets for objects created by this // program. (index in order of creation). // embedcount == Number of embedded files added by program. // int updateRootObject(ostream& out, int rootobjnum, int initialoffset, PDFFile& pdffile, Array& rootstring, Array& objectindex, Array& offsetindex, int embedcount, int nextobject) { stringstream newroot; newroot << "\n"; // objectoffsets[rootobjnum] = initialoffset + newroot.str().length(); objectindex.append(rootobjnum); int tval = newroot.str().length() + initialoffset; offsetindex.append(tval); int ndobjectnumber = pdffile.getObjectCount() + embedcount*2; int ndversion = 0; nextobject++; Array entry; entry.setSize(1024); sprintf(entry.getBase(), " /Names %d %d R ", ndobjectnumber, ndversion); entry.setSize(strlen(entry.getBase())+1); addDictionaryEntry(rootstring, entry); int i; for (i=0; i>\n"; namedict << "endobj\n"; initialoffset += namedict.str().length(); nextobject++; out << namedict.str() << flush; // add the embedded file listing: objectindex.append(embedlistobjnum); stringstream embedlist; embedlist << "\n"; tempoffset = initialoffset + embedlist.str().length(); offsetindex.append(tempoffset); embedlist << embedlistobjnum << 
" 0 obj\n"; embedlist << "<<\n"; embedlist << " /Names [\n"; int tempobjnum; char nullchar = (char)0; for (i=0; i>\n"; embedlist << "endobj\n"; initialoffset += embedlist.str().length(); out << embedlist.str() << flush; return nextobject; } ////////////////////////////// // // updateNamesObject -- Adds an EmbeddedFiles entry into the Root's // Name Dictionary, or creates an EmbeddedFiles entry if none // exists. // 261 0 obj // << // /Dests 254 0 R // /EmbeddedFiles 400 0 R % Added Embedded Files list // >> // endobj // int updateNamesObject(ostream& out, Array& objectindex, Array& offsetindex, int initialoffset, ifstream& file, int nextobject, int ndoffset) { stringstream ndstream; getObject(ndstream, file, ndoffset); Array ndstring; ndstring.setSize(ndstream.str().length()+1); int i; for (i=0; i<(int)ndstream.str().length(); i++) { ndstring[i] = ndstream.str()[i]; } ndstring[ndstring.getSize()-1] = '\0'; PerlRegularExpression pre; // char buffer[128] = {0}; Array entry; entry.setSize(1000); if (pre.search(ndstring.getBase(), "/EmbeddedFiles\\s+(\\d+)\\s+(\\d+)\\s+R", "")) { // nothing to change in Name Dictionary, just // go to the list of embedded files... // int iobject = atol(pre.getSubmatch(1)); // nextobject = updateEmbeddedFileList(out, objectindex, offsetindex, // initialoffset, file, nextobject, offsetindex[iobject]) // ggg } else { // Add an /EmbeddedFiles entry to the Name Dictionary int assignednum = nextobject++; int version = 0; sprintf(entry.getBase(), " /EmbeddedFiles %d %d R ", assignednum, version); entry.setSize(strlen(entry.getBase())+1); addDictionaryEntry(ndstring, entry); // print the new Root's name dictionay object // and create a list of embedded files. // ggg } return nextobject; } ////////////////////////////// // // addDictionaryEntry -- add an entry to a dictionary. You should // do a check before calling this function to make sure that // the name key is not already in the dictionary. 
// void addDictionaryEntry(Array& objectstring, Array& entry) { int level = 0; Array newobject; newobject.setSize(objectstring.getSize()+entry.getSize()+100); newobject.setGrowth(1000); newobject.setSize(0); int inserted = 0; char ch; int i, j; // char buffer[128] = {0}; // int plen; for (i=0; i') { level--; } if (inserted || (level != 2)) { continue; } inserted = 1; for (j=0; j') { level--; } if (level <= 0) { if (endstate[endindex] == ch) { endindex++; } else { endindex = 0; } if (endindex == endtarget) { out << ch; return; } } out << ch; file.get(ch); } cerr << "ERROR: end of Object had strange error" << endl; exit(1); return; } ////////////////////////////// // // generateNewXref -- // filesize == size in bytes of original file. // int generateNewXref(stringstream& finalout, Array& objectindex, Array& offsetindex, int filesize) { stringstream out; // temporary code for testing: // filesize = 0; int output = 0; out << "\n"; output = out.str().length(); out << "xref\n"; // don't need the null object: // out << "0 1\n"; // out << "0000000000 65535 f" << (char)0x0d << (char)0x0a; // output the starting object number in a sequence // and then how many follow, then the offset and version numbers // followed by " n" 0x0d 0x0a. 
if (objectindex.getSize() == 0) { return output; } int i; int currenti = 0; int currentlen = getSequentialObjectCount(objectindex, currenti); int value; while (currenti < objectindex.getSize()) { out << objectindex[currenti] << " " << currentlen << "\n"; for (i=currenti; i& list, int starti) { int i; int output = 1; for (i=starti+1; i& trailerstring, int newprevoffset) { int level = 0; Array newtrailer; newtrailer.setSize(trailerstring.getSize()+1000); newtrailer.setGrowth(1000); newtrailer.setSize(0); PerlRegularExpression pre; int prevprinted = 0; char ch; int i, j; char buffer[128] = {0}; int plen; for (i=0; i') { level--; } if (level != 2) { newtrailer.append(trailerstring[i]); continue; } if (trailerstring[i] != '/') { newtrailer.append(trailerstring[i]); continue; } if (pre.search(trailerstring.getBase()+i, "^(/Prev\\s+\\d+)", "")) { const char* ptr = pre.getSubmatch(1); plen = strlen(ptr); i+= plen-1; sprintf(buffer, "/Prev %d", newprevoffset); j = 0; while (buffer[j] != '\0') { newtrailer.append(buffer[j++]); } prevprinted = 1; continue; } newtrailer.append(trailerstring[i]); continue; } ch = '\0'; newtrailer.append(ch); if (!prevprinted) { // need to insert a /Prev entry (at end of dictionary) level = 0; for (i=newtrailer.getSize()-1; i>=0; i--) { if (newtrailer[i] == '>') { level++; } if (newtrailer[i] == '<') { level--; } if (level != 2) { continue; } sprintf(buffer, " /Prev %d\n", newprevoffset); plen = strlen(buffer); int oldlen = newtrailer.getSize(); newtrailer.setSize(oldlen+plen); int newlen = newtrailer.getSize(); for (j=0; j& objectindex, Array& offsetindex, int initialoffset) { stringstream datatoencode; infile.write(datatoencode); nextobject = printStreamObject(out, nextobject, datatoencode, filename, objectindex, offsetindex, initialoffset); return nextobject; } ///////////////////////////// // // printStreamObject -- // int printStreamObject(ostream& finalout, int objnum, stringstream& datatoencode, const char* filename, Array& objectindex, 
Array& offsetindex, int initialoffset) { stringstream streamcontents; stringstream out; createStreamData(streamcontents, datatoencode, filename); int contentsize = streamcontents.str().length(); char newline = 0x0a; int version = 0; int initiallen = out.str().length(); // print the embedded file content stream object /////////////////// out << "\n"; objectindex.append(objnum); int offset = out.str().length() - initiallen; offset += initialoffset; offsetindex.append(offset); out << objnum++ << " " << version << " obj\n"; out << "<<\n"; out << " /Type /EmbeddedFile\n"; out << " /SubType /application#2fx-humdrum\n"; out << " /Length " << contentsize << "\n"; struct stat attrib; stat(filename, &attrib); struct tm* moddate; moddate = gmtime(&(attrib.st_mtime)); out << " /Params\n"; out << " <<\n"; out << " /CreationDate ("; printPdfDate(out, moddate); // example: D:20050727132644-04'00' out << ")\n"; out << " /ModDate ("; printPdfDate(out, moddate); // example D:20050727143111-04'00' out << ")\n"; out << " /Size " << datatoencode.str().length() << "\n"; out << " /CheckSum <"; CheckSum::getMD5Sum(out, datatoencode); // such as 5C94A7BE7C695C70271E29A26B5705C1 out << ">\n"; out << " >>\n"; out << ">>\n"; out << "stream" << newline; out << streamcontents.str(); int len1 = streamcontents.str().length(); if ((streamcontents.str()[len1-1] != 0x0a) && (streamcontents.str()[len1-1] != 0x0d)) { out << "\n"; } out << "endstream\n"; out << "endobj\n"; // print the file spec object: ///////////////////////////////////// Array outfilename; int len = strlen(filename); outfilename.setSize(1000 + len); outfilename.setGrowth(1000); strcpy(outfilename.getBase(), filename); outfilename.setSize(len+1); PerlRegularExpression pre; if (!keepdirQ) { pre.sar(outfilename, ".*/", "", "g"); } out << "\n"; objectindex.append(objnum); offset = out.str().length() - initiallen; offset += initialoffset; offsetindex.append(offset); out << objnum << " " << version << " obj\n"; objnum++; out << "<<\n"; 
out << " /Type /Filespec\n"; out << " /F ("; if (prefixQ) { out << prefix; } out << outfilename.getBase(); out << ")\n"; out << " /EF << /F " << objnum-2 << " 0 R >>\n"; // object with contents out << " /Desc (Embedded Humdrum File)\n"; // descripion of file out << ">>\n"; out << "endobj\n"; finalout << out.str(); return objnum; } ////////////////////////////// // // printPdfDate -- time is printed in UTC plus deviation from UTC for // localtime. // example D:20050727143111-04'00' // D:yyyymmddhhmmss-HH'MM' // void printPdfDate(ostream& out, struct tm* date) { char buffer[128] = {0}; strftime(buffer, 128, "D:%Y%m%d%H%M%S", date); out << buffer; // print time zone information (need to check on daylight savings) // Remove timezone info for now since Apple OS X is having difficulties. int value = 0; // int value = timezone; char sign = '-'; // if (timezone < 0) { // value = -timezone; // } int hour = value / 3600; int min = value - hour * 3600; if (min < 0) { min = 0; } out << sign; if (hour < 10) { out << "0"; } out << hour << "'"; if (min < 10) { out << "0"; } out << min << "'"; } ////////////////////////////// // // createStreamData -- // void createStreamData(ostream& out, stringstream& datatoencode, const char* filename) { out << datatoencode.str(); } void createStreamDataOld(ostream& out, stringstream& datatoencode, const char* filename) { datatoencode << ends; out << "\n"; string sss = datatoencode.str(); // out << "Length of string " << sss.length() << endl; int count = sss.length() - 1; int packets = count / 3; // deal with leftovers later... int i; for (i=0; i\n"; } ////////////////////////////// // // printMimeEncoding -- Not used any longer... 
// void printMimeEncoding(ostream& out, int count, char char1, char char2, char char3) { static char table[64] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'}; out << table[char1 >> 2]; out << table[((char1 & 0x03) << 4) | (char2 >> 4)]; if (count == 1) { out << "=="; return; } out << table[((char2 & 0x0f) << 2) | (char3 >> 6)]; if (count == 2) { out << "="; return; } out << table[char3 & 0x3f]; } ////////////////////////////// // // checkOptions -- validate and process command-line options. // void checkOptions(Options& opts, int argc, char* argv[]) { opts.define("p|pdf=s:", "PDF file on which file(s) will be attached"); opts.define("A|append-only=b", "output only data to append to PDF"); opts.define("debug=b", "print debugging statements to standard error"); opts.define("D|keep-directory=b", "keep directory in filename"); opts.define("d|directory=b", "append directory path to filename"); opts.define("P|prefix=s:", "prepend path to written filename"); opts.define("author=b", "author of program"); opts.define("version=b", "compilation info"); opts.define("example=b", "example usages"); opts.define("help=b", "short description"); opts.process(argc, argv); // handle basic options: if (opts.getBoolean("author")) { cout << "Written by Craig Stuart Sapp, " << "craig@ccrma.stanford.edu, May 2010" << endl; exit(0); } else if (opts.getBoolean("version")) { cout << argv[0] << ", version: 5 May 2010" << endl; cout << "compiled: " << __DATE__ << endl; cout << MUSEINFO_VERSION << endl; exit(0); } else if (opts.getBoolean("help")) { usage(opts.getCommand().c_str()); exit(0); } else if (opts.getBoolean("example")) { example(); exit(0); } if (opts.getBoolean("pdf")) { pdffilename = 
opts.getString("pdf").c_str(); } else { //// No -p option is now allowed. It means that the PDF will be //// coming into the program from standard input. // cerr << "Error: -p file.pdf option is required." << endl; // exit(1); } footerQ = opts.getBoolean("append-only"); debugQ = opts.getBoolean("debug"); keepdirQ = opts.getBoolean("keep-directory"); prefixQ = opts.getBoolean("prefix"); if (prefixQ) { prefix = opts.getString("prefix").c_str(); keepdirQ = 0; } } ////////////////////////////// // // example -- example usage of the quality program // void example(void) { cout << " \n" << endl; } ////////////////////////////// // // usage -- gives the usage statement for the meter program // void usage(const char* command) { cout << " \n" << endl; } // md5sum: 070c757774ff28184747d76e94712cab humpdf.cpp [20170605]