// Programmer:    Craig Stuart Sapp <craig@ccrma.stanford.edu>
// Note:          Adapted from the Humdrum Toolkit program "simil":
//                   simil.c 1.3 92/04/21 by Keith Orpen, University of 
//                   Waterloo, Ontario.
// Creation Date: Tue Nov 17 14:35:26 PST 2009
// Last Modified: Tue Dec  8 20:06:25 PST 2009
// Filename:      ...sig/examples/all/simil.cpp
// Web Address:   http://sig.sapp.org/examples/museinfo/humdrum/simil.cpp
// Syntax:        C++; museinfo
//
// Description:   Measure Damerau-Levenshtein edit distance between a Humdrum
//                source file and template file.
//

#include "humdrum.h"
#include "PerlRegularExpression.h"

#include <string.h>
#include <stdio.h>
#include <math.h>

#ifndef OLDCPP
   #include <iostream>
   #include <fstream>
#else
   #include <iostream.h>
   #include <fstream.h>
#endif


// function declarations:
void      checkOptions(Options& opts, int argc, char** argv);
void      example(void);
void      usage(const char* command);
void      readTemplateContents(Array<Array<char> >& storage, 
                                 const char* filename);
void      readTemplateContents(Array<Array<char> >& storage, 
                                 istream& filename);
void      printTemplate(Array<Array<char> >& storage);
int       chooseSpine(const char* interp, HumdrumFile& infile);
void      fillSourceData(Array<Array<char> >& sourcedata, 
                                 Array<int>& datalines, 
		                 HumdrumFile& infile, int spine, int nulltest);
void      usual_thing           (Array<double>& results, 
                                 Array<Array<char> >& sourcedata, 
                                 Array<Array<char> >& templatedata);
void      unusual_thing         (Array<Array<double> >& results, 
                                 Array<Array<char> >& sourcedata, 
				 Array<Array<char> >& templatedata, 
                                 int sublen);
void      printResults(Array<double>& results, 
                                 Array<Array<char> >& sourcedata,
                                 Array<Array<char> >& templatedata);
void      printResultsSubString(Array<Array<double> >& results, 
                                 Array<Array<char> >& sourcedata, 
                                 Array<Array<char> >& templatedata);
double    dlv(Array<Array<char> >& s1, int offset1, 
                                 Array<Array<char> >& s2, int offset2);
void      doDamerauLevenshteinAnalysis(Array<Array<char> >& sourcedata, 
                                 Array<Array<char> >& templatedata);
double    dlvcharstar(char** set1, int len1, char** set2, int len2);
double    dlvstring(Array<Array<char> >& set1, int offset1, 
                                 Array<Array<char> >& set2, int offset2,
                                 int temlen);
double    dlvint(int *s1, int len1, int *s2, int len2);
double    dlv(Array<Array<char> >& set1, int offset1, 
                                 Array<Array<char> >& set2, int offset2);
int       getMinIndex(Array<double>& list);
void      printSubStringInfo(Array<double>& list, double target);
void      printWeights(void);
void      readEditWeights(const char* filename);
void      printSequence(Array<Array<char> >& sourcedata, int index, 
                                 int size, int flag);

// User interface variables:
Options     options;
int         debugQ    =  0;  // used with --debug option
int         rawQ      =  0;  // used with -R option
int         xlen      =  0;  // used with -x option
int         reverseQ  =  0;  // used with -r option
int         scalingQ  =  0;  // used with -n option
const char* interp    =  ""; // used with -i option
int         pweightQ  = 0;   // used with -W option
int         nullQ     = 0;   // used with -N option
int         appendQ   = 0;   // used with -a option (not implemented)
int         prependQ  = 0;   // used with -p option (not implemented)
int         sequenceQ = 0;   // used with -s option
int         spacesQ   = 1;   // used with -S option
double      threshold = 0.0; // used with -t option

double      weight_R1 = 1.0; // --R1: deleting a repeated element of S1
double      weight_R2 = 1.0; // --R2: deleting a repeated element of S2
double      weight_D1 = 1.0; // --R1: deleting a non-repeated element of S1
double      weight_D2 = 1.0; // --R1: deleting a non-repeated element of S2
double      weight_S0 = 1.0; // --S0: sub by a non-repeated element
double      weight_S1 = 1.0; // --S1: sub by an element repeated in S1
double      weight_S2 = 1.0; // --S2: sub by an element repeated in S2
double      weight_S3 = 1.0; // --S3: sub by an element repeated in S1 and S2


//////////////////////////////////////////////////////////////////////////

int main(int argc, char** argv) {
   // process the command-line options
   checkOptions(options, argc, argv);
   HumdrumFile infile;
   Array<Array<char> > templatedata;
   Array<Array<char> > sourcedata;
   Array<int> datalines;

   if ((options.getArgCount() < 1) || (options.getArgCount() > 2)) {
      usage(options.getCommand());
      exit(1);
   }

   int sourcearg   = 1;
   int templatearg = 2;
   if (reverseQ) {
      sourcearg   = 2;
      templatearg = 1;
   }
   
   if (options.getArgCount() == 2) {
      infile.read(options.getArg(sourcearg));
      readTemplateContents(templatedata, options.getArg(templatearg));

   } else {
      // read second file from standard input
      if (sourcearg == 1) {
         infile.read(options.getArg(1));
	 readTemplateContents(templatedata, cin);
      } else {
         infile.read(cin);
	 readTemplateContents(templatedata, options.getArg(1));
      }
   }

   int spine = chooseSpine(interp, infile);

   fillSourceData(sourcedata, datalines, infile, spine, nullQ);

   if (debugQ) {
      cout << "SOURCE DATA: " << endl;
      printTemplate(sourcedata);
      cout << "TEMPLATE DATA: " << endl;
      printTemplate(templatedata);
   }
        

   doDamerauLevenshteinAnalysis(sourcedata, templatedata);

   if (pweightQ) {
      printWeights();
   }

   return 0;
}

//////////////////////////////////////////////////////////////////////////



//////////////////////////////
//
// fillSourceData -- extract tokens from source Humdrum file for use
//    in analysis.
//

void fillSourceData(Array<Array<char> >& sourcedata, Array<int>& datalines, 
      HumdrumFile& infile, int spine, int nulltest) {

   sourcedata.setSize(infile.getNumLines());
   sourcedata.setSize(0);
   datalines.setSize(infile.getNumLines());
   datalines.setSize(0);
   
   int strsize;
   int i, j;
   int index;
   for (i=0; i<infile.getNumLines(); i++) {
      if (!infile[i].isData()) {
         continue;
      }
      for (j=0; j<infile[i].getFieldCount(); j++) {
         if (infile[i].getPrimaryTrack(j) != spine) {
            continue;
         }
         if (nulltest && (strcmp(infile[i][j], ".") == 0)) {
            break;
         }
         strsize = strlen(infile[i][j]);
         index   = sourcedata.getSize();
         sourcedata.setSize(index + 1);
         sourcedata[index].setSize(strsize + 1);
         strcpy(sourcedata[index].getBase(), infile[i][j]);
         datalines.append(i);
         break;
      }
   }

   sourcedata.allowGrowth(0);
   datalines.allowGrowth(0);
}



//////////////////////////////
//
// chooseSpine -- select the first column of data; otherwise, locate
//      the first spine of the specified exclusive interpretation type.
//

int chooseSpine(const char* interp, HumdrumFile& infile) {
   if (interp == NULL) {
     return 1;
   }
   if (strcmp(interp, "") == 0) {
     return 1;
   }

   int maxtracks = infile.getMaxTracks();
   int i;
   for (i=1; i<=maxtracks; i++) {
      if (strcmp(interp, infile.getTrackExInterp(i)) == 0) {
         return i;
      }
   }
	     
   cerr << "Error: A " << interp 
        << " data spine is not present in the source" << endl;
   exit(1);

   return 1;
}



//////////////////////////////
//
// printTemplate --
//

void printTemplate(Array >& storage) {
   int i;
   for (i=0; i<storage.getSize(); i++) {
      cout << storage[i].getBase() << "\n";
   }
}


//////////////////////////////
//
// readEditWeights --
//

void readEditWeights(const char* filename) {
   #ifndef OLDCPP
      ifstream weightfile(filename, ios::in);
   #else
      ifstream weightfile(filename, ios::in | ios::nocreate);
   #endif

   if (weightfile.eof()) {
      cerr << "Error opening weightfile file: " << filename << endl;
      exit(1);
   }

   PerlRegularExpression pre;
   PerlRegularExpression pre2;

   Array<char> storage;
   char buffer[1024] = {0};
   int strsize;

   weightfile.getline(buffer, 1024);
   while (!weightfile.eof()) {
      strsize = strlen(buffer);
      storage.setSize(strsize+1);
      strcpy(storage.getBase(), buffer);
      weightfile.getline(buffer, 1024);
      pre.sar(storage, "\\s*#.*", "", "");
      if (pre.search(storage, 
            "^\\s*(d1|d2|r1|r2|s0|s1|s2|s3)\\s+([\\d.+\\-]+)", "i")) {
         if (pre2.search(pre.getSubmatch(1), "D1", "i")) {
            weight_D1 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "D2", "i")) {
            weight_D2 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "R1", "i")) {
            weight_R1 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "R2", "i")) {
            weight_R2 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "S0", "i")) {
            weight_S0 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "S1", "i")) {
            weight_S1 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "S2", "i")) {
            weight_S2 = atof(pre.getSubmatch(2));
         } else if (pre2.search(pre.getSubmatch(1), "S3", "i")) {
            weight_S3 = atof(pre.getSubmatch(2));
         }
      }
   }

   weightfile.close();
}



//////////////////////////////
//
// readTemplateContents -- read the contents of the template file
//

void readTemplateContents(Array >& storage, const char* filename) {
   #ifndef OLDCPP
      ifstream templatefile(filename, ios::in);
   #else
      ifstream templatefile(filename, ios::in | ios::nocreate);
   #endif
   readTemplateContents(storage, templatefile);
   templatefile.close();
}


void readTemplateContents(Array >& storage, istream& instream) {
   if (instream.eof()) {
      cerr << "Error opening template file" << endl;
      exit(1);
   }

   storage.setSize(1000);
   storage.setSize(0);
   storage.setGrowth(100000);

   char buffer[1024] = {0};
   int index;
   int strsize;

   instream.getline(buffer, 1024);
   while (!instream.eof()) {
      strsize = strlen(buffer);
      storage.setSize(storage.getSize()+1);
      index = storage.getSize() - 1;
      storage[index].setSize(strsize+1);
      strcpy(storage[index].getBase(), buffer);
      instream.getline(buffer, 1024);
   }

   storage.allowGrowth(0);
}




//////////////////////////////
//
// checkOptions -- 
//

void checkOptions(Options& opts, int argc, char* argv[]) {
   options.define("n|no-normalize=b",  "do not normalize similarity measures");
   options.define("r|reverse=b",       "reverse source and template arguments");
   options.define("R|raw=b",           "don't normalize the DLV values");
   options.define("i|interp=s",        "use first spine in excl. interp.");
   options.define("x|length=i:0",      "subordinate pattern length");
   options.define("W|print-weights=b", "print edit operation scores");
   options.define("w|weight-file=s",   "load editing weights from file");
   options.define("N|exclude-null-tokens=b", "do not consider null tokens");
   options.define("a|append=b",        "append analysis to input data");
   options.define("p|prepend=b",       "prepend analysis to input data");
   options.define("debug=b",           "print debugging information");
   options.define("s|sequence=b",      "print search sequences");
   options.define("S|no-spaces=b",     "print search sequences without spaces");
   options.define("t|threshold=d:0.0", "similarity threshold for output");

   options.define("R1|r1=d:1.0", "scr for deleting a repeated element of S1");
   options.define("R2|r2=d:1.0", "scr for deleting a repeated element of S2");
   options.define("D1|d1=d:1.0", "scr for deleting non-repeated element of S1");
   options.define("D2|d2=d:1.0", "scr for deleting non-repeated element of S2");
   options.define("S0|s0=d:1.0", "scr for sub by a non-repeated element");
   options.define("S1|s1=d:1.0", "scr for sub by element repeated in S1");
   options.define("S2|s2=d:1.0", "scr for sub by element repeated in S2");
   options.define("S3|s3=d:1.0", "scr for sub by element repeated in S1 & S2");

   opts.define("author=b",  "author of program"); 
   opts.define("version=b", "compilation info");
   opts.define("example=b", "example usages");   
   opts.define("help=b",  "short description");
   opts.process(argc, argv);
   
   // handle basic options:
   if (opts.getBoolean("author")) {
      cout << "Adapted by Craig Stuart Sapp 2009" 
           << " from the simil program written by Keith Opern 1992"
	   << endl;
      exit(0);
   } else if (opts.getBoolean("version")) {
      cout << argv[0] << ", version: 17 Nov 2009" << endl;
      cout << "compiled: " << __DATE__ << endl;
      cout << MUSEINFO_VERSION << endl;
      exit(0);
   } else if (opts.getBoolean("help")) {
      usage(opts.getCommand());
      exit(0);
   } else if (opts.getBoolean("example")) {
      example();
      exit(0);
   }
   
   xlen      =  options.getInteger("x");
   reverseQ  =  options.getBoolean("r");
   scalingQ  = !options.getBoolean("n");
   interp    =  NULL;
   if (options.getBoolean("i")) {
      interp =  options.getString("i");
   }
   debugQ    =  options.getBoolean("debug");
   rawQ      =  options.getBoolean("raw");
   pweightQ  =  options.getBoolean("print-weights");
   nullQ     =  options.getBoolean("exclude-null-tokens");
   appendQ   =  options.getBoolean("append");
   prependQ  =  options.getBoolean("prepend");
   sequenceQ =  options.getBoolean("sequence");
   spacesQ   = !options.getBoolean("no-spaces");
   threshold =  options.getDouble("threshold");

   if (options.getBoolean("weight-file")) {
      readEditWeights(options.getString("weight-file"));
   }

   if (options.getBoolean("R1")) { weight_R1 = options.getDouble("R1"); }
   if (options.getBoolean("R2")) { weight_R2 = options.getDouble("R2"); }
   if (options.getBoolean("D1")) { weight_D1 = options.getDouble("D1"); }
   if (options.getBoolean("D2")) { weight_D2 = options.getDouble("D2"); }
   if (options.getBoolean("S0")) { weight_S0 = options.getDouble("S0"); }
   if (options.getBoolean("S1")) { weight_S1 = options.getDouble("S1"); }
   if (options.getBoolean("S2")) { weight_S2 = options.getDouble("S2"); }
   if (options.getBoolean("S3")) { weight_S3 = options.getDouble("S3"); }
}



//////////////////////////////
//
// example --
//

void example(void) {


}



//////////////////////////////
//
// usage --
//

void usage(const char* command) {

}


///////////////////////////////////////////////////////////////////////////



//////////////////////////////
//
// doDamerauLevenshteinAnalysis --
//

void doDamerauLevenshteinAnalysis(Array<Array<char> >& sourcedata, 
      Array<Array<char> >& templatedata) {
   
   if (xlen <= 0) {
      Array<double> sresults;
      usual_thing(sresults, sourcedata, templatedata);
      printResults(sresults, sourcedata, templatedata);
   } else {
      Array<Array<double> > mresults;
      unusual_thing(mresults, sourcedata, templatedata, xlen);
      printResultsSubString(mresults, sourcedata, templatedata);
   }
}



double normalize1(double d, int len) {
   return exp(-d/len);
}

double normalize2(double d, int len) {
   return exp(-d);
}



//////////////////////////////
//
// unusual_thing -- Do sub-string matching
//

void unusual_thing(Array<Array<double> >& results, 
      Array<Array<char> >& sourcedata, Array<Array<char> >& templatedata, 
      int sublen) {

   int i, j;
   int len = (int)fabs(sourcedata.getSize() - sublen + 1);
   int subcount = templatedata.getSize() - sublen + 1;

   results.setSize(0);
   if (len <= 0) {
      return;
   }
   results.setSize(len);

   for (i=0; i<results.getSize(); i++) {
      results[i].setSize(subcount);
   }

   for (i=0; i<len; i++) {
      for (j=0; j<subcount; j++) {
         results[i][j] = dlvstring(sourcedata, i, templatedata, j, sublen);
      }
   }
}



//////////////////////////////
//
// getMinIndex --  need to get the index of the minimum value
//    on the line, because this is the raw value before scaling.
//    Scaling will flip the ordering so that the minimum value
//    will become the maximum value.
//

int getMinIndex(Array& list) {
   int i;
   int output = 0;
   if (list.getSize() <= 0) {
      return -1;
   }

   for (i=1; i<list.getSize(); i++) {
      if (list[i] < list[output]) {
         output = i;
      }
   }

   return output;
}



//////////////////////////////
//
// printSubStringInfo -- print all of the substrings which are
//   at an equal or higher similarity measurement value than
//   the target.  The thresholding is done on the raw edit 
//   distance values, so the threshold is for a minimum rather
//   than a maximum.
//

void printSubStringInfo(Array& list, double target) {
   int i;
   int counter = 0;
   for (i=0; i<list.getSize(); i++) {
      if (list[i] <= target) {
         counter++;
         if (counter > 1) {
            cout << ",";
         }
         cout << i+1;
      }
   }

}



//////////////////////////////
//
// printResultsSubString --
//

void printResultsSubString(Array<Array<double> >& results, 
      Array<Array<char> >& sourcedata, Array<Array<char> >& templatedata) {
   int i;
   double value;
   cout << "**simil\t**simxrf" << endl;
   int len = xlen;
   if (len <= 0) {
      len = templatedata.getSize();
   }

   int maxi;

   for (i=0; i<results.getSize(); i++) {
      maxi = getMinIndex(results[i]);
      value = results[i][maxi];
      if (rawQ) {
         cout << value;
      } else if (scalingQ) {
         value = normalize1(value, len);
	 value = int(value * 100.0 + 0.5) / 100.0;
	 printf("%.2lf", value);
      } else {
	 value = int(value * 100.0 + 0.5) / 100.0;
         value = normalize2(value, len);
	 printf("%.2lf", value);
      }
      cout << "\t";
      printSubStringInfo(results[i], results[i][maxi]);
      cout << "\n";
   }

   // int dots = templatedata.getSize() - 1;
   int dots = xlen - 1;
   for (i=0; i<dots; i++) {
      cout << ".\t.\n";
   }

   cout << "*-\t*-" << endl;
}



//////////////////////////////
//
// printResults --
//

void printResults(Array<double>& results, Array<Array<char> >& sourcedata,
      Array<Array<char> >& templatedata) {
   int i;
   double value;

   cout << "**simil";
   if (sequenceQ) {
      cout << "\t**seq";
   }
   cout << endl;

   int len = templatedata.getSize();

   for (i=0; i<results.getSize(); i++) {
      value = results[i];
      if (normalize1(value, len) < threshold) {
         cout << ".";
         if (sequenceQ) {
            cout << "\t";
            cout << ".";
	    cout << "\n";
         } else {
            cout << "\n";
         }
         continue;
      }
      if (rawQ) {
         cout << value;
      } else if (scalingQ) {
         value = normalize1(value, len);
	 value = int(value * 100.0 + 0.5) / 100.0;
	 printf("%.2lf", value);
      } else {
	 value = int(value * 100.0 + 0.5) / 100.0;
         value = normalize2(value, len);
	 printf("%.2lf", value);
      }
      if (sequenceQ) {
         cout << "\t";
         printSequence(sourcedata, i, templatedata.getSize(), spacesQ);
      }
      cout << "\n";
   }

   int dots = templatedata.getSize() - 1;
   for (i=0; i<dots; i++) {
      cout << ".";
      if (sequenceQ) {
         cout << "\t.";
      }
      cout << "\n";
   }

   cout << "*-";
   if (sequenceQ) {
      cout << "\t*-";
   }
   cout << endl;
}



//////////////////////////////
//
// printSequence --
// 

void printSequence(Array<Array<char> >& sourcedata, int index, int size, 
      int flag) {
   int i;
   if (size <= 0) {
      cout << ".";
      return;
   }

   cout << sourcedata[index].getBase();

   for (i=index+1; i<index+size; i++) {
      if (flag) {
         cout << ' ';
      }
      cout << sourcedata[i].getBase();
   }
}



//////////////////////////////
//
// usual_thing --
//

void usual_thing(Array<double>& results, Array<Array<char> >& sourcedata, 
      Array<Array<char> >& templatedata) {

   int len = (int)fabs(sourcedata.getSize() - templatedata.getSize() + 1);

   results.setSize(0);
   if (len <= 0) {
      return;
   }
   results.setSize(len);
   results.setAll(0.0);

   for (int i=0; i<len; i++) {
      results[i] = dlv(sourcedata, i, templatedata, 0);
   }
}


//////////////////////////////
//
// printWeights --
//

void printWeights(void) {
   cout << "!! weight_R1: " << weight_R1 << "\n";
   cout << "!! weight_R2: " << weight_R2 << "\n";
   cout << "!! weight_D1: " << weight_D1 << "\n";
   cout << "!! weight_D2: " << weight_D2 << "\n";
   cout << "!! weight_S0: " << weight_S0 << "\n";
   cout << "!! weight_S1: " << weight_S1 << "\n";
   cout << "!! weight_S2: " << weight_S2 << "\n";
   cout << "!! weight_S3: " << weight_S3 << endl;
}



//////////////////////////////
//
// dlv -- Compute the Damerau-Levenshtein distance of two arrays of 
//      integers,  subject to specified weights.           
//      
//      dlv         = DLV raw value from Array<char> using dlvcharstar.
//      dlvstring   = DLV raw value from Array<char> data.
//      dlvcharstar = DLV raw value from char* data.
//      dlvint      = DLV raw value from int data. (original function)
//

double dlv(Array<Array<char> >& set1, int offset1, 
      Array<Array<char> >& set2, int offset2) {

   Array<char*> list1;
   Array<char*> list2;
   if (offset1 < 0) { offset1 = 0; }
   if (offset2 < 0) { offset2 = 0; }
   
   int size1 = set1.getSize() - offset1;
   int size2 = set2.getSize() - offset2;

   if ((size1 < 0) || (size2 < 0)) {
      cerr << "Error in offset values: " << size1 << ", " << size2 << endl;
      exit(1);
   }

   list1.setSize(size1);
   list2.setSize(size2);

   int i;
   for (i=0; i<size1; i++) {
      list1[i] = set1[i+offset1].getBase();
   }
   for (i=0; i<size2; i++) {
      list2[i] = set2[i+offset2].getBase();
   }

   return dlvcharstar(list1.getBase(), size1, list2.getBase(), size2);
}



double dlvstring(Array<Array<char> >& set1, int offset1, 
      Array<Array<char> >& set2, int offset2, int temlen = -1) {

   int len1 = set1.getSize() - offset1;
   int len2 = set2.getSize() - offset2;

   if (temlen == -1) {
      // tem len is the length of the substring to consider
      temlen = set2.getSize();
   }

   int len = len1;   // the length of the sequences to compare

   if (len1 > len2) {
      len = len2;
   } 

   if (temlen < len) {
      len = temlen;
   }

   double cost, val, m;
   int i, j;
   int mindex;
   int s1index, s2index;
   int n = 0;

   if (len+1 > n)  {
      n   = len + 1;
   }

   double min[n];
   int rep1;
   int rep2;

   mindex = 0;
   s1index = offset1;
   m = 0.0;
   min[mindex] = 0.0;

   for (i=0; i<len; i++, s1index++)  {
      m = min[mindex++];
      if ((s1index>0) && 
          (strcmp(set1[s1index].getBase(), set1[s1index-1].getBase()) == 0)) {
         m += weight_R1;
      } else {
         m += weight_D1;
      }
      min[mindex] = m;
   }
   
   s2index = offset2;
   for (j=0; j<len; j++, s2index++)  {
      rep2 = (s2index>0 && (strcmp(set2[s2index].getBase(), 
				   set2[s2index-1].getBase()) == 0));
      mindex = 0;
      cost = min[mindex];
      m = cost + (rep2 ? weight_R2 : weight_D2);
   
      min[mindex++] = m;
   
      s1index = offset1;
      for (i=0; i<len; i++, s1index++)  {
         rep1 = (s1index>0) && 
             (strcmp(set1[s1index].getBase(), 
                     set1[s1index-1].getBase()) == 0);
	       
         m += rep1 ? weight_R1 : weight_D1;
   
         if (strcmp(set1[s1index].getBase(),set2[s2index].getBase()) == 0) {
            val = cost;
         } else  {
            if (rep1) {
               val = cost + (rep2? weight_S3:weight_S1);
            } else { 
               val = cost + (rep2? weight_S2:weight_S0);
            }
         }
         if (val < m) {
            m = val;
         }
   
	 cost = min[mindex];
         val = cost + (rep2 ? weight_R2 : weight_D2);
         if (val < m) {
            m = val;
         }
   
	 min[mindex++] = m;
      }
   }

   return (double)m;
}



double dlvint(int *s1, int len1, int *s2, int len2) {
   float cost, val, m;
   int i, j;
   float *mptr;
   int *s1ptr, *s2ptr;
   int n = 0;
   float *min = NULL;

   if (len1+1 > n)  {
      min = new float[len1+1];
      n   = len1 + 1;
   }

   mptr = min;

   s1ptr = s1;
   m = *mptr = 0.0;

   for (i=len1; --i>=0; s1ptr++)  {
      m = *mptr++;
      if (s1ptr>s1 && *s1ptr==*(s1ptr-1)) {
         m += weight_R1;
      } else {
         m += weight_D1;
      }
   
      *mptr = m;
      }
   
      s2ptr = s2;
      for (j=len2; --j>=0; s2ptr++)  {
      char rep2 = (s2ptr>s2 && *s2ptr==*(s2ptr-1));
   
      mptr = min;
      m = (cost = *mptr) + (rep2? weight_R2:weight_D2);
   
      *mptr++ = m;
   
      s1ptr = s1;
      for (i=len1; --i>=0; s1ptr++)  {
         char rep1 = (s1ptr>s1 && *s1ptr==*(s1ptr-1));
   
         m += (rep1? weight_R1:weight_D1);
   
         if (*s1ptr==*s2ptr)
         val = cost;
         else  {
         if (rep1)
            val = cost + (rep2? weight_S3:weight_S1);
         else
            val = cost + (rep2? weight_S2:weight_S0);
         }
         if (val < m) { m = val; }
   
         val = (cost = *mptr) + (rep2? weight_R2:weight_D2);
         if (val < m) m = val;
   
         *mptr++ = m;
      }
   }

   delete [] min;
   return (double)m;
}



double dlvcharstar(char** set1, int len1, char** set2, int len2) {

   int len = len1;   // the length of the sequences to compare

   if (len1 > len2) {
      len = len2;
   } 

   double cost, val, m;
   int i, j;
   int mindex;
   int s1index, s2index;
   int n = 0;

   if (len+1 > n)  {
      n   = len + 1;
   }

   double min[n];
   int rep1;
   int rep2;

   mindex = 0;
   s1index = 0;
   m = 0.0;
   min[mindex] = 0.0;

   for (i=0; i<len; i++, s1index++)  {
      m = min[mindex++];
      if ((s1index>0) && 
          (strcmp(set1[s1index], set1[s1index-1]) == 0)) {
         m += weight_R1;
      } else {
         m += weight_D1;
      }
      min[mindex] = m;
   }
   
   s2index = 0;
   for (j=0; j<len; j++, s2index++)  {
      rep2 = (s2index>0 && (strcmp(set2[s2index], 
				   set2[s2index-1]) == 0));
      mindex = 0;
      cost = min[mindex];
      m = cost + (rep2 ? weight_R2 : weight_D2);
   
      min[mindex++] = m;
   
      s1index = 0;
      for (i=0; i<len; i++, s1index++)  {
         rep1 = (s1index>0) && 
             (strcmp(set1[s1index], 
                     set1[s1index-1]) == 0);
	       
         m += rep1 ? weight_R1 : weight_D1;
   
         if (strcmp(set1[s1index],set2[s2index]) == 0) {
            val = cost;
         } else  {
            if (rep1) {
               val = cost + (rep2? weight_S3:weight_S1);
            } else { 
               val = cost + (rep2? weight_S2:weight_S0);
            }
         }
         if (val < m) {
            m = val;
         }
   
	 cost = min[mindex];
         val = cost + (rep2 ? weight_R2 : weight_D2);
         if (val < m) {
            m = val;
         }
   
	 min[mindex++] = m;
      }
   }

   return (double)m;
}


// md5sum: 9d1b6764bcd4aa329ba525b7385aa3f2 simil.cpp [20100722]