docs/doxygen/ossimRegExp_8cpp_source.html

 //
 // Copyright (C) 1991 Texas Instruments Incorporated.
 //
 // Permission is granted to any individual or institution to use, copy, modify,
 // and distribute this software, provided that this complete copyright and
 // permission notice is maintained, intact, in all copies and supporting
 // documentation.
 //
 // Texas Instruments Incorporated provides this software "as is" without
 // express or implied warranty.
 //
 //
 // Created: MNF 06/13/89  Initial Design and Implementation
 // Updated: LGO 08/09/89  Inherit from Generic
 // Updated: MBN 09/07/89  Added conditional exception handling
 // Updated: MBN 12/15/89  Sprinkled "const" qualifiers all over the place!
 // Updated: DLS 03/22/91  New lite version
 //
 // This  is the header file  for the regular  expression class.   An object of
 // this class contains a regular expression,  in  a special "compiled" format.
 // This  compiled format consists  of  several slots   all kept as the objects
 // private data.  The  RegExp class  provides a convenient  way  to  represent
 // regular  expressions.  It makes it easy   to search  for  the  same regular
 // expression in many different strings without having to  compile a string to
 // regular expression format more than necessary.
 //
 // A regular  expression allows a programmer to  specify complex patterns that
 // can be searched for  and  matched against the  character string of a String
 // object.  In  its  simplest case, a   regular expression  is a  sequence  of
 // characters with which you can search for exact character matches.  However,
 // many times you may not know the exact sequence you want to find, or you may
 // only want to find a match at the beginning or end of  a String.  The RegExp
 // object  allows specification of  such patterns by  utilizing the  following
 // regular  expression  meta-characters   (note   that  more  one  of    these
 // meta-characters  can  be used in a single  regular  expression in  order to
 // create complex search patterns):
 //
 //         ^    Match at beginning of line
 //         $    Match at end of line
 //         .    Match any single character
 //         [ ]  Match any one character inside the brackets
 //         [^ ] Match any character NOT inside the brackets
 //         -    Match any character in range on either side of dash
 //         *    Match preceding pattern zero or more times
 //         +    Match preceding pattern one or more times
 //         ?    Match preceding pattern zero or once only
 //         ()   Save a matched expression and use it in a further match.
 //
 // There are three constructors for RegExp.  One  just creates an empty RegExp
 // object.  Another creates a RegExp object  and initializes it with a regular
 // expression  that is given  in  the form of a   char*.   The  third  takes a
 // reference  to  a RegExp  object    as an  argument    and creates an object
 // initialized with the information from the given RegExp object.
 //
 // The  find  member function  finds   the  first  occurence   of  the regualr
 // expression of that object in the string given to find as an argument.  Find
 // returns a boolean, and  if true,  mutates  the private  data appropriately.
 // Find sets pointers to the beginning and end of  the thing last  found, they
 // are pointers into the actual string  that was searched.   The start and end
 // member functions return indicies  into the searched string that  correspond
 // to the beginning   and  end pointers  respectively.   The    compile member
 // function takes a char* and puts the  compiled version of the char* argument
 // into the object's private data fields.  The == and  != operators only check
 // the  to see  if   the compiled  regular  expression   is the same, and  the
 // deep_equal functions also checks  to see if the  start and end pointers are
 // the same.  The is_valid  function returns false if  program is set to NULL,
 // (i.e. there is no valid compiled exression).  The set_invalid function sets
 // the  program to NULL  (Warning: this deletes the compiled  expression). The
 // following examples may help clarify regular expression usage:
 //
 //   *  The regular expression  "^hello" matches  a "hello"  only at  the
 //      beginning of a  line.  It would match "hello  there" but not "hi,
 //      hello there".
 //
 //   *  The regular expression "long$" matches a  "long"  only at the end
 //      of a line. It would match "so long\0", but not "long ago".
 //
 //   *  The regular expression "t..t..g"  will match anything that  has a
 //      "t" then any two characters, another "t", any  two characters and
 //      then a "g".   It will match  "testing", or "test again" but would
 //      not match "toasting"
 //
 //   *  The regular  expression "[1-9ab]" matches any  number one through
 //      nine, and the characters  "a" and  "b".  It would match "hello 1"
 //      or "begin", but would not match "no-match".
 //
 //   *  The  regular expression "[^1-9ab]"  matches any character that is
 //      not a number one  through nine, or  an "a" or "b".   It would NOT
 //      match "hello 1" or "begin", but would match "no-match".
 //
 //   *  The regular expression "br* " matches  something that begins with
 //      a "b", is followed by zero or more "r"s, and ends in a space.  It
 //      would match "brrrrr ", and "b ", but would not match "brrh ".
 //
 //   *  The regular expression "br+ " matches something  that begins with
 //      a "b", is followed by one or more "r"s, and ends in  a space.  It
 //      would match "brrrrr ",  and  "br ", but would not  match "b  " or
 //      "brrh ".
 //
 //   *  The regular expression "br? " matches  something that begins with
 //      a "b", is followed by zero or one "r"s, and ends in  a space.  It
 //      would  match  "br ", and "b  ", but would not match  "brrrr "  or
 //      "brrh ".
 //
 //   *  The regular expression "(..p)b" matches  something ending with pb
 //      and beginning with whatever the two characters before the first p
 //      encounterd in the line were.  It would find  "repb" in "rep drepa
 //      qrepb".  The regular expression "(..p)a"  would find "repa qrepb"
 //      in "rep drepa qrepb"
 //
 //   *  The regular expression "d(..p)" matches something ending  with p,
 //      beginning with d, and having  two characters  in between that are
 //      the same as the two characters before  the first p  encounterd in
 //      the line.  It would match "drepa qrepb" in "rep drepa qrepb".
 //

 #include <cstring>
 #include <cstdio>
 #include <ossim/base/ossimRegExp.h>
 #include <iostream>
 // ossimRegExp -- Copies the given regular expression.

 ossimRegExp::ossimRegExp (const ossimRegExp& rxp) :
   regstart(0),     // Internal use only
   reganch(0),      // Internal use only
   regmust(0),      // Internal use only
   regmlen(0),     // Internal use only
   program(0),
   progsize(0),
   searchstring(0),

  // work variables
   regparse(0),
   regnpar(0),  // () count.
   regdummy(0),
   regcode(0),  // Code-emit pointer; &regdummy = don't.
   regsize(0),  // Code size.
   reginput(0),  // String-input pointer.
   regbol(0),  // Beginning of input, for ^ check.
   regstartp(0), // Pointer to startp array.
   regendp(0) // Ditto for endp.
 {
    if(rxp.program)
    {
       int ind = 0;
       this->progsize = rxp.progsize;        // Copy regular expression size
       this->program = new char[this->progsize]; // Allocate storage
       for(ind=this->progsize; ind-- != 0;)      // Copy regular expresion
          this->program[ind] = rxp.program[ind];
       this->startp[0] = rxp.startp[0];      // Copy pointers into last
       this->endp[0] = rxp.endp[0];          // Successful "find" operation
       this->regmust = rxp.regmust;          // Copy field
       if (rxp.regmust != NULL) {
          char* dum = rxp.program;
          ind = 0;
          while (dum != rxp.regmust) {
             ++dum;
             ++ind;
          }
          this->regmust = this->program + ind;
       }
       this->regstart = rxp.regstart;        // Copy starting index
       this->reganch = rxp.reganch;          // Copy remaining private data
       this->regmlen = rxp.regmlen;          // Copy remaining private data
    }
 }


 // operator== -- Returns true if two regular expressions have the same
 // compiled program for pattern matching.

 bool ossimRegExp::operator== (const ossimRegExp& rxp) const {
   if (this != &rxp) {               // Same address?
     ossim_uint32 ind = this->progsize;      // Get regular expression size
     if (ind != rxp.progsize)            // If different size regexp
       return false;             // Return failure
     while(ind-- != 0)               // Else while still characters
       if(this->program[ind] != rxp.program[ind]) // If regexp are different
     return false;                // Return failure
   }
   return true;                  // Else same, return success
 }


 // deep_equal -- Returns true if have the same compiled regular expressions
 // and the same start and end pointers.

 bool ossimRegExp::deep_equal (const ossimRegExp& rxp) const {
   ossim_uint32 ind = this->progsize;        // Get regular expression size
   if (ind != rxp.progsize)          // If different size regexp
     return false;               // Return failure
   while(ind-- != 0)             // Else while still characters
     if(this->program[ind] != rxp.program[ind])  // If regexp are different
       return false;             // Return failure
   return (this->startp[0] == rxp.startp[0] &&   // Else if same start/end ptrs,
       this->endp[0] == rxp.endp[0]);    // Return true
 }

 // The remaining code in this file is derived from the  regular expression code
 // whose  copyright statement appears  below.  It has been  changed to work
 // with the class concepts of C++ and COOL.

 /*
  * compile and find
  *
  *  Copyright (c) 1986 by University of Toronto.
  *  Written by Henry Spencer.  Not derived from licensed software.
  *
  *  Permission is granted to anyone to use this software for any
  *  purpose on any computer system, and to redistribute it freely,
  *  subject to the following restrictions:
  *
  *  1. The author is not responsible for the consequences of use of
  *      this software, no matter how awful, even if they arise
  *      from defects in it.
  *
  *  2. The origin of this software must not be misrepresented, either
  *      by explicit claim or by omission.
  *
  *  3. Altered versions must be plainly marked as such, and must not
  *      be misrepresented as being the original software.
  *
  * Beware that some of this code is subtly aware of the way operator
  * precedence is structured in regular expressions.  Serious changes in
  * regular-expression syntax might require a total rethink.
  */

 /*
  * The "internal use only" fields in regexp.h are present to pass info from
  * compile to execute that permits the execute phase to run lots faster on
  * simple cases.  They are:
  *
  * regstart char that must begin a match; '\0' if none obvious
  * reganch  is the match anchored (at beginning-of-line only)?
  * regmust  string (pointer into program) that match must include, or NULL
  * regmlen  length of regmust string
  *
  * Regstart and reganch permit very fast decisions on suitable starting points
  * for a match, cutting down the work a lot.  Regmust permits fast rejection
  * of lines that cannot possibly match.  The regmust tests are costly enough
  * that compile() supplies a regmust only if the r.e. contains something
  * potentially expensive (at present, the only such thing detected is * or +
  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
  * supplied because the test in find() needs it and compile() is computing
  * it anyway.
  */

 /*
  * Structure for regexp "program".  This is essentially a linear encoding
  * of a nondeterministic finite-state machine (aka syntax charts or
  * "railroad normal form" in parsing technology).  Each node is an opcode
  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
  * all nodes except BRANCH implement concatenation; a "next" pointer with
  * a BRANCH on both ends of it is connecting two alternatives.  (Here we
  * have one of the subtle syntax dependencies:  an individual BRANCH (as
  * opposed to a collection of them) is never concatenated with anything
  * because of operator precedence.)  The operand of some types of node is
  * a literal string; for others, it is a node leading into a sub-FSM.  In
  * particular, the operand of a BRANCH node is the first node of the branch.
  * (NB this is *not* a tree structure:  the tail of the branch connects
  * to the thing following the set of BRANCHes.)  The opcodes are:
  */

 // definition   number  opnd?   meaning
 #define END 0       // no   End of program.
 #define BOL 1       // no   Match "" at beginning of line.
 #define EOL 2       // no   Match "" at end of line.
 #define ANY 3       // no   Match any one character.
 #define ANYOF   4       // str  Match any character in this string.
 #define ANYBUT  5       // str  Match any character not in this
                 // string.
 #define BRANCH  6       // node Match this alternative, or the
                 // next...
 #define BACK    7       // no   Match "", "next" ptr points backward.
 #define EXACTLY 8       // str  Match this string.
 #define NOTHING 9       // no   Match empty string.
 #define STAR    10      // node Match this (simple) thing 0 or more
                 // times.
 #define PLUS    11      // node Match this (simple) thing 1 or more
                 // times.
 #define OPEN    20      // no   Mark this point in input as start of
                 // #n.
 // OPEN+1 is number 1, etc.
 #define CLOSE   30      // no   Analogous to OPEN.

 /*
  * Opcode notes:
  *
  * BRANCH   The set of branches constituting a single choice are hooked
  *      together with their "next" pointers, since precedence prevents
  *      anything being concatenated to any individual branch.  The
  *      "next" pointer of the last BRANCH in a choice points to the
  *      thing following the whole choice.  This is also where the
  *      final "next" pointer of each individual branch points; each
  *      branch starts with the operand node of a BRANCH node.
  *
  * BACK     Normal "next" pointers all implicitly point forward; BACK
  *      exists to make loop structures possible.
  *
  * STAR,PLUS    '?', and complex '*' and '+', are implemented as circular
  *      BRANCH structures using BACK.  Simple cases (one character
  *      per match) are implemented with STAR and PLUS for speed
  *      and to minimize recursive plunges.
  *
  * OPEN,CLOSE   ...are numbered at compile time.
  */

 /*
  * A node is one char of opcode followed by two chars of "next" pointer.
  * "Next" pointers are stored as two 8-bit pieces, high order first.  The
  * value is a positive offset from the opcode of the node containing it.
  * An operand, if any, simply follows the node.  (Note that much of the
  * code generation knows about this implicit relationship.)
  *
  * Using two bytes for the "next" pointer is vast overkill for most things,
  * but allows patterns to get big without disasters.
  */

 #define OP(p)       (*(p))
 #define NEXT(p)     (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
 #define OPERAND(p)  ((p) + 3)

 const unsigned char MAGIC = 0234;
 /*
  * Utility definitions.
  */

 #define UCHARAT(p)  ((const unsigned char*)(p))[0]


 #define FAIL(m) { regerror(m); return(NULL); }
 #define ISMULT(c)   ((c) == '*' || (c) == '+' || (c) == '?')
 #define META    "^$.[()|?+*\\"


 /*
  * Flags to be passed up and down.
  */
 #define HASWIDTH    01  // Known never to match null string.
 #define SIMPLE      02  // Simple enough to be STAR/PLUS operand.
 #define SPSTART     04  // Starts with * or +.
 #define WORST       0   // Worst case.


 //
 //  COMPILE AND ASSOCIATED FUNCTIONS
 //


 /*
  * Global work variables for compile().
  */
 // static const char* regparse; // Input-scan pointer.
 // static       int   regnpar;  // () count.
 // static       char  regdummy;
 // static       char* regcode;  // Code-emit pointer; &regdummy = don't.
 // static       long  regsize;  // Code size.

 /*
  * Forward declarations for compile()'s friends.
  */
 // #ifndef static
 // #define  static  static
 // #endif
 // static       char* reg (int, int*);
 // static       char* regbranch (int*);
 // static       char* regpiece (int*);
 // static       char* regatom (int*);
 // static       char* regnode (char);
 // static const char* regnext (const char*);
 // static       char* regnext (char*);
 // static void        regc (unsigned char);
 // static void        reginsert (char, char*);
 // static void        regtail (char*, const char*);
 // static void        regoptail (char*, const char*);

 // #ifdef STRCSPN
 // static int strcspn ();
 // #endif


 /*
  * We can't allocate space until we know how big the compiled form will be,
  * but we can't compile it (and thus know how big it is) until we've got a
  * place to put the code.  So we cheat:  we compile it twice, once with code
  * generation turned off and size counting turned on, and once "for real".
  * This also means that we don't allocate space until we are sure that the
  * thing really will compile successfully, and we never have to move the
  * code and thus invalidate pointers into it.  (Note that it has to be in
  * one piece because free() must be able to free it all.)
  *
  * Beware that the optimization-preparation code in here knows about some
  * of the structure of the compiled regexp.
  */


 // compile -- compile a regular expression into internal code
 // for later pattern matching.

 void ossimRegExp::compile (const char* exp) {
     const char* scan;
     const char* longest;
     unsigned long len;
              int         flags;

     if (exp == NULL) {
       //RAISE Error, SYM(ossimRegExp), SYM(No_Expr),
       printf ("ossimRegExp::compile(): No expression supplied.\n");
       return;
     }

     // First pass: determine size, legality.
     regparse = exp;
     regnpar = 1;
     regsize = 0L;
     regcode = &regdummy;
     regc(MAGIC);
     if(!reg(0, &flags))
       {
     printf ("ossimRegExp::compile(): Error in compile.\n");
     return;
       }
     this->startp[0] = this->endp[0] = this->searchstring = NULL;

     // Small enough for pointer-storage convention?
     if (regsize >= 32767L) {    // Probably could be 65535L.
       //RAISE Error, SYM(ossimRegExp), SYM(Expr_Too_Big),
       printf ("ossimRegExp::compile(): Expression too big.\n");
       return;
     }

     // Allocate space.
 //#ifndef WIN32
     if (this->program != NULL) delete [] this->program;
 //#endif
     this->program = new char[regsize];
     this->progsize = (int) regsize;

     if (this->program == NULL) {
       //RAISE Error, SYM(ossimRegExp), SYM(Out_Of_Memory),
       printf ("ossimRegExp::compile(): Out of memory.\n");
       return;
     }

     // Second pass: emit code.
     regparse = exp;
     regnpar = 1;
     regcode = this->program;
     regc(MAGIC);
     reg(0, &flags);

     // Dig out information for optimizations.
     this->regstart = '\0';      // Worst-case defaults.
     this->reganch = 0;
     this->regmust = NULL;
     this->regmlen = 0;
     scan = this->program + 1;   // First BRANCH.
     if (OP(regnext(scan)) == END) { // Only one top-level choice.
     scan = OPERAND(scan);

     // Starting-point info.
     if (OP(scan) == EXACTLY)
         this->regstart = *OPERAND(scan);
     else if (OP(scan) == BOL)
         this->reganch++;

      //
      // If there's something expensive in the r.e., find the longest
      // literal string that must appear and make it the regmust.  Resolve
      // ties in favor of later strings, since the regstart check works
      // with the beginning of the r.e. and avoiding duplication
      // strengthens checking.  Not a strong reason, but sufficient in the
      // absence of others.
      //
     if (flags & SPSTART) {
         longest = NULL;
         len = 0;
         for (; scan != NULL; scan = regnext(scan))
         if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
             longest = OPERAND(scan);
             len = (unsigned long)strlen(OPERAND(scan));
         }
         this->regmust = longest;
         this->regmlen = len;
     }
     }
 }


 /*
  - reg - regular expression, i.e. main body or parenthesized thing
  *
  * Caller must absorb opening parenthesis.
  *
  * Combining parenthesis handling with the base level of regular expression
  * is a trifle forced, but the need to tie the tails of the branches to what
  * follows makes it hard to avoid.
  */
 char* ossimRegExp::reg (int paren, int *flagp) {
     char* ret;
     char* br;
     char* ender;
     int   parno =0;
              int   flags;

     *flagp = HASWIDTH;      // Tentatively.

     // Make an OPEN node, if parenthesized.
     if (paren) {
     if (regnpar >= NSUBEXP) {
       //RAISE Error, SYM(ossimRegExp), SYM(Too_Many_Parens),
       printf ("ossimRegExp::compile(): Too many parentheses.\n");
       return 0;
         }
     parno = regnpar;
     regnpar++;
     ret = regnode(OPEN + parno);
     }
     else
     ret = NULL;

     // Pick up the branches, linking them together.
     br = regbranch(&flags);
     if (br == NULL)
     return (NULL);
     if (ret != NULL)
     regtail(ret, br);   // OPEN -> first.
     else
     ret = br;
     if (!(flags & HASWIDTH))
     *flagp &= ~HASWIDTH;
     *flagp |= flags & SPSTART;
     while (*regparse == '|') {
     regparse++;
     br = regbranch(&flags);
     if (br == NULL)
         return (NULL);
     regtail(ret, br);   // BRANCH -> BRANCH.
     if (!(flags & HASWIDTH))
         *flagp &= ~HASWIDTH;
     *flagp |= flags & SPSTART;
       }

     // Make a closing node, and hook it on the end.
     ender = regnode((paren) ? CLOSE + parno : END);
     regtail(ret, ender);

     // Hook the tails of the branches to the closing node.
     for (br = ret; br != NULL; br = regnext(br))
     regoptail(br, ender);

     // Check for proper termination.
     if (paren && *regparse++ != ')') {
         //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
         printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
     return 0;
     }
     else if (!paren && *regparse != '\0') {
         if (*regparse == ')') {
             //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Parens),
             printf ("ossimRegExp::compile(): Unmatched parentheses.\n");
         return 0;
     }
     else {
         //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
         printf ("ossimRegExp::compile(): Internal error.\n");
         return 0;
         }
     // NOTREACHED
     }
     return (ret);
 }


 /*
  - regbranch - one alternative of an | operator
  *
  * Implements the concatenation operator.
  */
 char* ossimRegExp::regbranch (int *flagp) {
     char* ret;
     char* chain;
     char* latest;
     int                  flags;

     *flagp = WORST;     // Tentatively.

     ret = regnode(BRANCH);
     chain = NULL;
     while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
     latest = regpiece(&flags);
     if (latest == NULL)
         return (NULL);
     *flagp |= flags & HASWIDTH;
     if (chain == NULL)  // First piece.
         *flagp |= flags & SPSTART;
     else
         regtail(chain, latest);
     chain = latest;
     }
     if (chain == NULL)      // Loop ran zero times.
     regnode(NOTHING);

     return (ret);
 }


 /*
  - regpiece - something followed by possible [*+?]
  *
  * Note that the branching code sequences used for ? and the general cases
  * of * and + are somewhat optimized:  they use the same NOTHING node as
  * both the endmarker for their branch list and the body of the last branch.
  * It might seem that this node could be dispensed with entirely, but the
  * endmarker role is not redundant.
  */
 char* ossimRegExp::regpiece (int *flagp) {
     char* ret;
     char  op;
     char* next;
     int            flags;

     ret = regatom(&flags);
     if (ret == NULL)
     return (NULL);

     op = *regparse;
     if (!ISMULT(op)) {
     *flagp = flags;
     return (ret);
     }

     if (!(flags & HASWIDTH) && op != '?') {
         //RAISE Error, SYM(ossimRegExp), SYM(Empty_Operand),
         printf ("ossimRegExp::compile() : *+ operand could be empty.\n");
     return 0;
     }
     *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);

     if (op == '*' && (flags & SIMPLE))
     reginsert(STAR, ret);
     else if (op == '*') {
     // Emit x* as (x&|), where & means "self".
     reginsert(BRANCH, ret); // Either x
     regoptail(ret, regnode(BACK));  // and loop
     regoptail(ret, ret);    // back
     regtail(ret, regnode(BRANCH));  // or
     regtail(ret, regnode(NOTHING)); // null.
     }
     else if (op == '+' && (flags & SIMPLE))
     reginsert(PLUS, ret);
     else if (op == '+') {
     // Emit x+ as x(&|), where & means "self".
     next = regnode(BRANCH); // Either
     regtail(ret, next);
     regtail(regnode(BACK), ret);    // loop back
     regtail(next, regnode(BRANCH)); // or
     regtail(ret, regnode(NOTHING)); // null.
     }
     else if (op == '?') {
     // Emit x? as (x|)
     reginsert(BRANCH, ret); // Either x
     regtail(ret, regnode(BRANCH));  // or
     next = regnode(NOTHING);// null.
     regtail(ret, next);
     regoptail(ret, next);
     }
     regparse++;
     if (ISMULT(*regparse)) {
         //RAISE Error, SYM(ossimRegExp), SYM(Nested_Operand),
         printf ("ossimRegExp::compile(): Nested *?+.\n");
     return 0;
     }
     return (ret);
 }


 /*
  - regatom - the lowest level
  *
  * Optimization:  gobbles an entire sequence of ordinary characters so that
  * it can turn them into a single node, which is smaller to store and
  * faster to run.  Backslashed characters are exceptions, each becoming a
  * separate node; the code is simpler that way and it's not worth fixing.
  */
 char* ossimRegExp::regatom (int *flagp) {
     char* ret;
              int   flags;

     *flagp = WORST;     // Tentatively.

     switch (*regparse++) {
     case '^':
         ret = regnode(BOL);
         break;
     case '$':
         ret = regnode(EOL);
         break;
     case '.':
         ret = regnode(ANY);
         *flagp |= HASWIDTH | SIMPLE;
         break;
     case '[':{
         int    rxpclass;
         int    rxpclassend;

         if (*regparse == '^') { // Complement of range.
             ret = regnode(ANYBUT);
             regparse++;
         }
         else
             ret = regnode(ANYOF);
         if (*regparse == ']' || *regparse == '-')
             regc(*regparse++);
         while (*regparse != '\0' && *regparse != ']') {
             if (*regparse == '-') {
             regparse++;
             if (*regparse == ']' || *regparse == '\0')
                 regc('-');
             else {
                 rxpclass = UCHARAT(regparse - 2) + 1;
                 rxpclassend = UCHARAT(regparse);
                 if (rxpclass > rxpclassend + 1) {
                    //RAISE Error, SYM(ossimRegExp), SYM(Invalid_Range),
                    printf ("ossimRegExp::compile(): Invalid range in [].\n");
                    return 0;
                             }
                 for (; rxpclass <= rxpclassend; rxpclass++)
                 regc(rxpclass);
                 regparse++;
             }
             }
             else
             regc(*regparse++);
         }
         regc('\0');
         if (*regparse != ']') {
                     //RAISE Error, SYM(ossimRegExp), SYM(Unmatched_Bracket),
                     printf ("ossimRegExp::compile(): Unmatched [].\n");
             return 0;
             }
         regparse++;
         *flagp |= HASWIDTH | SIMPLE;
         }
         break;
     case '(':
         ret = reg(1, &flags);
         if (ret == NULL)
         return (NULL);
         *flagp |= flags & (HASWIDTH | SPSTART);
         break;
     case '\0':
     case '|':
     case ')':
         //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
             printf ("ossimRegExp::compile(): Internal error.\n"); // Never here
         return 0;
     case '?':
     case '+':
     case '*':
         //RAISE Error, SYM(ossimRegExp), SYM(No_Operand),
             printf ("ossimRegExp::compile(): ?+* follows nothing.\n");
         return 0;
     case '\\':
         if (*regparse == '\0') {
             //RAISE Error, SYM(ossimRegExp), SYM(Trailing_Backslash),
                 printf ("ossimRegExp::compile(): Trailing backslash.\n");
         return 0;
             }
         ret = regnode(EXACTLY);
         regc(*regparse++);
         regc('\0');
         *flagp |= HASWIDTH | SIMPLE;
         break;
     default:{
         int    len;
         char   ender;

         regparse--;
         len = (int)strcspn(regparse, META);
         if (len <= 0) {
             //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
                     printf ("ossimRegExp::compile(): Internal error.\n");
             return 0;
                 }
         ender = *(regparse + len);
         if (len > 1 && ISMULT(ender))
             len--;  // Back off clear of ?+* operand.
         *flagp |= HASWIDTH;
         if (len == 1)
             *flagp |= SIMPLE;
         ret = regnode(EXACTLY);
         while (len > 0) {
             regc(*regparse++);
             len--;
         }
         regc('\0');
         }
         break;
     }
     return (ret);
 }


 /*
  - regnode - emit a node
    Location.
  */
 char* ossimRegExp::regnode (char op) {
     char* ret;
     char* ptr;

     ret = regcode;
     if (ret == &regdummy) {
     regsize += 3;
     return (ret);
     }

     ptr = ret;
     *ptr++ = op;
     *ptr++ = '\0';      // Null "next" pointer.
     *ptr++ = '\0';
     regcode = ptr;

     return (ret);
 }


 /*
  - regc - emit (if appropriate) a byte of code
  */
 void ossimRegExp::regc (unsigned char b) {
     if (regcode != &regdummy)
     *regcode++ = b;
     else
     regsize++;
 }


 /*
  - reginsert - insert an operator in front of already-emitted operand
  *
  * Means relocating the operand.
  */
 void ossimRegExp::reginsert (char op, char* opnd) {
     char* src;
     char* dst;
     char* place;

     if (regcode == &regdummy) {
     regsize += 3;
     return;
     }

     src = regcode;
     regcode += 3;
     dst = regcode;
     while (src > opnd)
     *--dst = *--src;

     place = opnd;       // Op node, where operand used to be.
     *place++ = op;
     *place++ = '\0';
     *place++ = '\0';
 }


 /*
  - regtail - set the next-pointer at the end of a node chain
  */
 void ossimRegExp::regtail (char* p, const char* val) {
     char* scan;
     char* temp;
     int   offset;

     if (p == &regdummy)
     return;

     // Find last node.
     scan = p;
     for (;;) {
     temp = regnext(scan);
     if (temp == NULL)
         break;
     scan = temp;
     }

     if (OP(scan) == BACK)
     offset = (const char*)scan - val;
     else
     offset = val - scan;
     *(scan + 1) = (offset >> 8) & 0377;
     *(scan + 2) = offset & 0377;
 }


 /*
  - regoptail - regtail on operand of first argument; nop if operandless
  */
 void ossimRegExp::regoptail (char* p, const char* val) {
     // "Operandless" and "op != BRANCH" are synonymous in practice.
     if (p == NULL || p == &regdummy || OP(p) != BRANCH)
     return;
     regtail(OPERAND(p), val);
 }


 //
 //  find and friends
 //


 /*
  * Global work variables for find().
  */
 // static const char*  reginput;    // String-input pointer.
 // static const char*  regbol;  // Beginning of input, for ^ check.
 // static const char* *regstartp;   // Pointer to startp array.
 // static const char* *regendp; // Ditto for endp.

 /*
  * Forwards.
  */
 // static int regtry (const char*, const char* *,
 //         const char* *, const char*);
 // static int regmatch (const char*);
 // static int regrepeat (const char*);

 // #ifdef DEBUG
 // int          regnarrate = 0;
 // void         regdump ();
 // static char* regprop ();
 // #endif


 // find -- Matches the regular expression to the given string.
 // Returns true if found, and sets start and end indexes accordingly.

 bool ossimRegExp::find (const char* string) {
     const char* s = 0;

     if(!string) return false;
     this->searchstring = string;

      // Check validity of program.
     if (!this->program || UCHARAT(this->program) != MAGIC) {
         //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
         printf ("ossimRegExp::find(): Compiled regular expression corrupted.\n");
         return 0;
     }

     // If there is a "must appear" string, look for it.
     if (this->regmust != NULL) {
     s = string;
     while ((s = strchr(s, this->regmust[0])) != NULL) {
         if (strncmp(s, this->regmust, this->regmlen) == 0)
         break;      // Found it.
         s++;
     }
     if (s == NULL)      // Not present.
         return (0);
     }

     // Mark beginning of line for ^ .
     regbol = string;

     // Simplest case:  anchored match need be tried only once.
     if (this->reganch)
     return (regtry(string, this->startp, this->endp, this->program));

     // Messy cases:  unanchored match.
     s = string;
     if (this->regstart != '\0')
     // We know what char it must start with.
     while ((s = strchr(s, this->regstart)) != NULL) {
         if (regtry(s, this->startp, this->endp, this->program))
         return (1);
         s++;

     }
     else
     // We don't -- general case.
     do {
         if (regtry(s, this->startp, this->endp, this->program))
         return (1);
     } while (*s++ != '\0');

     // Failure.
     return (0);
 }


 /*
  - regtry - try match at specific point
    0 failure, 1 success
  */
 int ossimRegExp::regtry (const char* string, const char* *start,
            const char* *end, const char* prog) {
           int    i;
     const char* *sp1;
     const char* *ep;

     reginput = string;
     regstartp = start;
     regendp = end;

     sp1 = start;
     ep = end;
     for (i = NSUBEXP; i > 0; i--) {
     *sp1++ = NULL;
     *ep++ = NULL;
     }
     if (regmatch(prog + 1)) {
     start[0] = string;
     end[0] = reginput;
     return (1);
     }
     else
     return (0);
 }


 /*
  - regmatch - main matching routine
  *
  * Conceptually the strategy is simple:  check to see whether the current
  * node matches, call self recursively to see whether the rest matches,
  * and then act accordingly.  In practice we make some effort to avoid
  * recursion, in particular by going through "ordinary" nodes (that don't
  * need to know whether the rest of the match failed) by a loop instead of
  * by recursion.
  * 0 failure, 1 success
  */
 int ossimRegExp::regmatch (const char* prog) {
     const char* scan;   // Current node.
              const char* next;  // Next node.

     scan = prog;

     while (scan != NULL) {

     next = regnext(scan);

     switch (OP(scan)) {
         case BOL:
         if (reginput != regbol)
             return (0);
         break;
         case EOL:
         if (*reginput != '\0')
             return (0);
         break;
         case ANY:
         if (*reginput == '\0')
             return (0);
         reginput++;
         break;
         case EXACTLY:{
             int         len;
             const char* opnd;

             opnd = OPERAND(scan);
             // Inline the first character, for speed.
             if (*opnd != *reginput)
             return (0);
             len = (int)strlen(opnd);
             if (len > 1 && strncmp(opnd, reginput, len) != 0)
             return (0);
             reginput += len;
         }
         break;
         case ANYOF:
         if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
             return (0);
         reginput++;
         break;
         case ANYBUT:
         if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
             return (0);
         reginput++;
         break;
         case NOTHING:
         break;
         case BACK:
         break;
         case OPEN + 1:
         case OPEN + 2:
         case OPEN + 3:
         case OPEN + 4:
         case OPEN + 5:
         case OPEN + 6:
         case OPEN + 7:
         case OPEN + 8:
         case OPEN + 9:{
                   int    no;
             const char* save;

             no = OP(scan) - OPEN;
             save = reginput;

             if (regmatch(next)) {

             //
             // Don't set startp if some later invocation of the
             // same parentheses already has.
             //
             if (regstartp[no] == NULL)
                 regstartp[no] = save;
             return (1);
             }
             else
             return (0);
         }
 //      break;
         case CLOSE + 1:
         case CLOSE + 2:
         case CLOSE + 3:
         case CLOSE + 4:
         case CLOSE + 5:
         case CLOSE + 6:
         case CLOSE + 7:
         case CLOSE + 8:
         case CLOSE + 9:{
                   int    no;
             const char* save;

             no = OP(scan) - CLOSE;
             save = reginput;

             if (regmatch(next)) {

             //
             // Don't set endp if some later invocation of the
             // same parentheses already has.
             //
             if (regendp[no] == NULL)
                 regendp[no] = save;
             return (1);
             }
             else
             return (0);
         }
 //      break;
         case BRANCH:{

           const char* save;

             if (OP(next) != BRANCH) // No choice.
             next = OPERAND(scan);   // Avoid recursion.
             else {
             do {
                 save = reginput;
                 if (regmatch(OPERAND(scan)))
                 return (1);
                 reginput = save;
                 scan = regnext(scan);
             } while (scan != NULL && OP(scan) == BRANCH);
             return (0);
             // NOTREACHED
             }
         }
         break;
         case STAR:
         case PLUS:{
           char   nextch;
             int        no;
             const char* save;
             int        min_no;

             //
             // Lookahead to avoid useless match attempts when we know
             // what character comes next.
             //
             nextch = '\0';
             if (OP(next) == EXACTLY)
             nextch = *OPERAND(next);
             min_no = (OP(scan) == STAR) ? 0 : 1;
             save = reginput;
             no = regrepeat(OPERAND(scan));
             while (no >= min_no) {
             // If it could work, try it.
             if (nextch == '\0' || *reginput == nextch)
                 if (regmatch(next))
                 return (1);
             // Couldn't or didn't -- back up.
             no--;
             reginput = save + no;
             }
             return (0);
         }
 //      break;
         case END:
         return (1); // Success!

         default:
             //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
                 printf ("ossimRegExp::find(): Internal error -- memory corrupted.\n");
         return 0;
     }
     scan = next;
     }

     //
     //  We get here only if there's trouble -- normally "case END" is the
     //  terminating point.
     //
     //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
     printf ("ossimRegExp::find(): Internal error -- corrupted pointers.\n");
     return (0);
 }


 /*
  - regrepeat - repeatedly match something simple, report how many
  */
 int ossimRegExp::regrepeat (const char* p) {
           int   count = 0;
     const char* scan;
     const char* opnd;

     scan = reginput;
     opnd = OPERAND(p);
     switch (OP(p)) {
     case ANY:
         count = (int)strlen(scan);
         scan += count;
         break;
     case EXACTLY:
         while (*opnd == *scan) {
         count++;
         scan++;
         }
         break;
     case ANYOF:
         while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
         count++;
         scan++;
         }
         break;
     case ANYBUT:
         while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
         count++;
         scan++;
         }
         break;
     default:        // Oh dear.  Called inappropriately.
         //RAISE Error, SYM(ossimRegExp), SYM(Internal_Error),
         printf ("ossimRegExp::find(): Internal error.\n");
         return 0;
     }
     reginput = scan;
     return (count);
 }


 /*
  - regnext - dig the "next" pointer out of a node
  */
 const char* ossimRegExp::regnext (const char* p) {
     int offset;

     if (p == &regdummy)
     return (NULL);

     offset = NEXT(p);
     if (offset == 0)
     return (NULL);

     if (OP(p) == BACK)
     return (p - offset);
     else
     return (p + offset);
 }


 char* ossimRegExp::regnext (char* p) {
     int offset;

     if (p == &regdummy)
     return (NULL);

     offset = NEXT(p);
     if (offset == 0)
     return (NULL);

     if (OP(p) == BACK)
     return (p - offset);
     else
     return (p + offset);
 }
ossimRegExp::regtail
void regtail(char *, const char *)
Definition: ossimRegExp.cpp:876

ISMULT
#define ISMULT(c)
Definition: ossimRegExp.cpp:333

ossimRegExp::regstartp
const char ** regstartp
Definition: ossimRegExp.h:118

ossimRegExp::regendp
const char ** regendp
Definition: ossimRegExp.h:119

ossimRegExp::regbol
const char * regbol
Definition: ossimRegExp.h:117

ossimRegExp::regsize
long regsize
Definition: ossimRegExp.h:115

UCHARAT
#define UCHARAT(p)
Definition: ossimRegExp.cpp:329

ossimRegExp::reganch
char reganch
Definition: ossimRegExp.h:103

ossimRegExp::regtry
int regtry(const char *, const char **, const char **, const char *)
Definition: ossimRegExp.cpp:1006

ossimRegExp::regc
void regc(unsigned char)
Definition: ossimRegExp.cpp:837

PLUS
#define PLUS
Definition: ossimRegExp.cpp:280

ossimRegExp::regbranch
char * regbranch(int *)
Definition: ossimRegExp.cpp:585

ossimRegExp::regdummy
char regdummy
Definition: ossimRegExp.h:113

STAR
#define STAR
Definition: ossimRegExp.cpp:278

NOTHING
#define NOTHING
Definition: ossimRegExp.cpp:277

ossimRegExp::regmatch
int regmatch(const char *)
Definition: ossimRegExp.cpp:1043

ossimRegExp::regoptail
void regoptail(char *, const char *)
Definition: ossimRegExp.cpp:905

ossimRegExp::regnext
const char * regnext(const char *)
Definition: ossimRegExp.cpp:1268

ossimRegExp::regrepeat
int regrepeat(const char *)
Definition: ossimRegExp.cpp:1225

EOL
#define EOL
Definition: ossimRegExp.cpp:268

ossimRegExp::reginput
const char * reginput
Definition: ossimRegExp.h:116

SIMPLE
#define SIMPLE
Definition: ossimRegExp.cpp:341

ossimRegExp::startp
const char * startp[NSUBEXP]
Definition: ossimRegExp.h:100

ANY
#define ANY
Definition: ossimRegExp.cpp:269

MAGIC
const unsigned char MAGIC
Definition: ossimRegExp.cpp:324

BRANCH
#define BRANCH
Definition: ossimRegExp.cpp:273

ossimRegExp::operator==
bool operator==(const ossimRegExp &) const
Definition: ossimRegExp.cpp:173

ossimRegExp
Definition: ossimRegExp.h:75

ossimRegExp::ossimRegExp
ossimRegExp()
Definition: ossimRegExp.h:144

NEXT
#define NEXT(p)
Definition: ossimRegExp.cpp:321

BOL
#define BOL
Definition: ossimRegExp.cpp:267

ossim_uint32
unsigned int ossim_uint32
Definition: ossimConstants.h:235

HASWIDTH
#define HASWIDTH
Definition: ossimRegExp.cpp:340

ossimRegExp::regparse
const char * regparse
Definition: ossimRegExp.h:111

ossimRegExp::program
char * program
Definition: ossimRegExp.h:106

ossimRegExp.h

OPEN
#define OPEN
Definition: ossimRegExp.cpp:282

ossimRegExp::compile
void compile(const char *)
Definition: ossimRegExp.cpp:405

META
#define META
Definition: ossimRegExp.cpp:334

ANYBUT
#define ANYBUT
Definition: ossimRegExp.cpp:271

ossimRegExp::start
ossim_uint32 start() const
Definition: ossimRegExp.h:209

ossimRegExp::regnpar
int regnpar
Definition: ossimRegExp.h:112

ossimRegExp::regnode
char * regnode(char)
Definition: ossimRegExp.cpp:814

ossimRegExp::end
ossim_uint32 end() const
Definition: ossimRegExp.h:217

OPERAND
#define OPERAND(p)
Definition: ossimRegExp.cpp:322

if
if(yy_init)
Definition: ossimEquTokenizer.cpp:545

ossimRegExp::regpiece
char * regpiece(int *)
Definition: ossimRegExp.cpp:622

EXACTLY
#define EXACTLY
Definition: ossimRegExp.cpp:276

ossimRegExp::regmust
const char * regmust
Definition: ossimRegExp.h:104

ossimRegExp::regatom
char * regatom(int *)
Definition: ossimRegExp.cpp:691

BACK
#define BACK
Definition: ossimRegExp.cpp:275

CLOSE
#define CLOSE
Definition: ossimRegExp.cpp:285

ossimRegExp::reginsert
void reginsert(char, char *)
Definition: ossimRegExp.cpp:850

ossimRegExp::regcode
char * regcode
Definition: ossimRegExp.h:114

ossimRegExp::searchstring
const char * searchstring
Definition: ossimRegExp.h:108

NSUBEXP
const int NSUBEXP
Definition: ossimRegExp.h:72

ossimRegExp::deep_equal
bool deep_equal(const ossimRegExp &) const
Definition: ossimRegExp.cpp:189

END
#define END
Definition: ossimRegExp.cpp:266

ossimRegExp::progsize
ossim_uint32 progsize
Definition: ossimRegExp.h:107

ossimRegExp::reg
char * reg(int, int *)
Definition: ossimRegExp.cpp:504

ossimRegExp::regmlen
ossim_uint32 regmlen
Definition: ossimRegExp.h:105

ossimRegExp::endp
const char * endp[NSUBEXP]
Definition: ossimRegExp.h:101

WORST
#define WORST
Definition: ossimRegExp.cpp:343

ossimRegExp::regstart
char regstart
Definition: ossimRegExp.h:102

ossimRegExp::find
bool find(const char *)
Definition: ossimRegExp.cpp:948

OP
#define OP(p)
Definition: ossimRegExp.cpp:320

SPSTART
#define SPSTART
Definition: ossimRegExp.cpp:342

ANYOF
#define ANYOF
Definition: ossimRegExp.cpp:270