OSDN Git Service

Enable to track git://github.com/monaka/binutils.git
[pf3gnuchains/pf3gnuchains3x.git] / tcl / generic / regc_lex.c
diff --git a/tcl/generic/regc_lex.c b/tcl/generic/regc_lex.c
new file mode 100644 (file)
index 0000000..1acc3f4
--- /dev/null
@@ -0,0 +1,1061 @@
+/*
+ * lexical analyzer
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ * 
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them. 
+ * 
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ * 
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ * 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/* scanning macros (know about v) */
+#define        ATEOS()         (v->now >= v->stop)
+#define        HAVE(n)         (v->stop - v->now >= (n))
+#define        NEXT1(c)        (!ATEOS() && *v->now == CHR(c))
+#define        NEXT2(a,b)      (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
+#define        NEXT3(a,b,c)    (HAVE(3) && *v->now == CHR(a) && \
+                                               *(v->now+1) == CHR(b) && \
+                                               *(v->now+2) == CHR(c))
+#define        SET(c)          (v->nexttype = (c))
+#define        SETV(c, n)      (v->nexttype = (c), v->nextvalue = (n))
+#define        RET(c)          return (SET(c), 1)
+#define        RETV(c, n)      return (SETV(c, n), 1)
+#define        FAILW(e)        return (ERR(e), 0)      /* ERR does SET(EOS) */
+#define        LASTTYPE(t)     (v->lasttype == (t))
+
+/* lexical contexts */
+#define        L_ERE   1       /* mainline ERE/ARE */
+#define        L_BRE   2       /* mainline BRE */
+#define        L_Q     3       /* REG_QUOTE */
+#define        L_EBND  4       /* ERE/ARE bound */
+#define        L_BBND  5       /* BRE bound */
+#define        L_BRACK 6       /* brackets */
+#define        L_CEL   7       /* collating element */
+#define        L_ECL   8       /* equivalence class */
+#define        L_CCL   9       /* character class */
+#define        INTOCON(c)      (v->lexcon = (c))
+#define        INCON(con)      (v->lexcon == (con))
+
+/* construct pointer past end of chr array */
+#define        ENDOF(array)    ((array) + sizeof(array)/sizeof(chr))
+
+/*
+ - lexstart - set up lexical stuff, scan leading options
+ ^ static VOID lexstart(struct vars *);
+ */
+static VOID
+lexstart(v)
+struct vars *v;
+{
+       prefixes(v);                    /* may turn on new type bits etc. */
+       NOERR();
+
+       if (v->cflags&REG_QUOTE) {
+               assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+               INTOCON(L_Q);
+       } else if (v->cflags&REG_EXTENDED) {
+               assert(!(v->cflags&REG_QUOTE));
+               INTOCON(L_ERE);
+       } else {
+               assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+               INTOCON(L_BRE);
+       }
+
+       v->nexttype = EMPTY;            /* remember we were at the start */
+       next(v);                        /* set up the first token */
+}
+
+/*
+ - prefixes - implement various special prefixes
+ ^ static VOID prefixes(struct vars *);
+ */
+static VOID
+prefixes(v)
+struct vars *v;
+{
+       /* literal string doesn't get any of this stuff */
+       if (v->cflags&REG_QUOTE)
+               return;
+
+       /* initial "***" gets special things */ 
+       if (HAVE(4) && NEXT3('*', '*', '*'))
+               switch (*(v->now + 3)) {
+               case CHR('?'):          /* "***?" error, msg shows version */
+                       ERR(REG_BADPAT);
+                       return;         /* proceed no further */
+                       break;
+               case CHR('='):          /* "***=" shifts to literal string */
+                       NOTE(REG_UNONPOSIX);
+                       v->cflags |= REG_QUOTE;
+                       v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
+                       v->now += 4;
+                       return;         /* and there can be no more prefixes */
+                       break;
+               case CHR(':'):          /* "***:" shifts to AREs */
+                       NOTE(REG_UNONPOSIX);
+                       v->cflags |= REG_ADVANCED;
+                       v->now += 4;
+                       break;
+               default:                /* otherwise *** is just an error */
+                       ERR(REG_BADRPT);
+                       return;
+                       break;
+               }
+
+       /* BREs and EREs don't get embedded options */
+       if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
+               return;
+
+       /* embedded options (AREs only) */
+       if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
+               NOTE(REG_UNONPOSIX);
+               v->now += 2;
+               for (; !ATEOS() && iscalpha(*v->now); v->now++)
+                       switch (*v->now) {
+                       case CHR('b'):          /* BREs (but why???) */
+                               v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
+                               break;
+                       case CHR('c'):          /* case sensitive */
+                               v->cflags &= ~REG_ICASE;
+                               break;
+                       case CHR('e'):          /* plain EREs */
+                               v->cflags |= REG_EXTENDED;
+                               v->cflags &= ~(REG_ADVF|REG_QUOTE);
+                               break;
+                       case CHR('i'):          /* case insensitive */
+                               v->cflags |= REG_ICASE;
+                               break;
+                       case CHR('m'):          /* Perloid synonym for n */
+                       case CHR('n'):          /* \n affects ^ $ . [^ */
+                               v->cflags |= REG_NEWLINE;
+                               break;
+                       case CHR('p'):          /* ~Perl, \n affects . [^ */
+                               v->cflags |= REG_NLSTOP;
+                               v->cflags &= ~REG_NLANCH;
+                               break;
+                       case CHR('q'):          /* literal string */
+                               v->cflags |= REG_QUOTE;
+                               v->cflags &= ~REG_ADVANCED;
+                               break;
+                       case CHR('s'):          /* single line, \n ordinary */
+                               v->cflags &= ~REG_NEWLINE;
+                               break;
+                       case CHR('t'):          /* tight syntax */
+                               v->cflags &= ~REG_EXPANDED;
+                               break;
+                       case CHR('w'):          /* weird, \n affects ^ $ only */
+                               v->cflags &= ~REG_NLSTOP;
+                               v->cflags |= REG_NLANCH;
+                               break;
+                       case CHR('x'):          /* expanded syntax */
+                               v->cflags |= REG_EXPANDED;
+                               break;
+                       default:
+                               ERR(REG_BADOPT);
+                               return;
+                       }
+               if (!NEXT1(')')) {
+                       ERR(REG_BADOPT);
+                       return;
+               }
+               v->now++;
+               if (v->cflags&REG_QUOTE)
+                       v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
+       }
+}
+
+/*
+ - lexnest - "call a subroutine", interpolating string at the lexical level
+ * Note, this is not a very general facility.  There are a number of
+ * implicit assumptions about what sorts of strings can be subroutines.
+ ^ static VOID lexnest(struct vars *, chr *, chr *);
+ */
+static VOID
+lexnest(v, beginp, endp)
+struct vars *v;
+chr *beginp;                           /* start of interpolation */
+chr *endp;                             /* one past end of interpolation */
+{
+       assert(v->savenow == NULL);     /* only one level of nesting */
+       v->savenow = v->now;
+       v->savestop = v->stop;
+       v->now = beginp;
+       v->stop = endp;
+}
+
+/*
+ * string constants to interpolate as expansions of things like \d
+ */
+static chr backd[] = {         /* \d */
+       CHR('['), CHR('['), CHR(':'),
+       CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+       CHR(':'), CHR(']'), CHR(']')
+};
+static chr backD[] = {         /* \D */
+       CHR('['), CHR('^'), CHR('['), CHR(':'),
+       CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+       CHR(':'), CHR(']'), CHR(']')
+};
+static chr brbackd[] = {       /* \d within brackets */
+       CHR('['), CHR(':'),
+       CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+       CHR(':'), CHR(']')
+};
+static chr backs[] = {         /* \s */
+       CHR('['), CHR('['), CHR(':'),
+       CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+       CHR(':'), CHR(']'), CHR(']')
+};
+static chr backS[] = {         /* \S */
+       CHR('['), CHR('^'), CHR('['), CHR(':'),
+       CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+       CHR(':'), CHR(']'), CHR(']')
+};
+static chr brbacks[] = {       /* \s within brackets */
+       CHR('['), CHR(':'),
+       CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+       CHR(':'), CHR(']')
+};
+static chr backw[] = {         /* \w */
+       CHR('['), CHR('['), CHR(':'),
+       CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+       CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static chr backW[] = {         /* \W */
+       CHR('['), CHR('^'), CHR('['), CHR(':'),
+       CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+       CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static chr brbackw[] = {       /* \w within brackets */
+       CHR('['), CHR(':'),
+       CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+       CHR(':'), CHR(']'), CHR('_')
+};
+
+/*
+ - lexword - interpolate a bracket expression for word characters
+ * Possibly ought to inquire whether there is a "word" character class.
+ ^ static VOID lexword(struct vars *);
+ */
+static VOID
+lexword(v)
+struct vars *v;
+{
+       lexnest(v, backw, ENDOF(backw));
+}
+
+/*
+ - next - get next token
+ ^ static int next(struct vars *);
+ */
+static int                     /* 1 normal, 0 failure */
+next(v)
+struct vars *v;
+{
+       chr c;
+
+       /* errors yield an infinite sequence of failures */
+       if (ISERR())
+               return 0;       /* the error has set nexttype to EOS */
+
+       /* remember flavor of last token */
+       v->lasttype = v->nexttype;
+
+       /* REG_BOSONLY */
+       if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
+               /* at start of a REG_BOSONLY RE */
+               RETV(SBEGIN, 0);                /* same as \A */
+       }
+
+       /* if we're nested and we've hit end, return to outer level */
+       if (v->savenow != NULL && ATEOS()) {
+               v->now = v->savenow;
+               v->stop = v->savestop;
+               v->savenow = v->savestop = NULL;
+       }
+
+       /* skip white space etc. if appropriate (not in literal or []) */
+       if (v->cflags&REG_EXPANDED)
+               switch (v->lexcon) {
+               case L_ERE:
+               case L_BRE:
+               case L_EBND:
+               case L_BBND:
+                       skip(v);
+                       break;
+               }
+
+       /* handle EOS, depending on context */
+       if (ATEOS()) {
+               switch (v->lexcon) {
+               case L_ERE:
+               case L_BRE:
+               case L_Q:
+                       RET(EOS);
+                       break;
+               case L_EBND:
+               case L_BBND:
+                       FAILW(REG_EBRACE);
+                       break;
+               case L_BRACK:
+               case L_CEL:
+               case L_ECL:
+               case L_CCL:
+                       FAILW(REG_EBRACK);
+                       break;
+               }
+               assert(NOTREACHED);
+       }
+
+       /* okay, time to actually get a character */
+       c = *v->now++;
+
+       /* deal with the easy contexts, punt EREs to code below */
+       switch (v->lexcon) {
+       case L_BRE:                     /* punt BREs to separate function */
+               return brenext(v, c);
+               break;
+       case L_ERE:                     /* see below */
+               break;
+       case L_Q:                       /* literal strings are easy */
+               RETV(PLAIN, c);
+               break;
+       case L_BBND:                    /* bounds are fairly simple */
+       case L_EBND:
+               switch (c) {
+               case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+               case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+               case CHR('8'): case CHR('9'):
+                       RETV(DIGIT, (chr)DIGITVAL(c));
+                       break;
+               case CHR(','):
+                       RET(',');
+                       break;
+               case CHR('}'):          /* ERE bound ends with } */
+                       if (INCON(L_EBND)) {
+                               INTOCON(L_ERE);
+                               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                                       v->now++;
+                                       NOTE(REG_UNONPOSIX);
+                                       RETV('}', 0);
+                               }
+                               RETV('}', 1);
+                       } else
+                               FAILW(REG_BADBR);
+                       break;
+               case CHR('\\'):         /* BRE bound ends with \} */
+                       if (INCON(L_BBND) && NEXT1('}')) {
+                               v->now++;
+                               INTOCON(L_BRE);
+                               RET('}');
+                       } else
+                               FAILW(REG_BADBR);
+                       break;
+               default:
+                       FAILW(REG_BADBR);
+                       break;
+               }
+               assert(NOTREACHED);
+               break;
+       case L_BRACK:                   /* brackets are not too hard */
+               switch (c) {
+               case CHR(']'):
+                       if (LASTTYPE('['))
+                               RETV(PLAIN, c);
+                       else {
+                               INTOCON((v->cflags&REG_EXTENDED) ?
+                                                       L_ERE : L_BRE);
+                               RET(']');
+                       }
+                       break;
+               case CHR('\\'):
+                       NOTE(REG_UBBS);
+                       if (!(v->cflags&REG_ADVF))
+                               RETV(PLAIN, c);
+                       NOTE(REG_UNONPOSIX);
+                       if (ATEOS())
+                               FAILW(REG_EESCAPE);
+                       (DISCARD)lexescape(v);
+                       switch (v->nexttype) {  /* not all escapes okay here */
+                       case PLAIN:
+                               return 1;
+                               break;
+                       case CCLASS:
+                               switch (v->nextvalue) {
+                               case 'd':
+                                       lexnest(v, brbackd, ENDOF(brbackd));
+                                       break;
+                               case 's':
+                                       lexnest(v, brbacks, ENDOF(brbacks));
+                                       break;
+                               case 'w':
+                                       lexnest(v, brbackw, ENDOF(brbackw));
+                                       break;
+                               default:
+                                       FAILW(REG_EESCAPE);
+                                       break;
+                               }
+                               /* lexnest done, back up and try again */
+                               v->nexttype = v->lasttype;
+                               return next(v);
+                               break;
+                       }
+                       /* not one of the acceptable escapes */
+                       FAILW(REG_EESCAPE);
+                       break;
+               case CHR('-'):
+                       if (LASTTYPE('[') || NEXT1(']'))
+                               RETV(PLAIN, c);
+                       else
+                               RETV(RANGE, c);
+                       break;
+               case CHR('['):
+                       if (ATEOS())
+                               FAILW(REG_EBRACK);
+                       switch (*v->now++) {
+                       case CHR('.'):
+                               INTOCON(L_CEL);
+                               /* might or might not be locale-specific */
+                               RET(COLLEL);
+                               break;
+                       case CHR('='):
+                               INTOCON(L_ECL);
+                               NOTE(REG_ULOCALE);
+                               RET(ECLASS);
+                               break;
+                       case CHR(':'):
+                               INTOCON(L_CCL);
+                               NOTE(REG_ULOCALE);
+                               RET(CCLASS);
+                               break;
+                       default:                        /* oops */
+                               v->now--;
+                               RETV(PLAIN, c);
+                               break;
+                       }
+                       assert(NOTREACHED);
+                       break;
+               default:
+                       RETV(PLAIN, c);
+                       break;
+               }
+               assert(NOTREACHED);
+               break;
+       case L_CEL:                     /* collating elements are easy */
+               if (c == CHR('.') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, '.');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       case L_ECL:                     /* ditto equivalence classes */
+               if (c == CHR('=') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, '=');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       case L_CCL:                     /* ditto character classes */
+               if (c == CHR(':') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, ':');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       default:
+               assert(NOTREACHED);
+               break;
+       }
+
+       /* that got rid of everything except EREs and AREs */
+       assert(INCON(L_ERE));
+
+       /* deal with EREs and AREs, except for backslashes */
+       switch (c) {
+       case CHR('|'):
+               RET('|');
+               break;
+       case CHR('*'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('*', 0);
+               }
+               RETV('*', 1);
+               break;
+       case CHR('+'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('+', 0);
+               }
+               RETV('+', 1);
+               break;
+       case CHR('?'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('?', 0);
+               }
+               RETV('?', 1);
+               break;
+       case CHR('{'):          /* bounds start or plain character */
+               if (v->cflags&REG_EXPANDED)
+                       skip(v);
+               if (ATEOS() || !iscdigit(*v->now)) {
+                       NOTE(REG_UBRACES);
+                       NOTE(REG_UUNSPEC);
+                       RETV(PLAIN, c);
+               } else {
+                       NOTE(REG_UBOUNDS);
+                       INTOCON(L_EBND);
+                       RET('{');
+               }
+               assert(NOTREACHED);
+               break;
+       case CHR('('):          /* parenthesis, or advanced extension */
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       NOTE(REG_UNONPOSIX);
+                       v->now++;
+                       switch (*v->now++) {
+                       case CHR(':'):          /* non-capturing paren */
+                               RETV('(', 0);
+                               break;
+                       case CHR('#'):          /* comment */
+                               while (!ATEOS() && *v->now != CHR(')'))
+                                       v->now++;
+                               if (!ATEOS())
+                                       v->now++;
+                               assert(v->nexttype == v->lasttype);
+                               return next(v);
+                               break;
+                       case CHR('='):          /* positive lookahead */
+                               NOTE(REG_ULOOKAHEAD);
+                               RETV(LACON, 1);
+                               break;
+                       case CHR('!'):          /* negative lookahead */
+                               NOTE(REG_ULOOKAHEAD);
+                               RETV(LACON, 0);
+                               break;
+                       default:
+                               FAILW(REG_BADRPT);
+                               break;
+                       }
+                       assert(NOTREACHED);
+               }
+               if (v->cflags&REG_NOSUB)
+                       RETV('(', 0);           /* all parens non-capturing */
+               else
+                       RETV('(', 1);
+               break;
+       case CHR(')'):
+               if (LASTTYPE('(')) {
+                       NOTE(REG_UUNSPEC);
+               }
+               RETV(')', c);
+               break;
+       case CHR('['):          /* easy except for [[:<:]] and [[:>:]] */
+               if (HAVE(6) &&  *(v->now+0) == CHR('[') &&
+                               *(v->now+1) == CHR(':') &&
+                               (*(v->now+2) == CHR('<') ||
+                                               *(v->now+2) == CHR('>')) &&
+                               *(v->now+3) == CHR(':') &&
+                               *(v->now+4) == CHR(']') &&
+                               *(v->now+5) == CHR(']')) {
+                       c = *(v->now+2);
+                       v->now += 6;
+                       NOTE(REG_UNONPOSIX);
+                       RET((c == CHR('<')) ? '<' : '>');
+               }
+               INTOCON(L_BRACK);
+               if (NEXT1('^')) {
+                       v->now++;
+                       RETV('[', 0);
+               }
+               RETV('[', 1);
+               break;
+       case CHR('.'):
+               RET('.');
+               break;
+       case CHR('^'):
+               RET('^');
+               break;
+       case CHR('$'):
+               RET('$');
+               break;
+       case CHR('\\'):         /* mostly punt backslashes to code below */
+               if (ATEOS())
+                       FAILW(REG_EESCAPE);
+               break;
+       default:                /* ordinary character */
+               RETV(PLAIN, c);
+               break;
+       }
+
+       /* ERE/ARE backslash handling; backslash already eaten */
+       assert(!ATEOS());
+       if (!(v->cflags&REG_ADVF)) {    /* only AREs have non-trivial escapes */
+               if (iscalnum(*v->now)) {
+                       NOTE(REG_UBSALNUM);
+                       NOTE(REG_UUNSPEC);
+               }
+               RETV(PLAIN, *v->now++);
+       }
+       (DISCARD)lexescape(v);
+       if (ISERR())
+               FAILW(REG_EESCAPE);
+       if (v->nexttype == CCLASS) {    /* fudge at lexical level */
+               switch (v->nextvalue) {
+               case 'd':       lexnest(v, backd, ENDOF(backd)); break;
+               case 'D':       lexnest(v, backD, ENDOF(backD)); break;
+               case 's':       lexnest(v, backs, ENDOF(backs)); break;
+               case 'S':       lexnest(v, backS, ENDOF(backS)); break;
+               case 'w':       lexnest(v, backw, ENDOF(backw)); break;
+               case 'W':       lexnest(v, backW, ENDOF(backW)); break;
+               default:
+                       assert(NOTREACHED);
+                       FAILW(REG_ASSERT);
+                       break;
+               }
+               /* lexnest done, back up and try again */
+               v->nexttype = v->lasttype;
+               return next(v);
+       }
+       /* otherwise, lexescape has already done the work */
+       return !ISERR();
+}
+
+/*
+ - lexescape - parse an ARE backslash escape (backslash already eaten)
+ * Note slightly nonstandard use of the CCLASS type code.
+ ^ static int lexescape(struct vars *);
+ */
+static int                     /* not actually used, but convenient for RETV */
+lexescape(v)
+struct vars *v;
+{
+       chr c;
+       static chr alert[] = {
+               CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
+       };
+       static chr esc[] = {
+               CHR('E'), CHR('S'), CHR('C')
+       };
+       chr *save;
+
+       assert(v->cflags&REG_ADVF);
+
+       assert(!ATEOS());
+       c = *v->now++;
+       if (!iscalnum(c))
+               RETV(PLAIN, c);
+
+       NOTE(REG_UNONPOSIX);
+       switch (c) {
+       case CHR('a'):
+               RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+               break;
+       case CHR('A'):
+               RETV(SBEGIN, 0);
+               break;
+       case CHR('b'):
+               RETV(PLAIN, CHR('\b'));
+               break;
+       case CHR('B'):
+               RETV(PLAIN, CHR('\\'));
+               break;
+       case CHR('c'):
+               NOTE(REG_UUNPORT);
+               if (ATEOS())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, (chr)(*v->now++ & 037));
+               break;
+       case CHR('d'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'd');
+               break;
+       case CHR('D'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'D');
+               break;
+       case CHR('e'):
+               NOTE(REG_UUNPORT);
+               RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+               break;
+       case CHR('f'):
+               RETV(PLAIN, CHR('\f'));
+               break;
+       case CHR('m'):
+               RET('<');
+               break;
+       case CHR('M'):
+               RET('>');
+               break;
+       case CHR('n'):
+               RETV(PLAIN, CHR('\n'));
+               break;
+       case CHR('r'):
+               RETV(PLAIN, CHR('\r'));
+               break;
+       case CHR('s'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 's');
+               break;
+       case CHR('S'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'S');
+               break;
+       case CHR('t'):
+               RETV(PLAIN, CHR('\t'));
+               break;
+       case CHR('u'):
+               c = lexdigits(v, 16, 4, 4);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('U'):
+               c = lexdigits(v, 16, 8, 8);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('v'):
+               RETV(PLAIN, CHR('\v'));
+               break;
+       case CHR('w'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'w');
+               break;
+       case CHR('W'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'W');
+               break;
+       case CHR('x'):
+               NOTE(REG_UUNPORT);
+               c = lexdigits(v, 16, 1, 255);   /* REs >255 long outside spec */
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('y'):
+               NOTE(REG_ULOCALE);
+               RETV(WBDRY, 0);
+               break;
+       case CHR('Y'):
+               NOTE(REG_ULOCALE);
+               RETV(NWBDRY, 0);
+               break;
+       case CHR('Z'):
+               RETV(SEND, 0);
+               break;
+       case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+       case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+       case CHR('9'):
+               save = v->now;
+               v->now--;       /* put first digit back */
+               c = lexdigits(v, 10, 1, 255);   /* REs >255 long outside spec */
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               /* ugly heuristic (first test is "exactly 1 digit?") */
+               if (v->now - save == 0 || (int)c <= v->nsubexp) {
+                       NOTE(REG_UBACKREF);
+                       RETV(BACKREF, (chr)c);
+               }
+               /* oops, doesn't look like it's a backref after all... */
+               v->now = save;
+               /* and fall through into octal number */
+       case CHR('0'):
+               NOTE(REG_UUNPORT);
+               v->now--;       /* put first digit back */
+               c = lexdigits(v, 8, 1, 3);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       default:
+               assert(iscalpha(c));
+               FAILW(REG_EESCAPE);     /* unknown alphabetic escape */
+               break;
+       }
+       assert(NOTREACHED);
+}
+
+/*
+ - lexdigits - slurp up digits and return chr value
+ ^ static chr lexdigits(struct vars *, int, int, int);
+ */
+static chr                     /* chr value; errors signalled via ERR */
+lexdigits(v, base, minlen, maxlen)
+struct vars *v;
+int base;
+int minlen;
+int maxlen;
+{
+       uchr n;                 /* unsigned to avoid overflow misbehavior */
+       int len;
+       chr c;
+       int d;
+       CONST uchr ub = (uchr) base;
+
+       n = 0;
+       for (len = 0; len < maxlen && !ATEOS(); len++) {
+               c = *v->now++;
+               switch (c) {
+               case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+               case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+               case CHR('8'): case CHR('9'):
+                       d = DIGITVAL(c);
+                       break;
+               case CHR('a'): case CHR('A'): d = 10; break;
+               case CHR('b'): case CHR('B'): d = 11; break;
+               case CHR('c'): case CHR('C'): d = 12; break;
+               case CHR('d'): case CHR('D'): d = 13; break;
+               case CHR('e'): case CHR('E'): d = 14; break;
+               case CHR('f'): case CHR('F'): d = 15; break;
+               default:
+                       v->now--;       /* oops, not a digit at all */
+                       d = -1;
+                       break;
+               }
+
+               if (d >= base) {        /* not a plausible digit */
+                       v->now--;
+                       d = -1;
+               }
+               if (d < 0)
+                       break;          /* NOTE BREAK OUT */
+               n = n*ub + (uchr)d;
+       }
+       if (len < minlen)
+               ERR(REG_EESCAPE);
+
+       return (chr)n;
+}
+
+/*
+ - brenext - get next BRE token
+ * This is much like EREs except for all the stupid backslashes and the
+ * context-dependency of some things.
+ ^ static int brenext(struct vars *, pchr);
+ */
+static int                     /* 1 normal, 0 failure */
+brenext(v, pc)
+struct vars *v;
+pchr pc;
+{
+       chr c = (chr)pc;
+
+       switch (c) {
+       case CHR('*'):
+               if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
+                       RETV(PLAIN, c);
+               RET('*');
+               break;
+       case CHR('['):
+               if (HAVE(6) &&  *(v->now+0) == CHR('[') &&
+                               *(v->now+1) == CHR(':') &&
+                               (*(v->now+2) == CHR('<') ||
+                                               *(v->now+2) == CHR('>')) &&
+                               *(v->now+3) == CHR(':') &&
+                               *(v->now+4) == CHR(']') &&
+                               *(v->now+5) == CHR(']')) {
+                       c = *(v->now+2);
+                       v->now += 6;
+                       NOTE(REG_UNONPOSIX);
+                       RET((c == CHR('<')) ? '<' : '>');
+               }
+               INTOCON(L_BRACK);
+               if (NEXT1('^')) {
+                       v->now++;
+                       RETV('[', 0);
+               }
+               RETV('[', 1);
+               break;
+       case CHR('.'):
+               RET('.');
+               break;
+       case CHR('^'):
+               if (LASTTYPE(EMPTY))
+                       RET('^');
+               if (LASTTYPE('(')) {
+                       NOTE(REG_UUNSPEC);
+                       RET('^');
+               }
+               RETV(PLAIN, c);
+               break;
+       case CHR('$'):
+               if (v->cflags&REG_EXPANDED)
+                       skip(v);
+               if (ATEOS())
+                       RET('$');
+               if (NEXT2('\\', ')')) {
+                       NOTE(REG_UUNSPEC);
+                       RET('$');
+               }
+               RETV(PLAIN, c);
+               break;
+       case CHR('\\'):
+               break;          /* see below */
+       default:
+               RETV(PLAIN, c);
+               break;
+       }
+
+       assert(c == CHR('\\'));
+
+       if (ATEOS())
+               FAILW(REG_EESCAPE);
+
+       c = *v->now++;
+       switch (c) {
+       case CHR('{'):
+               INTOCON(L_BBND);
+               NOTE(REG_UBOUNDS);
+               RET('{');
+               break;
+       case CHR('('):
+               RETV('(', 1);
+               break;
+       case CHR(')'):
+               RETV(')', c);
+               break;
+       case CHR('<'):
+               NOTE(REG_UNONPOSIX);
+               RET('<');
+               break;
+       case CHR('>'):
+               NOTE(REG_UNONPOSIX);
+               RET('>');
+               break;
+       case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+       case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+       case CHR('9'):
+               NOTE(REG_UBACKREF);
+               RETV(BACKREF, (chr)DIGITVAL(c));
+               break;
+       default:
+               if (iscalnum(c)) {
+                       NOTE(REG_UBSALNUM);
+                       NOTE(REG_UUNSPEC);
+               }
+               RETV(PLAIN, c);
+               break;
+       }
+
+       assert(NOTREACHED);
+}
+
+/*
+ - skip - skip white space and comments in expanded form
+ ^ static VOID skip(struct vars *);
+ */
+static VOID
+skip(v)
+struct vars *v;
+{
+       chr *start = v->now;
+
+       assert(v->cflags&REG_EXPANDED);
+
+       for (;;) {
+               while (!ATEOS() && iscspace(*v->now))
+                       v->now++;
+               if (ATEOS() || *v->now != CHR('#'))
+                       break;                          /* NOTE BREAK OUT */
+               assert(NEXT1('#'));
+               while (!ATEOS() && *v->now != CHR('\n'))
+                       v->now++;
+               /* leave the newline to be picked up by the iscspace loop */
+       }
+
+       if (v->now != start)
+               NOTE(REG_UNONPOSIX);
+}
+
+/*
+ - newline - return the chr for a newline
+ * This helps confine use of CHR to this source file.
+ ^ static chr newline(NOPARMS);
+ */
+static chr
+newline()
+{
+       return CHR('\n');
+}
+
+/*
+ - ch - return the chr sequence for regc_locale.c's fake collating element ch
+ * This helps confine use of CHR to this source file.  Beware that the caller
+ * knows how long the sequence is.
+ ^ #ifdef REG_DEBUG
+ ^ static chr *ch(NOPARMS);
+ ^ #endif
+ */
+#ifdef REG_DEBUG
+static chr *
+ch()
+{
+       static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+
+       return chstr;
+}
+#endif
+
+/*
+ - chrnamed - return the chr known by a given (chr string) name
+ * The code is a bit clumsy, but this routine gets only such specialized
+ * use that it hardly matters.
+ ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
+ */
+static chr
+chrnamed(v, startp, endp, lastresort)
+struct vars *v;
+chr *startp;                   /* start of name */
+chr *endp;                     /* just past end of name */
+pchr lastresort;               /* what to return if name lookup fails */
+{
+       celt c;
+       int errsave;
+       int e;
+       struct cvec *cv;
+
+       errsave = v->err;
+       v->err = 0;
+       c = element(v, startp, endp);
+       e = v->err;
+       v->err = errsave;
+
+       if (e != 0)
+               return (chr)lastresort;
+
+       cv = range(v, c, c, 0);
+       if (cv->nchrs == 0)
+               return (chr)lastresort;
+       return cv->chrs[0];
+}