Daily bump.

[pf3gnuchains/gcc-fork.git] / gcc / cpplex.c
diff --git a/gcc/cpplex.c b/gcc/cpplex.c

index 9d0a792..b7f6da5 100644 (file)
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -4,6 +4,7 @@
     Based on CCCP program by Paul Rubin, June 1986
     Adapted to ANSI C, Richard Stallman, Jan 1987
     Broken out to separate file, Zack Weinberg, Mar 2000
     Based on CCCP program by Paul Rubin, June 1986
     Adapted to ANSI C, Richard Stallman, Jan 1987
     Broken out to separate file, Zack Weinberg, Mar 2000
+   Single-pass line tokenization by Neil Booth, April 2000
  
  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
  
  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the
@@ -22,10 +23,13 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  #include "config.h"
  #include "system.h"
  #include "intl.h"
  #include "config.h"
  #include "system.h"
  #include "intl.h"
-#include "hashtab.h"
  #include "cpplib.h"
  #include "cpphash.h"
  
  #include "cpplib.h"
  #include "cpphash.h"
  
+#ifdef HAVE_MMAP_FILE
+# include <sys/mman.h>
+#endif
+
  #define PEEKBUF(BUFFER, N) \
    ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
  #define GETBUF(BUFFER) \
  #define PEEKBUF(BUFFER, N) \
    ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
  #define GETBUF(BUFFER) \
@@ -45,12 +49,114 @@ static int copy_comment            PARAMS ((cpp_reader *, int));
  static void skip_string                PARAMS ((cpp_reader *, int));
  static void parse_string       PARAMS ((cpp_reader *, int));
  static U_CHAR *find_position   PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
  static void skip_string                PARAMS ((cpp_reader *, int));
  static void parse_string       PARAMS ((cpp_reader *, int));
  static U_CHAR *find_position   PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
-static int null_cleanup                PARAMS ((cpp_buffer *, cpp_reader *));
  static void null_warning        PARAMS ((cpp_reader *, unsigned int));
  
  static void safe_fwrite                PARAMS ((cpp_reader *, const U_CHAR *,
                                          size_t, FILE *));
  static void null_warning        PARAMS ((cpp_reader *, unsigned int));
  
  static void safe_fwrite                PARAMS ((cpp_reader *, const U_CHAR *,
                                          size_t, FILE *));
-static void output_line_command        PARAMS ((cpp_reader *, cpp_printer *));
+static void output_line_command        PARAMS ((cpp_reader *, cpp_printer *,
+                                        unsigned int));
+static void bump_column                PARAMS ((cpp_printer *, unsigned int,
+                                        unsigned int));
+static void expand_name_space   PARAMS ((cpp_toklist *, unsigned int));
+static void pedantic_whitespace        PARAMS ((cpp_reader *, U_CHAR *,
+                                        unsigned int));
+
+#define auto_expand_name_space(list) \
+    expand_name_space ((list), 1 + (list)->name_cap / 2)
+
+#ifdef NEW_LEXER
+
+void init_trigraph_map PARAMS ((void));
+static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
+                                               unsigned char *));
+static const unsigned char *backslash_start PARAMS ((cpp_reader *,
+                                                    const unsigned char *));
+static int skip_block_comment2 PARAMS ((cpp_reader *));
+static int skip_line_comment2 PARAMS ((cpp_reader *));
+static void skip_whitespace PARAMS ((cpp_reader *, int));
+static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
+static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
+static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
+                                 unsigned int, int));
+static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
+static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
+                                 const unsigned char *,
+                                 unsigned int, unsigned int));
+void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
+
+static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
+
+static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
+                                           unsigned char *, int));
+
+typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
+                                         cpp_token *));
+
+/* Macros on a cpp_name.  */
+#define INIT_TOKEN_NAME(list, token) \
+  do {(token)->val.name.len = 0; \
+      (token)->val.name.text = (list)->namebuf + (list)->name_used; \
+      (list)->tokens_used = token - (list)->tokens + 1; \
+  } while (0)
+
+/* Maybe put these in the ISTABLE eventually.  */
+#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
+#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
+
+/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
+   character, if any, is in buffer.  */
+#define handle_newline(cur, limit, c) \
+  do {\
+  if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
+    (cur)++; \
+  CPP_BUMP_LINE_CUR (pfile, (cur)); \
+  pfile->col_adjust = 0; \
+  } while (0)
+
+#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
+#define PREV_TOKEN_TYPE (cur_token[-1].type)
+
+#define PUSH_TOKEN(ttype) cur_token++->type = ttype
+#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
+#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
+#define BACKUP_DIGRAPH(ttype) do { \
+  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
+
+/* An upper bound on the number of bytes needed to spell a token,
+   including preceding whitespace.  */
+#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
+                              SPELL_NONE ? (token)->val.name.len: 0))
+
+#endif
+
+/* Order here matters.  Those beyond SPELL_NONE store their spelling
+   in the token list, and it's length in the token->val.name.len.  */
+enum spell_type
+{
+  SPELL_OPERATOR = 0,
+  SPELL_NONE,
+  SPELL_CHAR,    /* FIXME: revert order of NONE and CHAR after transition. */
+  SPELL_IDENT,
+  SPELL_STRING
+};
+
+#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
+#define I(e, s) {SPELL_IDENT, s},
+#define S(e, s) {SPELL_STRING, s},
+#define C(e, s) {SPELL_CHAR, s},
+#define N(e, s) {SPELL_NONE, s},
+
+static const struct token_spelling
+{
+  ENUM_BITFIELD(spell_type) type : CHAR_BIT;
+  const U_CHAR *spelling;
+} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
+
+#undef T
+#undef I
+#undef S
+#undef C
+#undef N
  
  /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
  
  
  /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
  
@@ -66,14 +172,6 @@ _cpp_grow_token_buffer (pfile, n)
    CPP_SET_WRITTEN (pfile, old_written);
  }
  
    CPP_SET_WRITTEN (pfile, old_written);
  }
  
-static int
-null_cleanup (pbuf, pfile)
-     cpp_buffer *pbuf ATTRIBUTE_UNUSED;
-     cpp_reader *pfile ATTRIBUTE_UNUSED;
-{
-  return 0;
-}
-
  /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
     If BUFFER != NULL, then use the LENGTH characters in BUFFER
     as the new input buffer.
  /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
     If BUFFER != NULL, then use the LENGTH characters in BUFFER
     as the new input buffer.
@@ -95,8 +193,6 @@ cpp_push_buffer (pfile, buffer, length)
  
    new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
  
  
    new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
  
-  new->if_stack = pfile->if_stack;
-  new->cleanup = null_cleanup;
    new->buf = new->cur = buffer;
    new->rlimit = buffer + length;
    new->prev = buf;
    new->buf = new->cur = buffer;
    new->rlimit = buffer + length;
    new->prev = buf;
@@ -114,7 +210,39 @@ cpp_pop_buffer (pfile)
    cpp_buffer *buf = CPP_BUFFER (pfile);
    if (ACTIVE_MARK_P (pfile))
      cpp_ice (pfile, "mark active in cpp_pop_buffer");
    cpp_buffer *buf = CPP_BUFFER (pfile);
    if (ACTIVE_MARK_P (pfile))
      cpp_ice (pfile, "mark active in cpp_pop_buffer");
-  (*buf->cleanup) (buf, pfile);
+
+  if (buf->inc)
+    {
+      _cpp_unwind_if_stack (pfile, buf);
+      if (buf->buf)
+       free ((PTR) buf->buf);
+      if (pfile->system_include_depth)
+       pfile->system_include_depth--;
+      if (pfile->potential_control_macro)
+       {
+         if (buf->inc->cmacro != NEVER_REREAD)
+           buf->inc->cmacro = pfile->potential_control_macro;
+         pfile->potential_control_macro = 0;
+       }
+      pfile->input_stack_listing_current = 0;
+      /* If the file will not be included again, then close it.  */
+      if (DO_NOT_REREAD (buf->inc))
+       {
+         close (buf->inc->fd);
+         buf->inc->fd = -1;
+       }
+    }
+  else if (buf->macro)
+    {
+      cpp_hashnode *m = buf->macro;
+  
+      m->disabled = 0;
+      if ((m->type == T_FMACRO && buf->mapped)
+         || m->type == T_SPECLINE || m->type == T_FILE
+         || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
+         || m->type == T_STDC)
+       free ((PTR) buf->buf);
+    }
    CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
    free (buf);
    pfile->buffer_stack_depth--;
    CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
    free (buf);
    pfile->buffer_stack_depth--;
@@ -149,23 +277,18 @@ safe_fwrite (pfile, buf, len, fp)
     or the current file name has changed.  */
  
  static void
     or the current file name has changed.  */
  
  static void
-output_line_command (pfile, print)
+output_line_command (pfile, print, line)
       cpp_reader *pfile;
       cpp_printer *print;
       cpp_reader *pfile;
       cpp_printer *print;
+     unsigned int line;
  {
  {
-  unsigned int line;
-  cpp_buffer *ip;
+  cpp_buffer *ip = cpp_file_buffer (pfile);
    enum { same = 0, enter, leave, rname } change;
    static const char * const codes[] = { "", " 1", " 2", "" };
  
    if (CPP_OPTION (pfile, no_line_commands))
      return;
  
    enum { same = 0, enter, leave, rname } change;
    static const char * const codes[] = { "", " 1", " 2", "" };
  
    if (CPP_OPTION (pfile, no_line_commands))
      return;
  
-  ip = cpp_file_buffer (pfile);
-  if (ip == NULL)
-    return;
-  line = CPP_BUF_LINE (ip);
-
    /* Determine whether the current filename has changed, and if so,
       how.  'nominal_fname' values are unique, so they can be compared
       by comparing pointers.  */
    /* Determine whether the current filename has changed, and if so,
       how.  'nominal_fname' values are unique, so they can be compared
       by comparing pointers.  */
@@ -205,13 +328,13 @@ output_line_command (pfile, print)
    if (CPP_OPTION (pfile, cplusplus))
      fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
              codes[change],
    if (CPP_OPTION (pfile, cplusplus))
      fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
              codes[change],
-            ip->system_header_p ? " 3" : "",
-            (ip->system_header_p == 2) ? " 4" : "");
+            ip->inc->sysp ? " 3" : "",
+            (ip->inc->sysp == 2) ? " 4" : "");
    else
  #endif
      fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
              codes[change],
    else
  #endif
      fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
              codes[change],
-            ip->system_header_p ? " 3" : "");
+            ip->inc->sysp ? " 3" : "");
    print->lineno = line;
  }
  
    print->lineno = line;
  }
  
@@ -224,6 +347,8 @@ cpp_output_tokens (pfile, print)
       cpp_reader *pfile;
       cpp_printer *print;
  {
       cpp_reader *pfile;
       cpp_printer *print;
  {
+  cpp_buffer *ip;
+
    if (CPP_WRITTEN (pfile) - print->written)
      {
        if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
    if (CPP_WRITTEN (pfile) - print->written)
      {
        if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
@@ -231,10 +356,78 @@ cpp_output_tokens (pfile, print)
        safe_fwrite (pfile, pfile->token_buffer,
                    CPP_WRITTEN (pfile) - print->written, print->outf);
      }
        safe_fwrite (pfile, pfile->token_buffer,
                    CPP_WRITTEN (pfile) - print->written, print->outf);
      }
-  output_line_command (pfile, print);
+
+  ip = cpp_file_buffer (pfile);
+  if (ip)
+    output_line_command (pfile, print, CPP_BUF_LINE (ip));
+
    CPP_SET_WRITTEN (pfile, print->written);
  }
  
    CPP_SET_WRITTEN (pfile, print->written);
  }
  
+/* Helper for cpp_output_list - increases the column number to match
+   what we expect it to be.  */
+
+static void
+bump_column (print, from, to)
+     cpp_printer *print;
+     unsigned int from, to;
+{
+  unsigned int tabs, spcs;
+  unsigned int delta = to - from;
+
+  /* Only if FROM is 0, advance by tabs.  */
+  if (from == 0)
+    tabs = delta / 8, spcs = delta % 8;
+  else
+    tabs = 0, spcs = delta;
+
+  while (tabs--) putc ('\t', print->outf);
+  while (spcs--) putc (' ', print->outf);
+}
+
+/* Write out the list L onto pfile->token_buffer.  This function is
+   incomplete:
+
+   1) pfile->token_buffer is not going to continue to exist.
+   2) At the moment, tokens don't carry the information described
+   in cpplib.h; they are all strings.
+   3) The list has to be a complete line, and has to be written starting
+   at the beginning of a line.  */
+
+void
+cpp_output_list (pfile, print, list)
+     cpp_reader *pfile;
+     cpp_printer *print;
+     const cpp_toklist *list;
+{
+  unsigned int i;
+  unsigned int curcol = 1;
+
+  /* XXX Probably does not do what is intended.  */
+  if (print->lineno != list->line)
+    output_line_command (pfile, print, list->line);
+  
+  for (i = 0; i < list->tokens_used; i++)
+    {
+      if (TOK_TYPE (list, i) == CPP_VSPACE)
+       {
+         output_line_command (pfile, print, list->tokens[i].aux);
+         continue;
+       }
+         
+      if (curcol < TOK_COL (list, i))
+       {
+         /* Insert space to bring the column to what it should be.  */
+         bump_column (print, curcol - 1, TOK_COL (list, i));
+         curcol = TOK_COL (list, i);
+       }
+      /* XXX We may have to insert space to prevent an accidental
+        token paste.  */
+      safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
+      curcol += TOK_LEN (list, i);
+    }
+}
+
  /* Scan a string (which may have escape marks), perform macro expansion,
     and write the result to the token_buffer.  */
  
  /* Scan a string (which may have escape marks), perform macro expansion,
     and write the result to the token_buffer.  */
  
@@ -244,7 +437,7 @@ _cpp_expand_to_buffer (pfile, buf, length)
       const U_CHAR *buf;
       int length;
  {
       const U_CHAR *buf;
       int length;
  {
-  cpp_buffer *ip;
+  cpp_buffer *stop;
    enum cpp_ttype token;
    U_CHAR *buf1;
  
    enum cpp_ttype token;
    U_CHAR *buf1;
  
@@ -261,33 +454,27 @@ _cpp_expand_to_buffer (pfile, buf, length)
    memcpy (buf1, buf, length);
  
    /* Set up the input on the input stack.  */
    memcpy (buf1, buf, length);
  
    /* Set up the input on the input stack.  */
-  ip = cpp_push_buffer (pfile, buf1, length);
-  if (ip == NULL)
+  stop = CPP_BUFFER (pfile);
+  if (cpp_push_buffer (pfile, buf1, length) == NULL)
      return;
      return;
-  ip->has_escapes = 1;
+  CPP_BUFFER (pfile)->has_escapes = 1;
  
    /* Scan the input, create the output.  */
    for (;;)
      {
        token = cpp_get_token (pfile);
  
    /* Scan the input, create the output.  */
    for (;;)
      {
        token = cpp_get_token (pfile);
-      if (token == CPP_EOF)
+      if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
         break;
         break;
-      if (token == CPP_POP && CPP_BUFFER (pfile) == ip)
-       {
-         cpp_pop_buffer (pfile);
-         break;
-       }
      }
  }
  
      }
  }
  
-/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
-   Then pop the buffer.  */
+/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
  
  void
  cpp_scan_buffer_nooutput (pfile)
       cpp_reader *pfile;
  {
  
  void
  cpp_scan_buffer_nooutput (pfile)
       cpp_reader *pfile;
  {
-  cpp_buffer *buffer = CPP_BUFFER (pfile);
+  cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
    enum cpp_ttype token;
    unsigned int old_written = CPP_WRITTEN (pfile);
    /* In no-output mode, we can ignore everything but directives.  */
    enum cpp_ttype token;
    unsigned int old_written = CPP_WRITTEN (pfile);
    /* In no-output mode, we can ignore everything but directives.  */
@@ -296,45 +483,33 @@ cpp_scan_buffer_nooutput (pfile)
        if (! pfile->only_seen_white)
         _cpp_skip_rest_of_line (pfile);
        token = cpp_get_token (pfile);
        if (! pfile->only_seen_white)
         _cpp_skip_rest_of_line (pfile);
        token = cpp_get_token (pfile);
-      if (token == CPP_EOF)
+      if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
         break;
         break;
-      if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
-       {
-         cpp_pop_buffer (pfile);
-         break;
-       }
      }
    CPP_SET_WRITTEN (pfile, old_written);
  }
  
      }
    CPP_SET_WRITTEN (pfile, old_written);
  }
  
-/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
-   Then pop the buffer.  */
+/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
  
  void
  cpp_scan_buffer (pfile, print)
       cpp_reader *pfile;
       cpp_printer *print;
  {
  
  void
  cpp_scan_buffer (pfile, print)
       cpp_reader *pfile;
       cpp_printer *print;
  {
-  cpp_buffer *buffer = CPP_BUFFER (pfile);
+  cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
    enum cpp_ttype token;
  
    for (;;)
      {
        token = cpp_get_token (pfile);
    enum cpp_ttype token;
  
    for (;;)
      {
        token = cpp_get_token (pfile);
-      if ((token == CPP_POP && !CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
-         || token == CPP_EOF || token == CPP_VSPACE
+      if (token == CPP_VSPACE || token == CPP_EOF
           /* XXX Temporary kluge - force flush after #include only */
           || (token == CPP_DIRECTIVE
               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
         {
           cpp_output_tokens (pfile, print);
           /* XXX Temporary kluge - force flush after #include only */
           || (token == CPP_DIRECTIVE
               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
         {
           cpp_output_tokens (pfile, print);
-         if (token == CPP_EOF)
+         if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
             return;
             return;
-         if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
-           {
-             cpp_pop_buffer (pfile);
-             return;
-           }
         }
      }
  }
         }
      }
  }
@@ -348,11 +523,314 @@ cpp_file_buffer (pfile)
    cpp_buffer *ip;
  
    for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
    cpp_buffer *ip;
  
    for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
-    if (ip->ihash != NULL)
+    if (ip->inc != NULL)
        return ip;
    return NULL;
  }
  
        return ip;
    return NULL;
  }
  
+/* Token-buffer helper functions.  */
+
+/* Expand a token list's string space. It is *vital* that
+   list->tokens_used is correct, to get pointer fix-up right.  */
+static void
+expand_name_space (list, len)
+     cpp_toklist *list;
+     unsigned int len;
+{
+  const U_CHAR *old_namebuf;
+
+  old_namebuf = list->namebuf;
+  list->name_cap += len;
+  list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
+
+  /* Fix up token text pointers.  */
+  if (list->namebuf != old_namebuf)
+    {
+      unsigned int i;
+
+      for (i = 0; i < list->tokens_used; i++)
+       if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
+         list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
+    }
+}
+
+/* Expand the number of tokens in a list.  */
+void
+_cpp_expand_token_space (list, count)
+     cpp_toklist *list;
+     unsigned int count;
+{
+  unsigned int n;
+
+  list->tokens_cap += count;
+  n = list->tokens_cap;
+  if (list->flags & LIST_OFFSET)
+    list->tokens--, n++;
+  list->tokens = (cpp_token *)
+    xrealloc (list->tokens, n * sizeof (cpp_token));
+  if (list->flags & LIST_OFFSET)
+    list->tokens++;            /* Skip the dummy.  */
+}
+
+/* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
+   an extra token in front of the token list, as this allows the lexer
+   to always peek at the previous token without worrying about
+   underflowing the list, and some initial space.  Otherwise, no
+   token- or name-space is allocated, and there is no dummy token.  */
+void
+_cpp_init_toklist (list, flags)
+     cpp_toklist *list;
+     int flags;
+{
+  /* We malloc zero bytes because we may want to realloc later, and
+     some old implementations don't like realloc-ing a null pointer.  */
+  if (flags == NO_DUMMY_TOKEN)
+    {
+      list->tokens_cap = 0;
+      list->tokens = (cpp_token *) malloc (0);
+      list->name_cap = 0;
+      list->flags = 0;
+    }
+  else
+    {
+      /* Initialize token space.  Put a dummy token before the start
+        that will fail matches.  */
+      list->tokens_cap = 256;  /* 4K's worth.  */
+      list->tokens = (cpp_token *)
+       xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
+      list->tokens[0].type = CPP_EOF;
+      list->tokens++;
+
+      /* Initialize name space.  */
+      list->name_cap = 1024;
+      list->flags = LIST_OFFSET;
+    }
+
+  /* Allocate name space.  */
+  list->namebuf = (unsigned char *) xmalloc (list->name_cap);
+
+  _cpp_clear_toklist (list);
+}
+
+/* Clear a token list.  */
+void
+_cpp_clear_toklist (list)
+     cpp_toklist *list;
+{
+  list->tokens_used = 0;
+  list->name_used = 0;
+  list->dirno = -1;
+  list->flags &= LIST_OFFSET;  /* clear all but that one */
+}
+
+/* Free a token list.  Does not free the list itself, which may be
+   embedded in a larger structure.  */
+void
+_cpp_free_toklist (list)
+     cpp_toklist *list;
+{
+  if (list->flags & LIST_OFFSET)
+    free (list->tokens - 1);   /* Backup over dummy token.  */
+  else
+    free (list->tokens);
+  free (list->namebuf);
+}
+
+/* Slice a token list: copy the sublist [START, FINISH) into COPY.
+   COPY is assumed not to be initialized.  The comment space is not
+   copied.  */
+void
+_cpp_slice_toklist (copy, start, finish)
+     cpp_toklist *copy;
+     const cpp_token *start, *finish;
+{
+  unsigned int i, n;
+  size_t bytes;
+
+  n = finish - start;
+  copy->tokens_cap = n;
+  copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
+  memcpy (copy->tokens, start, n * sizeof (cpp_token));
+
+  bytes = 0;
+  for (i = 0; i < n; i++)
+    if (token_spellings[start[i].type].type > SPELL_NONE)
+      bytes += start[i].val.name.len;
+
+  copy->namebuf = xmalloc (bytes);
+  bytes = 0;
+  for (i = 0; i < n; i++)
+    if (token_spellings[start[i].type].type > SPELL_NONE)
+      {
+       memcpy (copy->namebuf + bytes,
+               start[i].val.name.text, start[i].val.name.len);
+       copy->tokens[i].val.name.text = copy->namebuf + bytes;
+       bytes += start[i].val.name.len;
+      }
+
+  copy->tokens_cap = n;
+  copy->tokens_used = n;
+  copy->name_used = bytes;
+  copy->name_cap = bytes;
+  
+  copy->flags = 0;
+  copy->dirno = -1;
+}
+
+/* Shrink a token list down to the minimum size.  */
+void
+_cpp_squeeze_toklist (list)
+     cpp_toklist *list;
+{
+  long delta;
+  const U_CHAR *old_namebuf;
+
+  if (list->flags & LIST_OFFSET)
+    {
+      list->tokens--;
+      memmove (list->tokens, list->tokens + 1,
+              list->tokens_used * sizeof (cpp_token));
+      list->tokens = xrealloc (list->tokens,
+                              list->tokens_used * sizeof (cpp_token));
+      list->flags &= ~LIST_OFFSET;
+    }
+  else
+    list->tokens = xrealloc (list->tokens,
+                            list->tokens_used * sizeof (cpp_token));
+  list->tokens_cap = list->tokens_used;
+
+  old_namebuf = list->namebuf;
+  list->namebuf = xrealloc (list->namebuf, list->name_used);
+  list->name_cap = list->name_used;
+
+  /* Fix up token text pointers.  */
+  delta = list->namebuf - old_namebuf;
+  if (delta)
+    {
+      unsigned int i;
+
+      for (i = 0; i < list->tokens_used; i++)
+       if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
+         list->tokens[i].val.name.text += delta;
+    }
+}
+
+/* Compare two tokens.  */
+int
+_cpp_equiv_tokens (a, b)
+     const cpp_token *a, *b;
+{
+  if (a->type != b->type
+      || a->flags != b->flags
+      || a->aux != b->aux)
+    return 0;
+
+  if (token_spellings[a->type].type > SPELL_NONE)
+    {
+      if (a->val.name.len != b->val.name.len
+         || ustrncmp(a->val.name.text,
+                     b->val.name.text,
+                     a->val.name.len))
+       return 0;
+    }
+  return 1;
+}
+
+/* Compare two token lists.  */
+int
+_cpp_equiv_toklists (a, b)
+     const cpp_toklist *a, *b;
+{
+  unsigned int i;
+
+  if (a->tokens_used != b->tokens_used)
+    return 0;
+
+  for (i = 0; i < a->tokens_used; i++)
+    if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
+      return 0;
+  return 1;
+}
+
+/* Scan until we encounter a token of type STOP or a newline, and
+   create a token list for it.  Does not macro-expand or execute
+   directives.  The final token is not included in the list or
+   consumed from the input.  Returns the type of the token stopped at. */
+
+enum cpp_ttype
+_cpp_scan_until (pfile, list, stop)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+     enum cpp_ttype stop;
+{
+  int i, col;
+  long written, len;
+  enum cpp_ttype type;
+  int space_before;
+
+  _cpp_clear_toklist (list);
+  list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
+
+  written = CPP_WRITTEN (pfile);
+  i = 0;
+  space_before = 0;
+  for (;;)
+    {
+      col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
+      type = _cpp_lex_token (pfile);
+      len = CPP_WRITTEN (pfile) - written;
+      CPP_SET_WRITTEN (pfile, written);
+      if (type == CPP_HSPACE)
+       {
+         if (CPP_PEDANTIC (pfile))
+           pedantic_whitespace (pfile, pfile->token_buffer + written, len);
+         space_before = 1;
+         continue;
+       }
+      else if (type == CPP_COMMENT)
+       /* Only happens when processing -traditional macro definitions.
+          Do not give this a token entry, but do not change space_before
+          either.  */
+       continue;
+
+      if (list->tokens_used >= list->tokens_cap)
+       _cpp_expand_token_space (list, 256);
+      if (list->name_used + len >= list->name_cap)
+       expand_name_space (list, list->name_used + len + 1 - list->name_cap);
+
+      if (type == CPP_MACRO)
+       type = CPP_NAME;
+
+      if (type == CPP_VSPACE || type == stop)
+       break;
+
+      list->tokens_used++;
+      TOK_TYPE  (list, i) = type;
+      TOK_COL   (list, i) = col;
+      TOK_AUX   (list, i) = 0;
+      TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
+      
+      TOK_LEN (list, i) = len;
+      if (token_spellings[type].type > SPELL_NONE)
+       {
+         memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
+         TOK_NAME (list, i) = list->namebuf + list->name_used;
+         list->name_used += len;
+       }
+      else
+       TOK_NAME (list, i) = token_spellings[type].spelling;
+      i++;
+      space_before = 0;
+    }
+
+  /* XXX Temporary kluge: put back the newline (or whatever).  */
+  FORWARD(-1);
+
+  /* Don't consider the first token to have white before.  */
+  TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
+  return type;
+}
+
  /* Skip a C-style block comment.  We know it's a comment, and point is
     at the second character of the starter.  */
  static void
  /* Skip a C-style block comment.  We know it's a comment, and point is
     at the second character of the starter.  */
  static void
@@ -443,7 +921,7 @@ skip_comment (pfile, m)
      }
    else if (m == '/' && PEEKC() == '/')
      {
      }
    else if (m == '/' && PEEKC() == '/')
      {
-      if (CPP_BUFFER (pfile)->system_header_p)
+      if (CPP_IN_SYSTEM_HEADER (pfile))
         {
           /* We silently allow C++ comments in system headers, irrespective
              of conformance mode, because lots of busted systems do that
         {
           /* We silently allow C++ comments in system headers, irrespective
              of conformance mode, because lots of busted systems do that
@@ -453,13 +931,17 @@ skip_comment (pfile, m)
         }
        else if (CPP_OPTION (pfile, cplusplus_comments))
         {
         }
        else if (CPP_OPTION (pfile, cplusplus_comments))
         {
-         if (CPP_OPTION (pfile, c89)
-             && CPP_PEDANTIC (pfile)
-             && ! CPP_BUFFER (pfile)->warned_cplusplus_comments)
+         if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
             {
             {
-             cpp_pedwarn (pfile,
-                          "C++ style comments are not allowed in ISO C89");
-             cpp_pedwarn (pfile,
+             if (CPP_WTRADITIONAL (pfile))
+               cpp_pedwarn (pfile,
+                       "C++ style comments are not allowed in traditional C");
+             else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
+               cpp_pedwarn (pfile,
+                       "C++ style comments are not allowed in ISO C89");
+             if (CPP_WTRADITIONAL (pfile)
+                 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
+               cpp_pedwarn (pfile,
                            "(this will be reported only once per input file)");
               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
             }
                            "(this will be reported only once per input file)");
               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
             }
@@ -621,7 +1103,10 @@ _cpp_parse_name (pfile, c)
           break;
        }
  
           break;
        }
  
-      if (c == '$' && CPP_PEDANTIC (pfile))
+      /* $ is not a legal identifier character in the standard, but is
+        commonly accepted as an extension.  Don't warn about it in
+        skipped conditional blocks. */
+      if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
         cpp_pedwarn (pfile, "`$' in identifier");
  
        CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
         cpp_pedwarn (pfile, "`$' in identifier");
  
        CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
@@ -746,85 +1231,6 @@ parse_string (pfile, c)
        CPP_PUTC_Q (pfile, *start);
  }
  
        CPP_PUTC_Q (pfile, *start);
  }
  
-/* Read an assertion into the token buffer, converting to
-   canonical form: `#predicate(a n swe r)'  The next non-whitespace
-   character to read should be the first letter of the predicate.
-   Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
-   with answer (see callers for why). In case of 0, an error has been
-   printed. */
-int
-_cpp_parse_assertion (pfile)
-     cpp_reader *pfile;
-{
-  int c, dropwhite;
-  _cpp_skip_hspace (pfile);
-  c = PEEKC();
-  if (c == '\n')
-    {
-      cpp_error (pfile, "assertion without predicate");
-      return 0;
-    }
-  else if (! is_idstart(c))
-    {
-      cpp_error (pfile, "assertion predicate is not an identifier");
-      return 0;
-    }
-  CPP_PUTC(pfile, '#');
-  FORWARD(1);
-  _cpp_parse_name (pfile, c);
-
-  c = PEEKC();
-  if (c != '(')
-    {
-      if (is_hspace(c) || c == '\r')
-       _cpp_skip_hspace (pfile);
-      c = PEEKC();
-    }
-  if (c != '(')
-    return 1;
-
-  CPP_PUTC(pfile, '(');
-  FORWARD(1);
-  dropwhite = 1;
-  while ((c = GETC()) != ')')
-    {
-      if (is_space(c))
-       {
-         if (! dropwhite)
-           {
-             CPP_PUTC(pfile, ' ');
-             dropwhite = 1;
-           }
-       }
-      else if (c == '\n' || c == EOF)
-       {
-         if (c == '\n') FORWARD(-1);
-         cpp_error (pfile, "un-terminated assertion answer");
-         return 0;
-       }
-      else if (c == '\r')
-       /* \r cannot be a macro escape here. */
-       CPP_BUMP_LINE (pfile);
-      else
-       {
-         CPP_PUTC (pfile, c);
-         dropwhite = 0;
-       }
-    }
-
-  if (pfile->limit[-1] == ' ')
-    pfile->limit[-1] = ')';
-  else if (pfile->limit[-1] == '(')
-    {
-      cpp_error (pfile, "empty token sequence in assertion");
-      return 0;
-    }
-  else
-    CPP_PUTC (pfile, ')');
-
-  return 2;
-}
-
  /* Get the next token, and add it to the text in pfile->token_buffer.
     Return the kind of token we got.  */
  
  /* Get the next token, and add it to the text in pfile->token_buffer.
     Return the kind of token we got.  */
  
@@ -861,14 +1267,8 @@ _cpp_lex_token (pfile)
          For -traditional, a comment is equivalent to nothing.  */
        if (!CPP_OPTION (pfile, discard_comments))
         return CPP_COMMENT;
          For -traditional, a comment is equivalent to nothing.  */
        if (!CPP_OPTION (pfile, discard_comments))
         return CPP_COMMENT;
-      else if (CPP_TRADITIONAL (pfile)
-              && ! is_space (PEEKC ()))
-       {
-         if (pfile->parsing_define_directive)
-           return CPP_COMMENT;
-         else
-           goto get_next;
-       }
+      else if (CPP_TRADITIONAL (pfile))
+       goto get_next;
        else
         {
           CPP_PUTC (pfile, c);
        else
         {
           CPP_PUTC (pfile, c);
@@ -879,42 +1279,24 @@ _cpp_lex_token (pfile)
        CPP_PUTC (pfile, c);
  
      hash:
        CPP_PUTC (pfile, c);
  
      hash:
-      if (pfile->parsing_if_directive)
+      c2 = PEEKC ();
+      if (c2 == '#')
         {
         {
-         CPP_ADJUST_WRITTEN (pfile, -1);
-         if (_cpp_parse_assertion (pfile))
-           return CPP_ASSERTION;
-         return CPP_OTHER;
+         FORWARD (1);
+         CPP_PUTC (pfile, c2);
+         return CPP_PASTE;
         }
         }
-
-      if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile))
+      else if (c2 == '%' && PEEKN (1) == ':')
         {
         {
-         c2 = PEEKC ();
-         if (c2 == '#')
-           {
-             FORWARD (1);
-             CPP_PUTC (pfile, c2);
-           }
-         else if (c2 == '%' && PEEKN (1) == ':')
-           {
-             /* Digraph: "%:" == "#".  */
-             FORWARD (1);
-             CPP_RESERVE (pfile, 2);
-             CPP_PUTC_Q (pfile, c2);
-             CPP_PUTC_Q (pfile, GETC ());
-           }
-         else
-           return CPP_STRINGIZE;
-
-         return CPP_TOKPASTE;
+         /* Digraph: "%:" == "#".  */
+         FORWARD (1);
+         CPP_RESERVE (pfile, 2);
+         CPP_PUTC_Q (pfile, c2);
+         CPP_PUTC_Q (pfile, GETC ());
+         return CPP_PASTE;
         }
         }
-
-      if (!pfile->only_seen_white)
-       return CPP_OTHER;
-
-      /* Remove the "#" or "%:" from the token buffer.  */
-      CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
-      return CPP_DIRECTIVE;
+      else
+       return CPP_HASH;
  
      case '\"':
      case '\'':
  
      case '\"':
      case '\'':
@@ -959,7 +1341,7 @@ _cpp_lex_token (pfile)
           CPP_RESERVE (pfile, 2);
           CPP_PUTC_Q (pfile, c);
           CPP_PUTC_Q (pfile, c2);
           CPP_RESERVE (pfile, 2);
           CPP_PUTC_Q (pfile, c);
           CPP_PUTC_Q (pfile, c2);
-         return CPP_RBRACE;
+         return CPP_OPEN_BRACE;
         }
        /* else fall through */
  
         }
        /* else fall through */
  
@@ -1042,7 +1424,7 @@ _cpp_lex_token (pfile)
           CPP_RESERVE (pfile, 2);
           CPP_PUTC_Q (pfile, c);
           CPP_PUTC_Q (pfile, c2);
           CPP_RESERVE (pfile, 2);
           CPP_PUTC_Q (pfile, c);
           CPP_PUTC_Q (pfile, c2);
-         return CPP_LBRACE;
+         return CPP_CLOSE_BRACE;
         }
        else if (c2 == ':')
         goto op2;
         }
        else if (c2 == ':')
         goto op2;
@@ -1082,7 +1464,7 @@ _cpp_lex_token (pfile)
           CPP_PUTC_Q (pfile, '.');
           CPP_PUTC_Q (pfile, '.');
           FORWARD (2);
           CPP_PUTC_Q (pfile, '.');
           CPP_PUTC_Q (pfile, '.');
           FORWARD (2);
-         return CPP_3DOTS;
+         return CPP_ELLIPSIS;
         }
        goto randomchar;
  
         }
        goto randomchar;
  
@@ -1228,12 +1610,12 @@ _cpp_lex_token (pfile)
        CPP_PUTC (pfile, c);
        return CPP_VSPACE;
  
        CPP_PUTC (pfile, c);
        return CPP_VSPACE;
  
-    case '(': token = CPP_LPAREN;    goto char1;
-    case ')': token = CPP_RPAREN;    goto char1;
-    case '{': token = CPP_LBRACE;    goto char1;
-    case '}': token = CPP_RBRACE;    goto char1;
-    case ',': token = CPP_COMMA;     goto char1;
-    case ';': token = CPP_SEMICOLON; goto char1;
+    case '(': token = CPP_OPEN_PAREN;  goto char1;
+    case ')': token = CPP_CLOSE_PAREN; goto char1;
+    case '{': token = CPP_OPEN_BRACE;  goto char1;
+    case '}': token = CPP_CLOSE_BRACE; goto char1;
+    case ',': token = CPP_COMMA;       goto char1;
+    case ';': token = CPP_SEMICOLON;   goto char1;
  
      randomchar:
      default:
  
      randomchar:
      default:
@@ -1253,11 +1635,12 @@ maybe_macroexpand (pfile, written)
  {
    U_CHAR *macro = pfile->token_buffer + written;
    size_t len = CPP_WRITTEN (pfile) - written;
  {
    U_CHAR *macro = pfile->token_buffer + written;
    size_t len = CPP_WRITTEN (pfile) - written;
-  HASHNODE *hp = _cpp_lookup (pfile, macro, len);
+  cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
  
  
-  if (!hp)
+  /* cpp_lookup never returns null.  */
+  if (hp->type == T_VOID)
      return 0;
      return 0;
-  if (hp->type == T_DISABLED)
+  if (hp->disabled || hp->type == T_IDENTITY)
      {
        if (pfile->output_escapes)
         {
      {
        if (pfile->output_escapes)
         {
@@ -1281,7 +1664,7 @@ maybe_macroexpand (pfile, written)
      }
  
    /* If macro wants an arglist, verify that a '(' follows.  */
      }
  
    /* If macro wants an arglist, verify that a '(' follows.  */
-  if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
+  if (hp->type == T_FMACRO)
      {
        int macbuf_whitespace = 0;
        int c;
      {
        int macbuf_whitespace = 0;
        int c;
@@ -1324,6 +1707,12 @@ maybe_macroexpand (pfile, written)
         not_macro_call:
           if (macbuf_whitespace)
             CPP_PUTC (pfile, ' ');
         not_macro_call:
           if (macbuf_whitespace)
             CPP_PUTC (pfile, ' ');
+
+         /* K+R treated this as a hard error.  */
+         if (CPP_WTRADITIONAL (pfile))
+           cpp_warning (pfile,
+        "function macro %s must be used with arguments in traditional C",
+                        hp->name);
           return 0;
         }
      }
           return 0;
         }
      }
@@ -1337,12 +1726,33 @@ maybe_macroexpand (pfile, written)
    return 1;
  }
  
    return 1;
  }
  
+/* Complain about \v or \f in a preprocessing directive (constraint
+   violation, C99 6.10 para 5).  Caller has checked CPP_PEDANTIC.  */
+static void
+pedantic_whitespace (pfile, p, len)
+     cpp_reader *pfile;
+     U_CHAR *p;
+     unsigned int len;
+{
+  while (len)
+    {
+      if (*p == '\v')
+       cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
+      else if (*p == '\f')
+       cpp_pedwarn (pfile, "form feed in preprocessing directive");
+      p++;
+      len--;
+    }
+}
+
+
  enum cpp_ttype
  cpp_get_token (pfile)
       cpp_reader *pfile;
  {
    enum cpp_ttype token;
    long written = CPP_WRITTEN (pfile);
  enum cpp_ttype
  cpp_get_token (pfile)
       cpp_reader *pfile;
  {
    enum cpp_ttype token;
    long written = CPP_WRITTEN (pfile);
+  int macro_buffer;
  
   get_next:
    token = _cpp_lex_token (pfile);
  
   get_next:
    token = _cpp_lex_token (pfile);
@@ -1350,55 +1760,71 @@ cpp_get_token (pfile)
    switch (token)
      {
      default:
    switch (token)
      {
      default:
+      if (pfile->skipping)
+       break;
        pfile->potential_control_macro = 0;
        pfile->only_seen_white = 0;
        pfile->potential_control_macro = 0;
        pfile->only_seen_white = 0;
-      return token;
+      break;
+
+    case CPP_HSPACE:
+    case CPP_COMMENT:
+      break;
  
      case CPP_VSPACE:
        if (pfile->only_seen_white == 0)
         pfile->only_seen_white = 1;
        CPP_BUMP_LINE (pfile);
  
      case CPP_VSPACE:
        if (pfile->only_seen_white == 0)
         pfile->only_seen_white = 1;
        CPP_BUMP_LINE (pfile);
-      return token;
-
-    case CPP_HSPACE:
-    case CPP_COMMENT:
-      return token;
+      break;
  
  
-    case CPP_DIRECTIVE:
+    case CPP_HASH:
        pfile->potential_control_macro = 0;
        pfile->potential_control_macro = 0;
+      if (!pfile->only_seen_white)
+       break;
+      /* XXX shouldn't have to do this - remove the hash or %: from
+        the token buffer.  */
+      if (CPP_PWRITTEN (pfile)[-1] == '#')
+       CPP_ADJUST_WRITTEN (pfile, -1);
+      else
+       CPP_ADJUST_WRITTEN (pfile, -2);
+
        if (_cpp_handle_directive (pfile))
        if (_cpp_handle_directive (pfile))
-       return CPP_DIRECTIVE;
+       {
+         token = CPP_DIRECTIVE;
+         break;
+       }
        pfile->only_seen_white = 0;
        CPP_PUTC (pfile, '#');
        pfile->only_seen_white = 0;
        CPP_PUTC (pfile, '#');
-      return CPP_OTHER;
+      break;
  
      case CPP_MACRO:
  
      case CPP_MACRO:
+      if (pfile->skipping)
+       break;
        pfile->potential_control_macro = 0;
        pfile->only_seen_white = 0;
        if (! pfile->no_macro_expand
           && maybe_macroexpand (pfile, written))
         goto get_next;
        pfile->potential_control_macro = 0;
        pfile->only_seen_white = 0;
        if (! pfile->no_macro_expand
           && maybe_macroexpand (pfile, written))
         goto get_next;
-      return CPP_NAME;
+      token = CPP_NAME;
+      break;
  
  
+      /* Do not run this case through the 'skipping' logic.  */
      case CPP_EOF:
        if (CPP_BUFFER (pfile) == NULL)
         return CPP_EOF;
      case CPP_EOF:
        if (CPP_BUFFER (pfile) == NULL)
         return CPP_EOF;
-      if (CPP_BUFFER (pfile)->manual_pop)
-       /* If we've been reading from redirected input, the
-          frontend will pop the buffer.  */
-       return CPP_EOF;
+      macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
  
  
-      if (CPP_BUFFER (pfile)->seen_eof)
-       {
-         cpp_pop_buffer (pfile);
-         goto get_next;
-       }
-      else
-       {
-         _cpp_handle_eof (pfile);
-         return CPP_POP;
-       }
+      cpp_pop_buffer (pfile);
+      if (macro_buffer)
+       goto get_next;
+      return CPP_EOF;
      }
      }
+  
+  if (pfile->skipping)
+    {
+      CPP_SET_WRITTEN (pfile, written);
+      goto get_next;
+    }
+  return token;
  }
  
  /* Like cpp_get_token, but skip spaces and comments.  */
  }
  
  /* Like cpp_get_token, but skip spaces and comments.  */
@@ -1418,20 +1844,17 @@ cpp_get_non_space_token (pfile)
  }
  
  /* Like cpp_get_token, except that it does not execute directives,
  }
  
  /* Like cpp_get_token, except that it does not execute directives,
-   does not consume vertical space, and automatically pops off macro
-   buffers.
-
-   XXX This function will exist only till collect_expansion doesn't
-   need to see whitespace anymore, then it'll be merged with
-   _cpp_get_directive_token (below).  */
+   does not consume vertical space, and discards horizontal space.  */
  enum cpp_ttype
  enum cpp_ttype
-_cpp_get_define_token (pfile)
+_cpp_get_directive_token (pfile)
       cpp_reader *pfile;
  {
    long old_written;
    enum cpp_ttype token;
       cpp_reader *pfile;
  {
    long old_written;
    enum cpp_ttype token;
+  int at_bol;
  
   get_next:
  
   get_next:
+  at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
    old_written = CPP_WRITTEN (pfile);
    token = _cpp_lex_token (pfile);
    switch (token)
    old_written = CPP_WRITTEN (pfile);
    token = _cpp_lex_token (pfile);
    switch (token)
@@ -1446,26 +1869,15 @@ _cpp_get_define_token (pfile)
        return CPP_VSPACE;
  
      case CPP_HSPACE:
        return CPP_VSPACE;
  
      case CPP_HSPACE:
-      if (CPP_PEDANTIC (pfile))
-       {
-         U_CHAR *p, *limit;
-         p = pfile->token_buffer + old_written;
-         limit = CPP_PWRITTEN (pfile);
-         while (p < limit)
-           {
-             if (*p == '\v' || *p == '\f')
-               cpp_pedwarn (pfile, "%s in preprocessing directive",
-                            *p == '\f' ? "formfeed" : "vertical tab");
-             p++;
-           }
-       }
+      /* The purpose of this rather strange check is to prevent pedantic
+        warnings for ^L in an #ifdefed out block.  */
+      if (CPP_PEDANTIC (pfile) && ! at_bol)
+       pedantic_whitespace (pfile, pfile->token_buffer + old_written,
+                            CPP_WRITTEN (pfile) - old_written);
+      CPP_SET_WRITTEN (pfile, old_written);
+      goto get_next;
        return CPP_HSPACE;
  
        return CPP_HSPACE;
  
-    case CPP_DIRECTIVE:
-      /* Don't execute the directive, but don't smash it to OTHER either.  */
-      CPP_PUTC (pfile, '#');
-      return CPP_DIRECTIVE;
-
      case CPP_MACRO:
        if (! pfile->no_macro_expand
           && maybe_macroexpand (pfile, old_written))
      case CPP_MACRO:
        if (! pfile->no_macro_expand
           && maybe_macroexpand (pfile, old_written))
@@ -1487,23 +1899,6 @@ _cpp_get_define_token (pfile)
      }
  }
  
      }
  }
  
-/* Just like _cpp_get_define_token except that it discards horizontal
-   whitespace.  */
-
-enum cpp_ttype
-_cpp_get_directive_token (pfile)
-     cpp_reader *pfile;
-{
-  int old_written = CPP_WRITTEN (pfile);
-  for (;;)
-    {
-      enum cpp_ttype token = _cpp_get_define_token (pfile);
-      if (token != CPP_COMMENT && token != CPP_HSPACE)
-       return token;
-      CPP_SET_WRITTEN (pfile, old_written);
-    }
-}
-
  /* Determine the current line and column.  Used only by read_and_prescan. */
  static U_CHAR *
  find_position (start, limit, linep)
  /* Determine the current line and column.  Used only by read_and_prescan. */
  static U_CHAR *
  find_position (start, limit, linep)
@@ -1526,21 +1921,17 @@ find_position (start, limit, linep)
    return lbase;
  }
  
    return lbase;
  }
  
-/* The following table is used by _cpp_read_and_prescan.  If we have
+/* The following table is used by _cpp_prescan.  If we have
     designated initializers, it can be constant data; otherwise, it is
     set up at runtime by _cpp_init_input_buffer.  */
  
     designated initializers, it can be constant data; otherwise, it is
     set up at runtime by _cpp_init_input_buffer.  */
  
-#ifndef UCHAR_MAX
-#define UCHAR_MAX 255  /* assume 8-bit bytes */
-#endif
-
-#if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
+#if (GCC_VERSION >= 2007)
  #define init_chartab()  /* nothing */
  #define init_chartab()  /* nothing */
-#define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
+#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
  #define END };
  #define s(p, v) [p] = v,
  #else
  #define END };
  #define s(p, v) [p] = v,
  #else
-#define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
+#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
   static void init_chartab PARAMS ((void)) { \
   unsigned char *x = chartab;
  #define END }
   static void init_chartab PARAMS ((void)) { \
   unsigned char *x = chartab;
  #define END }
@@ -1571,9 +1962,10 @@ END
  #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
  #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
  
  #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
  #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
  
-/* Read the entire contents of file DESC into buffer BUF.  LEN is how
-   much memory to allocate initially; more will be allocated if
-   necessary.  Convert end-of-line markers (\n, \r, \r\n, \n\r) to
+/* Prescan pass over a file already loaded into BUF.  This is
+   translation phases 1 and 2 (C99 5.1.1.2).
+
+   Convert end-of-line markers (\n, \r, \r\n, \n\r) to
     canonical form (\n).  If enabled, convert and/or warn about
     trigraphs.  Convert backslash-newline to a one-character escape
     (\r) and remove it from "embarrassing" places (i.e. the middle of a
     canonical form (\n).  If enabled, convert and/or warn about
     trigraphs.  Convert backslash-newline to a one-character escape
     (\r) and remove it from "embarrassing" places (i.e. the middle of a
@@ -1594,204 +1986,153 @@ END
     at the end of reload1.c is about 60%.  (reload1.c is 329k.)
  
     If your file has more than one kind of end-of-line marker, you
     at the end of reload1.c is about 60%.  (reload1.c is 329k.)
  
     If your file has more than one kind of end-of-line marker, you
-   will get messed-up line numbering.
-   
-   So that the cases of the switch statement do not have to concern
-   themselves with the complications of reading beyond the end of the
-   buffer, the buffer is guaranteed to have at least 3 characters in
-   it (or however many are left in the file, if less) on entry to the
-   switch.  This is enough to handle trigraphs and the "\\\n\r" and
-   "\\\r\n" cases.
-   
-   The end of the buffer is marked by a '\\', which, being a special
-   character, guarantees we will exit the fast-scan loops and perform
-   a refill. */
- 
-long
-_cpp_read_and_prescan (pfile, fp, desc, len)
+   will get messed-up line numbering.  */
+
+ssize_t
+_cpp_prescan (pfile, fp, len)
       cpp_reader *pfile;
       cpp_buffer *fp;
       cpp_reader *pfile;
       cpp_buffer *fp;
-     int desc;
-     size_t len;
+     ssize_t len;
  {
  {
-  U_CHAR *buf = (U_CHAR *) xmalloc (len);
-  U_CHAR *ip, *op, *line_base;
-  U_CHAR *ibase;
+  U_CHAR *buf, *op;
+  const U_CHAR *ibase, *ip, *ilimit;
+  U_CHAR *line_base;
    unsigned long line;
    unsigned int deferred_newlines;
    unsigned long line;
    unsigned int deferred_newlines;
-  size_t offset;
-  int count = 0;
  
  
-  offset = 0;
-  deferred_newlines = 0;
-  op = buf;
-  line_base = buf;
+  /* Allocate an extra byte in case we must add a trailing \n.  */
+  buf = (U_CHAR *) xmalloc (len + 1);
+  line_base = op = buf;
+  ip = ibase = fp->buf;
+  ilimit = ibase + len;
    line = 1;
    line = 1;
-  ibase = pfile->input_buffer + 3;
-  ip = ibase;
-  ip[-1] = '\0';  /* Guarantee no match with \n for SPECCASE_CR */
+  deferred_newlines = 0;
  
    for (;;)
      {
  
    for (;;)
      {
-      U_CHAR *near_buff_end;
+      const U_CHAR *iq;
  
  
-      count = read (desc, ibase, pfile->input_buffer_len);
-      if (count < 0)
-       goto error;
-      
-      ibase[count] = '\\';  /* Marks end of buffer */
-      if (count)
+      /* Deal with \-newline, potentially in the middle of a token. */
+      if (deferred_newlines)
         {
         {
-         near_buff_end = pfile->input_buffer + count;
-         offset += count;
-         if (offset > len)
+         if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
             {
             {
-             size_t delta_op;
-             size_t delta_line_base;
-             len = offset * 2;
-             if (offset > len)
-               /* len overflowed.
-                  This could happen if the file is larger than half the
-                  maximum address space of the machine. */
-               goto too_big;
-
-             delta_op = op - buf;
-             delta_line_base = line_base - buf;
-             buf = (U_CHAR *) xrealloc (buf, len);
-             op = buf + delta_op;
-             line_base = buf + delta_line_base;
+             /* Previous was not white space.  Skip to white
+                space, if we can, before outputting the \r's */
+             iq = ip;
+             while (iq < ilimit
+                    && *iq != ' '
+                    && *iq != '\t'
+                    && *iq != '\n'
+                    && NORMAL(*iq))
+               iq++;
+             memcpy (op, ip, iq - ip);
+             op += iq - ip;
+             ip += iq - ip;
+             if (! NORMAL(*ip))
+               goto do_speccase;
             }
             }
-       }
-      else
-       {
-         if (ip == ibase)
-           break;
-         /* Allow normal processing of the (at most 2) remaining
-            characters.  The end-of-buffer marker is still present
-            and prevents false matches within the switch. */
-         near_buff_end = ibase - 1;
+         while (deferred_newlines)
+           deferred_newlines--, *op++ = '\r';
         }
  
         }
  
-      for (;;)
-       {
-         unsigned int span;
+      /* Copy as much as we can without special treatment. */
+      iq = ip;
+      while (iq < ilimit && NORMAL (*iq)) iq++;
+      memcpy (op, ip, iq - ip);
+      op += iq - ip;
+      ip += iq - ip;
  
  
-         /* Deal with \-newline, potentially in the middle of a token. */
-         if (deferred_newlines)
+    do_speccase:
+      if (ip >= ilimit)
+       break;
+
+      switch (chartab[*ip++])
+       {
+       case SPECCASE_CR:  /* \r */
+         if (ip[-2] != '\n')
             {
             {
-             if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
-               {
-                 /* Previous was not white space.  Skip to white
-                    space, if we can, before outputting the \r's */
-                 span = 0;
-                 while (ip[span] != ' '
-                        && ip[span] != '\t'
-                        && ip[span] != '\n'
-                        && NORMAL(ip[span]))
-                   span++;
-                 memcpy (op, ip, span);
-                 op += span;
-                 ip += span;
-                 if (! NORMAL(ip[0]))
-                   goto do_speccase;
-               }
-             while (deferred_newlines)
-               deferred_newlines--, *op++ = '\r';
+             if (ip < ilimit && *ip == '\n')
+               ip++;
+             *op++ = '\n';
             }
             }
+         break;
  
  
-         /* Copy as much as we can without special treatment. */
-         span = 0;
-         while (NORMAL (ip[span])) span++;
-         memcpy (op, ip, span);
-         op += span;
-         ip += span;
-
-       do_speccase:
-         if (ip > near_buff_end) /* Do we have enough chars? */
-           break;
-         switch (chartab[*ip++])
+       case SPECCASE_BACKSLASH:  /* \ */
+       backslash:
+         if (ip < ilimit)
             {
             {
-           case SPECCASE_CR:  /* \r */
-             if (ip[-2] != '\n')
-               {
-                 if (*ip == '\n')
-                   ip++;
-                 *op++ = '\n';
-               }
-             break;
-
-           case SPECCASE_BACKSLASH:  /* \ */
               if (*ip == '\n')
                 {
                   deferred_newlines++;
                   ip++;
                   if (*ip == '\r') ip++;
               if (*ip == '\n')
                 {
                   deferred_newlines++;
                   ip++;
                   if (*ip == '\r') ip++;
+                 break;
                 }
               else if (*ip == '\r')
                 {
                   deferred_newlines++;
                   ip++;
                   if (*ip == '\n') ip++;
                 }
               else if (*ip == '\r')
                 {
                   deferred_newlines++;
                   ip++;
                   if (*ip == '\n') ip++;
+                 break;
                 }
                 }
-             else
-               *op++ = '\\';
-             break;
+           }
  
  
-           case SPECCASE_QUESTION: /* ? */
-             {
-               unsigned int d, t;
+         *op++ = '\\';
+         break;
  
  
-               *op++ = '?'; /* Normal non-trigraph case */
-               if (ip[0] != '?')
-                 break;
-                   
-               d = ip[1];
-               t = chartab[d];
-               if (NONTRI (t))
-                 break;
+       case SPECCASE_QUESTION: /* ? */
+         {
+           unsigned int d, t;
  
  
-               if (CPP_OPTION (pfile, warn_trigraphs))
-                 {
-                   unsigned long col;
-                   line_base = find_position (line_base, op, &line);
-                   col = op - line_base + 1;
-                   if (CPP_OPTION (pfile, trigraphs))
-                     cpp_warning_with_line (pfile, line, col,
-                                            "trigraph ??%c converted to %c", d, t);
-                   else
-                     cpp_warning_with_line (pfile, line, col,
-                                            "trigraph ??%c ignored", d);
-                 }
+           *op++ = '?'; /* Normal non-trigraph case */
+           if (ip > ilimit - 2 || ip[0] != '?')
+             break;
+                   
+           d = ip[1];
+           t = chartab[d];
+           if (NONTRI (t))
+             break;
  
  
-               ip += 2;
+           if (CPP_OPTION (pfile, warn_trigraphs))
+             {
+               unsigned long col;
+               line_base = find_position (line_base, op, &line);
+               col = op - line_base + 1;
                 if (CPP_OPTION (pfile, trigraphs))
                 if (CPP_OPTION (pfile, trigraphs))
-                 {
-                   op[-1] = t;     /* Overwrite '?' */
-                   if (t == '\\')
-                     {
-                       op--;
-                       *--ip = '\\';
-                       goto do_speccase; /* May need buffer refill */
-                     }
-                 }
+                 cpp_warning_with_line (pfile, line, col,
+                                        "trigraph ??%c converted to %c", d, t);
                 else
                 else
+                 cpp_warning_with_line (pfile, line, col,
+                                        "trigraph ??%c ignored", d);
+             }
+
+           ip += 2;
+           if (CPP_OPTION (pfile, trigraphs))
+             {
+               op[-1] = t;         /* Overwrite '?' */
+               if (t == '\\')
                   {
                   {
-                   *op++ = '?';
-                   *op++ = d;
+                   op--;
+                   goto backslash;
                   }
               }
                   }
               }
-             break;
-           }
+           else
+             {
+               *op++ = '?';
+               *op++ = d;
+             }
+         }
+         break;
         }
         }
-      /* Copy previous char plus unprocessed (at most 2) chars
-        to beginning of buffer, refill it with another
-        read(), and continue processing */
-      memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
-      ip -= count;
      }
  
      }
  
-  if (offset == 0)
-    return 0;
+#ifdef HAVE_MMAP_FILE
+  if (fp->mapped)
+    munmap ((caddr_t) fp->buf, len);
+  else
+#endif
+    free ((PTR) fp->buf);
  
    if (op[-1] != '\n')
      {
  
    if (op[-1] != '\n')
      {
@@ -1799,30 +2140,11 @@ _cpp_read_and_prescan (pfile, fp, desc, len)
        line_base = find_position (line_base, op, &line);
        col = op - line_base + 1;
        cpp_warning_with_line (pfile, line, col, "no newline at end of file");
        line_base = find_position (line_base, op, &line);
        col = op - line_base + 1;
        cpp_warning_with_line (pfile, line, col, "no newline at end of file");
-      if (offset + 1 > len)
-       {
-         len += 1;
-         if (offset + 1 > len)
-           goto too_big;
-         buf = (U_CHAR *) xrealloc (buf, len);
-         op = buf + offset;
-       }
        *op++ = '\n';
      }
  
        *op++ = '\n';
      }
  
-  fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
+  fp->buf = buf;
    return op - buf;
    return op - buf;
-
- too_big:
-  cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
-             (unsigned long)offset);
-  free (buf);
-  return -1;
-
- error:
-  cpp_error_from_errno (pfile, fp->ihash->name);
-  free (buf);
-  return -1;
  }
  
  /* Allocate pfile->input_buffer, and initialize chartab[]
  }
  
  /* Allocate pfile->input_buffer, and initialize chartab[]
@@ -1835,6 +2157,7 @@ _cpp_init_input_buffer (pfile)
    U_CHAR *tmp;
  
    init_chartab ();
    U_CHAR *tmp;
  
    init_chartab ();
+  _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
  
    /* Determine the appropriate size for the input buffer.  Normal C
       source files are smaller than eight K.  */
  
    /* Determine the appropriate size for the input buffer.  Normal C
       source files are smaller than eight K.  */
@@ -1847,3 +2170,1249 @@ _cpp_init_input_buffer (pfile)
    pfile->input_buffer = tmp;
    pfile->input_buffer_len = 8192;
  }
    pfile->input_buffer = tmp;
    pfile->input_buffer_len = 8192;
  }
+
+/* Utility routine:
+   Compares, in the manner of strcmp(3), the token beginning at TOKEN
+   and extending for LEN characters to the NUL-terminated string
+   STRING.  Typical usage:
+
+   if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
+                 "inline"))
+     { ... }
+ */
+
+int
+cpp_idcmp (token, len, string)
+     const U_CHAR *token;
+     size_t len;
+     const char *string;
+{
+  size_t len2 = strlen (string);
+  int r;
+
+  if ((r = memcmp (token, string, MIN (len, len2))))
+    return r;
+
+  /* The longer of the two strings sorts after the shorter.  */
+  if (len == len2)
+    return 0;
+  else if (len < len2)
+    return -1;
+  else
+    return 1;
+}
+
+#ifdef NEW_LEXER
+
+/* Lexing algorithm.
+
+ The original lexer in cpplib was made up of two passes: a first pass
+ that replaced trigraphs and deleted esacped newlines, and a second
+ pass that tokenized the result of the first pass.  Tokenisation was
+ performed by peeking at the next character in the input stream.  For
+ example, if the input stream contained "!=", the handler for the !
+ character would peek at the next character, and if it were a '='
+ would skip over it, and return a "!=" token, otherwise it would
+ return just the "!" token.
+
+ To implement a single-pass lexer, this peeking ahead is unworkable.
+ An arbitrary number of escaped newlines, and trigraphs (in particular
+ ??/ which translates to the escape \), could separate the '!' and '='
+ in the input stream, yet the next token is still a "!=".
+
+ Suppose instead that we lex by one logical line at a time, producing
+ a token list or stack for each logical line, and when seeing the '!'
+ push a CPP_NOT token on the list.  Then if the '!' is part of a
+ longer token ("!=") we know we must see the remainder of the token by
+ the time we reach the end of the logical line.  Thus we can have the
+ '=' handler look at the previous token (at the end of the list / top
+ of the stack) and see if it is a "!" token, and if so, instead of
+ pushing a "=" token revise the existing token to be a "!=" token.
+
+ This works in the presence of escaped newlines, because the '\' would
+ have been pushed on the top of the stack as a CPP_BACKSLASH.  The
+ newline ('\n' or '\r') handler looks at the token at the top of the
+ stack to see if it is a CPP_BACKSLASH, and if so discards both.
+ Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
+ the '=' handler would never see any intervening escaped newlines.
+
+ To make trigraphs work in this context, as in precedence trigraphs
+ are highest and converted before anything else, the '?' handler does
+ lookahead to see if it is a trigraph, and if so skips the trigraph
+ and pushes the token it represents onto the top of the stack.  This
+ also works in the particular case of a CPP_BACKSLASH trigraph.
+
+ To the preprocessor, whitespace is only significant to the point of
+ knowing whether whitespace precedes a particular token.  For example,
+ the '=' handler needs to know whether there was whitespace between it
+ and a "!" token on the top of the stack, to make the token conversion
+ decision correctly.  So each token has a PREV_WHITESPACE flag to
+ indicate this - the standard permits consecutive whitespace to be
+ regarded as a single space.  The compiler front ends are not
+ interested in whitespace at all; they just require a token stream.
+ Another place where whitespace is significant to the preprocessor is
+ a #define statment - if there is whitespace between the macro name
+ and an initial "(" token the macro is "object-like", otherwise it is
+ a function-like macro that takes arguments.
+
+ However, all is not rosy.  Parsing of identifiers, numbers, comments
+ and strings becomes trickier because of the possibility of raw
+ trigraphs and escaped newlines in the input stream.
+
+ The trigraphs are three consecutive characters beginning with two
+ question marks.  A question mark is not valid as part of a number or
+ identifier, so parsing of a number or identifier terminates normally
+ upon reaching it, returning to the mainloop which handles the
+ trigraph just like it would in any other position.  Similarly for the
+ backslash of a backslash-newline combination.  So we just need the
+ escaped-newline dropper in the mainloop to check if the token on the
+ top of the stack after dropping the escaped newline is a number or
+ identifier, and if so to continue the processing it as if nothing had
+ happened.
+
+ For strings, we replace trigraphs whenever we reach a quote or
+ newline, because there might be a backslash trigraph escaping them.
+ We need to be careful that we start trigraph replacing from where we
+ left off previously, because it is possible for a first scan to leave
+ "fake" trigraphs that a second scan would pick up as real (e.g. the
+ sequence "????/\n=" would find a fake ??= trigraph after removing the
+ escaped newline.)
+
+ For line comments, on reaching a newline we scan the previous
+ character(s) to see if it escaped, and continue if it is.  Block
+ comments ignore everything and just focus on finding the comment
+ termination mark.  The only difficult thing, and it is surprisingly
+ tricky, is checking if an asterisk precedes the final slash since
+ they could be separated by escaped newlines.  If the preprocessor is
+ invoked with the output comments option, we don't bother removing
+ escaped newlines and replacing trigraphs for output.
+
+ Finally, numbers can begin with a period, which is pushed initially
+ as a CPP_DOT token in its own right.  The digit handler checks if the
+ previous token was a CPP_DOT not separated by whitespace, and if so
+ pops it off the stack and pushes a period into the number's buffer
+ before calling the number parser.
+
+*/
+
+static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
+                                                   U":>", U"<%", U"%>"};
+static unsigned char trigraph_map[256];
+
+void
+init_trigraph_map ()
+{
+  trigraph_map['='] = '#';
+  trigraph_map['('] = '[';
+  trigraph_map[')'] = ']';
+  trigraph_map['/'] = '\\';
+  trigraph_map['\''] = '^';
+  trigraph_map['<'] = '{';
+  trigraph_map['>'] = '}';
+  trigraph_map['!'] = '|';
+  trigraph_map['-'] = '~';
+}
+
+/* Call when a trigraph is encountered.  It warns if necessary, and
+   returns true if the trigraph should be honoured.  END is the third
+   character of a trigraph in the input stream.  */
+static int
+trigraph_ok (pfile, end)
+     cpp_reader *pfile;
+     const unsigned char *end;
+{
+  int accept = CPP_OPTION (pfile, trigraphs);
+  
+  if (CPP_OPTION (pfile, warn_trigraphs))
+    {
+      unsigned int col = end - 1 - pfile->buffer->line_base;
+      if (accept)
+       cpp_warning_with_line (pfile, pfile->buffer->lineno, col, 
+                              "trigraph ??%c converted to %c",
+                              (int) *end, (int) trigraph_map[*end]);
+      else
+       cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
+                              "trigraph ??%c ignored", (int) *end);
+    }
+  return accept;
+}
+
+/* Scan a string for trigraphs, warning or replacing them inline as
+   appropriate.  When parsing a string, we must call this routine
+   before processing a newline character (if trigraphs are enabled),
+   since the newline might be escaped by a preceding backslash
+   trigraph sequence.  Returns a pointer to the end of the name after
+   replacement.  */
+
+static unsigned char*
+trigraph_replace (pfile, src, limit)
+     cpp_reader *pfile;
+     unsigned char *src;
+     unsigned char* limit;
+{
+  unsigned char *dest;
+
+  /* Starting with src[1], find two consecutive '?'.  The case of no
+     trigraphs is streamlined.  */
+  
+  for (; src + 1 < limit; src += 2)
+    {
+      if (src[0] != '?')
+       continue;
+
+      /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
+      if (src[-1] == '?')
+       src--;
+      else if (src + 2 == limit || src[1] != '?')
+       continue;
+
+      /* Check if it really is a trigraph.  */
+      if (trigraph_map[src[2]] == 0)
+       continue;
+
+      dest = src;
+      goto trigraph_found;
+    }
+  return limit;
+
+  /* Now we have a trigraph, we need to scan the remaining buffer, and
+     copy-shifting its contents left if replacement is enabled.  */
+  for (; src + 2 < limit; dest++, src++)
+    if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
+      {
+      trigraph_found:
+       src += 2;
+       if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
+         *dest = trigraph_map[*src];
+      }
+  
+  /* Copy remaining (at most 2) characters.  */
+  while (src < limit)
+    *dest++ = *src++;
+  return dest;
+}
+
+/* If CUR is a backslash or the end of a trigraphed backslash, return
+   a pointer to its beginning, otherwise NULL.  We don't read beyond
+   the buffer start, because there is the start of the comment in the
+   buffer.  */
+static const unsigned char *
+backslash_start (pfile, cur)
+     cpp_reader *pfile;
+     const unsigned char *cur;
+{
+  if (cur[0] == '\\')
+    return cur;
+  if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
+      && trigraph_ok (pfile, cur))
+    return cur - 2;
+  return 0;
+}
+
+/* Skip a C-style block comment.  This is probably the trickiest
+   handler.  We find the end of the comment by seeing if an asterisk
+   is before every '/' we encounter.  The nasty complication is that a
+   previous asterisk may be separated by one or more escaped newlines.
+   Returns non-zero if comment terminated by EOF, zero otherwise.  */
+static int
+skip_block_comment2 (pfile)
+     cpp_reader *pfile;
+{
+  cpp_buffer *buffer = pfile->buffer;
+  const unsigned char *char_after_star = 0;
+  register const unsigned char *cur = buffer->cur;
+  int seen_eof = 0;
+  
+  /* Inner loop would think the comment has ended if the first comment
+     character is a '/'.  Avoid this and keep the inner loop clean by
+     skipping such a character.  */
+  if (cur < buffer->rlimit && cur[0] == '/')
+    cur++;
+
+  for (; cur < buffer->rlimit; )
+    {
+      unsigned char c = *cur++;
+
+      /* People like decorating comments with '*', so check for
+        '/' instead for efficiency.  */
+      if (c == '/')
+       {
+         if (cur[-2] == '*' || cur - 1 == char_after_star)
+           goto out;
+
+         /* Warn about potential nested comments, but not when
+            the final character inside the comment is a '/'.
+            Don't bother to get it right across escaped newlines.  */
+         if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
+             && cur[0] == '*' && cur[1] != '/') 
+           {
+             buffer->cur = cur;
+             cpp_warning (pfile, "'/*' within comment");
+           }
+       }
+      else if (IS_NEWLINE(c))
+       {
+         const unsigned char* bslash = backslash_start (pfile, cur - 2);
+
+         handle_newline (cur, buffer->rlimit, c);
+         /* Work correctly if there is an asterisk before an
+            arbirtrarily long sequence of escaped newlines.  */
+         if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
+           char_after_star = cur;
+         else
+           char_after_star = 0;
+       }
+    }
+  seen_eof = 1;
+
+ out:
+  buffer->cur = cur;
+  return seen_eof;
+}
+
+/* Skip a C++ or Chill line comment.  Handles escaped newlines.
+   Returns non-zero if a multiline comment.  */
+static int
+skip_line_comment2 (pfile)
+     cpp_reader *pfile;
+{
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+  int multiline = 0;
+
+  for (; cur < buffer->rlimit; )
+    {
+      unsigned char c = *cur++;
+
+      if (IS_NEWLINE (c))
+       {
+         /* Check for a (trigaph?) backslash escaping the newline.  */
+         if (!backslash_start (pfile, cur - 2))
+           goto out;
+         multiline = 1;
+         handle_newline (cur, buffer->rlimit, c);
+       }
+    }
+  cur++;
+
+ out:
+  buffer->cur = cur - 1;       /* Leave newline for caller.  */
+  return multiline;
+}
+
+/* Skips whitespace, stopping at next non-whitespace character.
+   Adjusts pfile->col_adjust to account for tabs.  This enables tokens
+   to be assigned the correct column.  */
+static void
+skip_whitespace (pfile, in_directive)
+     cpp_reader *pfile;
+     int in_directive;
+{
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+  unsigned short null_count = 0;
+
+  for (; cur < buffer->rlimit; )
+    {
+      unsigned char c = *cur++;
+
+      if (c == '\t')
+       {
+         unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
+         pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
+                               - col % CPP_OPTION(pfile, tabstop));
+       }
+      if (IS_HSPACE(c))                /* FIXME: Fix ISTABLE.  */
+       continue;
+      if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
+       goto out;
+      if (c == '\0')
+       null_count++;
+      /* Mut be '\f' or '\v' */
+      else if (in_directive && CPP_PEDANTIC (pfile))
+       cpp_pedwarn (pfile, "%s in preprocessing directive",
+                    c == '\f' ? "formfeed" : "vertical tab");
+    }
+  cur++;
+
+ out:
+  buffer->cur = cur - 1;
+  if (null_count)
+    cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
+                : "embedded null character ignored");
+}
+
+/* Parse (append) an identifier.  */
+static void
+parse_name (pfile, list, name)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+     cpp_name *name;
+{
+  const unsigned char *name_limit;
+  unsigned char *namebuf;
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+
+ expanded:
+  name_limit = list->namebuf + list->name_cap;
+  namebuf = list->namebuf + list->name_used;
+
+  for (; cur < buffer->rlimit && namebuf < name_limit; )
+    {
+      unsigned char c = *namebuf = *cur; /* Copy a single char.  */
+
+      if (! is_idchar(c))
+       goto out;
+      namebuf++;
+      cur++;
+      if (c == '$' && CPP_PEDANTIC (pfile))
+       {
+         buffer->cur = cur;
+         cpp_pedwarn (pfile, "'$' character in identifier");
+       }
+    }
+
+  /* Run out of name space?  */
+  if (cur < buffer->rlimit)
+    {
+      list->name_used = namebuf - list->namebuf;
+      auto_expand_name_space (list);
+      goto expanded;
+    }
+
+ out:
+  buffer->cur = cur;
+  name->len = namebuf - name->text;
+  list->name_used = namebuf - list->namebuf;
+}
+
+/* Parse (append) a number.  */
+
+#define VALID_SIGN(c, prevc) \
+  (((c) == '+' || (c) == '-') && \
+   ((prevc) == 'e' || (prevc) == 'E' \
+    || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
+
+static void
+parse_number (pfile, list, name)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+     cpp_name *name;
+{
+  const unsigned char *name_limit;
+  unsigned char *namebuf;
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+
+ expanded:
+  name_limit = list->namebuf + list->name_cap;
+  namebuf = list->namebuf + list->name_used;
+
+  for (; cur < buffer->rlimit && namebuf < name_limit; )
+    {
+      unsigned char c = *namebuf = *cur; /* Copy a single char.  */
+
+      /* Perhaps we should accept '$' here if we accept it for
+         identifiers.  We know namebuf[-1] is safe, because for c to
+         be a sign we must have pushed at least one character.  */
+      if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
+       goto out;
+
+      namebuf++;
+      cur++;
+    }
+
+  /* Run out of name space?  */
+  if (cur < buffer->rlimit)
+    {
+      list->name_used = namebuf - list->namebuf;
+      auto_expand_name_space (list);
+      goto expanded;
+    }
+  
+ out:
+  buffer->cur = cur;
+  name->len = namebuf - name->text;
+  list->name_used = namebuf - list->namebuf;
+}
+
+/* Places a string terminated by an unescaped TERMINATOR into a
+   cpp_name, which should be expandable and thus at the top of the
+   list's stack.  Handles embedded trigraphs, if necessary, and
+   escaped newlines.
+
+   Can be used for character constants (terminator = '\''), string
+   constants ('"') and angled headers ('>').  Multi-line strings are
+   allowed, except for within directives.  */
+
+static void
+parse_string2 (pfile, list, name, terminator, multiline_ok)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+     cpp_name *name;
+     unsigned int terminator;
+     int multiline_ok;
+{
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+  const unsigned char *name_limit;
+  unsigned char *namebuf;
+  unsigned int null_count = 0;
+  int trigraphed_len = 0;
+
+ expanded:
+  name_limit = list->namebuf + list->name_cap;
+  namebuf = list->namebuf + list->name_used;
+
+  for (; cur < buffer->rlimit && namebuf < name_limit; )
+    {
+      unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
+
+      if (c == '\0')
+       null_count++;
+      else if (c == terminator || IS_NEWLINE (c))
+       {
+         /* Needed for trigraph_replace and multiline string warning.  */
+         buffer->cur = cur;
+
+         /* Scan for trigraphs before checking if backslash-escaped.  */
+         if (CPP_OPTION (pfile, trigraphs)
+             || CPP_OPTION (pfile, warn_trigraphs))
+           {
+             namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
+                                           namebuf);
+             trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
+             if (trigraphed_len < 0)
+               trigraphed_len = 0;
+           }
+
+         namebuf--;     /* Drop the newline / terminator from the name.  */
+         if (IS_NEWLINE (c))
+           {
+             /* Drop a backslash newline, and continue. */
+             if (namebuf[-1] == '\\')
+               {
+                 handle_newline (cur, buffer->rlimit, c);
+                 namebuf--;
+                 continue;
+               }
+
+             cur--;
+
+             /* In Fortran and assembly language, silently terminate
+                strings of either variety at end of line.  This is a
+                kludge around not knowing where comments are in these
+                languages.  */
+             if (CPP_OPTION (pfile, lang_fortran)
+                 || CPP_OPTION (pfile, lang_asm))
+               goto out;
+
+             /* Character constants, headers and asserts may not
+                extend over multiple lines.  In Standard C, neither
+                may strings.  We accept multiline strings as an
+                extension, but not in directives.  */
+             if (!multiline_ok)
+               goto unterminated;
+               
+             cur++;  /* Move forwards again.  */
+
+             if (pfile->multiline_string_line == 0)
+               {
+                 pfile->multiline_string_line = list->line;
+                 if (CPP_PEDANTIC (pfile))
+                   cpp_pedwarn (pfile, "multi-line string constant");
+               }
+
+             *namebuf++ = '\n';
+             handle_newline (cur, buffer->rlimit, c);
+           }
+         else
+           {
+             unsigned char *temp;
+
+             /* An odd number of consecutive backslashes represents
+                an escaped terminator.  */
+             temp = namebuf - 1;
+             while (temp >= name->text && *temp == '\\')
+               temp--;
+
+             if ((namebuf - temp) & 1)
+               goto out;
+             namebuf++;
+           }
+       }
+    }
+
+  /* Run out of name space?  */
+  if (cur < buffer->rlimit)
+    {
+      list->name_used = namebuf - list->namebuf;
+      auto_expand_name_space (list);
+      goto expanded;
+    }
+
+  /* We may not have trigraph-replaced the input for this code path,
+     but as the input is in error by being unterminated we don't
+     bother.  Prevent warnings about no newlines at EOF.  */
+  if (IS_NEWLINE(cur[-1]))
+    cur--;
+
+ unterminated:
+  cpp_error (pfile, "missing terminating %c character", (int) terminator);
+
+  if (terminator == '\"' && pfile->multiline_string_line != list->line
+      && pfile->multiline_string_line != 0)
+    {
+      cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
+                          "possible start of unterminated string literal");
+      pfile->multiline_string_line = 0;
+    }
+  
+ out:
+  buffer->cur = cur;
+  name->len = namebuf - name->text;
+  list->name_used = namebuf - list->namebuf;
+
+  if (null_count > 0)
+    cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
+                        : "null character preserved"));
+}
+
+/* The character TYPE helps us distinguish comment types: '*' = C
+   style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
+   the stored comment includes the comment start and any terminator.  */
+
+#define COMMENT_START_LEN 2
+static void
+save_comment (list, token, from, len, type)
+     cpp_toklist *list;
+     cpp_token *token;
+     const unsigned char *from;
+     unsigned int len;
+     unsigned int type;
+{
+  unsigned char *buffer;
+  
+  len += COMMENT_START_LEN;
+
+  if (list->name_used + len > list->name_cap)
+    expand_name_space (list, len);
+
+  INIT_TOKEN_NAME (list, token);
+  token->type = CPP_COMMENT;
+  token->val.name.len = len;
+
+  buffer = list->namebuf + list->name_used;
+  list->name_used += len;
+
+  /* Copy the comment.  */
+  if (type == '*')
+    {
+      *buffer++ = '/';
+      *buffer++ = '*';
+    }
+  else
+    {
+      *buffer++ = type;
+      *buffer++ = type;
+    }
+  memcpy (buffer, from, len - COMMENT_START_LEN);
+}
+
+/*
+ *  The tokenizer's main loop.  Returns a token list, representing a
+ *  logical line in the input file.  On EOF after some tokens have
+ *  been processed, we return immediately.  Then in next call, or if
+ *  EOF occurred at the beginning of a logical line, a single CPP_EOF
+ *  token is placed in the list.
+ *
+ *  Implementation relies almost entirely on lookback, rather than
+ *  looking forwards.  This means that tokenization requires just
+ *  a single pass of the file, even in the presence of trigraphs and
+ *  escaped newlines, providing significant performance benefits.
+ *  Trigraph overhead is negligible if they are disabled, and low
+ *  even when enabled.
+ */
+
+#define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
+
+void
+_cpp_lex_line (pfile, list)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+{
+  cpp_token *cur_token, *token_limit;
+  cpp_buffer *buffer = pfile->buffer;
+  register const unsigned char *cur = buffer->cur;
+  unsigned char flags = 0;
+  unsigned int first_token = list->tokens_used;
+
+  list->line = CPP_BUF_LINE (buffer);
+  pfile->col_adjust = 0;
+ expanded:
+  token_limit = list->tokens + list->tokens_cap;
+  cur_token = list->tokens + list->tokens_used;
+
+  for (; cur < buffer->rlimit && cur_token < token_limit;)
+    {
+      unsigned char c = *cur++;
+
+      /* Optimize whitespace skipping, as most tokens are probably
+        separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
+
+      if (is_hspace ((unsigned int) c))
+       {
+         /* Step back to get the null warning and tab correction.  */
+         buffer->cur = cur - 1;
+         skip_whitespace (pfile, IS_DIRECTIVE ());
+         cur = buffer->cur;
+
+         flags = PREV_WHITESPACE;
+         if (cur == buffer->rlimit)
+           break;
+         c = *cur++;
+       }
+
+      /* Initialize current token.  Its type is set in the switch.  */
+      cur_token->col = CPP_BUF_COLUMN (buffer, cur);
+      cur_token->flags = flags;
+      flags = 0;
+
+      switch (c)
+       {
+       case '0': case '1': case '2': case '3': case '4':
+       case '5': case '6': case '7': case '8': case '9':
+         {
+           int prev_dot;
+
+           cur--;              /* Backup character.  */
+           prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
+           if (prev_dot)
+             cur_token--;
+           INIT_TOKEN_NAME (list, cur_token);
+           /* Prepend an immediately previous CPP_DOT token.  */
+           if (prev_dot)
+             {
+               if (list->name_cap == list->name_used)
+                 auto_expand_name_space (list);
+
+               cur_token->val.name.len = 1;
+               list->namebuf[list->name_used++] = '.';
+             }
+
+         continue_number:
+           cur_token->type = CPP_NUMBER; /* Before parse_number.  */
+           buffer->cur = cur;
+           parse_number (pfile, list, &cur_token->val.name);
+           cur = buffer->cur;
+           cur_token++;
+         }
+         break;
+
+       letter:
+       case '_':
+       case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+       case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+       case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+       case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+       case 'y': case 'z':
+       case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+       case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+       case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+       case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+       case 'Y': case 'Z':
+         cur--;                     /* Backup character.  */
+         INIT_TOKEN_NAME (list, cur_token);
+         cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
+
+       continue_name:
+         buffer->cur = cur;
+         parse_name (pfile, list, &cur_token->val.name);
+         cur = buffer->cur;
+
+         /* Find handler for newly created / extended directive.  */
+         if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
+           _cpp_check_directive (list, cur_token);
+         cur_token++;
+         break;
+
+       case '\'':
+         /* Fall through.  */
+       case '\"':
+         cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
+         /* Do we have a wide string?  */
+         if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
+             && cur_token[-1].val.name.len == 1
+             && cur_token[-1].val.name.text[0] == 'L'
+             && !CPP_TRADITIONAL (pfile))
+           {
+             /* No need for 'L' any more.  */
+             list->name_used--;
+             (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
+           }
+
+       do_parse_string:
+         /* Here c is one of ' " or >.  */
+         INIT_TOKEN_NAME (list, cur_token);
+         buffer->cur = cur;
+         parse_string2 (pfile, list, &cur_token->val.name, c,
+                        c == '"' && !IS_DIRECTIVE());
+         cur = buffer->cur;
+         cur_token++;
+         break;
+
+       case '/':
+         cur_token->type = CPP_DIV;
+         if (IMMED_TOKEN ())
+           {
+             if (PREV_TOKEN_TYPE == CPP_DIV)
+               {
+                 /* We silently allow C++ comments in system headers,
+                    irrespective of conformance mode, because lots of
+                    broken systems do that and trying to clean it up
+                    in fixincludes is a nightmare.  */
+                 if (CPP_IN_SYSTEM_HEADER (pfile))
+                   goto do_line_comment;
+                 else if (CPP_OPTION (pfile, cplusplus_comments))
+                   {
+                     if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
+                         && ! buffer->warned_cplusplus_comments)
+                       {
+                         buffer->cur = cur;
+                         cpp_pedwarn (pfile,
+                            "C++ style comments are not allowed in ISO C89");
+                         cpp_pedwarn (pfile,
+                         "(this will be reported only once per input file)");
+                         buffer->warned_cplusplus_comments = 1;
+                       }
+                   do_line_comment:
+                     buffer->cur = cur;
+                     if (cur[-2] != c)
+                       cpp_warning (pfile,
+                                    "comment start split across lines");
+                     if (skip_line_comment2 (pfile))
+                       cpp_error_with_line (pfile, list->line,
+                                            cur_token[-1].col,
+                                            "multi-line comment");
+
+                     /* Back-up to first '-' or '/'.  */
+                     cur_token--;
+                     if (!CPP_OPTION (pfile, discard_comments)
+                         && (!IS_DIRECTIVE() || list->dirno == 0))
+                       save_comment (list, cur_token++, cur,
+                                     buffer->cur - cur, c);
+                     cur = buffer->cur;
+
+                     if (!CPP_OPTION (pfile, traditional))
+                       flags = PREV_WHITESPACE;
+                     break;
+                   }
+               }
+           }
+         cur_token++;
+         break;
+                     
+       case '*':
+         cur_token->type = CPP_MULT;
+         if (IMMED_TOKEN ())
+           {
+             if (PREV_TOKEN_TYPE == CPP_DIV)
+               {
+                 buffer->cur = cur;
+                 if (cur[-2] != '/')
+                   cpp_warning (pfile,
+                                "comment start '/*' split across lines");
+                 if (skip_block_comment2 (pfile))
+                   cpp_error_with_line (pfile, list->line, cur_token[-1].col,
+                                        "unterminated comment");
+                 else if (buffer->cur[-2] != '*')
+                   cpp_warning (pfile,
+                                "comment end '*/' split across lines");
+
+                 /* Back up to opening '/'.  */
+                 cur_token--;
+                 if (!CPP_OPTION (pfile, discard_comments)
+                     && (!IS_DIRECTIVE() || list->dirno == 0))
+                   save_comment (list, cur_token++, cur,
+                                 buffer->cur - cur, c);
+                 cur = buffer->cur;
+
+                 if (!CPP_OPTION (pfile, traditional))
+                   flags = PREV_WHITESPACE;
+                 break;
+               }
+             else if (CPP_OPTION (pfile, cplusplus))
+               {
+                 /* In C++, there are .* and ->* operators.  */
+                 if (PREV_TOKEN_TYPE == CPP_DEREF)
+                   BACKUP_TOKEN (CPP_DEREF_STAR);
+                 else if (PREV_TOKEN_TYPE == CPP_DOT)
+                   BACKUP_TOKEN (CPP_DOT_STAR);
+               }
+           }
+         cur_token++;
+         break;
+
+       case '\n':
+       case '\r':
+         handle_newline (cur, buffer->rlimit, c);
+         if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
+           {
+             /* Remove the escaped newline.  Then continue to process
+                any interrupted name or number.  */
+             cur_token--;
+             if (IMMED_TOKEN ())
+               {
+                 cur_token--;
+                 if (cur_token->type == CPP_NAME)
+                   goto continue_name;
+                 else if (cur_token->type == CPP_NUMBER)
+                   goto continue_number;
+                 cur_token++;
+               }
+             /* Remember whitespace setting.  */
+             flags = cur_token->flags;
+             break;
+           }
+         if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
+           {
+             buffer->cur = cur;
+             cpp_warning (pfile, "backslash and newline separated by space");
+           }
+         /* Skip vertical space until we have at least one token to
+             return.  */
+         if (cur_token != &list->tokens[first_token])
+           goto out;
+         list->line = CPP_BUF_LINE (buffer);
+         break;
+
+       case '-':
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
+           {
+             if (CPP_OPTION (pfile, chill))
+               goto do_line_comment;
+             REVISE_TOKEN (CPP_MINUS_MINUS);
+           }
+         else
+           PUSH_TOKEN (CPP_MINUS);
+         break;
+
+         /* The digraph flag checking ensures that ## and %:%:
+            are interpreted as CPP_PASTE, but #%: and %:# are not.  */
+       make_hash:
+       case '#':
+         if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
+             && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
+           REVISE_TOKEN (CPP_PASTE);
+         else
+           PUSH_TOKEN (CPP_HASH);
+         break;
+
+       case ':':
+         cur_token->type = CPP_COLON;
+         if (IMMED_TOKEN ())
+           {
+             if (PREV_TOKEN_TYPE == CPP_COLON
+                 && CPP_OPTION (pfile, cplusplus))
+               BACKUP_TOKEN (CPP_SCOPE);
+             /* Digraph: "<:" is a '['  */
+             else if (PREV_TOKEN_TYPE == CPP_LESS)
+               BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
+             /* Digraph: "%:" is a '#'  */
+             else if (PREV_TOKEN_TYPE == CPP_MOD)
+               {
+                 (--cur_token)->flags |= DIGRAPH;
+                 goto make_hash;
+               }
+           }
+         cur_token++;
+         break;
+
+       case '&':
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
+           REVISE_TOKEN (CPP_AND_AND);
+         else
+           PUSH_TOKEN (CPP_AND);
+         break;
+
+       make_or:
+       case '|':
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
+           REVISE_TOKEN (CPP_OR_OR);
+         else
+           PUSH_TOKEN (CPP_OR);
+         break;
+
+       case '+':
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
+           REVISE_TOKEN (CPP_PLUS_PLUS);
+         else
+           PUSH_TOKEN (CPP_PLUS);
+         break;
+
+       case '=':
+           /* This relies on equidistance of "?=" and "?" tokens.  */
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
+           REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
+         else
+           PUSH_TOKEN (CPP_EQ);
+         break;
+
+       case '>':
+         cur_token->type = CPP_GREATER;
+         if (IMMED_TOKEN ())
+           {
+             if (PREV_TOKEN_TYPE == CPP_GREATER)
+               BACKUP_TOKEN (CPP_RSHIFT);
+             else if (PREV_TOKEN_TYPE == CPP_MINUS)
+               BACKUP_TOKEN (CPP_DEREF);
+             /* Digraph: ":>" is a ']'  */
+             else if (PREV_TOKEN_TYPE == CPP_COLON)
+               BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
+             /* Digraph: "%>" is a '}'  */
+             else if (PREV_TOKEN_TYPE == CPP_MOD)
+               BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
+           }
+         cur_token++;
+         break;
+         
+       case '<':
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
+           {
+             REVISE_TOKEN (CPP_LSHIFT);
+             break;
+           }
+         /* Is this the beginning of a header name?  */
+         if (list->flags & SYNTAX_INCLUDE)
+           {
+             c = '>';  /* Terminator.  */
+             cur_token->type = CPP_HEADER_NAME;
+             goto do_parse_string;
+           }
+         PUSH_TOKEN (CPP_LESS);
+         break;
+
+       case '%':
+         /* Digraph: "<%" is a '{'  */
+         cur_token->type = CPP_MOD;
+         if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
+           BACKUP_DIGRAPH (CPP_OPEN_BRACE);
+         cur_token++;
+         break;
+
+       case '?':
+         if (cur + 1 < buffer->rlimit && *cur == '?'
+             && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
+           {
+             /* Handle trigraph.  */
+             cur++;
+             switch (*cur++)
+               {
+               case '(': goto make_open_square;
+               case ')': goto make_close_square;
+               case '<': goto make_open_brace;
+               case '>': goto make_close_brace;
+               case '=': goto make_hash;
+               case '!': goto make_or;
+               case '-': goto make_complement;
+               case '/': goto make_backslash;
+               case '\'': goto make_xor;
+               }
+           }
+         if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
+           {
+             /* GNU C++ defines <? and >? operators.  */
+             if (PREV_TOKEN_TYPE == CPP_LESS)
+               {
+                 REVISE_TOKEN (CPP_MIN);
+                 break;
+               }
+             else if (PREV_TOKEN_TYPE == CPP_GREATER)
+               {
+                 REVISE_TOKEN (CPP_MAX);
+                 break;
+               }
+           }
+         PUSH_TOKEN (CPP_QUERY);
+         break;
+
+       case '.':
+         if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
+             && IMMED_TOKEN ()
+             && !(cur_token[-1].flags & PREV_WHITESPACE))
+           {
+             cur_token -= 2;
+             PUSH_TOKEN (CPP_ELLIPSIS);
+           }
+         else
+           PUSH_TOKEN (CPP_DOT);
+         break;
+
+       make_complement:
+       case '~': PUSH_TOKEN (CPP_COMPL); break;
+       make_xor:
+       case '^': PUSH_TOKEN (CPP_XOR); break;
+       make_open_brace:
+       case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
+       make_close_brace:
+       case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
+       make_open_square:
+       case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
+       make_close_square:
+       case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
+       make_backslash:
+       case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
+       case '!': PUSH_TOKEN (CPP_NOT); break;
+       case ',': PUSH_TOKEN (CPP_COMMA); break;
+       case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
+       case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
+       case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
+
+       case '$':
+         if (CPP_OPTION (pfile, dollars_in_ident))
+           goto letter;
+         /* Fall through */
+       default:
+         cur_token->aux = c;
+         cur_token->val.name.len = 0; /* FIXME: needed for transition only */
+         PUSH_TOKEN (CPP_OTHER);
+         break;
+       }
+    }
+
+  /* Run out of token space?  */
+  if (cur_token == token_limit)
+    {
+      list->tokens_used = cur_token - list->tokens;
+      _cpp_expand_token_space (list, 256);
+      goto expanded;
+    }
+
+  cur_token->flags = flags;
+  if (cur_token == &list->tokens[first_token])
+    {
+      /* FIXME: move this warning to callers who care.  */
+      if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
+       cpp_warning (pfile, "no newline at end of file");
+      cur_token++->type = CPP_EOF;
+    }
+
+ out:
+  list->tokens[first_token].flags |= BOL;
+  buffer->cur = cur;
+  list->tokens_used = cur_token - list->tokens;
+}
+
+/* Write the spelling of a token TOKEN to BUFFER.  The buffer must
+   already contain the enough space to hold the token's spelling.  If
+   WHITESPACE is true, and the token was preceded by whitespace,
+   output a single space before the token proper.  Returns a pointer
+   to the character after the last character written.  */
+
+static unsigned char *
+spell_token (pfile, token, buffer, whitespace)
+     cpp_reader *pfile;                /* Would be nice to be rid of this...  */
+     const cpp_token *token;
+     unsigned char *buffer;
+     int whitespace;
+{
+  /* Whitespace will not be wanted by handlers of the # and ##
+     operators calling this function, but will be wanted by the
+     function that writes out the preprocessed file.  */
+  if (whitespace && token->flags & PREV_WHITESPACE)
+    *buffer++ = ' ';
+
+  switch (token_spellings[token->type].type)
+    {
+    case SPELL_OPERATOR:
+      {
+       const unsigned char *spelling;
+       unsigned char c;
+
+       if (token->flags & DIGRAPH)
+         spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
+       else
+         spelling = token_spellings[token->type].spelling;
+       
+       while ((c = *spelling++) != '\0')
+         *buffer++ = c;
+      }
+      break;
+
+    case SPELL_IDENT:
+      memcpy (buffer, token->val.name.text, token->val.name.len);
+      buffer += token->val.name.len;
+      break;
+
+    case SPELL_STRING:
+      {
+       unsigned char c;
+
+       if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
+         *buffer++ = 'L';
+       c = '\'';
+       if (token->type == CPP_STRING || token->type == CPP_WSTRING)
+         c = '"';
+       *buffer++ = c;
+       memcpy (buffer, token->val.name.text, token->val.name.len);
+       buffer += token->val.name.len;
+       *buffer++ = c;
+      }
+      break;
+
+    case SPELL_CHAR:
+      *buffer++ = token->aux;
+      break;
+
+    case SPELL_NONE:
+      cpp_ice (pfile, "Unspellable token");
+      break;
+    }
+
+  return buffer;
+}
+
+/* Temporary function for illustrative purposes.  */
+void
+_cpp_lex_file (pfile)
+     cpp_reader* pfile;
+{
+  cpp_toklist* list;
+
+  init_trigraph_map ();
+  list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
+  _cpp_init_toklist (list, DUMMY_TOKEN);
+
+  for (;;)
+    {
+      _cpp_lex_line (pfile, list);
+      if (list->tokens[0].type == CPP_EOF)
+       break;
+
+#if 0
+      if (list->dirno)
+       _cpp_handle_directive (pfile, list);
+      else
+#endif
+       _cpp_output_list (pfile, list);
+      _cpp_clear_toklist (list);
+    }
+}
+
+/* Temporary function for illustrative purposes.  */
+static void
+_cpp_output_list (pfile, list)
+     cpp_reader *pfile;
+     cpp_toklist *list;
+{
+  unsigned int i;
+
+  for (i = 0; i < list->tokens_used; i++)
+    {
+      CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
+      pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);
+    }
+}
+
+#endif