2000-06-26 Joseph S. Myers <jsm28@cam.ac.uk>

[pf3gnuchains/gcc-fork.git] / gcc / c-lex.c
diff --git a/gcc/c-lex.c b/gcc/c-lex.c

index d32f7a8..8b917ff 100644 (file)
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -1,5 +1,6 @@
  /* Lexical analyzer for C and Objective C.
-   Copyright (C) 1987, 88, 89, 92, 94-96, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
+   1998, 1999, 2000 Free Software Foundation, Inc.
  
  This file is part of GNU CC.
  
@@ -20,16 +21,20 @@ Boston, MA 02111-1307, USA.  */
  
  #include "config.h"
  #include "system.h"
-#include <setjmp.h>
  
  #include "rtl.h"
  #include "tree.h"
  #include "input.h"
+#include "output.h"
  #include "c-lex.h"
  #include "c-tree.h"
  #include "flags.h"
  #include "c-parse.h"
  #include "c-pragma.h"
+#include "toplev.h"
+#include "intl.h"
+#include "ggc.h"
+#include "tm_p.h"
  
  /* MULTIBYTE_CHARS support only works for native compilers.
     ??? Ideally what we want is to model widechar support after
@@ -39,16 +44,24 @@ Boston, MA 02111-1307, USA.  */
  #endif
  
  #ifdef MULTIBYTE_CHARS
+#include "mbchar.h"
  #include <locale.h>
+#endif /* MULTIBYTE_CHARS */
+#ifndef GET_ENVIRONMENT
+#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
  #endif
  
  #if USE_CPPLIB
  #include "cpplib.h"
-cpp_reader parse_in;
-cpp_options parse_options;
-static enum cpp_token cpp_token;
+extern cpp_reader  parse_in;
+extern cpp_options parse_options;
+#else
+/* Stream for reading from the input file.  */
+FILE *finput;
  #endif
  
+extern void yyprint                    PARAMS ((FILE *, int, YYSTYPE));
+
  /* The elements of `ridpointers' are identifier nodes
     for the reserved type names and storage classes.
     It is indexed by a RID_... value.  */
@@ -58,55 +71,68 @@ tree ridpointers[(int) RID_MAX];
  #define YYDEBUG 1
  
  #if USE_CPPLIB
-static unsigned char *yy_cur, *yy_lim;
+extern unsigned char *yy_cur, *yy_lim;
+extern enum cpp_token cpp_token;
  
-int
-yy_get_token ()
+extern int yy_get_token ();
+
+#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
+#define UNGETC(c) ((c) == EOF ? 0 : yy_cur--)
+
+#else /* ! USE_CPPLIB */
+
+#define GETC() getch ()
+#define UNGETC(c) put_back (c)
+
+struct putback_buffer {
+  unsigned char *buffer;
+  int   buffer_size;
+  int   index;
+};
+
+static struct putback_buffer putback = {NULL, 0, -1};
+
+static inline int getch PARAMS ((void));
+
+static inline int
+getch ()
  {
-  for (;;)
+  if (putback.index != -1)
      {
-      parse_in.limit = parse_in.token_buffer;
-      cpp_token = cpp_get_token (&parse_in);
-      if (cpp_token == CPP_EOF)
-       return -1;
-      yy_lim = CPP_PWRITTEN (&parse_in);
-      yy_cur = parse_in.token_buffer;
-      if (yy_cur < yy_lim)
-       return *yy_cur++;
+      int ch = putback.buffer[putback.index];
+      --putback.index;
+      return ch;
      }
+  return getc (finput);
  }
  
-#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
-#define UNGETC(c) ((c), yy_cur--)
-#else
-#define GETC() getc (finput)
-#define UNGETC(c) ungetc (c, finput)
-#endif
+static inline void put_back PARAMS ((int));
  
-/* the declaration found for the last IDENTIFIER token read in.
-   yylex must look this up to detect typedefs, which get token type TYPENAME,
-   so it is left around in case the identifier is not a typedef but is
-   used in a context which makes it a reference to a variable.  */
-tree lastiddecl;
-
-/* Nonzero enables objc features.  */
-
-int doing_objc_thang;
+static inline void
+put_back (ch)
+     int ch;
+{
+  if (ch != EOF)
+    {
+      if (putback.index == putback.buffer_size - 1)
+       {
+         putback.buffer_size += 16;
+         putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
+       }
+      putback.buffer[++putback.index] = ch;
+    }
+}
+#endif /* ! USE_CPPLIB */
  
-extern tree is_class_name ();
+int linemode;
  
  extern int yydebug;
  
  /* File used for outputting assembler code.  */
  extern FILE *asm_out_file;
  
-#ifndef WCHAR_TYPE_SIZE
-#ifdef INT_TYPE_SIZE
-#define WCHAR_TYPE_SIZE INT_TYPE_SIZE
-#else
-#define WCHAR_TYPE_SIZE        BITS_PER_WORD
-#endif
-#endif
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
  
  /* Number of bytes in a wide character.  */
  #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
@@ -116,23 +142,24 @@ char *token_buffer;       /* Pointer to token buffer.
                            Actual allocated length is maxtoken + 2.
                            This is not static because objc-parse.y uses it.  */
  
-static int indent_level = 0;        /* Number of { minus number of }. */
+static int indent_level;        /* Number of { minus number of }. */
+
+/* Nonzero tells yylex to ignore \ in string constants.  */
+static int ignore_escape_flag;
  
  /* Nonzero if end-of-file has been seen on input.  */
  static int end_of_file;
  
-#if !USE_CPPLIB
-/* Buffered-back input character; faster than using ungetc.  */
-static int nextchar = -1;
-#endif
-
-#ifdef HANDLE_SYSV_PRAGMA
-static int handle_sysv_pragma          PROTO((int));
-#endif /* HANDLE_SYSV_PRAGMA */
-static int skip_white_space            PROTO((int));
-static char *extend_token_buffer       PROTO((char *));
-static int readescape                  PROTO((int *));
-int check_newline ();
+#ifdef HANDLE_GENERIC_PRAGMAS
+static int handle_generic_pragma       PARAMS ((int));
+#endif /* HANDLE_GENERIC_PRAGMAS */
+static int whitespace_cr               PARAMS ((int));
+static int skip_white_space            PARAMS ((int));
+static char *extend_token_buffer       PARAMS ((const char *));
+static int readescape                  PARAMS ((int *));
+static void parse_float                        PARAMS ((PTR));
+static void extend_token_buffer_to     PARAMS ((int));
+static int read_line_number            PARAMS ((int *));
  \f
  /* Do not insert generated code into the source, instead, include it.
     This allows us to build gcc automatically even for targets that
@@ -179,34 +206,64 @@ remember_protocol_qualifiers ()
        wordlist[i].name = "inout";
      else if (wordlist[i].rid == RID_BYCOPY)
        wordlist[i].name = "bycopy";
+    else if (wordlist[i].rid == RID_BYREF)
+      wordlist[i].name = "byref";
      else if (wordlist[i].rid == RID_ONEWAY)
-      wordlist[i].name = "oneway";   
+      wordlist[i].name = "oneway";
  }
  \f
-#if USE_CPPLIB
-void
+const char *
  init_parse (filename)
-     char *filename;
+     const char *filename;
  {
-  init_lex ();
-  yy_cur = "\n";
-  yy_lim = yy_cur+1;
+#if !USE_CPPLIB
+  /* Open input file.  */
+  if (filename == 0 || !strcmp (filename, "-"))
+    {
+      finput = stdin;
+      filename = "stdin";
+    }
+  else
+    finput = fopen (filename, "r");
+  if (finput == 0)
+    pfatal_with_name (filename);
  
-  cpp_reader_init (&parse_in);
-  parse_in.data = &parse_options;
-  cpp_options_init (&parse_options);
-  cpp_handle_options (&parse_in, 0, NULL); /* FIXME */
+#ifdef IO_BUFFER_SIZE
+  setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
+#endif
+#else /* USE_CPPLIB */
    parse_in.show_column = 1;
    if (! cpp_start_read (&parse_in, filename))
      abort ();
+
+  if (filename == 0 || !strcmp (filename, "-"))
+    filename = "stdin";
+
+  /* cpp_start_read always puts at least one line directive into the
+     token buffer.  We must arrange to read it out here. */
+  yy_cur = parse_in.token_buffer;
+  yy_lim = CPP_PWRITTEN (&parse_in);
+  cpp_token = CPP_DIRECTIVE;
+#endif
+
+  add_c_tree_codes ();
+  
+  init_lex ();
+  init_pragma ();
+
+  return filename;
  }
  
  void
  finish_parse ()
  {
+#if USE_CPPLIB
    cpp_finish (&parse_in);
-}
+  errorcount += parse_in.errors;
+#else
+  fclose (finput);
  #endif
+}
  
  void
  init_lex ()
@@ -221,6 +278,7 @@ init_lex ()
  #ifdef MULTIBYTE_CHARS
    /* Change to the native locale for multibyte conversions.  */
    setlocale (LC_CTYPE, "");
+  GET_ENVIRONMENT (literal_codeset, "LANG");
  #endif
  
    maxtoken = 40;
@@ -237,7 +295,10 @@ init_lex ()
    ridpointers[(int) RID_SIGNED] = get_identifier ("signed");
    ridpointers[(int) RID_INLINE] = get_identifier ("inline");
    ridpointers[(int) RID_CONST] = get_identifier ("const");
+  ridpointers[(int) RID_RESTRICT] = get_identifier ("restrict");
    ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");
+  ridpointers[(int) RID_BOUNDED] = get_identifier ("__bounded");
+  ridpointers[(int) RID_UNBOUNDED] = get_identifier ("__unbounded");
    ridpointers[(int) RID_AUTO] = get_identifier ("auto");
    ridpointers[(int) RID_STATIC] = get_identifier ("static");
    ridpointers[(int) RID_EXTERN] = get_identifier ("extern");
@@ -250,6 +311,7 @@ init_lex ()
    ridpointers[(int) RID_OUT] = get_identifier ("out");
    ridpointers[(int) RID_INOUT] = get_identifier ("inout");
    ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");
+  ridpointers[(int) RID_BYREF] = get_identifier ("byref");
    ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");
    forget_protocol_qualifiers();
  
@@ -265,6 +327,7 @@ init_lex ()
    if (flag_traditional)
      {
        UNSET_RESERVED_WORD ("const");
+      UNSET_RESERVED_WORD ("restrict");
        UNSET_RESERVED_WORD ("volatile");
        UNSET_RESERVED_WORD ("typeof");
        UNSET_RESERVED_WORD ("signed");
@@ -272,6 +335,9 @@ init_lex ()
        UNSET_RESERVED_WORD ("iterator");
        UNSET_RESERVED_WORD ("complex");
      }
+  else if (!flag_isoc99)
+    UNSET_RESERVED_WORD ("restrict");
+
    if (flag_no_asm)
      {
        UNSET_RESERVED_WORD ("asm");
@@ -334,7 +400,8 @@ yyprint (file, yychar, yylval)
  \f
  /* Iff C is a carriage return, warn about it - if appropriate -
     and return nonzero.  */
-int
+
+static int
  whitespace_cr (c)
       int c;
  {
@@ -371,6 +438,11 @@ skip_white_space (c)
              Also, there's no need, since cpp removes all comments.  */
  
         case '\n':
+         if (linemode)
+           {
+             UNGETC (c);
+             return EOF;
+           }
           c = check_newline ();
           break;
  
@@ -379,7 +451,14 @@ skip_white_space (c)
         case '\f':
         case '\v':
         case '\b':
-         c = GETC();
+#if USE_CPPLIB
+         /* While processing a # directive we don't get CPP_HSPACE
+            tokens, so we also need to handle whitespace the normal way.  */
+         if (cpp_token == CPP_HSPACE)
+           c = yy_get_token ();
+         else
+#endif
+           c = GETC();
           break;
  
         case '\r':
@@ -402,502 +481,388 @@ skip_white_space (c)
      }
  }
  
-/* Skips all of the white space at the current location in the input file.
-   Must use and reset nextchar if it has the next character.  */
+/* Skips all of the white space at the current location in the input file.  */
  
  void
  position_after_white_space ()
  {
    register int c;
  
-#if !USE_CPPLIB
-  if (nextchar != -1)
-    c = nextchar, nextchar = -1;
-  else
-#endif
-    c = GETC();
+  c = GETC();
  
    UNGETC (skip_white_space (c));
  }
  
-/* Like skip_white_space, but don't advance beyond the end of line.
-   Moreover, we don't get passed a character to start with.  */
-static int
-skip_white_space_on_line ()
-{
-  register int c;
-
-  while (1)
-    {
-      c = GETC();
-      switch (c)
-       {
-       case '\n':
-       default:
-         break;
-
-       case ' ':
-       case '\t':
-       case '\f':
-       case '\v':
-       case '\b':
-         continue;
-
-       case '\r':
-         whitespace_cr (c);
-         continue;
-       }
-      break;
-    }
-  return c;
-}
-
  /* Make the token buffer longer, preserving the data in it.
     P should point to just beyond the last valid character in the old buffer.
     The value we return is a pointer to the new buffer
     at a place corresponding to P.  */
  
+static void
+extend_token_buffer_to (size)
+     int size;
+{
+  do
+    maxtoken = maxtoken * 2 + 10;
+  while (maxtoken < size);
+  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
+}
+
  static char *
  extend_token_buffer (p)
-     char *p;
+     const char *p;
  {
    int offset = p - token_buffer;
-
-  maxtoken = maxtoken * 2 + 10;
-  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
-
+  extend_token_buffer_to (offset);
    return token_buffer + offset;
  }
  \f
-#if !USE_CPPLIB
-#define GET_DIRECTIVE_LINE() get_directive_line (finput)
-#else /* USE_CPPLIB */
-/* Read the rest of a #-directive from input stream FINPUT.
-   In normal use, the directive name and the white space after it
-   have already been read, so they won't be included in the result.
-   We allow for the fact that the directive line may contain
-   a newline embedded within a character or string literal which forms
-   a part of the directive.
-
-   The value is a string in a reusable buffer.  It remains valid
-   only until the next time this function is called.  */
+#if defined HANDLE_PRAGMA
+/* Local versions of these macros, that can be passed as function pointers.  */
+static int
+pragma_getc ()
+{
+  return GETC ();
+}
  
-static char *
-GET_DIRECTIVE_LINE ()
+static void
+pragma_ungetc (arg)
+     int arg;
  {
-  static char *directive_buffer = NULL;
-  static unsigned buffer_length = 0;
-  register char *p;
-  register char *buffer_limit;
-  register int looking_for = 0;
-  register int char_escaped = 0;
+  UNGETC (arg);
+}
+#endif
+
+static int
+read_line_number (num)
+     int *num;
+{
+  register int token = yylex ();
  
-  if (buffer_length == 0)
+  if (token == CONSTANT
+      && TREE_CODE (yylval.ttype) == INTEGER_CST)
      {
-      directive_buffer = (char *)xmalloc (128);
-      buffer_length = 128;
+      *num = TREE_INT_CST_LOW (yylval.ttype);
+      return 1;
      }
-
-  buffer_limit = &directive_buffer[buffer_length];
-
-  for (p = directive_buffer; ; )
+  else
      {
-      int c;
-
-      /* Make buffer bigger if it is full.  */
-      if (p >= buffer_limit)
-        {
-         register unsigned bytes_used = (p - directive_buffer);
-
-         buffer_length *= 2;
-         directive_buffer
-           = (char *)xrealloc (directive_buffer, buffer_length);
-         p = &directive_buffer[bytes_used];
-         buffer_limit = &directive_buffer[buffer_length];
-        }
-
-      c = GETC ();
-
-      /* Discard initial whitespace.  */
-      if ((c == ' ' || c == '\t') && p == directive_buffer)
-       continue;
-
-      /* Detect the end of the directive.  */
-      if (c == '\n' && looking_for == 0)
-       {
-          UNGETC (c);
-         c = '\0';
-       }
-
-      *p++ = c;
-
-      if (c == 0)
-       return directive_buffer;
-
-      /* Handle string and character constant syntax.  */
-      if (looking_for)
-       {
-         if (looking_for == c && !char_escaped)
-           looking_for = 0;    /* Found terminator... stop looking.  */
-       }
-      else
-        if (c == '\'' || c == '"')
-         looking_for = c;      /* Don't stop buffering until we see another
-                                  another one of these (or an EOF).  */
-
-      /* Handle backslash.  */
-      char_escaped = (c == '\\' && ! char_escaped);
+      if (token != END_OF_LINE)
+       error ("invalid #-line");
+      return 0;
      }
  }
-#endif /* USE_CPPLIB */
-\f
+  
  /* At the beginning of a line, increment the line number
     and process any #-directive on this line.
     If the line is a #-directive, read the entire line and return a newline.
-   Otherwise, return the line's first non-whitespace character.  */
+   Otherwise, return the line's first non-whitespace character.
+
+   Note that in the case of USE_CPPLIB, we get the whole line as one
+   CPP_DIRECTIVE token.  */
  
  int
  check_newline ()
  {
    register int c;
    register int token;
+  int saw_line;
+  enum { act_none, act_push, act_pop } action;
+  int old_lineno, action_number, l;
  
-  lineno++;
-
+ restart:
    /* Read first nonwhite char on the line.  */
  
-  c = GETC();
-  while (c == ' ' || c == '\t')
-    c = GETC();
+#ifdef USE_CPPLIB
+  c = GETC ();
+  /* In some cases where we're leaving an include file, we can get multiple
+     CPP_HSPACE tokens in a row, so we need to loop.  */
+  while (cpp_token == CPP_HSPACE)
+    c = yy_get_token ();
+#else
+  do
+    c = GETC ();
+  while (c == ' ' || c == '\t');
+#endif
+
+  lineno++;
  
    if (c != '#')
      {
+      /* Sequences of multiple newlines are very common; optimize them.  */
+      if (c == '\n')
+       goto restart;
+
        /* If not #, return it so caller will use it.  */
        return c;
      }
  
-  /* Read first nonwhite char after the `#'.  */
-
-  c = GETC();
-  while (c == ' ' || c == '\t')
-    c = GETC();
+  /* Don't read beyond this line.  */
+  saw_line = 0;
+  linemode = 1;
+  
+#if USE_CPPLIB
+  if (cpp_token == CPP_VSPACE)
+    {
+      /* Format is "<space> <line number> <filename> <newline>".
+        Only the line number is interesting, and even that
+        we can get more efficiently than scanning the line.  */
+      yy_cur = yy_lim - 1;
+      lineno = parse_in.lineno - 1;
+      goto skipline;
+    }
+#endif
  
-  /* If a letter follows, then if the word here is `line', skip
-     it and ignore it; otherwise, ignore the line, with an error
-     if the word isn't `pragma', `ident', `define', or `undef'.  */
+  token = yylex ();
  
-  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+  if (token == IDENTIFIER)
      {
-      if (c == 'p')
+      /* If a letter follows, then if the word here is `line', skip
+        it and ignore it; otherwise, ignore the line, with an error
+        if the word isn't `pragma'.  */
+
+      const char *name = IDENTIFIER_POINTER (yylval.ttype);
+
+      if (!strcmp (name, "pragma"))
         {
-         if (GETC() == 'r'
-             && GETC() == 'a'
-             && GETC() == 'g'
-             && GETC() == 'm'
-             && GETC() == 'a'
-             && ((c = GETC()) == ' ' || c == '\t' || c == '\n'
-                  || whitespace_cr (c) ))
-           {
-             while (c == ' ' || c == '\t' || whitespace_cr (c))
-               c = GETC ();
-             if (c == '\n')
-               return c;
-#ifdef HANDLE_SYSV_PRAGMA
-             UNGETC (c);
-             token = yylex ();
-             if (token != IDENTIFIER)
-               goto skipline;
-             return handle_sysv_pragma (token);
-#else /* !HANDLE_SYSV_PRAGMA */
+         token = yylex ();
+         if (token != IDENTIFIER
+             || TREE_CODE (yylval.ttype) != IDENTIFIER_NODE)
+           goto skipline;
+
  #ifdef HANDLE_PRAGMA
-#if !USE_CPPLIB
-             UNGETC (c);
-             token = yylex ();
-             if (token != IDENTIFIER)
-               goto skipline;
-             if (HANDLE_PRAGMA (finput, yylval.ttype))
-               {
-                 c = GETC ();
-                 return c;
-               }
-#else
-             ??? do not know what to do ???;
-#endif /* !USE_CPPLIB */
+         /* We invoke HANDLE_PRAGMA before HANDLE_GENERIC_PRAGMAS
+            (if both are defined), in order to give the back
+            end a chance to override the interpretation of
+            SYSV style pragmas.  */
+         if (HANDLE_PRAGMA (pragma_getc, pragma_ungetc,
+                            IDENTIFIER_POINTER (yylval.ttype)))
+           goto skipline;
  #endif /* HANDLE_PRAGMA */
-#endif /* !HANDLE_SYSV_PRAGMA */
-             goto skipline;
-           }
-       }
+             
+#ifdef HANDLE_GENERIC_PRAGMAS
+         if (handle_generic_pragma (token))
+           goto skipline;
+#endif /* HANDLE_GENERIC_PRAGMAS */
+
+         /* Issue a warning message if we have been asked to do so.
+            Ignoring unknown pragmas in system header file unless
+            an explicit -Wunknown-pragmas has been given. */
+         if (warn_unknown_pragmas > 1
+             || (warn_unknown_pragmas && ! in_system_header))
+           warning ("ignoring pragma: %s", token_buffer);
  
-      else if (c == 'd')
+         goto skipline;
+       }
+      else if (!strcmp (name, "define"))
         {
-         if (GETC() == 'e'
-             && GETC() == 'f'
-             && GETC() == 'i'
-             && GETC() == 'n'
-             && GETC() == 'e'
-             && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
-           {
-             if (c != '\n')
-               debug_define (lineno, GET_DIRECTIVE_LINE ());
-             goto skipline;
-           }
+         debug_define (lineno, GET_DIRECTIVE_LINE ());
+         goto skipline;
         }
-      else if (c == 'u')
+      else if (!strcmp (name, "undef"))
         {
-         if (GETC() == 'n'
-             && GETC() == 'd'
-             && GETC() == 'e'
-             && GETC() == 'f'
-             && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
-           {
-             if (c != '\n')
-               debug_undef (lineno, GET_DIRECTIVE_LINE ());
-             goto skipline;
-           }
+         debug_undef (lineno, GET_DIRECTIVE_LINE ());
+         goto skipline;
         }
-      else if (c == 'l')
+      else if (!strcmp (name, "line"))
         {
-         if (GETC() == 'i'
-             && GETC() == 'n'
-             && GETC() == 'e'
-             && ((c = GETC()) == ' ' || c == '\t'))
-           goto linenum;
+         saw_line = 1;
+         token = yylex ();
+         goto linenum;
         }
-      else if (c == 'i')
+      else if (!strcmp (name, "ident"))
         {
-         if (GETC() == 'd'
-             && GETC() == 'e'
-             && GETC() == 'n'
-             && GETC() == 't'
-             && ((c = GETC()) == ' ' || c == '\t'))
-           {
-             /* #ident.  The pedantic warning is now in cccp.c.  */
-
-             /* Here we have just seen `#ident '.
-                A string constant should follow.  */
-
-             c = skip_white_space_on_line ();
+         /* #ident.  The pedantic warning is now in cpp.  */
  
-             /* If no argument, ignore the line.  */
-             if (c == '\n')
-               return c;
+         /* Here we have just seen `#ident '.
+            A string constant should follow.  */
  
-             UNGETC (c);
-             token = yylex ();
-             if (token != STRING
-                 || TREE_CODE (yylval.ttype) != STRING_CST)
-               {
-                 error ("invalid #ident");
-                 goto skipline;
-               }
+         token = yylex ();
+         if (token == END_OF_LINE)
+           goto skipline;
+         if (token != STRING
+             || TREE_CODE (yylval.ttype) != STRING_CST)
+           {
+             error ("invalid #ident");
+             goto skipline;
+           }
  
-             if (!flag_no_ident)
-               {
+         if (! flag_no_ident)
+           {
  #ifdef ASM_OUTPUT_IDENT
-                 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (yylval.ttype));
+             ASM_OUTPUT_IDENT (asm_out_file,
+                               TREE_STRING_POINTER (yylval.ttype));
  #endif
-               }
-
-             /* Skip the rest of this line.  */
-             goto skipline;
             }
+
+         /* Skip the rest of this line.  */
+         goto skipline;
         }
  
-      error ("undefined or invalid # directive");
+      error ("undefined or invalid # directive `%s'", name);
        goto skipline;
      }
  
+  /* If the # is the only nonwhite char on the line,
+     just ignore it.  Check the new newline.  */
+  if (token == END_OF_LINE)
+    goto skipline;
+
  linenum:
    /* Here we have either `#line' or `# <nonletter>'.
       In either case, it should be a line number; a digit should follow.  */
  
-  /* Can't use skip_white_space here, but must handle all whitespace
-     that is not '\n', lest we get a recursion for '\r' '\n' when
-     calling yylex.  */
-  UNGETC (c);
-  c = skip_white_space_on_line ();
+  if (token != CONSTANT
+      || TREE_CODE (yylval.ttype) != INTEGER_CST)
+    {
+      error ("invalid #-line");
+      goto skipline;
+    }
  
-  /* If the # is the only nonwhite char on the line,
-     just ignore it.  Check the new newline.  */
-  if (c == '\n')
-    return c;
+  /* subtract one, because it is the following line that
+     gets the specified number */
  
-  /* Something follows the #; read a token.  */
+  l = TREE_INT_CST_LOW (yylval.ttype) - 1;
  
-  UNGETC (c);
-  token = yylex ();
+  /* More follows: it must be a string constant (filename).
+     It would be neat to use cpplib to quickly process the string, but
+     (1) we don't have a handy tokenization of the string, and
+     (2) I don't know how well that would work in the presence
+     of filenames that contain wide characters.  */
  
-  if (token == CONSTANT
-      && TREE_CODE (yylval.ttype) == INTEGER_CST)
+  if (saw_line)
      {
-      int old_lineno = lineno;
-      int used_up = 0;
-      /* subtract one, because it is the following line that
-        gets the specified number */
-
-      int l = TREE_INT_CST_LOW (yylval.ttype) - 1;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = skip_white_space_on_line ();
-      if (c == '\n')
-       {
-         /* No more: store the line number and check following line.  */
-         lineno = l;
-         return c;
-       }
-      UNGETC (c);
-
-      /* More follows: it must be a string constant (filename).  */
+      /* Don't treat \ as special if we are processing #line 1 "...".
+        If you want it to be treated specially, use # 1 "...".  */
+      ignore_escape_flag = 1;
+    }
  
-      /* Read the string constant.  */
-      token = yylex ();
+  /* Read the string constant.  */
+  token = yylex ();
  
-      if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
-       {
-         error ("invalid #line");
-         goto skipline;
-       }
+  ignore_escape_flag = 0;
  
-      input_filename
-       = (char *) permalloc (TREE_STRING_LENGTH (yylval.ttype) + 1);
-      strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
+  if (token == END_OF_LINE)
+    {
+      /* No more: store the line number and check following line.  */
        lineno = l;
+      goto skipline;
+    }
  
-      /* Each change of file name
-        reinitializes whether we are now in a system header.  */
-      in_system_header = 0;
-
-      if (main_input_filename == 0)
-       main_input_filename = input_filename;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = skip_white_space_on_line ();
-      if (c == '\n')
-       {
-         /* Update the name in the top element of input_file_stack.  */
-         if (input_file_stack)
-           input_file_stack->name = input_filename;
-
-         return c;
-       }
-      UNGETC (c);
+  if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
+    {
+      error ("invalid #line");
+      goto skipline;
+    }
  
-      token = yylex ();
-      used_up = 0;
+  input_filename = TREE_STRING_POINTER (yylval.ttype);
  
-      /* `1' after file name means entering new file.
-        `2' after file name means just left a file.  */
+  if (main_input_filename == 0)
+    main_input_filename = input_filename;
  
-      if (token == CONSTANT
-         && TREE_CODE (yylval.ttype) == INTEGER_CST)
-       {
-         if (TREE_INT_CST_LOW (yylval.ttype) == 1)
-           {
-             /* Pushing to a new file.  */
-             struct file_stack *p
-               = (struct file_stack *) xmalloc (sizeof (struct file_stack));
-             input_file_stack->line = old_lineno;
-             p->next = input_file_stack;
-             p->name = input_filename;
-             p->indent_level = indent_level;
-             input_file_stack = p;
-             input_file_stack_tick++;
-             debug_start_source_file (input_filename);
-             used_up = 1;
-           }
-         else if (TREE_INT_CST_LOW (yylval.ttype) == 2)
-           {
-             /* Popping out of a file.  */
-             if (input_file_stack->next)
-               {
-                 struct file_stack *p = input_file_stack;
-                 if (indent_level != p->indent_level)
-                   {
-                     warning_with_file_and_line 
-                       (p->name, old_lineno,
-                        "This file contains more `%c's than `%c's.",
-                        indent_level > p->indent_level ? '{' : '}',
-                        indent_level > p->indent_level ? '}' : '{');
-                   }
-                 input_file_stack = p->next;
-                 free (p);
-                 input_file_stack_tick++;
-                 debug_end_source_file (input_file_stack->line);
-               }
-             else
-               error ("#-lines for entering and leaving files don't match");
+  old_lineno = lineno;
+  action = act_none;
+  action_number = 0;
+  lineno = l;
  
-             used_up = 1;
-           }
-       }
+  /* Each change of file name
+     reinitializes whether we are now in a system header.  */
+  in_system_header = 0;
  
-      /* Now that we've pushed or popped the input stack,
-        update the name in the top element.  */
+  if (!read_line_number (&action_number))
+    {
+      /* Update the name in the top element of input_file_stack.  */
        if (input_file_stack)
         input_file_stack->name = input_filename;
+    }
  
-      /* If we have handled a `1' or a `2',
-        see if there is another number to read.  */
-      if (used_up)
-       {
-         /* Is this the last nonwhite stuff on the line?  */
-         c = skip_white_space_on_line ();
-         if (c == '\n')
-           return c;
-         UNGETC (c);
-
-         token = yylex ();
-         used_up = 0;
-       }
+  /* `1' after file name means entering new file.
+     `2' after file name means just left a file.  */
  
+  if (action_number == 1)
+    {
+      action = act_push;
+      read_line_number (&action_number);
+    }
+  else if (action_number == 2)
+    {
+      action = act_pop;
+      read_line_number (&action_number);
+    }
+  if (action_number == 3)
+    {
        /* `3' after file name means this is a system header file.  */
+      in_system_header = 1;
+      read_line_number (&action_number);
+    }
  
-      if (token == CONSTANT
-         && TREE_CODE (yylval.ttype) == INTEGER_CST
-         && TREE_INT_CST_LOW (yylval.ttype) == 3)
-       in_system_header = 1, used_up = 1;
+  /* Do the actions implied by the preceding numbers.  */
  
-      if (used_up)
+  if (action == act_push)
+    {
+      /* Pushing to a new file.  */
+      struct file_stack *p
+       = (struct file_stack *) xmalloc (sizeof (struct file_stack));
+      input_file_stack->line = old_lineno;
+      p->next = input_file_stack;
+      p->name = input_filename;
+      p->indent_level = indent_level;
+      input_file_stack = p;
+      input_file_stack_tick++;
+      debug_start_source_file (input_filename);
+    }
+  else if (action == act_pop)
+    {
+      /* Popping out of a file.  */
+      if (input_file_stack->next)
         {
-         /* Is this the last nonwhite stuff on the line?  */
-         c = skip_white_space_on_line ();
-         if (c == '\n')
-           return c;
-         UNGETC (c);
+         struct file_stack *p = input_file_stack;
+         if (indent_level != p->indent_level)
+           {
+             warning_with_file_and_line
+               (p->name, old_lineno,
+                "This file contains more `%c's than `%c's.",
+                indent_level > p->indent_level ? '{' : '}',
+                indent_level > p->indent_level ? '}' : '{');
+           }
+         input_file_stack = p->next;
+         free (p);
+         input_file_stack_tick++;
+         debug_end_source_file (input_file_stack->line);
         }
-
-      warning ("unrecognized text at end of #line");
+      else
+       error ("#-lines for entering and leaving files don't match");
      }
-  else
-    error ("invalid #-line");
+
+  /* Now that we've pushed or popped the input stack,
+     update the name in the top element.  */
+  if (input_file_stack)
+    input_file_stack->name = input_filename;
  
    /* skip the rest of this line.  */
   skipline:
-#if !USE_CPPLIB
-  if (c != '\n' && c != EOF && nextchar >= 0)
-    c = nextchar, nextchar = -1;
-#endif
-  while (c != '\n' && c != EOF)
+  linemode = 0;
+  end_of_file = 0;
+
+  do
      c = GETC();
+  while (c != '\n' && c != EOF);
    return c;
  }
  \f
-#ifdef HANDLE_SYSV_PRAGMA
+#ifdef HANDLE_GENERIC_PRAGMAS
  
  /* Handle a #pragma directive.
     TOKEN is the token we read after `#pragma'.  Processes the entire input
-   line and returns a character for the caller to reread: either \n or EOF.  */
+   line and returns non-zero iff the pragma has been successfully parsed.  */
  
  /* This function has to be in this file, in order to get at
     the token types.  */
  
  static int
-handle_sysv_pragma (token)
+handle_generic_pragma (token)
       register int token;
  {
-  register int c;
-
    for (;;)
      {
        switch (token)
@@ -908,29 +873,19 @@ handle_sysv_pragma (token)
         case CONSTANT:
           handle_pragma_token (token_buffer, yylval.ttype);
           break;
+
+       case END_OF_LINE:
+         return handle_pragma_token (NULL_PTR, NULL_TREE);
+
         default:
-         handle_pragma_token (token_buffer, 0);
+         handle_pragma_token (token_buffer, NULL);
         }
-#if !USE_CPPLIB
-      if (nextchar >= 0)
-       c = nextchar, nextchar = -1;
-      else
-#endif
-       c = GETC ();
  
-      while (c == ' ' || c == '\t')
-       c = GETC ();
-      if (c == '\n' || c == EOF)
-       {
-         handle_pragma_token (0, 0);
-         return c;
-       }
-      UNGETC (c);
        token = yylex ();
      }
  }
  
-#endif /* HANDLE_SYSV_PRAGMA */
+#endif /* HANDLE_GENERIC_PRAGMAS */
  \f
  #define ENDFILE -1  /* token that represents end-of-file */
  
@@ -962,9 +917,7 @@ readescape (ignore_ptr)
        while (1)
         {
           c = GETC();
-         if (!(c >= 'a' && c <= 'f')
-             && !(c >= 'A' && c <= 'F')
-             && !(c >= '0' && c <= '9'))
+         if (! ISXDIGIT (c))
             {
               UNGETC (c);
               break;
@@ -985,13 +938,18 @@ readescape (ignore_ptr)
           nonnull = 1;
         }
        if (! nonnull)
-       error ("\\x used with no following hex digits");
+       {
+         warning ("\\x used with no following hex digits");
+         return 'x';
+       }
        else if (count == 0)
         /* Digits are all 0's.  Ok.  */
         ;
        else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
                || (count > 1
-                  && ((1 << (TYPE_PRECISION (integer_type_node) - (count - 1) * 4))
+                  && (((unsigned)1
+                       << (TYPE_PRECISION (integer_type_node)
+                           - (count - 1) * 4))
                        <= firstdig)))
         pedwarn ("hex escape out of range");
        return code;
@@ -1050,7 +1008,7 @@ readescape (ignore_ptr)
      case 'E':
        if (pedantic)
         pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
-      return 033;
+      return TARGET_ESC;
  
      case '?':
        return c;
@@ -1062,64 +1020,210 @@ readescape (ignore_ptr)
        /* `\%' is used to prevent SCCS from getting confused.  */
      case '%':
        if (pedantic)
-       pedwarn ("non-ANSI escape sequence `\\%c'", c);
+       pedwarn ("unknown escape sequence `\\%c'", c);
        return c;
      }
-  if (c >= 040 && c < 0177)
-    pedwarn ("unknown escape sequence `\\%c'", c);
-  else
-    pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
-  return c;
+  if (ISGRAPH (c))
+    pedwarn ("unknown escape sequence `\\%c'", c);
+  else
+    pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
+  return c;
+}
+\f
+void
+yyerror (msgid)
+     const char *msgid;
+{
+  const char *string = _(msgid);
+
+  /* We can't print string and character constants well
+     because the token_buffer contains the result of processing escapes.  */
+  if (end_of_file)
+    error ("%s at end of input", string);
+  else if (token_buffer[0] == 0)
+    error ("%s at null character", string);
+  else if (token_buffer[0] == '"')
+    error ("%s before string constant", string);
+  else if (token_buffer[0] == '\'')
+    error ("%s before character constant", string);
+  else if (!ISGRAPH(token_buffer[0]))
+    error ("%s before character 0%o", string, (unsigned char) token_buffer[0]);
+  else
+    error ("%s before `%s'", string, token_buffer);
+}
+
+#if 0
+
+struct try_type
+{
+  tree *node_var;
+  char unsigned_flag;
+  char long_flag;
+  char long_long_flag;
+};
+
+struct try_type type_sequence[] =
+{
+  { &integer_type_node, 0, 0, 0},
+  { &unsigned_type_node, 1, 0, 0},
+  { &long_integer_type_node, 0, 1, 0},
+  { &long_unsigned_type_node, 1, 1, 0},
+  { &long_long_integer_type_node, 0, 1, 1},
+  { &long_long_unsigned_type_node, 1, 1, 1}
+};
+#endif /* 0 */
+\f
+struct pf_args
+{
+  /* Input */
+  int base;
+  char * p;
+  /* I/O */
+  int c;
+  /* Output */
+  int imag;
+  tree type;
+  int conversion_errno;
+  REAL_VALUE_TYPE value;
+};
+ 
+static void
+parse_float (data)
+  PTR data;
+{
+  struct pf_args * args = (struct pf_args *) data;
+  int fflag = 0, lflag = 0;
+  /* Copy token_buffer now, while it has just the number
+     and not the suffixes; once we add `f' or `i',
+     REAL_VALUE_ATOF may not work any more.  */
+  char *copy = (char *) alloca (args->p - token_buffer + 1);
+  bcopy (token_buffer, copy, args->p - token_buffer + 1);
+  args->imag = 0;
+  args->conversion_errno = 0;
+  args->type = double_type_node;
+
+  while (1)
+    {
+      int lose = 0;
+
+      /* Read the suffixes to choose a data type.  */
+      switch (args->c)
+       {
+       case 'f': case 'F':
+         if (fflag)
+           error ("more than one `f' in numeric constant");
+         fflag = 1;
+         break;
+
+       case 'l': case 'L':
+         if (lflag)
+           error ("more than one `l' in numeric constant");
+         lflag = 1;
+         break;
+
+       case 'i': case 'I':
+         if (args->imag)
+           error ("more than one `i' or `j' in numeric constant");
+         else if (pedantic)
+           pedwarn ("ANSI C forbids imaginary numeric constants");
+         args->imag = 1;
+         break;
+
+       default:
+         lose = 1;
+       }
+
+      if (lose)
+       break;
+
+      if (args->p >= token_buffer + maxtoken - 3)
+       args->p = extend_token_buffer (args->p);
+      *(args->p++) = args->c;
+      *(args->p) = 0;
+      args->c = GETC();
+    }
+
+  /* The second argument, machine_mode, of REAL_VALUE_ATOF
+     tells the desired precision of the binary result
+     of decimal-to-binary conversion.  */
+
+  if (fflag)
+    {
+      if (lflag)
+       error ("both `f' and `l' in floating constant");
+
+      args->type = float_type_node;
+      errno = 0;
+      if (args->base == 16)
+       args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
+      else
+       args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
+      args->conversion_errno = errno;
+      /* A diagnostic is required here by some ANSI C testsuites.
+        This is not pedwarn, because some people don't want
+        an error for this.  */
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `float'");
+    }
+  else if (lflag)
+    {
+      args->type = long_double_type_node;
+      errno = 0;
+      if (args->base == 16)
+       args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
+      else
+       args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
+      args->conversion_errno = errno;
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `long double'");
+    }
+  else
+    {
+      errno = 0;
+      if (args->base == 16)
+       args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
+      else
+       args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
+      args->conversion_errno = errno;
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `double'");
+    }
  }
-\f
-void
-yyerror (string)
-     char *string;
-{
-  char buf[200];
-
-  strcpy (buf, string);
+ 
+/* Get the next character, staying within the current token if possible.
+   If we're lexing a token, we don't want to look beyond the end of the
+   token cpplib has prepared for us; otherwise, we end up reading in the
+   next token, which screws up feed_input.  So just return a null
+   character.  */
  
-  /* We can't print string and character constants well
-     because the token_buffer contains the result of processing escapes.  */
-  if (end_of_file)
-    strcat (buf, " at end of input");
-  else if (token_buffer[0] == 0)
-    strcat (buf, " at null character");
-  else if (token_buffer[0] == '"')
-    strcat (buf, " before string constant");
-  else if (token_buffer[0] == '\'')
-    strcat (buf, " before character constant");
-  else if (token_buffer[0] < 040 || (unsigned char) token_buffer[0] >= 0177)
-    sprintf (buf + strlen (buf), " before character 0%o",
-            (unsigned char) token_buffer[0]);
-  else
-    strcat (buf, " before `%s'");
+static inline int token_getch PARAMS ((void));
  
-  error (buf, token_buffer);
+static inline int
+token_getch ()
+{
+#if USE_CPPLIB
+  if (yy_cur == yy_lim)
+    return '\0';
+#endif
+  return GETC ();
  }
  
-#if 0
+static inline void token_put_back PARAMS ((int));
  
-struct try_type
+static inline void
+token_put_back (ch)
+     int ch;
  {
-  tree *node_var;
-  char unsigned_flag;
-  char long_flag;
-  char long_long_flag;
-};
+#if USE_CPPLIB
+  if (ch == '\0')
+    return;
+#endif
+  UNGETC (ch);
+}
+
+/* Read a single token from the input stream, and assign it lexical
+   semantics.  */
  
-struct try_type type_sequence[] = 
-{
-  { &integer_type_node, 0, 0, 0},
-  { &unsigned_type_node, 1, 0, 0},
-  { &long_integer_type_node, 0, 1, 0},
-  { &long_unsigned_type_node, 1, 1, 0},
-  { &long_long_integer_type_node, 0, 1, 1},
-  { &long_long_unsigned_type_node, 1, 1, 1}
-};
-#endif /* 0 */
-\f
  int
  yylex ()
  {
@@ -1129,12 +1233,7 @@ yylex ()
    int wide_flag = 0;
    int objc_flag = 0;
  
-#if !USE_CPPLIB
-  if (nextchar >= 0)
-    c = nextchar, nextchar = -1;
-  else
-#endif
-    c = GETC();
+  c = GETC();
  
    /* Effectively do c = skip_white_space (c)
       but do it faster in the usual cases.  */
@@ -1146,7 +1245,12 @@ yylex ()
        case '\f':
        case '\v':
        case '\b':
-       c = GETC();
+#if USE_CPPLIB
+       if (cpp_token == CPP_HSPACE)
+         c = yy_get_token ();
+       else
+#endif
+         c = GETC();
         break;
  
        case '\r':
@@ -1171,13 +1275,20 @@ yylex ()
      case EOF:
        end_of_file = 1;
        token_buffer[0] = 0;
-      value = ENDFILE;
+      if (linemode)
+       value = END_OF_LINE;
+      else
+       value = ENDFILE;
        break;
  
      case 'L':
+#if USE_CPPLIB
+      if (cpp_token == CPP_NAME)
+       goto letter;
+#endif
        /* Capital L may start a wide-string or wide-character constant.  */
        {
-       register int c = GETC();
+       register int c = token_getch();
         if (c == '\'')
           {
             wide_flag = 1;
@@ -1188,7 +1299,7 @@ yylex ()
             wide_flag = 1;
             goto string_constant;
           }
-       UNGETC (c);
+       token_put_back (c);
        }
        goto letter;
  
@@ -1201,13 +1312,13 @@ yylex ()
        else
         {
           /* '@' may start a constant string object.  */
-         register int c = GETC ();
+         register int c = token_getch ();
           if (c == '"')
             {
               objc_flag = 1;
               goto string_constant;
             }
-         UNGETC (c);
+         token_put_back (c);
           /* Fall through to treat '@' as the start of an identifier.  */
         }
  
@@ -1226,33 +1337,45 @@ yylex ()
      case '_':
      case '$':
      letter:
-      p = token_buffer;
-      while (isalnum (c) || c == '_' || c == '$' || c == '@')
+#if USE_CPPLIB
+      if (cpp_token == CPP_NAME)
         {
-         /* Make sure this char really belongs in an identifier.  */
-         if (c == '@' && ! doing_objc_thang)
-           break;
-         if (c == '$')
+         /* Note that one character has already been read from
+            yy_cur into token_buffer.  Also, cpplib complains about
+            $ in identifiers, so we don't have to.  */
+
+         int len = yy_lim - yy_cur + 1;
+         if (len >= maxtoken)
+           extend_token_buffer_to (len + 1);
+         memcpy (token_buffer + 1, yy_cur, len);
+         p = token_buffer + len;
+         yy_cur = yy_lim;
+       }
+      else
+#endif
+       {
+         p = token_buffer;
+         while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
             {
-             if (! dollars_in_ident)
-               error ("`$' in identifier");
-             else if (pedantic)
-               pedwarn ("`$' in identifier");
-           }
+             /* Make sure this char really belongs in an identifier.  */
+             if (c == '$')
+               {
+                 if (! dollars_in_ident)
+                   error ("`$' in identifier");
+                 else if (pedantic)
+                   pedwarn ("`$' in identifier");
+               }
  
-         if (p >= token_buffer + maxtoken)
-           p = extend_token_buffer (p);
+             if (p >= token_buffer + maxtoken)
+               p = extend_token_buffer (p);
  
-         *p++ = c;
-         c = GETC();
-       }
+             *p++ = c;
+             c = token_getch();
+           }
  
-      *p = 0;
-#if USE_CPPLIB
-      UNGETC (c);
-#else
-      nextchar = c;
-#endif
+         *p = 0;
+         token_put_back (c);
+       }
  
        value = IDENTIFIER;
        yylval.itype = 0;
@@ -1271,10 +1394,10 @@ yylex ()
             /* Only return OBJECTNAME if it is a typedef.  */
             if (doing_objc_thang && value == OBJECTNAME)
               {
-               lastiddecl = lookup_name(yylval.ttype);
+               tree decl = lookup_name(yylval.ttype);
  
-               if (lastiddecl == NULL_TREE
-                   || TREE_CODE (lastiddecl) != TYPE_DECL)
+               if (decl == NULL_TREE
+                   || TREE_CODE (decl) != TYPE_DECL)
                   value = IDENTIFIER;
               }
  
@@ -1293,25 +1416,27 @@ yylex ()
  
        if (value == IDENTIFIER)
         {
+         tree decl;
+
           if (token_buffer[0] == '@')
             error("invalid identifier `%s'", token_buffer);
  
            yylval.ttype = get_identifier (token_buffer);
-         lastiddecl = lookup_name (yylval.ttype);
+         decl = lookup_name (yylval.ttype);
  
-         if (lastiddecl != 0 && TREE_CODE (lastiddecl) == TYPE_DECL)
+         if (decl != 0 && TREE_CODE (decl) == TYPE_DECL)
             value = TYPENAME;
           /* A user-invisible read-only initialized variable
              should be replaced by its value.
              We handle only strings since that's the only case used in C.  */
-         else if (lastiddecl != 0 && TREE_CODE (lastiddecl) == VAR_DECL
-                  && DECL_IGNORED_P (lastiddecl)
-                  && TREE_READONLY (lastiddecl)
-                  && DECL_INITIAL (lastiddecl) != 0
-                  && TREE_CODE (DECL_INITIAL (lastiddecl)) == STRING_CST)
+         else if (decl != 0 && TREE_CODE (decl) == VAR_DECL
+                  && DECL_IGNORED_P (decl)
+                  && TREE_READONLY (decl)
+                  && DECL_INITIAL (decl) != 0
+                  && TREE_CODE (DECL_INITIAL (decl)) == STRING_CST)
             {
-             tree stringval = DECL_INITIAL (lastiddecl);
-             
+             tree stringval = DECL_INITIAL (decl);
+
               /* Copy the string value so that we won't clobber anything
                  if we put something in the TREE_CHAIN of this one.  */
               yylval.ttype = build_string (TREE_STRING_LENGTH (stringval),
@@ -1332,16 +1457,53 @@ yylex ()
  
        break;
  
+    case '.':
+#if USE_CPPLIB
+      if (yy_cur < yy_lim)
+#endif
+       {
+         /* It's hard to preserve tokenization on '.' because
+            it could be a symbol by itself, or it could be the
+            start of a floating point number and cpp won't tell us.  */
+         register int c1 = token_getch ();
+         token_buffer[1] = c1;
+         if (c1 == '.')
+           {
+             c1 = token_getch ();
+             if (c1 == '.')
+               {
+                 token_buffer[2] = c1;
+                 token_buffer[3] = 0;
+                 value = ELLIPSIS;
+                 goto done;
+               }
+             error ("parse error at `..'");
+           }
+         if (ISDIGIT (c1))
+           {
+             token_put_back (c1);
+             goto number;
+           }
+         token_put_back (c1);
+       }
+      value = '.';
+      token_buffer[1] = 0;
+      break;
+
      case '0':  case '1':
+      /* Optimize for most frequent case.  */
        {
-       int next_c;
-       /* Check first for common special case:  single-digit 0 or 1.  */
+       register int cond;
  
-       next_c = GETC ();
-       UNGETC (next_c);        /* Always undo this lookahead.  */
-       if (!isalnum (next_c) && next_c != '.')
+#if USE_CPPLIB
+       cond = (yy_cur == yy_lim);
+#else
+       register int c1 = token_getch ();
+       token_put_back (c1);
+       cond = (! ISALNUM (c1) && c1 != '.');
+#endif
+       if (cond)
           {
-           token_buffer[0] = (char)c,  token_buffer[1] = '\0';
             yylval.ttype = (c == '0') ? integer_zero_node : integer_one_node;
             value = CONSTANT;
             break;
@@ -1350,24 +1512,27 @@ yylex ()
        }
      case '2':  case '3':  case '4':
      case '5':  case '6':  case '7':  case '8':  case '9':
-    case '.':
+    number:
        {
         int base = 10;
         int count = 0;
         int largest_digit = 0;
         int numdigits = 0;
-       /* for multi-precision arithmetic,
-          we actually store only HOST_BITS_PER_CHAR bits in each part.
-          The number of parts is chosen so as to be sufficient to hold
-          the enough bits to fit into the two HOST_WIDE_INTs that contain
-          the integer value (this is always at least as many bits as are
-          in a target `long long' value, but may be wider).  */
-#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
-       int parts[TOTAL_PARTS];
         int overflow = 0;
  
-       enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS} floatflag
-         = NOT_FLOAT;
+       /* We actually store only HOST_BITS_PER_CHAR bits in each part.
+          The code below which fills the parts array assumes that a host
+          int is at least twice as wide as a host char, and that 
+          HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
+          Two HOST_WIDE_INTs is the largest int literal we can store.
+          In order to detect overflow below, the number of parts (TOTAL_PARTS)
+          must be exactly the number of parts needed to hold the bits
+          of two HOST_WIDE_INTs. */
+#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
+       unsigned int parts[TOTAL_PARTS];
+
+       enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS, AFTER_EXPON}
+         floatflag = NOT_FLOAT;
  
         for (count = 0; count < TOTAL_PARTS; count++)
           parts[count] = 0;
@@ -1377,11 +1542,11 @@ yylex ()
  
         if (c == '0')
           {
-           *p++ = (c = GETC());
+           *p++ = (c = token_getch());
             if ((c == 'x') || (c == 'X'))
               {
                 base = 16;
-               *p++ = (c = GETC());
+               *p++ = (c = token_getch());
               }
             /* Leading 0 forces octal unless the 0 is the only digit.  */
             else if (c >= '0' && c <= '9')
@@ -1396,19 +1561,21 @@ yylex ()
         /* Read all the digits-and-decimal-points.  */
  
         while (c == '.'
-              || (isalnum (c) && c != 'l' && c != 'L'
+              || (ISALNUM (c) && c != 'l' && c != 'L'
                    && c != 'u' && c != 'U'
                    && c != 'i' && c != 'I' && c != 'j' && c != 'J'
-                  && (floatflag == NOT_FLOAT || ((c != 'f') && (c != 'F')))))
+                  && (floatflag == NOT_FLOAT
+                      || ((base != 16) && (c != 'f') && (c != 'F'))
+                      || base == 16)))   
           {
             if (c == '.')
               {
-               if (base == 16)
-                 error ("floating constant may not be in radix 16");
+               if (base == 16 && pedantic)
+                 pedwarn ("floating constant may not be in radix 16");
                 if (floatflag == TOO_MANY_POINTS)
                   /* We have already emitted an error.  Don't need another.  */
                   ;
-               else if (floatflag == AFTER_POINT)
+               else if (floatflag == AFTER_POINT || floatflag == AFTER_EXPON)
                   {
                     error ("malformed floating constant");
                     floatflag = TOO_MANY_POINTS;
@@ -1419,37 +1586,20 @@ yylex ()
                 else
                   floatflag = AFTER_POINT;
  
-               base = 10;
-               *p++ = c = GETC();
+               if (base == 8)
+                 base = 10;
+               *p++ = c = token_getch();
                 /* Accept '.' as the start of a floating-point number
-                  only when it is followed by a digit.
-                  Otherwise, unread the following non-digit
-                  and use the '.' as a structural token.  */
-               if (p == token_buffer + 2 && !isdigit (c))
-                 {
-                   if (c == '.')
-                     {
-                       c = GETC();
-                       if (c == '.')
-                         {
-                           *p++ = c;
-                           *p = 0;
-                           return ELLIPSIS;
-                         }
-                       error ("parse error at `..'");
-                     }
-                   UNGETC (c);
-                   token_buffer[1] = 0;
-                   value = '.';
-                   goto done;
-                 }
+                  only when it is followed by a digit.  */
+               if (p == token_buffer + 2 && !ISDIGIT (c))
+                 abort ();
               }
             else
               {
                 /* It is not a decimal point.
                    It should be a digit (perhaps a hex digit).  */
  
-               if (isdigit (c))
+               if (ISDIGIT (c))
                   {
                     c = c - '0';
                   }
@@ -1458,13 +1608,18 @@ yylex ()
                     if (c == 'e' || c == 'E')
                       {
                         base = 10;
-                       floatflag = AFTER_POINT;
+                       floatflag = AFTER_EXPON;
                         break;   /* start of exponent */
                       }
                     error ("nondigits in number and not hexadecimal");
                     c = 0;
                   }
-               else if (c >= 'a')
+               else if (base == 16 && (c == 'p' || c == 'P'))
+                 {
+                   floatflag = AFTER_EXPON;
+                   break;   /* start of exponent */
+                 }
+               else if (c >= 'a' && c <= 'f')
                   {
                     c = c - 'a' + 10;
                   }
@@ -1490,164 +1645,101 @@ yylex ()
                       parts[0] += c;
                   }
  
-               /* If the extra highest-order part ever gets anything in it,
-                  the number is certainly too big.  */
-               if (parts[TOTAL_PARTS - 1] != 0)
-                 overflow = 1;
+               /* If the highest-order part overflows (gets larger than
+                  a host char will hold) then the whole number has 
+                  overflowed.  Record this and truncate the highest-order
+                  part. */
+               if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
+                 {
+                   overflow = 1;
+                   parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
+                 }
  
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
-               *p++ = (c = GETC());
+               *p++ = (c = token_getch());
               }
           }
  
+       /* This can happen on input like `int i = 0x;' */
         if (numdigits == 0)
           error ("numeric constant with no digits");
  
         if (largest_digit >= base)
           error ("numeric constant contains digits beyond the radix");
  
-       /* Remove terminating char from the token buffer and delimit the string */
+       /* Remove terminating char from the token buffer and delimit the
+           string.  */
         *--p = 0;
  
         if (floatflag != NOT_FLOAT)
           {
-           tree type = double_type_node;
-           int exceeds_double = 0;
-           int imag = 0;
+           tree type;
+           int imag, conversion_errno;
             REAL_VALUE_TYPE value;
-           jmp_buf handler;
+           struct pf_args args;
  
             /* Read explicit exponent if any, and put it in tokenbuf.  */
  
-           if ((c == 'e') || (c == 'E'))
+           if ((base == 10 && ((c == 'e') || (c == 'E')))
+               || (base == 16 && (c == 'p' || c == 'P')))
               {
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
                 *p++ = c;
-               c = GETC();
+               c = token_getch();
                 if ((c == '+') || (c == '-'))
                   {
                     *p++ = c;
-                   c = GETC();
+                   c = token_getch();
                   }
-               if (! isdigit (c))
+               /* Exponent is decimal, even if string is a hex float.  */
+               if (! ISDIGIT (c))
                   error ("floating constant exponent has no digits");
-               while (isdigit (c))
+               while (ISDIGIT (c))
                   {
                     if (p >= token_buffer + maxtoken - 3)
                       p = extend_token_buffer (p);
                     *p++ = c;
-                   c = GETC();
+                   c = token_getch ();
                   }
               }
+           if (base == 16 && floatflag != AFTER_EXPON)
+             error ("hexadecimal floating constant has no exponent");
  
             *p = 0;
-           errno = 0;
+
+           /* Set up input for parse_float() */
+           args.base = base;
+           args.p = p;
+           args.c = c;
  
             /* Convert string to a double, checking for overflow.  */
-           if (setjmp (handler))
+           if (do_float_handler (parse_float, (PTR) &args))
               {
-               error ("floating constant out of range");
-               value = dconst0;
+               /* Receive output from parse_float() */
+               value = args.value;
               }
             else
               {
-               int fflag = 0, lflag = 0;
-               /* Copy token_buffer now, while it has just the number
-                  and not the suffixes; once we add `f' or `i',
-                  REAL_VALUE_ATOF may not work any more.  */
-               char *copy = (char *) alloca (p - token_buffer + 1);
-               bcopy (token_buffer, copy, p - token_buffer + 1);
-
-               set_float_handler (handler);
-
-               while (1)
-                 {
-                   int lose = 0;
-
-                   /* Read the suffixes to choose a data type.  */
-                   switch (c)
-                     {
-                     case 'f': case 'F':
-                       if (fflag)
-                         error ("more than one `f' in numeric constant");
-                       fflag = 1;
-                       break;
-
-                     case 'l': case 'L':
-                       if (lflag)
-                         error ("more than one `l' in numeric constant");
-                       lflag = 1;
-                       break;
-
-                     case 'i': case 'I':
-                       if (imag)
-                         error ("more than one `i' or `j' in numeric constant");
-                       else if (pedantic)
-                         pedwarn ("ANSI C forbids imaginary numeric constants");
-                       imag = 1;
-                       break;
-
-                     default:
-                       lose = 1;
-                     }
-
-                   if (lose)
-                     break;
-
-                   if (p >= token_buffer + maxtoken - 3)
-                     p = extend_token_buffer (p);
-                   *p++ = c;
-                   *p = 0;
-                   c = GETC();
-                 }
-
-               /* The second argument, machine_mode, of REAL_VALUE_ATOF
-                  tells the desired precision of the binary result
-                  of decimal-to-binary conversion.  */
-
-               if (fflag)
-                 {
-                   if (lflag)
-                     error ("both `f' and `l' in floating constant");
-
-                   type = float_type_node;
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   /* A diagnostic is required here by some ANSI C testsuites.
-                      This is not pedwarn, become some people don't want
-                      an error for this.  */
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `float'");
-                 }
-               else if (lflag)
-                 {
-                   type = long_double_type_node;
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `long double'");
-                 }
-               else
-                 {
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `double'");
-                 }
+               /* We got an exception from parse_float() */
+               error ("floating constant out of range");
+               value = dconst0;
+             }
  
-               set_float_handler (NULL_PTR);
-           }
+           /* Receive output from parse_float() */
+           c = args.c;
+           imag = args.imag;
+           type = args.type;
+           conversion_errno = args.conversion_errno;
+           
  #ifdef ERANGE
-           if (errno == ERANGE && !flag_traditional && pedantic)
-             {
-               /* ERANGE is also reported for underflow,
-                  so test the value to distinguish overflow from that.  */
-               if (REAL_VALUES_LESS (dconst1, value)
-                   || REAL_VALUES_LESS (value, dconstm1))
-                 {
-                   warning ("floating point number exceeds range of `double'");
-                   exceeds_double = 1;
-                 }
-             }
+           /* ERANGE is also reported for underflow,
+              so test the value to distinguish overflow from that.  */
+           if (conversion_errno == ERANGE && !flag_traditional && pedantic
+               && (REAL_VALUES_LESS (dconst1, value)
+                   || REAL_VALUES_LESS (value, dconstm1)))
+             warning ("floating point number exceeds range of `double'");
  #endif
  
             /* If the result is not a number, assume it must have been
@@ -1672,8 +1764,9 @@ yylex ()
             int spec_long = 0;
             int spec_long_long = 0;
             int spec_imag = 0;
-           int bytes, warn, i;
+           int warn = 0, i;
  
+           traditional_type = ansi_type = type = NULL_TREE;
             while (1)
               {
                 if (c == 'u' || c == 'U')
@@ -1688,7 +1781,8 @@ yylex ()
                       {
                         if (spec_long_long)
                           error ("three `l's in integer constant");
-                       else if (pedantic)
+                       else if (pedantic && ! flag_isoc99
+                                && ! in_system_header && warn_long_long)
                           pedwarn ("ANSI C forbids long long integer constants");
                         spec_long_long = 1;
                       }
@@ -1707,25 +1801,15 @@ yylex ()
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
                 *p++ = c;
-               c = GETC();
+               c = token_getch();
               }
  
-           /* If the constant won't fit in an unsigned long long,
-              then warn that the constant is out of range.  */
-
-           /* ??? This assumes that long long and long integer types are
-              a multiple of 8 bits.  This better than the original code
-              though which assumed that long was exactly 32 bits and long
-              long was exactly 64 bits.  */
-
-           bytes = TYPE_PRECISION (long_long_integer_type_node) / 8;
-
-           warn = overflow;
-           for (i = bytes; i < TOTAL_PARTS; i++)
-             if (parts[i])
+           /* If the literal overflowed, pedwarn about it now. */
+           if (overflow)
+             {
                 warn = 1;
-           if (warn)
-             pedwarn ("integer constant out of range");
+               pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
+             }
  
             /* This is simplified by the fact that our constant
                is always positive.  */
@@ -1739,7 +1823,7 @@ yylex ()
                          << (i * HOST_BITS_PER_CHAR));
                 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
               }
-           
+
             yylval.ttype = build_int_2 (low, high);
             TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
  
@@ -1768,10 +1852,17 @@ yylex ()
                 else if (! spec_long_long)
                   traditional_type = (spec_unsigned ? long_unsigned_type_node
                                       : long_integer_type_node);
-               else
+               else if (int_fits_type_p (yylval.ttype,
+                                         spec_unsigned 
+                                         ? long_long_unsigned_type_node
+                                         : long_long_integer_type_node)) 
                   traditional_type = (spec_unsigned
                                       ? long_long_unsigned_type_node
                                       : long_long_integer_type_node);
+               else
+                 traditional_type = (spec_unsigned
+                                     ? widest_unsigned_literal_type_node
+                                     : widest_integer_literal_type_node);
               }
             if (warn_traditional || ! flag_traditional)
               {
@@ -1793,13 +1884,24 @@ yylex ()
                          && int_fits_type_p (yylval.ttype,
                                              long_long_integer_type_node))
                   ansi_type = long_long_integer_type_node;
-               else
+               else if (int_fits_type_p (yylval.ttype,
+                                         long_long_unsigned_type_node))
                   ansi_type = long_long_unsigned_type_node;
+               else if (! spec_unsigned
+                        && int_fits_type_p (yylval.ttype,
+                                            widest_integer_literal_type_node))
+                 ansi_type = widest_integer_literal_type_node;
+               else
+                 ansi_type = widest_unsigned_literal_type_node;
               }
  
             type = flag_traditional ? traditional_type : ansi_type;
  
-           if (warn_traditional && traditional_type != ansi_type)
+           /* We assume that constants specified in a non-decimal
+              base are bit patterns, and that the programmer really
+              meant what they wrote.  */
+           if (warn_traditional && base == 10
+               && traditional_type != ansi_type)
               {
                 if (TYPE_PRECISION (traditional_type)
                     != TYPE_PRECISION (ansi_type))
@@ -1814,7 +1916,10 @@ yylex ()
             if (pedantic && !flag_traditional && !spec_long_long && !warn
                 && (TYPE_PRECISION (long_integer_type_node)
                     < TYPE_PRECISION (type)))
-             pedwarn ("integer constant out of range");
+             {
+               warn = 1;
+               pedwarn ("integer constant larger than the maximum value of an unsigned long int");
+             }
  
             if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
               warning ("decimal constant is so large that it is unsigned");
@@ -1842,12 +1947,21 @@ yylex ()
               }
             else
               TREE_TYPE (yylval.ttype) = type;
+
+
+           /* If it's still an integer (not a complex), and it doesn't
+              fit in the type we choose for it, then pedwarn. */
+
+           if (! warn
+               && TREE_CODE (TREE_TYPE (yylval.ttype)) == INTEGER_TYPE
+               && ! int_fits_type_p (yylval.ttype, TREE_TYPE (yylval.ttype)))
+             pedwarn ("integer constant is larger than the maximum value for its type");
           }
  
-       UNGETC (c);
+       token_put_back (c);
         *p = 0;
  
-       if (isalnum (c) || c == '.' || c == '_' || c == '$'
+       if (ISALNUM (c) || c == '.' || c == '_' || c == '$'
             || (!flag_traditional && (c == '-' || c == '+')
                 && (p[-1] == 'e' || p[-1] == 'E')))
           error ("missing white space after number `%s'", token_buffer);
@@ -1860,30 +1974,27 @@ yylex ()
        {
         register int result = 0;
         register int num_chars = 0;
+       int chars_seen = 0;
         unsigned width = TYPE_PRECISION (char_type_node);
         int max_chars;
-
-       if (wide_flag)
-         {
-           width = WCHAR_TYPE_SIZE;
  #ifdef MULTIBYTE_CHARS
-           max_chars = MB_CUR_MAX;
-#else
-           max_chars = 1;
+       int longest_char = local_mb_cur_max ();
+       (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
  #endif
-         }
-       else
-         max_chars = TYPE_PRECISION (integer_type_node) / width;
+
+       max_chars = TYPE_PRECISION (integer_type_node) / width;
+       if (wide_flag)
+         width = WCHAR_TYPE_SIZE;
  
         while (1)
           {
           tryagain:
-
-           c = GETC();
+           c = token_getch();
  
             if (c == '\'' || c == EOF)
               break;
  
+           ++chars_seen;
             if (c == '\\')
               {
                 int ignore = 0;
@@ -1891,10 +2002,10 @@ yylex ()
                 if (ignore)
                   goto tryagain;
                 if (width < HOST_BITS_PER_INT
-                   && (unsigned) c >= (1 << width))
+                   && (unsigned) c >= ((unsigned)1 << width))
                   pedwarn ("escape sequence out of range for character");
  #ifdef MAP_CHARACTER
-               if (isprint (c))
+               if (ISPRINT (c))
                   c = MAP_CHARACTER (c);
  #endif
               }
@@ -1904,18 +2015,81 @@ yylex ()
                   pedwarn ("ANSI C forbids newline in character constant");
                 lineno++;
               }
-#ifdef MAP_CHARACTER
             else
-             c = MAP_CHARACTER (c);
+             {
+#ifdef MULTIBYTE_CHARS
+               wchar_t wc;
+               int i;
+               int char_len = -1;
+               for (i = 1; i <= longest_char; ++i)
+                 {
+                   if (i > maxtoken - 4)
+                     extend_token_buffer (token_buffer);
+
+                   token_buffer[i] = c;
+                   char_len = local_mbtowc (& wc,
+                                            token_buffer + 1,
+                                            i);
+                   if (char_len != -1)
+                     break;
+                   c = token_getch ();
+                 }
+               if (char_len > 1)
+                 {
+                   /* mbtowc sometimes needs an extra char before accepting */
+                   if (char_len < i)
+                     token_put_back (c);
+                   if (! wide_flag)
+                     {
+                       /* Merge character into result; ignore excess chars.  */
+                       for (i = 1; i <= char_len; ++i)
+                         {
+                           if (i > max_chars)
+                             break;
+                           if (width < HOST_BITS_PER_INT)
+                             result = (result << width)
+                               | (token_buffer[i]
+                                  & ((1 << width) - 1));
+                           else
+                             result = token_buffer[i];
+                         }
+                       num_chars += char_len;
+                       goto tryagain;
+                     }
+                   c = wc;
+                 }
+               else
+                 {
+                   if (char_len == -1)
+                     {
+                       warning ("Ignoring invalid multibyte character");
+                       /* Replace all but the first byte.  */
+                       for (--i; i > 1; --i)
+                         token_put_back (token_buffer[i]);
+                       wc = token_buffer[1];
+                     }
+#ifdef MAP_CHARACTER
+                     c = MAP_CHARACTER (wc);
+#else
+                     c = wc;
  #endif
+                 }
+#else /* ! MULTIBYTE_CHARS */
+#ifdef MAP_CHARACTER
+               c = MAP_CHARACTER (c);
+#endif
+#endif /* ! MULTIBYTE_CHARS */
+             }
  
-           num_chars++;
-           if (num_chars > maxtoken - 4)
-             extend_token_buffer (token_buffer);
-
-           token_buffer[num_chars] = c;
+           if (wide_flag)
+             {
+               if (chars_seen == 1) /* only keep the first one */
+                 result = c;
+               goto tryagain;
+             }
  
             /* Merge character into result; ignore excess chars.  */
+           num_chars += (width / TYPE_PRECISION (char_type_node));
             if (num_chars < max_chars + 1)
               {
                 if (width < HOST_BITS_PER_INT)
@@ -1925,19 +2099,16 @@ yylex ()
               }
           }
  
-       token_buffer[num_chars + 1] = '\'';
-       token_buffer[num_chars + 2] = 0;
-
         if (c != '\'')
-         error ("malformatted character constant");
-       else if (num_chars == 0)
+         error ("malformed character constant");
+       else if (chars_seen == 0)
           error ("empty character constant");
         else if (num_chars > max_chars)
           {
             num_chars = max_chars;
             error ("character constant too long");
           }
-       else if (num_chars != 1 && ! flag_traditional)
+       else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
           warning ("multi-character character constant");
  
         /* If char type is signed, sign-extend the constant.  */
@@ -1962,22 +2133,6 @@ yylex ()
           }
         else
           {
-#ifdef MULTIBYTE_CHARS
-           /* Set the initial shift state and convert the next sequence.  */
-           result = 0;
-           /* In all locales L'\0' is zero and mbtowc will return zero,
-              so don't use it.  */
-           if (num_chars > 1
-               || (num_chars == 1 && token_buffer[1] != '\0'))
-             {
-               wchar_t wc;
-               (void) mbtowc (NULL_PTR, NULL_PTR, 0);
-               if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
-                 result = wc;
-               else
-                 warning ("Ignoring invalid multibyte character");
-             }
-#endif
             yylval.ttype = build_int_2 (result, 0);
             TREE_TYPE (yylval.ttype) = wchar_type_node;
           }
@@ -1989,20 +2144,26 @@ yylex ()
      case '"':
      string_constant:
        {
-       c = GETC();
+       unsigned width = wide_flag ? WCHAR_TYPE_SIZE
+                                  : TYPE_PRECISION (char_type_node);
+#ifdef MULTIBYTE_CHARS
+       int longest_char = local_mb_cur_max ();
+       (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+#endif
+       c = token_getch ();
         p = token_buffer + 1;
  
-       while (c != '"' && c >= 0)
+       while (c != '"' && c != EOF)
           {
-           if (c == '\\')
+           /* ignore_escape_flag is set for reading the filename in #line.  */
+           if (!ignore_escape_flag && c == '\\')
               {
                 int ignore = 0;
                 c = readescape (&ignore);
                 if (ignore)
                   goto skipnewline;
-               if (!wide_flag
-                   && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
-                   && c >= (1 << TYPE_PRECISION (char_type_node)))
+               if (width < HOST_BITS_PER_INT
+                   && (unsigned) c >= ((unsigned)1 << width))
                   pedwarn ("escape sequence out of range for character");
               }
             else if (c == '\n')
@@ -2011,17 +2172,98 @@ yylex ()
                   pedwarn ("ANSI C forbids newline in string constant");
                 lineno++;
               }
+           else
+             {
+#ifdef MULTIBYTE_CHARS
+               wchar_t wc;
+               int i;
+               int char_len = -1;
+               for (i = 0; i < longest_char; ++i)
+                 {
+                   if (p + i >= token_buffer + maxtoken)
+                     p = extend_token_buffer (p);
+                   p[i] = c;
+
+                   char_len = local_mbtowc (& wc, p, i + 1);
+                   if (char_len != -1)
+                     break;
+                   c = token_getch ();
+                 }
+               if (char_len == -1)
+                 {
+                   warning ("Ignoring invalid multibyte character");
+                   /* Replace all except the first byte.  */
+                   token_put_back (c);
+                   for (--i; i > 0; --i)
+                     token_put_back (p[i]);
+                   char_len = 1;
+                 }
+               /* mbtowc sometimes needs an extra char before accepting */
+               if (char_len <= i)
+                 token_put_back (c);
+               if (! wide_flag)
+                 {
+                   p += (i + 1);
+                   c = token_getch ();
+                   continue;
+                 }
+               c = wc;
+#endif /* MULTIBYTE_CHARS */
+             }
  
-           if (p == token_buffer + maxtoken)
-             p = extend_token_buffer (p);
-           *p++ = c;
+           /* Add this single character into the buffer either as a wchar_t
+              or as a single byte.  */
+           if (wide_flag)
+             {
+               unsigned width = TYPE_PRECISION (char_type_node);
+               unsigned bytemask = (1 << width) - 1;
+               int byte;
+
+               if (p + WCHAR_BYTES > token_buffer + maxtoken)
+                 p = extend_token_buffer (p);
+
+               for (byte = 0; byte < WCHAR_BYTES; ++byte)
+                 {
+                   int value;
+                   if (byte >= (int) sizeof (c))
+                     value = 0;
+                   else
+                     value = (c >> (byte * width)) & bytemask;
+                   if (BYTES_BIG_ENDIAN)
+                     p[WCHAR_BYTES - byte - 1] = value;
+                   else
+                     p[byte] = value;
+                 }
+               p += WCHAR_BYTES;
+             }
+           else
+             {
+               if (p >= token_buffer + maxtoken)
+                 p = extend_token_buffer (p);
+               *p++ = c;
+             }
  
           skipnewline:
-           c = GETC();
+           c = token_getch ();
+         }
+
+       /* Terminate the string value, either with a single byte zero
+          or with a wide zero.  */
+       if (wide_flag)
+         {
+           if (p + WCHAR_BYTES > token_buffer + maxtoken)
+             p = extend_token_buffer (p);
+           bzero (p, WCHAR_BYTES);
+           p += WCHAR_BYTES;
+         }
+       else
+         {
+           if (p >= token_buffer + maxtoken)
+             p = extend_token_buffer (p);
+           *p++ = 0;
           }
-       *p = 0;
  
-       if (c < 0)
+       if (c == EOF)
           error ("Unterminated string constant");
  
         /* We have read the entire constant.
@@ -2029,53 +2271,27 @@ yylex ()
  
         if (wide_flag)
           {
-           /* If this is a L"..." wide-string, convert the multibyte string
-              to a wide character string.  */
-           char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
-           int len;
-
-#ifdef MULTIBYTE_CHARS
-           len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
-           if (len < 0 || len >= (p - token_buffer))
-             {
-               warning ("Ignoring invalid multibyte string");
-               len = 0;
-             }
-           bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
-#else
-           {
-             char *wp, *cp;
-
-             wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
-             bzero (widep, (p - token_buffer) * WCHAR_BYTES);
-             for (cp = token_buffer + 1; cp < p; cp++)
-               *wp = *cp, wp += WCHAR_BYTES;
-             len = p - token_buffer - 1;
-           }
-#endif
-           yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
+           yylval.ttype = build_string (p - (token_buffer + 1),
+                                        token_buffer + 1);
             TREE_TYPE (yylval.ttype) = wchar_array_type_node;
             value = STRING;
           }
         else if (objc_flag)
           {
-           extern tree build_objc_string();
             /* Return an Objective-C @"..." constant string object.  */
-           yylval.ttype = build_objc_string (p - token_buffer,
+           yylval.ttype = build_objc_string (p - (token_buffer + 1),
                                               token_buffer + 1);
             TREE_TYPE (yylval.ttype) = char_array_type_node;
             value = OBJC_STRING;
           }
         else
           {
-           yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
+           yylval.ttype = build_string (p - (token_buffer + 1),
+                                        token_buffer + 1);
             TREE_TYPE (yylval.ttype) = char_array_type_node;
             value = STRING;
           }
  
-       *p++ = '"';
-       *p = 0;
-
         break;
        }
  
@@ -2125,7 +2341,7 @@ yylex ()
             yylval.code = GT_EXPR; break;
           }
  
-       token_buffer[1] = c1 = GETC();
+       token_buffer[1] = c1 = token_getch();
         token_buffer[2] = 0;
  
         if (c1 == '=')
@@ -2168,6 +2384,8 @@ yylex ()
               if (c1 == '>')
                 { value = POINTSAT; goto done; }
               break;
+
+             /* digraphs */
             case ':':
               if (c1 == '>')
                 { value = ']'; goto done; }
@@ -2183,13 +2401,14 @@ yylex ()
                 { value = '}'; indent_level--; goto done; }
               break;
             }
-       UNGETC (c1);
+
+       token_put_back (c1);
         token_buffer[1] = 0;
  
         if ((c == '<') || (c == '>'))
           value = ARITHCOMPARE;
         else value = c;
-       goto done;
+       break;
        }
  
      case 0: