* config/alpha/alpha.c (alpha_emit_floatuns): Ensure we pass a REG

[pf3gnuchains/gcc-fork.git] / gcc / c-lex.c
diff --git a/gcc/c-lex.c b/gcc/c-lex.c

index a7a2687..2f8a840 100644 (file)
--- a/gcc/c-lex.c
+++ b/gcc/c-lex.c
@@ -1,5 +1,6 @@
  /* Lexical analyzer for C and Objective C.
-   Copyright (C) 1987, 88, 89, 92, 94, 1995 Free Software Foundation, Inc.
+   Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
+   1998, 1999, 2000 Free Software Foundation, Inc.
  
  This file is part of GNU CC.
  
@@ -15,34 +16,52 @@ GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
-
-
-#include <stdio.h>
-#include <errno.h>
-#include <setjmp.h>
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
  
  #include "config.h"
+#include "system.h"
+
  #include "rtl.h"
  #include "tree.h"
  #include "input.h"
+#include "output.h"
  #include "c-lex.h"
  #include "c-tree.h"
  #include "flags.h"
  #include "c-parse.h"
  #include "c-pragma.h"
-
-#include <ctype.h>
+#include "toplev.h"
+#include "intl.h"
+#include "ggc.h"
+#include "tm_p.h"
+
+/* MULTIBYTE_CHARS support only works for native compilers.
+   ??? Ideally what we want is to model widechar support after
+   the current floating point support.  */
+#ifdef CROSS_COMPILE
+#undef MULTIBYTE_CHARS
+#endif
  
  #ifdef MULTIBYTE_CHARS
-#include <stdlib.h>
+#include "mbchar.h"
  #include <locale.h>
+#endif /* MULTIBYTE_CHARS */
+#ifndef GET_ENVIRONMENT
+#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
  #endif
  
-#ifndef errno
-extern int errno;
+#if USE_CPPLIB
+#include "cpplib.h"
+extern cpp_reader  parse_in;
+extern cpp_options parse_options;
+#else
+/* Stream for reading from the input file.  */
+FILE *finput;
  #endif
  
+extern void yyprint                    PARAMS ((FILE *, int, YYSTYPE));
+
  /* The elements of `ridpointers' are identifier nodes
     for the reserved type names and storage classes.
     It is indexed by a RID_... value.  */
@@ -51,6 +70,62 @@ tree ridpointers[(int) RID_MAX];
  /* Cause the `yydebug' variable to be defined.  */
  #define YYDEBUG 1
  
+#if USE_CPPLIB
+extern unsigned char *yy_cur, *yy_lim;
+extern enum cpp_token cpp_token;
+
+extern int yy_get_token ();
+
+#define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
+#define UNGETC(c) ((c) == EOF ? 0 : yy_cur--)
+
+#else /* ! USE_CPPLIB */
+
+#define GETC() getch ()
+#define UNGETC(c) put_back (c)
+
+struct putback_buffer {
+  unsigned char *buffer;
+  int   buffer_size;
+  int   index;
+};
+
+static struct putback_buffer putback = {NULL, 0, -1};
+
+static inline int getch PARAMS ((void));
+
+static inline int
+getch ()
+{
+  if (putback.index != -1)
+    {
+      int ch = putback.buffer[putback.index];
+      --putback.index;
+      return ch;
+    }
+  return getc (finput);
+}
+
+static inline void put_back PARAMS ((int));
+
+static inline void
+put_back (ch)
+     int ch;
+{
+  if (ch != EOF)
+    {
+      if (putback.index == putback.buffer_size - 1)
+       {
+         putback.buffer_size += 16;
+         putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
+       }
+      putback.buffer[++putback.index] = ch;
+    }
+}
+#endif /* ! USE_CPPLIB */
+
+int linemode;
+
  /* the declaration found for the last IDENTIFIER token read in.
     yylex must look this up to detect typedefs, which get token type TYPENAME,
     so it is left around in case the identifier is not a typedef but is
@@ -61,20 +136,13 @@ tree lastiddecl;
  
  int doing_objc_thang;
  
-extern tree is_class_name ();
-
  extern int yydebug;
  
  /* File used for outputting assembler code.  */
  extern FILE *asm_out_file;
  
-#ifndef WCHAR_TYPE_SIZE
-#ifdef INT_TYPE_SIZE
-#define WCHAR_TYPE_SIZE INT_TYPE_SIZE
-#else
-#define WCHAR_TYPE_SIZE        BITS_PER_WORD
-#endif
-#endif
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
  
  /* Number of bytes in a wide character.  */
  #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
@@ -84,13 +152,24 @@ char *token_buffer;        /* Pointer to token buffer.
                            Actual allocated length is maxtoken + 2.
                            This is not static because objc-parse.y uses it.  */
  
+static int indent_level;        /* Number of { minus number of }. */
+
+/* Nonzero tells yylex to ignore \ in string constants.  */
+static int ignore_escape_flag;
+
  /* Nonzero if end-of-file has been seen on input.  */
  static int end_of_file;
  
-/* Buffered-back input character; faster than using ungetc.  */
-static int nextchar = -1;
-
-int check_newline ();
+#ifdef HANDLE_GENERIC_PRAGMAS
+static int handle_generic_pragma       PARAMS ((int));
+#endif /* HANDLE_GENERIC_PRAGMAS */
+static int whitespace_cr               PARAMS ((int));
+static int skip_white_space            PARAMS ((int));
+static char *extend_token_buffer       PARAMS ((const char *));
+static int readescape                  PARAMS ((int *));
+static void parse_float                        PARAMS ((PTR));
+static void extend_token_buffer_to     PARAMS ((int));
+static int read_line_number            PARAMS ((int *));
  \f
  /* Do not insert generated code into the source, instead, include it.
     This allows us to build gcc automatically even for targets that
@@ -137,10 +216,63 @@ remember_protocol_qualifiers ()
        wordlist[i].name = "inout";
      else if (wordlist[i].rid == RID_BYCOPY)
        wordlist[i].name = "bycopy";
+    else if (wordlist[i].rid == RID_BYREF)
+      wordlist[i].name = "byref";
      else if (wordlist[i].rid == RID_ONEWAY)
-      wordlist[i].name = "oneway";   
+      wordlist[i].name = "oneway";
  }
  \f
+char *
+init_parse (filename)
+     char *filename;
+{
+#if !USE_CPPLIB
+  /* Open input file.  */
+  if (filename == 0 || !strcmp (filename, "-"))
+    {
+      finput = stdin;
+      filename = "stdin";
+    }
+  else
+    finput = fopen (filename, "r");
+  if (finput == 0)
+    pfatal_with_name (filename);
+
+#ifdef IO_BUFFER_SIZE
+  setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
+#endif
+#else /* USE_CPPLIB */
+  parse_in.show_column = 1;
+  if (! cpp_start_read (&parse_in, filename))
+    abort ();
+
+  if (filename == 0 || !strcmp (filename, "-"))
+    filename = "stdin";
+
+  /* cpp_start_read always puts at least one line directive into the
+     token buffer.  We must arrange to read it out here. */
+  yy_cur = parse_in.token_buffer;
+  yy_lim = CPP_PWRITTEN (&parse_in);
+  cpp_token = CPP_DIRECTIVE;
+#endif
+
+  init_lex ();
+  init_pragma ();
+
+  return filename;
+}
+
+void
+finish_parse ()
+{
+#if USE_CPPLIB
+  cpp_finish (&parse_in);
+  errorcount += parse_in.errors;
+#else
+  fclose (finput);
+#endif
+}
+
  void
  init_lex ()
  {
@@ -154,6 +286,7 @@ init_lex ()
  #ifdef MULTIBYTE_CHARS
    /* Change to the native locale for multibyte conversions.  */
    setlocale (LC_CTYPE, "");
+  GET_ENVIRONMENT (literal_codeset, "LANG");
  #endif
  
    maxtoken = 40;
@@ -170,7 +303,10 @@ init_lex ()
    ridpointers[(int) RID_SIGNED] = get_identifier ("signed");
    ridpointers[(int) RID_INLINE] = get_identifier ("inline");
    ridpointers[(int) RID_CONST] = get_identifier ("const");
+  ridpointers[(int) RID_RESTRICT] = get_identifier ("restrict");
    ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");
+  ridpointers[(int) RID_BOUNDED] = get_identifier ("__bounded");
+  ridpointers[(int) RID_UNBOUNDED] = get_identifier ("__unbounded");
    ridpointers[(int) RID_AUTO] = get_identifier ("auto");
    ridpointers[(int) RID_STATIC] = get_identifier ("static");
    ridpointers[(int) RID_EXTERN] = get_identifier ("extern");
@@ -183,6 +319,7 @@ init_lex ()
    ridpointers[(int) RID_OUT] = get_identifier ("out");
    ridpointers[(int) RID_INOUT] = get_identifier ("inout");
    ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");
+  ridpointers[(int) RID_BYREF] = get_identifier ("byref");
    ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");
    forget_protocol_qualifiers();
  
@@ -198,6 +335,7 @@ init_lex ()
    if (flag_traditional)
      {
        UNSET_RESERVED_WORD ("const");
+      UNSET_RESERVED_WORD ("restrict");
        UNSET_RESERVED_WORD ("volatile");
        UNSET_RESERVED_WORD ("typeof");
        UNSET_RESERVED_WORD ("signed");
@@ -205,6 +343,9 @@ init_lex ()
        UNSET_RESERVED_WORD ("iterator");
        UNSET_RESERVED_WORD ("complex");
      }
+  else if (!flag_isoc99)
+    UNSET_RESERVED_WORD ("restrict");
+
    if (flag_no_asm)
      {
        UNSET_RESERVED_WORD ("asm");
@@ -244,10 +385,14 @@ yyprint (file, yychar, yylval)
        if (TREE_CODE (t) == INTEGER_CST)
         fprintf (file,
  #if HOST_BITS_PER_WIDE_INT == 64
-#if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
+#if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_INT
+                " 0x%x%016x",
+#else
+#if HOST_BITS_PER_WIDE_INT == HOST_BITS_PER_LONG
                  " 0x%lx%016lx",
  #else
-                " 0x%x%016x",
+                " 0x%llx%016llx",
+#endif
  #endif
  #else
  #if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
@@ -260,8 +405,31 @@ yyprint (file, yychar, yylval)
        break;
      }
  }
-
  \f
+/* Iff C is a carriage return, warn about it - if appropriate -
+   and return nonzero.  */
+
+static int
+whitespace_cr (c)
+     int c;
+{
+  static int newline_warning = 0;
+
+  if (c == '\r')
+    {
+      /* ANSI C says the effects of a carriage return in a source file
+        are undefined.  */
+      if (pedantic && !newline_warning)
+       {
+         warning ("carriage return in source file");
+         warning ("(we only warn about the first carriage return)");
+         newline_warning = 1;
+       }
+      return 1;
+    }
+  return 0;
+}
+
  /* If C is not whitespace, return C.
     Otherwise skip whitespace and return first nonwhite char read.  */
  
@@ -269,8 +437,6 @@ static int
  skip_white_space (c)
       register int c;
  {
-  static int newline_warning = 0;
-
    for (;;)
      {
        switch (c)
@@ -280,6 +446,11 @@ skip_white_space (c)
              Also, there's no need, since cpp removes all comments.  */
  
         case '\n':
+         if (linemode)
+           {
+             UNGETC (c);
+             return EOF;
+           }
           c = check_newline ();
           break;
  
@@ -288,28 +459,28 @@ skip_white_space (c)
         case '\f':
         case '\v':
         case '\b':
-         c = getc (finput);
+#if USE_CPPLIB
+         /* While processing a # directive we don't get CPP_HSPACE
+            tokens, so we also need to handle whitespace the normal way.  */
+         if (cpp_token == CPP_HSPACE)
+           c = yy_get_token ();
+         else
+#endif
+           c = GETC();
           break;
  
         case '\r':
-         /* ANSI C says the effects of a carriage return in a source file
-            are undefined.  */
-         if (pedantic && !newline_warning)
-           {
-             warning ("carriage return in source file");
-             warning ("(we only warn about the first carriage return)");
-             newline_warning = 1;
-           }
-         c = getc (finput);
+         whitespace_cr (c);
+         c = GETC();
           break;
  
         case '\\':
-         c = getc (finput);
+         c = GETC();
           if (c == '\n')
             lineno++;
           else
             error ("stray '\\' in program");
-         c = getc (finput);
+         c = GETC();
           break;
  
         default:
@@ -318,20 +489,16 @@ skip_white_space (c)
      }
  }
  
-/* Skips all of the white space at the current location in the input file.
-   Must use and reset nextchar if it has the next character.  */
+/* Skips all of the white space at the current location in the input file.  */
  
  void
  position_after_white_space ()
  {
    register int c;
  
-  if (nextchar != -1)
-    c = nextchar, nextchar = -1;
-  else
-    c = getc (finput);
+  c = GETC();
  
-  ungetc (skip_white_space (c), finput);
+  UNGETC (skip_white_space (c));
  }
  
  /* Make the token buffer longer, preserving the data in it.
@@ -339,363 +506,374 @@ position_after_white_space ()
     The value we return is a pointer to the new buffer
     at a place corresponding to P.  */
  
+static void
+extend_token_buffer_to (size)
+     int size;
+{
+  do
+    maxtoken = maxtoken * 2 + 10;
+  while (maxtoken < size);
+  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
+}
+
  static char *
  extend_token_buffer (p)
-     char *p;
+     const char *p;
  {
    int offset = p - token_buffer;
-
-  maxtoken = maxtoken * 2 + 10;
-  token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
-
+  extend_token_buffer_to (offset);
    return token_buffer + offset;
  }
  \f
+#if defined HANDLE_PRAGMA
+/* Local versions of these macros, that can be passed as function pointers.  */
+static int
+pragma_getc ()
+{
+  return GETC ();
+}
+
+static void
+pragma_ungetc (arg)
+     int arg;
+{
+  UNGETC (arg);
+}
+#endif
+
+static int
+read_line_number (num)
+     int *num;
+{
+  register int token = yylex ();
+
+  if (token == CONSTANT
+      && TREE_CODE (yylval.ttype) == INTEGER_CST)
+    {
+      *num = TREE_INT_CST_LOW (yylval.ttype);
+      return 1;
+    }
+  else
+    {
+      if (token != END_OF_LINE)
+       error ("invalid #-line");
+      return 0;
+    }
+}
+  
  /* At the beginning of a line, increment the line number
     and process any #-directive on this line.
     If the line is a #-directive, read the entire line and return a newline.
-   Otherwise, return the line's first non-whitespace character.  */
+   Otherwise, return the line's first non-whitespace character.
+
+   Note that in the case of USE_CPPLIB, we get the whole line as one
+   CPP_DIRECTIVE token.  */
  
  int
  check_newline ()
  {
    register int c;
    register int token;
+  int saw_line;
+  enum { act_none, act_push, act_pop } action;
+  int old_lineno, action_number, l;
  
-  lineno++;
-
+ restart:
    /* Read first nonwhite char on the line.  */
  
-  c = getc (finput);
-  while (c == ' ' || c == '\t')
-    c = getc (finput);
+#ifdef USE_CPPLIB
+  c = GETC ();
+  /* In some cases where we're leaving an include file, we can get multiple
+     CPP_HSPACE tokens in a row, so we need to loop.  */
+  while (cpp_token == CPP_HSPACE)
+    c = yy_get_token ();
+#else
+  do
+    c = GETC ();
+  while (c == ' ' || c == '\t');
+#endif
+
+  lineno++;
  
    if (c != '#')
      {
+      /* Sequences of multiple newlines are very common; optimize them.  */
+      if (c == '\n')
+       goto restart;
+
        /* If not #, return it so caller will use it.  */
        return c;
      }
  
-  /* Read first nonwhite char after the `#'.  */
-
-  c = getc (finput);
-  while (c == ' ' || c == '\t')
-    c = getc (finput);
+  /* Don't read beyond this line.  */
+  saw_line = 0;
+  linemode = 1;
+  
+#if USE_CPPLIB
+  if (cpp_token == CPP_VSPACE)
+    {
+      /* Format is "<space> <line number> <filename> <newline>".
+        Only the line number is interesting, and even that
+        we can get more efficiently than scanning the line.  */
+      yy_cur = yy_lim - 1;
+      lineno = parse_in.lineno - 1;
+      goto skipline;
+    }
+#endif
  
-  /* If a letter follows, then if the word here is `line', skip
-     it and ignore it; otherwise, ignore the line, with an error
-     if the word isn't `pragma', `ident', `define', or `undef'.  */
+  token = yylex ();
  
-  if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+  if (token == IDENTIFIER)
      {
-      if (c == 'p')
+      /* If a letter follows, then if the word here is `line', skip
+        it and ignore it; otherwise, ignore the line, with an error
+        if the word isn't `pragma'.  */
+
+      const char *name = IDENTIFIER_POINTER (yylval.ttype);
+
+      if (!strcmp (name, "pragma"))
         {
-         if (getc (finput) == 'r'
-             && getc (finput) == 'a'
-             && getc (finput) == 'g'
-             && getc (finput) == 'm'
-             && getc (finput) == 'a'
-             && ((c = getc (finput)) == ' ' || c == '\t' || c == '\n'))
-           {
-#ifdef HANDLE_SYSV_PRAGMA
-             return handle_sysv_pragma (finput, c);
-#else /* !HANDLE_SYSV_PRAGMA */
+         token = yylex ();
+         if (token != IDENTIFIER
+             || TREE_CODE (yylval.ttype) != IDENTIFIER_NODE)
+           goto skipline;
+
  #ifdef HANDLE_PRAGMA
-             HANDLE_PRAGMA (finput);
+         /* We invoke HANDLE_PRAGMA before HANDLE_GENERIC_PRAGMAS
+            (if both are defined), in order to give the back
+            end a chance to override the interpretation of
+            SYSV style pragmas.  */
+         if (HANDLE_PRAGMA (pragma_getc, pragma_ungetc,
+                            IDENTIFIER_POINTER (yylval.ttype)))
+           goto skipline;
  #endif /* HANDLE_PRAGMA */
-             goto skipline;
-#endif /* !HANDLE_SYSV_PRAGMA */
-           }
-       }
+             
+#ifdef HANDLE_GENERIC_PRAGMAS
+         if (handle_generic_pragma (token))
+           goto skipline;
+#endif /* HANDLE_GENERIC_PRAGMAS */
+
+         /* Issue a warning message if we have been asked to do so.
+            Ignoring unknown pragmas in system header file unless
+            an explicit -Wunknown-pragmas has been given. */
+         if (warn_unknown_pragmas > 1
+             || (warn_unknown_pragmas && ! in_system_header))
+           warning ("ignoring pragma: %s", token_buffer);
  
-      else if (c == 'd')
+         goto skipline;
+       }
+      else if (!strcmp (name, "define"))
         {
-         if (getc (finput) == 'e'
-             && getc (finput) == 'f'
-             && getc (finput) == 'i'
-             && getc (finput) == 'n'
-             && getc (finput) == 'e'
-             && ((c = getc (finput)) == ' ' || c == '\t' || c == '\n'))
-           {
-#ifdef DWARF_DEBUGGING_INFO
-             if ((debug_info_level == DINFO_LEVEL_VERBOSE)
-                 && (write_symbols == DWARF_DEBUG))
-               dwarfout_define (lineno, get_directive_line (finput));
-#endif /* DWARF_DEBUGGING_INFO */
-             goto skipline;
-           }
+         debug_define (lineno, GET_DIRECTIVE_LINE ());
+         goto skipline;
         }
-      else if (c == 'u')
+      else if (!strcmp (name, "undef"))
         {
-         if (getc (finput) == 'n'
-             && getc (finput) == 'd'
-             && getc (finput) == 'e'
-             && getc (finput) == 'f'
-             && ((c = getc (finput)) == ' ' || c == '\t' || c == '\n'))
-           {
-#ifdef DWARF_DEBUGGING_INFO
-             if ((debug_info_level == DINFO_LEVEL_VERBOSE)
-                 && (write_symbols == DWARF_DEBUG))
-               dwarfout_undef (lineno, get_directive_line (finput));
-#endif /* DWARF_DEBUGGING_INFO */
-             goto skipline;
-           }
+         debug_undef (lineno, GET_DIRECTIVE_LINE ());
+         goto skipline;
         }
-      else if (c == 'l')
+      else if (!strcmp (name, "line"))
         {
-         if (getc (finput) == 'i'
-             && getc (finput) == 'n'
-             && getc (finput) == 'e'
-             && ((c = getc (finput)) == ' ' || c == '\t'))
-           goto linenum;
+         saw_line = 1;
+         token = yylex ();
+         goto linenum;
         }
-      else if (c == 'i')
+      else if (!strcmp (name, "ident"))
         {
-         if (getc (finput) == 'd'
-             && getc (finput) == 'e'
-             && getc (finput) == 'n'
-             && getc (finput) == 't'
-             && ((c = getc (finput)) == ' ' || c == '\t'))
-           {
-             /* #ident.  The pedantic warning is now in cccp.c.  */
-
-             /* Here we have just seen `#ident '.
-                A string constant should follow.  */
-
-             while (c == ' ' || c == '\t')
-               c = getc (finput);
+         /* #ident.  The pedantic warning is now in cccp.c.  */
  
-             /* If no argument, ignore the line.  */
-             if (c == '\n')
-               return c;
+         /* Here we have just seen `#ident '.
+            A string constant should follow.  */
  
-             ungetc (c, finput);
-             token = yylex ();
-             if (token != STRING
-                 || TREE_CODE (yylval.ttype) != STRING_CST)
-               {
-                 error ("invalid #ident");
-                 goto skipline;
-               }
+         token = yylex ();
+         if (token == END_OF_LINE)
+           goto skipline;
+         if (token != STRING
+             || TREE_CODE (yylval.ttype) != STRING_CST)
+           {
+             error ("invalid #ident");
+             goto skipline;
+           }
  
-             if (!flag_no_ident)
-               {
+         if (! flag_no_ident)
+           {
  #ifdef ASM_OUTPUT_IDENT
-                 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (yylval.ttype));
+             ASM_OUTPUT_IDENT (asm_out_file,
+                               TREE_STRING_POINTER (yylval.ttype));
  #endif
-               }
-
-             /* Skip the rest of this line.  */
-             goto skipline;
             }
+
+         /* Skip the rest of this line.  */
+         goto skipline;
         }
  
-      error ("undefined or invalid # directive");
+      error ("undefined or invalid # directive `%s'", name);
        goto skipline;
      }
  
+  /* If the # is the only nonwhite char on the line,
+     just ignore it.  Check the new newline.  */
+  if (token == END_OF_LINE)
+    goto skipline;
+
  linenum:
    /* Here we have either `#line' or `# <nonletter>'.
       In either case, it should be a line number; a digit should follow.  */
  
-  while (c == ' ' || c == '\t')
-    c = getc (finput);
+  if (token != CONSTANT
+      || TREE_CODE (yylval.ttype) != INTEGER_CST)
+    {
+      error ("invalid #-line");
+      goto skipline;
+    }
  
-  /* If the # is the only nonwhite char on the line,
-     just ignore it.  Check the new newline.  */
-  if (c == '\n')
-    return c;
+  /* subtract one, because it is the following line that
+     gets the specified number */
  
-  /* Something follows the #; read a token.  */
+  l = TREE_INT_CST_LOW (yylval.ttype) - 1;
  
-  ungetc (c, finput);
-  token = yylex ();
+  /* More follows: it must be a string constant (filename).
+     It would be neat to use cpplib to quickly process the string, but
+     (1) we don't have a handy tokenization of the string, and
+     (2) I don't know how well that would work in the presence
+     of filenames that contain wide characters.  */
  
-  if (token == CONSTANT
-      && TREE_CODE (yylval.ttype) == INTEGER_CST)
+  if (saw_line)
      {
-      int old_lineno = lineno;
-      int used_up = 0;
-      /* subtract one, because it is the following line that
-        gets the specified number */
-
-      int l = TREE_INT_CST_LOW (yylval.ttype) - 1;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = getc (finput);
-      while (c == ' ' || c == '\t')
-       c = getc (finput);
-      if (c == '\n')
-       {
-         /* No more: store the line number and check following line.  */
-         lineno = l;
-         return c;
-       }
-      ungetc (c, finput);
+      /* Don't treat \ as special if we are processing #line 1 "...".
+        If you want it to be treated specially, use # 1 "...".  */
+      ignore_escape_flag = 1;
+    }
  
-      /* More follows: it must be a string constant (filename).  */
+  /* Read the string constant.  */
+  token = yylex ();
  
-      /* Read the string constant.  */
-      token = yylex ();
+  ignore_escape_flag = 0;
  
-      if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
-       {
-         error ("invalid #line");
-         goto skipline;
-       }
-
-      input_filename
-       = (char *) permalloc (TREE_STRING_LENGTH (yylval.ttype) + 1);
-      strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
+  if (token == END_OF_LINE)
+    {
+      /* No more: store the line number and check following line.  */
        lineno = l;
+      goto skipline;
+    }
  
-      /* Each change of file name
-        reinitializes whether we are now in a system header.  */
-      in_system_header = 0;
-
-      if (main_input_filename == 0)
-       main_input_filename = input_filename;
-
-      /* Is this the last nonwhite stuff on the line?  */
-      c = getc (finput);
-      while (c == ' ' || c == '\t')
-       c = getc (finput);
-      if (c == '\n')
-       {
-         /* Update the name in the top element of input_file_stack.  */
-         if (input_file_stack)
-           input_file_stack->name = input_filename;
-
-         return c;
-       }
-      ungetc (c, finput);
+  if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
+    {
+      error ("invalid #line");
+      goto skipline;
+    }
  
-      token = yylex ();
-      used_up = 0;
+  input_filename = TREE_STRING_POINTER (yylval.ttype);
  
-      /* `1' after file name means entering new file.
-        `2' after file name means just left a file.  */
+  if (main_input_filename == 0)
+    main_input_filename = input_filename;
  
-      if (token == CONSTANT
-         && TREE_CODE (yylval.ttype) == INTEGER_CST)
-       {
-         if (TREE_INT_CST_LOW (yylval.ttype) == 1)
-           {
-             /* Pushing to a new file.  */
-             struct file_stack *p
-               = (struct file_stack *) xmalloc (sizeof (struct file_stack));
-             input_file_stack->line = old_lineno;
-             p->next = input_file_stack;
-             p->name = input_filename;
-             input_file_stack = p;
-             input_file_stack_tick++;
-#ifdef DWARF_DEBUGGING_INFO
-             if (debug_info_level == DINFO_LEVEL_VERBOSE
-                 && write_symbols == DWARF_DEBUG)
-               dwarfout_start_new_source_file (input_filename);
-#endif /* DWARF_DEBUGGING_INFO */
-
-             used_up = 1;
-           }
-         else if (TREE_INT_CST_LOW (yylval.ttype) == 2)
-           {
-             /* Popping out of a file.  */
-             if (input_file_stack->next)
-               {
-                 struct file_stack *p = input_file_stack;
-                 input_file_stack = p->next;
-                 free (p);
-                 input_file_stack_tick++;
-#ifdef DWARF_DEBUGGING_INFO
-                 if (debug_info_level == DINFO_LEVEL_VERBOSE
-                     && write_symbols == DWARF_DEBUG)
-                   dwarfout_resume_previous_source_file (input_file_stack->line);
-#endif /* DWARF_DEBUGGING_INFO */
-               }
-             else
-               error ("#-lines for entering and leaving files don't match");
+  old_lineno = lineno;
+  action = act_none;
+  action_number = 0;
+  lineno = l;
  
-             used_up = 1;
-           }
-       }
+  /* Each change of file name
+     reinitializes whether we are now in a system header.  */
+  in_system_header = 0;
  
-      /* Now that we've pushed or popped the input stack,
-        update the name in the top element.  */
+  if (!read_line_number (&action_number))
+    {
+      /* Update the name in the top element of input_file_stack.  */
        if (input_file_stack)
         input_file_stack->name = input_filename;
+    }
  
-      /* If we have handled a `1' or a `2',
-        see if there is another number to read.  */
-      if (used_up)
-       {
-         /* Is this the last nonwhite stuff on the line?  */
-         c = getc (finput);
-         while (c == ' ' || c == '\t')
-           c = getc (finput);
-         if (c == '\n')
-           return c;
-         ungetc (c, finput);
-
-         token = yylex ();
-         used_up = 0;
-       }
+  /* `1' after file name means entering new file.
+     `2' after file name means just left a file.  */
  
+  if (action_number == 1)
+    {
+      action = act_push;
+      read_line_number (&action_number);
+    }
+  else if (action_number == 2)
+    {
+      action = act_pop;
+      read_line_number (&action_number);
+    }
+  if (action_number == 3)
+    {
        /* `3' after file name means this is a system header file.  */
+      in_system_header = 1;
+      read_line_number (&action_number);
+    }
  
-      if (token == CONSTANT
-         && TREE_CODE (yylval.ttype) == INTEGER_CST
-         && TREE_INT_CST_LOW (yylval.ttype) == 3)
-       in_system_header = 1, used_up = 1;
+  /* Do the actions implied by the preceding numbers.  */
  
-      if (used_up)
+  if (action == act_push)
+    {
+      /* Pushing to a new file.  */
+      struct file_stack *p
+       = (struct file_stack *) xmalloc (sizeof (struct file_stack));
+      input_file_stack->line = old_lineno;
+      p->next = input_file_stack;
+      p->name = input_filename;
+      p->indent_level = indent_level;
+      input_file_stack = p;
+      input_file_stack_tick++;
+      debug_start_source_file (input_filename);
+    }
+  else if (action == act_pop)
+    {
+      /* Popping out of a file.  */
+      if (input_file_stack->next)
         {
-         /* Is this the last nonwhite stuff on the line?  */
-         c = getc (finput);
-         while (c == ' ' || c == '\t')
-           c = getc (finput);
-         if (c == '\n')
-           return c;
-         ungetc (c, finput);
+         struct file_stack *p = input_file_stack;
+         if (indent_level != p->indent_level)
+           {
+             warning_with_file_and_line
+               (p->name, old_lineno,
+                "This file contains more `%c's than `%c's.",
+                indent_level > p->indent_level ? '{' : '}',
+                indent_level > p->indent_level ? '}' : '{');
+           }
+         input_file_stack = p->next;
+         free (p);
+         input_file_stack_tick++;
+         debug_end_source_file (input_file_stack->line);
         }
-
-      warning ("unrecognized text at end of #line");
+      else
+       error ("#-lines for entering and leaving files don't match");
      }
-  else
-    error ("invalid #-line");
+
+  /* Now that we've pushed or popped the input stack,
+     update the name in the top element.  */
+  if (input_file_stack)
+    input_file_stack->name = input_filename;
  
    /* skip the rest of this line.  */
   skipline:
-  if (c == '\n')
-    return c;
-  while ((c = getc (finput)) != EOF && c != '\n');
+  linemode = 0;
+  end_of_file = 0;
+
+  do
+    c = GETC();
+  while (c != '\n' && c != EOF);
    return c;
  }
  \f
-#ifdef HANDLE_SYSV_PRAGMA
+#ifdef HANDLE_GENERIC_PRAGMAS
  
-/* Handle a #pragma directive.  INPUT is the current input stream,
-   and C is a character to reread.  Processes the entire input line
-   and returns a character for the caller to reread: either \n or EOF.  */
+/* Handle a #pragma directive.
+   TOKEN is the token we read after `#pragma'.  Processes the entire input
+   line and returns non-zero iff the pragma has been successfully parsed.  */
  
  /* This function has to be in this file, in order to get at
     the token types.  */
  
-int
-handle_sysv_pragma (input, c)
-     FILE *input;
-     int c;
+static int
+handle_generic_pragma (token)
+     register int token;
  {
    for (;;)
      {
-      while (c == ' ' || c == '\t')
-       c = getc (input);
-      if (c == '\n' || c == EOF)
-       {
-         handle_pragma_token (0, 0);
-         return c;
-       }
-      ungetc (c, input);
-      switch (yylex ())
+      switch (token)
         {
         case IDENTIFIER:
         case TYPENAME:
@@ -703,17 +881,19 @@ handle_sysv_pragma (input, c)
         case CONSTANT:
           handle_pragma_token (token_buffer, yylval.ttype);
           break;
+
+       case END_OF_LINE:
+         return handle_pragma_token (NULL_PTR, NULL_TREE);
+
         default:
-         handle_pragma_token (token_buffer, 0);
+         handle_pragma_token (token_buffer, NULL);
         }
-      if (nextchar >= 0)
-       c = nextchar, nextchar = -1;
-      else
-       c = getc (input);
+
+      token = yylex ();
      }
  }
  
-#endif /* HANDLE_SYSV_PRAGMA */
+#endif /* HANDLE_GENERIC_PRAGMAS */
  \f
  #define ENDFILE -1  /* token that represents end-of-file */
  
@@ -724,7 +904,7 @@ static int
  readescape (ignore_ptr)
       int *ignore_ptr;
  {
-  register int c = getc (finput);
+  register int c = GETC();
    register int code;
    register unsigned count;
    unsigned firstdig = 0;
@@ -744,12 +924,10 @@ readescape (ignore_ptr)
        nonnull = 0;
        while (1)
         {
-         c = getc (finput);
-         if (!(c >= 'a' && c <= 'f')
-             && !(c >= 'A' && c <= 'F')
-             && !(c >= '0' && c <= '9'))
+         c = GETC();
+         if (! ISXDIGIT (c))
             {
-             ungetc (c, finput);
+             UNGETC (c);
               break;
             }
           code *= 16;
@@ -768,13 +946,18 @@ readescape (ignore_ptr)
           nonnull = 1;
         }
        if (! nonnull)
-       error ("\\x used with no following hex digits");
+       {
+         warning ("\\x used with no following hex digits");
+         return 'x';
+       }
        else if (count == 0)
         /* Digits are all 0's.  Ok.  */
         ;
        else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
                || (count > 1
-                  && ((1 << (TYPE_PRECISION (integer_type_node) - (count - 1) * 4))
+                  && (((unsigned)1
+                       << (TYPE_PRECISION (integer_type_node)
+                           - (count - 1) * 4))
                        <= firstdig)))
         pedwarn ("hex escape out of range");
        return code;
@@ -786,9 +969,9 @@ readescape (ignore_ptr)
        while ((c <= '7') && (c >= '0') && (count++ < 3))
         {
           code = (code * 8) + (c - '0');
-         c = getc (finput);
+         c = GETC();
         }
-      ungetc (c, finput);
+      UNGETC (c);
        return code;
  
      case '\\': case '\'': case '"':
@@ -833,7 +1016,7 @@ readescape (ignore_ptr)
      case 'E':
        if (pedantic)
         pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
-      return 033;
+      return TARGET_ESC;
  
      case '?':
        return c;
@@ -845,10 +1028,10 @@ readescape (ignore_ptr)
        /* `\%' is used to prevent SCCS from getting confused.  */
      case '%':
        if (pedantic)
-       pedwarn ("non-ANSI escape sequence `\\%c'", c);
+       pedwarn ("unknown escape sequence `\\%c'", c);
        return c;
      }
-  if (c >= 040 && c < 0177)
+  if (ISGRAPH (c))
      pedwarn ("unknown escape sequence `\\%c'", c);
    else
      pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
@@ -856,53 +1039,199 @@ readescape (ignore_ptr)
  }
  \f
+/* Report a parse error.  MSGID is the message as handed to us; it is
+   passed through _() (presumably the message-translation macro) before
+   being printed.  A phrase describing where the error was noticed is
+   appended: at end of input, at a null character, before a string or
+   character constant, before a non-graphic character (printed in octal),
+   or before the text currently in token_buffer.  */
  void
-yyerror (string)
-     char *string;
+yyerror (msgid)
+     const char *msgid;
  {
-  char buf[200];
-
-  strcpy (buf, string);
+  const char *string = _(msgid);
  
    /* We can't print string and character constants well
       because the token_buffer contains the result of processing escapes.  */
    if (end_of_file)
-    strcat (buf, " at end of input");
+    error ("%s at end of input", string);
    else if (token_buffer[0] == 0)
-    strcat (buf, " at null character");
+    error ("%s at null character", string);
    else if (token_buffer[0] == '"')
-    strcat (buf, " before string constant");
+    error ("%s before string constant", string);
    else if (token_buffer[0] == '\'')
-    strcat (buf, " before character constant");
-  else if (token_buffer[0] < 040 || (unsigned char) token_buffer[0] >= 0177)
-    sprintf (buf + strlen (buf), " before character 0%o",
-            (unsigned char) token_buffer[0]);
+    error ("%s before character constant", string);
+  else if (!ISGRAPH(token_buffer[0]))
+    error ("%s before character 0%o", string, (unsigned char) token_buffer[0]);
    else
-    strcat (buf, " before `%s'");
-
-  error (buf, token_buffer);
+    error ("%s before `%s'", string, token_buffer);
  }
  
  #if 0
  
-struct try_type
+/* Describes one integer type: NODE_VAR points at the variable holding
+   its type node, and the three flags say whether the type is unsigned,
+   long, and long long.  This sits inside `#if 0', so it is not
+   compiled.  */
+struct try_type
+{
+  tree *node_var;
+  char unsigned_flag;
+  char long_flag;
+  char long_long_flag;
+};
+
+/* The integer types from int up to unsigned long long, each signed type
+   followed by its unsigned counterpart.
+   NOTE(review): presumably the order in which types are tried for an
+   integer constant -- confirm; the table is disabled by `#if 0' and no
+   user of it is visible here.  */
+struct try_type type_sequence[] =
+{
+  { &integer_type_node, 0, 0, 0},
+  { &unsigned_type_node, 1, 0, 0},
+  { &long_integer_type_node, 0, 1, 0},
+  { &long_unsigned_type_node, 1, 1, 0},
+  { &long_long_integer_type_node, 0, 1, 1},
+  { &long_long_unsigned_type_node, 1, 1, 1}
+};
+#endif /* 0 */
+\f
+/* Arguments and results for parse_float.  yylex fills in the input
+   fields, runs parse_float under do_float_handler, and then reads the
+   output fields back.  */
+struct pf_args
+{
+  /* Input */
+  /* Radix of the constant; 16 selects the hexadecimal-float conversion,
+     any other value the decimal one.  */
+  int base;
+  /* End of the number text in token_buffer (yylex stores a null there).
+     parse_float appends suffix letters through this pointer; yylex does
+     not read it back afterwards.  */
+  char * p;
+  /* I/O */
+  /* On entry, the first character after the number.  On return, the
+     first character that is not a recognized suffix letter.  */
+  int c;
+  /* Output */
+  /* Nonzero if an imaginary suffix was seen.  */
+  int imag;
+  /* Type chosen from the suffixes: double_type_node by default,
+     float_type_node for `f', long_double_type_node for `l'.  */
+  tree type;
+  /* Value of errno immediately after the text-to-real conversion.  */
+  int conversion_errno;
+  /* The converted value.  */
+  REAL_VALUE_TYPE value;
+};
+ 
+/* Convert the number text COPY to a real value for parse_float.  The
+   radix comes from ARGS->base (16 selects REAL_VALUE_HTOF, anything else
+   REAL_VALUE_ATOF) and the precision from the mode of ARGS->type.  The
+   result goes in ARGS->value, and the errno left behind by the
+   conversion in ARGS->conversion_errno.  */
+
+static void pf_convert PARAMS ((struct pf_args *, char *));
+
+static void
+pf_convert (args, copy)
+     struct pf_args *args;
+     char *copy;
+{
+  errno = 0;
+  if (args->base == 16)
+    args->value = REAL_VALUE_HTOF (copy, TYPE_MODE (args->type));
+  else
+    args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
+  args->conversion_errno = errno;
+}
+
+/* Finish lexing a floating constant whose digits (and exponent, if any)
+   are already in token_buffer.  DATA is really a `struct pf_args *'.
+
+   Starting with ARGS->c, read the suffix letters `f', `l', `i' and `j'
+   (either case), appending each one to token_buffer, then convert the
+   number in the type the suffixes select.  The type, the imaginary
+   flag, the value, the errno from the conversion and the first
+   character after the suffixes are all left in ARGS.
+
+   yylex runs this through do_float_handler, so a floating-point
+   exception raised during the conversion is reported by the caller.  */
+
+static void
+parse_float (data)
+  PTR data;
+{
+  struct pf_args * args = (struct pf_args *) data;
+  int fflag = 0, lflag = 0;
+  /* Copy token_buffer now, while it has just the number
+     and not the suffixes; once we add `f' or `i',
+     REAL_VALUE_ATOF may not work any more.  */
+  char *copy = (char *) alloca (args->p - token_buffer + 1);
+  bcopy (token_buffer, copy, args->p - token_buffer + 1);
+  args->imag = 0;
+  args->conversion_errno = 0;
+  args->type = double_type_node;
+
+  while (1)
+    {
+      int lose = 0;
+
+      /* Read the suffixes to choose a data type.  */
+      switch (args->c)
+       {
+       case 'f': case 'F':
+         if (fflag)
+           error ("more than one `f' in numeric constant");
+         fflag = 1;
+         break;
+
+       case 'l': case 'L':
+         if (lflag)
+           error ("more than one `l' in numeric constant");
+         lflag = 1;
+         break;
+
+       /* `j' is a synonym for `i'.  The digit loop in yylex already
+          stops on either letter, so both must be accepted here or
+          `1.0j' is not seen as an imaginary constant.  */
+       case 'i': case 'I':
+       case 'j': case 'J':
+         if (args->imag)
+           error ("more than one `i' or `j' in numeric constant");
+         else if (pedantic)
+           pedwarn ("ANSI C forbids imaginary numeric constants");
+         args->imag = 1;
+         break;
+
+       default:
+         lose = 1;
+       }
+
+      if (lose)
+       break;
+
+      /* Keep the suffix in token_buffer and fetch the next character.  */
+      if (args->p >= token_buffer + maxtoken - 3)
+       args->p = extend_token_buffer (args->p);
+      *(args->p++) = args->c;
+      *(args->p) = 0;
+      args->c = GETC();
+    }
+
+  /* The second argument, machine_mode, of REAL_VALUE_ATOF
+     tells the desired precision of the binary result
+     of decimal-to-binary conversion.  */
+
+  if (fflag)
+    {
+      if (lflag)
+       error ("both `f' and `l' in floating constant");
+
+      args->type = float_type_node;
+      pf_convert (args, copy);
+      /* A diagnostic is required here by some ANSI C testsuites.
+        This is not pedwarn, because some people don't want
+        an error for this.  */
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `float'");
+    }
+  else if (lflag)
+    {
+      args->type = long_double_type_node;
+      pf_convert (args, copy);
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `long double'");
+    }
+  else
+    {
+      /* No `f' or `l': the type stays double_type_node.  */
+      pf_convert (args, copy);
+      if (REAL_VALUE_ISINF (args->value) && pedantic)
+       warning ("floating point number exceeds range of `double'");
+    }
+}
+ 
+/* Fetch the next input character without running past the token being
+   lexed.  Under cpplib, once the text of the current token is used up
+   (yy_cur has reached yy_lim), hand back a null character instead of
+   calling GETC; reading on would pull in the next token, which screws
+   up feed_input.  Without cpplib this is simply GETC.  */
+
+static inline int token_getch PARAMS ((void));
+
+static inline int
+token_getch ()
+{
+#if USE_CPPLIB
+  return (yy_cur == yy_lim) ? '\0' : GETC ();
+#else
+  return GETC ();
+#endif
+}
+
+/* Push CH back onto the input with UNGETC.  Under cpplib a null
+   character is silently dropped instead: token_getch returns '\0'
+   without reading anything once the current token is used up, so in
+   that case there is nothing to put back.  */
+
+static inline void token_put_back PARAMS ((int));
+
+static inline void
+token_put_back (ch)
+     int ch;
  {
-  tree *node_var;
-  char unsigned_flag;
-  char long_flag;
-  char long_long_flag;
-};
+#if USE_CPPLIB
+  if (ch == '\0')
+    return;
+#endif
+  UNGETC (ch);
+}
+
+/* Read a single token from the input stream, and assign it lexical
+   semantics.  */
  
-struct try_type type_sequence[] = 
-{
-  { &integer_type_node, 0, 0, 0},
-  { &unsigned_type_node, 1, 0, 0},
-  { &long_integer_type_node, 0, 1, 0},
-  { &long_unsigned_type_node, 1, 1, 0},
-  { &long_long_integer_type_node, 0, 1, 1},
-  { &long_long_unsigned_type_node, 1, 1, 1}
-};
-#endif /* 0 */
-\f
  int
  yylex ()
  {
@@ -912,10 +1241,7 @@ yylex ()
    int wide_flag = 0;
    int objc_flag = 0;
  
-  if (nextchar >= 0)
-    c = nextchar, nextchar = -1;
-  else
-    c = getc (finput);
+  c = GETC();
  
    /* Effectively do c = skip_white_space (c)
       but do it faster in the usual cases.  */
@@ -927,7 +1253,12 @@ yylex ()
        case '\f':
        case '\v':
        case '\b':
-       c = getc (finput);
+#if USE_CPPLIB
+       if (cpp_token == CPP_HSPACE)
+         c = yy_get_token ();
+       else
+#endif
+         c = GETC();
         break;
  
        case '\r':
@@ -952,18 +1283,20 @@ yylex ()
      case EOF:
        end_of_file = 1;
        token_buffer[0] = 0;
-      value = ENDFILE;
+      if (linemode)
+       value = END_OF_LINE;
+      else
+       value = ENDFILE;
        break;
  
-    case '$':
-      if (dollars_in_ident)
-       goto letter;
-      return '$';
-
      case 'L':
+#if USE_CPPLIB
+      if (cpp_token == CPP_NAME)
+       goto letter;
+#endif
        /* Capital L may start a wide-string or wide-character constant.  */
        {
-       register int c = getc (finput);
+       register int c = token_getch();
         if (c == '\'')
           {
             wide_flag = 1;
@@ -974,7 +1307,7 @@ yylex ()
             wide_flag = 1;
             goto string_constant;
           }
-       ungetc (c, finput);
+       token_put_back (c);
        }
        goto letter;
  
@@ -987,13 +1320,13 @@ yylex ()
        else
         {
           /* '@' may start a constant string object.  */
-         register int c = getc(finput);
+         register int c = token_getch ();
           if (c == '"')
             {
               objc_flag = 1;
               goto string_constant;
             }
-         ungetc(c, finput);
+         token_put_back (c);
           /* Fall through to treat '@' as the start of an identifier.  */
         }
  
@@ -1010,25 +1343,47 @@ yylex ()
      case 'u':  case 'v':  case 'w':  case 'x':  case 'y':
      case 'z':
      case '_':
+    case '$':
      letter:
-      p = token_buffer;
-      while (isalnum (c) || c == '_' || c == '$' || c == '@')
+#if USE_CPPLIB
+      if (cpp_token == CPP_NAME)
         {
-         /* Make sure this char really belongs in an identifier.  */
-         if (c == '@' && ! doing_objc_thang)
-           break;
-         if (c == '$' && ! dollars_in_ident)
-           break;
+         /* Note that one character has already been read from
+            yy_cur into token_buffer.  Also, cpplib complains about
+            $ in identifiers, so we don't have to.  */
+
+         int len = yy_lim - yy_cur + 1;
+         if (len >= maxtoken)
+           extend_token_buffer_to (len + 1);
+         memcpy (token_buffer + 1, yy_cur, len);
+         p = token_buffer + len;
+         yy_cur = yy_lim;
+       }
+      else
+#endif
+       {
+         p = token_buffer;
+         while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
+           {
+             /* Make sure this char really belongs in an identifier.  */
+             if (c == '$')
+               {
+                 if (! dollars_in_ident)
+                   error ("`$' in identifier");
+                 else if (pedantic)
+                   pedwarn ("`$' in identifier");
+               }
  
-         if (p >= token_buffer + maxtoken)
-           p = extend_token_buffer (p);
+             if (p >= token_buffer + maxtoken)
+               p = extend_token_buffer (p);
  
-         *p++ = c;
-         c = getc (finput);
-       }
+             *p++ = c;
+             c = token_getch();
+           }
  
-      *p = 0;
-      nextchar = c;
+         *p = 0;
+         token_put_back (c);
+       }
  
        value = IDENTIFIER;
        yylval.itype = 0;
@@ -1038,7 +1393,7 @@ yylex ()
        {
         register struct resword *ptr;
  
-       if (ptr = is_reserved_word (token_buffer, p - token_buffer))
+       if ((ptr = is_reserved_word (token_buffer, p - token_buffer)))
           {
             if (ptr->rid)
               yylval.ttype = ridpointers[(int) ptr->rid];
@@ -1087,7 +1442,7 @@ yylex ()
                    && TREE_CODE (DECL_INITIAL (lastiddecl)) == STRING_CST)
             {
               tree stringval = DECL_INITIAL (lastiddecl);
-             
+
               /* Copy the string value so that we won't clobber anything
                  if we put something in the TREE_CHAIN of this one.  */
               yylval.ttype = build_string (TREE_STRING_LENGTH (stringval),
@@ -1108,26 +1463,82 @@ yylex ()
  
        break;
  
-    case '0':  case '1':  case '2':  case '3':  case '4':
-    case '5':  case '6':  case '7':  case '8':  case '9':
      case '.':
+#if USE_CPPLIB
+      if (yy_cur < yy_lim)
+#endif
+       {
+         /* It's hard to preserve tokenization on '.' because
+            it could be a symbol by itself, or it could be the
+            start of a floating point number and cpp won't tell us.  */
+         register int c1 = token_getch ();
+         token_buffer[1] = c1;
+         if (c1 == '.')
+           {
+             c1 = token_getch ();
+             if (c1 == '.')
+               {
+                 token_buffer[2] = c1;
+                 token_buffer[3] = 0;
+                 value = ELLIPSIS;
+                 goto done;
+               }
+             error ("parse error at `..'");
+           }
+         if (ISDIGIT (c1))
+           {
+             token_put_back (c1);
+             goto number;
+           }
+         token_put_back (c1);
+       }
+      value = '.';
+      token_buffer[1] = 0;
+      break;
+
+    case '0':  case '1':
+      /* Optimize for most frequent case.  */
+      {
+       register int cond;
+
+#if USE_CPPLIB
+       cond = (yy_cur == yy_lim);
+#else
+       register int c1 = token_getch ();
+       token_put_back (c1);
+       cond = (! ISALNUM (c1) && c1 != '.');
+#endif
+       if (cond)
+         {
+           yylval.ttype = (c == '0') ? integer_zero_node : integer_one_node;
+           value = CONSTANT;
+           break;
+         }
+       /*FALLTHRU*/
+      }
+    case '2':  case '3':  case '4':
+    case '5':  case '6':  case '7':  case '8':  case '9':
+    number:
        {
         int base = 10;
         int count = 0;
         int largest_digit = 0;
         int numdigits = 0;
-       /* for multi-precision arithmetic,
-          we actually store only HOST_BITS_PER_CHAR bits in each part.
-          The number of parts is chosen so as to be sufficient to hold
-          the enough bits to fit into the two HOST_WIDE_INTs that contain
-          the integer value (this is always at least as many bits as are
-          in a target `long long' value, but may be wider).  */
-#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
-       int parts[TOTAL_PARTS];
         int overflow = 0;
  
-       enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS} floatflag
-         = NOT_FLOAT;
+       /* We actually store only HOST_BITS_PER_CHAR bits in each part.
+          The code below which fills the parts array assumes that a host
+          int is at least twice as wide as a host char, and that 
+          HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
+          Two HOST_WIDE_INTs is the largest int literal we can store.
+          In order to detect overflow below, the number of parts (TOTAL_PARTS)
+          must be exactly the number of parts needed to hold the bits
+          of two HOST_WIDE_INTs. */
+#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
+       unsigned int parts[TOTAL_PARTS];
+
+       enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS, AFTER_EXPON}
+         floatflag = NOT_FLOAT;
  
         for (count = 0; count < TOTAL_PARTS; count++)
           parts[count] = 0;
@@ -1137,11 +1548,11 @@ yylex ()
  
         if (c == '0')
           {
-           *p++ = (c = getc (finput));
+           *p++ = (c = token_getch());
             if ((c == 'x') || (c == 'X'))
               {
                 base = 16;
-               *p++ = (c = getc (finput));
+               *p++ = (c = token_getch());
               }
             /* Leading 0 forces octal unless the 0 is the only digit.  */
             else if (c >= '0' && c <= '9')
@@ -1156,19 +1567,21 @@ yylex ()
         /* Read all the digits-and-decimal-points.  */
  
         while (c == '.'
-              || (isalnum (c) && c != 'l' && c != 'L'
+              || (ISALNUM (c) && c != 'l' && c != 'L'
                    && c != 'u' && c != 'U'
                    && c != 'i' && c != 'I' && c != 'j' && c != 'J'
-                  && (floatflag == NOT_FLOAT || ((c != 'f') && (c != 'F')))))
+                  && (floatflag == NOT_FLOAT
+                      || ((base != 16) && (c != 'f') && (c != 'F'))
+                      || base == 16)))   
           {
             if (c == '.')
               {
-               if (base == 16)
-                 error ("floating constant may not be in radix 16");
+               if (base == 16 && pedantic)
+                 pedwarn ("floating constant may not be in radix 16");
                 if (floatflag == TOO_MANY_POINTS)
                   /* We have already emitted an error.  Don't need another.  */
                   ;
-               else if (floatflag == AFTER_POINT)
+               else if (floatflag == AFTER_POINT || floatflag == AFTER_EXPON)
                   {
                     error ("malformed floating constant");
                     floatflag = TOO_MANY_POINTS;
@@ -1179,37 +1592,20 @@ yylex ()
                 else
                   floatflag = AFTER_POINT;
  
-               base = 10;
-               *p++ = c = getc (finput);
+               if (base == 8)
+                 base = 10;
+               *p++ = c = token_getch();
                 /* Accept '.' as the start of a floating-point number
-                  only when it is followed by a digit.
-                  Otherwise, unread the following non-digit
-                  and use the '.' as a structural token.  */
-               if (p == token_buffer + 2 && !isdigit (c))
-                 {
-                   if (c == '.')
-                     {
-                       c = getc (finput);
-                       if (c == '.')
-                         {
-                           *p++ = c;
-                           *p = 0;
-                           return ELLIPSIS;
-                         }
-                       error ("parse error at `..'");
-                     }
-                   ungetc (c, finput);
-                   token_buffer[1] = 0;
-                   value = '.';
-                   goto done;
-                 }
+                  only when it is followed by a digit.  */
+               if (p == token_buffer + 2 && !ISDIGIT (c))
+                 abort ();
               }
             else
               {
                 /* It is not a decimal point.
                    It should be a digit (perhaps a hex digit).  */
  
-               if (isdigit (c))
+               if (ISDIGIT (c))
                   {
                     c = c - '0';
                   }
@@ -1218,12 +1614,17 @@ yylex ()
                     if (c == 'e' || c == 'E')
                       {
                         base = 10;
-                       floatflag = AFTER_POINT;
+                       floatflag = AFTER_EXPON;
                         break;   /* start of exponent */
                       }
                     error ("nondigits in number and not hexadecimal");
                     c = 0;
                   }
+               else if (base == 16 && (c == 'p' || c == 'P'))
+                 {
+                   floatflag = AFTER_EXPON;
+                   break;   /* start of exponent */
+                 }
                 else if (c >= 'a')
                   {
                     c = c - 'a' + 10;
@@ -1250,178 +1651,102 @@ yylex ()
                       parts[0] += c;
                   }
  
-               /* If the extra highest-order part ever gets anything in it,
-                  the number is certainly too big.  */
-               if (parts[TOTAL_PARTS - 1] != 0)
-                 overflow = 1;
+               /* If the highest-order part overflows (gets larger than
+                  a host char will hold) then the whole number has 
+                  overflowed.  Record this and truncate the highest-order
+                  part. */
+               if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
+                 {
+                   overflow = 1;
+                   parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
+                 }
  
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
-               *p++ = (c = getc (finput));
+               *p++ = (c = token_getch());
               }
           }
  
+       /* This can happen on input like `int i = 0x;' */
         if (numdigits == 0)
           error ("numeric constant with no digits");
  
         if (largest_digit >= base)
           error ("numeric constant contains digits beyond the radix");
  
-       /* Remove terminating char from the token buffer and delimit the string */
+       /* Remove terminating char from the token buffer and delimit the
+           string.  */
         *--p = 0;
  
         if (floatflag != NOT_FLOAT)
           {
-           tree type = double_type_node;
-           int garbage_chars = 0, exceeds_double = 0;
-           int imag = 0;
+           tree type;
+           int imag, conversion_errno;
             REAL_VALUE_TYPE value;
-           jmp_buf handler;
+           struct pf_args args;
  
             /* Read explicit exponent if any, and put it in tokenbuf.  */
  
-           if ((c == 'e') || (c == 'E'))
+           if ((base == 10 && ((c == 'e') || (c == 'E')))
+               || (base == 16 && (c == 'p' || c == 'P')))
               {
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
                 *p++ = c;
-               c = getc (finput);
+               c = token_getch();
                 if ((c == '+') || (c == '-'))
                   {
                     *p++ = c;
-                   c = getc (finput);
+                   c = token_getch();
                   }
-               if (! isdigit (c))
+               /* Exponent is decimal, even if string is a hex float.  */
+               if (! ISDIGIT (c))
                   error ("floating constant exponent has no digits");
-               while (isdigit (c))
+               while (ISDIGIT (c))
                   {
                     if (p >= token_buffer + maxtoken - 3)
                       p = extend_token_buffer (p);
                     *p++ = c;
-                   c = getc (finput);
+                   c = token_getch ();
                   }
               }
+           if (base == 16 && floatflag != AFTER_EXPON)
+             error ("hexadecimal floating constant has no exponent");
  
             *p = 0;
-           errno = 0;
+
+           /* Setup input for parse_float() */
+           args.base = base;
+           args.p = p;
+           args.c = c;
  
             /* Convert string to a double, checking for overflow.  */
-           if (setjmp (handler))
+           if (do_float_handler (parse_float, (PTR) &args))
               {
-               error ("floating constant out of range");
-               value = dconst0;
+               /* Receive output from parse_float() */
+               value = args.value;
               }
             else
               {
-               int fflag = 0, lflag = 0;
-               /* Copy token_buffer now, while it has just the number
-                  and not the suffixes; once we add `f' or `i',
-                  REAL_VALUE_ATOF may not work any more.  */
-               char *copy = (char *) alloca (p - token_buffer + 1);
-               bcopy (token_buffer, copy, p - token_buffer + 1);
-
-               set_float_handler (handler);
-
-               while (1)
-                 {
-                   int lose = 0;
-
-                   /* Read the suffixes to choose a data type.  */
-                   switch (c)
-                     {
-                     case 'f': case 'F':
-                       if (fflag)
-                         error ("more than one `f' in numeric constant");
-                       fflag = 1;
-                       break;
-
-                     case 'l': case 'L':
-                       if (lflag)
-                         error ("more than one `l' in numeric constant");
-                       lflag = 1;
-                       break;
-
-                     case 'i': case 'I':
-                       if (imag)
-                         error ("more than one `i' or `j' in numeric constant");
-                       else if (pedantic)
-                         pedwarn ("ANSI C forbids imaginary numeric constants");
-                       imag = 1;
-                       break;
-
-                     default:
-                       lose = 1;
-                     }
-
-                   if (lose)
-                     break;
-
-                   if (p >= token_buffer + maxtoken - 3)
-                     p = extend_token_buffer (p);
-                   *p++ = c;
-                   *p = 0;
-                   c = getc (finput);
-                 }
-
-               /* The second argument, machine_mode, of REAL_VALUE_ATOF
-                  tells the desired precision of the binary result
-                  of decimal-to-binary conversion.  */
-
-               if (fflag)
-                 {
-                   if (lflag)
-                     error ("both `f' and `l' in floating constant");
-
-                   type = float_type_node;
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   /* A diagnostic is required here by some ANSI C testsuites.
-                      This is not pedwarn, become some people don't want
-                      an error for this.  */
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `float'");
-                 }
-               else if (lflag)
-                 {
-                   type = long_double_type_node;
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `long double'");
-                 }
-               else
-                 {
-                   value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
-                   if (REAL_VALUE_ISINF (value) && pedantic)
-                     warning ("floating point number exceeds range of `double'");
-                 }
+               /* We got an exception from parse_float() */
+               error ("floating constant out of range");
+               value = dconst0;
+             }
  
-               set_float_handler (NULL_PTR);
-           }
+           /* Receive output from parse_float() */
+           c = args.c;
+           imag = args.imag;
+           type = args.type;
+           conversion_errno = args.conversion_errno;
+           
  #ifdef ERANGE
-           if (errno == ERANGE && !flag_traditional && pedantic)
-             {
-               /* ERANGE is also reported for underflow,
-                  so test the value to distinguish overflow from that.  */
-               if (REAL_VALUES_LESS (dconst1, value)
-                   || REAL_VALUES_LESS (value, dconstm1))
-                 {
-                   warning ("floating point number exceeds range of `double'");
-                   exceeds_double = 1;
-                 }
-             }
+           /* ERANGE is also reported for underflow,
+              so test the value to distinguish overflow from that.  */
+           if (conversion_errno == ERANGE && !flag_traditional && pedantic
+               && (REAL_VALUES_LESS (dconst1, value)
+                   || REAL_VALUES_LESS (value, dconstm1)))
+             warning ("floating point number exceeds range of `double'");
  #endif
-           garbage_chars = 0;
-           while (isalnum (c) || c == '.' || c == '_'
-                  || (!flag_traditional && (c == '+' || c == '-')
-                      && (p[-1] == 'e' || p[-1] == 'E')))
-             {
-               if (p >= token_buffer + maxtoken - 3)
-                 p = extend_token_buffer (p);
-               *p++ = c;
-               c = getc (finput);
-               garbage_chars++;
-             }
-           if (garbage_chars > 0)
-             error ("garbage at end of number");
  
             /* If the result is not a number, assume it must have been
                due to some error message above, so silently convert
@@ -1431,13 +1756,11 @@ yylex ()
  
             /* Create a node with determined type and value.  */
             if (imag)
-             yylval.ttype = build_complex (convert (type, integer_zero_node),
+             yylval.ttype = build_complex (NULL_TREE,
+                                           convert (type, integer_zero_node),
                                             build_real (type, value));
             else
               yylval.ttype = build_real (type, value);
-
-           ungetc (c, finput);
-           *p = 0;
           }
         else
           {
@@ -1447,8 +1770,9 @@ yylex ()
             int spec_long = 0;
             int spec_long_long = 0;
             int spec_imag = 0;
-           int bytes, warn, i;
+           int warn = 0, i;
  
+           traditional_type = ansi_type = type = NULL_TREE;
             while (1)
               {
                 if (c == 'u' || c == 'U')
@@ -1463,7 +1787,7 @@ yylex ()
                       {
                         if (spec_long_long)
                           error ("three `l's in integer constant");
-                       else if (pedantic)
+                       else if (pedantic && ! in_system_header && warn_long_long)
                           pedwarn ("ANSI C forbids long long integer constants");
                         spec_long_long = 1;
                       }
@@ -1478,53 +1802,19 @@ yylex ()
                     spec_imag = 1;
                   }
                 else
-                 {
-                   if (isalnum (c) || c == '.' || c == '_'
-                       || (!flag_traditional && (c == '+' || c == '-')
-                           && (p[-1] == 'e' || p[-1] == 'E')))
-                     {
-                       error ("garbage at end of number");
-                       while (isalnum (c) || c == '.' || c == '_'
-                              || (!flag_traditional && (c == '+' || c == '-')
-                                  && (p[-1] == 'e' || p[-1] == 'E')))
-                         {
-                           if (p >= token_buffer + maxtoken - 3)
-                             p = extend_token_buffer (p);
-                           *p++ = c;
-                           c = getc (finput);
-                         }
-                     }
-                   break;
-                 }
+                 break;
                 if (p >= token_buffer + maxtoken - 3)
                   p = extend_token_buffer (p);
                 *p++ = c;
-               c = getc (finput);
+               c = token_getch();
               }
  
-           ungetc (c, finput);
-
-           /* If the constant is not long long and it won't fit in an
-              unsigned long, or if the constant is long long and won't fit
-              in an unsigned long long, then warn that the constant is out
-              of range.  */
-
-           /* ??? This assumes that long long and long integer types are
-              a multiple of 8 bits.  This better than the original code
-              though which assumed that long was exactly 32 bits and long
-              long was exactly 64 bits.  */
-
-           if (spec_long_long)
-             bytes = TYPE_PRECISION (long_long_integer_type_node) / 8;
-           else
-             bytes = TYPE_PRECISION (long_integer_type_node) / 8;
-
-           warn = overflow;
-           for (i = bytes; i < TOTAL_PARTS; i++)
-             if (parts[i])
+           /* If the literal overflowed, pedwarn about it now. */
+           if (overflow)
+             {
                 warn = 1;
-           if (warn)
-             pedwarn ("integer constant out of range");
+               pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
+             }
  
             /* This is simplified by the fact that our constant
                is always positive.  */
@@ -1538,7 +1828,7 @@ yylex ()
                          << (i * HOST_BITS_PER_CHAR));
                 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
               }
-           
+
             yylval.ttype = build_int_2 (low, high);
             TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
  
@@ -1567,10 +1857,17 @@ yylex ()
                 else if (! spec_long_long)
                   traditional_type = (spec_unsigned ? long_unsigned_type_node
                                       : long_integer_type_node);
-               else
+               else if (int_fits_type_p (yylval.ttype,
+                                         spec_unsigned 
+                                         ? long_long_unsigned_type_node
+                                         : long_long_integer_type_node)) 
                   traditional_type = (spec_unsigned
                                       ? long_long_unsigned_type_node
                                       : long_long_integer_type_node);
+               else
+                 traditional_type = (spec_unsigned
+                                     ? widest_unsigned_literal_type_node
+                                     : widest_integer_literal_type_node);
               }
             if (warn_traditional || ! flag_traditional)
               {
@@ -1584,21 +1881,32 @@ yylex ()
                 else if (! spec_unsigned && !spec_long_long
                          && int_fits_type_p (yylval.ttype, long_integer_type_node))
                   ansi_type = long_integer_type_node;
-               else if (! spec_long_long)
+               else if (! spec_long_long
+                        && int_fits_type_p (yylval.ttype,
+                                            long_unsigned_type_node))
                   ansi_type = long_unsigned_type_node;
                 else if (! spec_unsigned
-                        /* Verify value does not overflow into sign bit.  */
-                        && TREE_INT_CST_HIGH (yylval.ttype) >= 0
                          && int_fits_type_p (yylval.ttype,
                                              long_long_integer_type_node))
                   ansi_type = long_long_integer_type_node;
-               else
+               else if (int_fits_type_p (yylval.ttype,
+                                         long_long_unsigned_type_node))
                   ansi_type = long_long_unsigned_type_node;
+               else if (! spec_unsigned
+                        && int_fits_type_p (yylval.ttype,
+                                            widest_integer_literal_type_node))
+                 ansi_type = widest_integer_literal_type_node;
+               else
+                 ansi_type = widest_unsigned_literal_type_node;
               }
  
             type = flag_traditional ? traditional_type : ansi_type;
  
-           if (warn_traditional && traditional_type != ansi_type)
+           /* We assume that constants specified in a non-decimal
+              base are bit patterns, and that the programmer really
+              meant what they wrote.  */
+           if (warn_traditional && base == 10
+               && traditional_type != ansi_type)
               {
                 if (TYPE_PRECISION (traditional_type)
                     != TYPE_PRECISION (ansi_type))
@@ -1610,9 +1918,13 @@ yylex ()
                   warning ("width of integer constant may change on other systems with -traditional");
               }
  
-           if (!flag_traditional && !int_fits_type_p (yylval.ttype, type)
-               && !warn)
-             pedwarn ("integer constant out of range");
+           if (pedantic && !flag_traditional && !spec_long_long && !warn
+               && (TYPE_PRECISION (long_integer_type_node)
+                   < TYPE_PRECISION (type)))
+             {
+               warn = 1;
+               pedwarn ("integer constant larger than the maximum value of an unsigned long int");
+             }
  
             if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
               warning ("decimal constant is so large that it is unsigned");
@@ -1622,8 +1934,9 @@ yylex ()
                 if (TYPE_PRECISION (type)
                     <= TYPE_PRECISION (integer_type_node))
                   yylval.ttype
-                   = build_complex (integer_zero_node,
-                                    convert (integer_type_node, yylval.ttype));
+                   = build_complex (NULL_TREE, integer_zero_node,
+                                    convert (integer_type_node,
+                                             yylval.ttype));
                 else
                   error ("complex integer constant is too wide for `complex int'");
               }
@@ -1640,9 +1953,24 @@ yylex ()
             else
               TREE_TYPE (yylval.ttype) = type;
  
-           *p = 0;
+
+           /* If it's still an integer (not a complex), and it doesn't
+              fit in the type we choose for it, then pedwarn. */
+
+           if (! warn
+               && TREE_CODE (TREE_TYPE (yylval.ttype)) == INTEGER_TYPE
+               && ! int_fits_type_p (yylval.ttype, TREE_TYPE (yylval.ttype)))
+             pedwarn ("integer constant is larger than the maximum value for its type");
           }
  
+       token_put_back (c);
+       *p = 0;
+
+       if (ISALNUM (c) || c == '.' || c == '_' || c == '$'
+           || (!flag_traditional && (c == '-' || c == '+')
+               && (p[-1] == 'e' || p[-1] == 'E')))
+         error ("missing white space after number `%s'", token_buffer);
+
         value = CONSTANT; break;
        }
  
@@ -1651,30 +1979,27 @@ yylex ()
        {
         register int result = 0;
         register int num_chars = 0;
+       int chars_seen = 0;
         unsigned width = TYPE_PRECISION (char_type_node);
         int max_chars;
-
-       if (wide_flag)
-         {
-           width = WCHAR_TYPE_SIZE;
  #ifdef MULTIBYTE_CHARS
-           max_chars = MB_CUR_MAX;
-#else
-           max_chars = 1;
+       int longest_char = local_mb_cur_max ();
+       (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
  #endif
-         }
-       else
-         max_chars = TYPE_PRECISION (integer_type_node) / width;
+
+       max_chars = TYPE_PRECISION (integer_type_node) / width;
+       if (wide_flag)
+         width = WCHAR_TYPE_SIZE;
  
         while (1)
           {
           tryagain:
-
-           c = getc (finput);
+           c = token_getch();
  
             if (c == '\'' || c == EOF)
               break;
  
+           ++chars_seen;
             if (c == '\\')
               {
                 int ignore = 0;
@@ -1682,10 +2007,10 @@ yylex ()
                 if (ignore)
                   goto tryagain;
                 if (width < HOST_BITS_PER_INT
-                   && (unsigned) c >= (1 << width))
+                   && (unsigned) c >= ((unsigned)1 << width))
                   pedwarn ("escape sequence out of range for character");
  #ifdef MAP_CHARACTER
-               if (isprint (c))
+               if (ISPRINT (c))
                   c = MAP_CHARACTER (c);
  #endif
               }
@@ -1695,18 +2020,81 @@ yylex ()
                   pedwarn ("ANSI C forbids newline in character constant");
                 lineno++;
               }
-#ifdef MAP_CHARACTER
             else
-             c = MAP_CHARACTER (c);
+             {
+#ifdef MULTIBYTE_CHARS
+               wchar_t wc;
+               int i;
+               int char_len = -1;
+               for (i = 1; i <= longest_char; ++i)
+                 {
+                   if (i > maxtoken - 4)
+                     extend_token_buffer (token_buffer);
+
+                   token_buffer[i] = c;
+                   char_len = local_mbtowc (& wc,
+                                            token_buffer + 1,
+                                            i);
+                   if (char_len != -1)
+                     break;
+                   c = token_getch ();
+                 }
+               if (char_len > 1)
+                 {
+                   /* mbtowc sometimes needs an extra char before accepting */
+                   if (char_len < i)
+                     token_put_back (c);
+                   if (! wide_flag)
+                     {
+                       /* Merge character into result; ignore excess chars.  */
+                       for (i = 1; i <= char_len; ++i)
+                         {
+                           if (i > max_chars)
+                             break;
+                           if (width < HOST_BITS_PER_INT)
+                             result = (result << width)
+                               | (token_buffer[i]
+                                  & ((1 << width) - 1));
+                           else
+                             result = token_buffer[i];
+                         }
+                       num_chars += char_len;
+                       goto tryagain;
+                     }
+                   c = wc;
+                 }
+               else
+                 {
+                   if (char_len == -1)
+                     {
+                       warning ("Ignoring invalid multibyte character");
+                       /* Replace all but the first byte.  */
+                       for (--i; i > 1; --i)
+                         token_put_back (token_buffer[i]);
+                       wc = token_buffer[1];
+                     }
+#ifdef MAP_CHARACTER
+                     c = MAP_CHARACTER (wc);
+#else
+                     c = wc;
  #endif
+                 }
+#else /* ! MULTIBYTE_CHARS */
+#ifdef MAP_CHARACTER
+               c = MAP_CHARACTER (c);
+#endif
+#endif /* ! MULTIBYTE_CHARS */
+             }
  
-           num_chars++;
-           if (num_chars > maxtoken - 4)
-             extend_token_buffer (token_buffer);
-
-           token_buffer[num_chars] = c;
+           if (wide_flag)
+             {
+               if (chars_seen == 1) /* only keep the first one */
+                 result = c;
+               goto tryagain;
+             }
  
             /* Merge character into result; ignore excess chars.  */
+           num_chars += (width / TYPE_PRECISION (char_type_node));
             if (num_chars < max_chars + 1)
               {
                 if (width < HOST_BITS_PER_INT)
@@ -1716,19 +2104,16 @@ yylex ()
               }
           }
  
-       token_buffer[num_chars + 1] = '\'';
-       token_buffer[num_chars + 2] = 0;
-
         if (c != '\'')
-         error ("malformatted character constant");
-       else if (num_chars == 0)
+         error ("malformed character constant");
+       else if (chars_seen == 0)
           error ("empty character constant");
         else if (num_chars > max_chars)
           {
             num_chars = max_chars;
             error ("character constant too long");
           }
-       else if (num_chars != 1 && ! flag_traditional)
+       else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
           warning ("multi-character character constant");
  
         /* If char type is signed, sign-extend the constant.  */
@@ -1741,34 +2126,18 @@ yylex ()
             else if (TREE_UNSIGNED (char_type_node)
                      || ((result >> (num_bits - 1)) & 1) == 0)
               yylval.ttype
-               = build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0
+               = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
                                          >> (HOST_BITS_PER_WIDE_INT - num_bits)),
                                0);
             else
               yylval.ttype
-               = build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0
+               = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
                                           >> (HOST_BITS_PER_WIDE_INT - num_bits)),
                                -1);
             TREE_TYPE (yylval.ttype) = integer_type_node;
           }
         else
           {
-#ifdef MULTIBYTE_CHARS
-           /* Set the initial shift state and convert the next sequence.  */
-           result = 0;
-           /* In all locales L'\0' is zero and mbtowc will return zero,
-              so don't use it.  */
-           if (num_chars > 1
-               || (num_chars == 1 && token_buffer[1] != '\0'))
-             {
-               wchar_t wc;
-               (void) mbtowc (NULL_PTR, NULL_PTR, 0);
-               if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
-                 result = wc;
-               else
-                 warning ("Ignoring invalid multibyte character");
-             }
-#endif
             yylval.ttype = build_int_2 (result, 0);
             TREE_TYPE (yylval.ttype) = wchar_type_node;
           }
@@ -1780,20 +2149,26 @@ yylex ()
      case '"':
      string_constant:
        {
-       c = getc (finput);
+       unsigned width = wide_flag ? WCHAR_TYPE_SIZE
+                                  : TYPE_PRECISION (char_type_node);
+#ifdef MULTIBYTE_CHARS
+       int longest_char = local_mb_cur_max ();
+       (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
+#endif
+       c = token_getch ();
         p = token_buffer + 1;
  
-       while (c != '"' && c >= 0)
+       while (c != '"' && c != EOF)
           {
-           if (c == '\\')
+           /* ignore_escape_flag is set for reading the filename in #line.  */
+           if (!ignore_escape_flag && c == '\\')
               {
                 int ignore = 0;
                 c = readescape (&ignore);
                 if (ignore)
                   goto skipnewline;
-               if (!wide_flag
-                   && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
-                   && c >= (1 << TYPE_PRECISION (char_type_node)))
+               if (width < HOST_BITS_PER_INT
+                   && (unsigned) c >= ((unsigned)1 << width))
                   pedwarn ("escape sequence out of range for character");
               }
             else if (c == '\n')
@@ -1802,17 +2177,98 @@ yylex ()
                   pedwarn ("ANSI C forbids newline in string constant");
                 lineno++;
               }
+           else
+             {
+#ifdef MULTIBYTE_CHARS
+               wchar_t wc;
+               int i;
+               int char_len = -1;
+               for (i = 0; i < longest_char; ++i)
+                 {
+                   if (p + i >= token_buffer + maxtoken)
+                     p = extend_token_buffer (p);
+                   p[i] = c;
  
-           if (p == token_buffer + maxtoken)
-             p = extend_token_buffer (p);
-           *p++ = c;
+                   char_len = local_mbtowc (& wc, p, i + 1);
+                   if (char_len != -1)
+                     break;
+                   c = token_getch ();
+                 }
+               if (char_len == -1)
+                 {
+                   warning ("Ignoring invalid multibyte character");
+                   /* Replace all except the first byte.  */
+                   token_put_back (c);
+                   for (--i; i > 0; --i)
+                     token_put_back (p[i]);
+                   char_len = 1;
+                 }
+               /* mbtowc sometimes needs an extra char before accepting */
+               if (char_len <= i)
+                 token_put_back (c);
+               if (! wide_flag)
+                 {
+                   p += (i + 1);
+                   c = token_getch ();
+                   continue;
+                 }
+               c = wc;
+#endif /* MULTIBYTE_CHARS */
+             }
+
+           /* Add this single character into the buffer either as a wchar_t
+              or as a single byte.  */
+           if (wide_flag)
+             {
+               unsigned width = TYPE_PRECISION (char_type_node);
+               unsigned bytemask = (1 << width) - 1;
+               int byte;
+
+               if (p + WCHAR_BYTES > token_buffer + maxtoken)
+                 p = extend_token_buffer (p);
+
+               for (byte = 0; byte < WCHAR_BYTES; ++byte)
+                 {
+                   int value;
+                   if (byte >= (int) sizeof (c))
+                     value = 0;
+                   else
+                     value = (c >> (byte * width)) & bytemask;
+                   if (BYTES_BIG_ENDIAN)
+                     p[WCHAR_BYTES - byte - 1] = value;
+                   else
+                     p[byte] = value;
+                 }
+               p += WCHAR_BYTES;
+             }
+           else
+             {
+               if (p >= token_buffer + maxtoken)
+                 p = extend_token_buffer (p);
+               *p++ = c;
+             }
  
           skipnewline:
-           c = getc (finput);
+           c = token_getch ();
+         }
+
+       /* Terminate the string value, either with a single byte zero
+          or with a wide zero.  */
+       if (wide_flag)
+         {
+           if (p + WCHAR_BYTES > token_buffer + maxtoken)
+             p = extend_token_buffer (p);
+           bzero (p, WCHAR_BYTES);
+           p += WCHAR_BYTES;
+         }
+       else
+         {
+           if (p >= token_buffer + maxtoken)
+             p = extend_token_buffer (p);
+           *p++ = 0;
           }
-       *p = 0;
  
-       if (c < 0)
+       if (c == EOF)
           error ("Unterminated string constant");
  
         /* We have read the entire constant.
@@ -1820,59 +2276,27 @@ yylex ()
  
         if (wide_flag)
           {
-           /* If this is a L"..." wide-string, convert the multibyte string
-              to a wide character string.  */
-           char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
-           int len;
-
-#ifdef MULTIBYTE_CHARS
-           len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
-           if (len < 0 || len >= (p - token_buffer))
-             {
-               warning ("Ignoring invalid multibyte string");
-               len = 0;
-             }
-           bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
-#else
-           {
-             union { long l; char c[sizeof (long)]; } u;
-             int big_endian;
-             char *wp, *cp;
-
-             /* Determine whether host is little or big endian.  */
-             u.l = 1;
-             big_endian = u.c[sizeof (long) - 1];
-             wp = widep + (big_endian ? WCHAR_BYTES - 1 : 0);
-
-             bzero (widep, (p - token_buffer) * WCHAR_BYTES);
-             for (cp = token_buffer + 1; cp < p; cp++)
-               *wp = *cp, wp += WCHAR_BYTES;
-             len = p - token_buffer - 1;
-           }
-#endif
-           yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
+           yylval.ttype = build_string (p - (token_buffer + 1),
+                                        token_buffer + 1);
             TREE_TYPE (yylval.ttype) = wchar_array_type_node;
             value = STRING;
           }
         else if (objc_flag)
           {
-           extern tree build_objc_string();
             /* Return an Objective-C @"..." constant string object.  */
-           yylval.ttype = build_objc_string (p - token_buffer,
+           yylval.ttype = build_objc_string (p - (token_buffer + 1),
                                               token_buffer + 1);
             TREE_TYPE (yylval.ttype) = char_array_type_node;
             value = OBJC_STRING;
           }
         else
           {
-           yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
+           yylval.ttype = build_string (p - (token_buffer + 1),
+                                        token_buffer + 1);
             TREE_TYPE (yylval.ttype) = char_array_type_node;
             value = STRING;
           }
  
-       *p++ = '"';
-       *p = 0;
-
         break;
        }
  
@@ -1922,7 +2346,7 @@ yylex ()
             yylval.code = GT_EXPR; break;
           }
  
-       token_buffer[1] = c1 = getc (finput);
+       token_buffer[1] = c1 = token_getch();
         token_buffer[2] = 0;
  
         if (c1 == '=')
@@ -1965,28 +2389,31 @@ yylex ()
               if (c1 == '>')
                 { value = POINTSAT; goto done; }
               break;
+
+             /* digraphs */
             case ':':
               if (c1 == '>')
                 { value = ']'; goto done; }
               break;
             case '<':
               if (c1 == '%')
-               { value = '{'; goto done; }
+               { value = '{'; indent_level++; goto done; }
               if (c1 == ':')
                 { value = '['; goto done; }
               break;
             case '%':
               if (c1 == '>')
-               { value = '}'; goto done; }
+               { value = '}'; indent_level--; goto done; }
               break;
             }
-       ungetc (c1, finput);
+
+       token_put_back (c1);
         token_buffer[1] = 0;
  
         if ((c == '<') || (c == '>'))
           value = ARITHCOMPARE;
         else value = c;
-       goto done;
+       break;
        }
  
      case 0:
@@ -1994,6 +2421,16 @@ yylex ()
        value = 1;
        break;
  
+    case '{':
+      indent_level++;
+      value = c;
+      break;
+
+    case '}':
+      indent_level--;
+      value = c;
+      break;
+
      default:
        value = c;
      }