X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fjava%2Flex.c;h=77e38f898485707443b1cdc41d1500e4cb43e1c4;hp=be1fcf8e76fa2d349b3031a4b62758d8070cf8ca;hb=8e452f9c0c8f40aeee57cd573a9d638e53872aea;hpb=ab3a735944aa71e2e91e5f9824346756576f6806

diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index be1fcf8e76f..77e38f89848 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -1,21 +1,22 @@
 /* Language lexer for the GNU compiler for the Java(TM) language.
-   Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+   Free Software Foundation, Inc.
    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
 
-This file is part of GNU CC.
+This file is part of GCC.
 
-GNU CC is free software; you can redistribute it and/or modify
+GCC is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2, or (at your option)
 any later version.
 
-GNU CC is distributed in the hope that it will be useful,
+GCC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
+along with GCC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA. 
 
@@ -36,32 +37,46 @@ The Free Software Foundation is independent of Sun Microsystems, Inc.  */
 
 #include "keyword.h"
 #include "flags.h"
+#include "chartables.h"
+#ifndef JC1_LITE
+#include "timevar.h"
+#endif
+
+/* Function declarations.  */
+static char *java_sprint_unicode (struct java_line *, int);
+static void java_unicode_2_utf8 (unicode_t);
+static void java_lex_error (const char *, int);
+#ifndef JC1_LITE
+static int do_java_lex (YYSTYPE *);
+static int java_lex (YYSTYPE *);
+static int java_is_eol (FILE *, int);
+static tree build_wfl_node (tree);
+#endif
+static void java_store_unicode (struct java_line *, unicode_t, int);
+static int java_parse_escape_sequence (void);
+static int java_start_char_p (unicode_t);
+static int java_part_char_p (unicode_t);
+static int java_space_char_p (unicode_t);
+static void java_parse_doc_section (int);
+static void java_parse_end_comment (int);
+static int java_get_unicode (void);
+static int java_read_unicode (java_lexer *, int *);
+static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
+static void java_store_unicode (struct java_line *, unicode_t, int);
+static int java_read_char (java_lexer *);
+static void java_allocate_new_line (void);
+static void java_unget_unicode (void);
+static unicode_t java_sneak_unicode (void);
+#ifndef JC1_LITE
+static int utf8_cmp (const unsigned char *, int, const char *);
+#endif
 
-/* Function declaration  */
-static char *java_sprint_unicode PARAMS ((struct java_line *, int));
-static void java_unicode_2_utf8 PARAMS ((unicode_t));
-static void java_lex_error PARAMS ((const char *, int));
+java_lexer *java_new_lexer (FILE *, const char *);
 #ifndef JC1_LITE
-static int java_is_eol PARAMS ((FILE *, int));
-static tree build_wfl_node PARAMS ((tree));
+static void error_if_numeric_overflow (tree);
 #endif
-static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_parse_escape_sequence PARAMS ((void));
-static int java_letter_or_digit_p PARAMS ((unicode_t));
-static int java_ignorable_control_p PARAMS ((unicode_t));
-static int java_parse_doc_section PARAMS ((unicode_t));
-static void java_parse_end_comment PARAMS ((unicode_t));
-static unicode_t java_get_unicode PARAMS ((void));
-static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
-static unicode_t java_read_unicode_collapsing_terminators
-    PARAMS ((java_lexer *, int *));
-static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_read_char PARAMS ((java_lexer *));
-static void java_allocate_new_line PARAMS ((void));
-static void java_unget_unicode PARAMS ((void));
-static unicode_t java_sneak_unicode PARAMS ((void));
-java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
 
+#ifdef HAVE_ICONV
 /* This is nonzero if we have initialized `need_byteswap'.  */
 static int byteswap_init = 0;
 
@@ -70,21 +85,16 @@ static int byteswap_init = 0;
    doing a conversion once at startup and seeing what happens.  This
    flag holds the results of this determination.  */
 static int need_byteswap = 0;
+#endif
 
 void
-java_init_lex (finput, encoding)
-     FILE *finput;
-     const char *encoding;
+java_init_lex (FILE *finput, const char *encoding)
 {
 #ifndef JC1_LITE
   int java_lang_imported = 0;
 
   if (!java_lang_id)
     java_lang_id = get_identifier ("java.lang");
-  if (!java_lang_cloneable)
-    java_lang_cloneable = get_identifier ("java.lang.Cloneable");
-  if (!java_io_serializable)
-    java_io_serializable = get_identifier ("java.io.Serializable");
   if (!inst_id)
     inst_id = get_identifier ("inst$");
   if (!wpv_id)
@@ -108,32 +118,32 @@ java_init_lex (finput, encoding)
     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
   if (!wfl_string_buffer)
     wfl_string_buffer = 
-      build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
+      build_expr_wfl (get_identifier (flag_emit_class_files
+				      ? "java.lang.StringBuffer"
+				      : "gnu.gcj.runtime.StringBuffer"),
+		      NULL, 0, 0);
   if (!wfl_to_string)
     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 
   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
-    CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
+    CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 
-  bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
-  bzero ((PTR) current_jcf, sizeof (JCF));
+  memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
+  current_jcf = ggc_alloc_cleared (sizeof (JCF));
   ctxp->current_parsed_class = NULL;
   ctxp->package = NULL_TREE;
 #endif
 
   ctxp->filename = input_filename;
-  ctxp->lineno = lineno = 0;
+  ctxp->lineno = input_line = 0;
   ctxp->p_line = NULL;
   ctxp->c_line = NULL;
-  ctxp->minus_seen = 0;
   ctxp->java_error_flag = 0;
   ctxp->lexer = java_new_lexer (finput, encoding);
 }
 
 static char *
-java_sprint_unicode (line, i)
-    struct java_line *line;
-    int i;
+java_sprint_unicode (struct java_line *line, int i)
 {
   static char buffer [10];
   if (line->unicode_escape_p [i] || line->line [i] > 128)
@@ -147,22 +157,24 @@ java_sprint_unicode (line, i)
 }
 
 static unicode_t
-java_sneak_unicode ()
+java_sneak_unicode (void)
 {
   return (ctxp->c_line->line [ctxp->c_line->current]);
 }
 
 static void
-java_unget_unicode ()
+java_unget_unicode (void)
 {
   if (!ctxp->c_line->current)
-    fatal ("can't unget unicode - java_unget_unicode");
+    /* Can't unget unicode.  */
+    abort ();
+
   ctxp->c_line->current--;
   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 }
 
 static void
-java_allocate_new_line ()
+java_allocate_new_line (void)
 {
   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
   char ahead_escape_p = (ctxp->c_line ? 
@@ -177,17 +189,16 @@ java_allocate_new_line ()
 	  free (ctxp->p_line);
 	}
       ctxp->p_line = ctxp->c_line;
-      ctxp->c_line = NULL;		/* Reallocated */
+      ctxp->c_line = NULL;		/* Reallocated.  */
     }
 
   if (!ctxp->c_line)
     {
-      ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
+      ctxp->c_line = xmalloc (sizeof (struct java_line));
       ctxp->c_line->max = JAVA_LINE_MAX;
-      ctxp->c_line->line = (unicode_t *)xmalloc 
-	(sizeof (unicode_t)*ctxp->c_line->max);
+      ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
       ctxp->c_line->unicode_escape_p = 
-	  (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
+	xmalloc (sizeof (char)*ctxp->c_line->max);
       ctxp->c_line->white_space_only = 0;
     }
 
@@ -201,22 +212,23 @@ java_allocate_new_line ()
     }
   ctxp->c_line->ahead [0] = 0;
   ctxp->c_line->unicode_escape_ahead_p = 0;
-  ctxp->c_line->lineno = ++lineno;
+  ctxp->c_line->lineno = ++input_line;
   ctxp->c_line->white_space_only = 1;
 }
 
 /* Create a new lexer object.  */
+
 java_lexer *
-java_new_lexer (finput, encoding)
-     FILE *finput;
-     const char *encoding;
+java_new_lexer (FILE *finput, const char *encoding)
 {
-  java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
+  java_lexer *lex = xmalloc (sizeof (java_lexer));
   int enc_error = 0;
 
   lex->finput = finput;
   lex->bs_count = 0;
   lex->unget_value = 0;
+  lex->hit_eof = 0;
+  lex->encoding = encoding;
 
 #ifdef HAVE_ICONV
   lex->handle = iconv_open ("UCS-2", encoding);
@@ -252,12 +264,14 @@ java_new_lexer (finput, encoding)
 	      in[1] = 0xbb;
 	      in[2] = 0xbf;
 
-	      inp = in;
+	      inp = (char *) in;
 	      inc = 3;
 	      outp = (char *) &result;
 	      outc = 2;
 
-	      r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
+	      r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
+			 &outp, &outc);
+	      iconv_close (handle);
 	      /* Conversion must be complete for us to use the result.  */
 	      if (r != (size_t) -1 && inc == 0 && outc == 0)
 		need_byteswap = (result != 0xfeff);
@@ -271,25 +285,32 @@ java_new_lexer (finput, encoding)
     {
       /* If iconv failed, use the internal decoder if the default
 	 encoding was requested.  This code is used on platforms where
-	 iconv() exists but is insufficient for our needs.  For
-	 instance, on Solaris 2.5 iconv() cannot handle UTF-8 or UCS-2.  */
-      if (strcmp (encoding, DEFAULT_ENCODING))
+	 iconv exists but is insufficient for our needs.  For
+	 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
+
+	 On Solaris the default encoding, as returned by nl_langinfo(),
+	 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
+	 understand that.  We work around that by pretending
+	 `646' to be the same as UTF-8.   */
+      if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
 	enc_error = 1;
 #ifdef HAVE_ICONV
       else
-	lex->use_fallback = 1;
+        {
+	  lex->use_fallback = 1;
+	  lex->encoding = "UTF-8";
+	}
 #endif /* HAVE_ICONV */
     }
 
   if (enc_error)
-    fatal ("unknown encoding: `%s'", encoding);
+    fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
 
   return lex;
 }
 
 void
-java_destroy_lexer (lex)
-     java_lexer *lex;
+java_destroy_lexer (java_lexer *lex)
 {
 #ifdef HAVE_ICONV
   if (! lex->use_fallback)
@@ -298,9 +319,8 @@ java_destroy_lexer (lex)
   free (lex);
 }
 
-static unicode_t
-java_read_char (lex)
-     java_lexer *lex;
+static int
+java_read_char (java_lexer *lex)
 {
   if (lex->unget_value)
     {
@@ -357,9 +377,9 @@ java_read_char (lex)
 	      in_save = inbytesleft;
 	      out_save = out_count;
 	      inp = &lex->buffer[lex->first];
-	      outp = &lex->out_buffer[lex->out_last];
-	      ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
-			  &outp, &out_count);
+	      outp = (char *) &lex->out_buffer[lex->out_last];
+	      ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
+			  &inbytesleft, &outp, &out_count);
 
 	      /* If we haven't read any bytes, then look to see if we
 		 have read a BOM.  */
@@ -406,18 +426,19 @@ java_read_char (lex)
 			 is in the middle of a character sequence.  We just
 			 move the valid part of the buffer to the beginning
 			 to force a read.  */
-		      /* We use bcopy() because it should work for
-			 overlapping strings.  Use memmove() instead... */
-		      bcopy (&lex->buffer[lex->first], &lex->buffer[0],
-			     lex->last - lex->first);
+		      memmove (&lex->buffer[0], &lex->buffer[lex->first],
+			       lex->last - lex->first);
 		      lex->last -= lex->first;
 		      lex->first = 0;
 		    }
 		  else
 		    {
 		      /* A more serious error.  */
-		      java_lex_error ("unrecognized character in input stream",
-				      0);
+		      char buffer[128];	/* Message text plus <= 80 name bytes.  */
+		      sprintf (buffer,
+			       "Unrecognized character for encoding '%.80s'",
+		               lex->encoding);
+		      java_lex_error (buffer, 0);
 		      return UEOF;
 		    }
 		}
@@ -441,18 +462,24 @@ java_read_char (lex)
       int c, c1, c2;
       c = getc (lex->finput);
 
-      if (c < 128)
-	return (unicode_t)c;
       if (c == EOF)
 	return UEOF;
+      if (c < 128)
+	return (unicode_t) c;
       else
 	{
 	  if ((c & 0xe0) == 0xc0)
 	    {
 	      c1 = getc (lex->finput);
 	      if ((c1 & 0xc0) == 0x80)
-		return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
-	      c = c1;
+		{
+		  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+		  /* Check for valid 2-byte characters.  We explicitly
+		     allow \0 because this encoding is common in the
+		     Java world.  */
+		  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+		    return r;
+		}
 	    }
 	  else if ((c & 0xf0) == 0xe0)
 	    {
@@ -461,16 +488,23 @@ java_read_char (lex)
 		{
 		  c2 = getc (lex->finput);
 		  if ((c2 & 0xc0) == 0x80)
-		    return (unicode_t)(((c & 0xf) << 12) + 
-				       (( c1 & 0x3f) << 6) + (c2 & 0x3f));
-		  else
-		    c = c2;
+		    {
+		      unicode_t r =  (unicode_t)(((c & 0xf) << 12) + 
+						 (( c1 & 0x3f) << 6)
+						 + (c2 & 0x3f));
+		      /* Check for valid 3-byte characters.
+			 Don't allow surrogate, \ufffe or \uffff.  */
+		      if (IN_RANGE (r, 0x800, 0xffff)
+			  && ! IN_RANGE (r, 0xd800, 0xdfff)
+			  && r != 0xfffe && r != 0xffff)
+			return r;
+		    }
 		}
-	      else
-		c = c1;
 	    }
 
-	  /* We simply don't support invalid characters.  */
+	  /* We simply don't support invalid characters.  We also
+	     don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+	     cannot be valid Java characters.  */
 	  java_lex_error ("malformed UTF-8 character", 0);
 	}
     }
@@ -480,28 +514,23 @@ java_read_char (lex)
 }
 
 static void
-java_store_unicode (l, c, unicode_escape_p)
-    struct java_line *l;
-    unicode_t c;
-    int unicode_escape_p;
+java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
 {
   if (l->size == l->max)
     {
       l->max += JAVA_LINE_MAX;
-      l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
-      l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p, 
-					       sizeof (char)*l->max);
+      l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
+      l->unicode_escape_p = xrealloc (l->unicode_escape_p, 
+				      sizeof (char)*l->max);
     }
   l->line [l->size] = c;
   l->unicode_escape_p [l->size++] = unicode_escape_p;
 }
 
-static unicode_t
-java_read_unicode (lex, unicode_escape_p)
-     java_lexer *lex;
-     int *unicode_escape_p;
+static int
+java_read_unicode (java_lexer *lex, int *unicode_escape_p)
 {
-  unicode_t c;
+  int c;
 
   c = java_read_char (lex);
   *unicode_escape_p = 0;
@@ -521,25 +550,36 @@ java_read_unicode (lex, unicode_escape_p)
         {
 	  unicode_t unicode = 0;
 	  int shift = 12;
-	  /* Next should be 4 hex digits, otherwise it's an error.
-	     The hex value is converted into the unicode, pushed into
-	     the Unicode stream.  */
-	  for (shift = 12; shift >= 0; shift -= 4)
+
+	  /* Recognize any number of `u's in \u.  */
+	  while ((c = java_read_char (lex)) == 'u')
+	    ;
+
+	  shift = 12;
+	  do
 	    {
-	      if ((c = java_read_char (lex)) == UEOF)
-	        return UEOF;
-	      if (c >= '0' && c <= '9')
-		unicode |= (unicode_t)((c-'0') << shift);
-	      else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
-	        unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
-	      else if (c == 'u')
+	      if (c == UEOF)
 		{
-		  /* Recognize any number of u in \u.  */
-		  shift += 4;
+		  java_lex_error ("prematurely terminated \\u sequence", 0);
+		  return UEOF;
 		}
+
+	      if (hex_p (c))
+		unicode |= (unicode_t)(hex_value (c) << shift);
 	      else
-		java_lex_error ("Non hex digit in Unicode escape sequence", 0);
+		{
+		  java_lex_error ("non-hex digit in \\u sequence", 0);
+		  break;
+		}
+
+	      c = java_read_char (lex);
+	      shift -= 4;
 	    }
+	  while (shift >= 0);
+
+	  if (c != UEOF)
+	    lex->unget_value = c;
+
 	  lex->bs_count = 0;
 	  *unicode_escape_p = 1;
 	  return unicode;
@@ -549,12 +589,11 @@ java_read_unicode (lex, unicode_escape_p)
   return (unicode_t) '\\';
 }
 
-static unicode_t
-java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
-     java_lexer *lex;
-     int *unicode_escape_p;
+static int
+java_read_unicode_collapsing_terminators (java_lexer *lex,
+					  int *unicode_escape_p)
 {
-  unicode_t c = java_read_unicode (lex, unicode_escape_p);
+  int c = java_read_unicode (lex, unicode_escape_p);
 
   if (c == '\r')
     {
@@ -562,7 +601,7 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
 	 return a single line terminator.  */
       int dummy;
       c = java_read_unicode (lex, &dummy);
-      if (c != '\n')
+      if (c != '\n' && c != UEOF)
 	lex->unget_value = c;
       /* In either case we must return a newline.  */
       c = '\n';
@@ -571,13 +610,18 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
   return c;
 }
 
-static unicode_t
-java_get_unicode ()
+static int
+java_get_unicode (void)
 {
-  /* It's time to read a line when... */
+  /* It's time to read a line when...  */
   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
     {
-      unicode_t c;
+      int c;
+      int found_chars = 0;
+
+      if (ctxp->lexer->hit_eof)
+	return UEOF;
+
       java_allocate_new_line ();
       if (ctxp->c_line->line[0] != '\n')
 	{
@@ -586,15 +630,24 @@ java_get_unicode ()
 	      int unicode_escape_p;
 	      c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 							    &unicode_escape_p);
-	      java_store_unicode (ctxp->c_line, c, unicode_escape_p);
-	      if (ctxp->c_line->white_space_only 
-		  && !JAVA_WHITE_SPACE_P (c)
-		  && c != '\n'
-		  && c != UEOF)
-		ctxp->c_line->white_space_only = 0;
+	      if (c != UEOF)
+		{
+		  found_chars = 1;
+		  java_store_unicode (ctxp->c_line, c, unicode_escape_p);
+		  if (ctxp->c_line->white_space_only 
+		      && !JAVA_WHITE_SPACE_P (c)
+		      && c != '\n')
+		    ctxp->c_line->white_space_only = 0;
+		}
 	      if ((c == '\n') || (c == UEOF))
 		break;
 	    }
+
+	  if (c == UEOF && ! found_chars)
+	    {
+	      ctxp->lexer->hit_eof = 1;
+	      return UEOF;
+	    }
 	}
     }
   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
@@ -603,12 +656,10 @@ java_get_unicode ()
 }
 
 /* Parse the end of a C style comment.
- * C is the first character following the '/' and '*'. */
+ * C is the first character following the '/' and '*'.  */
 static void
-java_parse_end_comment (c)
-     unicode_t c;
+java_parse_end_comment (int c)
 {
-
   for ( ;; c = java_get_unicode ())
     {
       switch (c)
@@ -624,7 +675,7 @@ java_parse_end_comment (c)
 	      return;
 	    case '/':
 	      return;
-	    case '*':	/* reparse only '*' */
+	    case '*':	/* Reparse only '*'.  */
 	      java_unget_unicode ();
 	    }
 	}
@@ -635,78 +686,154 @@ java_parse_end_comment (c)
    of a documentation comment line (ignoring white space and any `*'
    character). Parsed keyword(s): @DEPRECATED.  */
 
-static int
-java_parse_doc_section (c)
-     unicode_t c;
+static void
+java_parse_doc_section (int c)
 {
-  int valid_tag = 0, seen_star = 0;
+  int last_was_star;
+
+  /* We reset this here, because only the most recent doc comment
+     applies to the following declaration.  */
+  ctxp->deprecated = 0;
 
-  while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
+  /* We loop over all the lines of the comment.  We'll eventually exit
+     if we hit EOF prematurely, or when we see the comment
+     terminator.  */
+  while (1)
     {
-      switch (c)
+      /* These first steps need only be done if we're still looking
+	 for the deprecated tag.  If we've already seen it, we might
+	 as well skip looking for it again.  */
+      if (! ctxp->deprecated)
 	{
-	case '*':
-	  seen_star = 1;
-	  break;
-	case '\n': /* ULT */
-	  valid_tag = 1;
-	default:
-	  seen_star = 0;
-	}
-      c = java_get_unicode();
-    }
-  
-  if (c == UEOF)
-    java_lex_error ("Comment not terminated at end of input", 0);
-  
-  if (seen_star && (c == '/'))
-    return 1;			/* Goto step1 in caller */
+	  /* Skip whitespace and '*'s.  We must also check for the end
+	     of the comment here.  */
+	  while (JAVA_WHITE_SPACE_P (c) || c == '*')
+	    {
+	      last_was_star = (c == '*');
+	      c = java_get_unicode ();
+	      if (last_was_star && c == '/')
+		{
+		  /* We just saw the comment terminator.  */
+		  return;
+		}
+	    }
 
-  /* We're parsing @deprecated */
-  if (valid_tag && (c == '@'))
-    {
-      char tag [11];
-      int  tag_index = 0;
+	  if (c == UEOF)
+	    goto eof;
 
-      while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
+	  if (c == '@')
+	    {
+	      const char *deprecated = "@deprecated";
+	      int i;
+
+	      for (i = 0; deprecated[i]; ++i)
+		{
+		  if (c != deprecated[i])
+		    break;
+		  /* We write the code in this way, with the
+		     update at the end, so that after the loop
+		     we're left with the next character in C.  */
+		  c = java_get_unicode ();
+		}
+
+	      if (c == UEOF)
+		goto eof;
+
+	      /* @deprecated must be followed by a space or newline.
+		 We also allow a '*' in case it appears just before
+		 the end of a comment.  In this position only we also
+		 must allow any Unicode space character.  */
+	      if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
+		{
+		  if (! deprecated[i])
+		    ctxp->deprecated = 1;
+		}
+	    }
+	}
+
+      /* We've examined the relevant content from this line.  Now we
+	 skip the remaining characters and start over with the next
+	 line.  We also check for end of comment here.  */
+      while (c != '\n' && c != UEOF)
 	{
+	  last_was_star = (c == '*');
 	  c = java_get_unicode ();
-	  tag [tag_index++] = c;
+	  if (last_was_star && c == '/')
+	    return;
 	}
-      
-      if (c == UEOF)
-	java_lex_error ("Comment not terminated at end of input", 0);
-      tag [tag_index] = '\0';
 
-      if (!strcmp (tag, "deprecated"))
-	ctxp->deprecated = 1;
+      if (c == UEOF)
+	goto eof;
+      /* We have to advance past the \n.  */
+      c = java_get_unicode ();
+      if (c == UEOF)
+	goto eof;
     }
-  java_unget_unicode ();
-  return 0;
+
+ eof:
+  java_lex_error ("Comment not terminated at end of input", 0);
+}
+
+/* Return true if C is a valid start character for a Java identifier.
+   This is only called if C >= 128 -- smaller values are handled
+   inline.  However, this function handles all values anyway.  */
+static int
+java_start_char_p (unicode_t c)
+{
+  unsigned int hi = c / 256;
+  const char *const page = type_table[hi];
+  unsigned long val = (unsigned long) page;
+  int flags;
+
+  if ((val & ~ LETTER_MASK) != 0)
+    flags = page[c & 255];
+  else
+    flags = val;
+
+  return flags & LETTER_START;
 }
 
-/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
-   will return a wrong result.  */
+/* Return true if C is a valid part character for a Java identifier.
+   This is only called if C >= 128 -- smaller values are handled
+   inline.  However, this function handles all values anyway.  */
 static int
-java_letter_or_digit_p (c)
-     unicode_t c;
+java_part_char_p (unicode_t c)
 {
-  return _JAVA_LETTER_OR_DIGIT_P (c);
+  unsigned int hi = c / 256;
+  const char *const page = type_table[hi];
+  unsigned long val = (unsigned long) page;
+  int flags;
+
+  if ((val & ~ LETTER_MASK) != 0)
+    flags = page[c & 255];
+  else
+    flags = val;
+
+  return flags & LETTER_PART;
 }
 
-/* This function to be used only by JAVA_ID_CHAR_P ().  */
+/* Return true if C is whitespace.  */
 static int
-java_ignorable_control_p (c)
-     unicode_t c;
+java_space_char_p (unicode_t c)
 {
-  return _JAVA_IDENTIFIER_IGNORABLE (c);
+  unsigned int hi = c / 256;
+  const char *const page = type_table[hi];
+  unsigned long val = (unsigned long) page;
+  int flags;
+
+  if ((val & ~ LETTER_MASK) != 0)
+    flags = page[c & 255];
+  else
+    flags = val;
+
+  return flags & LETTER_SPACE;
 }
 
-static unicode_t
-java_parse_escape_sequence ()
+static int
+java_parse_escape_sequence (void)
 {
   unicode_t char_lit;
-  unicode_t c;
+  int c;
 
   switch (c = java_get_unicode ())
     {
@@ -754,50 +881,36 @@ java_parse_escape_sequence ()
 
 	return char_lit;
       }
-    case '\n':
-      return '\n';		/* ULT, caught latter as a specific error */
     default:
       java_lex_error ("Invalid character in escape sequence", 0);
       return JAVA_CHAR_ERROR;
     }
 }
 
-/* Isolate the code which may raise an arithmetic exception in its
-   own function.  */
-
 #ifndef JC1_LITE
-struct jpa_args
-{
-  YYSTYPE *java_lval;
-  char *literal_token;
-  int fflag;
-  int number_beginning;
-};
+#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
 
-#ifdef REAL_ARITHMETIC
-#define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
-#else
-#define IS_ZERO(X) ((X) == 0)
-#endif
+/* Subroutine of java_lex: converts floating-point literals to tree
+   nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
+   store the result.  FFLAG indicates whether the literal was tagged
+   with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
+   is the line number on which to report any error.  */
 
-static void java_perform_atof	PARAMS ((PTR));
+static void java_perform_atof (YYSTYPE *, char *, int, int);
 
 static void
-java_perform_atof (av)
-     PTR av;
+java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
+		   int number_beginning)
 {
-  struct jpa_args *a = (struct jpa_args *)av;
-  YYSTYPE *java_lval = a->java_lval;
-  int number_beginning = a->number_beginning;
   REAL_VALUE_TYPE value;
-  tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
+  tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 
   SET_REAL_VALUE_ATOF (value,
-		       REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
+		       REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
 
   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
     {
-      JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
+      JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
       value = DCONST0;
     }
   else if (IS_ZERO (value))
@@ -805,7 +918,7 @@ java_perform_atof (av)
       /* We check to see if the value is really 0 or if we've found an
 	 underflow.  We do this in the most primitive imaginable way.  */
       int really_zero = 1;
-      char *p = a->literal_token;
+      char *p = literal_token;
       if (*p == '-')
 	++p;
       while (*p && *p != 'e' && *p != 'E')
@@ -830,24 +943,24 @@ java_perform_atof (av)
 }
 #endif
 
-static int yylex		PARAMS ((YYSTYPE *));
+static int yylex (YYSTYPE *);
 
 static int
 #ifdef JC1_LITE
-yylex (java_lval)
+yylex (YYSTYPE *java_lval)
 #else
-java_lex (java_lval)
+do_java_lex (YYSTYPE *java_lval)
 #endif
-     YYSTYPE *java_lval;
 {
-  unicode_t c, first_unicode;
+  int c;
+  unicode_t first_unicode;
   int ascii_index, all_ascii;
   char *string;
 
   /* Translation of the Unicode escape in the raw stream of Unicode
      characters. Takes care of line terminator.  */
  step1:
-  /* Skip white spaces: SP, TAB and FF or ULT */ 
+  /* Skip white spaces: SP, TAB and FF or ULT.  */ 
   for (c = java_get_unicode ();
        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
     if (c == '\n')
@@ -858,15 +971,16 @@ java_lex (java_lval)
 
   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 
-  if (c == 0x1a)		/* CTRL-Z */
+  if (c == 0x1a)		/* CTRL-Z.  */
     {
       if ((c = java_get_unicode ()) == UEOF)
-	return 0;		/* Ok here */
+	return 0;		/* Ok here.  */
       else
-	java_unget_unicode ();	/* Caught latter at the end the function */
+	java_unget_unicode ();	/* Caught later, at the end of the
+                                   function.  */
     }
-  /* Handle EOF here */
-  if (c == UEOF)	/* Should probably do something here... */
+  /* Handle EOF here.  */
+  if (c == UEOF)	/* Should probably do something here...  */
     return 0;
 
   /* Take care of eventual comments.  */
@@ -895,13 +1009,19 @@ java_lex (java_lval)
 	case '*':
 	  if ((c = java_get_unicode ()) == '*')
 	    {
-	      if ((c = java_get_unicode ()) == '/')
-		goto step1;	/* Empy documentation comment  */
-	      else if (java_parse_doc_section (c))
-		goto step1;
+	      c = java_get_unicode ();
+	      if (c == '/')
+		{
+		  /* Empty documentation comment.  We have to reset
+		     the deprecation marker as only the most recent
+		     doc comment applies.  */
+		  ctxp->deprecated = 0;
+		}
+	      else
+		java_parse_doc_section (c);
 	    }
-
-	  java_parse_end_comment ((c = java_get_unicode ()));
+	  else
+	    java_parse_end_comment ((c = java_get_unicode ()));
 	  goto step1;
 	  break;
 	default:
@@ -915,24 +1035,26 @@ java_lex (java_lval)
   ctxp->elc.prev_col = ctxp->elc.col;
   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
   if (ctxp->elc.col < 0)
-    fatal ("ctxp->elc.col < 0 - java_lex");
+    abort ();
 
-  /* Numeric literals */
+  /* Numeric literals.  */
   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
     {
-      /* This section of code is borrowed from gcc/c-lex.c  */
+      /* This section of code is borrowed from gcc/c-lex.c.  */
 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
       int parts[TOTAL_PARTS];
       HOST_WIDE_INT high, low;
-      /* End borrowed section  */
+      /* End borrowed section.  */
       char literal_token [256];
       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
+      int  found_hex_digits = 0, found_non_octal_digits = 0;
       int  i;
 #ifndef JC1_LITE
       int  number_beginning = ctxp->c_line->current;
+      tree value;
 #endif
       
-      /* We might have a . separator instead of a FP like .[0-9]* */
+      /* We might have a . separator instead of a FP like .[0-9]*.  */
       if (c == '.')
 	{
 	  unicode_t peep = java_sneak_unicode ();
@@ -957,15 +1079,16 @@ java_lex (java_lval)
 	    }
 	  else if (JAVA_ASCII_DIGIT (c))
 	    radix = 8;
-	  else if (c == '.')
+	  else if (c == '.' || c == 'e' || c =='E')
 	    {
-	      /* Push the '.' back and prepare for a FP parsing... */
+	      /* Push the '.', 'e', or 'E' back and prepare for a FP
+		 parsing...  */
 	      java_unget_unicode ();
 	      c = '0';
 	    }
 	  else
 	    {
-	      /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
+	      /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
 	      JAVA_LEX_LIT ("0", 10);
               switch (c)
 		{		
@@ -987,17 +1110,23 @@ java_lex (java_lval)
 	}
       /* Parse the first part of the literal, until we find something
 	 which is not a number.  */
-      while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
-	     (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
-	     (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
+      while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
+	     JAVA_ASCII_DIGIT (c))
 	{
 	  /* We store in a string (in case it turns out to be a FP) and in
 	     PARTS if we have to process a integer literal.  */
-	  int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
+	  int numeric = hex_value (c);
 	  int count;
 
+	  /* Remember when we find a valid hexadecimal digit.  */
+	  if (radix == 16)
+	    found_hex_digits = 1;
+          /* Remember when we find an invalid octal digit.  */
+          else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
+            found_non_octal_digits = 1;
+
 	  literal_token [literal_index++] = c;
-	  /* This section of code if borrowed from gcc/c-lex.c  */
+	  /* This section of code is borrowed from gcc/c-lex.c.  */
 	  for (count = 0; count < TOTAL_PARTS; count++)
 	    {
 	      parts[count] *= radix;
@@ -1023,9 +1152,13 @@ java_lex (java_lval)
 	  int seen_digit = (literal_index ? 1 : 0);
 	  int seen_exponent = 0;
 	  int fflag = 0;	/* 1 for {f,F}, 0 for {d,D}. FP literal are
-				   double unless specified. */
-	  if (radix != 10)
+				   double unless specified.  */
+
+	  /* It is ok if the radix is 8 because this just means we've
+	     seen a leading `0'.  However, radix==16 is invalid.  */
+	  if (radix == 16)
 	    java_lex_error ("Can't express non-decimal FP literal", 0);
+	  radix = 10;
 
 	  for (;;)
 	    {
@@ -1045,9 +1178,10 @@ java_lex (java_lval)
 		{
 		  if (stage < 2)
 		    {
-		      /* {E,e} must have seen at list a digit */
+		      /* {E,e} must have seen at least a digit.  */
 		      if (!seen_digit)
-			java_lex_error ("Invalid FP literal", 0);
+			java_lex_error
+                          ("Invalid FP literal, mantissa must have digit", 0);
 		      seen_digit = 0;
 		      seen_exponent = 1;
 		      stage = 2;
@@ -1060,7 +1194,7 @@ java_lex (java_lval)
 	      if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 		{
 		  fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
-		  stage = 4;	/* So we fall through */
+		  stage = 4;	/* So we fall through.  */
 		}
 
 	      if ((c=='-' || c =='+') && stage == 2)
@@ -1077,57 +1211,49 @@ java_lex (java_lval)
 		{
 		  if (JAVA_ASCII_DIGIT (c))
 		    seen_digit = 1;
+                  if (stage == 2)
+                    stage = 3;
 		  literal_token [literal_index++ ] = c;
 		  c = java_get_unicode ();
 		}
 	      else
 		{
-#ifndef JC1_LITE
-		  struct jpa_args a;
-#endif
-		  if (stage != 4) /* Don't push back fF/dD */
+		  if (stage != 4) /* Don't push back fF/dD.  */
 		    java_unget_unicode ();
 		  
 		  /* An exponent (if any) must have seen a digit.  */
 		  if (seen_exponent && !seen_digit)
-		    java_lex_error ("Invalid FP literal", 0);
+		    java_lex_error
+                      ("Invalid FP literal, exponent must have digit", 0);
 
 		  literal_token [literal_index] = '\0';
 		  JAVA_LEX_LIT (literal_token, radix);
 
 #ifndef JC1_LITE
-		  a.literal_token = literal_token;
-		  a.fflag = fflag;
-		  a.java_lval = java_lval;
-		  a.number_beginning = number_beginning;
-		  if (do_float_handler (java_perform_atof, (PTR) &a))
-		    return FP_LIT_TK;
-
-		  JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
-#else
-		  return FP_LIT_TK;
+		  java_perform_atof (java_lval, literal_token,
+				     fflag, number_beginning);
 #endif
+		  return FP_LIT_TK;
 		}
 	    }
-	} /* JAVA_ASCCI_FPCHAR (c) */
+	} /* JAVA_ASCII_FPCHAR (c) */
 
       /* Here we get back to converting the integral literal.  */
-      if (c == 'L' || c == 'l')
+      if (radix == 16 && ! found_hex_digits)
+	java_lex_error
+	  ("0x must be followed by at least one hexadecimal digit", 0);
+      else if (radix == 8 && found_non_octal_digits)
+	java_lex_error ("Octal literal contains digit out of range", 0);
+      else if (c == 'L' || c == 'l')
 	long_suffix = 1;
-      else if (radix == 16 && JAVA_ASCII_LETTER (c))
-	java_lex_error ("Digit out of range in hexadecimal literal", 0);
-      else if (radix == 8  && JAVA_ASCII_DIGIT (c))
-	java_lex_error ("Digit out of range in octal literal", 0);
-      else if (radix == 16 && !literal_index)
-	java_lex_error ("No digit specified for hexadecimal literal", 0);
       else
 	java_unget_unicode ();
 
 #ifdef JAVA_LEX_DEBUG
-      literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
+      literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
       JAVA_LEX_LIT (literal_token, radix);
 #endif
-      /* This section of code is borrowed from gcc/c-lex.c  */
+      /* This section of code is borrowed from gcc/c-lex.c.  */
       if (!overflow)
 	{
 	  bytes = GET_TYPE_PRECISION (long_type_node);
@@ -1148,44 +1274,46 @@ java_lex (java_lval)
 	}
       /* End borrowed section.  */
 
-      /* Range checking */
-      if (long_suffix)
+#ifndef JC1_LITE
+      /* Range checking.  */
+      value = build_int_2 (low, high);
+      /* Temporarily set type to unsigned.  */
+      SET_LVAL_NODE_TYPE (value, (long_suffix
+				  ? unsigned_long_type_node
+				  : unsigned_int_type_node));
+
+      /* For base 10 numbers, only values up to the highest value
+	 (plus one) can be written.  For instance, only ints up to
+	 2147483648 can be written.  The special case of the largest
+	 negative value is handled elsewhere.  For other bases, any
+	 number can be represented.  */
+      if (overflow || (radix == 10
+		       && tree_int_cst_lt (long_suffix
+					   ? decimal_long_max
+					   : decimal_int_max,
+					   value)))
 	{
-	  /* 9223372036854775808L is valid if operand of a '-'. Otherwise
-	     9223372036854775807L is the biggest `long' literal that can be
-	     expressed using a 10 radix. For other radixes, everything that
-	     fits withing 64 bits is OK. */
-	  int hb = (high >> 31);
-	  if (overflow || (hb && low && radix == 10) ||  
-	      (hb && high & 0x7fffffff && radix == 10) ||
-	      (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
+	  if (long_suffix)
 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
-	}
-      else
-	{
-	  /* 2147483648 is valid if operand of a '-'. Otherwise,
-	     2147483647 is the biggest `int' literal that can be
-	     expressed using a 10 radix. For other radixes, everything
-	     that fits within 32 bits is OK.  As all literals are
-	     signed, we sign extend here. */
-	  int hb = (low >> 31) & 0x1;
-	  if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
-	      (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
+	  else
 	    JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
-	  high = -hb;
 	}
-      ctxp->minus_seen = 0;
+
+      /* Sign extend the value.  */
+      SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
+      force_fit_type (value, 0);
+      JAVA_RADIX10_FLAG (value) = radix == 10;
+#else
       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
-			  (long_suffix ? long_type_node : int_type_node));
+			  long_suffix ? long_type_node : int_type_node);
+#endif
       return INT_LIT_TK;
     }
 
-  ctxp->minus_seen = 0;
-
-  /* Character literals */
+  /* Character literals.  */
   if (c == '\'')
     {
-      unicode_t char_lit;
+      int char_lit;
       if ((c = java_get_unicode ()) == '\\')
 	char_lit = java_parse_escape_sequence ();
       else
@@ -1202,15 +1330,15 @@ java_lex (java_lval)
       if (c != '\'')
 	java_lex_error ("Syntax error in character literal", 0);
 
-      if (c == JAVA_CHAR_ERROR)
-        char_lit = 0;		/* We silently convert it to zero */
+      if (char_lit == JAVA_CHAR_ERROR)
+        char_lit = 0;		/* We silently convert it to zero.  */
 
       JAVA_LEX_CHAR_LIT (char_lit);
       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
       return CHAR_LIT_TK;
     }
 
-  /* String literals */
+  /* String literals.  */
   if (c == '"')
     {
       int no_error;
@@ -1221,21 +1349,26 @@ java_lex (java_lval)
 	{
 	  if (c == '\\')
 	    c = java_parse_escape_sequence ();
-	  no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
+	  if (c == JAVA_CHAR_ERROR)
+	    {
+	      no_error = 0;
+	      c = 0;		/* We silently convert it to zero.  */
+	    }
 	  java_unicode_2_utf8 (c);
 	}
-      if (c == '\n' || c == UEOF) /* ULT */
+      if (c == '\n' || c == UEOF) /* ULT.  */
 	{
-	  lineno--;		/* Refer to the line the terminator was seen */
-	  java_lex_error ("String not terminated at end of line.", 0);
-	  lineno++;
+	  input_line--;	/* Refer to the line where the terminator was seen.  */
+	  java_lex_error ("String not terminated at end of line", 0);
+	  input_line++;
 	}
 
       obstack_1grow (&temporary_obstack, '\0');
       string = obstack_finish (&temporary_obstack);
 #ifndef JC1_LITE
       if (!no_error || (c != '"'))
-	java_lval->node = error_mark_node; /* Requires futher testing FIXME */
+	java_lval->node = error_mark_node; /* FIXME: Requires further
+                                              testing.  */
       else
 	java_lval->node = build_string (strlen (string), string);
 #endif
@@ -1243,7 +1376,7 @@ java_lex (java_lval)
       return STRING_LIT_TK;
     }
 
-  /* Separator */
+  /* Separator.  */
   switch (c)
     {
     case '(':
@@ -1255,14 +1388,14 @@ java_lex (java_lval)
     case '{':
       JAVA_LEX_SEP (c);
       if (ctxp->ccb_indent == 1)
-	ctxp->first_ccb_indent1 = lineno;
+	ctxp->first_ccb_indent1 = input_line;
       ctxp->ccb_indent++;
       BUILD_OPERATOR (OCB_TK);
     case '}':
       JAVA_LEX_SEP (c);
       ctxp->ccb_indent--;
       if (ctxp->ccb_indent == 1)
-        ctxp->last_ccb_indent1 = lineno;
+        ctxp->last_ccb_indent1 = input_line;
       BUILD_OPERATOR (CCB_TK);
     case '[':
       JAVA_LEX_SEP (c);
@@ -1282,7 +1415,7 @@ java_lex (java_lval)
       /*      return DOT_TK; */
     }
 
-  /* Operators */
+  /* Operators.  */
   switch (c)
     {
     case '=':
@@ -1394,7 +1527,6 @@ java_lex (java_lval)
 	  BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
 	default:
 	  java_unget_unicode ();
-	  ctxp->minus_seen = 1;
 	  BUILD_OPERATOR (MINUS_TK);
 	}
 
@@ -1463,9 +1595,9 @@ java_lex (java_lval)
       BUILD_OPERATOR (NOT_TK);
     }
   
-  /* Keyword, boolean literal or null literal */
+  /* Keyword, boolean literal or null literal.  */
   for (first_unicode = c, all_ascii = 1, ascii_index = 0; 
-       JAVA_ID_CHAR_P (c); c = java_get_unicode ())
+       c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
     {
       java_unicode_2_utf8 (c);
       if (all_ascii && c >= 128)
@@ -1475,14 +1607,15 @@ java_lex (java_lval)
 
   obstack_1grow (&temporary_obstack, '\0');
   string = obstack_finish (&temporary_obstack);
-  java_unget_unicode ();
+  if (c != UEOF)
+    java_unget_unicode ();
 
   /* If we have something all ascii, we consider a keyword, a boolean
      literal, a null literal or an all ASCII identifier.  Otherwise,
      this is an identifier (possibly not respecting formation rule).  */
   if (all_ascii)
     {
-      struct java_keyword *kw;
+      const struct java_keyword *kw;
       if ((kw=java_keyword (string, ascii_index)))
 	{
 	  JAVA_LEX_KW (string);
@@ -1491,7 +1624,7 @@ java_lex (java_lval)
 	    case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
 	    case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
 	    case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
-	    case PRIVATE_TK:
+	    case PRIVATE_TK:      case STRICT_TK:
 	      SET_MODIFIER_CTX (kw->token);
 	      return MODIFIER_TK;
 	    case FLOAT_TK:
@@ -1519,7 +1652,7 @@ java_lex (java_lval)
 	      SET_LVAL_NODE (char_type_node);
 	      return INTEGRAL_TK;
 
-	      /* Keyword based literals */
+	      /* Keyword based literals.  */
 	    case TRUE_TK:
 	    case FALSE_TK:
 	      SET_LVAL_NODE ((kw->token == TRUE_TK ? 
@@ -1529,8 +1662,17 @@ java_lex (java_lval)
 	      SET_LVAL_NODE (null_pointer_node);
 	      return NULL_TK;
 
+	    case ASSERT_TK:
+	      if (flag_assert)
+		{
+		  BUILD_OPERATOR (kw->token);
+		  return kw->token;
+		}
+	      else
+		break;
+
 	      /* Some keyword we want to retain information on the location
-		 they where found */
+		 they were found.  */
 	    case CASE_TK:
 	    case DEFAULT_TK:
 	    case SUPER_TK:
@@ -1550,15 +1692,15 @@ java_lex (java_lval)
 	}
     }
   
-  /* We may have and ID here */
-  if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
+  /* We may have an ID here.  */
+  if (JAVA_START_CHAR_P (first_unicode))
     {
       JAVA_LEX_ID (string);
       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
       return ID_TK;
     }
 
-  /* Everything else is an invalid character in the input */
+  /* Everything else is an invalid character in the input.  */
   {
     char lex_error_buffer [128];
     sprintf (lex_error_buffer, "Invalid character `%s' in input", 
@@ -1568,9 +1710,42 @@ java_lex (java_lval)
   return 0;
 }
 
+#ifndef JC1_LITE
+
+/* The exported interface to the lexer: do_java_lex, timed as TV_LEX.  */
+static int
+java_lex (YYSTYPE *java_lval)
+{
+  int r;
+
+  timevar_push (TV_LEX);
+  r = do_java_lex (java_lval);
+  timevar_pop (TV_LEX);
+  return r;
+}
+
+/* Called by the parser to report numeric overflow for a literal that
+   is not preceded by `-'.  Only the largest negative value is handled:
+   an INTEGER_CST with JAVA_RADIX10_FLAG set and a negative sign.  The
+   message says `long' for long_type_node and `int' otherwise.  */
 static void
-java_unicode_2_utf8 (unicode)
-    unicode_t unicode;
+error_if_numeric_overflow (tree value)
+{
+  if (TREE_CODE (value) == INTEGER_CST
+      && JAVA_RADIX10_FLAG (value)
+      && tree_int_cst_sgn (value) < 0)
+    {
+      if (TREE_TYPE (value) == long_type_node)
+	java_lex_error ("Numeric overflow for `long' literal", 0);
+      else
+	java_lex_error ("Numeric overflow for `int' literal", 0);
+    }
+}
+
+#endif /* JC1_LITE */
+
+static void
+java_unicode_2_utf8 (unicode_t unicode)
 {
   if (RANGE (unicode, 0x01, 0x7f))
     obstack_1grow (&temporary_obstack, (char)unicode);
@@ -1581,7 +1756,7 @@ java_unicode_2_utf8 (unicode)
       obstack_1grow (&temporary_obstack,
 		     (unsigned char)(0x80 | (unicode & 0x3f)));
     }
-  else				/* Range 0x800-0xffff */
+  else				/* Range 0x800-0xffff.  */
     {
       obstack_1grow (&temporary_obstack,
 		     (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
@@ -1594,23 +1769,23 @@ java_unicode_2_utf8 (unicode)
 
 #ifndef JC1_LITE
 static tree
-build_wfl_node (node)
-     tree node;
+build_wfl_node (tree node)
 {
-  return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
+  node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
+  /* Untyped, so java_complete_lhs can't short-circuit a constant node.  */
+  TREE_TYPE (node) = NULL_TREE;
+  return node;
 }
 #endif
 
 static void
-java_lex_error (msg, forward)
-     const char *msg ATTRIBUTE_UNUSED;
-     int forward ATTRIBUTE_UNUSED;
+java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
 {
 #ifndef JC1_LITE
   ctxp->elc.line = ctxp->c_line->lineno;
   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
 
-  /* Might be caught in the middle of some error report */
+  /* Might be caught in the middle of some error report.  */
   ctxp->java_error_flag = 0;
   java_error (NULL);
   java_error (msg);
@@ -1619,9 +1794,7 @@ java_lex_error (msg, forward)
 
 #ifndef JC1_LITE
 static int
-java_is_eol (fp, c)
-  FILE *fp;
-  int c;
+java_is_eol (FILE *fp, int c)
 {
   int next;
   switch (c)
@@ -1640,18 +1813,17 @@ java_is_eol (fp, c)
 #endif
 
 char *
-java_get_line_col (filename, line, col)
-     const char *filename ATTRIBUTE_UNUSED;
-     int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
+java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
+		   int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
 {
 #ifdef JC1_LITE
   return 0;
 #else
-  /* Dumb implementation. Doesn't try to cache or optimize things. */
-  /* First line of the file is line 1, first column is 1 */
+  /* Dumb implementation. Doesn't try to cache or optimize things.  */
+  /* First line of the file is line 1, first column is 1.  */
 
-  /* COL == -1 means, at the CR/LF in LINE */
-  /* COL == -2 means, at the first non space char in LINE */
+  /* COL == -1 means, at the CR/LF in LINE.  */
+  /* COL == -2 means, at the first non space char in LINE.  */
 
   FILE *fp;
   int c, ccol, cline = 1;
@@ -1660,14 +1832,14 @@ java_get_line_col (filename, line, col)
   char *base;
 
   if (!(fp = fopen (filename, "r")))
-    fatal ("Can't open file - java_display_line_col");
+    fatal_error ("can't open %s: %m", filename);
 
   while (cline != line)
     {
       c = getc (fp);
       if (c == EOF)
 	{
-	  static char msg[] = "<<file too short - unexpected EOF>>";
+	  static const char msg[] = "<<file too short - unexpected EOF>>";
 	  obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
 	  goto have_line;
 	}
@@ -1675,7 +1847,7 @@ java_get_line_col (filename, line, col)
 	cline++;
     }
 
-  /* Gather the chars of the current line in a buffer */
+  /* Gather the chars of the current line in a buffer.  */
   for (;;)
     {
       c = getc (fp);
@@ -1700,11 +1872,11 @@ java_get_line_col (filename, line, col)
   else
     first_non_space = 0;
 
-  /* Place the '^' a the right position */
+  /* Place the '^' at the right position.  */
   base = obstack_base (&temporary_obstack);
   for (ccol = 1; ccol <= col+3; ccol++)
     {
-      /* Compute \t when reaching first_non_space */
+      /* Compute \t when reaching first_non_space.  */
       char c = (first_non_space ?
 		(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
       obstack_1grow (&temporary_obstack, c);
@@ -1715,3 +1887,168 @@ java_get_line_col (filename, line, col)
   return obstack_finish (&temporary_obstack);
 #endif
 }
+
+#ifndef JC1_LITE
+static int
+utf8_cmp (const unsigned char *str, int length, const char *name)
+{
+  const unsigned char *limit = str + length;
+  int i;
+  /* Compare the LENGTH bytes of UTF-8 at STR with the C string NAME.  */
+  for (i = 0; name[i]; ++i)
+    {
+      int ch = UTF8_GET (str, limit);
+      if (ch != name[i])
+	return ch - name[i];	/* First mismatch decides the sign.  */
+    }
+
+  return str == limit ? 0 : 1;	/* 0 only if STR reached LIMIT.  */
+}
+
+/* All C++ keywords, kept sorted because cxx_keyword_p binary-searches.  */
+
+static const char *const cxx_keywords[] =
+{
+  "_Complex",
+  "__alignof",
+  "__alignof__",
+  "__asm",
+  "__asm__",
+  "__attribute",
+  "__attribute__",
+  "__builtin_va_arg",
+  "__complex",
+  "__complex__",
+  "__const",
+  "__const__",
+  "__extension__",
+  "__imag",
+  "__imag__",
+  "__inline",
+  "__inline__",
+  "__label__",
+  "__null",
+  "__real",
+  "__real__",
+  "__restrict",
+  "__restrict__",
+  "__signed",
+  "__signed__",
+  "__typeof",
+  "__typeof__",
+  "__volatile",
+  "__volatile__",
+  "and",
+  "and_eq",
+  "asm",
+  "auto",
+  "bitand",
+  "bitor",
+  "bool",
+  "break",
+  "case",
+  "catch",
+  "char",
+  "class",
+  "compl",
+  "const",
+  "const_cast",
+  "continue",
+  "default",
+  "delete",
+  "do",
+  "double",
+  "dynamic_cast",
+  "else",
+  "enum",
+  "explicit",
+  "export",
+  "extern",
+  "false",
+  "float",
+  "for",
+  "friend",
+  "goto",
+  "if",
+  "inline",
+  "int",
+  "long",
+  "mutable",
+  "namespace",
+  "new",
+  "not",
+  "not_eq",
+  "operator",
+  "or",
+  "or_eq",
+  "private",
+  "protected",
+  "public",
+  "register",
+  "reinterpret_cast",
+  "return",
+  "short",
+  "signed",
+  "sizeof",
+  "static",
+  "static_cast",
+  "struct",
+  "switch",
+  "template",
+  "this",      
+  "throw",
+  "true",
+  "try",
+  "typedef",
+  "typeid",
+  "typename",
+  "typeof",
+  "union",
+  "unsigned",
+  "using",
+  "virtual",
+  "void",
+  "volatile",
+  "wchar_t",
+  "while",
+  "xor",
+  "xor_eq"
+};
+
+/* Return 1 if NAME (LENGTH bytes) is a C++ keyword plus zero or more `$'.  */
+
+int
+cxx_keyword_p (const char *name, int length)
+{
+  int last = ARRAY_SIZE (cxx_keywords);
+  int first = 0;
+  int mid = (last + first) / 2;
+  int old = -1;
+
+  for (mid = (last + first) / 2;
+       mid != old;	/* Stop once MID no longer moves.  */
+       old = mid, mid = (last + first) / 2)
+    {
+      int kwl = strlen (cxx_keywords[mid]);
+      int min_length = kwl > length ? length : kwl;
+      int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
+
+      if (r == 0)
+	{
+	  int i;
+	  /* We've found a match if all the remaining characters are `$'.  */
+	  for (i = min_length; i < length && name[i] == '$'; ++i)
+	    ;
+	  if (i == length)
+	    return 1;
+	  r = 1;	/* Keyword is only a prefix: search above MID.  */
+	}
+
+      if (r < 0)
+	last = mid;
+      else
+	first = mid;
+    }
+  return 0;
+}
+#endif /* JC1_LITE */