gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
/* It defines java_lex (yylex), which reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters, and returns a token for everything found except
   comments, white space and line terminators.  When necessary, it
   also fills in the java_lval (yylval) union.  It is implemented to
   be called by a re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification", J. Gosling, B. Joy, G. Steele,
   Addison Wesley 1996 (http://java.sun.com/docs/books/jls/html/3.doc.html).  */
  36
  37 #include "keyword.h"
  38 #include "flags.h"
  39 #include "chartables.h"
  40
  41 /* Function declarations.  */
  42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  44 static void java_lex_error PARAMS ((const char *, int));
  45 #ifndef JC1_LITE
  46 static int java_is_eol PARAMS ((FILE *, int));
  47 static tree build_wfl_node PARAMS ((tree));
  48 #endif
  49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  50 static int java_parse_escape_sequence PARAMS ((void));
  51 static int java_start_char_p PARAMS ((unicode_t));
  52 static int java_part_char_p PARAMS ((unicode_t));
  53 static int java_parse_doc_section PARAMS ((int));
  54 static void java_parse_end_comment PARAMS ((int));
  55 static int java_get_unicode PARAMS ((void));
  56 static int java_read_unicode PARAMS ((java_lexer *, int *));
  57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
  58                                                              int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static int java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 #ifndef JC1_LITE
  65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
  66 #endif
  67
  68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  69 #ifndef JC1_LITE
  70 static void error_if_numeric_overflow PARAMS ((tree));
  71 #endif
  72
  73 #ifdef HAVE_ICONV
  74 /* This is nonzero if we have initialized `need_byteswap'.  */
  75 static int byteswap_init = 0;
  76
  77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
  78    big-endian order -- not native endian order.  We handle this by
  79    doing a conversion once at startup and seeing what happens.  This
  80    flag holds the results of this determination.  */
  81 static int need_byteswap = 0;
  82 #endif
  83
  84 void
  85 java_init_lex (finput, encoding)
  86      FILE *finput;
  87      const char *encoding;
  88 {
  89 #ifndef JC1_LITE
  90   int java_lang_imported = 0;
  91
  92   if (!java_lang_id)
  93     java_lang_id = get_identifier ("java.lang");
  94   if (!java_lang_cloneable)
  95     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  96   if (!java_io_serializable)
  97     java_io_serializable = get_identifier ("java.io.Serializable");
  98   if (!inst_id)
  99     inst_id = get_identifier ("inst$");
 100   if (!wpv_id)
 101     wpv_id = get_identifier ("write_parm_value$");
 102
 103   if (!java_lang_imported)
 104     {
 105       tree node = build_tree_list
 106         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
 107       read_import_dir (TREE_PURPOSE (node));
 108       TREE_CHAIN (node) = ctxp->import_demand_list;
 109       ctxp->import_demand_list = node;
 110       java_lang_imported = 1;
 111     }
 112
 113   if (!wfl_operator)
 114     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
 115   if (!label_id)
 116     label_id = get_identifier ("$L");
 117   if (!wfl_append)
 118     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 119   if (!wfl_string_buffer)
 120     wfl_string_buffer =
 121       build_expr_wfl (get_identifier (flag_emit_class_files
 122                                       ? "java.lang.StringBuffer"
 123                                       : "gnu.gcj.runtime.StringBuffer"),
 124                       NULL, 0, 0);
 125   if (!wfl_to_string)
 126     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 127
 128   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 129     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 130
 131   memset ((PTR) ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
 132   memset ((PTR) current_jcf, 0, sizeof (JCF));
 133   ctxp->current_parsed_class = NULL;
 134   ctxp->package = NULL_TREE;
 135 #endif
 136
 137   ctxp->filename = input_filename;
 138   ctxp->lineno = lineno = 0;
 139   ctxp->p_line = NULL;
 140   ctxp->c_line = NULL;
 141   ctxp->java_error_flag = 0;
 142   ctxp->lexer = java_new_lexer (finput, encoding);
 143 }
 144
 145 static char *
 146 java_sprint_unicode (line, i)
 147     struct java_line *line;
 148     int i;
 149 {
 150   static char buffer [10];
 151   if (line->unicode_escape_p [i] || line->line [i] > 128)
 152     sprintf (buffer, "\\u%04x", line->line [i]);
 153   else
 154     {
 155       buffer [0] = line->line [i];
 156       buffer [1] = '\0';
 157     }
 158   return buffer;
 159 }
 160
 161 static unicode_t
 162 java_sneak_unicode ()
 163 {
 164   return (ctxp->c_line->line [ctxp->c_line->current]);
 165 }
 166
 167 static void
 168 java_unget_unicode ()
 169 {
 170   if (!ctxp->c_line->current)
 171     /* Can't unget unicode.  */
 172     abort ();
 173
 174   ctxp->c_line->current--;
 175   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 176 }
 177
 178 static void
 179 java_allocate_new_line ()
 180 {
 181   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 182   char ahead_escape_p = (ctxp->c_line ?
 183                          ctxp->c_line->unicode_escape_ahead_p : 0);
 184
 185   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 186     {
 187       if (ctxp->p_line)
 188         {
 189           free (ctxp->p_line->unicode_escape_p);
 190           free (ctxp->p_line->line);
 191           free (ctxp->p_line);
 192         }
 193       ctxp->p_line = ctxp->c_line;
 194       ctxp->c_line = NULL;              /* Reallocated.  */
 195     }
 196
 197   if (!ctxp->c_line)
 198     {
 199       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 200       ctxp->c_line->max = JAVA_LINE_MAX;
 201       ctxp->c_line->line = (unicode_t *)xmalloc
 202         (sizeof (unicode_t)*ctxp->c_line->max);
 203       ctxp->c_line->unicode_escape_p =
 204           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 205       ctxp->c_line->white_space_only = 0;
 206     }
 207
 208   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 209   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 210   if (ahead)
 211     {
 212       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 213       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 214       ctxp->c_line->size++;
 215     }
 216   ctxp->c_line->ahead [0] = 0;
 217   ctxp->c_line->unicode_escape_ahead_p = 0;
 218   ctxp->c_line->lineno = ++lineno;
 219   ctxp->c_line->white_space_only = 1;
 220 }
 221
 222 /* Create a new lexer object.  */
 223
 224 java_lexer *
 225 java_new_lexer (finput, encoding)
 226      FILE *finput;
 227      const char *encoding;
 228 {
 229   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 230   int enc_error = 0;
 231
 232   lex->finput = finput;
 233   lex->bs_count = 0;
 234   lex->unget_value = 0;
 235   lex->hit_eof = 0;
 236
 237 #ifdef HAVE_ICONV
 238   lex->handle = iconv_open ("UCS-2", encoding);
 239   if (lex->handle != (iconv_t) -1)
 240     {
 241       lex->first = -1;
 242       lex->last = -1;
 243       lex->out_first = -1;
 244       lex->out_last = -1;
 245       lex->read_anything = 0;
 246       lex->use_fallback = 0;
 247
 248       /* Work around broken iconv() implementations by doing checking at
 249          runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 250          then all UCS-2 encoders will be broken.  Perhaps not a valid
 251          assumption.  */
 252       if (! byteswap_init)
 253         {
 254           iconv_t handle;
 255
 256           byteswap_init = 1;
 257
 258           handle = iconv_open ("UCS-2", "UTF-8");
 259           if (handle != (iconv_t) -1)
 260             {
 261               unicode_t result;
 262               unsigned char in[3];
 263               char *inp, *outp;
 264               size_t inc, outc, r;
 265
 266               /* This is the UTF-8 encoding of \ufeff.  */
 267               in[0] = 0xef;
 268               in[1] = 0xbb;
 269               in[2] = 0xbf;
 270
 271               inp = in;
 272               inc = 3;
 273               outp = (char *) &result;
 274               outc = 2;
 275
 276               r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
 277                          &outp, &outc);
 278               iconv_close (handle);
 279               /* Conversion must be complete for us to use the result.  */
 280               if (r != (size_t) -1 && inc == 0 && outc == 0)
 281                 need_byteswap = (result != 0xfeff);
 282             }
 283         }
 284
 285       lex->byte_swap = need_byteswap;
 286     }
 287   else
 288 #endif /* HAVE_ICONV */
 289     {
 290       /* If iconv failed, use the internal decoder if the default
 291          encoding was requested.  This code is used on platforms where
 292          iconv exists but is insufficient for our needs.  For
 293          instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
 294
 295          On Solaris the default encoding, as returned by nl_langinfo(),
 296          is `646' (aka ASCII), but the Solaris iconv_open() doesn't
 297          understand that.  We work around that by pretending
 298          `646' to be the same as UTF-8.   */
 299       if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
 300         enc_error = 1;
 301 #ifdef HAVE_ICONV
 302       else
 303         lex->use_fallback = 1;
 304 #endif /* HAVE_ICONV */
 305     }
 306
 307   if (enc_error)
 308     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
 309
 310   return lex;
 311 }
 312
 313 void
 314 java_destroy_lexer (lex)
 315      java_lexer *lex;
 316 {
 317 #ifdef HAVE_ICONV
 318   if (! lex->use_fallback)
 319     iconv_close (lex->handle);
 320 #endif
 321   free (lex);
 322 }
 323
 324 static int
 325 java_read_char (lex)
 326      java_lexer *lex;
 327 {
 328   if (lex->unget_value)
 329     {
 330       unicode_t r = lex->unget_value;
 331       lex->unget_value = 0;
 332       return r;
 333     }
 334
 335 #ifdef HAVE_ICONV
 336   if (! lex->use_fallback)
 337     {
 338       size_t ir, inbytesleft, in_save, out_count, out_save;
 339       char *inp, *outp;
 340       unicode_t result;
 341
 342       /* If there is data which has already been converted, use it.  */
 343       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 344         {
 345           lex->out_first = 0;
 346           lex->out_last = 0;
 347
 348           while (1)
 349             {
 350               /* See if we need to read more data.  If FIRST == 0 then
 351                  the previous conversion attempt ended in the middle of
 352                  a character at the end of the buffer.  Otherwise we
 353                  only have to read if the buffer is empty.  */
 354               if (lex->first == 0 || lex->first >= lex->last)
 355                 {
 356                   int r;
 357
 358                   if (lex->first >= lex->last)
 359                     {
 360                       lex->first = 0;
 361                       lex->last = 0;
 362                     }
 363                   if (feof (lex->finput))
 364                     return UEOF;
 365                   r = fread (&lex->buffer[lex->last], 1,
 366                              sizeof (lex->buffer) - lex->last,
 367                              lex->finput);
 368                   lex->last += r;
 369                 }
 370
 371               inbytesleft = lex->last - lex->first;
 372               out_count = sizeof (lex->out_buffer) - lex->out_last;
 373
 374               if (inbytesleft == 0)
 375                 {
 376                   /* We've tried to read and there is nothing left.  */
 377                   return UEOF;
 378                 }
 379
 380               in_save = inbytesleft;
 381               out_save = out_count;
 382               inp = &lex->buffer[lex->first];
 383               outp = &lex->out_buffer[lex->out_last];
 384               ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
 385                           &inbytesleft, &outp, &out_count);
 386
 387               /* If we haven't read any bytes, then look to see if we
 388                  have read a BOM.  */
 389               if (! lex->read_anything && out_save - out_count >= 2)
 390                 {
 391                   unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
 392                   if (uc == 0xfeff)
 393                     {
 394                       lex->byte_swap = 0;
 395                       lex->out_first += 2;
 396                     }
 397                   else if (uc == 0xfffe)
 398                     {
 399                       lex->byte_swap = 1;
 400                       lex->out_first += 2;
 401                     }
 402                   lex->read_anything = 1;
 403                 }
 404
 405               if (lex->byte_swap)
 406                 {
 407                   unsigned int i;
 408                   for (i = 0; i < out_save - out_count; i += 2)
 409                     {
 410                       char t = lex->out_buffer[lex->out_last + i];
 411                       lex->out_buffer[lex->out_last + i]
 412                         = lex->out_buffer[lex->out_last + i + 1];
 413                       lex->out_buffer[lex->out_last + i + 1] = t;
 414                     }
 415                 }
 416
 417               lex->first += in_save - inbytesleft;
 418               lex->out_last += out_save - out_count;
 419
 420               /* If we converted anything at all, move along.  */
 421               if (out_count != out_save)
 422                 break;
 423
 424               if (ir == (size_t) -1)
 425                 {
 426                   if (errno == EINVAL)
 427                     {
 428                       /* This is ok.  This means that the end of our buffer
 429                          is in the middle of a character sequence.  We just
 430                          move the valid part of the buffer to the beginning
 431                          to force a read.  */
 432                       memmove (&lex->buffer[0], &lex->buffer[lex->first],
 433                                lex->last - lex->first);
 434                       lex->last -= lex->first;
 435                       lex->first = 0;
 436                     }
 437                   else
 438                     {
 439                       /* A more serious error.  */
 440                       java_lex_error ("unrecognized character in input stream",
 441                                       0);
 442                       return UEOF;
 443                     }
 444                 }
 445             }
 446         }
 447
 448       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 449         {
 450           /* Don't have any data.  */
 451           return UEOF;
 452         }
 453
 454       /* Success.  */
 455       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
 456       lex->out_first += 2;
 457       return result;
 458     }
 459   else
 460 #endif /* HAVE_ICONV */
 461     {
 462       int c, c1, c2;
 463       c = getc (lex->finput);
 464
 465       if (c == EOF)
 466         return UEOF;
 467       if (c < 128)
 468         return (unicode_t) c;
 469       else
 470         {
 471           if ((c & 0xe0) == 0xc0)
 472             {
 473               c1 = getc (lex->finput);
 474               if ((c1 & 0xc0) == 0x80)
 475                 {
 476                   unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
 477                   /* Check for valid 2-byte characters.  We explicitly
 478                      allow \0 because this encoding is common in the
 479                      Java world.  */
 480                   if (r == 0 || (r >= 0x80 && r <= 0x7ff))
 481                     return r;
 482                 }
 483             }
 484           else if ((c & 0xf0) == 0xe0)
 485             {
 486               c1 = getc (lex->finput);
 487               if ((c1 & 0xc0) == 0x80)
 488                 {
 489                   c2 = getc (lex->finput);
 490                   if ((c2 & 0xc0) == 0x80)
 491                     {
 492                       unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
 493                                                  (( c1 & 0x3f) << 6)
 494                                                  + (c2 & 0x3f));
 495                       /* Check for valid 3-byte characters.
 496                          Don't allow surrogate, \ufffe or \uffff.  */
 497                       if (IN_RANGE (r, 0x800, 0xffff)
 498                           && ! IN_RANGE (r, 0xd800, 0xdfff)
 499                           && r != 0xfffe && r != 0xffff)
 500                         return r;
 501                     }
 502                 }
 503             }
 504
 505           /* We simply don't support invalid characters.  We also
 506              don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
 507              cannot be valid Java characters.  */
 508           java_lex_error ("malformed UTF-8 character", 0);
 509         }
 510     }
 511
 512   /* We only get here on error.  */
 513   return UEOF;
 514 }
 515
 516 static void
 517 java_store_unicode (l, c, unicode_escape_p)
 518     struct java_line *l;
 519     unicode_t c;
 520     int unicode_escape_p;
 521 {
 522   if (l->size == l->max)
 523     {
 524       l->max += JAVA_LINE_MAX;
 525       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 526       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 527                                                sizeof (char)*l->max);
 528     }
 529   l->line [l->size] = c;
 530   l->unicode_escape_p [l->size++] = unicode_escape_p;
 531 }
 532
 533 static int
 534 java_read_unicode (lex, unicode_escape_p)
 535      java_lexer *lex;
 536      int *unicode_escape_p;
 537 {
 538   int c;
 539
 540   c = java_read_char (lex);
 541   *unicode_escape_p = 0;
 542
 543   if (c != '\\')
 544     {
 545       lex->bs_count = 0;
 546       return c;
 547     }
 548
 549   ++lex->bs_count;
 550   if ((lex->bs_count) % 2 == 1)
 551     {
 552       /* Odd number of \ seen.  */
 553       c = java_read_char (lex);
 554       if (c == 'u')
 555         {
 556           unicode_t unicode = 0;
 557           int shift = 12;
 558
 559           /* Recognize any number of `u's in \u.  */
 560           while ((c = java_read_char (lex)) == 'u')
 561             ;
 562
 563           /* Unget the most recent character as it is not a `u'.  */
 564           if (c == UEOF)
 565             return UEOF;
 566           lex->unget_value = c;
 567
 568           /* Next should be 4 hex digits, otherwise it's an error.
 569              The hex value is converted into the unicode, pushed into
 570              the Unicode stream.  */
 571           for (shift = 12; shift >= 0; shift -= 4)
 572             {
 573               if ((c = java_read_char (lex)) == UEOF)
 574                 return UEOF;
 575               if (hex_p (c))
 576                 unicode |= (unicode_t)(hex_value (c) << shift);
 577               else
 578                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 579             }
 580           lex->bs_count = 0;
 581           *unicode_escape_p = 1;
 582           return unicode;
 583         }
 584       lex->unget_value = c;
 585     }
 586   return (unicode_t) '\\';
 587 }
 588
 589 static int
 590 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
 591      java_lexer *lex;
 592      int *unicode_escape_p;
 593 {
 594   int c = java_read_unicode (lex, unicode_escape_p);
 595
 596   if (c == '\r')
 597     {
 598       /* We have to read ahead to see if we got \r\n.  In that case we
 599          return a single line terminator.  */
 600       int dummy;
 601       c = java_read_unicode (lex, &dummy);
 602       if (c != '\n')
 603         lex->unget_value = c;
 604       /* In either case we must return a newline.  */
 605       c = '\n';
 606     }
 607
 608   return c;
 609 }
 610
 611 static int
 612 java_get_unicode ()
 613 {
 614   /* It's time to read a line when...  */
 615   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 616     {
 617       int c;
 618       int found_chars = 0;
 619
 620       if (ctxp->lexer->hit_eof)
 621         return UEOF;
 622
 623       java_allocate_new_line ();
 624       if (ctxp->c_line->line[0] != '\n')
 625         {
 626           for (;;)
 627             {
 628               int unicode_escape_p;
 629               c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 630                                                             &unicode_escape_p);
 631               if (c != UEOF)
 632                 {
 633                   found_chars = 1;
 634                   java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 635                   if (ctxp->c_line->white_space_only
 636                       && !JAVA_WHITE_SPACE_P (c)
 637                       && c != '\n')
 638                     ctxp->c_line->white_space_only = 0;
 639                 }
 640               if ((c == '\n') || (c == UEOF))
 641                 break;
 642             }
 643
 644           if (c == UEOF && ! found_chars)
 645             {
 646               ctxp->lexer->hit_eof = 1;
 647               return UEOF;
 648             }
 649         }
 650     }
 651   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 652   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 653   return ctxp->c_line->line [ctxp->c_line->current++];
 654 }
 655
 656 /* Parse the end of a C style comment.
 657  * C is the first character following the '/' and '*'.  */
 658 static void
 659 java_parse_end_comment (c)
 660      int c;
 661 {
 662   for ( ;; c = java_get_unicode ())
 663     {
 664       switch (c)
 665         {
 666         case UEOF:
 667           java_lex_error ("Comment not terminated at end of input", 0);
 668           return;
 669         case '*':
 670           switch (c = java_get_unicode ())
 671             {
 672             case UEOF:
 673               java_lex_error ("Comment not terminated at end of input", 0);
 674               return;
 675             case '/':
 676               return;
 677             case '*':   /* Reparse only '*'.  */
 678               java_unget_unicode ();
 679             }
 680         }
 681     }
 682 }
 683
 684 /* Parse the documentation section. Keywords must be at the beginning
 685    of a documentation comment line (ignoring white space and any `*'
 686    character). Parsed keyword(s): @DEPRECATED.  */
 687
 688 static int
 689 java_parse_doc_section (c)
 690      int c;
 691 {
 692   int valid_tag = 0, seen_star = 0;
 693
 694   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 695     {
 696       switch (c)
 697         {
 698         case '*':
 699           seen_star = 1;
 700           break;
 701         case '\n': /* ULT */
 702           valid_tag = 1;
 703         default:
 704           seen_star = 0;
 705         }
 706       c = java_get_unicode();
 707     }
 708
 709   if (c == UEOF)
 710     java_lex_error ("Comment not terminated at end of input", 0);
 711
 712   if (seen_star && (c == '/'))
 713     return 1;                   /* Goto step1 in caller.  */
 714
 715   /* We're parsing `@deprecated'.  */
 716   if (valid_tag && (c == '@'))
 717     {
 718       char tag [11];
 719       int  tag_index = 0;
 720
 721       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 722         {
 723           c = java_get_unicode ();
 724           tag [tag_index++] = c;
 725         }
 726
 727       if (c == UEOF)
 728         java_lex_error ("Comment not terminated at end of input", 0);
 729       tag [tag_index] = '\0';
 730
 731       if (!strcmp (tag, "deprecated"))
 732         ctxp->deprecated = 1;
 733     }
 734   java_unget_unicode ();
 735   return 0;
 736 }
 737
 738 /* Return true if C is a valid start character for a Java identifier.
 739    This is only called if C >= 128 -- smaller values are handled
 740    inline.  However, this function handles all values anyway.  */
 741 static int
 742 java_start_char_p (c)
 743      unicode_t c;
 744 {
 745   unsigned int hi = c / 256;
 746   const char *const page = type_table[hi];
 747   unsigned long val = (unsigned long) page;
 748   int flags;
 749
 750   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 751     flags = page[c & 255];
 752   else
 753     flags = val;
 754
 755   return flags & LETTER_START;
 756 }
 757
 758 /* Return true if C is a valid part character for a Java identifier.
 759    This is only called if C >= 128 -- smaller values are handled
 760    inline.  However, this function handles all values anyway.  */
 761 static int
 762 java_part_char_p (c)
 763      unicode_t c;
 764 {
 765   unsigned int hi = c / 256;
 766   const char *const page = type_table[hi];
 767   unsigned long val = (unsigned long) page;
 768   int flags;
 769
 770   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 771     flags = page[c & 255];
 772   else
 773     flags = val;
 774
 775   return flags & LETTER_PART;
 776 }
 777
 778 static int
 779 java_parse_escape_sequence ()
 780 {
 781   unicode_t char_lit;
 782   int c;
 783
 784   switch (c = java_get_unicode ())
 785     {
 786     case 'b':
 787       return (unicode_t)0x8;
 788     case 't':
 789       return (unicode_t)0x9;
 790     case 'n':
 791       return (unicode_t)0xa;
 792     case 'f':
 793       return (unicode_t)0xc;
 794     case 'r':
 795       return (unicode_t)0xd;
 796     case '"':
 797       return (unicode_t)0x22;
 798     case '\'':
 799       return (unicode_t)0x27;
 800     case '\\':
 801       return (unicode_t)0x5c;
 802     case '0': case '1': case '2': case '3': case '4':
 803     case '5': case '6': case '7':
 804       {
 805         int octal_escape[3];
 806         int octal_escape_index = 0;
 807         int max = 3;
 808         int i, shift;
 809
 810         for (; octal_escape_index < max && RANGE (c, '0', '7');
 811              c = java_get_unicode ())
 812           {
 813             if (octal_escape_index == 0 && c > '3')
 814               {
 815                 /* According to the grammar, `\477' has a well-defined
 816                    meaning -- it is `\47' followed by `7'.  */
 817                 --max;
 818               }
 819             octal_escape [octal_escape_index++] = c;
 820           }
 821
 822         java_unget_unicode ();
 823
 824         for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 825              i < octal_escape_index; i++, shift -= 3)
 826           char_lit |= (octal_escape [i] - '0') << shift;
 827
 828         return char_lit;
 829       }
 830     default:
 831       java_lex_error ("Invalid character in escape sequence", 0);
 832       return JAVA_CHAR_ERROR;
 833     }
 834 }
 835
 836 #ifndef JC1_LITE
 837 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
 838
 839 /* Subroutine of java_lex: converts floating-point literals to tree
 840    nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
 841    store the result.  FFLAG indicates whether the literal was tagged
 842    with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
 843    is the line number on which to report any error.  */
 844
 845 static void java_perform_atof   PARAMS ((YYSTYPE *, char *, int, int));
 846
 847 static void
 848 java_perform_atof (java_lval, literal_token, fflag, number_beginning)
 849      YYSTYPE *java_lval;
 850      char *literal_token;
 851      int fflag;
 852      int number_beginning;
 853 {
 854   REAL_VALUE_TYPE value;
 855   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 856
 857   SET_REAL_VALUE_ATOF (value,
 858                        REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
 859
 860   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
 861     {
 862       JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
 863       value = DCONST0;
 864     }
 865   else if (IS_ZERO (value))
 866     {
 867       /* We check to see if the value is really 0 or if we've found an
 868          underflow.  We do this in the most primitive imaginable way.  */
 869       int really_zero = 1;
 870       char *p = literal_token;
 871       if (*p == '-')
 872         ++p;
 873       while (*p && *p != 'e' && *p != 'E')
 874         {
 875           if (*p != '0' && *p != '.')
 876             {
 877               really_zero = 0;
 878               break;
 879             }
 880           ++p;
 881         }
 882       if (! really_zero)
 883         {
 884           int i = ctxp->c_line->current;
 885           ctxp->c_line->current = number_beginning;
 886           java_lex_error ("Floating point literal underflow", 0);
 887           ctxp->c_line->current = i;
 888         }
 889     }
 890
 891   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 892 }
 893 #endif
 894
 895 static int yylex                PARAMS ((YYSTYPE *));
 896
 897 static int
 898 #ifdef JC1_LITE
 899 yylex (java_lval)
 900 #else
 901 java_lex (java_lval)
 902 #endif
 903      YYSTYPE *java_lval;
 904 {
 905   int c;
 906   unicode_t first_unicode;
 907   int ascii_index, all_ascii;
 908   char *string;
 909
 910   /* Translation of the Unicode escape in the raw stream of Unicode
 911      characters. Takes care of line terminator.  */
 912  step1:
 913   /* Skip white spaces: SP, TAB and FF or ULT.  */
 914   for (c = java_get_unicode ();
 915        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 916     if (c == '\n')
 917       {
 918         ctxp->elc.line = ctxp->c_line->lineno;
 919         ctxp->elc.col  = ctxp->c_line->char_col-2;
 920       }
 921
 922   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 923
 924   if (c == 0x1a)                /* CTRL-Z.  */
 925     {
 926       if ((c = java_get_unicode ()) == UEOF)
 927         return 0;               /* Ok here.  */
 928       else
 929         java_unget_unicode ();  /* Caught later, at the end of the
 930                                    function.  */
 931     }
 932   /* Handle EOF here.  */
 933   if (c == UEOF)        /* Should probably do something here...  */
 934     return 0;
 935
 936   /* Take care of eventual comments.  */
 937   if (c == '/')
 938     {
 939       switch (c = java_get_unicode ())
 940         {
 941         case '/':
 942           for (;;)
 943             {
 944               c = java_get_unicode ();
 945               if (c == UEOF)
 946                 {
 947                   /* It is ok to end a `//' comment with EOF, unless
 948                      we're being pedantic.  */
 949                   if (pedantic)
 950                     java_lex_error ("Comment not terminated at end of input",
 951                                     0);
 952                   return 0;
 953                 }
 954               if (c == '\n')    /* ULT */
 955                 goto step1;
 956             }
 957           break;
 958
 959         case '*':
 960           if ((c = java_get_unicode ()) == '*')
 961             {
 962               if ((c = java_get_unicode ()) == '/')
 963                 goto step1;     /* Empty documentation comment.  */
 964               else if (java_parse_doc_section (c))
 965                 goto step1;
 966             }
 967
 968           java_parse_end_comment ((c = java_get_unicode ()));
 969           goto step1;
 970           break;
 971         default:
 972           java_unget_unicode ();
 973           c = '/';
 974           break;
 975         }
 976     }
 977
 978   ctxp->elc.line = ctxp->c_line->lineno;
 979   ctxp->elc.prev_col = ctxp->elc.col;
 980   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 981   if (ctxp->elc.col < 0)
 982     abort ();
 983
 984   /* Numeric literals.  */
 985   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 986     {
 987       /* This section of code is borrowed from gcc/c-lex.c.  */
 988 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 989       int parts[TOTAL_PARTS];
 990       HOST_WIDE_INT high, low;
 991       /* End borrowed section.  */
 992       char literal_token [256];
 993       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 994       int  found_hex_digits = 0, found_non_octal_digits = 0;
 995       int  i;
 996 #ifndef JC1_LITE
 997       int  number_beginning = ctxp->c_line->current;
 998       tree value;
 999 #endif
1000
1001       /* We might have a . separator instead of a FP like .[0-9]*.  */
1002       if (c == '.')
1003         {
1004           unicode_t peep = java_sneak_unicode ();
1005
1006           if (!JAVA_ASCII_DIGIT (peep))
1007             {
1008               JAVA_LEX_SEP('.');
1009               BUILD_OPERATOR (DOT_TK);
1010             }
1011         }
1012
1013       for (i = 0; i < TOTAL_PARTS; i++)
1014         parts [i] = 0;
1015
1016       if (c == '0')
1017         {
1018           c = java_get_unicode ();
1019           if (c == 'x' || c == 'X')
1020             {
1021               radix = 16;
1022               c = java_get_unicode ();
1023             }
1024           else if (JAVA_ASCII_DIGIT (c))
1025             radix = 8;
1026           else if (c == '.' || c == 'e' || c =='E')
1027             {
1028               /* Push the '.', 'e', or 'E' back and prepare for a FP
1029                  parsing...  */
1030               java_unget_unicode ();
1031               c = '0';
1032             }
1033           else
1034             {
1035               /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1036               JAVA_LEX_LIT ("0", 10);
1037               switch (c)
1038                 {
1039                 case 'L': case 'l':
1040                   SET_LVAL_NODE (long_zero_node);
1041                   return (INT_LIT_TK);
1042                 case 'f': case 'F':
1043                   SET_LVAL_NODE (float_zero_node);
1044                   return (FP_LIT_TK);
1045                 case 'd': case 'D':
1046                   SET_LVAL_NODE (double_zero_node);
1047                   return (FP_LIT_TK);
1048                 default:
1049                   java_unget_unicode ();
1050                   SET_LVAL_NODE (integer_zero_node);
1051                   return (INT_LIT_TK);
1052                 }
1053             }
1054         }
1055       /* Parse the first part of the literal, until we find something
1056          which is not a number.  */
1057       while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1058              JAVA_ASCII_DIGIT (c))
1059         {
1060           /* We store in a string (in case it turns out to be a FP) and in
1061              PARTS if we have to process a integer literal.  */
1062           int numeric = hex_value (c);
1063           int count;
1064
1065           /* Remember when we find a valid hexadecimal digit.  */
1066           if (radix == 16)
1067             found_hex_digits = 1;
1068           /* Remember when we find an invalid octal digit.  */
1069           else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1070             found_non_octal_digits = 1;
1071
1072           literal_token [literal_index++] = c;
1073           /* This section of code if borrowed from gcc/c-lex.c.  */
1074           for (count = 0; count < TOTAL_PARTS; count++)
1075             {
1076               parts[count] *= radix;
1077               if (count)
1078                 {
1079                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1080                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1081                 }
1082               else
1083                 parts[0] += numeric;
1084             }
1085           if (parts [TOTAL_PARTS-1] != 0)
1086             overflow = 1;
1087           /* End borrowed section.  */
1088           c = java_get_unicode ();
1089         }
1090
1091       /* If we have something from the FP char set but not a digit, parse
1092          a FP literal.  */
1093       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1094         {
1095           int stage = 0;
1096           int seen_digit = (literal_index ? 1 : 0);
1097           int seen_exponent = 0;
1098           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
1099                                    double unless specified.  */
1100
1101           /* It is ok if the radix is 8 because this just means we've
1102              seen a leading `0'.  However, radix==16 is invalid.  */
1103           if (radix == 16)
1104             java_lex_error ("Can't express non-decimal FP literal", 0);
1105           radix = 10;
1106
1107           for (;;)
1108             {
1109               if (c == '.')
1110                 {
1111                   if (stage < 1)
1112                     {
1113                       stage = 1;
1114                       literal_token [literal_index++ ] = c;
1115                       c = java_get_unicode ();
1116                     }
1117                   else
1118                     java_lex_error ("Invalid character in FP literal", 0);
1119                 }
1120
1121               if (c == 'e' || c == 'E')
1122                 {
1123                   if (stage < 2)
1124                     {
1125                       /* {E,e} must have seen at least a digit.  */
1126                       if (!seen_digit)
1127                         java_lex_error
1128                           ("Invalid FP literal, mantissa must have digit", 0);
1129                       seen_digit = 0;
1130                       seen_exponent = 1;
1131                       stage = 2;
1132                       literal_token [literal_index++] = c;
1133                       c = java_get_unicode ();
1134                     }
1135                   else
1136                     java_lex_error ("Invalid character in FP literal", 0);
1137                 }
1138               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1139                 {
1140                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1141                   stage = 4;    /* So we fall through.  */
1142                 }
1143
1144               if ((c=='-' || c =='+') && stage == 2)
1145                 {
1146                   stage = 3;
1147                   literal_token [literal_index++] = c;
1148                   c = java_get_unicode ();
1149                 }
1150
1151               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1152                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1153                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1154                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
1155                 {
1156                   if (JAVA_ASCII_DIGIT (c))
1157                     seen_digit = 1;
1158                   if (stage == 2)
1159                     stage = 3;
1160                   literal_token [literal_index++ ] = c;
1161                   c = java_get_unicode ();
1162                 }
1163               else
1164                 {
1165                   if (stage != 4) /* Don't push back fF/dD.  */
1166                     java_unget_unicode ();
1167
1168                   /* An exponent (if any) must have seen a digit.  */
1169                   if (seen_exponent && !seen_digit)
1170                     java_lex_error
1171                       ("Invalid FP literal, exponent must have digit", 0);
1172
1173                   literal_token [literal_index] = '\0';
1174                   JAVA_LEX_LIT (literal_token, radix);
1175
1176 #ifndef JC1_LITE
1177                   java_perform_atof (java_lval, literal_token,
1178                                      fflag, number_beginning);
1179 #endif
1180                   return FP_LIT_TK;
1181                 }
1182             }
1183         } /* JAVA_ASCII_FPCHAR (c) */
1184
1185       /* Here we get back to converting the integral literal.  */
1186       if (radix == 16 && ! found_hex_digits)
1187         java_lex_error
1188           ("0x must be followed by at least one hexadecimal digit", 0);
1189       else if (radix == 8 && found_non_octal_digits)
1190         java_lex_error ("Octal literal contains digit out of range", 0);
1191       else if (c == 'L' || c == 'l')
1192         long_suffix = 1;
1193       else
1194         java_unget_unicode ();
1195
1196 #ifdef JAVA_LEX_DEBUG
1197       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1198       JAVA_LEX_LIT (literal_token, radix);
1199 #endif
1200       /* This section of code is borrowed from gcc/c-lex.c.  */
1201       if (!overflow)
1202         {
1203           bytes = GET_TYPE_PRECISION (long_type_node);
1204           for (i = bytes; i < TOTAL_PARTS; i++)
1205             if (parts [i])
1206               {
1207                 overflow = 1;
1208                 break;
1209               }
1210         }
1211       high = low = 0;
1212       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1213         {
1214           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1215                                               / HOST_BITS_PER_CHAR)]
1216                    << (i * HOST_BITS_PER_CHAR));
1217           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1218         }
1219       /* End borrowed section.  */
1220
1221       /* Range checking.  */
1222       if (long_suffix)
1223         {
1224           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1225              9223372036854775807L is the biggest `long' literal that can be
1226              expressed using a 10 radix. For other radices, everything that
1227              fits withing 64 bits is OK.  */
1228           int hb = (high >> 31);
1229           if (overflow || (hb && low && radix == 10)
1230               || (hb && high & 0x7fffffff && radix == 10))
1231             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1232         }
1233       else
1234         {
1235           /* 2147483648 is valid if operand of a '-'. Otherwise,
1236              2147483647 is the biggest `int' literal that can be
1237              expressed using a 10 radix. For other radices, everything
1238              that fits within 32 bits is OK.  As all literals are
1239              signed, we sign extend here.  */
1240           int hb = (low >> 31) & 0x1;
1241           if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
1242             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1243           high = -hb;
1244         }
1245 #ifndef JC1_LITE
1246       value = build_int_2 (low, high);
1247       JAVA_RADIX10_FLAG (value) = radix == 10;
1248       SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
1249 #else
1250       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1251                           long_suffix ? long_type_node : int_type_node);
1252 #endif
1253       return INT_LIT_TK;
1254     }
1255
1256   /* Character literals.  */
1257   if (c == '\'')
1258     {
1259       int char_lit;
1260       if ((c = java_get_unicode ()) == '\\')
1261         char_lit = java_parse_escape_sequence ();
1262       else
1263         {
1264           if (c == '\n' || c == '\'')
1265             java_lex_error ("Invalid character literal", 0);
1266           char_lit = c;
1267         }
1268
1269       c = java_get_unicode ();
1270
1271       if ((c == '\n') || (c == UEOF))
1272         java_lex_error ("Character literal not terminated at end of line", 0);
1273       if (c != '\'')
1274         java_lex_error ("Syntax error in character literal", 0);
1275
1276       if (char_lit == JAVA_CHAR_ERROR)
1277         char_lit = 0;           /* We silently convert it to zero.  */
1278
1279       JAVA_LEX_CHAR_LIT (char_lit);
1280       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1281       return CHAR_LIT_TK;
1282     }
1283
1284   /* String literals.  */
1285   if (c == '"')
1286     {
1287       int no_error;
1288       char *string;
1289
1290       for (no_error = 1, c = java_get_unicode ();
1291            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1292         {
1293           if (c == '\\')
1294             c = java_parse_escape_sequence ();
1295           if (c == JAVA_CHAR_ERROR)
1296             {
1297               no_error = 0;
1298               c = 0;            /* We silently convert it to zero.  */
1299             }
1300           java_unicode_2_utf8 (c);
1301         }
1302       if (c == '\n' || c == UEOF) /* ULT.  */
1303         {
1304           lineno--;     /* Refer to the line where the terminator was seen.  */
1305           java_lex_error ("String not terminated at end of line", 0);
1306           lineno++;
1307         }
1308
1309       obstack_1grow (&temporary_obstack, '\0');
1310       string = obstack_finish (&temporary_obstack);
1311 #ifndef JC1_LITE
1312       if (!no_error || (c != '"'))
1313         java_lval->node = error_mark_node; /* FIXME: Requires futher
1314                                               testing.  */
1315       else
1316         java_lval->node = build_string (strlen (string), string);
1317 #endif
1318       obstack_free (&temporary_obstack, string);
1319       return STRING_LIT_TK;
1320     }
1321
1322   /* Separator.  */
1323   switch (c)
1324     {
1325     case '(':
1326       JAVA_LEX_SEP (c);
1327       BUILD_OPERATOR (OP_TK);
1328     case ')':
1329       JAVA_LEX_SEP (c);
1330       return CP_TK;
1331     case '{':
1332       JAVA_LEX_SEP (c);
1333       if (ctxp->ccb_indent == 1)
1334         ctxp->first_ccb_indent1 = lineno;
1335       ctxp->ccb_indent++;
1336       BUILD_OPERATOR (OCB_TK);
1337     case '}':
1338       JAVA_LEX_SEP (c);
1339       ctxp->ccb_indent--;
1340       if (ctxp->ccb_indent == 1)
1341         ctxp->last_ccb_indent1 = lineno;
1342       BUILD_OPERATOR (CCB_TK);
1343     case '[':
1344       JAVA_LEX_SEP (c);
1345       BUILD_OPERATOR (OSB_TK);
1346     case ']':
1347       JAVA_LEX_SEP (c);
1348       return CSB_TK;
1349     case ';':
1350       JAVA_LEX_SEP (c);
1351       return SC_TK;
1352     case ',':
1353       JAVA_LEX_SEP (c);
1354       return C_TK;
1355     case '.':
1356       JAVA_LEX_SEP (c);
1357       BUILD_OPERATOR (DOT_TK);
1358       /*      return DOT_TK; */
1359     }
1360
1361   /* Operators.  */
1362   switch (c)
1363     {
1364     case '=':
1365       if ((c = java_get_unicode ()) == '=')
1366         {
1367           BUILD_OPERATOR (EQ_TK);
1368         }
1369       else
1370         {
1371           /* Equals is used in two different locations. In the
1372              variable_declarator: rule, it has to be seen as '=' as opposed
1373              to being seen as an ordinary assignment operator in
1374              assignment_operators: rule.  */
1375           java_unget_unicode ();
1376           BUILD_OPERATOR (ASSIGN_TK);
1377         }
1378
1379     case '>':
1380       switch ((c = java_get_unicode ()))
1381         {
1382         case '=':
1383           BUILD_OPERATOR (GTE_TK);
1384         case '>':
1385           switch ((c = java_get_unicode ()))
1386             {
1387             case '>':
1388               if ((c = java_get_unicode ()) == '=')
1389                 {
1390                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1391                 }
1392               else
1393                 {
1394                   java_unget_unicode ();
1395                   BUILD_OPERATOR (ZRS_TK);
1396                 }
1397             case '=':
1398               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1399             default:
1400               java_unget_unicode ();
1401               BUILD_OPERATOR (SRS_TK);
1402             }
1403         default:
1404           java_unget_unicode ();
1405           BUILD_OPERATOR (GT_TK);
1406         }
1407
1408     case '<':
1409       switch ((c = java_get_unicode ()))
1410         {
1411         case '=':
1412           BUILD_OPERATOR (LTE_TK);
1413         case '<':
1414           if ((c = java_get_unicode ()) == '=')
1415             {
1416               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1417             }
1418           else
1419             {
1420               java_unget_unicode ();
1421               BUILD_OPERATOR (LS_TK);
1422             }
1423         default:
1424           java_unget_unicode ();
1425           BUILD_OPERATOR (LT_TK);
1426         }
1427
1428     case '&':
1429       switch ((c = java_get_unicode ()))
1430         {
1431         case '&':
1432           BUILD_OPERATOR (BOOL_AND_TK);
1433         case '=':
1434           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1435         default:
1436           java_unget_unicode ();
1437           BUILD_OPERATOR (AND_TK);
1438         }
1439
1440     case '|':
1441       switch ((c = java_get_unicode ()))
1442         {
1443         case '|':
1444           BUILD_OPERATOR (BOOL_OR_TK);
1445         case '=':
1446           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1447         default:
1448           java_unget_unicode ();
1449           BUILD_OPERATOR (OR_TK);
1450         }
1451
1452     case '+':
1453       switch ((c = java_get_unicode ()))
1454         {
1455         case '+':
1456           BUILD_OPERATOR (INCR_TK);
1457         case '=':
1458           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1459         default:
1460           java_unget_unicode ();
1461           BUILD_OPERATOR (PLUS_TK);
1462         }
1463
1464     case '-':
1465       switch ((c = java_get_unicode ()))
1466         {
1467         case '-':
1468           BUILD_OPERATOR (DECR_TK);
1469         case '=':
1470           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1471         default:
1472           java_unget_unicode ();
1473           BUILD_OPERATOR (MINUS_TK);
1474         }
1475
1476     case '*':
1477       if ((c = java_get_unicode ()) == '=')
1478         {
1479           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1480         }
1481       else
1482         {
1483           java_unget_unicode ();
1484           BUILD_OPERATOR (MULT_TK);
1485         }
1486
1487     case '/':
1488       if ((c = java_get_unicode ()) == '=')
1489         {
1490           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1491         }
1492       else
1493         {
1494           java_unget_unicode ();
1495           BUILD_OPERATOR (DIV_TK);
1496         }
1497
1498     case '^':
1499       if ((c = java_get_unicode ()) == '=')
1500         {
1501           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1502         }
1503       else
1504         {
1505           java_unget_unicode ();
1506           BUILD_OPERATOR (XOR_TK);
1507         }
1508
1509     case '%':
1510       if ((c = java_get_unicode ()) == '=')
1511         {
1512           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1513         }
1514       else
1515         {
1516           java_unget_unicode ();
1517           BUILD_OPERATOR (REM_TK);
1518         }
1519
1520     case '!':
1521       if ((c = java_get_unicode()) == '=')
1522         {
1523           BUILD_OPERATOR (NEQ_TK);
1524         }
1525       else
1526         {
1527           java_unget_unicode ();
1528           BUILD_OPERATOR (NEG_TK);
1529         }
1530
1531     case '?':
1532       JAVA_LEX_OP ("?");
1533       BUILD_OPERATOR (REL_QM_TK);
1534     case ':':
1535       JAVA_LEX_OP (":");
1536       BUILD_OPERATOR (REL_CL_TK);
1537     case '~':
1538       BUILD_OPERATOR (NOT_TK);
1539     }
1540
1541   /* Keyword, boolean literal or null literal.  */
1542   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1543        JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1544     {
1545       java_unicode_2_utf8 (c);
1546       if (all_ascii && c >= 128)
1547         all_ascii = 0;
1548       ascii_index++;
1549     }
1550
1551   obstack_1grow (&temporary_obstack, '\0');
1552   string = obstack_finish (&temporary_obstack);
1553   java_unget_unicode ();
1554
1555   /* If we have something all ascii, we consider a keyword, a boolean
1556      literal, a null literal or an all ASCII identifier.  Otherwise,
1557      this is an identifier (possibly not respecting formation rule).  */
1558   if (all_ascii)
1559     {
1560       const struct java_keyword *kw;
1561       if ((kw=java_keyword (string, ascii_index)))
1562         {
1563           JAVA_LEX_KW (string);
1564           switch (kw->token)
1565             {
1566             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1567             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1568             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1569             case PRIVATE_TK:      case STRICT_TK:
1570               SET_MODIFIER_CTX (kw->token);
1571               return MODIFIER_TK;
1572             case FLOAT_TK:
1573               SET_LVAL_NODE (float_type_node);
1574               return FP_TK;
1575             case DOUBLE_TK:
1576               SET_LVAL_NODE (double_type_node);
1577               return FP_TK;
1578             case BOOLEAN_TK:
1579               SET_LVAL_NODE (boolean_type_node);
1580               return BOOLEAN_TK;
1581             case BYTE_TK:
1582               SET_LVAL_NODE (byte_type_node);
1583               return INTEGRAL_TK;
1584             case SHORT_TK:
1585               SET_LVAL_NODE (short_type_node);
1586               return INTEGRAL_TK;
1587             case INT_TK:
1588               SET_LVAL_NODE (int_type_node);
1589               return INTEGRAL_TK;
1590             case LONG_TK:
1591               SET_LVAL_NODE (long_type_node);
1592               return INTEGRAL_TK;
1593             case CHAR_TK:
1594               SET_LVAL_NODE (char_type_node);
1595               return INTEGRAL_TK;
1596
1597               /* Keyword based literals.  */
1598             case TRUE_TK:
1599             case FALSE_TK:
1600               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1601                               boolean_true_node : boolean_false_node));
1602               return BOOL_LIT_TK;
1603             case NULL_TK:
1604               SET_LVAL_NODE (null_pointer_node);
1605               return NULL_TK;
1606
1607               /* Some keyword we want to retain information on the location
1608                  they where found.  */
1609             case CASE_TK:
1610             case DEFAULT_TK:
1611             case SUPER_TK:
1612             case THIS_TK:
1613             case RETURN_TK:
1614             case BREAK_TK:
1615             case CONTINUE_TK:
1616             case TRY_TK:
1617             case CATCH_TK:
1618             case THROW_TK:
1619             case INSTANCEOF_TK:
1620             case ASSERT_TK:
1621               BUILD_OPERATOR (kw->token);
1622
1623             default:
1624               return kw->token;
1625             }
1626         }
1627     }
1628
1629   /* We may have an ID here.  */
1630   if (JAVA_START_CHAR_P (first_unicode))
1631     {
1632       JAVA_LEX_ID (string);
1633       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1634       return ID_TK;
1635     }
1636
1637   /* Everything else is an invalid character in the input.  */
1638   {
1639     char lex_error_buffer [128];
1640     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1641              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1642     java_lex_error (lex_error_buffer, 1);
1643   }
1644   return 0;
1645 }
1646
1647 #ifndef JC1_LITE
1648 /* This is called by the parser to see if an error should be generated
1649    due to numeric overflow.  This function only handles the particular
1650    case of the largest negative value, and is only called in the case
1651    where this value is not preceded by `-'.  */
1652 static void
1653 error_if_numeric_overflow (value)
1654      tree value;
1655 {
1656   if (TREE_CODE (value) == INTEGER_CST && JAVA_RADIX10_FLAG (value))
1657     {
1658       unsigned HOST_WIDE_INT lo, hi;
1659
1660       lo = TREE_INT_CST_LOW (value);
1661       hi = TREE_INT_CST_HIGH (value);
1662       if (TREE_TYPE (value) == long_type_node)
1663         {
1664           int hb = (hi >> 31);
1665           if (hb && !(hi & 0x7fffffff))
1666             java_lex_error ("Numeric overflow for `long' literal", 0);
1667         }
1668       else
1669         {
1670           int hb = (lo >> 31) & 0x1;
1671           if (hb && !(lo & 0x7fffffff))
1672             java_lex_error ("Numeric overflow for `int' literal", 0);
1673         }
1674     }
1675 }
1676 #endif /* JC1_LITE */
1677
1678 static void
1679 java_unicode_2_utf8 (unicode)
1680     unicode_t unicode;
1681 {
1682   if (RANGE (unicode, 0x01, 0x7f))
1683     obstack_1grow (&temporary_obstack, (char)unicode);
1684   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1685     {
1686       obstack_1grow (&temporary_obstack,
1687                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1688       obstack_1grow (&temporary_obstack,
1689                      (unsigned char)(0x80 | (unicode & 0x3f)));
1690     }
1691   else                          /* Range 0x800-0xffff.  */
1692     {
1693       obstack_1grow (&temporary_obstack,
1694                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1695       obstack_1grow (&temporary_obstack,
1696                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1697       obstack_1grow (&temporary_obstack,
1698                      (unsigned char)(0x80 | (unicode & 0x003f)));
1699     }
1700 }
1701
1702 #ifndef JC1_LITE
1703 static tree
1704 build_wfl_node (node)
1705      tree node;
1706 {
1707   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1708   /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1709   TREE_TYPE (node) = NULL_TREE;
1710   return node;
1711 }
1712 #endif
1713
1714 static void
1715 java_lex_error (msg, forward)
1716      const char *msg ATTRIBUTE_UNUSED;
1717      int forward ATTRIBUTE_UNUSED;
1718 {
1719 #ifndef JC1_LITE
1720   ctxp->elc.line = ctxp->c_line->lineno;
1721   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1722
1723   /* Might be caught in the middle of some error report.  */
1724   ctxp->java_error_flag = 0;
1725   java_error (NULL);
1726   java_error (msg);
1727 #endif
1728 }
1729
1730 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, ends a line, and 0
   otherwise.  A line ends with `\n' or with `\r'; for `\r', a `\n'
   that immediately follows is consumed as well, so that a CR/LF pair
   counts as one line terminator.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int lookahead;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* Carriage return: look one character ahead and give it back
     unless it is the matching line feed or the end of the file.  */
  lookahead = getc (fp);
  if (lookahead != '\n' && lookahead != EOF)
    ungetc (lookahead, fp);
  return 1;
}
1750 #endif
1751
1752 char *
1753 java_get_line_col (filename, line, col)
1754      const char *filename ATTRIBUTE_UNUSED;
1755      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1756 {
1757 #ifdef JC1_LITE
1758   return 0;
1759 #else
1760   /* Dumb implementation. Doesn't try to cache or optimize things.  */
1761   /* First line of the file is line 1, first column is 1.  */
1762
1763   /* COL == -1 means, at the CR/LF in LINE.  */
1764   /* COL == -2 means, at the first non space char in LINE.  */
1765
1766   FILE *fp;
1767   int c, ccol, cline = 1;
1768   int current_line_col = 0;
1769   int first_non_space = 0;
1770   char *base;
1771
1772   if (!(fp = fopen (filename, "r")))
1773     fatal_io_error ("can't open %s", filename);
1774
1775   while (cline != line)
1776     {
1777       c = getc (fp);
1778       if (c == EOF)
1779         {
1780           static const char msg[] = "<<file too short - unexpected EOF>>";
1781           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1782           goto have_line;
1783         }
1784       if (java_is_eol (fp, c))
1785         cline++;
1786     }
1787
1788   /* Gather the chars of the current line in a buffer.  */
1789   for (;;)
1790     {
1791       c = getc (fp);
1792       if (c < 0 || java_is_eol (fp, c))
1793         break;
1794       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1795         first_non_space = current_line_col;
1796       obstack_1grow (&temporary_obstack, c);
1797       current_line_col++;
1798     }
1799  have_line:
1800
1801   obstack_1grow (&temporary_obstack, '\n');
1802
1803   if (col == -1)
1804     {
1805       col = current_line_col;
1806       first_non_space = 0;
1807     }
1808   else if (col == -2)
1809     col = first_non_space;
1810   else
1811     first_non_space = 0;
1812
1813   /* Place the '^' a the right position.  */
1814   base = obstack_base (&temporary_obstack);
1815   for (ccol = 1; ccol <= col+3; ccol++)
1816     {
1817       /* Compute \t when reaching first_non_space.  */
1818       char c = (first_non_space ?
1819                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1820       obstack_1grow (&temporary_obstack, c);
1821     }
1822   obstack_grow0 (&temporary_obstack, "^", 1);
1823
1824   fclose (fp);
1825   return obstack_finish (&temporary_obstack);
1826 #endif
1827 }
1828
1829 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME.  Return 0 when they match exactly, the
   difference between the first two characters that differ otherwise,
   or 1 when NAME is exhausted while STR still has bytes left.
   NOTE(review): UTF8_GET is defined elsewhere; presumably it decodes
   one character and advances STR towards LIMIT -- confirm.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected; ++expected)
    {
      int ch = UTF8_GET (str, limit);
      if (ch != *expected)
        return ch - *expected;
    }

  /* All of NAME matched; STR must be used up as well.  */
  if (str == limit)
    return 0;
  return 1;
}
1848
/* All the C++ keywords, in ascending strcmp order.  Entries are
   grouped by first character below; keep the order when adding one.  */

static const char *const cxx_keywords[] =
{
  /* Underscore-prefixed spellings.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Plain keywords, a to z.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1958
1959 /* Return true if NAME is a C++ keyword.  */
1960
1961 int
1962 cxx_keyword_p (name, length)
1963      const char *name;
1964      int length;
1965 {
1966   int last = ARRAY_SIZE (cxx_keywords);
1967   int first = 0;
1968   int mid = (last + first) / 2;
1969   int old = -1;
1970
1971   for (mid = (last + first) / 2;
1972        mid != old;
1973        old = mid, mid = (last + first) / 2)
1974     {
1975       int kwl = strlen (cxx_keywords[mid]);
1976       int min_length = kwl > length ? length : kwl;
1977       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1978
1979       if (r == 0)
1980         {
1981           int i;
1982           /* We've found a match if all the remaining characters are `$'.  */
1983           for (i = min_length; i < length && name[i] == '$'; ++i)
1984             ;
1985           if (i == length)
1986             return 1;
1987           r = 1;
1988         }
1989
1990       if (r < 0)
1991         last = mid;
1992       else
1993         first = mid;
1994     }
1995   return 0;
1996 }
1997 #endif /* JC1_LITE */