gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* It defines java_lex (yylex) that reads a Java ASCII source file
  27    possibly containing Unicode escape sequence or utf8 encoded
  28    characters and returns a token for everything found but comments,
  29    white spaces and line terminators. When necessary, it also fills
  30    the java_lval (yylval) union. It's implemented to be called by a
  31    re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  36
  37 #include "keyword.h"
  38 #include "flags.h"
  39 #include "chartables.h"
  40
  41 /* Function declarations.  */
  42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  44 static void java_lex_error PARAMS ((const char *, int));
  45 #ifndef JC1_LITE
  46 static int java_is_eol PARAMS ((FILE *, int));
  47 static tree build_wfl_node PARAMS ((tree));
  48 #endif
  49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  50 static int java_parse_escape_sequence PARAMS ((void));
  51 static int java_start_char_p PARAMS ((unicode_t));
  52 static int java_part_char_p PARAMS ((unicode_t));
  53 static int java_parse_doc_section PARAMS ((int));
  54 static void java_parse_end_comment PARAMS ((int));
  55 static int java_get_unicode PARAMS ((void));
  56 static int java_read_unicode PARAMS ((java_lexer *, int *));
  57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
  58                                                              int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static int java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 #ifndef JC1_LITE
  65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
  66 #endif
  67
  68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  69 #ifndef JC1_LITE
  70 static void error_if_numeric_overflow PARAMS ((tree));
  71 #endif
  72
  73 #ifdef HAVE_ICONV
  74 /* This is nonzero if we have initialized `need_byteswap'.  */
  75 static int byteswap_init = 0;
  76
  77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
  78    big-endian order -- not native endian order.  We handle this by
  79    doing a conversion once at startup and seeing what happens.  This
  80    flag holds the results of this determination.  */
  81 static int need_byteswap = 0;
  82 #endif
  83
  84 void
  85 java_init_lex (finput, encoding)
  86      FILE *finput;
  87      const char *encoding;
  88 {
  89 #ifndef JC1_LITE
  90   int java_lang_imported = 0;
  91
  92   if (!java_lang_id)
  93     java_lang_id = get_identifier ("java.lang");
  94   if (!java_lang_cloneable)
  95     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  96   if (!java_io_serializable)
  97     java_io_serializable = get_identifier ("java.io.Serializable");
  98   if (!inst_id)
  99     inst_id = get_identifier ("inst$");
 100   if (!wpv_id)
 101     wpv_id = get_identifier ("write_parm_value$");
 102
 103   if (!java_lang_imported)
 104     {
 105       tree node = build_tree_list
 106         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
 107       read_import_dir (TREE_PURPOSE (node));
 108       TREE_CHAIN (node) = ctxp->import_demand_list;
 109       ctxp->import_demand_list = node;
 110       java_lang_imported = 1;
 111     }
 112
 113   if (!wfl_operator)
 114     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
 115   if (!label_id)
 116     label_id = get_identifier ("$L");
 117   if (!wfl_append)
 118     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 119   if (!wfl_string_buffer)
 120     wfl_string_buffer =
 121       build_expr_wfl (get_identifier (flag_emit_class_files
 122                                       ? "java.lang.StringBuffer"
 123                                       : "gnu.gcj.runtime.StringBuffer"),
 124                       NULL, 0, 0);
 125   if (!wfl_to_string)
 126     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 127
 128   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 129     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 130
 131   memset ((PTR) ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
 132   memset ((PTR) current_jcf, 0, sizeof (JCF));
 133   ctxp->current_parsed_class = NULL;
 134   ctxp->package = NULL_TREE;
 135 #endif
 136
 137   ctxp->filename = input_filename;
 138   ctxp->lineno = lineno = 0;
 139   ctxp->p_line = NULL;
 140   ctxp->c_line = NULL;
 141   ctxp->java_error_flag = 0;
 142   ctxp->lexer = java_new_lexer (finput, encoding);
 143 }
 144
 145 static char *
 146 java_sprint_unicode (line, i)
 147     struct java_line *line;
 148     int i;
 149 {
 150   static char buffer [10];
 151   if (line->unicode_escape_p [i] || line->line [i] > 128)
 152     sprintf (buffer, "\\u%04x", line->line [i]);
 153   else
 154     {
 155       buffer [0] = line->line [i];
 156       buffer [1] = '\0';
 157     }
 158   return buffer;
 159 }
 160
 161 static unicode_t
 162 java_sneak_unicode ()
 163 {
 164   return (ctxp->c_line->line [ctxp->c_line->current]);
 165 }
 166
 167 static void
 168 java_unget_unicode ()
 169 {
 170   if (!ctxp->c_line->current)
 171     /* Can't unget unicode.  */
 172     abort ();
 173
 174   ctxp->c_line->current--;
 175   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 176 }
 177
 178 static void
 179 java_allocate_new_line ()
 180 {
 181   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 182   char ahead_escape_p = (ctxp->c_line ?
 183                          ctxp->c_line->unicode_escape_ahead_p : 0);
 184
 185   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 186     {
 187       if (ctxp->p_line)
 188         {
 189           free (ctxp->p_line->unicode_escape_p);
 190           free (ctxp->p_line->line);
 191           free (ctxp->p_line);
 192         }
 193       ctxp->p_line = ctxp->c_line;
 194       ctxp->c_line = NULL;              /* Reallocated.  */
 195     }
 196
 197   if (!ctxp->c_line)
 198     {
 199       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 200       ctxp->c_line->max = JAVA_LINE_MAX;
 201       ctxp->c_line->line = (unicode_t *)xmalloc
 202         (sizeof (unicode_t)*ctxp->c_line->max);
 203       ctxp->c_line->unicode_escape_p =
 204           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 205       ctxp->c_line->white_space_only = 0;
 206     }
 207
 208   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 209   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 210   if (ahead)
 211     {
 212       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 213       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 214       ctxp->c_line->size++;
 215     }
 216   ctxp->c_line->ahead [0] = 0;
 217   ctxp->c_line->unicode_escape_ahead_p = 0;
 218   ctxp->c_line->lineno = ++lineno;
 219   ctxp->c_line->white_space_only = 1;
 220 }
 221
 222 /* Create a new lexer object.  */
 223
 224 java_lexer *
 225 java_new_lexer (finput, encoding)
 226      FILE *finput;
 227      const char *encoding;
 228 {
 229   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 230   int enc_error = 0;
 231
 232   lex->finput = finput;
 233   lex->bs_count = 0;
 234   lex->unget_value = 0;
 235   lex->hit_eof = 0;
 236
 237 #ifdef HAVE_ICONV
 238   lex->handle = iconv_open ("UCS-2", encoding);
 239   if (lex->handle != (iconv_t) -1)
 240     {
 241       lex->first = -1;
 242       lex->last = -1;
 243       lex->out_first = -1;
 244       lex->out_last = -1;
 245       lex->read_anything = 0;
 246       lex->use_fallback = 0;
 247
 248       /* Work around broken iconv() implementations by doing checking at
 249          runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 250          then all UCS-2 encoders will be broken.  Perhaps not a valid
 251          assumption.  */
 252       if (! byteswap_init)
 253         {
 254           iconv_t handle;
 255
 256           byteswap_init = 1;
 257
 258           handle = iconv_open ("UCS-2", "UTF-8");
 259           if (handle != (iconv_t) -1)
 260             {
 261               unicode_t result;
 262               unsigned char in[3];
 263               char *inp, *outp;
 264               size_t inc, outc, r;
 265
 266               /* This is the UTF-8 encoding of \ufeff.  */
 267               in[0] = 0xef;
 268               in[1] = 0xbb;
 269               in[2] = 0xbf;
 270
 271               inp = in;
 272               inc = 3;
 273               outp = (char *) &result;
 274               outc = 2;
 275
 276               r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
 277                          &outp, &outc);
 278               iconv_close (handle);
 279               /* Conversion must be complete for us to use the result.  */
 280               if (r != (size_t) -1 && inc == 0 && outc == 0)
 281                 need_byteswap = (result != 0xfeff);
 282             }
 283         }
 284
 285       lex->byte_swap = need_byteswap;
 286     }
 287   else
 288 #endif /* HAVE_ICONV */
 289     {
 290       /* If iconv failed, use the internal decoder if the default
 291          encoding was requested.  This code is used on platforms where
 292          iconv exists but is insufficient for our needs.  For
 293          instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
 294
 295          On Solaris the default encoding, as returned by nl_langinfo(),
 296          is `646' (aka ASCII), but the Solaris iconv_open() doesn't
 297          understand that.  We work around that by pretending
 298          `646' to be the same as UTF-8.   */
 299       if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
 300         enc_error = 1;
 301 #ifdef HAVE_ICONV
 302       else
 303         lex->use_fallback = 1;
 304 #endif /* HAVE_ICONV */
 305     }
 306
 307   if (enc_error)
 308     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
 309
 310   return lex;
 311 }
 312
 313 void
 314 java_destroy_lexer (lex)
 315      java_lexer *lex;
 316 {
 317 #ifdef HAVE_ICONV
 318   if (! lex->use_fallback)
 319     iconv_close (lex->handle);
 320 #endif
 321   free (lex);
 322 }
 323
 324 static int
 325 java_read_char (lex)
 326      java_lexer *lex;
 327 {
 328   if (lex->unget_value)
 329     {
 330       unicode_t r = lex->unget_value;
 331       lex->unget_value = 0;
 332       return r;
 333     }
 334
 335 #ifdef HAVE_ICONV
 336   if (! lex->use_fallback)
 337     {
 338       size_t ir, inbytesleft, in_save, out_count, out_save;
 339       char *inp, *outp;
 340       unicode_t result;
 341
 342       /* If there is data which has already been converted, use it.  */
 343       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 344         {
 345           lex->out_first = 0;
 346           lex->out_last = 0;
 347
 348           while (1)
 349             {
 350               /* See if we need to read more data.  If FIRST == 0 then
 351                  the previous conversion attempt ended in the middle of
 352                  a character at the end of the buffer.  Otherwise we
 353                  only have to read if the buffer is empty.  */
 354               if (lex->first == 0 || lex->first >= lex->last)
 355                 {
 356                   int r;
 357
 358                   if (lex->first >= lex->last)
 359                     {
 360                       lex->first = 0;
 361                       lex->last = 0;
 362                     }
 363                   if (feof (lex->finput))
 364                     return UEOF;
 365                   r = fread (&lex->buffer[lex->last], 1,
 366                              sizeof (lex->buffer) - lex->last,
 367                              lex->finput);
 368                   lex->last += r;
 369                 }
 370
 371               inbytesleft = lex->last - lex->first;
 372               out_count = sizeof (lex->out_buffer) - lex->out_last;
 373
 374               if (inbytesleft == 0)
 375                 {
 376                   /* We've tried to read and there is nothing left.  */
 377                   return UEOF;
 378                 }
 379
 380               in_save = inbytesleft;
 381               out_save = out_count;
 382               inp = &lex->buffer[lex->first];
 383               outp = &lex->out_buffer[lex->out_last];
 384               ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
 385                           &inbytesleft, &outp, &out_count);
 386
 387               /* If we haven't read any bytes, then look to see if we
 388                  have read a BOM.  */
 389               if (! lex->read_anything && out_save - out_count >= 2)
 390                 {
 391                   unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
 392                   if (uc == 0xfeff)
 393                     {
 394                       lex->byte_swap = 0;
 395                       lex->out_first += 2;
 396                     }
 397                   else if (uc == 0xfffe)
 398                     {
 399                       lex->byte_swap = 1;
 400                       lex->out_first += 2;
 401                     }
 402                   lex->read_anything = 1;
 403                 }
 404
 405               if (lex->byte_swap)
 406                 {
 407                   unsigned int i;
 408                   for (i = 0; i < out_save - out_count; i += 2)
 409                     {
 410                       char t = lex->out_buffer[lex->out_last + i];
 411                       lex->out_buffer[lex->out_last + i]
 412                         = lex->out_buffer[lex->out_last + i + 1];
 413                       lex->out_buffer[lex->out_last + i + 1] = t;
 414                     }
 415                 }
 416
 417               lex->first += in_save - inbytesleft;
 418               lex->out_last += out_save - out_count;
 419
 420               /* If we converted anything at all, move along.  */
 421               if (out_count != out_save)
 422                 break;
 423
 424               if (ir == (size_t) -1)
 425                 {
 426                   if (errno == EINVAL)
 427                     {
 428                       /* This is ok.  This means that the end of our buffer
 429                          is in the middle of a character sequence.  We just
 430                          move the valid part of the buffer to the beginning
 431                          to force a read.  */
 432                       memmove (&lex->buffer[0], &lex->buffer[lex->first],
 433                                lex->last - lex->first);
 434                       lex->last -= lex->first;
 435                       lex->first = 0;
 436                     }
 437                   else
 438                     {
 439                       /* A more serious error.  */
 440                       java_lex_error ("unrecognized character in input stream",
 441                                       0);
 442                       return UEOF;
 443                     }
 444                 }
 445             }
 446         }
 447
 448       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 449         {
 450           /* Don't have any data.  */
 451           return UEOF;
 452         }
 453
 454       /* Success.  */
 455       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
 456       lex->out_first += 2;
 457       return result;
 458     }
 459   else
 460 #endif /* HAVE_ICONV */
 461     {
 462       int c, c1, c2;
 463       c = getc (lex->finput);
 464
 465       if (c == EOF)
 466         return UEOF;
 467       if (c < 128)
 468         return (unicode_t) c;
 469       else
 470         {
 471           if ((c & 0xe0) == 0xc0)
 472             {
 473               c1 = getc (lex->finput);
 474               if ((c1 & 0xc0) == 0x80)
 475                 {
 476                   unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
 477                   /* Check for valid 2-byte characters.  We explicitly
 478                      allow \0 because this encoding is common in the
 479                      Java world.  */
 480                   if (r == 0 || (r >= 0x80 && r <= 0x7ff))
 481                     return r;
 482                 }
 483             }
 484           else if ((c & 0xf0) == 0xe0)
 485             {
 486               c1 = getc (lex->finput);
 487               if ((c1 & 0xc0) == 0x80)
 488                 {
 489                   c2 = getc (lex->finput);
 490                   if ((c2 & 0xc0) == 0x80)
 491                     {
 492                       unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
 493                                                  (( c1 & 0x3f) << 6)
 494                                                  + (c2 & 0x3f));
 495                       /* Check for valid 3-byte characters.
 496                          Don't allow surrogate, \ufffe or \uffff.  */
 497                       if (IN_RANGE (r, 0x800, 0xffff)
 498                           && ! IN_RANGE (r, 0xd800, 0xdfff)
 499                           && r != 0xfffe && r != 0xffff)
 500                         return r;
 501                     }
 502                 }
 503             }
 504
 505           /* We simply don't support invalid characters.  We also
 506              don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
 507              cannot be valid Java characters.  */
 508           java_lex_error ("malformed UTF-8 character", 0);
 509         }
 510     }
 511
 512   /* We only get here on error.  */
 513   return UEOF;
 514 }
 515
 516 static void
 517 java_store_unicode (l, c, unicode_escape_p)
 518     struct java_line *l;
 519     unicode_t c;
 520     int unicode_escape_p;
 521 {
 522   if (l->size == l->max)
 523     {
 524       l->max += JAVA_LINE_MAX;
 525       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 526       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 527                                                sizeof (char)*l->max);
 528     }
 529   l->line [l->size] = c;
 530   l->unicode_escape_p [l->size++] = unicode_escape_p;
 531 }
 532
 533 static int
 534 java_read_unicode (lex, unicode_escape_p)
 535      java_lexer *lex;
 536      int *unicode_escape_p;
 537 {
 538   int c;
 539
 540   c = java_read_char (lex);
 541   *unicode_escape_p = 0;
 542
 543   if (c != '\\')
 544     {
 545       lex->bs_count = 0;
 546       return c;
 547     }
 548
 549   ++lex->bs_count;
 550   if ((lex->bs_count) % 2 == 1)
 551     {
 552       /* Odd number of \ seen.  */
 553       c = java_read_char (lex);
 554       if (c == 'u')
 555         {
 556           unicode_t unicode = 0;
 557           int shift = 12;
 558
 559           /* Recognize any number of `u's in \u.  */
 560           while ((c = java_read_char (lex)) == 'u')
 561             ;
 562
 563           /* Unget the most recent character as it is not a `u'.  */
 564           if (c == UEOF)
 565             return UEOF;
 566           lex->unget_value = c;
 567
 568           /* Next should be 4 hex digits, otherwise it's an error.
 569              The hex value is converted into the unicode, pushed into
 570              the Unicode stream.  */
 571           for (shift = 12; shift >= 0; shift -= 4)
 572             {
 573               if ((c = java_read_char (lex)) == UEOF)
 574                 return UEOF;
 575               if (hex_p (c))
 576                 unicode |= (unicode_t)(hex_value (c) << shift);
 577               else
 578                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 579             }
 580           lex->bs_count = 0;
 581           *unicode_escape_p = 1;
 582           return unicode;
 583         }
 584       lex->unget_value = c;
 585     }
 586   return (unicode_t) '\\';
 587 }
 588
 589 static int
 590 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
 591      java_lexer *lex;
 592      int *unicode_escape_p;
 593 {
 594   int c = java_read_unicode (lex, unicode_escape_p);
 595
 596   if (c == '\r')
 597     {
 598       /* We have to read ahead to see if we got \r\n.  In that case we
 599          return a single line terminator.  */
 600       int dummy;
 601       c = java_read_unicode (lex, &dummy);
 602       if (c != '\n' && c != UEOF)
 603         lex->unget_value = c;
 604       /* In either case we must return a newline.  */
 605       c = '\n';
 606     }
 607
 608   return c;
 609 }
 610
 611 static int
 612 java_get_unicode ()
 613 {
 614   /* It's time to read a line when...  */
 615   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 616     {
 617       int c;
 618       int found_chars = 0;
 619
 620       if (ctxp->lexer->hit_eof)
 621         return UEOF;
 622
 623       java_allocate_new_line ();
 624       if (ctxp->c_line->line[0] != '\n')
 625         {
 626           for (;;)
 627             {
 628               int unicode_escape_p;
 629               c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 630                                                             &unicode_escape_p);
 631               if (c != UEOF)
 632                 {
 633                   found_chars = 1;
 634                   java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 635                   if (ctxp->c_line->white_space_only
 636                       && !JAVA_WHITE_SPACE_P (c)
 637                       && c != '\n')
 638                     ctxp->c_line->white_space_only = 0;
 639                 }
 640               if ((c == '\n') || (c == UEOF))
 641                 break;
 642             }
 643
 644           if (c == UEOF && ! found_chars)
 645             {
 646               ctxp->lexer->hit_eof = 1;
 647               return UEOF;
 648             }
 649         }
 650     }
 651   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 652   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 653   return ctxp->c_line->line [ctxp->c_line->current++];
 654 }
 655
 656 /* Parse the end of a C style comment.
 657  * C is the first character following the '/' and '*'.  */
 658 static void
 659 java_parse_end_comment (c)
 660      int c;
 661 {
 662   for ( ;; c = java_get_unicode ())
 663     {
 664       switch (c)
 665         {
 666         case UEOF:
 667           java_lex_error ("Comment not terminated at end of input", 0);
 668           return;
 669         case '*':
 670           switch (c = java_get_unicode ())
 671             {
 672             case UEOF:
 673               java_lex_error ("Comment not terminated at end of input", 0);
 674               return;
 675             case '/':
 676               return;
 677             case '*':   /* Reparse only '*'.  */
 678               java_unget_unicode ();
 679             }
 680         }
 681     }
 682 }
 683
 684 /* Parse the documentation section. Keywords must be at the beginning
 685    of a documentation comment line (ignoring white space and any `*'
 686    character). Parsed keyword(s): @DEPRECATED.  */
 687
 688 static int
 689 java_parse_doc_section (c)
 690      int c;
 691 {
 692   int valid_tag = 0, seen_star = 0;
 693
 694   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 695     {
 696       switch (c)
 697         {
 698         case '*':
 699           seen_star = 1;
 700           break;
 701         case '\n': /* ULT */
 702           valid_tag = 1;
 703         default:
 704           seen_star = 0;
 705         }
 706       c = java_get_unicode();
 707     }
 708
 709   if (c == UEOF)
 710     java_lex_error ("Comment not terminated at end of input", 0);
 711
 712   if (seen_star && (c == '/'))
 713     return 1;                   /* Goto step1 in caller.  */
 714
 715   /* We're parsing `@deprecated'.  */
 716   if (valid_tag && (c == '@'))
 717     {
 718       char tag [11];
 719       int  tag_index = 0;
 720
 721       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 722         {
 723           c = java_get_unicode ();
 724           tag [tag_index++] = c;
 725         }
 726
 727       if (c == UEOF)
 728         java_lex_error ("Comment not terminated at end of input", 0);
 729       tag [tag_index] = '\0';
 730
 731       if (!strcmp (tag, "deprecated"))
 732         ctxp->deprecated = 1;
 733     }
 734   java_unget_unicode ();
 735   return 0;
 736 }
 737
 738 /* Return true if C is a valid start character for a Java identifier.
 739    This is only called if C >= 128 -- smaller values are handled
 740    inline.  However, this function handles all values anyway.  */
 741 static int
 742 java_start_char_p (c)
 743      unicode_t c;
 744 {
 745   unsigned int hi = c / 256;
 746   const char *const page = type_table[hi];
 747   unsigned long val = (unsigned long) page;
 748   int flags;
 749
 750   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 751     flags = page[c & 255];
 752   else
 753     flags = val;
 754
 755   return flags & LETTER_START;
 756 }
 757
 758 /* Return true if C is a valid part character for a Java identifier.
 759    This is only called if C >= 128 -- smaller values are handled
 760    inline.  However, this function handles all values anyway.  */
 761 static int
 762 java_part_char_p (c)
 763      unicode_t c;
 764 {
 765   unsigned int hi = c / 256;
 766   const char *const page = type_table[hi];
 767   unsigned long val = (unsigned long) page;
 768   int flags;
 769
 770   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 771     flags = page[c & 255];
 772   else
 773     flags = val;
 774
 775   return flags & LETTER_PART;
 776 }
 777
 778 static int
 779 java_parse_escape_sequence ()
 780 {
 781   unicode_t char_lit;
 782   int c;
 783
 784   switch (c = java_get_unicode ())
 785     {
 786     case 'b':
 787       return (unicode_t)0x8;
 788     case 't':
 789       return (unicode_t)0x9;
 790     case 'n':
 791       return (unicode_t)0xa;
 792     case 'f':
 793       return (unicode_t)0xc;
 794     case 'r':
 795       return (unicode_t)0xd;
 796     case '"':
 797       return (unicode_t)0x22;
 798     case '\'':
 799       return (unicode_t)0x27;
 800     case '\\':
 801       return (unicode_t)0x5c;
 802     case '0': case '1': case '2': case '3': case '4':
 803     case '5': case '6': case '7':
 804       {
 805         int octal_escape[3];
 806         int octal_escape_index = 0;
 807         int max = 3;
 808         int i, shift;
 809
 810         for (; octal_escape_index < max && RANGE (c, '0', '7');
 811              c = java_get_unicode ())
 812           {
 813             if (octal_escape_index == 0 && c > '3')
 814               {
 815                 /* According to the grammar, `\477' has a well-defined
 816                    meaning -- it is `\47' followed by `7'.  */
 817                 --max;
 818               }
 819             octal_escape [octal_escape_index++] = c;
 820           }
 821
 822         java_unget_unicode ();
 823
 824         for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 825              i < octal_escape_index; i++, shift -= 3)
 826           char_lit |= (octal_escape [i] - '0') << shift;
 827
 828         return char_lit;
 829       }
 830     default:
 831       java_lex_error ("Invalid character in escape sequence", 0);
 832       return JAVA_CHAR_ERROR;
 833     }
 834 }
 835
 836 #ifndef JC1_LITE
 837 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
 838
 839 /* Subroutine of java_lex: converts floating-point literals to tree
 840    nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
 841    store the result.  FFLAG indicates whether the literal was tagged
 842    with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
 843    is the line number on which to report any error.  */
 844
 845 static void java_perform_atof   PARAMS ((YYSTYPE *, char *, int, int));
 846
 847 static void
 848 java_perform_atof (java_lval, literal_token, fflag, number_beginning)
 849      YYSTYPE *java_lval;
 850      char *literal_token;
 851      int fflag;
 852      int number_beginning;
 853 {
 854   REAL_VALUE_TYPE value;
 855   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 856
 857   SET_REAL_VALUE_ATOF (value,
 858                        REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
 859
 860   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
 861     {
 862       JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
 863       value = DCONST0;
 864     }
 865   else if (IS_ZERO (value))
 866     {
 867       /* We check to see if the value is really 0 or if we've found an
 868          underflow.  We do this in the most primitive imaginable way.  */
 869       int really_zero = 1;
 870       char *p = literal_token;
 871       if (*p == '-')
 872         ++p;
 873       while (*p && *p != 'e' && *p != 'E')
 874         {
 875           if (*p != '0' && *p != '.')
 876             {
 877               really_zero = 0;
 878               break;
 879             }
 880           ++p;
 881         }
 882       if (! really_zero)
 883         {
 884           int i = ctxp->c_line->current;
 885           ctxp->c_line->current = number_beginning;
 886           java_lex_error ("Floating point literal underflow", 0);
 887           ctxp->c_line->current = i;
 888         }
 889     }
 890
 891   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 892 }
 893 #endif
 894
 895 static int yylex                PARAMS ((YYSTYPE *));
 896
 897 static int
 898 #ifdef JC1_LITE
 899 yylex (java_lval)
 900 #else
 901 java_lex (java_lval)
 902 #endif
 903      YYSTYPE *java_lval;
 904 {
 905   int c;
 906   unicode_t first_unicode;
 907   int ascii_index, all_ascii;
 908   char *string;
 909
 910   /* Translation of the Unicode escape in the raw stream of Unicode
 911      characters. Takes care of line terminator.  */
 912  step1:
 913   /* Skip white spaces: SP, TAB and FF or ULT.  */
 914   for (c = java_get_unicode ();
 915        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 916     if (c == '\n')
 917       {
 918         ctxp->elc.line = ctxp->c_line->lineno;
 919         ctxp->elc.col  = ctxp->c_line->char_col-2;
 920       }
 921
 922   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 923
 924   if (c == 0x1a)                /* CTRL-Z.  */
 925     {
 926       if ((c = java_get_unicode ()) == UEOF)
 927         return 0;               /* Ok here.  */
 928       else
 929         java_unget_unicode ();  /* Caught later, at the end of the
 930                                    function.  */
 931     }
 932   /* Handle EOF here.  */
 933   if (c == UEOF)        /* Should probably do something here...  */
 934     return 0;
 935
 936   /* Take care of eventual comments.  */
 937   if (c == '/')
 938     {
 939       switch (c = java_get_unicode ())
 940         {
 941         case '/':
 942           for (;;)
 943             {
 944               c = java_get_unicode ();
 945               if (c == UEOF)
 946                 {
 947                   /* It is ok to end a `//' comment with EOF, unless
 948                      we're being pedantic.  */
 949                   if (pedantic)
 950                     java_lex_error ("Comment not terminated at end of input",
 951                                     0);
 952                   return 0;
 953                 }
 954               if (c == '\n')    /* ULT */
 955                 goto step1;
 956             }
 957           break;
 958
 959         case '*':
 960           if ((c = java_get_unicode ()) == '*')
 961             {
 962               if ((c = java_get_unicode ()) == '/')
 963                 goto step1;     /* Empty documentation comment.  */
 964               else if (java_parse_doc_section (c))
 965                 goto step1;
 966             }
 967
 968           java_parse_end_comment ((c = java_get_unicode ()));
 969           goto step1;
 970           break;
 971         default:
 972           java_unget_unicode ();
 973           c = '/';
 974           break;
 975         }
 976     }
 977
 978   ctxp->elc.line = ctxp->c_line->lineno;
 979   ctxp->elc.prev_col = ctxp->elc.col;
 980   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 981   if (ctxp->elc.col < 0)
 982     abort ();
 983
 984   /* Numeric literals.  */
 985   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 986     {
 987       /* This section of code is borrowed from gcc/c-lex.c.  */
 988 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 989       int parts[TOTAL_PARTS];
 990       HOST_WIDE_INT high, low;
 991       /* End borrowed section.  */
 992       char literal_token [256];
 993       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 994       int  found_hex_digits = 0, found_non_octal_digits = 0;
 995       int  i;
 996 #ifndef JC1_LITE
 997       int  number_beginning = ctxp->c_line->current;
 998       tree value;
 999 #endif
1000
1001       /* We might have a . separator instead of a FP like .[0-9]*.  */
1002       if (c == '.')
1003         {
1004           unicode_t peep = java_sneak_unicode ();
1005
1006           if (!JAVA_ASCII_DIGIT (peep))
1007             {
1008               JAVA_LEX_SEP('.');
1009               BUILD_OPERATOR (DOT_TK);
1010             }
1011         }
1012
1013       for (i = 0; i < TOTAL_PARTS; i++)
1014         parts [i] = 0;
1015
1016       if (c == '0')
1017         {
1018           c = java_get_unicode ();
1019           if (c == 'x' || c == 'X')
1020             {
1021               radix = 16;
1022               c = java_get_unicode ();
1023             }
1024           else if (JAVA_ASCII_DIGIT (c))
1025             radix = 8;
1026           else if (c == '.' || c == 'e' || c =='E')
1027             {
1028               /* Push the '.', 'e', or 'E' back and prepare for a FP
1029                  parsing...  */
1030               java_unget_unicode ();
1031               c = '0';
1032             }
1033           else
1034             {
1035               /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1036               JAVA_LEX_LIT ("0", 10);
1037               switch (c)
1038                 {
1039                 case 'L': case 'l':
1040                   SET_LVAL_NODE (long_zero_node);
1041                   return (INT_LIT_TK);
1042                 case 'f': case 'F':
1043                   SET_LVAL_NODE (float_zero_node);
1044                   return (FP_LIT_TK);
1045                 case 'd': case 'D':
1046                   SET_LVAL_NODE (double_zero_node);
1047                   return (FP_LIT_TK);
1048                 default:
1049                   java_unget_unicode ();
1050                   SET_LVAL_NODE (integer_zero_node);
1051                   return (INT_LIT_TK);
1052                 }
1053             }
1054         }
1055       /* Parse the first part of the literal, until we find something
1056          which is not a number.  */
1057       while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1058              JAVA_ASCII_DIGIT (c))
1059         {
1060           /* We store in a string (in case it turns out to be a FP) and in
1061              PARTS if we have to process a integer literal.  */
1062           int numeric = hex_value (c);
1063           int count;
1064
1065           /* Remember when we find a valid hexadecimal digit.  */
1066           if (radix == 16)
1067             found_hex_digits = 1;
1068           /* Remember when we find an invalid octal digit.  */
1069           else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1070             found_non_octal_digits = 1;
1071
1072           literal_token [literal_index++] = c;
1073           /* This section of code if borrowed from gcc/c-lex.c.  */
1074           for (count = 0; count < TOTAL_PARTS; count++)
1075             {
1076               parts[count] *= radix;
1077               if (count)
1078                 {
1079                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1080                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1081                 }
1082               else
1083                 parts[0] += numeric;
1084             }
1085           if (parts [TOTAL_PARTS-1] != 0)
1086             overflow = 1;
1087           /* End borrowed section.  */
1088           c = java_get_unicode ();
1089         }
1090
1091       /* If we have something from the FP char set but not a digit, parse
1092          a FP literal.  */
1093       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1094         {
1095           int stage = 0;
1096           int seen_digit = (literal_index ? 1 : 0);
1097           int seen_exponent = 0;
1098           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
1099                                    double unless specified.  */
1100
1101           /* It is ok if the radix is 8 because this just means we've
1102              seen a leading `0'.  However, radix==16 is invalid.  */
1103           if (radix == 16)
1104             java_lex_error ("Can't express non-decimal FP literal", 0);
1105           radix = 10;
1106
1107           for (;;)
1108             {
1109               if (c == '.')
1110                 {
1111                   if (stage < 1)
1112                     {
1113                       stage = 1;
1114                       literal_token [literal_index++ ] = c;
1115                       c = java_get_unicode ();
1116                     }
1117                   else
1118                     java_lex_error ("Invalid character in FP literal", 0);
1119                 }
1120
1121               if (c == 'e' || c == 'E')
1122                 {
1123                   if (stage < 2)
1124                     {
1125                       /* {E,e} must have seen at least a digit.  */
1126                       if (!seen_digit)
1127                         java_lex_error
1128                           ("Invalid FP literal, mantissa must have digit", 0);
1129                       seen_digit = 0;
1130                       seen_exponent = 1;
1131                       stage = 2;
1132                       literal_token [literal_index++] = c;
1133                       c = java_get_unicode ();
1134                     }
1135                   else
1136                     java_lex_error ("Invalid character in FP literal", 0);
1137                 }
1138               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1139                 {
1140                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1141                   stage = 4;    /* So we fall through.  */
1142                 }
1143
1144               if ((c=='-' || c =='+') && stage == 2)
1145                 {
1146                   stage = 3;
1147                   literal_token [literal_index++] = c;
1148                   c = java_get_unicode ();
1149                 }
1150
1151               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1152                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1153                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1154                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
1155                 {
1156                   if (JAVA_ASCII_DIGIT (c))
1157                     seen_digit = 1;
1158                   if (stage == 2)
1159                     stage = 3;
1160                   literal_token [literal_index++ ] = c;
1161                   c = java_get_unicode ();
1162                 }
1163               else
1164                 {
1165                   if (stage != 4) /* Don't push back fF/dD.  */
1166                     java_unget_unicode ();
1167
1168                   /* An exponent (if any) must have seen a digit.  */
1169                   if (seen_exponent && !seen_digit)
1170                     java_lex_error
1171                       ("Invalid FP literal, exponent must have digit", 0);
1172
1173                   literal_token [literal_index] = '\0';
1174                   JAVA_LEX_LIT (literal_token, radix);
1175
1176 #ifndef JC1_LITE
1177                   java_perform_atof (java_lval, literal_token,
1178                                      fflag, number_beginning);
1179 #endif
1180                   return FP_LIT_TK;
1181                 }
1182             }
1183         } /* JAVA_ASCII_FPCHAR (c) */
1184
1185       /* Here we get back to converting the integral literal.  */
1186       if (radix == 16 && ! found_hex_digits)
1187         java_lex_error
1188           ("0x must be followed by at least one hexadecimal digit", 0);
1189       else if (radix == 8 && found_non_octal_digits)
1190         java_lex_error ("Octal literal contains digit out of range", 0);
1191       else if (c == 'L' || c == 'l')
1192         long_suffix = 1;
1193       else
1194         java_unget_unicode ();
1195
1196 #ifdef JAVA_LEX_DEBUG
1197       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1198       JAVA_LEX_LIT (literal_token, radix);
1199 #endif
1200       /* This section of code is borrowed from gcc/c-lex.c.  */
1201       if (!overflow)
1202         {
1203           bytes = GET_TYPE_PRECISION (long_type_node);
1204           for (i = bytes; i < TOTAL_PARTS; i++)
1205             if (parts [i])
1206               {
1207                 overflow = 1;
1208                 break;
1209               }
1210         }
1211       high = low = 0;
1212       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1213         {
1214           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1215                                               / HOST_BITS_PER_CHAR)]
1216                    << (i * HOST_BITS_PER_CHAR));
1217           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1218         }
1219       /* End borrowed section.  */
1220
1221 #ifndef JC1_LITE
1222       /* Range checking.  */
1223       value = build_int_2 (low, high);
1224       /* Temporarily set type to unsigned.  */
1225       SET_LVAL_NODE_TYPE (value, (long_suffix
1226                                   ? unsigned_long_type_node
1227                                   : unsigned_int_type_node));
1228
1229       /* For base 10 numbers, only values up to the highest value
1230          (plus one) can be written.  For instance, only ints up to
1231          2147483648 can be written.  The special case of the largest
1232          negative value is handled elsewhere.  For other bases, any
1233          number can be represented.  */
1234       if (overflow || (radix == 10
1235                        && tree_int_cst_lt (long_suffix
1236                                            ? decimal_long_max
1237                                            : decimal_int_max,
1238                                            value)))
1239         {
1240           if (long_suffix)
1241             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1242           else
1243             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1244         }
1245
1246       /* Sign extend the value.  */
1247       SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1248       force_fit_type (value, 0);
1249       JAVA_RADIX10_FLAG (value) = radix == 10;
1250 #else
1251       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1252                           long_suffix ? long_type_node : int_type_node);
1253 #endif
1254       return INT_LIT_TK;
1255     }
1256
1257   /* Character literals.  */
1258   if (c == '\'')
1259     {
1260       int char_lit;
1261       if ((c = java_get_unicode ()) == '\\')
1262         char_lit = java_parse_escape_sequence ();
1263       else
1264         {
1265           if (c == '\n' || c == '\'')
1266             java_lex_error ("Invalid character literal", 0);
1267           char_lit = c;
1268         }
1269
1270       c = java_get_unicode ();
1271
1272       if ((c == '\n') || (c == UEOF))
1273         java_lex_error ("Character literal not terminated at end of line", 0);
1274       if (c != '\'')
1275         java_lex_error ("Syntax error in character literal", 0);
1276
1277       if (char_lit == JAVA_CHAR_ERROR)
1278         char_lit = 0;           /* We silently convert it to zero.  */
1279
1280       JAVA_LEX_CHAR_LIT (char_lit);
1281       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1282       return CHAR_LIT_TK;
1283     }
1284
1285   /* String literals.  */
1286   if (c == '"')
1287     {
1288       int no_error;
1289       char *string;
1290
1291       for (no_error = 1, c = java_get_unicode ();
1292            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1293         {
1294           if (c == '\\')
1295             c = java_parse_escape_sequence ();
1296           if (c == JAVA_CHAR_ERROR)
1297             {
1298               no_error = 0;
1299               c = 0;            /* We silently convert it to zero.  */
1300             }
1301           java_unicode_2_utf8 (c);
1302         }
1303       if (c == '\n' || c == UEOF) /* ULT.  */
1304         {
1305           lineno--;     /* Refer to the line where the terminator was seen.  */
1306           java_lex_error ("String not terminated at end of line", 0);
1307           lineno++;
1308         }
1309
1310       obstack_1grow (&temporary_obstack, '\0');
1311       string = obstack_finish (&temporary_obstack);
1312 #ifndef JC1_LITE
1313       if (!no_error || (c != '"'))
1314         java_lval->node = error_mark_node; /* FIXME: Requires futher
1315                                               testing.  */
1316       else
1317         java_lval->node = build_string (strlen (string), string);
1318 #endif
1319       obstack_free (&temporary_obstack, string);
1320       return STRING_LIT_TK;
1321     }
1322
1323   /* Separator.  */
1324   switch (c)
1325     {
1326     case '(':
1327       JAVA_LEX_SEP (c);
1328       BUILD_OPERATOR (OP_TK);
1329     case ')':
1330       JAVA_LEX_SEP (c);
1331       return CP_TK;
1332     case '{':
1333       JAVA_LEX_SEP (c);
1334       if (ctxp->ccb_indent == 1)
1335         ctxp->first_ccb_indent1 = lineno;
1336       ctxp->ccb_indent++;
1337       BUILD_OPERATOR (OCB_TK);
1338     case '}':
1339       JAVA_LEX_SEP (c);
1340       ctxp->ccb_indent--;
1341       if (ctxp->ccb_indent == 1)
1342         ctxp->last_ccb_indent1 = lineno;
1343       BUILD_OPERATOR (CCB_TK);
1344     case '[':
1345       JAVA_LEX_SEP (c);
1346       BUILD_OPERATOR (OSB_TK);
1347     case ']':
1348       JAVA_LEX_SEP (c);
1349       return CSB_TK;
1350     case ';':
1351       JAVA_LEX_SEP (c);
1352       return SC_TK;
1353     case ',':
1354       JAVA_LEX_SEP (c);
1355       return C_TK;
1356     case '.':
1357       JAVA_LEX_SEP (c);
1358       BUILD_OPERATOR (DOT_TK);
1359       /*      return DOT_TK; */
1360     }
1361
1362   /* Operators.  */
1363   switch (c)
1364     {
1365     case '=':
1366       if ((c = java_get_unicode ()) == '=')
1367         {
1368           BUILD_OPERATOR (EQ_TK);
1369         }
1370       else
1371         {
1372           /* Equals is used in two different locations. In the
1373              variable_declarator: rule, it has to be seen as '=' as opposed
1374              to being seen as an ordinary assignment operator in
1375              assignment_operators: rule.  */
1376           java_unget_unicode ();
1377           BUILD_OPERATOR (ASSIGN_TK);
1378         }
1379
1380     case '>':
1381       switch ((c = java_get_unicode ()))
1382         {
1383         case '=':
1384           BUILD_OPERATOR (GTE_TK);
1385         case '>':
1386           switch ((c = java_get_unicode ()))
1387             {
1388             case '>':
1389               if ((c = java_get_unicode ()) == '=')
1390                 {
1391                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1392                 }
1393               else
1394                 {
1395                   java_unget_unicode ();
1396                   BUILD_OPERATOR (ZRS_TK);
1397                 }
1398             case '=':
1399               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1400             default:
1401               java_unget_unicode ();
1402               BUILD_OPERATOR (SRS_TK);
1403             }
1404         default:
1405           java_unget_unicode ();
1406           BUILD_OPERATOR (GT_TK);
1407         }
1408
1409     case '<':
1410       switch ((c = java_get_unicode ()))
1411         {
1412         case '=':
1413           BUILD_OPERATOR (LTE_TK);
1414         case '<':
1415           if ((c = java_get_unicode ()) == '=')
1416             {
1417               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1418             }
1419           else
1420             {
1421               java_unget_unicode ();
1422               BUILD_OPERATOR (LS_TK);
1423             }
1424         default:
1425           java_unget_unicode ();
1426           BUILD_OPERATOR (LT_TK);
1427         }
1428
1429     case '&':
1430       switch ((c = java_get_unicode ()))
1431         {
1432         case '&':
1433           BUILD_OPERATOR (BOOL_AND_TK);
1434         case '=':
1435           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1436         default:
1437           java_unget_unicode ();
1438           BUILD_OPERATOR (AND_TK);
1439         }
1440
1441     case '|':
1442       switch ((c = java_get_unicode ()))
1443         {
1444         case '|':
1445           BUILD_OPERATOR (BOOL_OR_TK);
1446         case '=':
1447           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1448         default:
1449           java_unget_unicode ();
1450           BUILD_OPERATOR (OR_TK);
1451         }
1452
1453     case '+':
1454       switch ((c = java_get_unicode ()))
1455         {
1456         case '+':
1457           BUILD_OPERATOR (INCR_TK);
1458         case '=':
1459           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1460         default:
1461           java_unget_unicode ();
1462           BUILD_OPERATOR (PLUS_TK);
1463         }
1464
1465     case '-':
1466       switch ((c = java_get_unicode ()))
1467         {
1468         case '-':
1469           BUILD_OPERATOR (DECR_TK);
1470         case '=':
1471           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1472         default:
1473           java_unget_unicode ();
1474           BUILD_OPERATOR (MINUS_TK);
1475         }
1476
1477     case '*':
1478       if ((c = java_get_unicode ()) == '=')
1479         {
1480           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1481         }
1482       else
1483         {
1484           java_unget_unicode ();
1485           BUILD_OPERATOR (MULT_TK);
1486         }
1487
1488     case '/':
1489       if ((c = java_get_unicode ()) == '=')
1490         {
1491           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1492         }
1493       else
1494         {
1495           java_unget_unicode ();
1496           BUILD_OPERATOR (DIV_TK);
1497         }
1498
1499     case '^':
1500       if ((c = java_get_unicode ()) == '=')
1501         {
1502           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1503         }
1504       else
1505         {
1506           java_unget_unicode ();
1507           BUILD_OPERATOR (XOR_TK);
1508         }
1509
1510     case '%':
1511       if ((c = java_get_unicode ()) == '=')
1512         {
1513           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1514         }
1515       else
1516         {
1517           java_unget_unicode ();
1518           BUILD_OPERATOR (REM_TK);
1519         }
1520
1521     case '!':
1522       if ((c = java_get_unicode()) == '=')
1523         {
1524           BUILD_OPERATOR (NEQ_TK);
1525         }
1526       else
1527         {
1528           java_unget_unicode ();
1529           BUILD_OPERATOR (NEG_TK);
1530         }
1531
1532     case '?':
1533       JAVA_LEX_OP ("?");
1534       BUILD_OPERATOR (REL_QM_TK);
1535     case ':':
1536       JAVA_LEX_OP (":");
1537       BUILD_OPERATOR (REL_CL_TK);
1538     case '~':
1539       BUILD_OPERATOR (NOT_TK);
1540     }
1541
1542   /* Keyword, boolean literal or null literal.  */
1543   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1544        JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1545     {
1546       java_unicode_2_utf8 (c);
1547       if (all_ascii && c >= 128)
1548         all_ascii = 0;
1549       ascii_index++;
1550     }
1551
1552   obstack_1grow (&temporary_obstack, '\0');
1553   string = obstack_finish (&temporary_obstack);
1554   java_unget_unicode ();
1555
1556   /* If we have something all ascii, we consider a keyword, a boolean
1557      literal, a null literal or an all ASCII identifier.  Otherwise,
1558      this is an identifier (possibly not respecting formation rule).  */
1559   if (all_ascii)
1560     {
1561       const struct java_keyword *kw;
1562       if ((kw=java_keyword (string, ascii_index)))
1563         {
1564           JAVA_LEX_KW (string);
1565           switch (kw->token)
1566             {
1567             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1568             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1569             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1570             case PRIVATE_TK:      case STRICT_TK:
1571               SET_MODIFIER_CTX (kw->token);
1572               return MODIFIER_TK;
1573             case FLOAT_TK:
1574               SET_LVAL_NODE (float_type_node);
1575               return FP_TK;
1576             case DOUBLE_TK:
1577               SET_LVAL_NODE (double_type_node);
1578               return FP_TK;
1579             case BOOLEAN_TK:
1580               SET_LVAL_NODE (boolean_type_node);
1581               return BOOLEAN_TK;
1582             case BYTE_TK:
1583               SET_LVAL_NODE (byte_type_node);
1584               return INTEGRAL_TK;
1585             case SHORT_TK:
1586               SET_LVAL_NODE (short_type_node);
1587               return INTEGRAL_TK;
1588             case INT_TK:
1589               SET_LVAL_NODE (int_type_node);
1590               return INTEGRAL_TK;
1591             case LONG_TK:
1592               SET_LVAL_NODE (long_type_node);
1593               return INTEGRAL_TK;
1594             case CHAR_TK:
1595               SET_LVAL_NODE (char_type_node);
1596               return INTEGRAL_TK;
1597
1598               /* Keyword based literals.  */
1599             case TRUE_TK:
1600             case FALSE_TK:
1601               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1602                               boolean_true_node : boolean_false_node));
1603               return BOOL_LIT_TK;
1604             case NULL_TK:
1605               SET_LVAL_NODE (null_pointer_node);
1606               return NULL_TK;
1607
1608             case ASSERT_TK:
1609               if (flag_assert)
1610                 {
1611                   BUILD_OPERATOR (kw->token);
1612                   return kw->token;
1613                 }
1614               else
1615                 break;
1616
1617               /* Some keyword we want to retain information on the location
1618                  they where found.  */
1619             case CASE_TK:
1620             case DEFAULT_TK:
1621             case SUPER_TK:
1622             case THIS_TK:
1623             case RETURN_TK:
1624             case BREAK_TK:
1625             case CONTINUE_TK:
1626             case TRY_TK:
1627             case CATCH_TK:
1628             case THROW_TK:
1629             case INSTANCEOF_TK:
1630               BUILD_OPERATOR (kw->token);
1631
1632             default:
1633               return kw->token;
1634             }
1635         }
1636     }
1637
1638   /* We may have an ID here.  */
1639   if (JAVA_START_CHAR_P (first_unicode))
1640     {
1641       JAVA_LEX_ID (string);
1642       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1643       return ID_TK;
1644     }
1645
1646   /* Everything else is an invalid character in the input.  */
1647   {
1648     char lex_error_buffer [128];
1649     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1650              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1651     java_lex_error (lex_error_buffer, 1);
1652   }
1653   return 0;
1654 }
1655
1656 #ifndef JC1_LITE
1657 /* This is called by the parser to see if an error should be generated
1658    due to numeric overflow.  This function only handles the particular
1659    case of the largest negative value, and is only called in the case
1660    where this value is not preceded by `-'.  */
1661 static void
1662 error_if_numeric_overflow (value)
1663      tree value;
1664 {
1665   if (TREE_CODE (value) == INTEGER_CST
1666       && JAVA_RADIX10_FLAG (value)
1667       && tree_int_cst_sgn (value) < 0)
1668     {
1669       if (TREE_TYPE (value) == long_type_node)
1670         java_lex_error ("Numeric overflow for `long' literal", 0);
1671       else
1672         java_lex_error ("Numeric overflow for `int' literal", 0);
1673     }
1674 }
1675 #endif /* JC1_LITE */
1676
1677 static void
1678 java_unicode_2_utf8 (unicode)
1679     unicode_t unicode;
1680 {
1681   if (RANGE (unicode, 0x01, 0x7f))
1682     obstack_1grow (&temporary_obstack, (char)unicode);
1683   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1684     {
1685       obstack_1grow (&temporary_obstack,
1686                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1687       obstack_1grow (&temporary_obstack,
1688                      (unsigned char)(0x80 | (unicode & 0x3f)));
1689     }
1690   else                          /* Range 0x800-0xffff.  */
1691     {
1692       obstack_1grow (&temporary_obstack,
1693                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1694       obstack_1grow (&temporary_obstack,
1695                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1696       obstack_1grow (&temporary_obstack,
1697                      (unsigned char)(0x80 | (unicode & 0x003f)));
1698     }
1699 }
1700
1701 #ifndef JC1_LITE
1702 static tree
1703 build_wfl_node (node)
1704      tree node;
1705 {
1706   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1707   /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1708   TREE_TYPE (node) = NULL_TREE;
1709   return node;
1710 }
1711 #endif
1712
1713 static void
1714 java_lex_error (msg, forward)
1715      const char *msg ATTRIBUTE_UNUSED;
1716      int forward ATTRIBUTE_UNUSED;
1717 {
1718 #ifndef JC1_LITE
1719   ctxp->elc.line = ctxp->c_line->lineno;
1720   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1721
1722   /* Might be caught in the middle of some error report.  */
1723   ctxp->java_error_flag = 0;
1724   java_error (NULL);
1725   java_error (msg);
1726 #endif
1727 }
1728
1729 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, ends a line, and 0
   otherwise.  Both CR and LF end a line.  After a CR one more
   character is read from FP: a LF is swallowed, EOF is left alone and
   anything else is pushed back.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int lookahead = getc (fp);

      if (lookahead != EOF && lookahead != '\n')
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
1749 #endif
1750
1751 char *
1752 java_get_line_col (filename, line, col)
1753      const char *filename ATTRIBUTE_UNUSED;
1754      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1755 {
1756 #ifdef JC1_LITE
1757   return 0;
1758 #else
1759   /* Dumb implementation. Doesn't try to cache or optimize things.  */
1760   /* First line of the file is line 1, first column is 1.  */
1761
1762   /* COL == -1 means, at the CR/LF in LINE.  */
1763   /* COL == -2 means, at the first non space char in LINE.  */
1764
1765   FILE *fp;
1766   int c, ccol, cline = 1;
1767   int current_line_col = 0;
1768   int first_non_space = 0;
1769   char *base;
1770
1771   if (!(fp = fopen (filename, "r")))
1772     fatal_io_error ("can't open %s", filename);
1773
1774   while (cline != line)
1775     {
1776       c = getc (fp);
1777       if (c == EOF)
1778         {
1779           static const char msg[] = "<<file too short - unexpected EOF>>";
1780           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1781           goto have_line;
1782         }
1783       if (java_is_eol (fp, c))
1784         cline++;
1785     }
1786
1787   /* Gather the chars of the current line in a buffer.  */
1788   for (;;)
1789     {
1790       c = getc (fp);
1791       if (c < 0 || java_is_eol (fp, c))
1792         break;
1793       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1794         first_non_space = current_line_col;
1795       obstack_1grow (&temporary_obstack, c);
1796       current_line_col++;
1797     }
1798  have_line:
1799
1800   obstack_1grow (&temporary_obstack, '\n');
1801
1802   if (col == -1)
1803     {
1804       col = current_line_col;
1805       first_non_space = 0;
1806     }
1807   else if (col == -2)
1808     col = first_non_space;
1809   else
1810     first_non_space = 0;
1811
1812   /* Place the '^' a the right position.  */
1813   base = obstack_base (&temporary_obstack);
1814   for (ccol = 1; ccol <= col+3; ccol++)
1815     {
1816       /* Compute \t when reaching first_non_space.  */
1817       char c = (first_non_space ?
1818                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1819       obstack_1grow (&temporary_obstack, c);
1820     }
1821   obstack_grow0 (&temporary_obstack, "^", 1);
1822
1823   fclose (fp);
1824   return obstack_finish (&temporary_obstack);
1825 #endif
1826 }
1827
1828 #ifndef JC1_LITE
/* Compare the LENGTH bytes at STR, decoded one character at a time
   with UTF8_GET, against the NUL-terminated string NAME.  At the first
   character that differs, return the decoded character minus the
   character of NAME.  Otherwise return 0 if STR was consumed entirely
   and 1 if some of it is left.
   NOTE(review): UTF8_GET is defined elsewhere; it is assumed to
   advance STR and to stop at LIMIT -- confirm.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *p;

  for (p = name; *p; ++p)
    {
      int ch = UTF8_GET (str, limit);
      if (ch != *p)
        return ch - *p;
    }

  if (str != limit)
    return 1;
  return 0;
}
1847
/* All C++ keywords, including the double-underscore spellings, in
   strictly increasing strcmp order.  Keep the table sorted when
   adding an entry.  */

static const char *const cxx_keywords[] =
{
  /* Underscore spellings.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  /* a - c */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  /* d - i */
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  /* l - r */
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  /* s - x */
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1957
1958 /* Return true if NAME is a C++ keyword.  */
1959
1960 int
1961 cxx_keyword_p (name, length)
1962      const char *name;
1963      int length;
1964 {
1965   int last = ARRAY_SIZE (cxx_keywords);
1966   int first = 0;
1967   int mid = (last + first) / 2;
1968   int old = -1;
1969
1970   for (mid = (last + first) / 2;
1971        mid != old;
1972        old = mid, mid = (last + first) / 2)
1973     {
1974       int kwl = strlen (cxx_keywords[mid]);
1975       int min_length = kwl > length ? length : kwl;
1976       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1977
1978       if (r == 0)
1979         {
1980           int i;
1981           /* We've found a match if all the remaining characters are `$'.  */
1982           for (i = min_length; i < length && name[i] == '$'; ++i)
1983             ;
1984           if (i == length)
1985             return 1;
1986           r = 1;
1987         }
1988
1989       if (r < 0)
1990         last = mid;
1991       else
1992         first = mid;
1993     }
1994   return 0;
1995 }
1996 #endif /* JC1_LITE */