gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* It defines java_lex (yylex) that reads a Java ASCII source file
  27    possibly containing Unicode escape sequence or utf8 encoded
  28    characters and returns a token for everything found but comments,
  29    white spaces and line terminators. When necessary, it also fills
  30    the java_lval (yylval) union. It's implemented to be called by a
  31    re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  36
  37 #include "keyword.h"
  38 #include "flags.h"
  39 #include "chartables.h"
  40
  41 /* Function declaration  */
  42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  44 static void java_lex_error PARAMS ((const char *, int));
  45 #ifndef JC1_LITE
  46 static int java_is_eol PARAMS ((FILE *, int));
  47 static tree build_wfl_node PARAMS ((tree));
  48 #endif
  49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  50 static int java_parse_escape_sequence PARAMS ((void));
  51 static int java_start_char_p PARAMS ((unicode_t));
  52 static int java_part_char_p PARAMS ((unicode_t));
  53 static int java_parse_doc_section PARAMS ((int));
  54 static void java_parse_end_comment PARAMS ((int));
  55 static int java_get_unicode PARAMS ((void));
  56 static int java_read_unicode PARAMS ((java_lexer *, int *));
  57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
  58                                                              int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static int java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 #ifndef JC1_LITE
  65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
  66 #endif
  67
  68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  69 #ifndef JC1_LITE
  70 static void error_if_numeric_overflow PARAMS ((tree));
  71 #endif
  72
  73 #ifdef HAVE_ICONV
  74 /* This is nonzero if we have initialized `need_byteswap'.  */
  75 static int byteswap_init = 0;
  76
  77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
  78    big-endian order -- not native endian order.  We handle this by
  79    doing a conversion once at startup and seeing what happens.  This
  80    flag holds the results of this determination.  */
  81 static int need_byteswap = 0;
  82 #endif
  83
  84 void
  85 java_init_lex (finput, encoding)
  86      FILE *finput;
  87      const char *encoding;
  88 {
  89 #ifndef JC1_LITE
  90   int java_lang_imported = 0;
  91
  92   if (!java_lang_id)
  93     java_lang_id = get_identifier ("java.lang");
  94   if (!java_lang_cloneable)
  95     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  96   if (!java_io_serializable)
  97     java_io_serializable = get_identifier ("java.io.Serializable");
  98   if (!inst_id)
  99     inst_id = get_identifier ("inst$");
 100   if (!wpv_id)
 101     wpv_id = get_identifier ("write_parm_value$");
 102
 103   if (!java_lang_imported)
 104     {
 105       tree node = build_tree_list
 106         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
 107       read_import_dir (TREE_PURPOSE (node));
 108       TREE_CHAIN (node) = ctxp->import_demand_list;
 109       ctxp->import_demand_list = node;
 110       java_lang_imported = 1;
 111     }
 112
 113   if (!wfl_operator)
 114     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
 115   if (!label_id)
 116     label_id = get_identifier ("$L");
 117   if (!wfl_append)
 118     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 119   if (!wfl_string_buffer)
 120     wfl_string_buffer =
 121       build_expr_wfl (get_identifier (flag_emit_class_files
 122                                       ? "java.lang.StringBuffer"
 123                                       : "gnu.gcj.runtime.StringBuffer"),
 124                       NULL, 0, 0);
 125   if (!wfl_to_string)
 126     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 127
 128   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 129     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 130
 131   memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
 132   memset ((PTR) current_jcf, 0, sizeof (JCF));
 133   ctxp->current_parsed_class = NULL;
 134   ctxp->package = NULL_TREE;
 135 #endif
 136
 137   ctxp->filename = input_filename;
 138   ctxp->lineno = lineno = 0;
 139   ctxp->p_line = NULL;
 140   ctxp->c_line = NULL;
 141   ctxp->java_error_flag = 0;
 142   ctxp->lexer = java_new_lexer (finput, encoding);
 143 }
 144
 145 static char *
 146 java_sprint_unicode (line, i)
 147     struct java_line *line;
 148     int i;
 149 {
 150   static char buffer [10];
 151   if (line->unicode_escape_p [i] || line->line [i] > 128)
 152     sprintf (buffer, "\\u%04x", line->line [i]);
 153   else
 154     {
 155       buffer [0] = line->line [i];
 156       buffer [1] = '\0';
 157     }
 158   return buffer;
 159 }
 160
 161 static unicode_t
 162 java_sneak_unicode ()
 163 {
 164   return (ctxp->c_line->line [ctxp->c_line->current]);
 165 }
 166
 167 static void
 168 java_unget_unicode ()
 169 {
 170   if (!ctxp->c_line->current)
 171     /* Can't unget unicode.  */
 172     abort ();
 173
 174   ctxp->c_line->current--;
 175   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 176 }
 177
 178 static void
 179 java_allocate_new_line ()
 180 {
 181   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 182   char ahead_escape_p = (ctxp->c_line ?
 183                          ctxp->c_line->unicode_escape_ahead_p : 0);
 184
 185   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 186     {
 187       if (ctxp->p_line)
 188         {
 189           free (ctxp->p_line->unicode_escape_p);
 190           free (ctxp->p_line->line);
 191           free (ctxp->p_line);
 192         }
 193       ctxp->p_line = ctxp->c_line;
 194       ctxp->c_line = NULL;              /* Reallocated */
 195     }
 196
 197   if (!ctxp->c_line)
 198     {
 199       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 200       ctxp->c_line->max = JAVA_LINE_MAX;
 201       ctxp->c_line->line = (unicode_t *)xmalloc
 202         (sizeof (unicode_t)*ctxp->c_line->max);
 203       ctxp->c_line->unicode_escape_p =
 204           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 205       ctxp->c_line->white_space_only = 0;
 206     }
 207
 208   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 209   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 210   if (ahead)
 211     {
 212       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 213       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 214       ctxp->c_line->size++;
 215     }
 216   ctxp->c_line->ahead [0] = 0;
 217   ctxp->c_line->unicode_escape_ahead_p = 0;
 218   ctxp->c_line->lineno = ++lineno;
 219   ctxp->c_line->white_space_only = 1;
 220 }
 221
 222 /* Create a new lexer object.  */
 223
 224 java_lexer *
 225 java_new_lexer (finput, encoding)
 226      FILE *finput;
 227      const char *encoding;
 228 {
 229   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 230   int enc_error = 0;
 231
 232   lex->finput = finput;
 233   lex->bs_count = 0;
 234   lex->unget_value = 0;
 235   lex->hit_eof = 0;
 236
 237 #ifdef HAVE_ICONV
 238   lex->handle = iconv_open ("UCS-2", encoding);
 239   if (lex->handle != (iconv_t) -1)
 240     {
 241       lex->first = -1;
 242       lex->last = -1;
 243       lex->out_first = -1;
 244       lex->out_last = -1;
 245       lex->read_anything = 0;
 246       lex->use_fallback = 0;
 247
 248       /* Work around broken iconv() implementations by doing checking at
 249          runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 250          then all UCS-2 encoders will be broken.  Perhaps not a valid
 251          assumption.  */
 252       if (! byteswap_init)
 253         {
 254           iconv_t handle;
 255
 256           byteswap_init = 1;
 257
 258           handle = iconv_open ("UCS-2", "UTF-8");
 259           if (handle != (iconv_t) -1)
 260             {
 261               unicode_t result;
 262               unsigned char in[3];
 263               char *inp, *outp;
 264               size_t inc, outc, r;
 265
 266               /* This is the UTF-8 encoding of \ufeff.  */
 267               in[0] = 0xef;
 268               in[1] = 0xbb;
 269               in[2] = 0xbf;
 270
 271               inp = in;
 272               inc = 3;
 273               outp = (char *) &result;
 274               outc = 2;
 275
 276               r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
 277                          &outp, &outc);
 278               iconv_close (handle);
 279               /* Conversion must be complete for us to use the result.  */
 280               if (r != (size_t) -1 && inc == 0 && outc == 0)
 281                 need_byteswap = (result != 0xfeff);
 282             }
 283         }
 284
 285       lex->byte_swap = need_byteswap;
 286     }
 287   else
 288 #endif /* HAVE_ICONV */
 289     {
 290       /* If iconv failed, use the internal decoder if the default
 291          encoding was requested.  This code is used on platforms where
 292          iconv exists but is insufficient for our needs.  For
 293          instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.  */
 294       if (strcmp (encoding, DEFAULT_ENCODING))
 295         enc_error = 1;
 296 #ifdef HAVE_ICONV
 297       else
 298         lex->use_fallback = 1;
 299 #endif /* HAVE_ICONV */
 300     }
 301
 302   if (enc_error)
 303     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
 304
 305   return lex;
 306 }
 307
 308 void
 309 java_destroy_lexer (lex)
 310      java_lexer *lex;
 311 {
 312 #ifdef HAVE_ICONV
 313   if (! lex->use_fallback)
 314     iconv_close (lex->handle);
 315 #endif
 316   free (lex);
 317 }
 318
 319 static int
 320 java_read_char (lex)
 321      java_lexer *lex;
 322 {
 323   if (lex->unget_value)
 324     {
 325       unicode_t r = lex->unget_value;
 326       lex->unget_value = 0;
 327       return r;
 328     }
 329
 330 #ifdef HAVE_ICONV
 331   if (! lex->use_fallback)
 332     {
 333       size_t ir, inbytesleft, in_save, out_count, out_save;
 334       char *inp, *outp;
 335       unicode_t result;
 336
 337       /* If there is data which has already been converted, use it.  */
 338       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 339         {
 340           lex->out_first = 0;
 341           lex->out_last = 0;
 342
 343           while (1)
 344             {
 345               /* See if we need to read more data.  If FIRST == 0 then
 346                  the previous conversion attempt ended in the middle of
 347                  a character at the end of the buffer.  Otherwise we
 348                  only have to read if the buffer is empty.  */
 349               if (lex->first == 0 || lex->first >= lex->last)
 350                 {
 351                   int r;
 352
 353                   if (lex->first >= lex->last)
 354                     {
 355                       lex->first = 0;
 356                       lex->last = 0;
 357                     }
 358                   if (feof (lex->finput))
 359                     return UEOF;
 360                   r = fread (&lex->buffer[lex->last], 1,
 361                              sizeof (lex->buffer) - lex->last,
 362                              lex->finput);
 363                   lex->last += r;
 364                 }
 365
 366               inbytesleft = lex->last - lex->first;
 367               out_count = sizeof (lex->out_buffer) - lex->out_last;
 368
 369               if (inbytesleft == 0)
 370                 {
 371                   /* We've tried to read and there is nothing left.  */
 372                   return UEOF;
 373                 }
 374
 375               in_save = inbytesleft;
 376               out_save = out_count;
 377               inp = &lex->buffer[lex->first];
 378               outp = &lex->out_buffer[lex->out_last];
 379               ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
 380                           &inbytesleft, &outp, &out_count);
 381
 382               /* If we haven't read any bytes, then look to see if we
 383                  have read a BOM.  */
 384               if (! lex->read_anything && out_save - out_count >= 2)
 385                 {
 386                   unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
 387                   if (uc == 0xfeff)
 388                     {
 389                       lex->byte_swap = 0;
 390                       lex->out_first += 2;
 391                     }
 392                   else if (uc == 0xfffe)
 393                     {
 394                       lex->byte_swap = 1;
 395                       lex->out_first += 2;
 396                     }
 397                   lex->read_anything = 1;
 398                 }
 399
 400               if (lex->byte_swap)
 401                 {
 402                   unsigned int i;
 403                   for (i = 0; i < out_save - out_count; i += 2)
 404                     {
 405                       char t = lex->out_buffer[lex->out_last + i];
 406                       lex->out_buffer[lex->out_last + i]
 407                         = lex->out_buffer[lex->out_last + i + 1];
 408                       lex->out_buffer[lex->out_last + i + 1] = t;
 409                     }
 410                 }
 411
 412               lex->first += in_save - inbytesleft;
 413               lex->out_last += out_save - out_count;
 414
 415               /* If we converted anything at all, move along.  */
 416               if (out_count != out_save)
 417                 break;
 418
 419               if (ir == (size_t) -1)
 420                 {
 421                   if (errno == EINVAL)
 422                     {
 423                       /* This is ok.  This means that the end of our buffer
 424                          is in the middle of a character sequence.  We just
 425                          move the valid part of the buffer to the beginning
 426                          to force a read.  */
 427                       memmove (&lex->buffer[0], &lex->buffer[lex->first],
 428                                lex->last - lex->first);
 429                       lex->last -= lex->first;
 430                       lex->first = 0;
 431                     }
 432                   else
 433                     {
 434                       /* A more serious error.  */
 435                       java_lex_error ("unrecognized character in input stream",
 436                                       0);
 437                       return UEOF;
 438                     }
 439                 }
 440             }
 441         }
 442
 443       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 444         {
 445           /* Don't have any data.  */
 446           return UEOF;
 447         }
 448
 449       /* Success.  */
 450       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
 451       lex->out_first += 2;
 452       return result;
 453     }
 454   else
 455 #endif /* HAVE_ICONV */
 456     {
 457       int c, c1, c2;
 458       c = getc (lex->finput);
 459
 460       if (c == EOF)
 461         return UEOF;
 462       if (c < 128)
 463         return (unicode_t) c;
 464       else
 465         {
 466           if ((c & 0xe0) == 0xc0)
 467             {
 468               c1 = getc (lex->finput);
 469               if ((c1 & 0xc0) == 0x80)
 470                 {
 471                   unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
 472                   /* Check for valid 2-byte characters.  We explicitly
 473                      allow \0 because this encoding is common in the
 474                      Java world.  */
 475                   if (r == 0 || (r >= 0x80 && r <= 0x7ff))
 476                     return r;
 477                 }
 478             }
 479           else if ((c & 0xf0) == 0xe0)
 480             {
 481               c1 = getc (lex->finput);
 482               if ((c1 & 0xc0) == 0x80)
 483                 {
 484                   c2 = getc (lex->finput);
 485                   if ((c2 & 0xc0) == 0x80)
 486                     {
 487                       unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
 488                                                  (( c1 & 0x3f) << 6)
 489                                                  + (c2 & 0x3f));
 490                       /* Check for valid 3-byte characters.
 491                          Don't allow surrogate, \ufffe or \uffff.  */
 492                       if (r >= 0x800 && r <= 0xffff
 493                           && ! (r >= 0xd800 && r <= 0xdfff)
 494                           && r != 0xfffe && r != 0xffff)
 495                         return r;
 496                     }
 497                 }
 498             }
 499
 500           /* We simply don't support invalid characters.  We also
 501              don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
 502              cannot be valid Java characters.  */
 503           java_lex_error ("malformed UTF-8 character", 0);
 504         }
 505     }
 506
 507   /* We only get here on error.  */
 508   return UEOF;
 509 }
 510
 511 static void
 512 java_store_unicode (l, c, unicode_escape_p)
 513     struct java_line *l;
 514     unicode_t c;
 515     int unicode_escape_p;
 516 {
 517   if (l->size == l->max)
 518     {
 519       l->max += JAVA_LINE_MAX;
 520       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 521       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 522                                                sizeof (char)*l->max);
 523     }
 524   l->line [l->size] = c;
 525   l->unicode_escape_p [l->size++] = unicode_escape_p;
 526 }
 527
 528 static int
 529 java_read_unicode (lex, unicode_escape_p)
 530      java_lexer *lex;
 531      int *unicode_escape_p;
 532 {
 533   int c;
 534
 535   c = java_read_char (lex);
 536   *unicode_escape_p = 0;
 537
 538   if (c != '\\')
 539     {
 540       lex->bs_count = 0;
 541       return c;
 542     }
 543
 544   ++lex->bs_count;
 545   if ((lex->bs_count) % 2 == 1)
 546     {
 547       /* Odd number of \ seen.  */
 548       c = java_read_char (lex);
 549       if (c == 'u')
 550         {
 551           unicode_t unicode = 0;
 552           int shift = 12;
 553
 554           /* Recognize any number of `u's in \u.  */
 555           while ((c = java_read_char (lex)) == 'u')
 556             ;
 557
 558           /* Unget the most recent character as it is not a `u'.  */
 559           if (c == UEOF)
 560             return UEOF;
 561           lex->unget_value = c;
 562
 563           /* Next should be 4 hex digits, otherwise it's an error.
 564              The hex value is converted into the unicode, pushed into
 565              the Unicode stream.  */
 566           for (shift = 12; shift >= 0; shift -= 4)
 567             {
 568               if ((c = java_read_char (lex)) == UEOF)
 569                 return UEOF;
 570               if (ISDIGIT (c))
 571                 unicode |= (unicode_t)((c-'0') << shift);
 572               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 573                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 574               else
 575                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 576             }
 577           lex->bs_count = 0;
 578           *unicode_escape_p = 1;
 579           return unicode;
 580         }
 581       lex->unget_value = c;
 582     }
 583   return (unicode_t) '\\';
 584 }
 585
 586 static int
 587 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
 588      java_lexer *lex;
 589      int *unicode_escape_p;
 590 {
 591   int c = java_read_unicode (lex, unicode_escape_p);
 592
 593   if (c == '\r')
 594     {
 595       /* We have to read ahead to see if we got \r\n.  In that case we
 596          return a single line terminator.  */
 597       int dummy;
 598       c = java_read_unicode (lex, &dummy);
 599       if (c != '\n')
 600         lex->unget_value = c;
 601       /* In either case we must return a newline.  */
 602       c = '\n';
 603     }
 604
 605   return c;
 606 }
 607
 608 static int
 609 java_get_unicode ()
 610 {
 611   /* It's time to read a line when... */
 612   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 613     {
 614       int c;
 615       int found_chars = 0;
 616
 617       if (ctxp->lexer->hit_eof)
 618         return UEOF;
 619
 620       java_allocate_new_line ();
 621       if (ctxp->c_line->line[0] != '\n')
 622         {
 623           for (;;)
 624             {
 625               int unicode_escape_p;
 626               c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 627                                                             &unicode_escape_p);
 628               if (c != UEOF)
 629                 {
 630                   found_chars = 1;
 631                   java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 632                   if (ctxp->c_line->white_space_only
 633                       && !JAVA_WHITE_SPACE_P (c)
 634                       && c != '\n')
 635                     ctxp->c_line->white_space_only = 0;
 636                 }
 637               if ((c == '\n') || (c == UEOF))
 638                 break;
 639             }
 640
 641           if (c == UEOF && ! found_chars)
 642             {
 643               ctxp->lexer->hit_eof = 1;
 644               return UEOF;
 645             }
 646         }
 647     }
 648   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 649   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 650   return ctxp->c_line->line [ctxp->c_line->current++];
 651 }
 652
 653 /* Parse the end of a C style comment.
 654  * C is the first character following the '/' and '*'. */
 655 static void
 656 java_parse_end_comment (c)
 657      int c;
 658 {
 659   for ( ;; c = java_get_unicode ())
 660     {
 661       switch (c)
 662         {
 663         case UEOF:
 664           java_lex_error ("Comment not terminated at end of input", 0);
 665           return;
 666         case '*':
 667           switch (c = java_get_unicode ())
 668             {
 669             case UEOF:
 670               java_lex_error ("Comment not terminated at end of input", 0);
 671               return;
 672             case '/':
 673               return;
 674             case '*':   /* reparse only '*' */
 675               java_unget_unicode ();
 676             }
 677         }
 678     }
 679 }
 680
 681 /* Parse the documentation section. Keywords must be at the beginning
 682    of a documentation comment line (ignoring white space and any `*'
 683    character). Parsed keyword(s): @DEPRECATED.  */
 684
 685 static int
 686 java_parse_doc_section (c)
 687      int c;
 688 {
 689   int valid_tag = 0, seen_star = 0;
 690
 691   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 692     {
 693       switch (c)
 694         {
 695         case '*':
 696           seen_star = 1;
 697           break;
 698         case '\n': /* ULT */
 699           valid_tag = 1;
 700         default:
 701           seen_star = 0;
 702         }
 703       c = java_get_unicode();
 704     }
 705
 706   if (c == UEOF)
 707     java_lex_error ("Comment not terminated at end of input", 0);
 708
 709   if (seen_star && (c == '/'))
 710     return 1;                   /* Goto step1 in caller */
 711
 712   /* We're parsing @deprecated */
 713   if (valid_tag && (c == '@'))
 714     {
 715       char tag [11];
 716       int  tag_index = 0;
 717
 718       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 719         {
 720           c = java_get_unicode ();
 721           tag [tag_index++] = c;
 722         }
 723
 724       if (c == UEOF)
 725         java_lex_error ("Comment not terminated at end of input", 0);
 726       tag [tag_index] = '\0';
 727
 728       if (!strcmp (tag, "deprecated"))
 729         ctxp->deprecated = 1;
 730     }
 731   java_unget_unicode ();
 732   return 0;
 733 }
 734
 735 /* Return true if C is a valid start character for a Java identifier.
 736    This is only called if C >= 128 -- smaller values are handled
 737    inline.  However, this function handles all values anyway.  */
 738 static int
 739 java_start_char_p (c)
 740      unicode_t c;
 741 {
 742   unsigned int hi = c / 256;
 743   char *page = type_table[hi];
 744   unsigned long val = (unsigned long) page;
 745   int flags;
 746
 747   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 748     flags = page[c & 255];
 749   else
 750     flags = val;
 751
 752   return flags & LETTER_START;
 753 }
 754
 755 /* Return true if C is a valid part character for a Java identifier.
 756    This is only called if C >= 128 -- smaller values are handled
 757    inline.  However, this function handles all values anyway.  */
 758 static int
 759 java_part_char_p (c)
 760      unicode_t c;
 761 {
 762   unsigned int hi = c / 256;
 763   char *page = type_table[hi];
 764   unsigned long val = (unsigned long) page;
 765   int flags;
 766
 767   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 768     flags = page[c & 255];
 769   else
 770     flags = val;
 771
 772   return flags & LETTER_PART;
 773 }
 774
 775 static int
 776 java_parse_escape_sequence ()
 777 {
 778   unicode_t char_lit;
 779   int c;
 780
 781   switch (c = java_get_unicode ())
 782     {
 783     case 'b':
 784       return (unicode_t)0x8;
 785     case 't':
 786       return (unicode_t)0x9;
 787     case 'n':
 788       return (unicode_t)0xa;
 789     case 'f':
 790       return (unicode_t)0xc;
 791     case 'r':
 792       return (unicode_t)0xd;
 793     case '"':
 794       return (unicode_t)0x22;
 795     case '\'':
 796       return (unicode_t)0x27;
 797     case '\\':
 798       return (unicode_t)0x5c;
 799     case '0': case '1': case '2': case '3': case '4':
 800     case '5': case '6': case '7':
 801       {
 802         int octal_escape[3];
 803         int octal_escape_index = 0;
 804         int max = 3;
 805         int i, shift;
 806
 807         for (; octal_escape_index < max && RANGE (c, '0', '7');
 808              c = java_get_unicode ())
 809           {
 810             if (octal_escape_index == 0 && c > '3')
 811               {
 812                 /* According to the grammar, `\477' has a well-defined
 813                    meaning -- it is `\47' followed by `7'.  */
 814                 --max;
 815               }
 816             octal_escape [octal_escape_index++] = c;
 817           }
 818
 819         java_unget_unicode ();
 820
 821         for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 822              i < octal_escape_index; i++, shift -= 3)
 823           char_lit |= (octal_escape [i] - '0') << shift;
 824
 825         return char_lit;
 826       }
 827     default:
 828       java_lex_error ("Invalid character in escape sequence", 0);
 829       return JAVA_CHAR_ERROR;
 830     }
 831 }
 832
 833 /* Isolate the code which may raise an arithmetic exception in its
 834    own function.  */
 835
 836 #ifndef JC1_LITE
/* Arguments for java_perform_atof, which receives them through a
   single generic pointer.  */
struct jpa_args
{
  /* Copied into a local of the same name by java_perform_atof.  */
  YYSTYPE *java_lval;
  /* Text of the literal; converted with REAL_VALUE_ATOF and scanned
     for non-zero digits when the converted value is zero.  */
  char *literal_token;
  /* Nonzero selects FLOAT_TYPE_NODE, zero DOUBLE_TYPE_NODE.  */
  int fflag;
  /* Value temporarily stored in ctxp->c_line->current while an
     underflow error is reported.  */
  int number_beginning;
};
 844
 845 #ifdef REAL_ARITHMETIC
 846 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
 847 #else
 848 #define IS_ZERO(X) ((X) == 0)
 849 #endif
 850
 851 static void java_perform_atof   PARAMS ((PTR));
 852
 853 static void
 854 java_perform_atof (av)
 855      PTR av;
 856 {
 857   struct jpa_args *a = (struct jpa_args *)av;
 858   YYSTYPE *java_lval = a->java_lval;
 859   int number_beginning = a->number_beginning;
 860   REAL_VALUE_TYPE value;
 861   tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 862
 863   SET_REAL_VALUE_ATOF (value,
 864                        REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
 865
 866   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
 867     {
 868       JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
 869       value = DCONST0;
 870     }
 871   else if (IS_ZERO (value))
 872     {
 873       /* We check to see if the value is really 0 or if we've found an
 874          underflow.  We do this in the most primitive imaginable way.  */
 875       int really_zero = 1;
 876       char *p = a->literal_token;
 877       if (*p == '-')
 878         ++p;
 879       while (*p && *p != 'e' && *p != 'E')
 880         {
 881           if (*p != '0' && *p != '.')
 882             {
 883               really_zero = 0;
 884               break;
 885             }
 886           ++p;
 887         }
 888       if (! really_zero)
 889         {
 890           int i = ctxp->c_line->current;
 891           ctxp->c_line->current = number_beginning;
 892           java_lex_error ("Floating point literal underflow", 0);
 893           ctxp->c_line->current = i;
 894         }
 895     }
 896
 897   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 898 }
 899 #endif
 900
 901 static int yylex                PARAMS ((YYSTYPE *));
 902 /* Return the next token, or 0 at end of input or on an invalid character; token values go through JAVA_LVAL.  */
 903 static int
 904 #ifdef JC1_LITE
 905 yylex (java_lval)
 906 #else
 907 java_lex (java_lval)
 908 #endif
 909      YYSTYPE *java_lval;
 910 {
 911   int c;
 912   unicode_t first_unicode;
 913   int ascii_index, all_ascii;
 914   char *string;
 915
 916   /* Translation of the Unicode escape in the raw stream of Unicode
 917      characters. Takes care of line terminator.  */
 918  step1:
 919   /* Skip white spaces: SP, TAB and FF or ULT */
 920   for (c = java_get_unicode ();
 921        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 922     if (c == '\n')
 923       {
 924         ctxp->elc.line = ctxp->c_line->lineno;
 925         ctxp->elc.col  = ctxp->c_line->char_col-2;
 926       }
 927
 928   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 929
 930   if (c == 0x1a)                /* CTRL-Z */
 931     {
 932       if ((c = java_get_unicode ()) == UEOF)
 933         return 0;               /* Ok here */
 934       else
 935         java_unget_unicode ();  /* Caught later, at the end of the function */
 936     }
 937   /* Handle EOF here */
 938   if (c == UEOF)        /* Should probably do something here... */
 939     return 0;
 940
 941   /* Take care of eventual comments.  */
 942   if (c == '/')
 943     {
 944       switch (c = java_get_unicode ())
 945         {
 946         case '/':
 947           for (;;)
 948             {
 949               c = java_get_unicode ();
 950               if (c == UEOF)
 951                 {
 952                   /* It is ok to end a `//' comment with EOF, unless
 953                      we're being pedantic.  */
 954                   if (pedantic)
 955                     java_lex_error ("Comment not terminated at end of input",
 956                                     0);
 957                   return 0;
 958                 }
 959               if (c == '\n')    /* ULT */
 960                 goto step1;
 961             }
 962           break;
 963
 964         case '*':
 965           if ((c = java_get_unicode ()) == '*')
 966             {
 967               if ((c = java_get_unicode ()) == '/')
 968                 goto step1;     /* Empty documentation comment  */
 969               else if (java_parse_doc_section (c))
 970                 goto step1;
 971             }
 972           /* Otherwise hand the comment over to java_parse_end_comment.  */
 973           java_parse_end_comment ((c = java_get_unicode ()));
 974           goto step1;
 975           break;
 976         default:
 977           java_unget_unicode ();
 978           c = '/';
 979           break;
 980         }
 981     }
 982
 983   ctxp->elc.line = ctxp->c_line->lineno;
 984   ctxp->elc.prev_col = ctxp->elc.col;
 985   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 986   if (ctxp->elc.col < 0)
 987     abort ();
 988
 989   /* Numeric literals */
 990   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 991     {
 992       /* This section of code is borrowed from gcc/c-lex.c  */
 993 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 994       int parts[TOTAL_PARTS];
 995       HOST_WIDE_INT high, low;
 996       /* End borrowed section  */
 997       char literal_token [256];
 998       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 999       int  found_hex_digits = 0;
1000       int  i;
1001 #ifndef JC1_LITE
1002       int  number_beginning = ctxp->c_line->current;
1003       tree value;
1004 #endif
1005
1006       /* We might have a . separator instead of a FP like .[0-9]* */
1007       if (c == '.')
1008         {
1009           unicode_t peep = java_sneak_unicode ();
1010
1011           if (!JAVA_ASCII_DIGIT (peep))
1012             {
1013               JAVA_LEX_SEP('.');
1014               BUILD_OPERATOR (DOT_TK);
1015             }
1016         }
1017       /* Clear the accumulator that collects an integer literal.  */
1018       for (i = 0; i < TOTAL_PARTS; i++)
1019         parts [i] = 0;
1020
1021       if (c == '0')
1022         {
1023           c = java_get_unicode ();
1024           if (c == 'x' || c == 'X')
1025             {
1026               radix = 16;
1027               c = java_get_unicode ();
1028             }
1029           else if (JAVA_ASCII_DIGIT (c))
1030             radix = 8;
1031           else if (c == '.')
1032             {
1033               /* Push the '.' back and prepare for a FP parsing... */
1034               java_unget_unicode ();
1035               c = '0';
1036             }
1037           else
1038             {
1039               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1040               JAVA_LEX_LIT ("0", 10);
1041               switch (c)
1042                 {
1043                 case 'L': case 'l':
1044                   SET_LVAL_NODE (long_zero_node);
1045                   return (INT_LIT_TK);
1046                 case 'f': case 'F':
1047                   SET_LVAL_NODE (float_zero_node);
1048                   return (FP_LIT_TK);
1049                 case 'd': case 'D':
1050                   SET_LVAL_NODE (double_zero_node);
1051                   return (FP_LIT_TK);
1052                 default:
1053                   java_unget_unicode ();
1054                   SET_LVAL_NODE (integer_zero_node);
1055                   return (INT_LIT_TK);
1056                 }
1057             }
1058         }
1059       /* Parse the first part of the literal, until we find something
1060          which is not a number.  */
1061       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1062              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1063              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
1064         {
1065           /* We store in a string (in case it turns out to be a FP) and in
1066              PARTS if we have to process an integer literal.  */
1067           int numeric = (ISDIGIT (c) ? c-'0' : 10 +(c|0x20)-'a');
1068           int count;
1069
1070           /* Remember when we find a valid hexadecimal digit */
1071           if (radix == 16)
1072             found_hex_digits = 1;
1073           /* NOTE(review): literal_index is never checked against the 256 bytes of literal_token.  */
1074           literal_token [literal_index++] = c;
1075           /* This section of code is borrowed from gcc/c-lex.c  */
1076           for (count = 0; count < TOTAL_PARTS; count++)
1077             {
1078               parts[count] *= radix;
1079               if (count)
1080                 {
1081                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1082                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1083                 }
1084               else
1085                 parts[0] += numeric;
1086             }
1087           if (parts [TOTAL_PARTS-1] != 0)
1088             overflow = 1;
1089           /* End borrowed section.  */
1090           c = java_get_unicode ();
1091         }
1092
1093       /* If we have something from the FP char set but not a digit, parse
1094          a FP literal.  */
1095       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1096         {
1097           int stage = 0;
1098           int seen_digit = (literal_index ? 1 : 0);
1099           int seen_exponent = 0;
1100           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
1101                                    double unless specified. */
1102
1103           /* It is ok if the radix is 8 because this just means we've
1104              seen a leading `0'.  However, radix==16 is invalid.  */
1105           if (radix == 16)
1106             java_lex_error ("Can't express non-decimal FP literal", 0);
1107           radix = 10;
1108           /* STAGE: 0 at start, 1 after `.', 2 after the exponent letter, 3 after its sign, 4 once a suffix is seen.  */
1109           for (;;)
1110             {
1111               if (c == '.')
1112                 {
1113                   if (stage < 1)
1114                     {
1115                       stage = 1;
1116                       literal_token [literal_index++ ] = c;
1117                       c = java_get_unicode ();
1118                     }
1119                   else
1120                     java_lex_error ("Invalid character in FP literal", 0);
1121                 }
1122
1123               if (c == 'e' || c == 'E')
1124                 {
1125                   if (stage < 2)
1126                     {
1127                       /* {E,e} must have seen at least a digit */
1128                       if (!seen_digit)
1129                         java_lex_error ("Invalid FP literal", 0);
1130                       seen_digit = 0;
1131                       seen_exponent = 1;
1132                       stage = 2;
1133                       literal_token [literal_index++] = c;
1134                       c = java_get_unicode ();
1135                     }
1136                   else
1137                     java_lex_error ("Invalid character in FP literal", 0);
1138                 }
1139               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1140                 {
1141                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1142                   stage = 4;    /* So we fall through */
1143                 }
1144
1145               if ((c=='-' || c =='+') && stage == 2)
1146                 {
1147                   stage = 3;
1148                   literal_token [literal_index++] = c;
1149                   c = java_get_unicode ();
1150                 }
1151
1152               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1153                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1154                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1155                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
1156                 {
1157                   if (JAVA_ASCII_DIGIT (c))
1158                     seen_digit = 1;
1159                   literal_token [literal_index++ ] = c;
1160                   c = java_get_unicode ();
1161                 }
1162               else
1163                 {
1164 #ifndef JC1_LITE
1165                   struct jpa_args a;
1166 #endif
1167                   if (stage != 4) /* Don't push back fF/dD */
1168                     java_unget_unicode ();
1169
1170                   /* An exponent (if any) must have seen a digit.  */
1171                   if (seen_exponent && !seen_digit)
1172                     java_lex_error ("Invalid FP literal", 0);
1173
1174                   literal_token [literal_index] = '\0';
1175                   JAVA_LEX_LIT (literal_token, radix);
1176
1177 #ifndef JC1_LITE
1178                   a.literal_token = literal_token;
1179                   a.fflag = fflag;
1180                   a.java_lval = java_lval;
1181                   a.number_beginning = number_beginning;
1182                   if (do_float_handler (java_perform_atof, (PTR) &a))
1183                     return FP_LIT_TK;
1184
1185                   JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1186 #else
1187                   return FP_LIT_TK;
1188 #endif
1189                 }
1190             }
1191         } /* JAVA_ASCII_FPCHAR (c) */
1192
1193       if (radix == 16 && ! found_hex_digits)
1194         java_lex_error
1195           ("0x must be followed by at least one hexadecimal digit", 0);
1196
1197       /* Here we get back to converting the integral literal.  */
1198       if (c == 'L' || c == 'l')
1199         long_suffix = 1;
1200       else if (radix == 16 && JAVA_ASCII_LETTER (c))
1201         java_lex_error ("Digit out of range in hexadecimal literal", 0);
1202       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
1203         java_lex_error ("Digit out of range in octal literal", 0);
1204       else if (radix == 16 && !literal_index)
1205         java_lex_error ("No digit specified for hexadecimal literal", 0);
1206       else
1207         java_unget_unicode ();
1208
1209 #ifdef JAVA_LEX_DEBUG
1210       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1211       JAVA_LEX_LIT (literal_token, radix);
1212 #endif
1213       /* This section of code is borrowed from gcc/c-lex.c  */
1214       if (!overflow)
1215         {
1216           bytes = GET_TYPE_PRECISION (long_type_node);
1217           for (i = bytes; i < TOTAL_PARTS; i++)
1218             if (parts [i])
1219               {
1220                 overflow = 1;
1221                 break;
1222               }
1223         }
1224       high = low = 0;
1225       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1226         {
1227           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1228                                               / HOST_BITS_PER_CHAR)]
1229                    << (i * HOST_BITS_PER_CHAR));
1230           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1231         }
1232       /* End borrowed section.  */
1233
1234       /* Range checking */
1235       if (long_suffix)
1236         {
1237           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1238              9223372036854775807L is the biggest `long' literal that can be
1239              expressed using a 10 radix. For other radixes, everything that
1240              fits within 64 bits is OK. */
1241           int hb = (high >> 31);
1242           if (overflow || (hb && low && radix == 10)
1243               || (hb && high & 0x7fffffff && radix == 10))
1244             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1245         }
1246       else
1247         {
1248           /* 2147483648 is valid if operand of a '-'. Otherwise,
1249              2147483647 is the biggest `int' literal that can be
1250              expressed using a 10 radix. For other radixes, everything
1251              that fits within 32 bits is OK.  As all literals are
1252              signed, we sign extend here. */
1253           int hb = (low >> 31) & 0x1;
1254           if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
1255             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1256           high = -hb;
1257         }
1258 #ifndef JC1_LITE
1259       value = build_int_2 (low, high);
1260       JAVA_RADIX10_FLAG (value) = radix == 10;
1261       SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
1262 #else
1263       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1264                           long_suffix ? long_type_node : int_type_node);
1265 #endif
1266       return INT_LIT_TK;
1267     }
1268
1269   /* Character literals */
1270   if (c == '\'')
1271     {
1272       int char_lit;
1273       if ((c = java_get_unicode ()) == '\\')
1274         char_lit = java_parse_escape_sequence ();
1275       else
1276         {
1277           if (c == '\n' || c == '\'')
1278             java_lex_error ("Invalid character literal", 0);
1279           char_lit = c;
1280         }
1281
1282       c = java_get_unicode ();
1283
1284       if ((c == '\n') || (c == UEOF))
1285         java_lex_error ("Character literal not terminated at end of line", 0);
1286       if (c != '\'')
1287         java_lex_error ("Syntax error in character literal", 0);
1288
1289       if (char_lit == JAVA_CHAR_ERROR)
1290         char_lit = 0;           /* We silently convert it to zero */
1291
1292       JAVA_LEX_CHAR_LIT (char_lit);
1293       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1294       return CHAR_LIT_TK;
1295     }
1296
1297   /* String literals */
1298   if (c == '"')
1299     {
1300       int no_error;
1301       char *string;
1302
1303       for (no_error = 1, c = java_get_unicode ();
1304            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1305         {
1306           if (c == '\\')
1307             c = java_parse_escape_sequence ();
1308           if (c == JAVA_CHAR_ERROR)
1309             {
1310               no_error = 0;
1311               c = 0;            /* We silently convert it to zero.  */
1312             }
1313           java_unicode_2_utf8 (c);
1314         }
1315       if (c == '\n' || c == UEOF) /* ULT */
1316         {
1317           lineno--;             /* Refer to the line the terminator was seen */
1318           java_lex_error ("String not terminated at end of line", 0);
1319           lineno++;
1320         }
1321
1322       obstack_1grow (&temporary_obstack, '\0');
1323       string = obstack_finish (&temporary_obstack);
1324 #ifndef JC1_LITE
1325       if (!no_error || (c != '"'))
1326         java_lval->node = error_mark_node; /* Requires further testing FIXME */
1327       else
1328         java_lval->node = build_string (strlen (string), string);
1329 #endif
1330       obstack_free (&temporary_obstack, string);
1331       return STRING_LIT_TK;
1332     }
1333   /* NOTE(review): the switch cases below have no `break'; BUILD_OPERATOR and BUILD_OPERATOR2 presumably return from this function -- confirm.  */
1334   /* Separator */
1335   switch (c)
1336     {
1337     case '(':
1338       JAVA_LEX_SEP (c);
1339       BUILD_OPERATOR (OP_TK);
1340     case ')':
1341       JAVA_LEX_SEP (c);
1342       return CP_TK;
1343     case '{':
1344       JAVA_LEX_SEP (c);
1345       if (ctxp->ccb_indent == 1)
1346         ctxp->first_ccb_indent1 = lineno;
1347       ctxp->ccb_indent++;
1348       BUILD_OPERATOR (OCB_TK);
1349     case '}':
1350       JAVA_LEX_SEP (c);
1351       ctxp->ccb_indent--;
1352       if (ctxp->ccb_indent == 1)
1353         ctxp->last_ccb_indent1 = lineno;
1354       BUILD_OPERATOR (CCB_TK);
1355     case '[':
1356       JAVA_LEX_SEP (c);
1357       BUILD_OPERATOR (OSB_TK);
1358     case ']':
1359       JAVA_LEX_SEP (c);
1360       return CSB_TK;
1361     case ';':
1362       JAVA_LEX_SEP (c);
1363       return SC_TK;
1364     case ',':
1365       JAVA_LEX_SEP (c);
1366       return C_TK;
1367     case '.':
1368       JAVA_LEX_SEP (c);
1369       BUILD_OPERATOR (DOT_TK);
1370       /*      return DOT_TK; */
1371     }
1372
1373   /* Operators */
1374   switch (c)
1375     {
1376     case '=':
1377       if ((c = java_get_unicode ()) == '=')
1378         {
1379           BUILD_OPERATOR (EQ_TK);
1380         }
1381       else
1382         {
1383           /* Equals is used in two different locations. In the
1384              variable_declarator: rule, it has to be seen as '=' as opposed
1385              to being seen as an ordinary assignment operator in
1386              assignment_operators: rule.  */
1387           java_unget_unicode ();
1388           BUILD_OPERATOR (ASSIGN_TK);
1389         }
1390
1391     case '>':
1392       switch ((c = java_get_unicode ()))
1393         {
1394         case '=':
1395           BUILD_OPERATOR (GTE_TK);
1396         case '>':
1397           switch ((c = java_get_unicode ()))
1398             {
1399             case '>':
1400               if ((c = java_get_unicode ()) == '=')
1401                 {
1402                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1403                 }
1404               else
1405                 {
1406                   java_unget_unicode ();
1407                   BUILD_OPERATOR (ZRS_TK);
1408                 }
1409             case '=':
1410               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1411             default:
1412               java_unget_unicode ();
1413               BUILD_OPERATOR (SRS_TK);
1414             }
1415         default:
1416           java_unget_unicode ();
1417           BUILD_OPERATOR (GT_TK);
1418         }
1419
1420     case '<':
1421       switch ((c = java_get_unicode ()))
1422         {
1423         case '=':
1424           BUILD_OPERATOR (LTE_TK);
1425         case '<':
1426           if ((c = java_get_unicode ()) == '=')
1427             {
1428               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1429             }
1430           else
1431             {
1432               java_unget_unicode ();
1433               BUILD_OPERATOR (LS_TK);
1434             }
1435         default:
1436           java_unget_unicode ();
1437           BUILD_OPERATOR (LT_TK);
1438         }
1439
1440     case '&':
1441       switch ((c = java_get_unicode ()))
1442         {
1443         case '&':
1444           BUILD_OPERATOR (BOOL_AND_TK);
1445         case '=':
1446           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1447         default:
1448           java_unget_unicode ();
1449           BUILD_OPERATOR (AND_TK);
1450         }
1451
1452     case '|':
1453       switch ((c = java_get_unicode ()))
1454         {
1455         case '|':
1456           BUILD_OPERATOR (BOOL_OR_TK);
1457         case '=':
1458           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1459         default:
1460           java_unget_unicode ();
1461           BUILD_OPERATOR (OR_TK);
1462         }
1463
1464     case '+':
1465       switch ((c = java_get_unicode ()))
1466         {
1467         case '+':
1468           BUILD_OPERATOR (INCR_TK);
1469         case '=':
1470           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1471         default:
1472           java_unget_unicode ();
1473           BUILD_OPERATOR (PLUS_TK);
1474         }
1475
1476     case '-':
1477       switch ((c = java_get_unicode ()))
1478         {
1479         case '-':
1480           BUILD_OPERATOR (DECR_TK);
1481         case '=':
1482           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1483         default:
1484           java_unget_unicode ();
1485           BUILD_OPERATOR (MINUS_TK);
1486         }
1487
1488     case '*':
1489       if ((c = java_get_unicode ()) == '=')
1490         {
1491           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1492         }
1493       else
1494         {
1495           java_unget_unicode ();
1496           BUILD_OPERATOR (MULT_TK);
1497         }
1498
1499     case '/':
1500       if ((c = java_get_unicode ()) == '=')
1501         {
1502           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1503         }
1504       else
1505         {
1506           java_unget_unicode ();
1507           BUILD_OPERATOR (DIV_TK);
1508         }
1509
1510     case '^':
1511       if ((c = java_get_unicode ()) == '=')
1512         {
1513           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1514         }
1515       else
1516         {
1517           java_unget_unicode ();
1518           BUILD_OPERATOR (XOR_TK);
1519         }
1520
1521     case '%':
1522       if ((c = java_get_unicode ()) == '=')
1523         {
1524           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1525         }
1526       else
1527         {
1528           java_unget_unicode ();
1529           BUILD_OPERATOR (REM_TK);
1530         }
1531
1532     case '!':
1533       if ((c = java_get_unicode()) == '=')
1534         {
1535           BUILD_OPERATOR (NEQ_TK);
1536         }
1537       else
1538         {
1539           java_unget_unicode ();
1540           BUILD_OPERATOR (NEG_TK);
1541         }
1542
1543     case '?':
1544       JAVA_LEX_OP ("?");
1545       BUILD_OPERATOR (REL_QM_TK);
1546     case ':':
1547       JAVA_LEX_OP (":");
1548       BUILD_OPERATOR (REL_CL_TK);
1549     case '~':
1550       BUILD_OPERATOR (NOT_TK);
1551     }
1552
1553   /* Keyword, boolean literal or null literal */
1554   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1555        JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1556     {
1557       java_unicode_2_utf8 (c);
1558       if (all_ascii && c >= 128)
1559         all_ascii = 0;
1560       ascii_index++;
1561     }
1562
1563   obstack_1grow (&temporary_obstack, '\0');
1564   string = obstack_finish (&temporary_obstack);
1565   java_unget_unicode ();
1566
1567   /* If we have something all ascii, we consider a keyword, a boolean
1568      literal, a null literal or an all ASCII identifier.  Otherwise,
1569      this is an identifier (possibly not respecting formation rule).  */
1570   if (all_ascii)
1571     {
1572       struct java_keyword *kw;
1573       if ((kw=java_keyword (string, ascii_index)))
1574         {
1575           JAVA_LEX_KW (string);
1576           switch (kw->token)
1577             {
1578             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1579             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1580             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1581             case PRIVATE_TK:      case STRICT_TK:
1582               SET_MODIFIER_CTX (kw->token);
1583               return MODIFIER_TK;
1584             case FLOAT_TK:
1585               SET_LVAL_NODE (float_type_node);
1586               return FP_TK;
1587             case DOUBLE_TK:
1588               SET_LVAL_NODE (double_type_node);
1589               return FP_TK;
1590             case BOOLEAN_TK:
1591               SET_LVAL_NODE (boolean_type_node);
1592               return BOOLEAN_TK;
1593             case BYTE_TK:
1594               SET_LVAL_NODE (byte_type_node);
1595               return INTEGRAL_TK;
1596             case SHORT_TK:
1597               SET_LVAL_NODE (short_type_node);
1598               return INTEGRAL_TK;
1599             case INT_TK:
1600               SET_LVAL_NODE (int_type_node);
1601               return INTEGRAL_TK;
1602             case LONG_TK:
1603               SET_LVAL_NODE (long_type_node);
1604               return INTEGRAL_TK;
1605             case CHAR_TK:
1606               SET_LVAL_NODE (char_type_node);
1607               return INTEGRAL_TK;
1608
1609               /* Keyword based literals */
1610             case TRUE_TK:
1611             case FALSE_TK:
1612               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1613                               boolean_true_node : boolean_false_node));
1614               return BOOL_LIT_TK;
1615             case NULL_TK:
1616               SET_LVAL_NODE (null_pointer_node);
1617               return NULL_TK;
1618
1619               /* Some keyword we want to retain information on the location
1620                  they were found */
1621             case CASE_TK:
1622             case DEFAULT_TK:
1623             case SUPER_TK:
1624             case THIS_TK:
1625             case RETURN_TK:
1626             case BREAK_TK:
1627             case CONTINUE_TK:
1628             case TRY_TK:
1629             case CATCH_TK:
1630             case THROW_TK:
1631             case INSTANCEOF_TK:
1632               BUILD_OPERATOR (kw->token);
1633
1634             default:
1635               return kw->token;
1636             }
1637         }
1638     }
1639
1640   /* We may have an ID here */
1641   if (JAVA_START_CHAR_P (first_unicode))
1642     {
1643       JAVA_LEX_ID (string);
1644       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1645       return ID_TK;
1646     }
1647
1648   /* Everything else is an invalid character in the input */
1649   {
1650     char lex_error_buffer [128];
1651     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1652              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1653     java_lex_error (lex_error_buffer, 1);
1654   }
1655   return 0;
1656 }
1657
1658 #ifndef JC1_LITE
1659 /* Called by the parser on a literal that is not preceded by `-'.  Only
1660    one case is looked at: a radix 10 INTEGER_CST whose bit 31 is set while
1661    the 31 bits below it are clear (in the high word for `long', in the low
1662    word otherwise).  Such a value is reported as a numeric overflow.  */
1663 static void
1664 error_if_numeric_overflow (value)
1665      tree value;
1666 {
1667   unsigned HOST_WIDE_INT low_word, high_word;
1668   int top_bit;
1669
1670   if (TREE_CODE (value) != INTEGER_CST || ! JAVA_RADIX10_FLAG (value))
1671     return;
1672   low_word = TREE_INT_CST_LOW (value);
1673   high_word = TREE_INT_CST_HIGH (value);
1674   if (TREE_TYPE (value) == long_type_node)
1675     {
1676       top_bit = (high_word >> 31);
1677       if (top_bit && (high_word & 0x7fffffff) == 0)
1678         java_lex_error ("Numeric overflow for `long' literal", 0);
1679     }
1680   else
1681     {
1682       top_bit = (low_word >> 31) & 0x1;
1683       if (top_bit && (low_word & 0x7fffffff) == 0)
1684         java_lex_error ("Numeric overflow for `int' literal", 0);
1685     }
1686 }
1687 #endif /* JC1_LITE */
1688
1689 /* Append UNICODE to the object growing on temporary_obstack: one byte for
1690    0x01-0x7f, two for 0 and 0x80-0x7ff (low six bits last), else three.  */
1691 static void
1692 java_unicode_2_utf8 (unicode)
1693     unicode_t unicode;
1694 {
1695   if (RANGE (unicode, 0x01, 0x7f))
1696     {
1697       obstack_1grow (&temporary_obstack, (char)unicode);
1698       return;
1699     }
1700   if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1701     obstack_1grow (&temporary_obstack,
1702                    (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1703   else                          /* Range 0x800-0xffff: lead, then middle */
1704     {
1705       obstack_1grow (&temporary_obstack,
1706                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1707       obstack_1grow (&temporary_obstack,
1708                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1709     }
1710   obstack_1grow (&temporary_obstack, (unsigned char)(0x80 | (unicode & 0x3f)));
1711 }
1712
1713 #ifndef JC1_LITE
1714 /* Wrap NODE with build_expr_wfl at the file/line/column held in ctxp.  */
1715 static tree
1716 build_wfl_node (node)
1717      tree node;
1718 {
1719   tree wfl = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1720   TREE_TYPE (wfl) = NULL_TREE;  /* So java_complete_lhs can't short-circuit it.  */
1721   return wfl;
1722 }
1723 #endif
1724
1725 /* Record the current line and column (char_col - 1 + FORWARD) in ctxp->elc
1726    and report MSG with java_error.  Does nothing when JC1_LITE is defined.  */
1727 static void
1728 java_lex_error (msg, forward)
1729      const char *msg ATTRIBUTE_UNUSED;
1730      int forward ATTRIBUTE_UNUSED;
1731 {
1732 #ifndef JC1_LITE
1733   ctxp->java_error_flag = 0;    /* We might be in the middle of a report.  */
1734   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1735   ctxp->elc.line = ctxp->c_line->lineno;
1736   java_error (NULL);
1737   java_error (msg);
1738 #endif
1739 }
1740
1741 #ifndef JC1_LITE
1742 /* Return 1 if C, just read from FP, is '\r' or '\n', and 0 otherwise.  A
1743    '\n' right after a '\r' is consumed, so that "\r\n" counts only once.  */
1744 static int
1745 java_is_eol (fp, c)
1746   FILE *fp;
1747   int c;
1748 {
1749   if (c == '\n')
1750     return 1;
1751   if (c == '\r')
1752     {
1753       /* Peek at the following character; give it back unless it is '\n'.  */
1754       int following = getc (fp);
1755       if (following != EOF && following != '\n')
1756         ungetc (following, fp);
1757       return 1;
1758     }
1759   return 0;
1760 }
1761 #endif
1762
1763 /* Return line LINE of FILENAME followed by a newline and by a second line
1764    made of padding and a `^' marker placed from COL; the text is built on
1765    temporary_obstack.  COL == -1 means, at the CR/LF in LINE; COL == -2
1766    means, at the first non space char in LINE.  JC1_LITE: return 0.  */
1767 char *
1768 java_get_line_col (filename, line, col)
1769      const char *filename ATTRIBUTE_UNUSED;
1770      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1771 {
1772 #ifdef JC1_LITE
1773   return 0;
1774 #else
1775   /* Dumb implementation. Doesn't try to cache or optimize things.
1776      First line of the file is line 1, first column is 1.  */
1777   FILE *fp;
1778   int c, ccol, cline = 1;
1779   int current_line_col = 0;
1780   int first_non_space = 0;
1781   char *base;
1782
1783   if (!(fp = fopen (filename, "r")))
1784     fatal_io_error ("can't open %s", filename);
1785
1786   while (cline != line)
1787     {
1788       c = getc (fp);
1789       if (c == EOF)
1790         {
1791           static const char msg[] = "<<file too short - unexpected EOF>>";
1792           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1793           goto have_line;
1794         }
1795       if (java_is_eol (fp, c))
1796         cline++;
1797     }
1798   /* Gather the chars of the current line in a buffer.  NOTE(review): a non
1799      space char in column 0 leaves first_non_space at 0, i.e. still unset.  */
1800   for (;;)
1801     {
1802       c = getc (fp);
1803       if (c < 0 || java_is_eol (fp, c))
1804         break;
1805       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1806         first_non_space = current_line_col;
1807       obstack_1grow (&temporary_obstack, c);
1808       current_line_col++;
1809     }
1810  have_line:
1811
1812   obstack_1grow (&temporary_obstack, '\n');
1813
1814   if (col == -1)
1815     {
1816       col = current_line_col;
1817       first_non_space = 0;
1818     }
1819   else if (col == -2)
1820     col = first_non_space;
1821   else
1822     first_non_space = 0;
1823
1824   /* Place the '^' at the right position.  The growing object may move,
1825      so its base is fetched anew each time; \t is kept for alignment.  */
1826   for (ccol = 1; ccol <= col+3; ccol++)
1827     {
1828       char pad;
1829       base = obstack_base (&temporary_obstack);
1830       pad = (first_non_space && base [ccol-1] == '\t') ? '\t' : ' ';
1831       obstack_1grow (&temporary_obstack, pad);
1832     }
1833   obstack_grow0 (&temporary_obstack, "^", 1);
1834
1835   fclose (fp);
1836   return obstack_finish (&temporary_obstack);
1837 #endif
1838 }
1839
1840 #ifndef JC1_LITE
1841 /* Compare the LENGTH bytes at STR, decoded with UTF8_GET, against the C
1842    string NAME.  Return 0 when they match and a nonzero value otherwise.  */
1843 static int
1844 utf8_cmp (str, length, name)
1845      const unsigned char *str;
1846      int length;
1847      const char *name;
1848 {
1849   const unsigned char *end = str + length;
1850   const char *p;
1851   for (p = name; *p != '\0'; p++)
1852     {
1853       int decoded = UTF8_GET (str, end);
1854       if (decoded != *p)
1855         return decoded - *p;
1856     }
1857   return str != end;
1858 }
1859
1860 /* A sorted list of all C++ keywords.  Keep it sorted when adding entries:
1861    cxx_keyword_p below appears to search it by bisection (first/last/mid).  */
1862 static const char *const cxx_keywords[] =
1863 {
1864   "_Complex",
1865   "__alignof",
1866   "__alignof__",
1867   "__asm",
1868   "__asm__",
1869   "__attribute",
1870   "__attribute__",
1871   "__builtin_va_arg",
1872   "__complex",
1873   "__complex__",
1874   "__const",
1875   "__const__",
1876   "__extension__",
1877   "__imag",
1878   "__imag__",
1879   "__inline",
1880   "__inline__",
1881   "__label__",
1882   "__null",
1883   "__real",
1884   "__real__",
1885   "__restrict",
1886   "__restrict__",
1887   "__signed",
1888   "__signed__",
1889   "__typeof",
1890   "__typeof__",
1891   "__volatile",
1892   "__volatile__",
1893   "and",
1894   "and_eq",
1895   "asm",
1896   "auto",
1897   "bitand",
1898   "bitor",
1899   "bool",
1900   "break",
1901   "case",
1902   "catch",
1903   "char",
1904   "class",
1905   "compl",
1906   "const",
1907   "const_cast",
1908   "continue",
1909   "default",
1910   "delete",
1911   "do",
1912   "double",
1913   "dynamic_cast",
1914   "else",
1915   "enum",
1916   "explicit",
1917   "export",
1918   "extern",
1919   "false",
1920   "float",
1921   "for",
1922   "friend",
1923   "goto",
1924   "if",
1925   "inline",
1926   "int",
1927   "long",
1928   "mutable",
1929   "namespace",
1930   "new",
1931   "not",
1932   "not_eq",
1933   "operator",
1934   "or",
1935   "or_eq",
1936   "private",
1937   "protected",
1938   "public",
1939   "register",
1940   "reinterpret_cast",
1941   "return",
1942   "short",
1943   "signed",
1944   "sizeof",
1945   "static",
1946   "static_cast",
1947   "struct",
1948   "switch",
1949   "template",
1950   "this",
1951   "throw",
1952   "true",
1953   "try",
1954   "typedef",
1955   "typeid",
1956   "typename",
1957   "typeof",
1958   "union",
1959   "unsigned",
1960   "using",
1961   "virtual",
1962   "void",
1963   "volatile",
1964   "wchar_t",
1965   "while",
1966   "xor",
1967   "xor_eq"
1968 };
1969
1970 /* Return true if NAME is a C++ keyword.  */
1971
1972 int
1973 cxx_keyword_p (name, length)
1974      const char *name;
1975      int length;
1976 {
1977   int last = ARRAY_SIZE (cxx_keywords);
1978   int first = 0;
1979   int mid = (last + first) / 2;
1980   int old = -1;
1981
1982   for (mid = (last + first) / 2;
1983        mid != old;
1984        old = mid, mid = (last + first) / 2)
1985     {
1986       int kwl = strlen (cxx_keywords[mid]);
1987       int min_length = kwl > length ? length : kwl;
1988       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1989
1990       if (r == 0)
1991         {
1992           int i;
1993           /* We've found a match if all the remaining characters are
1994              `$'.  */
1995           for (i = min_length; i < length && name[i] == '$'; ++i)
1996             ;
1997           if (i == length)
1998             return 1;
1999           r = 1;
2000         }
2001
2002       if (r < 0)
2003         last = mid;
2004       else
2005         first = mid;
2006     }
2007   return 0;
2008 }
2009 #endif /* JC1_LITE */