gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* This file defines java_lex (yylex), which reads a Java ASCII source
  27    file possibly containing Unicode escape sequences or UTF-8 encoded
  28    characters and returns a token for everything found except comments,
  29    white space and line terminators.  When necessary, it also fills
  30    the java_lval (yylval) union.  It is implemented to be called by a
  31    re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification", J. Gosling, B. Joy, G. Steele,
  35    Addison Wesley 1996 (http://java.sun.com/docs/books/jls/html/3.doc.html).  */
  36
  37 #include "keyword.h"
  38
  39 #ifndef JC1_LITE
  40 extern struct obstack *expression_obstack;
  41 #endif
  42
  43 /* Function declaration  */
  44 static int java_lineterminator PARAMS ((unicode_t));
  45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  47 static void java_lex_error PARAMS ((const char *, int));
  48 #ifndef JC1_LITE
  49 static int java_is_eol PARAMS ((FILE *, int));
  50 static tree build_wfl_node PARAMS ((tree));
  51 #endif
  52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  53 static unicode_t java_parse_escape_sequence PARAMS ((void));
  54 static int java_letter_or_digit_p PARAMS ((unicode_t));
  55 static int java_parse_doc_section PARAMS ((unicode_t));
  56 static void java_parse_end_comment PARAMS ((unicode_t));
  57 static unicode_t java_get_unicode PARAMS ((void));
  58 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static unicode_t java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  65
  66 void
  67 java_init_lex (finput, encoding)
  68      FILE *finput;
  69      const char *encoding;
  70 {
  71 #ifndef JC1_LITE
  72   int java_lang_imported = 0;
  73
  74   if (!java_lang_id)
  75     java_lang_id = get_identifier ("java.lang");
  76   if (!java_lang_cloneable)
  77     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  78   if (!java_io_serializable)
  79     java_io_serializable = get_identifier ("java.io.Serializable");
  80   if (!inst_id)
  81     inst_id = get_identifier ("inst$");
  82   if (!wpv_id)
  83     wpv_id = get_identifier ("write_parm_value$");
  84
  85   if (!java_lang_imported)
  86     {
  87       tree node = build_tree_list
  88         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
  89       read_import_dir (TREE_PURPOSE (node));
  90       TREE_CHAIN (node) = ctxp->import_demand_list;
  91       ctxp->import_demand_list = node;
  92       java_lang_imported = 1;
  93     }
  94
  95   if (!wfl_operator)
  96     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  97   if (!label_id)
  98     label_id = get_identifier ("$L");
  99   if (!wfl_append)
 100     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 101   if (!wfl_string_buffer)
 102     wfl_string_buffer =
 103       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
 104   if (!wfl_to_string)
 105     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 106
 107   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 108     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
 109
 110   bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
 111   bzero ((PTR) current_jcf, sizeof (JCF));
 112   ctxp->current_parsed_class = NULL;
 113   ctxp->package = NULL_TREE;
 114 #endif
 115
 116   ctxp->filename = input_filename;
 117   ctxp->lineno = lineno = 0;
 118   ctxp->p_line = NULL;
 119   ctxp->c_line = NULL;
 120   ctxp->minus_seen = 0;
 121   ctxp->java_error_flag = 0;
 122   ctxp->lexer = java_new_lexer (finput, encoding);
 123 }
 124
 125 static char *
 126 java_sprint_unicode (line, i)
 127     struct java_line *line;
 128     int i;
 129 {
 130   static char buffer [10];
 131   if (line->unicode_escape_p [i] || line->line [i] > 128)
 132     sprintf (buffer, "\\u%04x", line->line [i]);
 133   else
 134     {
 135       buffer [0] = line->line [i];
 136       buffer [1] = '\0';
 137     }
 138   return buffer;
 139 }
 140
 141 static unicode_t
 142 java_sneak_unicode ()
 143 {
 144   return (ctxp->c_line->line [ctxp->c_line->current]);
 145 }
 146
 147 static void
 148 java_unget_unicode ()
 149 {
 150   if (!ctxp->c_line->current)
 151     fatal ("can't unget unicode - java_unget_unicode");
 152   ctxp->c_line->current--;
 153   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 154 }
 155
 156 static void
 157 java_allocate_new_line ()
 158 {
 159   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 160   char ahead_escape_p = (ctxp->c_line ?
 161                          ctxp->c_line->unicode_escape_ahead_p : 0);
 162
 163   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 164     {
 165       if (ctxp->p_line)
 166         {
 167           free (ctxp->p_line->unicode_escape_p);
 168           free (ctxp->p_line->line);
 169           free (ctxp->p_line);
 170         }
 171       ctxp->p_line = ctxp->c_line;
 172       ctxp->c_line = NULL;              /* Reallocated */
 173     }
 174
 175   if (!ctxp->c_line)
 176     {
 177       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 178       ctxp->c_line->max = JAVA_LINE_MAX;
 179       ctxp->c_line->line = (unicode_t *)xmalloc
 180         (sizeof (unicode_t)*ctxp->c_line->max);
 181       ctxp->c_line->unicode_escape_p =
 182           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 183       ctxp->c_line->white_space_only = 0;
 184     }
 185
 186   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 187   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 188   if (ahead)
 189     {
 190       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 191       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 192       ctxp->c_line->size++;
 193     }
 194   ctxp->c_line->ahead [0] = 0;
 195   ctxp->c_line->unicode_escape_ahead_p = 0;
 196   ctxp->c_line->lineno = ++lineno;
 197   ctxp->c_line->white_space_only = 1;
 198 }
 199
 200 /* Create a new lexer object.  */
 201 java_lexer *
 202 java_new_lexer (finput, encoding)
 203      FILE *finput;
 204      const char *encoding;
 205 {
 206   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 207   int enc_error = 0;
 208
 209   lex->finput = finput;
 210   lex->bs_count = 0;
 211   lex->unget_value = 0;
 212
 213 #ifdef HAVE_ICONV
 214   lex->handle = iconv_open ("UCS-2", encoding);
 215   if (lex->handle == (iconv_t) -1)
 216     {
 217       /* FIXME: we should give a nice error based on errno here.  */
 218       enc_error = 1;
 219     }
 220   lex->first = -1;
 221   lex->last = -1;
 222 #else /* HAVE_ICONV */
 223   if (strcmp (encoding, DEFAULT_ENCODING))
 224     enc_error = 1;
 225 #endif /* HAVE_ICONV */
 226
 227   if (enc_error)
 228     fatal ("unknown encoding: `%s'", encoding);
 229
 230   return lex;
 231 }
 232
 233 void
 234 java_destroy_lexer (lex)
 235      java_lexer *lex;
 236 {
 237 #ifdef HAVE_ICONV
 238   iconv_close (lex->handle);
 239 #endif
 240   free (lex);
 241 }
 242
 243 static unicode_t
 244 java_read_char (lex)
 245      java_lexer *lex;
 246 {
 247   if (lex->unget_value)
 248     {
 249       unicode_t r = lex->unget_value;
 250       lex->unget_value = 0;
 251       return r;
 252     }
 253
 254 #ifdef HAVE_ICONV
 255   {
 256     char out[2];
 257     size_t ir, inbytesleft, in_save, out_count;
 258     char *inp, *outp;
 259
 260     while (1)
 261       {
 262         /* See if we need to read more data.  If FIRST == 0 then the
 263            previous conversion attempt ended in the middle of a
 264            character at the end of the buffer.  Otherwise we only have
 265            to read if the buffer is empty.  */
 266         if (lex->first == 0 || lex->first >= lex->last)
 267           {
 268             int r;
 269
 270             if (lex->first >= lex->last)
 271               {
 272                 lex->first = 0;
 273                 lex->last = 0;
 274               }
 275             if (feof (lex->finput))
 276               return UEOF;
 277             r = fread (&lex->buffer[lex->last], 1,
 278                        sizeof (lex->buffer) - lex->last,
 279                        lex->finput);
 280             lex->last += r;
 281           }
 282
 283         inbytesleft = lex->last - lex->first;
 284
 285         if (inbytesleft == 0)
 286           {
 287             /* We've tried to read and there is nothing left.  */
 288             return UEOF;
 289           }
 290
 291         in_save = inbytesleft;
 292         out_count = 2;
 293         inp = &lex->buffer[lex->first];
 294         outp = out;
 295         ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
 296                     &outp, &out_count);
 297         lex->first += in_save - inbytesleft;
 298
 299         if (out_count == 0)
 300           {
 301             /* Success.  We assume that UCS-2 is big-endian.  This
 302                appears to be an ok assumption.  */
 303             unicode_t result;
 304             result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
 305             return result;
 306           }
 307
 308         if (ir == (size_t) -1)
 309           {
 310             if (errno == EINVAL)
 311               {
 312                 /* This is ok.  This means that the end of our buffer
 313                    is in the middle of a character sequence.  We just
 314                    move the valid part of the buffer to the beginning
 315                    to force a read.  */
 316                 /* We use bcopy() because it should work for
 317                    overlapping strings.  Use memmove() instead... */
 318                 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
 319                        lex->last - lex->first);
 320                 lex->last -= lex->first;
 321                 lex->first = 0;
 322               }
 323             else
 324               {
 325                 /* A more serious error.  */
 326                 java_lex_error ("unrecognized character in input stream", 0);
 327                 return UEOF;
 328               }
 329           }
 330       }
 331   }
 332 #else /* HAVE_ICONV */
 333   {
 334     int c, c1, c2;
 335     c = getc (lex->finput);
 336
 337     if (c < 128)
 338       return (unicode_t)c;
 339     if (c == EOF)
 340       return UEOF;
 341     else
 342       {
 343         if ((c & 0xe0) == 0xc0)
 344           {
 345             c1 = getc (lex->finput);
 346             if ((c1 & 0xc0) == 0x80)
 347               return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 348             c = c1;
 349           }
 350         else if ((c & 0xf0) == 0xe0)
 351           {
 352             c1 = getc (lex->finput);
 353             if ((c1 & 0xc0) == 0x80)
 354               {
 355                 c2 = getc (lex->finput);
 356                 if ((c2 & 0xc0) == 0x80)
 357                   return (unicode_t)(((c & 0xf) << 12) +
 358                                      (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 359                 else
 360                   c = c2;
 361               }
 362             else
 363               c = c1;
 364           }
 365
 366         /* We simply don't support invalid characters.  */
 367         java_lex_error ("malformed UTF-8 character", 0);
 368       }
 369   }
 370 #endif /* HAVE_ICONV */
 371
 372   /* We only get here on error.  */
 373   return UEOF;
 374 }
 375
 376 static void
 377 java_store_unicode (l, c, unicode_escape_p)
 378     struct java_line *l;
 379     unicode_t c;
 380     int unicode_escape_p;
 381 {
 382   if (l->size == l->max)
 383     {
 384       l->max += JAVA_LINE_MAX;
 385       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 386       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 387                                                sizeof (char)*l->max);
 388     }
 389   l->line [l->size] = c;
 390   l->unicode_escape_p [l->size++] = unicode_escape_p;
 391 }
 392
 393 static unicode_t
 394 java_read_unicode (lex, term_context, unicode_escape_p)
 395      java_lexer *lex;
 396      int term_context;
 397      int *unicode_escape_p;
 398 {
 399   unicode_t c;
 400
 401   c = java_read_char (lex);
 402   *unicode_escape_p = 0;
 403
 404   if (c != '\\')
 405     {
 406       lex->bs_count = 0;
 407       return (term_context ? c : (java_lineterminator (c)
 408                                   ? '\n'
 409                                   : (unicode_t) c));
 410     }
 411
 412   ++lex->bs_count;
 413   if ((lex->bs_count) % 2 == 1)
 414     {
 415       /* Odd number of \ seen.  */
 416       c = java_read_char (lex);
 417       if (c == 'u')
 418         {
 419           unicode_t unicode = 0;
 420           int shift = 12;
 421           /* Next should be 4 hex digits, otherwise it's an error.
 422              The hex value is converted into the unicode, pushed into
 423              the Unicode stream.  */
 424           for (shift = 12; shift >= 0; shift -= 4)
 425             {
 426               if ((c = java_read_char (lex)) == UEOF)
 427                 return UEOF;
 428               if (c >= '0' && c <= '9')
 429                 unicode |= (unicode_t)((c-'0') << shift);
 430               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 431                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 432               else
 433                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 434             }
 435           lex->bs_count = 0;
 436           *unicode_escape_p = 1;
 437           return (term_context
 438                   ? unicode : (java_lineterminator (c) ? '\n' : unicode));
 439         }
 440       lex->unget_value = c;
 441     }
 442   return (unicode_t) '\\';
 443 }
 444
 445 static unicode_t
 446 java_get_unicode ()
 447 {
 448   /* It's time to read a line when... */
 449   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 450     {
 451       unicode_t c;
 452       java_allocate_new_line ();
 453       if (ctxp->c_line->line[0] != '\n')
 454         for (;;)
 455           {
 456             int unicode_escape_p;
 457             c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
 458             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 459             if (ctxp->c_line->white_space_only
 460                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 461               ctxp->c_line->white_space_only = 0;
 462             if ((c == '\n') || (c == UEOF))
 463               break;
 464           }
 465     }
 466   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 467   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 468   return ctxp->c_line->line [ctxp->c_line->current++];
 469 }
 470
 471 static int
 472 java_lineterminator (c)
 473      unicode_t c;
 474 {
 475   if (c == '\n')                /* LF */
 476     return 1;
 477   else if (c == '\r')           /* CR */
 478     {
 479       int unicode_escape_p;
 480       c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
 481       if (c == '\r')
 482         {
 483           /* In this case we will have another terminator.  For some
 484              reason the lexer has several different unget methods.  We
 485              can't use the `ahead' method because then the \r will end
 486              up in the actual text of the line, causing an error.  So
 487              instead we choose a very low-level method.  FIXME: this
 488              is incredibly ugly.  */
 489           ctxp->lexer->unget_value = c;
 490         }
 491       else if (c != '\n')
 492         {
 493           ctxp->c_line->ahead [0] = c;
 494           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 495         }
 496       return 1;
 497     }
 498   else
 499     return 0;
 500 }
 501
 502 /* Parse the end of a C style comment.
 503  * C is the first character following the '/' and '*'. */
 504 static void
 505 java_parse_end_comment (c)
 506      unicode_t c;
 507 {
 508
 509   for ( ;; c = java_get_unicode ())
 510     {
 511       switch (c)
 512         {
 513         case UEOF:
 514           java_lex_error ("Comment not terminated at end of input", 0);
 515         case '*':
 516           switch (c = java_get_unicode ())
 517             {
 518             case UEOF:
 519               java_lex_error ("Comment not terminated at end of input", 0);
 520             case '/':
 521               return;
 522             case '*':   /* reparse only '*' */
 523               java_unget_unicode ();
 524             }
 525         }
 526     }
 527 }
 528
 529 /* Parse the documentation section. Keywords must be at the beginning
 530    of a documentation comment line (ignoring white space and any `*'
 531    character). Parsed keyword(s): @DEPRECATED.  */
 532
 533 static int
 534 java_parse_doc_section (c)
 535      unicode_t c;
 536 {
 537   int valid_tag = 0, seen_star = 0;
 538
 539   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 540     {
 541       switch (c)
 542         {
 543         case '*':
 544           seen_star = 1;
 545           break;
 546         case '\n': /* ULT */
 547           valid_tag = 1;
 548         default:
 549           seen_star = 0;
 550         }
 551       c = java_get_unicode();
 552     }
 553
 554   if (c == UEOF)
 555     java_lex_error ("Comment not terminated at end of input", 0);
 556
 557   if (seen_star && (c == '/'))
 558     return 1;                   /* Goto step1 in caller */
 559
 560   /* We're parsing @deprecated */
 561   if (valid_tag && (c == '@'))
 562     {
 563       char tag [11];
 564       int  tag_index = 0;
 565
 566       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 567         {
 568           c = java_get_unicode ();
 569           tag [tag_index++] = c;
 570         }
 571
 572       if (c == UEOF)
 573         java_lex_error ("Comment not terminated at end of input", 0);
 574       tag [tag_index] = '\0';
 575
 576       if (!strcmp (tag, "deprecated"))
 577         ctxp->deprecated = 1;
 578     }
 579   java_unget_unicode ();
 580   return 0;
 581 }
 582
 583 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
 584    will return a wrong result.  */
 585 static int
 586 java_letter_or_digit_p (c)
 587      unicode_t c;
 588 {
 589   return _JAVA_LETTER_OR_DIGIT_P (c);
 590 }
 591
 592 static unicode_t
 593 java_parse_escape_sequence ()
 594 {
 595   unicode_t char_lit;
 596   unicode_t c;
 597
 598   switch (c = java_get_unicode ())
 599     {
 600     case 'b':
 601       return (unicode_t)0x8;
 602     case 't':
 603       return (unicode_t)0x9;
 604     case 'n':
 605       return (unicode_t)0xa;
 606     case 'f':
 607       return (unicode_t)0xc;
 608     case 'r':
 609       return (unicode_t)0xd;
 610     case '"':
 611       return (unicode_t)0x22;
 612     case '\'':
 613       return (unicode_t)0x27;
 614     case '\\':
 615       return (unicode_t)0x5c;
 616     case '0': case '1': case '2': case '3': case '4':
 617     case '5': case '6': case '7': case '8': case '9':
 618       {
 619         int octal_escape[3];
 620         int octal_escape_index = 0;
 621
 622         for (; octal_escape_index < 3 && RANGE (c, '0', '9');
 623              c = java_get_unicode ())
 624           octal_escape [octal_escape_index++] = c;
 625
 626         java_unget_unicode ();
 627
 628         if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
 629           {
 630             java_lex_error ("Literal octal escape out of range", 0);
 631             return JAVA_CHAR_ERROR;
 632           }
 633         else
 634           {
 635             int i, shift;
 636             for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 637                  i < octal_escape_index; i++, shift -= 3)
 638               char_lit |= (octal_escape [i] - '0') << shift;
 639
 640             return (char_lit);
 641           }
 642         break;
 643       }
 644     case '\n':
 645       return '\n';              /* ULT, caught latter as a specific error */
 646     default:
 647       java_lex_error ("Illegal character in escape sequence", 0);
 648       return JAVA_CHAR_ERROR;
 649     }
 650 }
 651
 652 /* Isolate the code which may raise an arithmetic exception in its
 653    own function.  */
 654
 655 #ifndef JC1_LITE
 656 struct jpa_args
 657 {
 658   YYSTYPE *java_lval;
 659   char *literal_token;
 660   int fflag;
 661   int number_beginning;
 662 };
 663
 664 static void java_perform_atof   PARAMS ((PTR));
 665
 666 static void
 667 java_perform_atof (av)
 668      PTR av;
 669 {
 670   struct jpa_args *a = (struct jpa_args *)av;
 671   YYSTYPE *java_lval = a->java_lval;
 672   int number_beginning = a->number_beginning;
 673   REAL_VALUE_TYPE value;
 674   tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 675
 676   SET_REAL_VALUE_ATOF (value,
 677                        REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
 678
 679   if (REAL_VALUE_ISINF (value)
 680       || REAL_VALUE_ISNAN (value))
 681     {
 682       JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
 683       value = DCONST0;
 684     }
 685
 686   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 687 }
 688 #endif
 689
 690 static int yylex                PARAMS ((YYSTYPE *));
 691
 692 static int
 693 #ifdef JC1_LITE
 694 yylex (java_lval)
 695 #else
 696 java_lex (java_lval)
 697 #endif
 698      YYSTYPE *java_lval;
 699 {
 700   unicode_t c, first_unicode;
 701   int ascii_index, all_ascii;
 702   char *string;
 703
 704   /* Translation of the Unicode escape in the raw stream of Unicode
 705      characters. Takes care of line terminator.  */
 706  step1:
 707   /* Skip white spaces: SP, TAB and FF or ULT */
 708   for (c = java_get_unicode ();
 709        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 710     if (c == '\n')
 711       {
 712         ctxp->elc.line = ctxp->c_line->lineno;
 713         ctxp->elc.col  = ctxp->c_line->char_col-2;
 714       }
 715
 716   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 717
 718   if (c == 0x1a)                /* CTRL-Z */
 719     {
 720       if ((c = java_get_unicode ()) == UEOF)
 721         return 0;               /* Ok here */
 722       else
 723         java_unget_unicode ();  /* Caught latter at the end the function */
 724     }
 725   /* Handle EOF here */
 726   if (c == UEOF)        /* Should probably do something here... */
 727     return 0;
 728
 729   /* Take care of eventual comments.  */
 730   if (c == '/')
 731     {
 732       switch (c = java_get_unicode ())
 733         {
 734         case '/':
 735           for (;;)
 736             {
 737               c = java_get_unicode ();
 738               if (c == UEOF)
 739                 java_lex_error ("Comment not terminated at end of input", 0);
 740               if (c == '\n')    /* ULT */
 741                 goto step1;
 742             }
 743           break;
 744
 745         case '*':
 746           if ((c = java_get_unicode ()) == '*')
 747             {
 748               if ((c = java_get_unicode ()) == '/')
 749                 goto step1;     /* Empy documentation comment  */
 750               else if (java_parse_doc_section (c))
 751                 goto step1;
 752             }
 753
 754           java_parse_end_comment ((c = java_get_unicode ()));
 755           goto step1;
 756           break;
 757         default:
 758           java_unget_unicode ();
 759           c = '/';
 760           break;
 761         }
 762     }
 763
 764   ctxp->elc.line = ctxp->c_line->lineno;
 765   ctxp->elc.prev_col = ctxp->elc.col;
 766   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 767   if (ctxp->elc.col < 0)
 768     fatal ("ctxp->elc.col < 0 - java_lex");
 769
 770   /* Numeric literals */
 771   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 772     {
 773       /* This section of code is borrowed from gcc/c-lex.c  */
 774 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 775       int parts[TOTAL_PARTS];
 776       HOST_WIDE_INT high, low;
 777       /* End borrowed section  */
 778       char literal_token [256];
 779       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 780       int  i;
 781 #ifndef JC1_LITE
 782       int  number_beginning = ctxp->c_line->current;
 783 #endif
 784
 785       /* We might have a . separator instead of a FP like .[0-9]* */
 786       if (c == '.')
 787         {
 788           unicode_t peep = java_sneak_unicode ();
 789
 790           if (!JAVA_ASCII_DIGIT (peep))
 791             {
 792               JAVA_LEX_SEP('.');
 793               BUILD_OPERATOR (DOT_TK);
 794             }
 795         }
 796
 797       for (i = 0; i < TOTAL_PARTS; i++)
 798         parts [i] = 0;
 799
 800       if (c == '0')
 801         {
 802           c = java_get_unicode ();
 803           if (c == 'x' || c == 'X')
 804             {
 805               radix = 16;
 806               c = java_get_unicode ();
 807             }
 808           else if (JAVA_ASCII_DIGIT (c))
 809             radix = 8;
 810           else if (c == '.')
 811             {
 812               /* Push the '.' back and prepare for a FP parsing... */
 813               java_unget_unicode ();
 814               c = '0';
 815             }
 816           else
 817             {
 818               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 819               JAVA_LEX_LIT ("0", 10);
 820               switch (c)
 821                 {
 822                 case 'L': case 'l':
 823                   SET_LVAL_NODE (long_zero_node);
 824                   return (INT_LIT_TK);
 825                 case 'f': case 'F':
 826                   SET_LVAL_NODE (float_zero_node);
 827                   return (FP_LIT_TK);
 828                 case 'd': case 'D':
 829                   SET_LVAL_NODE (double_zero_node);
 830                   return (FP_LIT_TK);
 831                 default:
 832                   java_unget_unicode ();
 833                   SET_LVAL_NODE (integer_zero_node);
 834                   return (INT_LIT_TK);
 835                 }
 836             }
 837         }
 838       /* Parse the first part of the literal, until we find something
 839          which is not a number.  */
 840       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 841              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 842              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 843         {
 844           /* We store in a string (in case it turns out to be a FP) and in
 845              PARTS if we have to process a integer literal.  */
 846           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
 847           int count;
 848
 849           literal_token [literal_index++] = c;
 850           /* This section of code if borrowed from gcc/c-lex.c  */
 851           for (count = 0; count < TOTAL_PARTS; count++)
 852             {
 853               parts[count] *= radix;
 854               if (count)
 855                 {
 856                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 857                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 858                 }
 859               else
 860                 parts[0] += numeric;
 861             }
 862           if (parts [TOTAL_PARTS-1] != 0)
 863             overflow = 1;
 864           /* End borrowed section.  */
 865           c = java_get_unicode ();
 866         }
 867
 868       /* If we have something from the FP char set but not a digit, parse
 869          a FP literal.  */
 870       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 871         {
 872           int stage = 0;
 873           int seen_digit = (literal_index ? 1 : 0);
 874           int seen_exponent = 0;
 875           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
 876                                    double unless specified. */
 877           if (radix != 10)
 878             java_lex_error ("Can't express non-decimal FP literal", 0);
 879
 880           for (;;)
 881             {
 882               if (c == '.')
 883                 {
 884                   if (stage < 1)
 885                     {
 886                       stage = 1;
 887                       literal_token [literal_index++ ] = c;
 888                       c = java_get_unicode ();
 889                     }
 890                   else
 891                     java_lex_error ("Invalid character in FP literal", 0);
 892                 }
 893
 894               if (c == 'e' || c == 'E')
 895                 {
 896                   if (stage < 2)
 897                     {
 898                       /* {E,e} must have seen at list a digit */
 899                       if (!seen_digit)
 900                         java_lex_error ("Invalid FP literal", 0);
 901                       seen_digit = 0;
 902                       seen_exponent = 1;
 903                       stage = 2;
 904                       literal_token [literal_index++] = c;
 905                       c = java_get_unicode ();
 906                     }
 907                   else
 908                     java_lex_error ("Invalid character in FP literal", 0);
 909                 }
 910               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 911                 {
 912                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 913                   stage = 4;    /* So we fall through */
 914                 }
 915
 916               if ((c=='-' || c =='+') && stage == 2)
 917                 {
 918                   stage = 3;
 919                   literal_token [literal_index++] = c;
 920                   c = java_get_unicode ();
 921                 }
 922
 923               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 924                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 925                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 926                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
 927                 {
 928                   if (JAVA_ASCII_DIGIT (c))
 929                     seen_digit = 1;
 930                   literal_token [literal_index++ ] = c;
 931                   c = java_get_unicode ();
 932                 }
 933               else
 934                 {
 935 #ifndef JC1_LITE
 936                   struct jpa_args a;
 937 #endif
 938                   if (stage != 4) /* Don't push back fF/dD */
 939                     java_unget_unicode ();
 940
 941                   /* An exponent (if any) must have seen a digit.  */
 942                   if (seen_exponent && !seen_digit)
 943                     java_lex_error ("Invalid FP literal", 0);
 944
 945                   literal_token [literal_index] = '\0';
 946                   JAVA_LEX_LIT (literal_token, radix);
 947
 948 #ifndef JC1_LITE
 949                   a.literal_token = literal_token;
 950                   a.fflag = fflag;
 951                   a.java_lval = java_lval;
 952                   a.number_beginning = number_beginning;
 953                   if (do_float_handler (java_perform_atof, (PTR) &a))
 954                     return FP_LIT_TK;
 955
 956                   JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 957 #else
 958                   return FP_LIT_TK;
 959 #endif
 960                 }
 961             }
 962         } /* JAVA_ASCCI_FPCHAR (c) */
 963
 964       /* Here we get back to converting the integral literal.  */
 965       if (c == 'L' || c == 'l')
 966         long_suffix = 1;
 967       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 968         java_lex_error ("Digit out of range in hexadecimal literal", 0);
 969       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 970         java_lex_error ("Digit out of range in octal literal", 0);
 971       else if (radix == 16 && !literal_index)
 972         java_lex_error ("No digit specified for hexadecimal literal", 0);
 973       else
 974         java_unget_unicode ();
 975
 976 #ifdef JAVA_LEX_DEBUG
 977       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
 978       JAVA_LEX_LIT (literal_token, radix);
 979 #endif
 980       /* This section of code is borrowed from gcc/c-lex.c  */
 981       if (!overflow)
 982         {
 983           bytes = GET_TYPE_PRECISION (long_type_node);
 984           for (i = bytes; i < TOTAL_PARTS; i++)
 985             if (parts [i])
 986               {
 987                 overflow = 1;
 988                 break;
 989               }
 990         }
 991       high = low = 0;
 992       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
 993         {
 994           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
 995                                               / HOST_BITS_PER_CHAR)]
 996                    << (i * HOST_BITS_PER_CHAR));
 997           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
 998         }
 999       /* End borrowed section.  */
1000
1001       /* Range checking */
1002       if (long_suffix)
1003         {
1004           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1005              9223372036854775807L is the biggest `long' literal that can be
1006              expressed using a 10 radix. For other radixes, everything that
1007              fits withing 64 bits is OK. */
1008           int hb = (high >> 31);
1009           if (overflow || (hb && low && radix == 10) ||
1010               (hb && high & 0x7fffffff && radix == 10) ||
1011               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1012             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1013         }
1014       else
1015         {
1016           /* 2147483648 is valid if operand of a '-'. Otherwise,
1017              2147483647 is the biggest `int' literal that can be
1018              expressed using a 10 radix. For other radixes, everything
1019              that fits within 32 bits is OK.  As all literals are
1020              signed, we sign extend here. */
1021           int hb = (low >> 31) & 0x1;
1022           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1023               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1024             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1025           high = -hb;
1026         }
1027       ctxp->minus_seen = 0;
1028       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1029                           (long_suffix ? long_type_node : int_type_node));
1030       return INT_LIT_TK;
1031     }
1032
1033   ctxp->minus_seen = 0;
1034   /* Character literals */
1035   if (c == '\'')
1036     {
1037       unicode_t char_lit;
1038       if ((c = java_get_unicode ()) == '\\')
1039         char_lit = java_parse_escape_sequence ();
1040       else
1041         char_lit = c;
1042
1043       c = java_get_unicode ();
1044
1045       if ((c == '\n') || (c == UEOF))
1046         java_lex_error ("Character literal not terminated at end of line", 0);
1047       if (c != '\'')
1048         java_lex_error ("Syntax error in character literal", 0);
1049
1050       if (c == JAVA_CHAR_ERROR)
1051         char_lit = 0;           /* We silently convert it to zero */
1052
1053       JAVA_LEX_CHAR_LIT (char_lit);
1054       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1055       return CHAR_LIT_TK;
1056     }
1057
1058   /* String literals */
1059   if (c == '"')
1060     {
1061       int no_error;
1062       char *string;
1063
1064       for (no_error = 1, c = java_get_unicode ();
1065            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1066         {
1067           if (c == '\\')
1068             c = java_parse_escape_sequence ();
1069           no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1070           java_unicode_2_utf8 (c);
1071         }
1072       if (c == '\n' || c == UEOF) /* ULT */
1073         {
1074           lineno--;             /* Refer to the line the terminator was seen */
1075           java_lex_error ("String not terminated at end of line.", 0);
1076           lineno++;
1077         }
1078
1079       obstack_1grow (&temporary_obstack, '\0');
1080       string = obstack_finish (&temporary_obstack);
1081 #ifndef JC1_LITE
1082       if (!no_error || (c != '"'))
1083         java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1084       else
1085         java_lval->node = build_string (strlen (string), string);
1086 #endif
1087       return STRING_LIT_TK;
1088     }
1089
1090   /* Separator */
1091   switch (c)
1092     {
1093     case '(':
1094       JAVA_LEX_SEP (c);
1095       BUILD_OPERATOR (OP_TK);
1096     case ')':
1097       JAVA_LEX_SEP (c);
1098       return CP_TK;
1099     case '{':
1100       JAVA_LEX_SEP (c);
1101       if (ctxp->ccb_indent == 1)
1102         ctxp->first_ccb_indent1 = lineno;
1103       ctxp->ccb_indent++;
1104       BUILD_OPERATOR (OCB_TK);
1105     case '}':
1106       JAVA_LEX_SEP (c);
1107       ctxp->ccb_indent--;
1108       if (ctxp->ccb_indent == 1)
1109         ctxp->last_ccb_indent1 = lineno;
1110       BUILD_OPERATOR (CCB_TK);
1111     case '[':
1112       JAVA_LEX_SEP (c);
1113       BUILD_OPERATOR (OSB_TK);
1114     case ']':
1115       JAVA_LEX_SEP (c);
1116       return CSB_TK;
1117     case ';':
1118       JAVA_LEX_SEP (c);
1119       return SC_TK;
1120     case ',':
1121       JAVA_LEX_SEP (c);
1122       return C_TK;
1123     case '.':
1124       JAVA_LEX_SEP (c);
1125       BUILD_OPERATOR (DOT_TK);
1126       /*      return DOT_TK; */
1127     }
1128
1129   /* Operators */
1130   switch (c)
1131     {
1132     case '=':
1133       if ((c = java_get_unicode ()) == '=')
1134         {
1135           BUILD_OPERATOR (EQ_TK);
1136         }
1137       else
1138         {
1139           /* Equals is used in two different locations. In the
1140              variable_declarator: rule, it has to be seen as '=' as opposed
1141              to being seen as an ordinary assignment operator in
1142              assignment_operators: rule.  */
1143           java_unget_unicode ();
1144           BUILD_OPERATOR (ASSIGN_TK);
1145         }
1146
1147     case '>':
1148       switch ((c = java_get_unicode ()))
1149         {
1150         case '=':
1151           BUILD_OPERATOR (GTE_TK);
1152         case '>':
1153           switch ((c = java_get_unicode ()))
1154             {
1155             case '>':
1156               if ((c = java_get_unicode ()) == '=')
1157                 {
1158                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1159                 }
1160               else
1161                 {
1162                   java_unget_unicode ();
1163                   BUILD_OPERATOR (ZRS_TK);
1164                 }
1165             case '=':
1166               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1167             default:
1168               java_unget_unicode ();
1169               BUILD_OPERATOR (SRS_TK);
1170             }
1171         default:
1172           java_unget_unicode ();
1173           BUILD_OPERATOR (GT_TK);
1174         }
1175
1176     case '<':
1177       switch ((c = java_get_unicode ()))
1178         {
1179         case '=':
1180           BUILD_OPERATOR (LTE_TK);
1181         case '<':
1182           if ((c = java_get_unicode ()) == '=')
1183             {
1184               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1185             }
1186           else
1187             {
1188               java_unget_unicode ();
1189               BUILD_OPERATOR (LS_TK);
1190             }
1191         default:
1192           java_unget_unicode ();
1193           BUILD_OPERATOR (LT_TK);
1194         }
1195
1196     case '&':
1197       switch ((c = java_get_unicode ()))
1198         {
1199         case '&':
1200           BUILD_OPERATOR (BOOL_AND_TK);
1201         case '=':
1202           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1203         default:
1204           java_unget_unicode ();
1205           BUILD_OPERATOR (AND_TK);
1206         }
1207
1208     case '|':
1209       switch ((c = java_get_unicode ()))
1210         {
1211         case '|':
1212           BUILD_OPERATOR (BOOL_OR_TK);
1213         case '=':
1214           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1215         default:
1216           java_unget_unicode ();
1217           BUILD_OPERATOR (OR_TK);
1218         }
1219
1220     case '+':
1221       switch ((c = java_get_unicode ()))
1222         {
1223         case '+':
1224           BUILD_OPERATOR (INCR_TK);
1225         case '=':
1226           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1227         default:
1228           java_unget_unicode ();
1229           BUILD_OPERATOR (PLUS_TK);
1230         }
1231
1232     case '-':
1233       switch ((c = java_get_unicode ()))
1234         {
1235         case '-':
1236           BUILD_OPERATOR (DECR_TK);
1237         case '=':
1238           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1239         default:
1240           java_unget_unicode ();
1241           ctxp->minus_seen = 1;
1242           BUILD_OPERATOR (MINUS_TK);
1243         }
1244
1245     case '*':
1246       if ((c = java_get_unicode ()) == '=')
1247         {
1248           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1249         }
1250       else
1251         {
1252           java_unget_unicode ();
1253           BUILD_OPERATOR (MULT_TK);
1254         }
1255
1256     case '/':
1257       if ((c = java_get_unicode ()) == '=')
1258         {
1259           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1260         }
1261       else
1262         {
1263           java_unget_unicode ();
1264           BUILD_OPERATOR (DIV_TK);
1265         }
1266
1267     case '^':
1268       if ((c = java_get_unicode ()) == '=')
1269         {
1270           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1271         }
1272       else
1273         {
1274           java_unget_unicode ();
1275           BUILD_OPERATOR (XOR_TK);
1276         }
1277
1278     case '%':
1279       if ((c = java_get_unicode ()) == '=')
1280         {
1281           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1282         }
1283       else
1284         {
1285           java_unget_unicode ();
1286           BUILD_OPERATOR (REM_TK);
1287         }
1288
1289     case '!':
1290       if ((c = java_get_unicode()) == '=')
1291         {
1292           BUILD_OPERATOR (NEQ_TK);
1293         }
1294       else
1295         {
1296           java_unget_unicode ();
1297           BUILD_OPERATOR (NEG_TK);
1298         }
1299
1300     case '?':
1301       JAVA_LEX_OP ("?");
1302       BUILD_OPERATOR (REL_QM_TK);
1303     case ':':
1304       JAVA_LEX_OP (":");
1305       BUILD_OPERATOR (REL_CL_TK);
1306     case '~':
1307       BUILD_OPERATOR (NOT_TK);
1308     }
1309
1310   /* Keyword, boolean literal or null literal */
1311   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1312        JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1313     {
1314       java_unicode_2_utf8 (c);
1315       if (all_ascii && c >= 128)
1316         all_ascii = 0;
1317       ascii_index++;
1318     }
1319
1320   obstack_1grow (&temporary_obstack, '\0');
1321   string = obstack_finish (&temporary_obstack);
1322   java_unget_unicode ();
1323
1324   /* If we have something all ascii, we consider a keyword, a boolean
1325      literal, a null literal or an all ASCII identifier.  Otherwise,
1326      this is an identifier (possibly not respecting formation rule).  */
1327   if (all_ascii)
1328     {
1329       struct java_keyword *kw;
1330       if ((kw=java_keyword (string, ascii_index)))
1331         {
1332           JAVA_LEX_KW (string);
1333           switch (kw->token)
1334             {
1335             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1336             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1337             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1338             case PRIVATE_TK:
1339               SET_MODIFIER_CTX (kw->token);
1340               return MODIFIER_TK;
1341             case FLOAT_TK:
1342               SET_LVAL_NODE (float_type_node);
1343               return FP_TK;
1344             case DOUBLE_TK:
1345               SET_LVAL_NODE (double_type_node);
1346               return FP_TK;
1347             case BOOLEAN_TK:
1348               SET_LVAL_NODE (boolean_type_node);
1349               return BOOLEAN_TK;
1350             case BYTE_TK:
1351               SET_LVAL_NODE (byte_type_node);
1352               return INTEGRAL_TK;
1353             case SHORT_TK:
1354               SET_LVAL_NODE (short_type_node);
1355               return INTEGRAL_TK;
1356             case INT_TK:
1357               SET_LVAL_NODE (int_type_node);
1358               return INTEGRAL_TK;
1359             case LONG_TK:
1360               SET_LVAL_NODE (long_type_node);
1361               return INTEGRAL_TK;
1362             case CHAR_TK:
1363               SET_LVAL_NODE (char_type_node);
1364               return INTEGRAL_TK;
1365
1366               /* Keyword based literals */
1367             case TRUE_TK:
1368             case FALSE_TK:
1369               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1370                               boolean_true_node : boolean_false_node));
1371               return BOOL_LIT_TK;
1372             case NULL_TK:
1373               SET_LVAL_NODE (null_pointer_node);
1374               return NULL_TK;
1375
1376               /* Some keyword we want to retain information on the location
1377                  they where found */
1378             case CASE_TK:
1379             case DEFAULT_TK:
1380             case SUPER_TK:
1381             case THIS_TK:
1382             case RETURN_TK:
1383             case BREAK_TK:
1384             case CONTINUE_TK:
1385             case TRY_TK:
1386             case CATCH_TK:
1387             case THROW_TK:
1388             case INSTANCEOF_TK:
1389               BUILD_OPERATOR (kw->token);
1390
1391             default:
1392               return kw->token;
1393             }
1394         }
1395     }
1396
1397   /* We may have and ID here */
1398   if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1399     {
1400       JAVA_LEX_ID (string);
1401       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1402       return ID_TK;
1403     }
1404
1405   /* Everything else is an invalid character in the input */
1406   {
1407     char lex_error_buffer [128];
1408     sprintf (lex_error_buffer, "Invalid character '%s' in input",
1409              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1410     java_lex_error (lex_error_buffer, 1);
1411   }
1412   return 0;
1413 }
1414
1415 static void
1416 java_unicode_2_utf8 (unicode)
1417     unicode_t unicode;
1418 {
1419   if (RANGE (unicode, 0x01, 0x7f))
1420     obstack_1grow (&temporary_obstack, (char)unicode);
1421   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1422     {
1423       obstack_1grow (&temporary_obstack,
1424                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1425       obstack_1grow (&temporary_obstack,
1426                      (unsigned char)(0x80 | (unicode & 0x3f)));
1427     }
1428   else                          /* Range 0x800-0xffff */
1429     {
1430       obstack_1grow (&temporary_obstack,
1431                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1432       obstack_1grow (&temporary_obstack,
1433                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1434       obstack_1grow (&temporary_obstack,
1435                      (unsigned char)(0x80 | (unicode & 0x003f)));
1436     }
1437 }
1438
1439 #ifndef JC1_LITE
1440 static tree
1441 build_wfl_node (node)
1442      tree node;
1443 {
1444   return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1445 }
1446 #endif
1447
1448 static void
1449 java_lex_error (msg, forward)
1450      const char *msg ATTRIBUTE_UNUSED;
1451      int forward ATTRIBUTE_UNUSED;
1452 {
1453 #ifndef JC1_LITE
1454   ctxp->elc.line = ctxp->c_line->lineno;
1455   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1456
1457   /* Might be caught in the middle of some error report */
1458   ctxp->java_error_flag = 0;
1459   java_error (NULL);
1460   java_error (msg);
1461 #endif
1462 }
1463
1464 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line and
   0 otherwise.  Both '\n' and '\r' count as terminators.  After a
   '\r' one more character is read from FP: a '\n' is swallowed so
   that CR/LF counts once, end of file is dropped, and anything else
   is pushed back onto FP.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int peeked = getc (fp);

      /* Only a character that belongs to the next line goes back.  */
      if (!(peeked == '\n' || peeked == EOF))
        ungetc (peeked, fp);
      return 1;
    }

  return 0;
}
1484 #endif
1485
1486 char *
1487 java_get_line_col (filename, line, col)
1488      const char *filename ATTRIBUTE_UNUSED;
1489      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1490 {
1491 #ifdef JC1_LITE
1492   return 0;
1493 #else
1494   /* Dumb implementation. Doesn't try to cache or optimize things. */
1495   /* First line of the file is line 1, first column is 1 */
1496
1497   /* COL == -1 means, at the CR/LF in LINE */
1498   /* COL == -2 means, at the first non space char in LINE */
1499
1500   FILE *fp;
1501   int c, ccol, cline = 1;
1502   int current_line_col = 0;
1503   int first_non_space = 0;
1504   char *base;
1505
1506   if (!(fp = fopen (filename, "r")))
1507     fatal ("Can't open file - java_display_line_col");
1508
1509   while (cline != line)
1510     {
1511       c = getc (fp);
1512       if (c < 0)
1513         {
1514           static char msg[] = "<<file too short - unexpected EOF>>";
1515           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1516           goto have_line;
1517         }
1518       if (java_is_eol (fp, c))
1519         cline++;
1520     }
1521
1522   /* Gather the chars of the current line in a buffer */
1523   for (;;)
1524     {
1525       c = getc (fp);
1526       if (c < 0 || java_is_eol (fp, c))
1527         break;
1528       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1529         first_non_space = current_line_col;
1530       obstack_1grow (&temporary_obstack, c);
1531       current_line_col++;
1532     }
1533  have_line:
1534
1535   obstack_1grow (&temporary_obstack, '\n');
1536
1537   if (col == -1)
1538     {
1539       col = current_line_col;
1540       first_non_space = 0;
1541     }
1542   else if (col == -2)
1543     col = first_non_space;
1544   else
1545     first_non_space = 0;
1546
1547   /* Place the '^' a the right position */
1548   base = obstack_base (&temporary_obstack);
1549   for (ccol = 1; ccol <= col; ccol++)
1550     {
1551       /* Compute \t when reaching first_non_space */
1552       char c = (first_non_space ?
1553                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1554       obstack_1grow (&temporary_obstack, c);
1555     }
1556   obstack_grow0 (&temporary_obstack, "^", 1);
1557
1558   fclose (fp);
1559   return obstack_finish (&temporary_obstack);
1560 #endif
1561 }