gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
   /* This file defines java_lex (yylex), which reads a Java source file
      (ASCII, possibly containing Unicode escape sequences or UTF-8
      encoded characters) and returns a token for everything found
      except comments, white space and line terminators.  When
      necessary, it also fills in the java_lval (yylval) union.  It is
      implemented to be called by a re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  36
  37 #include "keyword.h"
  38
  39 #ifndef JC1_LITE
  40 extern struct obstack *expression_obstack;
  41 #endif
  42
  43 /* Function declaration  */
  44 static int java_lineterminator PARAMS ((unicode_t));
  45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  47 static void java_lex_error PARAMS ((const char *, int));
  48 #ifndef JC1_LITE
  49 static int java_is_eol PARAMS ((FILE *, int));
  50 static tree build_wfl_node PARAMS ((tree));
  51 #endif
  52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  53 static unicode_t java_parse_escape_sequence PARAMS ((void));
  54 static int java_letter_or_digit_p PARAMS ((unicode_t));
  55 static int java_parse_doc_section PARAMS ((unicode_t));
  56 static void java_parse_end_comment PARAMS ((unicode_t));
  57 static unicode_t java_get_unicode PARAMS ((void));
  58 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static unicode_t java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  65
  66 void
  67 java_init_lex (finput, encoding)
  68      FILE *finput;
  69      const char *encoding;
  70 {
  71 #ifndef JC1_LITE
  72   int java_lang_imported = 0;
  73
  74   if (!java_lang_id)
  75     java_lang_id = get_identifier ("java.lang");
  76   if (!java_lang_cloneable)
  77     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  78   if (!java_io_serializable)
  79     java_io_serializable = get_identifier ("java.io.Serializable");
  80   if (!inst_id)
  81     inst_id = get_identifier ("inst$");
  82   if (!wpv_id)
  83     wpv_id = get_identifier ("write_parm_value$");
  84
  85   if (!java_lang_imported)
  86     {
  87       tree node = build_tree_list
  88         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
  89       read_import_dir (TREE_PURPOSE (node));
  90       TREE_CHAIN (node) = ctxp->import_demand_list;
  91       ctxp->import_demand_list = node;
  92       java_lang_imported = 1;
  93     }
  94
  95   if (!wfl_operator)
  96     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  97   if (!label_id)
  98     label_id = get_identifier ("$L");
  99   if (!wfl_append)
 100     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 101   if (!wfl_string_buffer)
 102     wfl_string_buffer =
 103       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
 104   if (!wfl_to_string)
 105     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 106
 107   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 108     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
 109
 110   bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
 111   bzero ((PTR) current_jcf, sizeof (JCF));
 112   ctxp->current_parsed_class = NULL;
 113   ctxp->package = NULL_TREE;
 114 #endif
 115
 116   ctxp->filename = input_filename;
 117   ctxp->lineno = lineno = 0;
 118   ctxp->p_line = NULL;
 119   ctxp->c_line = NULL;
 120   ctxp->minus_seen = 0;
 121   ctxp->java_error_flag = 0;
 122   ctxp->lexer = java_new_lexer (finput, encoding);
 123 }
 124
 125 static char *
 126 java_sprint_unicode (line, i)
 127     struct java_line *line;
 128     int i;
 129 {
 130   static char buffer [10];
 131   if (line->unicode_escape_p [i] || line->line [i] > 128)
 132     sprintf (buffer, "\\u%04x", line->line [i]);
 133   else
 134     {
 135       buffer [0] = line->line [i];
 136       buffer [1] = '\0';
 137     }
 138   return buffer;
 139 }
 140
 141 static unicode_t
 142 java_sneak_unicode ()
 143 {
 144   return (ctxp->c_line->line [ctxp->c_line->current]);
 145 }
 146
 147 static void
 148 java_unget_unicode ()
 149 {
 150   if (!ctxp->c_line->current)
 151     fatal ("can't unget unicode - java_unget_unicode");
 152   ctxp->c_line->current--;
 153   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 154 }
 155
 156 static void
 157 java_allocate_new_line ()
 158 {
 159   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 160   char ahead_escape_p = (ctxp->c_line ?
 161                          ctxp->c_line->unicode_escape_ahead_p : 0);
 162
 163   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 164     {
 165       if (ctxp->p_line)
 166         {
 167           free (ctxp->p_line->unicode_escape_p);
 168           free (ctxp->p_line->line);
 169           free (ctxp->p_line);
 170         }
 171       ctxp->p_line = ctxp->c_line;
 172       ctxp->c_line = NULL;              /* Reallocated */
 173     }
 174
 175   if (!ctxp->c_line)
 176     {
 177       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 178       ctxp->c_line->max = JAVA_LINE_MAX;
 179       ctxp->c_line->line = (unicode_t *)xmalloc
 180         (sizeof (unicode_t)*ctxp->c_line->max);
 181       ctxp->c_line->unicode_escape_p =
 182           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 183       ctxp->c_line->white_space_only = 0;
 184     }
 185
 186   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 187   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 188   if (ahead)
 189     {
 190       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 191       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 192       ctxp->c_line->size++;
 193     }
 194   ctxp->c_line->ahead [0] = 0;
 195   ctxp->c_line->unicode_escape_ahead_p = 0;
 196   ctxp->c_line->lineno = ++lineno;
 197   ctxp->c_line->white_space_only = 1;
 198 }
 199
 200 /* Create a new lexer object.  */
 201 java_lexer *
 202 java_new_lexer (finput, encoding)
 203      FILE *finput;
 204      const char *encoding;
 205 {
 206   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 207   int enc_error = 0;
 208
 209   lex->finput = finput;
 210   lex->bs_count = 0;
 211   lex->unget_value = 0;
 212
 213 #ifdef HAVE_ICONV
 214   lex->handle = iconv_open ("UCS-2", encoding);
 215   if (lex->handle == (iconv_t) -1)
 216     {
 217       /* FIXME: we should give a nice error based on errno here.  */
 218       enc_error = 1;
 219     }
 220   lex->first = -1;
 221   lex->last = -1;
 222 #else /* HAVE_ICONV */
 223   if (strcmp (encoding, DEFAULT_ENCODING))
 224     enc_error = 1;
 225 #endif /* HAVE_ICONV */
 226
 227   if (enc_error)
 228     fatal ("unknown encoding: `%s'", encoding);
 229
 230   return lex;
 231 }
 232
 233 void
 234 java_destroy_lexer (lex)
 235      java_lexer *lex;
 236 {
 237 #ifdef HAVE_ICONV
 238   iconv_close (lex->handle);
 239 #endif
 240   free (lex);
 241 }
 242
 243 static unicode_t
 244 java_read_char (lex)
 245      java_lexer *lex;
 246 {
 247   if (lex->unget_value)
 248     {
 249       unicode_t r = lex->unget_value;
 250       lex->unget_value = 0;
 251       return r;
 252     }
 253
 254 #ifdef HAVE_ICONV
 255   {
 256     char out[2];
 257     size_t ir, inbytesleft, in_save, out_count;
 258     char *inp, *outp;
 259
 260     while (1)
 261       {
 262         /* See if we need to read more data.  If FIRST == 0 then the
 263            previous conversion attempt ended in the middle of a
 264            character at the end of the buffer.  Otherwise we only have
 265            to read if the buffer is empty.  */
 266         if (lex->first == 0 || lex->first >= lex->last)
 267           {
 268             int r;
 269
 270             if (lex->first >= lex->last)
 271               {
 272                 lex->first = 0;
 273                 lex->last = 0;
 274               }
 275             if (feof (lex->finput))
 276               return UEOF;
 277             r = fread (&lex->buffer[lex->last], 1,
 278                        sizeof (lex->buffer) - lex->last,
 279                        lex->finput);
 280             lex->last += r;
 281           }
 282
 283         inbytesleft = lex->last - lex->first;
 284
 285         if (inbytesleft == 0)
 286           {
 287             /* We've tried to read and there is nothing left.  */
 288             return UEOF;
 289           }
 290
 291         in_save = inbytesleft;
 292         out_count = 2;
 293         inp = &lex->buffer[lex->first];
 294         outp = out;
 295         ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
 296                     &outp, &out_count);
 297         lex->first += in_save - inbytesleft;
 298
 299         if (out_count == 0)
 300           {
 301             /* Success.  We assume that UCS-2 is big-endian.  This
 302                appears to be an ok assumption.  */
 303             unicode_t result;
 304             result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
 305             return result;
 306           }
 307
 308         if (ir == (size_t) -1)
 309           {
 310             if (errno == EINVAL)
 311               {
 312                 /* This is ok.  This means that the end of our buffer
 313                    is in the middle of a character sequence.  We just
 314                    move the valid part of the buffer to the beginning
 315                    to force a read.  */
 316                 /* We use bcopy() because it should work for
 317                    overlapping strings.  Use memmove() instead... */
 318                 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
 319                        lex->last - lex->first);
 320                 lex->last -= lex->first;
 321                 lex->first = 0;
 322               }
 323             else
 324               {
 325                 /* A more serious error.  */
 326                 java_lex_error ("unrecognized character in input stream", 0);
 327                 return UEOF;
 328               }
 329           }
 330       }
 331   }
 332 #else /* HAVE_ICONV */
 333   {
 334     int c, c1, c2;
 335     c = getc (lex->finput);
 336
 337     if (c < 128)
 338       return (unicode_t)c;
 339     if (c == EOF)
 340       return UEOF;
 341     else
 342       {
 343         if ((c & 0xe0) == 0xc0)
 344           {
 345             c1 = getc (lex->finput);
 346             if ((c1 & 0xc0) == 0x80)
 347               return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 348             c = c1;
 349           }
 350         else if ((c & 0xf0) == 0xe0)
 351           {
 352             c1 = getc (lex->finput);
 353             if ((c1 & 0xc0) == 0x80)
 354               {
 355                 c2 = getc (lex->finput);
 356                 if ((c2 & 0xc0) == 0x80)
 357                   return (unicode_t)(((c & 0xf) << 12) +
 358                                      (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 359                 else
 360                   c = c2;
 361               }
 362             else
 363               c = c1;
 364           }
 365
 366         /* We simply don't support invalid characters.  */
 367         java_lex_error ("malformed UTF-8 character", 0);
 368       }
 369   }
 370 #endif /* HAVE_ICONV */
 371
 372   /* We only get here on error.  */
 373   return UEOF;
 374 }
 375
 376 static void
 377 java_store_unicode (l, c, unicode_escape_p)
 378     struct java_line *l;
 379     unicode_t c;
 380     int unicode_escape_p;
 381 {
 382   if (l->size == l->max)
 383     {
 384       l->max += JAVA_LINE_MAX;
 385       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 386       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 387                                                sizeof (char)*l->max);
 388     }
 389   l->line [l->size] = c;
 390   l->unicode_escape_p [l->size++] = unicode_escape_p;
 391 }
 392
 393 static unicode_t
 394 java_read_unicode (lex, term_context, unicode_escape_p)
 395      java_lexer *lex;
 396      int term_context;
 397      int *unicode_escape_p;
 398 {
 399   unicode_t c;
 400
 401   c = java_read_char (lex);
 402   *unicode_escape_p = 0;
 403
 404   if (c != '\\')
 405     {
 406       lex->bs_count = 0;
 407       return (term_context ? c : (java_lineterminator (c)
 408                                   ? '\n'
 409                                   : (unicode_t) c));
 410     }
 411
 412   ++lex->bs_count;
 413   if ((lex->bs_count) % 2 == 1)
 414     {
 415       /* Odd number of \ seen.  */
 416       c = java_read_char (lex);
 417       if (c == 'u')
 418         {
 419           unicode_t unicode = 0;
 420           int shift = 12;
 421           /* Next should be 4 hex digits, otherwise it's an error.
 422              The hex value is converted into the unicode, pushed into
 423              the Unicode stream.  */
 424           for (shift = 12; shift >= 0; shift -= 4)
 425             {
 426               if ((c = java_read_char (lex)) == UEOF)
 427                 return UEOF;
 428               if (c >= '0' && c <= '9')
 429                 unicode |= (unicode_t)((c-'0') << shift);
 430               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 431                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 432               else
 433                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 434             }
 435           *unicode_escape_p = 1;
 436           return (term_context
 437                   ? unicode : (java_lineterminator (c) ? '\n' : unicode));
 438         }
 439       lex->unget_value = c;
 440     }
 441   return (unicode_t) '\\';
 442 }
 443
 444 static unicode_t
 445 java_get_unicode ()
 446 {
 447   /* It's time to read a line when... */
 448   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 449     {
 450       unicode_t c;
 451       java_allocate_new_line ();
 452       if (ctxp->c_line->line[0] != '\n')
 453         for (;;)
 454           {
 455             int unicode_escape_p;
 456             c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
 457             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 458             if (ctxp->c_line->white_space_only
 459                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 460               ctxp->c_line->white_space_only = 0;
 461             if ((c == '\n') || (c == UEOF))
 462               break;
 463           }
 464     }
 465   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 466   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 467   return ctxp->c_line->line [ctxp->c_line->current++];
 468 }
 469
 470 static int
 471 java_lineterminator (c)
 472      unicode_t c;
 473 {
 474   if (c == '\n')                /* LF */
 475     return 1;
 476   else if (c == '\r')           /* CR */
 477     {
 478       int unicode_escape_p;
 479       c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
 480       if (c == '\r')
 481         {
 482           /* In this case we will have another terminator.  For some
 483              reason the lexer has several different unget methods.  We
 484              can't use the `ahead' method because then the \r will end
 485              up in the actual text of the line, causing an error.  So
 486              instead we choose a very low-level method.  FIXME: this
 487              is incredibly ugly.  */
 488           ctxp->lexer->unget_value = c;
 489         }
 490       else if (c != '\n')
 491         {
 492           ctxp->c_line->ahead [0] = c;
 493           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 494         }
 495       return 1;
 496     }
 497   else
 498     return 0;
 499 }
 500
 501 /* Parse the end of a C style comment.
 502  * C is the first character following the '/' and '*'. */
 503 static void
 504 java_parse_end_comment (c)
 505      unicode_t c;
 506 {
 507
 508   for ( ;; c = java_get_unicode ())
 509     {
 510       switch (c)
 511         {
 512         case UEOF:
 513           java_lex_error ("Comment not terminated at end of input", 0);
 514         case '*':
 515           switch (c = java_get_unicode ())
 516             {
 517             case UEOF:
 518               java_lex_error ("Comment not terminated at end of input", 0);
 519             case '/':
 520               return;
 521             case '*':   /* reparse only '*' */
 522               java_unget_unicode ();
 523             }
 524         }
 525     }
 526 }
 527
 528 /* Parse the documentation section. Keywords must be at the beginning
 529    of a documentation comment line (ignoring white space and any `*'
 530    character). Parsed keyword(s): @DEPRECATED.  */
 531
 532 static int
 533 java_parse_doc_section (c)
 534      unicode_t c;
 535 {
 536   int valid_tag = 0, seen_star = 0;
 537
 538   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 539     {
 540       switch (c)
 541         {
 542         case '*':
 543           seen_star = 1;
 544           break;
 545         case '\n': /* ULT */
 546           valid_tag = 1;
 547         default:
 548           seen_star = 0;
 549         }
 550       c = java_get_unicode();
 551     }
 552
 553   if (c == UEOF)
 554     java_lex_error ("Comment not terminated at end of input", 0);
 555
 556   if (seen_star && (c == '/'))
 557     return 1;                   /* Goto step1 in caller */
 558
 559   /* We're parsing @deprecated */
 560   if (valid_tag && (c == '@'))
 561     {
 562       char tag [11];
 563       int  tag_index = 0;
 564
 565       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 566         {
 567           c = java_get_unicode ();
 568           tag [tag_index++] = c;
 569         }
 570
 571       if (c == UEOF)
 572         java_lex_error ("Comment not terminated at end of input", 0);
 573       tag [tag_index] = '\0';
 574
 575       if (!strcmp (tag, "deprecated"))
 576         ctxp->deprecated = 1;
 577     }
 578   java_unget_unicode ();
 579   return 0;
 580 }
 581
 582 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
 583    will return a wrong result.  */
 584 static int
 585 java_letter_or_digit_p (c)
 586      unicode_t c;
 587 {
 588   return _JAVA_LETTER_OR_DIGIT_P (c);
 589 }
 590
 591 static unicode_t
 592 java_parse_escape_sequence ()
 593 {
 594   unicode_t char_lit;
 595   unicode_t c;
 596
 597   switch (c = java_get_unicode ())
 598     {
 599     case 'b':
 600       return (unicode_t)0x8;
 601     case 't':
 602       return (unicode_t)0x9;
 603     case 'n':
 604       return (unicode_t)0xa;
 605     case 'f':
 606       return (unicode_t)0xc;
 607     case 'r':
 608       return (unicode_t)0xd;
 609     case '"':
 610       return (unicode_t)0x22;
 611     case '\'':
 612       return (unicode_t)0x27;
 613     case '\\':
 614       return (unicode_t)0x5c;
 615     case '0': case '1': case '2': case '3': case '4':
 616     case '5': case '6': case '7': case '8': case '9':
 617       {
 618         int octal_escape[3];
 619         int octal_escape_index = 0;
 620
 621         for (; octal_escape_index < 3 && RANGE (c, '0', '9');
 622              c = java_get_unicode ())
 623           octal_escape [octal_escape_index++] = c;
 624
 625         java_unget_unicode ();
 626
 627         if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
 628           {
 629             java_lex_error ("Literal octal escape out of range", 0);
 630             return JAVA_CHAR_ERROR;
 631           }
 632         else
 633           {
 634             int i, shift;
 635             for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 636                  i < octal_escape_index; i++, shift -= 3)
 637               char_lit |= (octal_escape [i] - '0') << shift;
 638
 639             return (char_lit);
 640           }
 641         break;
 642       }
 643     case '\n':
 644       return '\n';              /* ULT, caught latter as a specific error */
 645     default:
 646       java_lex_error ("Illegal character in escape sequence", 0);
 647       return JAVA_CHAR_ERROR;
 648     }
 649 }
 650
 651 /* Isolate the code which may raise an arithmetic exception in its
 652    own function.  */
 653
 654 #ifndef JC1_LITE
 655 struct jpa_args
 656 {
 657   YYSTYPE *java_lval;
 658   char *literal_token;
 659   int fflag;
 660   int number_beginning;
 661 };
 662
 663 static void java_perform_atof   PARAMS ((PTR));
 664
 665 static void
 666 java_perform_atof (av)
 667      PTR av;
 668 {
 669   struct jpa_args *a = (struct jpa_args *)av;
 670   YYSTYPE *java_lval = a->java_lval;
 671   int number_beginning = a->number_beginning;
 672   REAL_VALUE_TYPE value;
 673   tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 674
 675   SET_REAL_VALUE_ATOF (value,
 676                        REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
 677
 678   if (REAL_VALUE_ISINF (value)
 679       || REAL_VALUE_ISNAN (value))
 680     {
 681       JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
 682       value = DCONST0;
 683     }
 684
 685   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 686 }
 687 #endif
 688
 689 static int yylex                PARAMS ((YYSTYPE *));
 690
 691 static int
 692 #ifdef JC1_LITE
 693 yylex (java_lval)
 694 #else
 695 java_lex (java_lval)
 696 #endif
 697      YYSTYPE *java_lval;
 698 {
 699   unicode_t c, first_unicode;
 700   int ascii_index, all_ascii;
 701   char *string;
 702
 703   /* Translation of the Unicode escape in the raw stream of Unicode
 704      characters. Takes care of line terminator.  */
 705  step1:
 706   /* Skip white spaces: SP, TAB and FF or ULT */
 707   for (c = java_get_unicode ();
 708        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 709     if (c == '\n')
 710       {
 711         ctxp->elc.line = ctxp->c_line->lineno;
 712         ctxp->elc.col  = ctxp->c_line->char_col-2;
 713       }
 714
 715   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 716
 717   if (c == 0x1a)                /* CTRL-Z */
 718     {
 719       if ((c = java_get_unicode ()) == UEOF)
 720         return 0;               /* Ok here */
 721       else
 722         java_unget_unicode ();  /* Caught latter at the end the function */
 723     }
 724   /* Handle EOF here */
 725   if (c == UEOF)        /* Should probably do something here... */
 726     return 0;
 727
 728   /* Take care of eventual comments.  */
 729   if (c == '/')
 730     {
 731       switch (c = java_get_unicode ())
 732         {
 733         case '/':
 734           for (;;)
 735             {
 736               c = java_get_unicode ();
 737               if (c == UEOF)
 738                 java_lex_error ("Comment not terminated at end of input", 0);
 739               if (c == '\n')    /* ULT */
 740                 goto step1;
 741             }
 742           break;
 743
 744         case '*':
 745           if ((c = java_get_unicode ()) == '*')
 746             {
 747               if ((c = java_get_unicode ()) == '/')
 748                 goto step1;     /* Empy documentation comment  */
 749               else if (java_parse_doc_section (c))
 750                 goto step1;
 751             }
 752
 753           java_parse_end_comment ((c = java_get_unicode ()));
 754           goto step1;
 755           break;
 756         default:
 757           java_unget_unicode ();
 758           c = '/';
 759           break;
 760         }
 761     }
 762
 763   ctxp->elc.line = ctxp->c_line->lineno;
 764   ctxp->elc.prev_col = ctxp->elc.col;
 765   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 766   if (ctxp->elc.col < 0)
 767     fatal ("ctxp->elc.col < 0 - java_lex");
 768
 769   /* Numeric literals */
 770   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 771     {
 772       /* This section of code is borrowed from gcc/c-lex.c  */
 773 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 774       int parts[TOTAL_PARTS];
 775       HOST_WIDE_INT high, low;
 776       /* End borrowed section  */
 777       char literal_token [256];
 778       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 779       int  i;
 780 #ifndef JC1_LITE
 781       int  number_beginning = ctxp->c_line->current;
 782 #endif
 783
 784       /* We might have a . separator instead of a FP like .[0-9]* */
 785       if (c == '.')
 786         {
 787           unicode_t peep = java_sneak_unicode ();
 788
 789           if (!JAVA_ASCII_DIGIT (peep))
 790             {
 791               JAVA_LEX_SEP('.');
 792               BUILD_OPERATOR (DOT_TK);
 793             }
 794         }
 795
 796       for (i = 0; i < TOTAL_PARTS; i++)
 797         parts [i] = 0;
 798
 799       if (c == '0')
 800         {
 801           c = java_get_unicode ();
 802           if (c == 'x' || c == 'X')
 803             {
 804               radix = 16;
 805               c = java_get_unicode ();
 806             }
 807           else if (JAVA_ASCII_DIGIT (c))
 808             radix = 8;
 809           else if (c == '.')
 810             {
 811               /* Push the '.' back and prepare for a FP parsing... */
 812               java_unget_unicode ();
 813               c = '0';
 814             }
 815           else
 816             {
 817               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 818               JAVA_LEX_LIT ("0", 10);
 819               switch (c)
 820                 {
 821                 case 'L': case 'l':
 822                   SET_LVAL_NODE (long_zero_node);
 823                   return (INT_LIT_TK);
 824                 case 'f': case 'F':
 825                   SET_LVAL_NODE (float_zero_node);
 826                   return (FP_LIT_TK);
 827                 case 'd': case 'D':
 828                   SET_LVAL_NODE (double_zero_node);
 829                   return (FP_LIT_TK);
 830                 default:
 831                   java_unget_unicode ();
 832                   SET_LVAL_NODE (integer_zero_node);
 833                   return (INT_LIT_TK);
 834                 }
 835             }
 836         }
 837       /* Parse the first part of the literal, until we find something
 838          which is not a number.  */
 839       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 840              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 841              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 842         {
 843           /* We store in a string (in case it turns out to be a FP) and in
 844              PARTS if we have to process a integer literal.  */
 845           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
 846           int count;
 847
 848           literal_token [literal_index++] = c;
 849           /* This section of code if borrowed from gcc/c-lex.c  */
 850           for (count = 0; count < TOTAL_PARTS; count++)
 851             {
 852               parts[count] *= radix;
 853               if (count)
 854                 {
 855                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 856                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 857                 }
 858               else
 859                 parts[0] += numeric;
 860             }
 861           if (parts [TOTAL_PARTS-1] != 0)
 862             overflow = 1;
 863           /* End borrowed section.  */
 864           c = java_get_unicode ();
 865         }
 866
 867       /* If we have something from the FP char set but not a digit, parse
 868          a FP literal.  */
 869       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 870         {
 871           int stage = 0;
 872           int seen_digit = (literal_index ? 1 : 0);
 873           int seen_exponent = 0;
 874           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
 875                                    double unless specified. */
 876           if (radix != 10)
 877             java_lex_error ("Can't express non-decimal FP literal", 0);
 878
 879           for (;;)
 880             {
 881               if (c == '.')
 882                 {
 883                   if (stage < 1)
 884                     {
 885                       stage = 1;
 886                       literal_token [literal_index++ ] = c;
 887                       c = java_get_unicode ();
 888                     }
 889                   else
 890                     java_lex_error ("Invalid character in FP literal", 0);
 891                 }
 892
 893               if (c == 'e' || c == 'E')
 894                 {
 895                   if (stage < 2)
 896                     {
 897                       /* {E,e} must have seen at list a digit */
 898                       if (!seen_digit)
 899                         java_lex_error ("Invalid FP literal", 0);
 900                       seen_digit = 0;
 901                       seen_exponent = 1;
 902                       stage = 2;
 903                       literal_token [literal_index++] = c;
 904                       c = java_get_unicode ();
 905                     }
 906                   else
 907                     java_lex_error ("Invalid character in FP literal", 0);
 908                 }
 909               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 910                 {
 911                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 912                   stage = 4;    /* So we fall through */
 913                 }
 914
 915               if ((c=='-' || c =='+') && stage == 2)
 916                 {
 917                   stage = 3;
 918                   literal_token [literal_index++] = c;
 919                   c = java_get_unicode ();
 920                 }
 921
 922               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 923                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 924                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 925                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
 926                 {
 927                   if (JAVA_ASCII_DIGIT (c))
 928                     seen_digit = 1;
 929                   literal_token [literal_index++ ] = c;
 930                   c = java_get_unicode ();
 931                 }
 932               else
 933                 {
 934 #ifndef JC1_LITE
 935                   struct jpa_args a;
 936 #endif
 937                   if (stage != 4) /* Don't push back fF/dD */
 938                     java_unget_unicode ();
 939
 940                   /* An exponent (if any) must have seen a digit.  */
 941                   if (seen_exponent && !seen_digit)
 942                     java_lex_error ("Invalid FP literal", 0);
 943
 944                   literal_token [literal_index] = '\0';
 945                   JAVA_LEX_LIT (literal_token, radix);
 946
 947 #ifndef JC1_LITE
 948                   a.literal_token = literal_token;
 949                   a.fflag = fflag;
 950                   a.java_lval = java_lval;
 951                   a.number_beginning = number_beginning;
 952                   if (do_float_handler (java_perform_atof, (PTR) &a))
 953                     return FP_LIT_TK;
 954
 955                   JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 956 #else
 957                   return FP_LIT_TK;
 958 #endif
 959                 }
 960             }
 961         } /* JAVA_ASCCI_FPCHAR (c) */
 962
 963       /* Here we get back to converting the integral literal.  */
 964       if (c == 'L' || c == 'l')
 965         long_suffix = 1;
 966       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 967         java_lex_error ("Digit out of range in hexadecimal literal", 0);
 968       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 969         java_lex_error ("Digit out of range in octal literal", 0);
 970       else if (radix == 16 && !literal_index)
 971         java_lex_error ("No digit specified for hexadecimal literal", 0);
 972       else
 973         java_unget_unicode ();
 974
 975 #ifdef JAVA_LEX_DEBUG
 976       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
 977       JAVA_LEX_LIT (literal_token, radix);
 978 #endif
 979       /* This section of code is borrowed from gcc/c-lex.c  */
 980       if (!overflow)
 981         {
 982           bytes = GET_TYPE_PRECISION (long_type_node);
 983           for (i = bytes; i < TOTAL_PARTS; i++)
 984             if (parts [i])
 985               {
 986                 overflow = 1;
 987                 break;
 988               }
 989         }
 990       high = low = 0;
 991       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
 992         {
 993           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
 994                                               / HOST_BITS_PER_CHAR)]
 995                    << (i * HOST_BITS_PER_CHAR));
 996           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
 997         }
 998       /* End borrowed section.  */
 999
1000       /* Range checking */
1001       if (long_suffix)
1002         {
1003           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1004              9223372036854775807L is the biggest `long' literal that can be
1005              expressed using a 10 radix. For other radixes, everything that
1006              fits within 64 bits is OK. */
1007           int hb = (high >> 31);
1008           if (overflow || (hb && low && radix == 10) ||
1009               (hb && high & 0x7fffffff && radix == 10) ||
1010               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1011             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1012         }
1013       else
1014         {
1015           /* 2147483648 is valid if operand of a '-'. Otherwise,
1016              2147483647 is the biggest `int' literal that can be
1017              expressed using a 10 radix. For other radixes, everything
1018              that fits within 32 bits is OK.  As all literals are
1019              signed, we sign extend here. */
1020           int hb = (low >> 31) & 0x1;
1021           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1022               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1023             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1024           high = -hb;
1025         }
1026       ctxp->minus_seen = 0;
1027       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1028                           (long_suffix ? long_type_node : int_type_node));
1029       return INT_LIT_TK;
1030     }
1031
1032   ctxp->minus_seen = 0;
1033   /* Character literals */
1034   if (c == '\'')
1035     {
1036       unicode_t char_lit;
1037       if ((c = java_get_unicode ()) == '\\')
1038         char_lit = java_parse_escape_sequence ();
1039       else
1040         char_lit = c;
1041
1042       c = java_get_unicode ();
1043
1044       if ((c == '\n') || (c == UEOF))
1045         java_lex_error ("Character literal not terminated at end of line", 0);
1046       if (c != '\'')
1047         java_lex_error ("Syntax error in character literal", 0);
1048
1049       if (c == JAVA_CHAR_ERROR)
1050         char_lit = 0;           /* We silently convert it to zero */
1051
1052       JAVA_LEX_CHAR_LIT (char_lit);
1053       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1054       return CHAR_LIT_TK;
1055     }
1056
1057   /* String literals */
1058   if (c == '"')
1059     {
1060       int no_error;
1061       char *string;
1062
1063       for (no_error = 1, c = java_get_unicode ();
1064            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1065         {
1066           if (c == '\\')
1067             c = java_parse_escape_sequence ();
1068           no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1069           java_unicode_2_utf8 (c);
1070         }
1071       if (c == '\n' || c == UEOF) /* ULT */
1072         {
1073           lineno--;             /* Refer to the line the terminator was seen */
1074           java_lex_error ("String not terminated at end of line.", 0);
1075           lineno++;
1076         }
1077
1078       obstack_1grow (&temporary_obstack, '\0');
1079       string = obstack_finish (&temporary_obstack);
1080 #ifndef JC1_LITE
1081       if (!no_error || (c != '"'))
1082         java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1083       else
1084         {
1085           tree s = make_node (STRING_CST);
1086           TREE_STRING_LENGTH (s) = strlen (string);
1087           TREE_STRING_POINTER (s) =
1088             obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
1089           strcpy (TREE_STRING_POINTER (s), string);
1090           java_lval->node = s;
1091         }
1092 #endif
1093       return STRING_LIT_TK;
1094     }
1095
1096   /* Separator */
1097   switch (c)
1098     {
1099     case '(':
1100       JAVA_LEX_SEP (c);
1101       BUILD_OPERATOR (OP_TK);
1102     case ')':
1103       JAVA_LEX_SEP (c);
1104       return CP_TK;
1105     case '{':
1106       JAVA_LEX_SEP (c);
1107       if (ctxp->ccb_indent == 1)
1108         ctxp->first_ccb_indent1 = lineno;
1109       ctxp->ccb_indent++;
1110       BUILD_OPERATOR (OCB_TK);
1111     case '}':
1112       JAVA_LEX_SEP (c);
1113       ctxp->ccb_indent--;
1114       if (ctxp->ccb_indent == 1)
1115         ctxp->last_ccb_indent1 = lineno;
1116       BUILD_OPERATOR (CCB_TK);
1117     case '[':
1118       JAVA_LEX_SEP (c);
1119       BUILD_OPERATOR (OSB_TK);
1120     case ']':
1121       JAVA_LEX_SEP (c);
1122       return CSB_TK;
1123     case ';':
1124       JAVA_LEX_SEP (c);
1125       return SC_TK;
1126     case ',':
1127       JAVA_LEX_SEP (c);
1128       return C_TK;
1129     case '.':
1130       JAVA_LEX_SEP (c);
1131       BUILD_OPERATOR (DOT_TK);
1132       /*      return DOT_TK; */
1133     }
1134
1135   /* Operators */
1136   switch (c)
1137     {
1138     case '=':
1139       if ((c = java_get_unicode ()) == '=')
1140         {
1141           BUILD_OPERATOR (EQ_TK);
1142         }
1143       else
1144         {
1145           /* Equals is used in two different locations. In the
1146              variable_declarator: rule, it has to be seen as '=' as opposed
1147              to being seen as an ordinary assignment operator in
1148              assignment_operators: rule.  */
1149           java_unget_unicode ();
1150           BUILD_OPERATOR (ASSIGN_TK);
1151         }
1152
1153     case '>':
1154       switch ((c = java_get_unicode ()))
1155         {
1156         case '=':
1157           BUILD_OPERATOR (GTE_TK);
1158         case '>':
1159           switch ((c = java_get_unicode ()))
1160             {
1161             case '>':
1162               if ((c = java_get_unicode ()) == '=')
1163                 {
1164                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1165                 }
1166               else
1167                 {
1168                   java_unget_unicode ();
1169                   BUILD_OPERATOR (ZRS_TK);
1170                 }
1171             case '=':
1172               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1173             default:
1174               java_unget_unicode ();
1175               BUILD_OPERATOR (SRS_TK);
1176             }
1177         default:
1178           java_unget_unicode ();
1179           BUILD_OPERATOR (GT_TK);
1180         }
1181
1182     case '<':
1183       switch ((c = java_get_unicode ()))
1184         {
1185         case '=':
1186           BUILD_OPERATOR (LTE_TK);
1187         case '<':
1188           if ((c = java_get_unicode ()) == '=')
1189             {
1190               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1191             }
1192           else
1193             {
1194               java_unget_unicode ();
1195               BUILD_OPERATOR (LS_TK);
1196             }
1197         default:
1198           java_unget_unicode ();
1199           BUILD_OPERATOR (LT_TK);
1200         }
1201
1202     case '&':
1203       switch ((c = java_get_unicode ()))
1204         {
1205         case '&':
1206           BUILD_OPERATOR (BOOL_AND_TK);
1207         case '=':
1208           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1209         default:
1210           java_unget_unicode ();
1211           BUILD_OPERATOR (AND_TK);
1212         }
1213
1214     case '|':
1215       switch ((c = java_get_unicode ()))
1216         {
1217         case '|':
1218           BUILD_OPERATOR (BOOL_OR_TK);
1219         case '=':
1220           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1221         default:
1222           java_unget_unicode ();
1223           BUILD_OPERATOR (OR_TK);
1224         }
1225
1226     case '+':
1227       switch ((c = java_get_unicode ()))
1228         {
1229         case '+':
1230           BUILD_OPERATOR (INCR_TK);
1231         case '=':
1232           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1233         default:
1234           java_unget_unicode ();
1235           BUILD_OPERATOR (PLUS_TK);
1236         }
1237
1238     case '-':
1239       switch ((c = java_get_unicode ()))
1240         {
1241         case '-':
1242           BUILD_OPERATOR (DECR_TK);
1243         case '=':
1244           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1245         default:
1246           java_unget_unicode ();
1247           ctxp->minus_seen = 1;
1248           BUILD_OPERATOR (MINUS_TK);
1249         }
1250
1251     case '*':
1252       if ((c = java_get_unicode ()) == '=')
1253         {
1254           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1255         }
1256       else
1257         {
1258           java_unget_unicode ();
1259           BUILD_OPERATOR (MULT_TK);
1260         }
1261
1262     case '/':
1263       if ((c = java_get_unicode ()) == '=')
1264         {
1265           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1266         }
1267       else
1268         {
1269           java_unget_unicode ();
1270           BUILD_OPERATOR (DIV_TK);
1271         }
1272
1273     case '^':
1274       if ((c = java_get_unicode ()) == '=')
1275         {
1276           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1277         }
1278       else
1279         {
1280           java_unget_unicode ();
1281           BUILD_OPERATOR (XOR_TK);
1282         }
1283
1284     case '%':
1285       if ((c = java_get_unicode ()) == '=')
1286         {
1287           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1288         }
1289       else
1290         {
1291           java_unget_unicode ();
1292           BUILD_OPERATOR (REM_TK);
1293         }
1294
1295     case '!':
1296       if ((c = java_get_unicode()) == '=')
1297         {
1298           BUILD_OPERATOR (NEQ_TK);
1299         }
1300       else
1301         {
1302           java_unget_unicode ();
1303           BUILD_OPERATOR (NEG_TK);
1304         }
1305
1306     case '?':
1307       JAVA_LEX_OP ("?");
1308       BUILD_OPERATOR (REL_QM_TK);
1309     case ':':
1310       JAVA_LEX_OP (":");
1311       BUILD_OPERATOR (REL_CL_TK);
1312     case '~':
1313       BUILD_OPERATOR (NOT_TK);
1314     }
1315
1316   /* Keyword, boolean literal or null literal */
1317   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1318        JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1319     {
1320       java_unicode_2_utf8 (c);
1321       if (all_ascii && c >= 128)
1322         all_ascii = 0;
1323       ascii_index++;
1324     }
1325
1326   obstack_1grow (&temporary_obstack, '\0');
1327   string = obstack_finish (&temporary_obstack);
1328   java_unget_unicode ();
1329
1330   /* If we have something all ascii, we consider a keyword, a boolean
1331      literal, a null literal or an all ASCII identifier.  Otherwise,
1332      this is an identifier (possibly not respecting formation rule).  */
1333   if (all_ascii)
1334     {
1335       struct java_keyword *kw;
1336       if ((kw=java_keyword (string, ascii_index)))
1337         {
1338           JAVA_LEX_KW (string);
1339           switch (kw->token)
1340             {
1341             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1342             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1343             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1344             case PRIVATE_TK:
1345               SET_MODIFIER_CTX (kw->token);
1346               return MODIFIER_TK;
1347             case FLOAT_TK:
1348               SET_LVAL_NODE (float_type_node);
1349               return FP_TK;
1350             case DOUBLE_TK:
1351               SET_LVAL_NODE (double_type_node);
1352               return FP_TK;
1353             case BOOLEAN_TK:
1354               SET_LVAL_NODE (boolean_type_node);
1355               return BOOLEAN_TK;
1356             case BYTE_TK:
1357               SET_LVAL_NODE (byte_type_node);
1358               return INTEGRAL_TK;
1359             case SHORT_TK:
1360               SET_LVAL_NODE (short_type_node);
1361               return INTEGRAL_TK;
1362             case INT_TK:
1363               SET_LVAL_NODE (int_type_node);
1364               return INTEGRAL_TK;
1365             case LONG_TK:
1366               SET_LVAL_NODE (long_type_node);
1367               return INTEGRAL_TK;
1368             case CHAR_TK:
1369               SET_LVAL_NODE (char_type_node);
1370               return INTEGRAL_TK;
1371
1372               /* Keyword based literals */
1373             case TRUE_TK:
1374             case FALSE_TK:
1375               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1376                               boolean_true_node : boolean_false_node));
1377               return BOOL_LIT_TK;
1378             case NULL_TK:
1379               SET_LVAL_NODE (null_pointer_node);
1380               return NULL_TK;
1381
1382               /* Some keyword we want to retain information on the location
1383                  they where found */
1384             case CASE_TK:
1385             case DEFAULT_TK:
1386             case SUPER_TK:
1387             case THIS_TK:
1388             case RETURN_TK:
1389             case BREAK_TK:
1390             case CONTINUE_TK:
1391             case TRY_TK:
1392             case CATCH_TK:
1393             case THROW_TK:
1394             case INSTANCEOF_TK:
1395               BUILD_OPERATOR (kw->token);
1396
1397             default:
1398               return kw->token;
1399             }
1400         }
1401     }
1402
1403   /* We may have an ID here */
1404   if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1405     {
1406       JAVA_LEX_ID (string);
1407       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1408       return ID_TK;
1409     }
1410
1411   /* Everything else is an invalid character in the input */
1412   {
1413     char lex_error_buffer [128];
1414     sprintf (lex_error_buffer, "Invalid character '%s' in input",
1415              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1416     java_lex_error (lex_error_buffer, 1);
1417   }
1418   return 0;
1419 }
1420
1421 static void
1422 java_unicode_2_utf8 (unicode)
1423     unicode_t unicode;
1424 {
1425   if (RANGE (unicode, 0x01, 0x7f))
1426     obstack_1grow (&temporary_obstack, (char)unicode);
1427   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1428     {
1429       obstack_1grow (&temporary_obstack,
1430                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1431       obstack_1grow (&temporary_obstack,
1432                      (unsigned char)(0x80 | (unicode & 0x3f)));
1433     }
1434   else                          /* Range 0x800-0xffff */
1435     {
1436       obstack_1grow (&temporary_obstack,
1437                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1438       obstack_1grow (&temporary_obstack,
1439                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1440       obstack_1grow (&temporary_obstack,
1441                      (unsigned char)(0x80 | (unicode & 0x003f)));
1442     }
1443 }
1444
1445 #ifndef JC1_LITE
1446 static tree
1447 build_wfl_node (node)
1448      tree node;
1449 {
1450   return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1451 }
1452 #endif
1453
1454 static void
1455 java_lex_error (msg, forward)
1456      const char *msg ATTRIBUTE_UNUSED;
1457      int forward ATTRIBUTE_UNUSED;
1458 {
1459 #ifndef JC1_LITE
1460   ctxp->elc.line = ctxp->c_line->lineno;
1461   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1462
1463   /* Might be caught in the middle of some error report */
1464   ctxp->java_error_flag = 0;
1465   java_error (NULL);
1466   java_error (msg);
1467 #endif
1468 }
1469
#ifndef JC1_LITE
/* Tell whether C, a character just read from FP, ends a line.

   Returns 1 for '\n' and for '\r', 0 for anything else.  After a
   '\r', one more character is read from FP: a '\n' is swallowed so
   that "\r\n" counts as a single terminator, EOF is dropped, and any
   other character is pushed back on FP with ungetc.  FP is not
   touched unless C is '\r'.  */

static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int lookahead;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* C is '\r': peek at what follows it.  */
  lookahead = getc (fp);
  if (!(lookahead == '\n' || lookahead == EOF))
    ungetc (lookahead, fp);
  return 1;
}
#endif
1491
1492 char *
1493 java_get_line_col (filename, line, col)
1494      const char *filename ATTRIBUTE_UNUSED;
1495      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1496 {
1497 #ifdef JC1_LITE
1498   return 0;
1499 #else
1500   /* Dumb implementation. Doesn't try to cache or optimize things. */
1501   /* First line of the file is line 1, first column is 1 */
1502
1503   /* COL == -1 means, at the CR/LF in LINE */
1504   /* COL == -2 means, at the first non space char in LINE */
1505
1506   FILE *fp;
1507   int c, ccol, cline = 1;
1508   int current_line_col = 0;
1509   int first_non_space = 0;
1510   char *base;
1511
1512   if (!(fp = fopen (filename, "r")))
1513     fatal ("Can't open file - java_display_line_col");
1514
1515   while (cline != line)
1516     {
1517       c = getc (fp);
1518       if (c < 0)
1519         {
1520           static char msg[] = "<<file too short - unexpected EOF>>";
1521           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1522           goto have_line;
1523         }
1524       if (java_is_eol (fp, c))
1525         cline++;
1526     }
1527
1528   /* Gather the chars of the current line in a buffer */
1529   for (;;)
1530     {
1531       c = getc (fp);
1532       if (c < 0 || java_is_eol (fp, c))
1533         break;
1534       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1535         first_non_space = current_line_col;
1536       obstack_1grow (&temporary_obstack, c);
1537       current_line_col++;
1538     }
1539  have_line:
1540
1541   obstack_1grow (&temporary_obstack, '\n');
1542
1543   if (col == -1)
1544     {
1545       col = current_line_col;
1546       first_non_space = 0;
1547     }
1548   else if (col == -2)
1549     col = first_non_space;
1550   else
1551     first_non_space = 0;
1552
1553   /* Place the '^' a the right position */
1554   base = obstack_base (&temporary_obstack);
1555   for (ccol = 1; ccol <= col; ccol++)
1556     {
1557       /* Compute \t when reaching first_non_space */
1558       char c = (first_non_space ?
1559                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1560       obstack_1grow (&temporary_obstack, c);
1561     }
1562   obstack_grow0 (&temporary_obstack, "^", 1);
1563
1564   fclose (fp);
1565   return obstack_finish (&temporary_obstack);
1566 #endif
1567 }