gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* It defines java_lex (yylex) that reads a Java ASCII source file
  27    possibly containing Unicode escape sequence or utf8 encoded
  28    characters and returns a token for everything found but comments,
  29    white spaces and line terminators. When necessary, it also fills
  30    the java_lval (yylval) union. It's implemented to be called by a
  31    re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  36
  37 #include "keyword.h"
  38
  39 /* Function declaration  */
  40 static int java_lineterminator PARAMS ((unicode_t));
  41 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  42 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  43 static void java_lex_error PARAMS ((const char *, int));
  44 #ifndef JC1_LITE
  45 static int java_is_eol PARAMS ((FILE *, int));
  46 static tree build_wfl_node PARAMS ((tree));
  47 #endif
  48 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  49 static unicode_t java_parse_escape_sequence PARAMS ((void));
  50 static int java_letter_or_digit_p PARAMS ((unicode_t));
  51 static int java_parse_doc_section PARAMS ((unicode_t));
  52 static void java_parse_end_comment PARAMS ((unicode_t));
  53 static unicode_t java_get_unicode PARAMS ((void));
  54 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
  55 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  56 static unicode_t java_read_char PARAMS ((java_lexer *));
  57 static void java_allocate_new_line PARAMS ((void));
  58 static void java_unget_unicode PARAMS ((void));
  59 static unicode_t java_sneak_unicode PARAMS ((void));
  60 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  61
  62 void
  63 java_init_lex (finput, encoding)
  64      FILE *finput;
  65      const char *encoding;
  66 {
  67 #ifndef JC1_LITE
  68   int java_lang_imported = 0;
  69
  70   if (!java_lang_id)
  71     java_lang_id = get_identifier ("java.lang");
  72   if (!java_lang_cloneable)
  73     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  74   if (!java_io_serializable)
  75     java_io_serializable = get_identifier ("java.io.Serializable");
  76   if (!inst_id)
  77     inst_id = get_identifier ("inst$");
  78   if (!wpv_id)
  79     wpv_id = get_identifier ("write_parm_value$");
  80
  81   if (!java_lang_imported)
  82     {
  83       tree node = build_tree_list
  84         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
  85       read_import_dir (TREE_PURPOSE (node));
  86       TREE_CHAIN (node) = ctxp->import_demand_list;
  87       ctxp->import_demand_list = node;
  88       java_lang_imported = 1;
  89     }
  90
  91   if (!wfl_operator)
  92     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  93   if (!label_id)
  94     label_id = get_identifier ("$L");
  95   if (!wfl_append)
  96     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
  97   if (!wfl_string_buffer)
  98     wfl_string_buffer =
  99       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
 100   if (!wfl_to_string)
 101     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 102
 103   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 104     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
 105
 106   bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
 107   bzero ((PTR) current_jcf, sizeof (JCF));
 108   ctxp->current_parsed_class = NULL;
 109   ctxp->package = NULL_TREE;
 110 #endif
 111
 112   ctxp->filename = input_filename;
 113   ctxp->lineno = lineno = 0;
 114   ctxp->p_line = NULL;
 115   ctxp->c_line = NULL;
 116   ctxp->minus_seen = 0;
 117   ctxp->java_error_flag = 0;
 118   ctxp->lexer = java_new_lexer (finput, encoding);
 119 }
 120
 121 static char *
 122 java_sprint_unicode (line, i)
 123     struct java_line *line;
 124     int i;
 125 {
 126   static char buffer [10];
 127   if (line->unicode_escape_p [i] || line->line [i] > 128)
 128     sprintf (buffer, "\\u%04x", line->line [i]);
 129   else
 130     {
 131       buffer [0] = line->line [i];
 132       buffer [1] = '\0';
 133     }
 134   return buffer;
 135 }
 136
 137 static unicode_t
 138 java_sneak_unicode ()
 139 {
 140   return (ctxp->c_line->line [ctxp->c_line->current]);
 141 }
 142
 143 static void
 144 java_unget_unicode ()
 145 {
 146   if (!ctxp->c_line->current)
 147     fatal ("can't unget unicode - java_unget_unicode");
 148   ctxp->c_line->current--;
 149   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 150 }
 151
 152 static void
 153 java_allocate_new_line ()
 154 {
 155   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 156   char ahead_escape_p = (ctxp->c_line ?
 157                          ctxp->c_line->unicode_escape_ahead_p : 0);
 158
 159   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 160     {
 161       if (ctxp->p_line)
 162         {
 163           free (ctxp->p_line->unicode_escape_p);
 164           free (ctxp->p_line->line);
 165           free (ctxp->p_line);
 166         }
 167       ctxp->p_line = ctxp->c_line;
 168       ctxp->c_line = NULL;              /* Reallocated */
 169     }
 170
 171   if (!ctxp->c_line)
 172     {
 173       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 174       ctxp->c_line->max = JAVA_LINE_MAX;
 175       ctxp->c_line->line = (unicode_t *)xmalloc
 176         (sizeof (unicode_t)*ctxp->c_line->max);
 177       ctxp->c_line->unicode_escape_p =
 178           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 179       ctxp->c_line->white_space_only = 0;
 180     }
 181
 182   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 183   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 184   if (ahead)
 185     {
 186       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 187       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 188       ctxp->c_line->size++;
 189     }
 190   ctxp->c_line->ahead [0] = 0;
 191   ctxp->c_line->unicode_escape_ahead_p = 0;
 192   ctxp->c_line->lineno = ++lineno;
 193   ctxp->c_line->white_space_only = 1;
 194 }
 195
 196 /* Create a new lexer object.  */
 197 java_lexer *
 198 java_new_lexer (finput, encoding)
 199      FILE *finput;
 200      const char *encoding;
 201 {
 202   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 203   int enc_error = 0;
 204
 205   lex->finput = finput;
 206   lex->bs_count = 0;
 207   lex->unget_value = 0;
 208
 209 #ifdef HAVE_ICONV
 210   lex->handle = iconv_open ("UCS-2", encoding);
 211   if (lex->handle == (iconv_t) -1)
 212     {
 213       /* FIXME: we should give a nice error based on errno here.  */
 214       enc_error = 1;
 215     }
 216   lex->first = -1;
 217   lex->last = -1;
 218   lex->out_first = -1;
 219   lex->out_last = -1;
 220 #else /* HAVE_ICONV */
 221   if (strcmp (encoding, DEFAULT_ENCODING))
 222     enc_error = 1;
 223 #endif /* HAVE_ICONV */
 224
 225   if (enc_error)
 226     fatal ("unknown encoding: `%s'", encoding);
 227
 228   return lex;
 229 }
 230
 231 void
 232 java_destroy_lexer (lex)
 233      java_lexer *lex;
 234 {
 235 #ifdef HAVE_ICONV
 236   iconv_close (lex->handle);
 237 #endif
 238   free (lex);
 239 }
 240
 241 static unicode_t
 242 java_read_char (lex)
 243      java_lexer *lex;
 244 {
 245   if (lex->unget_value)
 246     {
 247       unicode_t r = lex->unget_value;
 248       lex->unget_value = 0;
 249       return r;
 250     }
 251
 252 #ifdef HAVE_ICONV
 253   {
 254     size_t ir, inbytesleft, in_save, out_count, out_save;
 255     char *inp, *outp;
 256     unicode_t result;
 257
 258     /* If there is data which has already been converted, use it.  */
 259     if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 260       {
 261         lex->out_first = 0;
 262         lex->out_last = 0;
 263
 264         while (1)
 265           {
 266             /* See if we need to read more data.  If FIRST == 0 then
 267                the previous conversion attempt ended in the middle of
 268                a character at the end of the buffer.  Otherwise we
 269                only have to read if the buffer is empty.  */
 270             if (lex->first == 0 || lex->first >= lex->last)
 271               {
 272                 int r;
 273
 274                 if (lex->first >= lex->last)
 275                   {
 276                     lex->first = 0;
 277                     lex->last = 0;
 278                   }
 279                 if (feof (lex->finput))
 280                   return UEOF;
 281                 r = fread (&lex->buffer[lex->last], 1,
 282                            sizeof (lex->buffer) - lex->last,
 283                            lex->finput);
 284                 lex->last += r;
 285               }
 286
 287             inbytesleft = lex->last - lex->first;
 288             out_count = sizeof (lex->out_buffer) - lex->out_last;
 289
 290             if (inbytesleft == 0)
 291               {
 292                 /* We've tried to read and there is nothing left.  */
 293                 return UEOF;
 294               }
 295
 296             in_save = inbytesleft;
 297             out_save = out_count;
 298             inp = &lex->buffer[lex->first];
 299             outp = &lex->out_buffer[lex->out_last];
 300             ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
 301                         &outp, &out_count);
 302             lex->first += in_save - inbytesleft;
 303             lex->out_last += out_save - out_count;
 304
 305             /* If we converted anything at all, move along.  */
 306             if (out_count != out_save)
 307               break;
 308
 309             if (ir == (size_t) -1)
 310               {
 311                 if (errno == EINVAL)
 312                   {
 313                     /* This is ok.  This means that the end of our buffer
 314                        is in the middle of a character sequence.  We just
 315                        move the valid part of the buffer to the beginning
 316                        to force a read.  */
 317                     /* We use bcopy() because it should work for
 318                        overlapping strings.  Use memmove() instead... */
 319                     bcopy (&lex->buffer[lex->first], &lex->buffer[0],
 320                            lex->last - lex->first);
 321                     lex->last -= lex->first;
 322                     lex->first = 0;
 323                   }
 324                 else
 325                   {
 326                     /* A more serious error.  */
 327                     java_lex_error ("unrecognized character in input stream",
 328                                     0);
 329                     return UEOF;
 330                   }
 331               }
 332           }
 333       }
 334
 335     if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 336       {
 337         /* Don't have any data.  */
 338         return UEOF;
 339       }
 340
 341     /* Success.  We assume that UCS-2 is big-endian.  This appears to
 342        be an ok assumption.  */
 343     result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
 344               | (unsigned char) lex->out_buffer[lex->out_first + 1]);
 345     lex->out_first += 2;
 346     return result;
 347   }
 348 #else /* HAVE_ICONV */
 349   {
 350     int c, c1, c2;
 351     c = getc (lex->finput);
 352
 353     if (c < 128)
 354       return (unicode_t)c;
 355     if (c == EOF)
 356       return UEOF;
 357     else
 358       {
 359         if ((c & 0xe0) == 0xc0)
 360           {
 361             c1 = getc (lex->finput);
 362             if ((c1 & 0xc0) == 0x80)
 363               return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 364             c = c1;
 365           }
 366         else if ((c & 0xf0) == 0xe0)
 367           {
 368             c1 = getc (lex->finput);
 369             if ((c1 & 0xc0) == 0x80)
 370               {
 371                 c2 = getc (lex->finput);
 372                 if ((c2 & 0xc0) == 0x80)
 373                   return (unicode_t)(((c & 0xf) << 12) +
 374                                      (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 375                 else
 376                   c = c2;
 377               }
 378             else
 379               c = c1;
 380           }
 381
 382         /* We simply don't support invalid characters.  */
 383         java_lex_error ("malformed UTF-8 character", 0);
 384       }
 385   }
 386 #endif /* HAVE_ICONV */
 387
 388   /* We only get here on error.  */
 389   return UEOF;
 390 }
 391
 392 static void
 393 java_store_unicode (l, c, unicode_escape_p)
 394     struct java_line *l;
 395     unicode_t c;
 396     int unicode_escape_p;
 397 {
 398   if (l->size == l->max)
 399     {
 400       l->max += JAVA_LINE_MAX;
 401       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 402       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 403                                                sizeof (char)*l->max);
 404     }
 405   l->line [l->size] = c;
 406   l->unicode_escape_p [l->size++] = unicode_escape_p;
 407 }
 408
 409 static unicode_t
 410 java_read_unicode (lex, term_context, unicode_escape_p)
 411      java_lexer *lex;
 412      int term_context;
 413      int *unicode_escape_p;
 414 {
 415   unicode_t c;
 416
 417   c = java_read_char (lex);
 418   *unicode_escape_p = 0;
 419
 420   if (c != '\\')
 421     {
 422       lex->bs_count = 0;
 423       return (term_context ? c : (java_lineterminator (c)
 424                                   ? '\n'
 425                                   : (unicode_t) c));
 426     }
 427
 428   ++lex->bs_count;
 429   if ((lex->bs_count) % 2 == 1)
 430     {
 431       /* Odd number of \ seen.  */
 432       c = java_read_char (lex);
 433       if (c == 'u')
 434         {
 435           unicode_t unicode = 0;
 436           int shift = 12;
 437           /* Next should be 4 hex digits, otherwise it's an error.
 438              The hex value is converted into the unicode, pushed into
 439              the Unicode stream.  */
 440           for (shift = 12; shift >= 0; shift -= 4)
 441             {
 442               if ((c = java_read_char (lex)) == UEOF)
 443                 return UEOF;
 444               if (c >= '0' && c <= '9')
 445                 unicode |= (unicode_t)((c-'0') << shift);
 446               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 447                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 448               else
 449                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 450             }
 451           lex->bs_count = 0;
 452           *unicode_escape_p = 1;
 453           return (term_context
 454                   ? unicode : (java_lineterminator (c) ? '\n' : unicode));
 455         }
 456       lex->unget_value = c;
 457     }
 458   return (unicode_t) '\\';
 459 }
 460
 461 static unicode_t
 462 java_get_unicode ()
 463 {
 464   /* It's time to read a line when... */
 465   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 466     {
 467       unicode_t c;
 468       java_allocate_new_line ();
 469       if (ctxp->c_line->line[0] != '\n')
 470         for (;;)
 471           {
 472             int unicode_escape_p;
 473             c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
 474             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 475             if (ctxp->c_line->white_space_only
 476                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 477               ctxp->c_line->white_space_only = 0;
 478             if ((c == '\n') || (c == UEOF))
 479               break;
 480           }
 481     }
 482   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 483   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 484   return ctxp->c_line->line [ctxp->c_line->current++];
 485 }
 486
 487 static int
 488 java_lineterminator (c)
 489      unicode_t c;
 490 {
 491   if (c == '\n')                /* LF */
 492     return 1;
 493   else if (c == '\r')           /* CR */
 494     {
 495       int unicode_escape_p;
 496       c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
 497       if (c == '\r')
 498         {
 499           /* In this case we will have another terminator.  For some
 500              reason the lexer has several different unget methods.  We
 501              can't use the `ahead' method because then the \r will end
 502              up in the actual text of the line, causing an error.  So
 503              instead we choose a very low-level method.  FIXME: this
 504              is incredibly ugly.  */
 505           ctxp->lexer->unget_value = c;
 506         }
 507       else if (c != '\n')
 508         {
 509           ctxp->c_line->ahead [0] = c;
 510           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 511         }
 512       return 1;
 513     }
 514   else
 515     return 0;
 516 }
 517
 518 /* Parse the end of a C style comment.
 519  * C is the first character following the '/' and '*'. */
 520 static void
 521 java_parse_end_comment (c)
 522      unicode_t c;
 523 {
 524
 525   for ( ;; c = java_get_unicode ())
 526     {
 527       switch (c)
 528         {
 529         case UEOF:
 530           java_lex_error ("Comment not terminated at end of input", 0);
 531         case '*':
 532           switch (c = java_get_unicode ())
 533             {
 534             case UEOF:
 535               java_lex_error ("Comment not terminated at end of input", 0);
 536             case '/':
 537               return;
 538             case '*':   /* reparse only '*' */
 539               java_unget_unicode ();
 540             }
 541         }
 542     }
 543 }
 544
 545 /* Parse the documentation section. Keywords must be at the beginning
 546    of a documentation comment line (ignoring white space and any `*'
 547    character). Parsed keyword(s): @DEPRECATED.  */
 548
 549 static int
 550 java_parse_doc_section (c)
 551      unicode_t c;
 552 {
 553   int valid_tag = 0, seen_star = 0;
 554
 555   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 556     {
 557       switch (c)
 558         {
 559         case '*':
 560           seen_star = 1;
 561           break;
 562         case '\n': /* ULT */
 563           valid_tag = 1;
 564         default:
 565           seen_star = 0;
 566         }
 567       c = java_get_unicode();
 568     }
 569
 570   if (c == UEOF)
 571     java_lex_error ("Comment not terminated at end of input", 0);
 572
 573   if (seen_star && (c == '/'))
 574     return 1;                   /* Goto step1 in caller */
 575
 576   /* We're parsing @deprecated */
 577   if (valid_tag && (c == '@'))
 578     {
 579       char tag [11];
 580       int  tag_index = 0;
 581
 582       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 583         {
 584           c = java_get_unicode ();
 585           tag [tag_index++] = c;
 586         }
 587
 588       if (c == UEOF)
 589         java_lex_error ("Comment not terminated at end of input", 0);
 590       tag [tag_index] = '\0';
 591
 592       if (!strcmp (tag, "deprecated"))
 593         ctxp->deprecated = 1;
 594     }
 595   java_unget_unicode ();
 596   return 0;
 597 }
 598
 599 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
 600    will return a wrong result.  */
 601 static int
 602 java_letter_or_digit_p (c)
 603      unicode_t c;
 604 {
 605   return _JAVA_LETTER_OR_DIGIT_P (c);
 606 }
 607
 608 static unicode_t
 609 java_parse_escape_sequence ()
 610 {
 611   unicode_t char_lit;
 612   unicode_t c;
 613
 614   switch (c = java_get_unicode ())
 615     {
 616     case 'b':
 617       return (unicode_t)0x8;
 618     case 't':
 619       return (unicode_t)0x9;
 620     case 'n':
 621       return (unicode_t)0xa;
 622     case 'f':
 623       return (unicode_t)0xc;
 624     case 'r':
 625       return (unicode_t)0xd;
 626     case '"':
 627       return (unicode_t)0x22;
 628     case '\'':
 629       return (unicode_t)0x27;
 630     case '\\':
 631       return (unicode_t)0x5c;
 632     case '0': case '1': case '2': case '3': case '4':
 633     case '5': case '6': case '7': case '8': case '9':
 634       {
 635         int octal_escape[3];
 636         int octal_escape_index = 0;
 637
 638         for (; octal_escape_index < 3 && RANGE (c, '0', '9');
 639              c = java_get_unicode ())
 640           octal_escape [octal_escape_index++] = c;
 641
 642         java_unget_unicode ();
 643
 644         if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
 645           {
 646             java_lex_error ("Literal octal escape out of range", 0);
 647             return JAVA_CHAR_ERROR;
 648           }
 649         else
 650           {
 651             int i, shift;
 652             for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 653                  i < octal_escape_index; i++, shift -= 3)
 654               char_lit |= (octal_escape [i] - '0') << shift;
 655
 656             return (char_lit);
 657           }
 658         break;
 659       }
 660     case '\n':
 661       return '\n';              /* ULT, caught latter as a specific error */
 662     default:
 663       java_lex_error ("Illegal character in escape sequence", 0);
 664       return JAVA_CHAR_ERROR;
 665     }
 666 }
 667
 668 /* Isolate the code which may raise an arithmetic exception in its
 669    own function.  */
 670
 671 #ifndef JC1_LITE
 672 struct jpa_args
 673 {
 674   YYSTYPE *java_lval;
 675   char *literal_token;
 676   int fflag;
 677   int number_beginning;
 678 };
 679
 680 static void java_perform_atof   PARAMS ((PTR));
 681
 682 static void
 683 java_perform_atof (av)
 684      PTR av;
 685 {
 686   struct jpa_args *a = (struct jpa_args *)av;
 687   YYSTYPE *java_lval = a->java_lval;
 688   int number_beginning = a->number_beginning;
 689   REAL_VALUE_TYPE value;
 690   tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 691
 692   SET_REAL_VALUE_ATOF (value,
 693                        REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
 694
 695   if (REAL_VALUE_ISINF (value)
 696       || REAL_VALUE_ISNAN (value))
 697     {
 698       JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
 699       value = DCONST0;
 700     }
 701
 702   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 703 }
 704 #endif
 705
 706 static int yylex                PARAMS ((YYSTYPE *));
 707
 708 static int
 709 #ifdef JC1_LITE
 710 yylex (java_lval)
 711 #else
 712 java_lex (java_lval)
 713 #endif
 714      YYSTYPE *java_lval;
 715 {
 716   unicode_t c, first_unicode;
 717   int ascii_index, all_ascii;
 718   char *string;
 719
 720   /* Translation of the Unicode escape in the raw stream of Unicode
 721      characters. Takes care of line terminator.  */
 722  step1:
 723   /* Skip white spaces: SP, TAB and FF or ULT */
 724   for (c = java_get_unicode ();
 725        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 726     if (c == '\n')
 727       {
 728         ctxp->elc.line = ctxp->c_line->lineno;
 729         ctxp->elc.col  = ctxp->c_line->char_col-2;
 730       }
 731
 732   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 733
 734   if (c == 0x1a)                /* CTRL-Z */
 735     {
 736       if ((c = java_get_unicode ()) == UEOF)
 737         return 0;               /* Ok here */
 738       else
 739         java_unget_unicode ();  /* Caught latter at the end the function */
 740     }
 741   /* Handle EOF here */
 742   if (c == UEOF)        /* Should probably do something here... */
 743     return 0;
 744
 745   /* Take care of eventual comments.  */
 746   if (c == '/')
 747     {
 748       switch (c = java_get_unicode ())
 749         {
 750         case '/':
 751           for (;;)
 752             {
 753               c = java_get_unicode ();
 754               if (c == UEOF)
 755                 java_lex_error ("Comment not terminated at end of input", 0);
 756               if (c == '\n')    /* ULT */
 757                 goto step1;
 758             }
 759           break;
 760
 761         case '*':
 762           if ((c = java_get_unicode ()) == '*')
 763             {
 764               if ((c = java_get_unicode ()) == '/')
 765                 goto step1;     /* Empy documentation comment  */
 766               else if (java_parse_doc_section (c))
 767                 goto step1;
 768             }
 769
 770           java_parse_end_comment ((c = java_get_unicode ()));
 771           goto step1;
 772           break;
 773         default:
 774           java_unget_unicode ();
 775           c = '/';
 776           break;
 777         }
 778     }
 779
 780   ctxp->elc.line = ctxp->c_line->lineno;
 781   ctxp->elc.prev_col = ctxp->elc.col;
 782   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 783   if (ctxp->elc.col < 0)
 784     fatal ("ctxp->elc.col < 0 - java_lex");
 785
 786   /* Numeric literals */
 787   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 788     {
 789       /* This section of code is borrowed from gcc/c-lex.c  */
 790 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 791       int parts[TOTAL_PARTS];
 792       HOST_WIDE_INT high, low;
 793       /* End borrowed section  */
 794       char literal_token [256];
 795       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 796       int  i;
 797 #ifndef JC1_LITE
 798       int  number_beginning = ctxp->c_line->current;
 799 #endif
 800
 801       /* We might have a . separator instead of a FP like .[0-9]* */
 802       if (c == '.')
 803         {
 804           unicode_t peep = java_sneak_unicode ();
 805
 806           if (!JAVA_ASCII_DIGIT (peep))
 807             {
 808               JAVA_LEX_SEP('.');
 809               BUILD_OPERATOR (DOT_TK);
 810             }
 811         }
 812
 813       for (i = 0; i < TOTAL_PARTS; i++)
 814         parts [i] = 0;
 815
 816       if (c == '0')
 817         {
 818           c = java_get_unicode ();
 819           if (c == 'x' || c == 'X')
 820             {
 821               radix = 16;
 822               c = java_get_unicode ();
 823             }
 824           else if (JAVA_ASCII_DIGIT (c))
 825             radix = 8;
 826           else if (c == '.')
 827             {
 828               /* Push the '.' back and prepare for a FP parsing... */
 829               java_unget_unicode ();
 830               c = '0';
 831             }
 832           else
 833             {
 834               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 835               JAVA_LEX_LIT ("0", 10);
 836               switch (c)
 837                 {
 838                 case 'L': case 'l':
 839                   SET_LVAL_NODE (long_zero_node);
 840                   return (INT_LIT_TK);
 841                 case 'f': case 'F':
 842                   SET_LVAL_NODE (float_zero_node);
 843                   return (FP_LIT_TK);
 844                 case 'd': case 'D':
 845                   SET_LVAL_NODE (double_zero_node);
 846                   return (FP_LIT_TK);
 847                 default:
 848                   java_unget_unicode ();
 849                   SET_LVAL_NODE (integer_zero_node);
 850                   return (INT_LIT_TK);
 851                 }
 852             }
 853         }
 854       /* Parse the first part of the literal, until we find something
 855          which is not a number.  */
 856       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 857              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 858              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 859         {
 860           /* We store in a string (in case it turns out to be a FP) and in
 861              PARTS if we have to process a integer literal.  */
 862           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
 863           int count;
 864
 865           literal_token [literal_index++] = c;
 866           /* This section of code if borrowed from gcc/c-lex.c  */
 867           for (count = 0; count < TOTAL_PARTS; count++)
 868             {
 869               parts[count] *= radix;
 870               if (count)
 871                 {
 872                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 873                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 874                 }
 875               else
 876                 parts[0] += numeric;
 877             }
 878           if (parts [TOTAL_PARTS-1] != 0)
 879             overflow = 1;
 880           /* End borrowed section.  */
 881           c = java_get_unicode ();
 882         }
 883
 884       /* If we have something from the FP char set but not a digit, parse
 885          a FP literal.  */
 886       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 887         {
 888           int stage = 0;
 889           int seen_digit = (literal_index ? 1 : 0);
 890           int seen_exponent = 0;
 891           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
 892                                    double unless specified. */
 893           if (radix != 10)
 894             java_lex_error ("Can't express non-decimal FP literal", 0);
 895
 896           for (;;)
 897             {
 898               if (c == '.')
 899                 {
 900                   if (stage < 1)
 901                     {
 902                       stage = 1;
 903                       literal_token [literal_index++ ] = c;
 904                       c = java_get_unicode ();
 905                     }
 906                   else
 907                     java_lex_error ("Invalid character in FP literal", 0);
 908                 }
 909
 910               if (c == 'e' || c == 'E')
 911                 {
 912                   if (stage < 2)
 913                     {
 914                       /* {E,e} must have seen at list a digit */
 915                       if (!seen_digit)
 916                         java_lex_error ("Invalid FP literal", 0);
 917                       seen_digit = 0;
 918                       seen_exponent = 1;
 919                       stage = 2;
 920                       literal_token [literal_index++] = c;
 921                       c = java_get_unicode ();
 922                     }
 923                   else
 924                     java_lex_error ("Invalid character in FP literal", 0);
 925                 }
 926               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 927                 {
 928                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 929                   stage = 4;    /* So we fall through */
 930                 }
 931
 932               if ((c=='-' || c =='+') && stage == 2)
 933                 {
 934                   stage = 3;
 935                   literal_token [literal_index++] = c;
 936                   c = java_get_unicode ();
 937                 }
 938
 939               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 940                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 941                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 942                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
 943                 {
 944                   if (JAVA_ASCII_DIGIT (c))
 945                     seen_digit = 1;
 946                   literal_token [literal_index++ ] = c;
 947                   c = java_get_unicode ();
 948                 }
 949               else
 950                 {
 951 #ifndef JC1_LITE
 952                   struct jpa_args a;
 953 #endif
 954                   if (stage != 4) /* Don't push back fF/dD */
 955                     java_unget_unicode ();
 956
 957                   /* An exponent (if any) must have seen a digit.  */
 958                   if (seen_exponent && !seen_digit)
 959                     java_lex_error ("Invalid FP literal", 0);
 960
 961                   literal_token [literal_index] = '\0';
 962                   JAVA_LEX_LIT (literal_token, radix);
 963
 964 #ifndef JC1_LITE
 965                   a.literal_token = literal_token;
 966                   a.fflag = fflag;
 967                   a.java_lval = java_lval;
 968                   a.number_beginning = number_beginning;
 969                   if (do_float_handler (java_perform_atof, (PTR) &a))
 970                     return FP_LIT_TK;
 971
 972                   JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 973 #else
 974                   return FP_LIT_TK;
 975 #endif
 976                 }
 977             }
 978         } /* JAVA_ASCCI_FPCHAR (c) */
 979
 980       /* Here we get back to converting the integral literal.  */
 981       if (c == 'L' || c == 'l')
 982         long_suffix = 1;
 983       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 984         java_lex_error ("Digit out of range in hexadecimal literal", 0);
 985       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 986         java_lex_error ("Digit out of range in octal literal", 0);
 987       else if (radix == 16 && !literal_index)
 988         java_lex_error ("No digit specified for hexadecimal literal", 0);
 989       else
 990         java_unget_unicode ();
 991
 992 #ifdef JAVA_LEX_DEBUG
 993       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
 994       JAVA_LEX_LIT (literal_token, radix);
 995 #endif
 996       /* This section of code is borrowed from gcc/c-lex.c  */
 997       if (!overflow)
 998         {
 999           bytes = GET_TYPE_PRECISION (long_type_node);
1000           for (i = bytes; i < TOTAL_PARTS; i++)
1001             if (parts [i])
1002               {
1003                 overflow = 1;
1004                 break;
1005               }
1006         }
1007       high = low = 0;
1008       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1009         {
1010           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1011                                               / HOST_BITS_PER_CHAR)]
1012                    << (i * HOST_BITS_PER_CHAR));
1013           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1014         }
1015       /* End borrowed section.  */
1016
1017       /* Range checking */
1018       if (long_suffix)
1019         {
1020           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1021              9223372036854775807L is the biggest `long' literal that can be
1022              expressed using a 10 radix. For other radixes, everything that
1023              fits withing 64 bits is OK. */
1024           int hb = (high >> 31);
1025           if (overflow || (hb && low && radix == 10) ||
1026               (hb && high & 0x7fffffff && radix == 10) ||
1027               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1028             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1029         }
1030       else
1031         {
1032           /* 2147483648 is valid if operand of a '-'. Otherwise,
1033              2147483647 is the biggest `int' literal that can be
1034              expressed using a 10 radix. For other radixes, everything
1035              that fits within 32 bits is OK.  As all literals are
1036              signed, we sign extend here. */
1037           int hb = (low >> 31) & 0x1;
1038           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1039               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1040             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1041           high = -hb;
1042         }
1043       ctxp->minus_seen = 0;
1044       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1045                           (long_suffix ? long_type_node : int_type_node));
1046       return INT_LIT_TK;
1047     }
1048
1049   ctxp->minus_seen = 0;
1050   /* Character literals */
1051   if (c == '\'')
1052     {
1053       unicode_t char_lit;
1054       if ((c = java_get_unicode ()) == '\\')
1055         char_lit = java_parse_escape_sequence ();
1056       else
1057         char_lit = c;
1058
1059       c = java_get_unicode ();
1060
1061       if ((c == '\n') || (c == UEOF))
1062         java_lex_error ("Character literal not terminated at end of line", 0);
1063       if (c != '\'')
1064         java_lex_error ("Syntax error in character literal", 0);
1065
1066       if (c == JAVA_CHAR_ERROR)
1067         char_lit = 0;           /* We silently convert it to zero */
1068
1069       JAVA_LEX_CHAR_LIT (char_lit);
1070       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1071       return CHAR_LIT_TK;
1072     }
1073
1074   /* String literals */
1075   if (c == '"')
1076     {
1077       int no_error;
1078       char *string;
1079
1080       for (no_error = 1, c = java_get_unicode ();
1081            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1082         {
1083           if (c == '\\')
1084             c = java_parse_escape_sequence ();
1085           no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1086           java_unicode_2_utf8 (c);
1087         }
1088       if (c == '\n' || c == UEOF) /* ULT */
1089         {
1090           lineno--;             /* Refer to the line the terminator was seen */
1091           java_lex_error ("String not terminated at end of line.", 0);
1092           lineno++;
1093         }
1094
1095       obstack_1grow (&temporary_obstack, '\0');
1096       string = obstack_finish (&temporary_obstack);
1097 #ifndef JC1_LITE
1098       if (!no_error || (c != '"'))
1099         java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1100       else
1101         java_lval->node = build_string (strlen (string), string);
1102 #endif
1103       obstack_free (&temporary_obstack, string);
1104       return STRING_LIT_TK;
1105     }
1106
1107   /* Separator */
1108   switch (c)
1109     {
1110     case '(':
1111       JAVA_LEX_SEP (c);
1112       BUILD_OPERATOR (OP_TK);
1113     case ')':
1114       JAVA_LEX_SEP (c);
1115       return CP_TK;
1116     case '{':
1117       JAVA_LEX_SEP (c);
1118       if (ctxp->ccb_indent == 1)
1119         ctxp->first_ccb_indent1 = lineno;
1120       ctxp->ccb_indent++;
1121       BUILD_OPERATOR (OCB_TK);
1122     case '}':
1123       JAVA_LEX_SEP (c);
1124       ctxp->ccb_indent--;
1125       if (ctxp->ccb_indent == 1)
1126         ctxp->last_ccb_indent1 = lineno;
1127       BUILD_OPERATOR (CCB_TK);
1128     case '[':
1129       JAVA_LEX_SEP (c);
1130       BUILD_OPERATOR (OSB_TK);
1131     case ']':
1132       JAVA_LEX_SEP (c);
1133       return CSB_TK;
1134     case ';':
1135       JAVA_LEX_SEP (c);
1136       return SC_TK;
1137     case ',':
1138       JAVA_LEX_SEP (c);
1139       return C_TK;
1140     case '.':
1141       JAVA_LEX_SEP (c);
1142       BUILD_OPERATOR (DOT_TK);
1143       /*      return DOT_TK; */
1144     }
1145
1146   /* Operators */
1147   switch (c)
1148     {
1149     case '=':
1150       if ((c = java_get_unicode ()) == '=')
1151         {
1152           BUILD_OPERATOR (EQ_TK);
1153         }
1154       else
1155         {
1156           /* Equals is used in two different locations. In the
1157              variable_declarator: rule, it has to be seen as '=' as opposed
1158              to being seen as an ordinary assignment operator in
1159              assignment_operators: rule.  */
1160           java_unget_unicode ();
1161           BUILD_OPERATOR (ASSIGN_TK);
1162         }
1163
1164     case '>':
1165       switch ((c = java_get_unicode ()))
1166         {
1167         case '=':
1168           BUILD_OPERATOR (GTE_TK);
1169         case '>':
1170           switch ((c = java_get_unicode ()))
1171             {
1172             case '>':
1173               if ((c = java_get_unicode ()) == '=')
1174                 {
1175                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1176                 }
1177               else
1178                 {
1179                   java_unget_unicode ();
1180                   BUILD_OPERATOR (ZRS_TK);
1181                 }
1182             case '=':
1183               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1184             default:
1185               java_unget_unicode ();
1186               BUILD_OPERATOR (SRS_TK);
1187             }
1188         default:
1189           java_unget_unicode ();
1190           BUILD_OPERATOR (GT_TK);
1191         }
1192
1193     case '<':
1194       switch ((c = java_get_unicode ()))
1195         {
1196         case '=':
1197           BUILD_OPERATOR (LTE_TK);
1198         case '<':
1199           if ((c = java_get_unicode ()) == '=')
1200             {
1201               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1202             }
1203           else
1204             {
1205               java_unget_unicode ();
1206               BUILD_OPERATOR (LS_TK);
1207             }
1208         default:
1209           java_unget_unicode ();
1210           BUILD_OPERATOR (LT_TK);
1211         }
1212
1213     case '&':
1214       switch ((c = java_get_unicode ()))
1215         {
1216         case '&':
1217           BUILD_OPERATOR (BOOL_AND_TK);
1218         case '=':
1219           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1220         default:
1221           java_unget_unicode ();
1222           BUILD_OPERATOR (AND_TK);
1223         }
1224
1225     case '|':
1226       switch ((c = java_get_unicode ()))
1227         {
1228         case '|':
1229           BUILD_OPERATOR (BOOL_OR_TK);
1230         case '=':
1231           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1232         default:
1233           java_unget_unicode ();
1234           BUILD_OPERATOR (OR_TK);
1235         }
1236
1237     case '+':
1238       switch ((c = java_get_unicode ()))
1239         {
1240         case '+':
1241           BUILD_OPERATOR (INCR_TK);
1242         case '=':
1243           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1244         default:
1245           java_unget_unicode ();
1246           BUILD_OPERATOR (PLUS_TK);
1247         }
1248
1249     case '-':
1250       switch ((c = java_get_unicode ()))
1251         {
1252         case '-':
1253           BUILD_OPERATOR (DECR_TK);
1254         case '=':
1255           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1256         default:
1257           java_unget_unicode ();
1258           ctxp->minus_seen = 1;
1259           BUILD_OPERATOR (MINUS_TK);
1260         }
1261
1262     case '*':
1263       if ((c = java_get_unicode ()) == '=')
1264         {
1265           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1266         }
1267       else
1268         {
1269           java_unget_unicode ();
1270           BUILD_OPERATOR (MULT_TK);
1271         }
1272
1273     case '/':
1274       if ((c = java_get_unicode ()) == '=')
1275         {
1276           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1277         }
1278       else
1279         {
1280           java_unget_unicode ();
1281           BUILD_OPERATOR (DIV_TK);
1282         }
1283
1284     case '^':
1285       if ((c = java_get_unicode ()) == '=')
1286         {
1287           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1288         }
1289       else
1290         {
1291           java_unget_unicode ();
1292           BUILD_OPERATOR (XOR_TK);
1293         }
1294
1295     case '%':
1296       if ((c = java_get_unicode ()) == '=')
1297         {
1298           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1299         }
1300       else
1301         {
1302           java_unget_unicode ();
1303           BUILD_OPERATOR (REM_TK);
1304         }
1305
1306     case '!':
1307       if ((c = java_get_unicode()) == '=')
1308         {
1309           BUILD_OPERATOR (NEQ_TK);
1310         }
1311       else
1312         {
1313           java_unget_unicode ();
1314           BUILD_OPERATOR (NEG_TK);
1315         }
1316
1317     case '?':
1318       JAVA_LEX_OP ("?");
1319       BUILD_OPERATOR (REL_QM_TK);
1320     case ':':
1321       JAVA_LEX_OP (":");
1322       BUILD_OPERATOR (REL_CL_TK);
1323     case '~':
1324       BUILD_OPERATOR (NOT_TK);
1325     }
1326
1327   /* Keyword, boolean literal or null literal */
1328   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1329        JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1330     {
1331       java_unicode_2_utf8 (c);
1332       if (all_ascii && c >= 128)
1333         all_ascii = 0;
1334       ascii_index++;
1335     }
1336
1337   obstack_1grow (&temporary_obstack, '\0');
1338   string = obstack_finish (&temporary_obstack);
1339   java_unget_unicode ();
1340
1341   /* If we have something all ascii, we consider a keyword, a boolean
1342      literal, a null literal or an all ASCII identifier.  Otherwise,
1343      this is an identifier (possibly not respecting formation rule).  */
1344   if (all_ascii)
1345     {
1346       struct java_keyword *kw;
1347       if ((kw=java_keyword (string, ascii_index)))
1348         {
1349           JAVA_LEX_KW (string);
1350           switch (kw->token)
1351             {
1352             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1353             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1354             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1355             case PRIVATE_TK:
1356               SET_MODIFIER_CTX (kw->token);
1357               return MODIFIER_TK;
1358             case FLOAT_TK:
1359               SET_LVAL_NODE (float_type_node);
1360               return FP_TK;
1361             case DOUBLE_TK:
1362               SET_LVAL_NODE (double_type_node);
1363               return FP_TK;
1364             case BOOLEAN_TK:
1365               SET_LVAL_NODE (boolean_type_node);
1366               return BOOLEAN_TK;
1367             case BYTE_TK:
1368               SET_LVAL_NODE (byte_type_node);
1369               return INTEGRAL_TK;
1370             case SHORT_TK:
1371               SET_LVAL_NODE (short_type_node);
1372               return INTEGRAL_TK;
1373             case INT_TK:
1374               SET_LVAL_NODE (int_type_node);
1375               return INTEGRAL_TK;
1376             case LONG_TK:
1377               SET_LVAL_NODE (long_type_node);
1378               return INTEGRAL_TK;
1379             case CHAR_TK:
1380               SET_LVAL_NODE (char_type_node);
1381               return INTEGRAL_TK;
1382
1383               /* Keyword based literals */
1384             case TRUE_TK:
1385             case FALSE_TK:
1386               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1387                               boolean_true_node : boolean_false_node));
1388               return BOOL_LIT_TK;
1389             case NULL_TK:
1390               SET_LVAL_NODE (null_pointer_node);
1391               return NULL_TK;
1392
1393               /* Some keyword we want to retain information on the location
1394                  they where found */
1395             case CASE_TK:
1396             case DEFAULT_TK:
1397             case SUPER_TK:
1398             case THIS_TK:
1399             case RETURN_TK:
1400             case BREAK_TK:
1401             case CONTINUE_TK:
1402             case TRY_TK:
1403             case CATCH_TK:
1404             case THROW_TK:
1405             case INSTANCEOF_TK:
1406               BUILD_OPERATOR (kw->token);
1407
1408             default:
1409               return kw->token;
1410             }
1411         }
1412     }
1413
1414   /* We may have and ID here */
1415   if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1416     {
1417       JAVA_LEX_ID (string);
1418       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1419       return ID_TK;
1420     }
1421
1422   /* Everything else is an invalid character in the input */
1423   {
1424     char lex_error_buffer [128];
1425     sprintf (lex_error_buffer, "Invalid character '%s' in input",
1426              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1427     java_lex_error (lex_error_buffer, 1);
1428   }
1429   return 0;
1430 }
1431
1432 static void
1433 java_unicode_2_utf8 (unicode)
1434     unicode_t unicode;
1435 {
1436   if (RANGE (unicode, 0x01, 0x7f))
1437     obstack_1grow (&temporary_obstack, (char)unicode);
1438   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1439     {
1440       obstack_1grow (&temporary_obstack,
1441                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1442       obstack_1grow (&temporary_obstack,
1443                      (unsigned char)(0x80 | (unicode & 0x3f)));
1444     }
1445   else                          /* Range 0x800-0xffff */
1446     {
1447       obstack_1grow (&temporary_obstack,
1448                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1449       obstack_1grow (&temporary_obstack,
1450                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1451       obstack_1grow (&temporary_obstack,
1452                      (unsigned char)(0x80 | (unicode & 0x003f)));
1453     }
1454 }
1455
1456 #ifndef JC1_LITE
1457 static tree
1458 build_wfl_node (node)
1459      tree node;
1460 {
1461   return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1462 }
1463 #endif
1464
1465 static void
1466 java_lex_error (msg, forward)
1467      const char *msg ATTRIBUTE_UNUSED;
1468      int forward ATTRIBUTE_UNUSED;
1469 {
1470 #ifndef JC1_LITE
1471   ctxp->elc.line = ctxp->c_line->lineno;
1472   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1473
1474   /* Might be caught in the middle of some error report */
1475   ctxp->java_error_flag = 0;
1476   java_error (NULL);
1477   java_error (msg);
1478 #endif
1479 }
1480
1481 #ifndef JC1_LITE
/* Return 1 when C, a character just read from FP, ends a line and 0
   otherwise.  Both '\n' and '\r' end a line.  For '\r' the following
   character is read too: a '\n' is swallowed so that "\r\n" counts as a
   single terminator, any other character except EOF is pushed back.  */

static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int lookahead = getc (fp);

      /* Only keep LOOKAHEAD out of the stream when it completes "\r\n"
         (or when there is nothing to put back).  */
      if (lookahead != EOF && lookahead != '\n')
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
1501 #endif
1502
1503 char *
1504 java_get_line_col (filename, line, col)
1505      const char *filename ATTRIBUTE_UNUSED;
1506      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1507 {
1508 #ifdef JC1_LITE
1509   return 0;
1510 #else
1511   /* Dumb implementation. Doesn't try to cache or optimize things. */
1512   /* First line of the file is line 1, first column is 1 */
1513
1514   /* COL == -1 means, at the CR/LF in LINE */
1515   /* COL == -2 means, at the first non space char in LINE */
1516
1517   FILE *fp;
1518   int c, ccol, cline = 1;
1519   int current_line_col = 0;
1520   int first_non_space = 0;
1521   char *base;
1522
1523   if (!(fp = fopen (filename, "r")))
1524     fatal ("Can't open file - java_display_line_col");
1525
1526   while (cline != line)
1527     {
1528       c = getc (fp);
1529       if (c == EOF)
1530         {
1531           static char msg[] = "<<file too short - unexpected EOF>>";
1532           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1533           goto have_line;
1534         }
1535       if (java_is_eol (fp, c))
1536         cline++;
1537     }
1538
1539   /* Gather the chars of the current line in a buffer */
1540   for (;;)
1541     {
1542       c = getc (fp);
1543       if (c < 0 || java_is_eol (fp, c))
1544         break;
1545       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1546         first_non_space = current_line_col;
1547       obstack_1grow (&temporary_obstack, c);
1548       current_line_col++;
1549     }
1550  have_line:
1551
1552   obstack_1grow (&temporary_obstack, '\n');
1553
1554   if (col == -1)
1555     {
1556       col = current_line_col;
1557       first_non_space = 0;
1558     }
1559   else if (col == -2)
1560     col = first_non_space;
1561   else
1562     first_non_space = 0;
1563
1564   /* Place the '^' a the right position */
1565   base = obstack_base (&temporary_obstack);
1566   for (ccol = 1; ccol <= col+3; ccol++)
1567     {
1568       /* Compute \t when reaching first_non_space */
1569       char c = (first_non_space ?
1570                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1571       obstack_1grow (&temporary_obstack, c);
1572     }
1573   obstack_grow0 (&temporary_obstack, "^", 1);
1574
1575   fclose (fp);
1576   return obstack_finish (&temporary_obstack);
1577 #endif
1578 }