gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* It defines java_lex (yylex) that reads a Java ASCII source file
  27 possibly containing Unicode escape sequence or utf8 encoded characters
  28 and returns a token for everything found but comments, white spaces
  29 and line terminators. When necessary, it also fills the java_lval
  30 (yylval) union. It's implemented to be called by a re-entrant parser
  31 generated by Bison.
  32
  33 The lexical analysis conforms to the Java grammar described in "The
  34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */
  36
  37 #include <stdio.h>
  38 #include <string.h>
  39 #include <strings.h>
  40
  41 #ifdef JAVA_LEX_DEBUG
  42 #include <ctype.h>
  43 #endif
  44
  45 #ifdef inline                   /* javaop.h redefines inline as static */
  46 #undef inline
  47 #endif
  48 #include "keyword.h"
  49
  50 #ifndef SEEK_SET
  51 #include <unistd.h>
  52 #endif
  53
  54 #ifndef JC1_LITE
  55 extern struct obstack *expression_obstack;
  56 #endif
  57
  58 void
  59 java_init_lex ()
  60 {
  61   int java_lang_imported = 0;
  62
  63 #ifndef JC1_LITE
  64   if (!java_lang_imported)
  65     {
  66       tree node = build_tree_list
  67         (build_expr_wfl (get_identifier ("java.lang"), NULL, 0, 0), NULL_TREE);
  68       read_import_dir (TREE_PURPOSE (node));
  69       TREE_CHAIN (node) = ctxp->import_demand_list;
  70       ctxp->import_demand_list = node;
  71       java_lang_imported = 1;
  72     }
  73
  74   if (!wfl_operator)
  75     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  76   if (!label_id)
  77     label_id = get_identifier ("$L");
  78   if (!wfl_append)
  79     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
  80   if (!wfl_string_buffer)
  81     wfl_string_buffer =
  82       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
  83   if (!wfl_to_string)
  84     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
  85
  86   ctxp->static_initialized = ctxp->non_static_initialized =
  87     ctxp->incomplete_class = NULL_TREE;
  88
  89   bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
  90   bzero (current_jcf, sizeof (JCF));
  91   ctxp->current_parsed_class = NULL;
  92   ctxp->package = NULL_TREE;
  93 #endif
  94
  95   ctxp->filename = input_filename;
  96   ctxp->lineno = lineno = 0;
  97   ctxp->p_line = NULL;
  98   ctxp->c_line = NULL;
  99   ctxp->unget_utf8_value = 0;
 100   ctxp->minus_seen = 0;
 101   ctxp->java_error_flag = 0;
 102 }
 103
 104 static char *
 105 java_sprint_unicode (line, i)
 106     struct java_line *line;
 107     int i;
 108 {
 109   static char buffer [10];
 110   if (line->unicode_escape_p [i] || line->line [i] > 128)
 111     sprintf (buffer, "\\u%04x", line->line [i]);
 112   else
 113     {
 114       buffer [0] = line->line [i];
 115       buffer [1] = '\0';
 116     }
 117   return buffer;
 118 }
 119
 120 static unicode_t
 121 java_sneak_unicode ()
 122 {
 123   return (ctxp->c_line->line [ctxp->c_line->current]);
 124 }
 125
 126 static void
 127 java_unget_unicode ()
 128 {
 129   if (!ctxp->c_line->current)
 130     fatal ("can't unget unicode - java_unget_unicode");
 131   ctxp->c_line->current--;
 132   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 133 }
 134
 135 void
 136 java_allocate_new_line ()
 137 {
 138   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 139   char ahead_escape_p = (ctxp->c_line ?
 140                          ctxp->c_line->unicode_escape_ahead_p : 0);
 141
 142   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 143     {
 144       if (ctxp->p_line)
 145         {
 146           free (ctxp->p_line->unicode_escape_p);
 147           free (ctxp->p_line->line);
 148           free (ctxp->p_line);
 149         }
 150       ctxp->p_line = ctxp->c_line;
 151       ctxp->c_line = NULL;              /* Reallocated */
 152     }
 153
 154   if (!ctxp->c_line)
 155     {
 156       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 157       ctxp->c_line->max = JAVA_LINE_MAX;
 158       ctxp->c_line->line = (unicode_t *)xmalloc
 159         (sizeof (unicode_t)*ctxp->c_line->max);
 160       ctxp->c_line->unicode_escape_p =
 161           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 162       ctxp->c_line->white_space_only = 0;
 163     }
 164
 165   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 166   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 167   if (ahead)
 168     {
 169       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 170       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 171       ctxp->c_line->size++;
 172     }
 173   ctxp->c_line->ahead [0] = 0;
 174   ctxp->c_line->unicode_escape_ahead_p = 0;
 175   ctxp->c_line->lineno = ++lineno;
 176   ctxp->c_line->white_space_only = 1;
 177 }
 178
 179 static unicode_t
 180 java_read_char ()
 181 {
 182   int c;
 183   int c1, c2;
 184
 185   if (ctxp->unget_utf8_value)
 186     {
 187       int to_return = ctxp->unget_utf8_value;
 188       ctxp->unget_utf8_value = 0;
 189       return (to_return);
 190     }
 191
 192   c = GETC ();
 193
 194   if (c < 128)
 195     return (unicode_t)c;
 196   if (c == EOF)
 197     return UEOF;
 198   else
 199     {
 200       if (c & (0xe0 == 0xc0))
 201         {
 202           c1 = GETC ();
 203           if (c1 & (0xc0 == 0x80))
 204             return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 205         }
 206       else if (c & (0xf0 == 0xe0))
 207         {
 208           c1 = GETC ();
 209           if (c1 & (0xc0 == 0x80))
 210             {
 211               c2 = GETC ();
 212               if (c2 & (0xc0 == 0x80))
 213                 return (unicode_t)(((c & 0xf) << 12) +
 214                                    (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 215             }
 216         }
 217       java_lex_error ("Bad utf8 encoding", 0);
 218     }
 219   return 0;
 220 }
 221
 222 static void
 223 java_store_unicode (l, c, unicode_escape_p)
 224     struct java_line *l;
 225     unicode_t c;
 226     int unicode_escape_p;
 227 {
 228   if (l->size == l->max)
 229     {
 230       l->max += JAVA_LINE_MAX;
 231       l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
 232       l->unicode_escape_p = (char *)realloc (l->unicode_escape_p,
 233                                              sizeof (char)*l->max);
 234     }
 235   l->line [l->size] = c;
 236   l->unicode_escape_p [l->size++] = unicode_escape_p;
 237 }
 238
 239 static unicode_t
 240 java_read_unicode (term_context, unicode_escape_p)
 241     int term_context;
 242     int *unicode_escape_p;
 243 {
 244   unicode_t c;
 245   long i, base;
 246
 247   c = java_read_char ();
 248   *unicode_escape_p = 0;
 249
 250   if (c != '\\')
 251     return ((term_context ? c :
 252              java_lineterminator (c) ? '\n' : (unicode_t)c));
 253
 254   /* Count the number of preceeding '\' */
 255   for (base = ftell (finput), i = base-2; c == '\\';)
 256     {
 257       fseek (finput, i--, SEEK_SET);
 258       c = java_read_char ();    /* Will fail if reading utf8 stream. FIXME */
 259     }
 260   fseek (finput, base, SEEK_SET);
 261   if ((base-i-3)%2 == 0)        /* If odd number of \ seen */
 262     {
 263       c = java_read_char ();
 264       if (c == 'u')
 265         {
 266           unsigned short unicode = 0;
 267           int shift = 12;
 268           /* Next should be 4 hex digits, otherwise it's an error.
 269              The hex value is converted into the unicode, pushed into
 270              the Unicode stream.  */
 271           for (shift = 12; shift >= 0; shift -= 4)
 272             {
 273               if ((c = java_read_char ()) == UEOF)
 274                 return UEOF;
 275               if (c >= '0' && c <= '9')
 276                 unicode |= (unicode_t)((c-'0') << shift);
 277               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 278                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 279               else
 280                   java_lex_error
 281                     ("Non hex digit in Unicode escape sequence", 0);
 282             }
 283           *unicode_escape_p = 1;
 284           return (term_context ? unicode :
 285                   (java_lineterminator (c) ? '\n' : unicode));
 286         }
 287       UNGETC (c);
 288     }
 289   return (unicode_t)'\\';
 290 }
 291
 292 static unicode_t
 293 java_get_unicode ()
 294 {
 295   /* It's time to read a line when... */
 296   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 297     {
 298       unicode_t c;
 299       java_allocate_new_line ();
 300       if (ctxp->c_line->line[0] != '\n')
 301         for (;;)
 302           {
 303             int unicode_escape_p;
 304             c = java_read_unicode (0, &unicode_escape_p);
 305             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 306             if (ctxp->c_line->white_space_only
 307                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 308               ctxp->c_line->white_space_only = 0;
 309             if ((c == '\n') || (c == UEOF))
 310               break;
 311           }
 312     }
 313   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 314   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 315   return ctxp->c_line->line [ctxp->c_line->current++];
 316 }
 317
 318 static int
 319 java_lineterminator (c)
 320      unicode_t c;
 321 {
 322   int unicode_escape_p;
 323   if (c == '\n')                /* CR */
 324     {
 325       if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
 326         {
 327           ctxp->c_line->ahead [0] = c;
 328           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 329         }
 330       return 1;
 331     }
 332   else if (c == '\r')           /* LF */
 333     {
 334       if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
 335         {
 336           ctxp->c_line->ahead [0] = c;
 337           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 338         }
 339       return 1;
 340     }
 341   else
 342     return 0;
 343 }
 344
 345 /* Parse the end of a C style comment */
 346 static void
 347 java_parse_end_comment ()
 348 {
 349   unicode_t c;
 350
 351   for (c = java_get_unicode ();; c = java_get_unicode ())
 352     {
 353       switch (c)
 354         {
 355         case UEOF:
 356           java_lex_error ("Comment not terminated at end of input", 0);
 357         case '*':
 358           switch (c = java_get_unicode ())
 359             {
 360             case UEOF:
 361               java_lex_error ("Comment not terminated at end of input", 0);
 362             case '/':
 363               return;
 364             case '*':   /* reparse only '*' */
 365               java_unget_unicode ();
 366             }
 367         }
 368     }
 369 }
 370
 371 /* Parse the documentation section. Keywords must be at the beginning
 372    of a documentation comment line (ignoring white space and any `*'
 373    character). Parsed keyword(s): @DEPRECATED.  */
 374
 375 static int
 376 java_parse_doc_section (c)
 377      unicode_t c;
 378 {
 379   int valid_tag = 0, seen_star;
 380
 381   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 382     {
 383       switch (c)
 384         {
 385         case '*':
 386           seen_star = 1;
 387           break;
 388         case '\n': /* ULT */
 389           valid_tag = 1;
 390           break;
 391         default:
 392           seen_star = 0;
 393         }
 394       c = java_get_unicode();
 395     }
 396
 397   if (c == UEOF)
 398     java_lex_error ("Comment not terminated at end of input", 0);
 399
 400   if (seen_star && (c == '/'))
 401     return 1;                   /* Goto step1 in caller */
 402
 403   /* We're parsing @deprecated */
 404   if (valid_tag && (c == '@'))
 405     {
 406       char tag [10];
 407       int  tag_index = 0;
 408
 409       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 410         {
 411           c = java_get_unicode ();
 412           tag [tag_index++] = c;
 413         }
 414
 415       if (c == UEOF)
 416         java_lex_error ("Comment not terminated at end of input", 0);
 417
 418       java_unget_unicode ();
 419       tag [tag_index] = '\0';
 420
 421       if (!strcmp (tag, "deprecated"))
 422         ctxp->deprecated = 1;
 423     }
 424   return 0;
 425 }
 426
 427 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
 428    will return a wrong result.  */
 429 static int
 430 java_letter_or_digit_p (c)
 431      unicode_t c;
 432 {
 433   return _JAVA_LETTER_OR_DIGIT_P (c);
 434 }
 435
 436 static unicode_t
 437 java_parse_escape_sequence ()
 438 {
 439   unicode_t char_lit;
 440   unicode_t c;
 441
 442   switch (c = java_get_unicode ())
 443     {
 444     case 'b':
 445       return (unicode_t)0x8;
 446     case 't':
 447       return (unicode_t)0x9;
 448     case 'n':
 449       return (unicode_t)0xa;
 450     case 'f':
 451       return (unicode_t)0xc;
 452     case 'r':
 453       return (unicode_t)0xd;
 454     case '"':
 455       return (unicode_t)0x22;
 456     case '\'':
 457       return (unicode_t)0x27;
 458     case '\\':
 459       return (unicode_t)0x5c;
 460     case '0': case '1': case '2': case '3': case '4':
 461     case '5': case '6': case '7': case '8': case '9':
 462       {
 463         int octal_escape[3];
 464         int octal_escape_index = 0;
 465
 466         for (; octal_escape_index < 3 && RANGE (c, '0', '9');
 467              c = java_get_unicode ())
 468           octal_escape [octal_escape_index++] = c;
 469
 470         java_unget_unicode ();
 471
 472         if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
 473           {
 474             java_lex_error ("Literal octal escape out of range", 0);
 475             return JAVA_CHAR_ERROR;
 476           }
 477         else
 478           {
 479             int i, shift;
 480             for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 481                  i < octal_escape_index; i++, shift -= 3)
 482               char_lit |= (octal_escape [i] - '0') << shift;
 483
 484             return (char_lit);
 485           }
 486         break;
 487       }
 488     case '\n':
 489       return '\n';              /* ULT, caught latter as a specific error */
 490     default:
 491       java_lex_error ("Illegal character in escape sequence", 0);
 492       return JAVA_CHAR_ERROR;
 493     }
 494 }
 495
 496 int
 497 #ifdef JC1_LITE
 498 yylex (java_lval)
 499 #else
 500 java_lex (java_lval)
 501 #endif
 502      YYSTYPE *java_lval;
 503 {
 504   unicode_t c, first_unicode;
 505   int ascii_index, all_ascii;
 506   char *string;
 507
 508   /* Translation of the Unicode escape in the raw stream of Unicode
 509      characters. Takes care of line terminator.  */
 510  step1:
 511   /* Skip white spaces: SP, TAB and FF or ULT */
 512   for (c = java_get_unicode ();
 513        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 514     if (c == '\n')
 515       {
 516         ctxp->elc.line = ctxp->c_line->lineno;
 517         ctxp->elc.col  = ctxp->c_line->char_col-2;
 518       }
 519
 520   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 521
 522   if (c == 0x1a)                /* CTRL-Z */
 523     {
 524       if ((c = java_get_unicode ()) == UEOF)
 525         return 0;               /* Ok here */
 526       else
 527         java_unget_unicode ();  /* Caught latter at the end the function */
 528     }
 529   /* Handle EOF here */
 530   if (c == UEOF)        /* Should probably do something here... */
 531     return 0;
 532
 533   /* Take care of eventual comments.  */
 534   if (c == '/')
 535     {
 536       switch (c = java_get_unicode ())
 537         {
 538         case '/':
 539           for (c = java_get_unicode ();;c = java_get_unicode ())
 540             {
 541               if (c == UEOF)
 542                 java_lex_error ("Comment not terminated at end of input", 0);
 543               if (c == '\n')    /* ULT */
 544                 goto step1;
 545             }
 546           break;
 547
 548         case '*':
 549           if ((c = java_get_unicode ()) == '*')
 550             {
 551               if ((c = java_get_unicode ()) == '/')
 552                 goto step1;     /* Empy documentation comment  */
 553               else if (java_parse_doc_section (c))
 554                 goto step1;
 555             }
 556           else
 557             java_unget_unicode ();
 558
 559           java_parse_end_comment ();
 560           goto step1;
 561           break;
 562         default:
 563           java_unget_unicode ();
 564           c = '/';
 565           break;
 566         }
 567     }
 568
 569   ctxp->elc.line = ctxp->c_line->lineno;
 570   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 571   if (ctxp->elc.col < 0)
 572     fatal ("ctxp->elc.col < 0 - java_lex");
 573
 574   /* Numeric literals */
 575   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 576     {
 577       /* This section of code is borrowed from gcc/c-lex.c  */
 578 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 579       int parts[TOTAL_PARTS];
 580       HOST_WIDE_INT high, low;
 581       /* End borrowed section  */
 582       char literal_token [256];
 583       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 584       int  i;
 585       int  number_beginning = ctxp->c_line->current;
 586
 587       /* We might have a . separator instead of a FP like .[0-9]* */
 588       if (c == '.')
 589         {
 590           unicode_t peep = java_sneak_unicode ();
 591
 592           if (!JAVA_ASCII_DIGIT (peep))
 593             {
 594               JAVA_LEX_SEP('.');
 595               BUILD_OPERATOR (DOT_TK);
 596             }
 597         }
 598
 599       for (i = 0; i < TOTAL_PARTS; i++)
 600         parts [i] = 0;
 601
 602       if (c == '0')
 603         {
 604           c = java_get_unicode ();
 605           if (c == 'x' || c == 'X')
 606             {
 607               radix = 16;
 608               c = java_get_unicode ();
 609             }
 610           else if (JAVA_ASCII_DIGIT (c))
 611             radix = 8;
 612           else if (c == '.')
 613             {
 614               /* Push the '.' back and prepare for a FP parsing... */
 615               java_unget_unicode ();
 616               c = '0';
 617             }
 618           else
 619             {
 620               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 621               JAVA_LEX_LIT ("0", 10);
 622               switch (c)
 623                 {
 624                 case 'L': case 'l':
 625                   SET_LVAL_NODE_TYPE (integer_zero_node, long_type_node);
 626                   return (INT_LIT_TK);
 627                 case 'f': case 'F':
 628                   SET_LVAL_NODE_TYPE (build_real (float_type_node, dconst0),
 629                                         float_type_node);
 630                   return (FP_LIT_TK);
 631                 case 'd': case 'D':
 632                   SET_LVAL_NODE_TYPE (build_real (double_type_node, dconst0),
 633                                         double_type_node);
 634                   return (FP_LIT_TK);
 635                 default:
 636                   java_unget_unicode ();
 637                   SET_LVAL_NODE_TYPE (integer_zero_node, int_type_node);
 638                   return (INT_LIT_TK);
 639                 }
 640             }
 641         }
 642       /* Parse the first part of the literal, until we find something
 643          which is not a number.  */
 644       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 645              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 646              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 647         {
 648           /* We store in a string (in case it turns out to be a FP) and in
 649              PARTS if we have to process a integer literal.  */
 650           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
 651           int count;
 652
 653           literal_token [literal_index++] = c;
 654           /* This section of code if borrowed from gcc/c-lex.c  */
 655           for (count = 0; count < TOTAL_PARTS; count++)
 656             {
 657               parts[count] *= radix;
 658               if (count)
 659                 {
 660                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 661                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 662                 }
 663               else
 664                 parts[0] += numeric;
 665             }
 666           if (parts [TOTAL_PARTS-1] != 0)
 667             overflow = 1;
 668           /* End borrowed section.  */
 669           c = java_get_unicode ();
 670         }
 671
 672       /* If we have something from the FP char set but not a digit, parse
 673          a FP literal.  */
 674       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 675         {
 676           int stage = 0;
 677           int seen_digit = (literal_index ? 1 : 0);
 678           int seen_exponent = 0;
 679           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
 680                                    double unless specified. */
 681           if (radix != 10)
 682             java_lex_error ("Can't express non-decimal FP literal", 0);
 683
 684           for (;;)
 685             {
 686               if (c == '.')
 687                 {
 688                   if (stage < 1)
 689                     {
 690                       stage = 1;
 691                       literal_token [literal_index++ ] = c;
 692                       c = java_get_unicode ();
 693                     }
 694                   else
 695                     java_lex_error ("Invalid character in FP literal", 0);
 696                 }
 697
 698               if (c == 'e' || c == 'E')
 699                 {
 700                   if (stage < 2)
 701                     {
 702                       /* {E,e} must have seen at list a digit */
 703                       if (!seen_digit)
 704                         java_lex_error ("Invalid FP literal", 0);
 705                       seen_digit = 0;
 706                       seen_exponent = 1;
 707                       stage = 2;
 708                       literal_token [literal_index++] = c;
 709                       c = java_get_unicode ();
 710                     }
 711                   else
 712                     java_lex_error ("Invalid character in FP literal", 0);
 713                 }
 714               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 715                 {
 716                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 717                   stage = 4;    /* So we fall through */
 718                 }
 719
 720               if ((c=='-' || c =='+') && stage < 3)
 721                 {
 722                   stage = 3;
 723                   literal_token [literal_index++] = c;
 724                   c = java_get_unicode ();
 725                 }
 726
 727               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 728                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 729                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 730                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
 731                 {
 732                   if (JAVA_ASCII_DIGIT (c))
 733                     seen_digit = 1;
 734                   literal_token [literal_index++ ] = c;
 735                   c = java_get_unicode ();
 736                 }
 737               else
 738                 {
 739                   jmp_buf handler;
 740                   REAL_VALUE_TYPE value;
 741 #ifndef JC1_LITE
 742                   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 743 #endif
 744
 745                   if (stage != 4) /* Don't push back fF/dD */
 746                     java_unget_unicode ();
 747
 748                   /* An exponent (if any) must have seen a digit.  */
 749                   if (seen_exponent && !seen_digit)
 750                     java_lex_error ("Invalid FP literal", 0);
 751
 752                   literal_token [literal_index] = '\0';
 753                   JAVA_LEX_LIT (literal_token, radix);
 754
 755                   if (setjmp (handler))
 756                     {
 757                       JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 758                       value = DCONST0;
 759                     }
 760                   else
 761                     {
 762                       SET_FLOAT_HANDLER (handler);
 763                       SET_REAL_VALUE_ATOF
 764                         (value, REAL_VALUE_ATOF (literal_token,
 765                                                  TYPE_MODE (type)));
 766
 767                       if (REAL_VALUE_ISINF (value))
 768                         JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 769
 770                       if (REAL_VALUE_ISNAN (value))
 771                         JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 772
 773                       SET_LVAL_NODE_TYPE (build_real (type, value), type);
 774                       SET_FLOAT_HANDLER (NULL_PTR);
 775                       return FP_LIT_TK;
 776                     }
 777                 }
 778             }
 779         } /* JAVA_ASCCI_FPCHAR (c) */
 780
 781       /* Here we get back to converting the integral literal.  */
 782       if (c == 'L' || c == 'l')
 783         long_suffix = 1;
 784       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 785         java_lex_error ("Digit out of range in hexadecimal literal", 0);
 786       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 787         java_lex_error ("Digit out of range in octal literal", 0);
 788       else if (radix == 16 && !literal_index)
 789         java_lex_error ("No digit specified for hexadecimal literal", 0);
 790       else
 791         java_unget_unicode ();
 792
 793 #ifdef JAVA_LEX_DEBUG
 794       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
 795       JAVA_LEX_LIT (literal_token, radix);
 796 #endif
 797       /* This section of code is borrowed from gcc/c-lex.c  */
 798       if (!overflow)
 799         {
 800           bytes = GET_TYPE_PRECISION (long_type_node);
 801           for (i = bytes; i < TOTAL_PARTS; i++)
 802             if (parts [i])
 803               {
 804                 overflow = 1;
 805                 break;
 806               }
 807         }
 808       high = low = 0;
 809       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
 810         {
 811           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
 812                                               / HOST_BITS_PER_CHAR)]
 813                    << (i * HOST_BITS_PER_CHAR));
 814           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
 815         }
 816       /* End borrowed section.  */
 817
 818       /* Range checking */
 819       if (long_suffix)
 820         {
 821           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
 822              9223372036854775807L is the biggest `long' literal that can be
 823              expressed using a 10 radix. For other radixes, everything that
 824              fits withing 64 bits is OK. */
 825           int hb = (high >> 31);
 826           if (overflow || (hb && low && radix == 10) ||
 827               (hb && high & 0x7fffffff && radix == 10) ||
 828               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
 829             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
 830         }
 831       else
 832         {
 833           /* 2147483648 is valid if operand of a '-'. Otherwise,
 834              2147483647 is the biggest `int' literal that can be
 835              expressed using a 10 radix. For other radixes, everything
 836              that fits within 32 bits is OK. */
 837           int hb = (low >> 31) & 0x1;
 838           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
 839               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
 840             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
 841         }
 842       ctxp->minus_seen = 0;
 843       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
 844                           (long_suffix ? long_type_node : int_type_node));
 845       return INT_LIT_TK;
 846     }
 847
 848   ctxp->minus_seen = 0;
 849   /* Character literals */
 850   if (c == '\'')
 851     {
 852       unicode_t char_lit;
 853       if ((c = java_get_unicode ()) == '\\')
 854         char_lit = java_parse_escape_sequence ();
 855       else
 856         char_lit = c;
 857
 858       c = java_get_unicode ();
 859
 860       if ((c == '\n') || (c == UEOF))
 861         java_lex_error ("Character literal not terminated at end of line", 0);
 862       if (c != '\'')
 863         java_lex_error ("Syntax error in character literal", 0);
 864
 865       if (c == JAVA_CHAR_ERROR)
 866         char_lit = 0;           /* We silently convert it to zero */
 867
 868       JAVA_LEX_CHAR_LIT (char_lit);
 869       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
 870       return CHAR_LIT_TK;
 871     }
 872
 873   /* String literals */
 874   if (c == '"')
 875     {
 876       int no_error;
 877       char *string;
 878
 879       for (no_error = 1, c = java_get_unicode ();
 880            c != '"' && c != '\n'; c = java_get_unicode ())
 881         {
 882           if (c == '\\')
 883             c = java_parse_escape_sequence ();
 884           no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
 885           if (c)
 886             java_unicode_2_utf8 (c);
 887         }
 888       if (c == '\n' || c == UEOF) /* ULT */
 889         {
 890           lineno--;             /* Refer to the line the terminator was seen */
 891           java_lex_error ("String not terminated at end of line.", 0);
 892           lineno++;
 893         }
 894
 895       obstack_1grow (&temporary_obstack, '\0');
 896       string = obstack_finish (&temporary_obstack);
 897 #ifndef JC1_LITE
 898       if (!no_error || (c != '"'))
 899         java_lval->node = error_mark_node; /* Requires futher testing FIXME */
 900       else
 901         {
 902           tree s = make_node (STRING_CST);
 903           TREE_STRING_LENGTH (s) = strlen (string);
 904           TREE_STRING_POINTER (s) =
 905             obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
 906           strcpy (TREE_STRING_POINTER (s), string);
 907           java_lval->node = s;
 908         }
 909 #endif
 910       return STRING_LIT_TK;
 911     }
 912
 913   /* Separator */
 914   switch (c)
 915     {
 916     case '(':
 917       JAVA_LEX_SEP (c);
 918       BUILD_OPERATOR (OP_TK);
 919     case ')':
 920       JAVA_LEX_SEP (c);
 921       return CP_TK;
 922     case '{':
 923       JAVA_LEX_SEP (c);
 924       if (ctxp->ccb_indent == 1)
 925         ctxp->first_ccb_indent1 = lineno;
 926       ctxp->ccb_indent++;
 927       return OCB_TK;
 928     case '}':
 929       JAVA_LEX_SEP (c);
 930       ctxp->ccb_indent--;
 931       if (ctxp->ccb_indent == 1)
 932         ctxp->last_ccb_indent1 = lineno;
 933       return CCB_TK;
 934     case '[':
 935       JAVA_LEX_SEP (c);
 936       BUILD_OPERATOR (OSB_TK);
 937     case ']':
 938       JAVA_LEX_SEP (c);
 939       return CSB_TK;
 940     case ';':
 941       JAVA_LEX_SEP (c);
 942       return SC_TK;
 943     case ',':
 944       JAVA_LEX_SEP (c);
 945       return C_TK;
 946     case '.':
 947       JAVA_LEX_SEP (c);
 948       BUILD_OPERATOR (DOT_TK);
 949       /*      return DOT_TK; */
 950     }
 951
 952   /* Operators */
 953   switch (c)
 954     {
 955     case '=':
 956       if ((c = java_get_unicode ()) == '=')
 957         {
 958           BUILD_OPERATOR (EQ_TK);
 959         }
 960       else
 961         {
 962           /* Equals is used in two different locations. In the
 963              variable_declarator: rule, it has to be seen as '=' as opposed
 964              to being seen as an ordinary assignment operator in
 965              assignment_operators: rule.  */
 966           java_unget_unicode ();
 967           BUILD_OPERATOR (ASSIGN_TK);
 968         }
 969
 970     case '>':
 971       switch ((c = java_get_unicode ()))
 972         {
 973         case '=':
 974           BUILD_OPERATOR (GTE_TK);
 975         case '>':
 976           switch ((c = java_get_unicode ()))
 977             {
 978             case '>':
 979               if ((c = java_get_unicode ()) == '=')
 980                 {
 981                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
 982                 }
 983               else
 984                 {
 985                   java_unget_unicode ();
 986                   BUILD_OPERATOR (ZRS_TK);
 987                 }
 988             case '=':
 989               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
 990             default:
 991               java_unget_unicode ();
 992               BUILD_OPERATOR (SRS_TK);
 993             }
 994         default:
 995           java_unget_unicode ();
 996           BUILD_OPERATOR (GT_TK);
 997         }
 998
 999     case '<':
1000       switch ((c = java_get_unicode ()))
1001         {
1002         case '=':
1003           BUILD_OPERATOR (LTE_TK);
1004         case '<':
1005           if ((c = java_get_unicode ()) == '=')
1006             {
1007               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1008             }
1009           else
1010             {
1011               java_unget_unicode ();
1012               BUILD_OPERATOR (LS_TK);
1013             }
1014         default:
1015           java_unget_unicode ();
1016           BUILD_OPERATOR (LT_TK);
1017         }
1018
1019     case '&':
1020       switch ((c = java_get_unicode ()))
1021         {
1022         case '&':
1023           BUILD_OPERATOR (BOOL_AND_TK);
1024         case '=':
1025           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1026         default:
1027           java_unget_unicode ();
1028           BUILD_OPERATOR (AND_TK);
1029         }
1030
1031     case '|':
1032       switch ((c = java_get_unicode ()))
1033         {
1034         case '|':
1035           BUILD_OPERATOR (BOOL_OR_TK);
1036         case '=':
1037           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1038         default:
1039           java_unget_unicode ();
1040           BUILD_OPERATOR (OR_TK);
1041         }
1042
1043     case '+':
1044       switch ((c = java_get_unicode ()))
1045         {
1046         case '+':
1047           BUILD_OPERATOR (INCR_TK);
1048         case '=':
1049           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1050         default:
1051           java_unget_unicode ();
1052           BUILD_OPERATOR (PLUS_TK);
1053         }
1054
1055     case '-':
1056       switch ((c = java_get_unicode ()))
1057         {
1058         case '-':
1059           BUILD_OPERATOR (DECR_TK);
1060         case '=':
1061           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1062         default:
1063           java_unget_unicode ();
1064           ctxp->minus_seen = 1;
1065           BUILD_OPERATOR (MINUS_TK);
1066         }
1067
1068     case '*':
1069       if ((c = java_get_unicode ()) == '=')
1070         {
1071           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1072         }
1073       else
1074         {
1075           java_unget_unicode ();
1076           BUILD_OPERATOR (MULT_TK);
1077         }
1078
1079     case '/':
1080       if ((c = java_get_unicode ()) == '=')
1081         {
1082           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1083         }
1084       else
1085         {
1086           java_unget_unicode ();
1087           BUILD_OPERATOR (DIV_TK);
1088         }
1089
1090     case '^':
1091       if ((c = java_get_unicode ()) == '=')
1092         {
1093           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1094         }
1095       else
1096         {
1097           java_unget_unicode ();
1098           BUILD_OPERATOR (XOR_TK);
1099         }
1100
1101     case '%':
1102       if ((c = java_get_unicode ()) == '=')
1103         {
1104           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1105         }
1106       else
1107         {
1108           java_unget_unicode ();
1109           BUILD_OPERATOR (REM_TK);
1110         }
1111
1112     case '!':
1113       if ((c = java_get_unicode()) == '=')
1114         {
1115           BUILD_OPERATOR (NEQ_TK);
1116         }
1117       else
1118         {
1119           java_unget_unicode ();
1120           BUILD_OPERATOR (NEG_TK);
1121         }
1122
1123     case '?':
1124       JAVA_LEX_OP ("?");
1125       BUILD_OPERATOR (REL_QM_TK);
1126     case ':':
1127       JAVA_LEX_OP (":");
1128       BUILD_OPERATOR (REL_CL_TK);
1129     case '~':
1130       BUILD_OPERATOR (NOT_TK);
1131     }
1132
1133   /* Keyword, boolean literal or null literal */
1134   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1135        JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1136     {
1137       java_unicode_2_utf8 (c);
1138       if (all_ascii && c >= 128)
1139         all_ascii = 0;
1140       ascii_index++;
1141     }
1142
1143   obstack_1grow (&temporary_obstack, '\0');
1144   string = obstack_finish (&temporary_obstack);
1145   java_unget_unicode ();
1146
1147   /* If we have something all ascii, we consider a keyword, a boolean
1148      literal, a null literal or an all ASCII identifier.  Otherwise,
1149      this is an identifier (possibly not respecting formation rule).  */
1150   if (all_ascii)
1151     {
1152       struct java_keyword *kw;
1153       if ((kw=java_keyword (string, ascii_index)))
1154         {
1155           JAVA_LEX_KW (string);
1156           switch (kw->token)
1157             {
1158             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1159             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1160             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1161             case PRIVATE_TK:
1162               SET_MODIFIER_CTX (kw->token);
1163               return MODIFIER_TK;
1164             case FLOAT_TK:
1165               SET_LVAL_NODE (float_type_node);
1166               return FP_TK;
1167             case DOUBLE_TK:
1168               SET_LVAL_NODE (double_type_node);
1169               return FP_TK;
1170             case BOOLEAN_TK:
1171               SET_LVAL_NODE (boolean_type_node);
1172               return BOOLEAN_TK;
1173             case BYTE_TK:
1174               SET_LVAL_NODE (byte_type_node);
1175               return INTEGRAL_TK;
1176             case SHORT_TK:
1177               SET_LVAL_NODE (short_type_node);
1178               return INTEGRAL_TK;
1179             case INT_TK:
1180               SET_LVAL_NODE (int_type_node);
1181               return INTEGRAL_TK;
1182             case LONG_TK:
1183               SET_LVAL_NODE (long_type_node);
1184               return INTEGRAL_TK;
1185             case CHAR_TK:
1186               SET_LVAL_NODE (char_type_node);
1187               return INTEGRAL_TK;
1188
1189               /* Keyword based literals */
1190             case TRUE_TK:
1191             case FALSE_TK:
1192               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1193                               boolean_true_node : boolean_false_node));
1194               return BOOL_LIT_TK;
1195             case NULL_TK:
1196               SET_LVAL_NODE (null_pointer_node);
1197               return NULL_TK;
1198
1199               /* Some keyword we want to retain information on the location
1200                  they where found */
1201             case CASE_TK:
1202             case DEFAULT_TK:
1203             case SUPER_TK:
1204             case THIS_TK:
1205             case RETURN_TK:
1206             case BREAK_TK:
1207             case CONTINUE_TK:
1208             case TRY_TK:
1209             case CATCH_TK:
1210             case THROW_TK:
1211             case INSTANCEOF_TK:
1212               BUILD_OPERATOR (kw->token);
1213
1214             default:
1215               return kw->token;
1216             }
1217         }
1218     }
1219
1220   /* We may have and ID here */
1221   if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1222     {
1223       JAVA_LEX_ID (string);
1224       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1225       return ID_TK;
1226     }
1227
1228   /* Everything else is an invalid character in the input */
1229   {
1230     char lex_error_buffer [128];
1231     sprintf (lex_error_buffer, "Invalid character '%s' in input",
1232              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1233     java_lex_error (lex_error_buffer, 1);
1234   }
1235   return 0;
1236 }
1237
1238 static void
1239 java_unicode_2_utf8 (unicode)
1240     unicode_t unicode;
1241 {
1242   if (RANGE (unicode, 0x01, 0x7f))
1243     obstack_1grow (&temporary_obstack, (char)unicode);
1244   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1245     {
1246       obstack_1grow (&temporary_obstack,
1247                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1248       obstack_1grow (&temporary_obstack,
1249                      (unsigned char)(0x80 | (unicode & 0x3f)));
1250     }
1251   else                          /* Range 0x800-0xffff */
1252     {
1253       obstack_1grow (&temporary_obstack,
1254                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1255       obstack_1grow (&temporary_obstack,
1256                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1257       obstack_1grow (&temporary_obstack,
1258                      (unsigned char)(0x80 | (unicode & 0x003f) >> 12));
1259     }
1260 }
1261
1262 #ifndef JC1_LITE
1263 static tree
1264 build_wfl_node (node)
1265      tree node;
1266 {
1267   return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1268 }
1269 #endif
1270
1271 static void
1272 java_lex_error (msg, forward)
1273      char *msg;
1274      int forward;
1275 {
1276 #ifndef JC1_LITE
1277   ctxp->elc.line = ctxp->c_line->lineno;
1278   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1279
1280   /* Might be caught in the middle of some error report */
1281   ctxp->java_error_flag = 0;
1282   java_error (NULL);
1283   java_error (msg);
1284 #endif
1285 }
1286
/* Tell whether C, a character that was just read from FP, terminates
   a line.

   The recognized terminators are CR, LF and the pair CR LF.  When C
   is a CR, the next character is read: a LF is consumed so the pair
   counts as a single terminator, anything else (except EOF) is pushed
   back onto FP.  Return 1 if C ends a line, 0 otherwise.  */

static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int next;
  switch (c)
    {
    case '\r':
      /* Swallow the LF of a CR LF pair so it is not counted twice.  */
      next = getc (fp);
      if (next != '\n' && next != EOF)
        ungetc (next, fp);
      return 1;
    case '\n':
      return 1;
    default:
      return 0;
    }
}
1306
1307 char *
1308 java_get_line_col (filename, line, col)
1309      char *filename;
1310      int line, col;
1311 {
1312 #ifdef JC1_LITE
1313   return 0;
1314 #else
1315   /* Dumb implementation. Doesn't try to cache or optimize things. */
1316   /* First line of the file is line 1, first column is 1 */
1317
1318   /* COL <= 0 means, at the CR/LF in LINE */
1319
1320   FILE *fp;
1321   int c, ccol, cline = 1;
1322   int current_line_col = 0;
1323
1324   if (!(fp = fopen (filename, "r")))
1325     fatal ("Can't open file - java_display_line_col");
1326
1327   while (cline != line)
1328     {
1329       c = getc (fp);
1330       if (c < 0)
1331         {
1332           static char msg[] = "<<file too short - unexpected EOF>>";
1333           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1334           goto have_line;
1335         }
1336       if (java_is_eol (fp, c))
1337         cline++;
1338     }
1339
1340   /* Gather the chars of the current line in a buffer */
1341   for (;;)
1342     {
1343       c = getc (fp);
1344       if (c < 0 || java_is_eol (fp, c))
1345         break;
1346       obstack_1grow (&temporary_obstack, c);
1347       current_line_col++;
1348     }
1349  have_line:
1350
1351   obstack_1grow (&temporary_obstack, '\n');
1352
1353   if (col < 0)
1354     col = current_line_col;
1355
1356   /* Place the '^' a the right position */
1357   for (ccol = 1; ccol <= col; ccol++)
1358     obstack_1grow (&temporary_obstack, ' ');
1359   obstack_grow0 (&temporary_obstack, "^", 1);
1360
1361   fclose (fp);
1362   return obstack_finish (&temporary_obstack);
1363 #endif
1364 }