1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
39 #include "chartables.h"
41 /* Function declaration */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
70 static void error_if_numeric_overflow PARAMS ((tree));
74 /* This is nonzero if we have initialized `need_byteswap'. */
75 static int byteswap_init = 0;
77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
78 big-endian order -- not native endian order. We handle this by
79 doing a conversion once at startup and seeing what happens. This
80 flag holds the results of this determination. */
81 static int need_byteswap = 0;
/* Prepare the lexer and the parser context for a new input.  Interns
   the identifiers the front end refers to by name, builds the shared
   WFL nodes, resets fields of CTXP and finally creates the lexer that
   reads FINPUT in ENCODING.
   NOTE(review): several lines of this function are not visible in
   this listing.  */
85 java_init_lex (finput, encoding)
/* NOTE(review): as shown here this flag is a plain local initialised
   to 0, so the !java_lang_imported test further down always
   succeeds -- confirm whether it was meant to be static.  */
90 int java_lang_imported = 0;
93 java_lang_id = get_identifier ("java.lang");
94 if (!java_lang_cloneable)
95 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
96 if (!java_io_serializable)
97 java_io_serializable = get_identifier ("java.io.Serializable");
99 inst_id = get_identifier ("inst$");
101 wpv_id = get_identifier ("write_parm_value$");
/* Put java.lang at the head of the import-on-demand list and read its
   directory.  */
103 if (!java_lang_imported)
105 tree node = build_tree_list
106 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
107 read_import_dir (TREE_PURPOSE (node));
108 TREE_CHAIN (node) = ctxp->import_demand_list;
109 ctxp->import_demand_list = node;
110 java_lang_imported = 1;
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
116 label_id = get_identifier ("$L");
118 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
/* The StringBuffer class name used depends on flag_emit_class_files.  */
119 if (!wfl_string_buffer)
121 build_expr_wfl (get_identifier (flag_emit_class_files
122 ? "java.lang.StringBuffer"
123 : "gnu.gcj.runtime.StringBuffer"),
126 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Reset the state kept in the parser context: initializer lists,
   the first 11 modifier_ctx entries, the current JCF, class, package,
   file name, line number and error flag.  */
128 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
129 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
131 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
132 memset ((PTR) current_jcf, 0, sizeof (JCF));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
137 ctxp->filename = input_filename;
138 ctxp->lineno = lineno = 0;
141 ctxp->java_error_flag = 0;
/* Create the lexer last; it reads FINPUT using ENCODING.  */
142 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE as text.  A character that came from a
   Unicode escape, or whose value is above 128, is rendered as a
   backslash-u escape with four hex digits; the other visible path
   stores the character itself in BUFFER[0].  The text is written to a
   function-local static buffer, so each call overwrites the result of
   the previous one.  */
146 java_sprint_unicode (line, i)
147 struct java_line *line;
150 static char buffer [10];
151 if (line->unicode_escape_p [i] || line->line [i] > 128)
152 sprintf (buffer, "\\u%04x", line->line [i]);
155 buffer [0] = line->line [i];
/* Peek at the character stored at the current position of the current
   line (ctxp->c_line) without moving that position.  */
162 java_sneak_unicode ()
164 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Step back one character in the current line: the read position is
   decremented and the column delta is subtracted from char_col.  A
   read position of zero is tested for first; what happens in that
   case is not visible in this listing.  */
168 java_unget_unicode ()
170 if (!ctxp->c_line->current)
171 /* Can't unget unicode. */
174 ctxp->c_line->current--;
175 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Start a fresh current line in ctxp->c_line.  The look-ahead
   character of the old line and its escape flag are saved before the
   old line is retired, and are stored into the new line further down.
   NOTE(review): the conditions guarding several statements are not
   visible in this listing.  */
179 java_allocate_new_line ()
181 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
182 char ahead_escape_p = (ctxp->c_line ?
183 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that is not white space only becomes the previous
   line (p_line); the buffers of the old p_line are released.  */
185 if (ctxp->c_line && !ctxp->c_line->white_space_only)
189 free (ctxp->p_line->unicode_escape_p);
190 free (ctxp->p_line->line);
193 ctxp->p_line = ctxp->c_line;
194 ctxp->c_line = NULL; /* Reallocated */
/* Allocate the line structure and its two parallel arrays (the
   characters and the per-character escape flags), each sized for
   JAVA_LINE_MAX entries.  */
199 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
200 ctxp->c_line->max = JAVA_LINE_MAX;
201 ctxp->c_line->line = (unicode_t *)xmalloc
202 (sizeof (unicode_t)*ctxp->c_line->max);
203 ctxp->c_line->unicode_escape_p =
204 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
205 ctxp->c_line->white_space_only = 0;
/* Empty the line and rewind the read position and column.  */
208 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
209 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Carry the saved look-ahead character over as a character of the new
   line.  */
212 ctxp->c_line->line [ctxp->c_line->size] = ahead;
213 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
214 ctxp->c_line->size++;
/* No look-ahead is pending on the new line; it gets the next line
   number and starts out flagged as white space only.  */
216 ctxp->c_line->ahead [0] = 0;
217 ctxp->c_line->unicode_escape_ahead_p = 0;
218 ctxp->c_line->lineno = ++lineno;
219 ctxp->c_line->white_space_only = 1;
222 /* Create a new lexer object reading from FINPUT, whose bytes are in
   ENCODING.  The visible code opens an iconv converter from ENCODING
   to UCS-2, probes whether the UCS-2 output of iconv has to be byte
   swapped, and selects the internal fallback decoder where iconv is
   not usable for the default encoding.  An encoding that cannot be
   handled ends in fatal_error.
   NOTE(review): several lines of this function are not visible in
   this listing.  */
225 java_new_lexer (finput, encoding)
227 const char *encoding;
229 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
232 lex->finput = finput;
/* A zero unget_value means no character is pushed back.  */
234 lex->unget_value = 0;
238 lex->handle = iconv_open ("UCS-2", encoding);
239 if (lex->handle != (iconv_t) -1)
/* Nothing has been read yet and the fallback decoder is off.  */
245 lex->read_anything = 0;
246 lex->use_fallback = 0;
248 /* Work around broken iconv() implementations by doing checking at
249 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
250 then all UCS-2 encoders will be broken. Perhaps not a valid
258 handle = iconv_open ("UCS-2", "UTF-8");
259 if (handle != (iconv_t) -1)
266 /* This is the UTF-8 encoding of \ufeff. */
273 outp = (char *) &result;
276 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
278 iconv_close (handle);
279 /* Conversion must be complete for us to use the result. */
280 if (r != (size_t) -1 && inc == 0 && outc == 0)
281 need_byteswap = (result != 0xfeff);
/* Record the outcome of the byte-order probe in the lexer.  */
285 lex->byte_swap = need_byteswap;
288 #endif /* HAVE_ICONV */
290 /* If iconv failed, use the internal decoder if the default
291 encoding was requested. This code is used on platforms where
292 iconv exists but is insufficient for our needs. For
293 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. */
294 if (strcmp (encoding, DEFAULT_ENCODING))
298 lex->use_fallback = 1;
299 #endif /* HAVE_ICONV */
303 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
/* Tear down LEX.  The iconv handle is closed only when the lexer was
   not using the fallback decoder.
   NOTE(review): the rest of the function is not visible in this
   listing.  */
309 java_destroy_lexer (lex)
313 if (! lex->use_fallback)
314 iconv_close (lex->handle);
323 if (lex->unget_value)
325 unicode_t r = lex->unget_value;
326 lex->unget_value = 0;
331 if (! lex->use_fallback)
333 size_t ir, inbytesleft, in_save, out_count, out_save;
337 /* If there is data which has already been converted, use it. */
338 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
345 /* See if we need to read more data. If FIRST == 0 then
346 the previous conversion attempt ended in the middle of
347 a character at the end of the buffer. Otherwise we
348 only have to read if the buffer is empty. */
349 if (lex->first == 0 || lex->first >= lex->last)
353 if (lex->first >= lex->last)
358 if (feof (lex->finput))
360 r = fread (&lex->buffer[lex->last], 1,
361 sizeof (lex->buffer) - lex->last,
366 inbytesleft = lex->last - lex->first;
367 out_count = sizeof (lex->out_buffer) - lex->out_last;
369 if (inbytesleft == 0)
371 /* We've tried to read and there is nothing left. */
375 in_save = inbytesleft;
376 out_save = out_count;
377 inp = &lex->buffer[lex->first];
378 outp = &lex->out_buffer[lex->out_last];
379 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380 &inbytesleft, &outp, &out_count);
382 /* If we haven't read any bytes, then look to see if we
384 if (! lex->read_anything && out_save - out_count >= 2)
386 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
392 else if (uc == 0xfffe)
397 lex->read_anything = 1;
403 for (i = 0; i < out_save - out_count; i += 2)
405 char t = lex->out_buffer[lex->out_last + i];
406 lex->out_buffer[lex->out_last + i]
407 = lex->out_buffer[lex->out_last + i + 1];
408 lex->out_buffer[lex->out_last + i + 1] = t;
412 lex->first += in_save - inbytesleft;
413 lex->out_last += out_save - out_count;
415 /* If we converted anything at all, move along. */
416 if (out_count != out_save)
419 if (ir == (size_t) -1)
423 /* This is ok. This means that the end of our buffer
424 is in the middle of a character sequence. We just
425 move the valid part of the buffer to the beginning
427 memmove (&lex->buffer[0], &lex->buffer[lex->first],
428 lex->last - lex->first);
429 lex->last -= lex->first;
434 /* A more serious error. */
435 java_lex_error ("unrecognized character in input stream",
443 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
445 /* Don't have any data. */
450 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
455 #endif /* HAVE_ICONV */
458 c = getc (lex->finput);
463 return (unicode_t) c;
466 if ((c & 0xe0) == 0xc0)
468 c1 = getc (lex->finput);
469 if ((c1 & 0xc0) == 0x80)
471 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472 /* Check for valid 2-byte characters. We explicitly
473 allow \0 because this encoding is common in the
475 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
479 else if ((c & 0xf0) == 0xe0)
481 c1 = getc (lex->finput);
482 if ((c1 & 0xc0) == 0x80)
484 c2 = getc (lex->finput);
485 if ((c2 & 0xc0) == 0x80)
487 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
490 /* Check for valid 3-byte characters.
491 Don't allow surrogate, \ufffe or \uffff. */
492 if (r >= 0x800 && r <= 0xffff
493 && ! (r >= 0xd800 && r <= 0xdfff)
494 && r != 0xfffe && r != 0xffff)
500 /* We simply don't support invalid characters. We also
501 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502 cannot be valid Java characters. */
503 java_lex_error ("malformed UTF-8 character", 0);
507 /* We only get here on error. */
/* Append character C to line L and record, in the parallel
   unicode_escape_p array, whether it came from a Unicode escape
   (UNICODE_ESCAPE_P).  When the line is full, both arrays are first
   grown by JAVA_LINE_MAX entries.  */
512 java_store_unicode (l, c, unicode_escape_p)
515 int unicode_escape_p;
517 if (l->size == l->max)
519 l->max += JAVA_LINE_MAX;
520 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
521 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
522 sizeof (char)*l->max);
/* Store the character, then its flag; the size is bumped once, by the
   second store.  */
524 l->line [l->size] = c;
525 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character from LEX, decoding a Unicode escape (backslash,
   one or more u characters, four hex digits) into the character it
   denotes.  *UNICODE_ESCAPE_P is cleared on entry and set to 1 once an
   escape has been decoded.  The parity of lex->bs_count, a count of
   backslashes kept in the lexer, is tested before an escape is
   considered.  When the text after the backslash is not an escape, the
   character read is pushed back through lex->unget_value and a plain
   backslash is returned.
   NOTE(review): several lines of this function are not visible in
   this listing.  */
529 java_read_unicode (lex, unicode_escape_p)
531 int *unicode_escape_p;
535 c = java_read_char (lex);
536 *unicode_escape_p = 0;
545 if ((lex->bs_count) % 2 == 1)
547 /* Odd number of \ seen. */
548 c = java_read_char (lex);
551 unicode_t unicode = 0;
554 /* Recognize any number of `u's in \u. */
555 while ((c = java_read_char (lex)) == 'u')
558 /* Unget the most recent character as it is not a `u'. */
561 lex->unget_value = c;
563 /* Next should be 4 hex digits, otherwise it's an error.
564 The hex value is converted into the unicode, pushed into
565 the Unicode stream. */
/* Each digit contributes four bits, most significant digit first.  */
566 for (shift = 12; shift >= 0; shift -= 4)
568 if ((c = java_read_char (lex)) == UEOF)
571 unicode |= (unicode_t)(hex_value (c) << shift);
573 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
576 *unicode_escape_p = 1;
579 lex->unget_value = c;
581 return (unicode_t) '\\';
/* Read one character from LEX with java_read_unicode, collapsing line
   terminators: as the comments below state, a CR LF pair counts as a
   single terminator and a newline is returned in either case.  The
   character read ahead to detect the pair is pushed back through
   lex->unget_value; its escape flag goes to a dummy variable, so
   *UNICODE_ESCAPE_P only reflects the first read.
   NOTE(review): the tests on C are not visible in this listing.  */
585 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
587 int *unicode_escape_p;
589 int c = java_read_unicode (lex, unicode_escape_p);
593 /* We have to read ahead to see if we got \r\n. In that case we
594 return a single line terminator. */
596 c = java_read_unicode (lex, &dummy);
598 lex->unget_value = c;
599 /* In either case we must return a newline. */
609 /* It's time to read a line when... */
610 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
615 if (ctxp->lexer->hit_eof)
618 java_allocate_new_line ();
619 if (ctxp->c_line->line[0] != '\n')
623 int unicode_escape_p;
624 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
629 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
630 if (ctxp->c_line->white_space_only
631 && !JAVA_WHITE_SPACE_P (c)
633 ctxp->c_line->white_space_only = 0;
635 if ((c == '\n') || (c == UEOF))
639 if (c == UEOF && ! found_chars)
641 ctxp->lexer->hit_eof = 1;
646 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
647 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
648 return ctxp->c_line->line [ctxp->c_line->current++];
651 /* Parse the end of a C style comment.
652 * C is the first character following the '/' and '*'. */
654 java_parse_end_comment (c)
/* Walk the input one character at a time.  Running out of input is
   reported with the message below, both in the outer loop and while
   looking at the character that follows a star.
   NOTE(review): the case labels and loop exits are not visible in
   this listing.  */
657 for ( ;; c = java_get_unicode ())
662 java_lex_error ("Comment not terminated at end of input", 0);
665 switch (c = java_get_unicode ())
668 java_lex_error ("Comment not terminated at end of input", 0);
/* A star after a star is pushed back so that the outer loop looks at
   it again.  */
672 case '*': /* reparse only '*' */
673 java_unget_unicode ();
679 /* Parse the documentation section. Keywords must be at the beginning
680 of a documentation comment line (ignoring white space and any `*'
681 character). Parsed keyword(s): @DEPRECATED. */
684 java_parse_doc_section (c)
687 int valid_tag = 0, seen_star = 0;
/* Skip the white space, stars and newlines that lead a comment
   line.  */
689 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
701 c = java_get_unicode();
705 java_lex_error ("Comment not terminated at end of input", 0);
/* Return 1 when a slash arrives while seen_star is set (going by the
   name, a star was just seen), i.e. the comment is over.  */
707 if (seen_star && (c == '/'))
708 return 1; /* Goto step1 in caller */
710 /* We're parsing @deprecated */
711 if (valid_tag && (c == '@'))
/* Collect the characters that follow the at-sign into TAG, stopping
   at a space, a newline, end of input or after 10 characters.
   NOTE(review): the loop can leave tag_index at 10, in which case the
   terminator below is stored at tag[10]; the declaration of TAG is
   not visible in this listing -- confirm it has room for 11 chars.  */
716 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
718 c = java_get_unicode ();
719 tag [tag_index++] = c;
723 java_lex_error ("Comment not terminated at end of input", 0);
724 tag [tag_index] = '\0';
/* Only the deprecated tag is acted upon: it sets ctxp->deprecated.  */
726 if (!strcmp (tag, "deprecated"))
727 ctxp->deprecated = 1;
729 java_unget_unicode ();
733 /* Return true if C is a valid start character for a Java identifier.
734 This is only called if C >= 128 -- smaller values are handled
735 inline. However, this function handles all values anyway. */
737 java_start_char_p (c)
/* C / 256 picks the entry of type_table.  */
740 unsigned int hi = c / 256;
741 const char *const page = type_table[hi];
742 unsigned long val = (unsigned long) page;
/* An entry with bits set outside LETTER_PART | LETTER_START is used
   as a page pointer and indexed with the low 8 bits of C.  The
   alternative branch is not visible in this listing.  */
745 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
746 flags = page[c & 255];
/* The result is nonzero exactly when the LETTER_START bit is set.  */
750 return flags & LETTER_START;
753 /* Return true if C is a valid part character for a Java identifier.
754 This is only called if C >= 128 -- smaller values are handled
755 inline. However, this function handles all values anyway. */
760 unsigned int hi = c / 256;
761 const char *const page = type_table[hi];
762 unsigned long val = (unsigned long) page;
765 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
766 flags = page[c & 255];
770 return flags & LETTER_PART;
/* Decode an escape sequence of a character or string literal and
   return the character it denotes; the callers invoke this after
   reading the backslash.  The simple escapes return the fixed code
   points 0x8, 0x9, 0xa, 0xc, 0xd, 0x22, 0x27 and 0x5c.  An octal
   escape is assembled from the collected digits, three bits per
   digit.  An invalid escape is reported through java_lex_error and
   JAVA_CHAR_ERROR is returned.
   NOTE(review): the case labels of the simple escapes and several
   other lines are not visible in this listing.  */
774 java_parse_escape_sequence ()
779 switch (c = java_get_unicode ())
782 return (unicode_t)0x8;
784 return (unicode_t)0x9;
786 return (unicode_t)0xa;
788 return (unicode_t)0xc;
790 return (unicode_t)0xd;
792 return (unicode_t)0x22;
794 return (unicode_t)0x27;
796 return (unicode_t)0x5c;
797 case '0': case '1': case '2': case '3': case '4':
798 case '5': case '6': case '7':
801 int octal_escape_index = 0;
/* Collect octal digits, up to MAX of them.  */
805 for (; octal_escape_index < max && RANGE (c, '0', '7');
806 c = java_get_unicode ())
808 if (octal_escape_index == 0 && c > '3')
810 /* According to the grammar, `\477' has a well-defined
811 meaning -- it is `\47' followed by `7'. */
814 octal_escape [octal_escape_index++] = c;
/* The character that ended the digit run is not part of the escape.  */
817 java_unget_unicode ();
/* Combine the digits, most significant first, three bits each.  */
819 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
820 i < octal_escape_index; i++, shift -= 3)
821 char_lit |= (octal_escape [i] - '0') << shift;
826 java_lex_error ("Invalid character in escape sequence", 0);
827 return JAVA_CHAR_ERROR;
831 /* Isolate the code which may raise an arithmetic exception in its
840 int number_beginning;
843 #ifdef REAL_ARITHMETIC
844 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
846 #define IS_ZERO(X) ((X) == 0)
849 static void java_perform_atof PARAMS ((PTR));
852 java_perform_atof (av)
855 struct jpa_args *a = (struct jpa_args *)av;
856 YYSTYPE *java_lval = a->java_lval;
857 int number_beginning = a->number_beginning;
858 REAL_VALUE_TYPE value;
859 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
861 SET_REAL_VALUE_ATOF (value,
862 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
864 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
866 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
869 else if (IS_ZERO (value))
871 /* We check to see if the value is really 0 or if we've found an
872 underflow. We do this in the most primitive imaginable way. */
/* Scan the literal text up to an exponent marker, looking for any
   character other than 0 and the decimal point.  */
874 char *p = a->literal_token;
877 while (*p && *p != 'e' && *p != 'E')
879 if (*p != '0' && *p != '.')
/* Report the underflow with the read position temporarily moved back
   to the start of the number, then restore it -- presumably so that
   the diagnostic points at the beginning of the literal.  */
888 int i = ctxp->c_line->current;
889 ctxp->c_line->current = number_beginning;
890 java_lex_error ("Floating point literal underflow", 0);
891 ctxp->c_line->current = i;
/* Build the real constant of the selected type (float when fflag is
   set, double otherwise) and pass it to SET_LVAL_NODE_TYPE.  */
895 SET_LVAL_NODE_TYPE (build_real (type, value), type);
899 static int yylex PARAMS ((YYSTYPE *));
910 unicode_t first_unicode;
911 int ascii_index, all_ascii;
914 /* Translation of the Unicode escape in the raw stream of Unicode
915 characters. Takes care of line terminator. */
917 /* Skip white spaces: SP, TAB and FF or ULT */
918 for (c = java_get_unicode ();
919 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
922 ctxp->elc.line = ctxp->c_line->lineno;
923 ctxp->elc.col = ctxp->c_line->char_col-2;
926 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
928 if (c == 0x1a) /* CTRL-Z */
930 if ((c = java_get_unicode ()) == UEOF)
931 return 0; /* Ok here */
933 java_unget_unicode (); /* Caught later, at the end of the function */
935 /* Handle EOF here */
936 if (c == UEOF) /* Should probably do something here... */
939 /* Take care of eventual comments. */
942 switch (c = java_get_unicode ())
947 c = java_get_unicode ();
950 /* It is ok to end a `//' comment with EOF, unless
951 we're being pedantic. */
953 java_lex_error ("Comment not terminated at end of input",
957 if (c == '\n') /* ULT */
963 if ((c = java_get_unicode ()) == '*')
965 if ((c = java_get_unicode ()) == '/')
966 goto step1; /* Empy documentation comment */
967 else if (java_parse_doc_section (c))
971 java_parse_end_comment ((c = java_get_unicode ()));
975 java_unget_unicode ();
981 ctxp->elc.line = ctxp->c_line->lineno;
982 ctxp->elc.prev_col = ctxp->elc.col;
983 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
984 if (ctxp->elc.col < 0)
987 /* Numeric literals */
988 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
990 /* This section of code is borrowed from gcc/c-lex.c */
991 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
992 int parts[TOTAL_PARTS];
993 HOST_WIDE_INT high, low;
994 /* End borrowed section */
995 char literal_token [256];
996 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
997 int found_hex_digits = 0;
1000 int number_beginning = ctxp->c_line->current;
1004 /* We might have a . separator instead of a FP like .[0-9]* */
1007 unicode_t peep = java_sneak_unicode ();
1009 if (!JAVA_ASCII_DIGIT (peep))
1012 BUILD_OPERATOR (DOT_TK);
1016 for (i = 0; i < TOTAL_PARTS; i++)
1021 c = java_get_unicode ();
1022 if (c == 'x' || c == 'X')
1025 c = java_get_unicode ();
1027 else if (JAVA_ASCII_DIGIT (c))
1031 /* Push the '.' back and prepare for a FP parsing... */
1032 java_unget_unicode ();
1037 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1038 JAVA_LEX_LIT ("0", 10);
1042 SET_LVAL_NODE (long_zero_node);
1043 return (INT_LIT_TK);
1045 SET_LVAL_NODE (float_zero_node);
1048 SET_LVAL_NODE (double_zero_node);
1051 java_unget_unicode ();
1052 SET_LVAL_NODE (integer_zero_node);
1053 return (INT_LIT_TK);
1057 /* Parse the first part of the literal, until we find something
1058 which is not a number. */
1059 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1060 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1061 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1063 /* We store in a string (in case it turns out to be a FP) and in
1064 PARTS if we have to process an integer literal. */
1065 int numeric = hex_value (c);
1068 /* Remember when we find a valid hexadecimal digit */
1070 found_hex_digits = 1;
1072 literal_token [literal_index++] = c;
1073 /* This section of code is borrowed from gcc/c-lex.c */
1074 for (count = 0; count < TOTAL_PARTS; count++)
1076 parts[count] *= radix;
1079 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1080 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1083 parts[0] += numeric;
1085 if (parts [TOTAL_PARTS-1] != 0)
1087 /* End borrowed section. */
1088 c = java_get_unicode ();
1091 /* If we have something from the FP char set but not a digit, parse
1093 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1096 int seen_digit = (literal_index ? 1 : 0);
1097 int seen_exponent = 0;
1098 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1099 double unless specified. */
1101 /* It is ok if the radix is 8 because this just means we've
1102 seen a leading `0'. However, radix==16 is invalid. */
1104 java_lex_error ("Can't express non-decimal FP literal", 0);
1114 literal_token [literal_index++ ] = c;
1115 c = java_get_unicode ();
1118 java_lex_error ("Invalid character in FP literal", 0);
1121 if (c == 'e' || c == 'E')
1125 /* {E,e} must have seen at least one digit */
1127 java_lex_error ("Invalid FP literal", 0);
1131 literal_token [literal_index++] = c;
1132 c = java_get_unicode ();
1135 java_lex_error ("Invalid character in FP literal", 0);
1137 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1139 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1140 stage = 4; /* So we fall through */
1143 if ((c=='-' || c =='+') && stage == 2)
1146 literal_token [literal_index++] = c;
1147 c = java_get_unicode ();
1150 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1151 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1152 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1153 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1155 if (JAVA_ASCII_DIGIT (c))
1157 literal_token [literal_index++ ] = c;
1158 c = java_get_unicode ();
1165 if (stage != 4) /* Don't push back fF/dD */
1166 java_unget_unicode ();
1168 /* An exponent (if any) must have seen a digit. */
1169 if (seen_exponent && !seen_digit)
1170 java_lex_error ("Invalid FP literal", 0);
1172 literal_token [literal_index] = '\0';
1173 JAVA_LEX_LIT (literal_token, radix);
1176 a.literal_token = literal_token;
1178 a.java_lval = java_lval;
1179 a.number_beginning = number_beginning;
1180 if (do_float_handler (java_perform_atof, (PTR) &a))
1183 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1189 } /* JAVA_ASCCI_FPCHAR (c) */
1191 if (radix == 16 && ! found_hex_digits)
1193 ("0x must be followed by at least one hexadecimal digit", 0);
1195 /* Here we get back to converting the integral literal. */
1196 if (c == 'L' || c == 'l')
1198 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1199 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1200 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1201 java_lex_error ("Digit out of range in octal literal", 0);
1202 else if (radix == 16 && !literal_index)
1203 java_lex_error ("No digit specified for hexadecimal literal", 0);
1205 java_unget_unicode ();
1207 #ifdef JAVA_LEX_DEBUG
1208 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1209 JAVA_LEX_LIT (literal_token, radix);
1211 /* This section of code is borrowed from gcc/c-lex.c */
1214 bytes = GET_TYPE_PRECISION (long_type_node);
1215 for (i = bytes; i < TOTAL_PARTS; i++)
1223 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1225 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1226 / HOST_BITS_PER_CHAR)]
1227 << (i * HOST_BITS_PER_CHAR));
1228 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1230 /* End borrowed section. */
1232 /* Range checking */
1235 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1236 9223372036854775807L is the biggest `long' literal that can be
1237 expressed using a 10 radix. For other radixes, everything that
1238 fits within 64 bits is OK. */
1239 int hb = (high >> 31);
1240 if (overflow || (hb && low && radix == 10)
1241 || (hb && high & 0x7fffffff && radix == 10))
1242 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1246 /* 2147483648 is valid if operand of a '-'. Otherwise,
1247 2147483647 is the biggest `int' literal that can be
1248 expressed using a 10 radix. For other radixes, everything
1249 that fits within 32 bits is OK. As all literals are
1250 signed, we sign extend here. */
1251 int hb = (low >> 31) & 0x1;
1252 if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
1253 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1257 value = build_int_2 (low, high);
1258 JAVA_RADIX10_FLAG (value) = radix == 10;
1259 SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
1261 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1262 long_suffix ? long_type_node : int_type_node);
1267 /* Character literals */
1271 if ((c = java_get_unicode ()) == '\\')
1272 char_lit = java_parse_escape_sequence ();
1275 if (c == '\n' || c == '\'')
1276 java_lex_error ("Invalid character literal", 0);
1280 c = java_get_unicode ();
1282 if ((c == '\n') || (c == UEOF))
1283 java_lex_error ("Character literal not terminated at end of line", 0);
1285 java_lex_error ("Syntax error in character literal", 0);
1287 if (char_lit == JAVA_CHAR_ERROR)
1288 char_lit = 0; /* We silently convert it to zero */
1290 JAVA_LEX_CHAR_LIT (char_lit);
1291 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1295 /* String literals */
1301 for (no_error = 1, c = java_get_unicode ();
1302 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1305 c = java_parse_escape_sequence ();
1306 if (c == JAVA_CHAR_ERROR)
1309 c = 0; /* We silently convert it to zero. */
1311 java_unicode_2_utf8 (c);
1313 if (c == '\n' || c == UEOF) /* ULT */
1315 lineno--; /* Refer to the line the terminator was seen */
1316 java_lex_error ("String not terminated at end of line", 0);
1320 obstack_1grow (&temporary_obstack, '\0');
1321 string = obstack_finish (&temporary_obstack);
1323 if (!no_error || (c != '"'))
1324 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1326 java_lval->node = build_string (strlen (string), string);
1328 obstack_free (&temporary_obstack, string);
1329 return STRING_LIT_TK;
1337 BUILD_OPERATOR (OP_TK);
1343 if (ctxp->ccb_indent == 1)
1344 ctxp->first_ccb_indent1 = lineno;
1346 BUILD_OPERATOR (OCB_TK);
1350 if (ctxp->ccb_indent == 1)
1351 ctxp->last_ccb_indent1 = lineno;
1352 BUILD_OPERATOR (CCB_TK);
1355 BUILD_OPERATOR (OSB_TK);
1367 BUILD_OPERATOR (DOT_TK);
1368 /* return DOT_TK; */
1375 if ((c = java_get_unicode ()) == '=')
1377 BUILD_OPERATOR (EQ_TK);
1381 /* Equals is used in two different locations. In the
1382 variable_declarator: rule, it has to be seen as '=' as opposed
1383 to being seen as an ordinary assignment operator in
1384 assignment_operators: rule. */
1385 java_unget_unicode ();
1386 BUILD_OPERATOR (ASSIGN_TK);
1390 switch ((c = java_get_unicode ()))
1393 BUILD_OPERATOR (GTE_TK);
1395 switch ((c = java_get_unicode ()))
1398 if ((c = java_get_unicode ()) == '=')
1400 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1404 java_unget_unicode ();
1405 BUILD_OPERATOR (ZRS_TK);
1408 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1410 java_unget_unicode ();
1411 BUILD_OPERATOR (SRS_TK);
1414 java_unget_unicode ();
1415 BUILD_OPERATOR (GT_TK);
1419 switch ((c = java_get_unicode ()))
1422 BUILD_OPERATOR (LTE_TK);
1424 if ((c = java_get_unicode ()) == '=')
1426 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1430 java_unget_unicode ();
1431 BUILD_OPERATOR (LS_TK);
1434 java_unget_unicode ();
1435 BUILD_OPERATOR (LT_TK);
1439 switch ((c = java_get_unicode ()))
1442 BUILD_OPERATOR (BOOL_AND_TK);
1444 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1446 java_unget_unicode ();
1447 BUILD_OPERATOR (AND_TK);
1451 switch ((c = java_get_unicode ()))
1454 BUILD_OPERATOR (BOOL_OR_TK);
1456 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1458 java_unget_unicode ();
1459 BUILD_OPERATOR (OR_TK);
1463 switch ((c = java_get_unicode ()))
1466 BUILD_OPERATOR (INCR_TK);
1468 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1470 java_unget_unicode ();
1471 BUILD_OPERATOR (PLUS_TK);
1475 switch ((c = java_get_unicode ()))
1478 BUILD_OPERATOR (DECR_TK);
1480 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1482 java_unget_unicode ();
1483 BUILD_OPERATOR (MINUS_TK);
1487 if ((c = java_get_unicode ()) == '=')
1489 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1493 java_unget_unicode ();
1494 BUILD_OPERATOR (MULT_TK);
1498 if ((c = java_get_unicode ()) == '=')
1500 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1504 java_unget_unicode ();
1505 BUILD_OPERATOR (DIV_TK);
1509 if ((c = java_get_unicode ()) == '=')
1511 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1515 java_unget_unicode ();
1516 BUILD_OPERATOR (XOR_TK);
1520 if ((c = java_get_unicode ()) == '=')
1522 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1526 java_unget_unicode ();
1527 BUILD_OPERATOR (REM_TK);
1531 if ((c = java_get_unicode()) == '=')
1533 BUILD_OPERATOR (NEQ_TK);
1537 java_unget_unicode ();
1538 BUILD_OPERATOR (NEG_TK);
1543 BUILD_OPERATOR (REL_QM_TK);
1546 BUILD_OPERATOR (REL_CL_TK);
1548 BUILD_OPERATOR (NOT_TK);
1551 /* Keyword, boolean literal or null literal */
1552 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1553 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1555 java_unicode_2_utf8 (c);
1556 if (all_ascii && c >= 128)
1561 obstack_1grow (&temporary_obstack, '\0');
1562 string = obstack_finish (&temporary_obstack);
1563 java_unget_unicode ();
1565 /* If we have something all ascii, we consider a keyword, a boolean
1566 literal, a null literal or an all ASCII identifier. Otherwise,
1567 this is an identifier (possibly not respecting formation rule). */
1570 const struct java_keyword *kw;
1571 if ((kw=java_keyword (string, ascii_index)))
1573 JAVA_LEX_KW (string);
1576 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1577 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1578 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1579 case PRIVATE_TK: case STRICT_TK:
1580 SET_MODIFIER_CTX (kw->token);
1583 SET_LVAL_NODE (float_type_node);
1586 SET_LVAL_NODE (double_type_node);
1589 SET_LVAL_NODE (boolean_type_node);
1592 SET_LVAL_NODE (byte_type_node);
1595 SET_LVAL_NODE (short_type_node);
1598 SET_LVAL_NODE (int_type_node);
1601 SET_LVAL_NODE (long_type_node);
1604 SET_LVAL_NODE (char_type_node);
1607 /* Keyword based literals */
1610 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1611 boolean_true_node : boolean_false_node));
1614 SET_LVAL_NODE (null_pointer_node);
1617 /* Some keyword we want to retain information on the location
1630 BUILD_OPERATOR (kw->token);
1638 /* We may have an ID here */
1639 if (JAVA_START_CHAR_P (first_unicode))
1641 JAVA_LEX_ID (string);
1642 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1646 /* Everything else is an invalid character in the input */
1648 char lex_error_buffer [128];
1649 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1650 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1651 java_lex_error (lex_error_buffer, 1);
1657 /* This is called by the parser to see if an error should be generated
1658 due to numeric overflow. This function only handles the particular
1659 case of the largest negative value, and is only called in the case
1660 where this value is not preceded by `-'. */
/* VALUE is the tree node of the literal.  Nothing is checked unless VALUE
   is an INTEGER_CST that carries JAVA_RADIX10_FLAG.  A problem is reported
   through java_lex_error with a FORWARD argument of 0.  */
1662 error_if_numeric_overflow (value)
1665 if (TREE_CODE (value) == INTEGER_CST && JAVA_RADIX10_FLAG (value))
1667 unsigned HOST_WIDE_INT lo, hi;
/* Fetch the low and high words of the constant.  */
1669 lo = TREE_INT_CST_LOW (value);
1670 hi = TREE_INT_CST_HIGH (value);
1671 if (TREE_TYPE (value) == long_type_node)
/* `long' literal: only HI is inspected.  The error fires when HI shifted
   right by 31 is non-zero and the low 31 bits of HI are all zero.
   NOTE(review): unlike the `int' case below, the shifted value is not
   masked with 0x1 and LO is never examined -- confirm this is intended
   on hosts where HOST_WIDE_INT is wider than 32 bits.  */
1673 int hb = (hi >> 31);
1674 if (hb && !(hi & 0x7fffffff))
1675 java_lex_error ("Numeric overflow for `long' literal", 0);
/* Any other type: the error fires when bit 31 of LO is set and the low
   31 bits of LO are all zero, i.e. the low 32 bits are 0x80000000.  */
1679 int hb = (lo >> 31) & 0x1;
1680 if (hb && !(lo & 0x7fffffff))
1681 java_lex_error ("Numeric overflow for `int' literal", 0);
1685 #endif /* JC1_LITE */
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one byte at
   a time with obstack_1grow.  One, two or three bytes are emitted
   depending on the value of UNICODE.  */
1688 java_unicode_2_utf8 (unicode)
/* 0x01-0x7f: emitted unchanged as a single byte.  */
1691 if (RANGE (unicode, 0x01, 0x7f))
1692 obstack_1grow (&temporary_obstack, (char)unicode);
/* 0x80-0x7ff, and also the value 0: two bytes.  For 0 the arithmetic
   below yields the pair 0xc0 0x80 instead of a single zero byte --
   presumably so that the buffer never contains an embedded NUL; confirm
   against the consumers of the string.  */
1693 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
/* Lead byte: 110xxxxx, carrying bits 6-10 of UNICODE.  */
1695 obstack_1grow (&temporary_obstack,
1696 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
/* Continuation byte: 10xxxxxx, carrying bits 0-5.  */
1697 obstack_1grow (&temporary_obstack,
1698 (unsigned char)(0x80 | (unicode & 0x3f)));
/* Everything else: three bytes.  Only the low 16 bits of UNICODE take
   part (masks 0xf000, 0x0fc0, 0x003f); this assumes unicode_t never
   holds more than 16 significant bits -- TODO confirm.  */
1700 else /* Range 0x800-0xffff */
1702 obstack_1grow (&temporary_obstack,
1703 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1704 obstack_1grow (&temporary_obstack,
1705 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1706 obstack_1grow (&temporary_obstack,
1707 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Replace NODE by the result of build_expr_wfl applied to it, using the
   file name in ctxp->filename and the line and column stored in
   ctxp->elc, then clear the TREE_TYPE of that result.  */
1713 build_wfl_node (node)
1716 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1717 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1718 TREE_TYPE (node) = NULL_TREE;
/* Record the position of a lexical error in ctxp->elc and reset
   ctxp->java_error_flag.  FORWARD is added to the recorded column.
   How MSG is consumed is not visible in this part of the function.
   NOTE(review): both parameters are tagged ATTRIBUTE_UNUSED although
   FORWARD is read below; presumably the tag is there for a configuration
   in which the body is compiled out -- confirm.  */
1724 java_lex_error (msg, forward)
1725 const char *msg ATTRIBUTE_UNUSED;
1726 int forward ATTRIBUTE_UNUSED;
/* Line comes from the current line record; column is that record's
   char_col minus one, plus FORWARD.  */
1729 ctxp->elc.line = ctxp->c_line->lineno;
1730 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1732 /* Might be caught in the middle of some error report */
1733 ctxp->java_error_flag = 0;
1750 if (next != '\n' && next != EOF)
/* Build on temporary_obstack the text of line LINE of FILENAME, a
   newline, and a marker row that ends in `^'; return the object produced
   by obstack_finish.  COL selects where the `^' is placed; the special
   values of COL are listed in the comments below.  */
1762 java_get_line_col (filename, line, col)
1763 const char *filename ATTRIBUTE_UNUSED;
1764 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1769 /* Dumb implementation. Doesn't try to cache or optimize things. */
1770 /* First line of the file is line 1, first column is 1 */
1772 /* COL == -1 means, at the CR/LF in LINE */
1773 /* COL == -2 means, at the first non space char in LINE */
1776 int c, ccol, cline = 1;
1777 int current_line_col = 0;
1778 int first_non_space = 0;
/* The file is opened afresh on every call; failure to open is fatal.  */
1781 if (!(fp = fopen (filename, "r")))
1782 fatal_io_error ("can't open %s", filename);
/* Advance through the file until the line counter reaches LINE.  */
1784 while (cline != line)
/* Placeholder text appended (without its trailing NUL) in place of the
   requested line.  */
1789 static const char msg[] = "<<file too short - unexpected EOF>>";
1790 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1793 if (java_is_eol (fp, c))
1797 /* Gather the chars of the current line in a buffer */
/* A negative read result or an end of line stops the gathering.  */
1801 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white space.
   Because 0 doubles as "not found yet", a line that starts with a
   non-blank character keeps first_non_space at 0.  */
1803 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1804 first_non_space = current_line_col;
1805 obstack_1grow (&temporary_obstack, c);
/* Terminate the copied source line before the marker row is built.  */
1810 obstack_1grow (&temporary_obstack, '\n');
/* Resolve COL to a concrete column.  Two of the three assignments below
   clear first_non_space, which makes the marker row use spaces only.
   Presumably the first pair handles COL == -1 and the next line handles
   COL == -2; the tests themselves are not shown here.  */
1814 col = current_line_col;
1815 first_non_space = 0;
1818 col = first_non_space;
1820 first_non_space = 0;
1822 /* Place the '^' at the right position */
1823 base = obstack_base (&temporary_obstack);
/* NOTE(review): the bound adds 3 to COL; the reason for that offset is
   not visible in this function.  */
1824 for (ccol = 1; ccol <= col+3; ccol++)
1826 /* Compute \t when reaching first_non_space */
/* This inner `c' shadows the `int c' declared at the top of the
   function.  When first_non_space is non-zero, a tab in the copied line
   is repeated as a tab in the marker row; otherwise a space is used.  */
1827 char c = (first_non_space ?
1828 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1829 obstack_1grow (&temporary_obstack, c);
/* obstack_grow0 appends the caret followed by a terminating NUL.  */
1831 obstack_grow0 (&temporary_obstack, "^", 1);
1834 return obstack_finish (&temporary_obstack);
/* Compare the UTF-8 text STR, LENGTH bytes long, with the zero-terminated
   string NAME.  The result is `ch - name[i]' for the position at which
   that value is returned (presumably the first mismatch; the test is not
   shown here), otherwise 0 when STR was consumed exactly and 1 when
   bytes of STR remain.  */
1840 utf8_cmp (str, length, name)
1841 const unsigned char *str;
/* LIMIT is one past the last byte of STR and bounds the decoding.  */
1845 const unsigned char *limit = str + length;
/* Walk NAME up to its terminating zero, taking one character from STR
   with UTF8_GET at each step.  */
1848 for (i = 0; name[i]; ++i)
1850 int ch = UTF8_GET (str, limit);
1852 return ch - name[i];
/* NAME is exhausted: equal only if STR is exhausted too.  */
1855 return str == limit ? 0 : 1;
1858 /* A sorted list of all C++ keywords. */
1860 static const char *const cxx_keywords[] =
1968 /* Return true if NAME is a C++ keyword. */
/* NAME is LENGTH bytes long.  The cxx_keywords table is probed at the
   midpoint of [first, last) repeatedly; this looks like a bisection over
   the sorted table, but the statements that move FIRST and LAST are not
   shown here -- confirm.  */
1971 cxx_keyword_p (name, length)
1975 int last = ARRAY_SIZE (cxx_keywords);
1977 int mid = (last + first) / 2;
/* OLD keeps the previous midpoint across iterations.  */
1980 for (mid = (last + first) / 2;
1982 old = mid, mid = (last + first) / 2)
/* Compare only as many bytes as the shorter of NAME and the keyword.  */
1984 int kwl = strlen (cxx_keywords[mid]);
1985 int min_length = kwl > length ? length : kwl;
1986 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1991 /* We've found a match if all the remaining characters are
1993 for (i = min_length; i < length && name[i] == '$'; ++i)
2007 #endif /* JC1_LITE */