1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
40 /* Function declarations */
41 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
42 static void java_unicode_2_utf8 PARAMS ((unicode_t));
43 static void java_lex_error PARAMS ((const char *, int));
45 static int java_is_eol PARAMS ((FILE *, int));
46 static tree build_wfl_node PARAMS ((tree));
48 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
49 static unicode_t java_parse_escape_sequence PARAMS ((void));
50 static int java_letter_or_digit_p PARAMS ((unicode_t));
51 static int java_ignorable_control_p PARAMS ((unicode_t));
52 static int java_parse_doc_section PARAMS ((unicode_t));
53 static void java_parse_end_comment PARAMS ((unicode_t));
54 static unicode_t java_get_unicode PARAMS ((void));
55 static unicode_t java_read_unicode PARAMS ((java_lexer *, int *));
56 static unicode_t java_read_unicode_collapsing_terminators
57 PARAMS ((java_lexer *, int *));
/* NOTE(review): java_store_unicode is already declared a few lines up
   with the same parameter list; the next declaration repeats it.  */
58 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
59 static unicode_t java_read_char PARAMS ((java_lexer *));
60 static void java_allocate_new_line PARAMS ((void));
61 static void java_unget_unicode PARAMS ((void));
62 static unicode_t java_sneak_unicode PARAMS ((void));
/* The only declaration in this list that is not static.  */
63 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
65 /* This is nonzero if we have initialized `need_byteswap'. */
66 static int byteswap_init = 0;
68 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
69 big-endian order -- not native endian order. We handle this by
70 doing a conversion once at startup and seeing what happens. This
71 flag holds the results of this determination. */
72 static int need_byteswap = 0;
/* Initialize lexing state: interns the identifiers and builds the WFL
   nodes referenced below, resets fields of the current parser context
   (ctxp), and stores in ctxp->lexer a lexer created by java_new_lexer
   for FINPUT and ENCODING.  */
75 java_init_lex (finput, encoding)
/* NOTE(review): as shown here this is a plain local initialized to 0,
   so the test on java_lang_imported further down would always pass;
   confirm against the full source whether it was meant to be static.  */
80 int java_lang_imported = 0;
83 java_lang_id = get_identifier ("java.lang");
84 if (!java_lang_cloneable)
85 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
86 if (!java_io_serializable)
87 java_io_serializable = get_identifier ("java.io.Serializable");
89 inst_id = get_identifier ("inst$");
91 wpv_id = get_identifier ("write_parm_value$");
93 if (!java_lang_imported)
/* Build an import node for java.lang, read its directory, and push the
   node on the front of ctxp->import_demand_list.  */
95 tree node = build_tree_list
96 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
97 read_import_dir (TREE_PURPOSE (node));
98 TREE_CHAIN (node) = ctxp->import_demand_list;
99 ctxp->import_demand_list = node;
100 java_lang_imported = 1;
104 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
106 label_id = get_identifier ("$L");
108 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
109 if (!wfl_string_buffer)
111 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
113 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
115 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
116 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
/* NOTE(review): 11 is a bare element count for modifier_ctx; confirm it
   matches the declared size of that array.  */
118 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
119 memset ((PTR) current_jcf, 0, sizeof (JCF));
120 ctxp->current_parsed_class = NULL;
121 ctxp->package = NULL_TREE;
124 ctxp->filename = input_filename;
/* Line numbering restarts at 0 for both the context and the global.  */
125 ctxp->lineno = lineno = 0;
128 ctxp->minus_seen = 0;
129 ctxp->java_error_flag = 0;
130 ctxp->lexer = java_new_lexer (finput, encoding);
/* Render character I of LINE into a function-static 10-byte buffer: in
   backslash-u hexadecimal form when the character is flagged in
   unicode_escape_p or its value is above 128, otherwise by storing the
   character itself in the first byte.  */
134 java_sprint_unicode (line, i)
135 struct java_line *line;
138 static char buffer [10];
/* NOTE(review): the comparison is strictly greater than 128, so the
   value 128 itself goes through the plain-character store below.  */
139 if (line->unicode_escape_p [i] || line->line [i] > 128)
140 sprintf (buffer, "\\u%04x", line->line [i]);
143 buffer [0] = line->line [i];
/* Return the character at the current read position of the current
   line (ctxp->c_line) without advancing that position.  */
150 java_sneak_unicode ()
152 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Move the read position of the current line back by one character and
   subtract JAVA_COLUMN_DELTA (0) from its column counter.  Calls fatal
   when the read position is already 0.  */
156 java_unget_unicode ()
158 if (!ctxp->c_line->current)
159 fatal ("can't unget unicode - java_unget_unicode");
160 ctxp->c_line->current--;
161 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Prepare ctxp->c_line to receive the next source line.  A current line
   that held more than white space becomes ctxp->p_line, after the
   buffers of the older p_line are freed.  The line counters are reset,
   a pending read-ahead character is carried over as the first
   character, and the global lineno is incremented.  */
165 java_allocate_new_line ()
/* Save the read-ahead character and its escape flag before the current
   line is recycled.  */
167 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
168 char ahead_escape_p = (ctxp->c_line ?
169 ctxp->c_line->unicode_escape_ahead_p : 0);
171 if (ctxp->c_line && !ctxp->c_line->white_space_only)
175 free (ctxp->p_line->unicode_escape_p);
176 free (ctxp->p_line->line);
179 ctxp->p_line = ctxp->c_line;
180 ctxp->c_line = NULL; /* Reallocated */
/* Allocate the line record and its two parallel arrays, each with room
   for JAVA_LINE_MAX entries.  */
185 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
186 ctxp->c_line->max = JAVA_LINE_MAX;
187 ctxp->c_line->line = (unicode_t *)xmalloc
188 (sizeof (unicode_t)*ctxp->c_line->max);
189 ctxp->c_line->unicode_escape_p =
190 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
191 ctxp->c_line->white_space_only = 0;
194 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
195 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Re-insert the saved read-ahead character at the end of the (empty)
   line.  */
198 ctxp->c_line->line [ctxp->c_line->size] = ahead;
199 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
200 ctxp->c_line->size++;
202 ctxp->c_line->ahead [0] = 0;
203 ctxp->c_line->unicode_escape_ahead_p = 0;
204 ctxp->c_line->lineno = ++lineno;
205 ctxp->c_line->white_space_only = 1;
208 /* Create a new lexer object. */
/* The lexer reads from FINPUT.  ENCODING is the source encoding handed
   to iconv_open; the last visible statement reports an unknown encoding
   through fatal.  */
210 java_new_lexer (finput, encoding)
212 const char *encoding;
214 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
217 lex->finput = finput;
219 lex->unget_value = 0;
/* Request a converter from ENCODING to UCS-2; iconv_open yields
   (iconv_t) -1 when it cannot provide one.  */
222 lex->handle = iconv_open ("UCS-2", encoding);
223 if (lex->handle != (iconv_t) -1)
229 lex->read_anything = 0;
230 lex->use_fallback = 0;
232 /* Work around broken iconv() implementations by doing checking at
233 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
234 then all UCS-2 encoders will be broken. Perhaps not a valid
242 handle = iconv_open ("UCS-2", "UTF-8");
243 if (handle != (iconv_t) -1)
250 /* This is the UTF-8 encoding of \ufeff. */
257 outp = (char *) &result;
260 r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
261 /* Conversion must be complete for us to use the result. */
262 if (r != (size_t) -1 && inc == 0 && outc == 0)
263 need_byteswap = (result != 0xfeff);
267 lex->byte_swap = need_byteswap;
270 #endif /* HAVE_ICONV */
272 /* If iconv failed, use the internal decoder if the default
273 encoding was requested. This code is used on platforms where
274 iconv() exists but is insufficient for our needs. For
275 instance, on Solaris 2.5 iconv() cannot handle UTF-8 or UCS-2. */
276 if (strcmp (encoding, DEFAULT_ENCODING))
280 lex->use_fallback = 1;
281 #endif /* HAVE_ICONV */
285 fatal ("unknown encoding: `%s'", encoding);
/* Tear down LEX.  As far as the visible lines show, the iconv handle is
   closed unless the lexer is using the fallback decoder.  */
291 java_destroy_lexer (lex)
295 if (! lex->use_fallback)
296 iconv_close (lex->handle);
/* NOTE(review): the function header is not among the visible lines;
   from the declarations and the callers this appears to be the body of
   java_read_char (lex).  It returns a pushed-back character when one is
   pending; otherwise it converts buffered input through iconv or, on
   the fallback path, decodes UTF-8 sequences of up to three bytes read
   with getc.  */
/* NOTE(review): a pending character is detected by a nonzero
   unget_value, so a pushed-back character whose value is 0 would not be
   returned from here -- confirm that this cannot matter.  */
305 if (lex->unget_value)
307 unicode_t r = lex->unget_value;
308 lex->unget_value = 0;
313 if (! lex->use_fallback)
315 size_t ir, inbytesleft, in_save, out_count, out_save;
319 /* If there is data which has already been converted, use it. */
320 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
327 /* See if we need to read more data. If FIRST == 0 then
328 the previous conversion attempt ended in the middle of
329 a character at the end of the buffer. Otherwise we
330 only have to read if the buffer is empty. */
331 if (lex->first == 0 || lex->first >= lex->last)
335 if (lex->first >= lex->last)
340 if (feof (lex->finput))
342 r = fread (&lex->buffer[lex->last], 1,
343 sizeof (lex->buffer) - lex->last,
348 inbytesleft = lex->last - lex->first;
349 out_count = sizeof (lex->out_buffer) - lex->out_last;
351 if (inbytesleft == 0)
353 /* We've tried to read and there is nothing left. */
/* Remember the counts before the call so the amounts consumed and
   produced can be computed afterwards.  */
357 in_save = inbytesleft;
358 out_save = out_count;
359 inp = &lex->buffer[lex->first];
360 outp = &lex->out_buffer[lex->out_last];
361 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
364 /* If we haven't read any bytes, then look to see if we
366 if (! lex->read_anything && out_save - out_count >= 2)
368 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
374 else if (uc == 0xfffe)
379 lex->read_anything = 1;
385 for (i = 0; i < out_save - out_count; i += 2)
387 char t = lex->out_buffer[lex->out_last + i];
388 lex->out_buffer[lex->out_last + i]
389 = lex->out_buffer[lex->out_last + i + 1];
390 lex->out_buffer[lex->out_last + i + 1] = t;
394 lex->first += in_save - inbytesleft;
395 lex->out_last += out_save - out_count;
397 /* If we converted anything at all, move along. */
398 if (out_count != out_save)
401 if (ir == (size_t) -1)
405 /* This is ok. This means that the end of our buffer
406 is in the middle of a character sequence. We just
407 move the valid part of the buffer to the beginning
409 /* We use bcopy() because it should work for
410 overlapping strings. Use memmove() instead... */
411 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
412 lex->last - lex->first);
413 lex->last -= lex->first;
418 /* A more serious error. */
419 java_lex_error ("unrecognized character in input stream",
427 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
429 /* Don't have any data. */
434 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
439 #endif /* HAVE_ICONV */
442 c = getc (lex->finput);
/* Two-byte UTF-8 sequence: lead byte 110xxxxx, then 10xxxxxx.  */
450 if ((c & 0xe0) == 0xc0)
452 c1 = getc (lex->finput);
453 if ((c1 & 0xc0) == 0x80)
454 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
/* Three-byte UTF-8 sequence: lead byte 1110xxxx, then two 10xxxxxx
   bytes.  */
457 else if ((c & 0xf0) == 0xe0)
459 c1 = getc (lex->finput);
460 if ((c1 & 0xc0) == 0x80)
462 c2 = getc (lex->finput);
463 if ((c2 & 0xc0) == 0x80)
464 return (unicode_t)(((c & 0xf) << 12) +
465 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
473 /* We simply don't support invalid characters. */
474 java_lex_error ("malformed UTF-8 character", 0);
478 /* We only get here on error. */
/* Append character C and its UNICODE_ESCAPE_P flag to line L.  When the
   line is full, both parallel arrays are first grown by JAVA_LINE_MAX
   entries with xrealloc.  */
483 java_store_unicode (l, c, unicode_escape_p)
486 int unicode_escape_p;
488 if (l->size == l->max)
490 l->max += JAVA_LINE_MAX;
491 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
492 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
493 sizeof (char)*l->max);
495 l->line [l->size] = c;
/* The size is advanced here, after both arrays have been written.  */
496 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character from LEX, translating backslash-u escapes into
   the character they denote.  *UNICODE_ESCAPE_P is cleared on entry and
   set to 1 once such an escape has been decoded.  When the backslash is
   not the start of an escape, the character read after it is saved in
   lex->unget_value and the backslash itself is returned.  */
500 java_read_unicode (lex, unicode_escape_p)
502 int *unicode_escape_p;
506 c = java_read_char (lex);
507 *unicode_escape_p = 0;
516 if ((lex->bs_count) % 2 == 1)
518 /* Odd number of \ seen. */
519 c = java_read_char (lex);
522 unicode_t unicode = 0;
524 /* Next should be 4 hex digits, otherwise it's an error.
525 The hex value is converted into the unicode, pushed into
526 the Unicode stream. */
/* Digits arrive most significant first, hence the shifts 12, 8, 4, 0.  */
527 for (shift = 12; shift >= 0; shift -= 4)
529 if ((c = java_read_char (lex)) == UEOF)
531 if (c >= '0' && c <= '9')
532 unicode |= (unicode_t)((c-'0') << shift);
533 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/* OR-ing with 0x20 folds an upper-case hex letter to lower case before
   the distance from a is taken.  */
534 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
537 /* Recognize any number of u in \u. */
541 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
544 *unicode_escape_p = 1;
547 lex->unget_value = c;
549 return (unicode_t) '\\';
/* Wrapper around java_read_unicode that reads one character ahead so
   that a carriage return followed by a line feed is reported as a
   single line terminator.  A read-ahead character that is not consumed
   is saved in lex->unget_value.  */
553 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
555 int *unicode_escape_p;
557 unicode_t c = java_read_unicode (lex, unicode_escape_p);
561 /* We have to read ahead to see if we got \r\n. In that case we
562 return a single line terminator. */
564 c = java_read_unicode (lex, &dummy);
566 lex->unget_value = c;
567 /* In either case we must return a newline. */
/* NOTE(review): the function header is not among the visible lines;
   from the declarations and the callers this appears to be the body of
   java_get_unicode ().  When the current line is missing or exhausted,
   a new line is set up and filled one character at a time until a
   newline or UEOF has been stored.  The character at the current
   position is then returned and the position advanced.  */
577 /* It's time to read a line when... */
578 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
581 java_allocate_new_line ();
582 if (ctxp->c_line->line[0] != '\n')
586 int unicode_escape_p;
587 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
589 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
590 if (ctxp->c_line->white_space_only
591 && !JAVA_WHITE_SPACE_P (c)
594 ctxp->c_line->white_space_only = 0;
595 if ((c == '\n') || (c == UEOF))
600 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
601 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
602 return ctxp->c_line->line [ctxp->c_line->current++];
605 /* Parse the end of a C style comment.
606 * C is the first character following the '/' and '*'. */
/* Input that ends before the closing delimiter is reported through
   java_lex_error.  */
608 java_parse_end_comment (c)
612 for ( ;; c = java_get_unicode ())
617 java_lex_error ("Comment not terminated at end of input", 0);
620 switch (c = java_get_unicode ())
623 java_lex_error ("Comment not terminated at end of input", 0);
/* A star that is not followed by a slash is pushed back so that the
   outer loop examines it again.  */
627 case '*': /* reparse only '*' */
628 java_unget_unicode ();
634 /* Parse the documentation section. Keywords must be at the beginning
635 of a documentation comment line (ignoring white space and any `*'
636 character). Parsed keyword(s): @DEPRECATED. */
/* C is the character the scan starts from.  Returns 1 when the closing
   star-slash was met while skipping leading characters; on a recognized
   deprecated tag ctxp->deprecated is set to 1.  */
639 java_parse_doc_section (c)
642 int valid_tag = 0, seen_star = 0;
644 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
656 c = java_get_unicode();
660 java_lex_error ("Comment not terminated at end of input", 0);
662 if (seen_star && (c == '/'))
663 return 1; /* Goto step1 in caller */
665 /* We're parsing @deprecated */
666 if (valid_tag && (c == '@'))
/* NOTE(review): the loop tests C before reading the next character, so
   a terminating space or newline is itself stored into tag.  The loop
   can also exit with tag_index equal to 10, in which case the
   terminator below is written to tag[10]; the declaration of tag is not
   visible here -- confirm it has at least 11 elements.  */
671 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
673 c = java_get_unicode ();
674 tag [tag_index++] = c;
678 java_lex_error ("Comment not terminated at end of input", 0);
679 tag [tag_index] = '\0';
681 if (!strcmp (tag, "deprecated"))
682 ctxp->deprecated = 1;
684 java_unget_unicode ();
688 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
689 will return a wrong result. */
/* Function form of the _JAVA_LETTER_OR_DIGIT_P macro applied to C.  */
691 java_letter_or_digit_p (c)
694 return _JAVA_LETTER_OR_DIGIT_P (c);
697 /* This function to be used only by JAVA_ID_CHAR_P (). */
/* Function form of the _JAVA_IDENTIFIER_IGNORABLE macro applied to C.  */
699 java_ignorable_control_p (c)
702 return _JAVA_IDENTIFIER_IGNORABLE (c);
/* Decode the escape whose introducing backslash has already been
   consumed and return the character it denotes.  Single-letter escapes
   map to fixed code points, octal escapes of limited length are
   accumulated digit by digit, and anything else is reported and yields
   JAVA_CHAR_ERROR.  */
706 java_parse_escape_sequence ()
711 switch (c = java_get_unicode ())
714 return (unicode_t)0x8;
716 return (unicode_t)0x9;
718 return (unicode_t)0xa;
720 return (unicode_t)0xc;
722 return (unicode_t)0xd;
724 return (unicode_t)0x22;
726 return (unicode_t)0x27;
728 return (unicode_t)0x5c;
729 case '0': case '1': case '2': case '3': case '4':
730 case '5': case '6': case '7':
733 int octal_escape_index = 0;
737 for (; octal_escape_index < max && RANGE (c, '0', '7');
738 c = java_get_unicode ())
740 if (octal_escape_index == 0 && c > '3')
742 /* According to the grammar, `\477' has a well-defined
743 meaning -- it is `\47' followed by `7'. */
746 octal_escape [octal_escape_index++] = c;
/* The character that ended the digit run belongs to what follows.  */
749 java_unget_unicode ();
/* Combine the collected digits, three bits each, most significant
   first.  */
751 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
752 i < octal_escape_index; i++, shift -= 3)
753 char_lit |= (octal_escape [i] - '0') << shift;
758 return '\n'; /* ULT, caught later as a specific error */
760 java_lex_error ("Invalid character in escape sequence", 0);
761 return JAVA_CHAR_ERROR;
/* java_perform_atof (AV): AV points to a struct jpa_args.  The literal
   text in literal_token is converted to a REAL_VALUE_TYPE in the mode
   of float or double according to fflag.  An infinite or NaN result is
   reported as a range error.  A zero result is reported as an underflow
   when the mantissa part of the text holds a character other than 0
   and the decimal point, the read position being moved temporarily to
   number_beginning for the report.  The value is finally wrapped with
   build_real and stored through SET_LVAL_NODE_TYPE.
   NOTE(review): the comment that opens on the next line has lost its
   closing line in this listing.  */
765 /* Isolate the code which may raise an arithmetic exception in its
774 int number_beginning;
777 #ifdef REAL_ARITHMETIC
778 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
780 #define IS_ZERO(X) ((X) == 0)
783 static void java_perform_atof PARAMS ((PTR));
786 java_perform_atof (av)
789 struct jpa_args *a = (struct jpa_args *)av;
790 YYSTYPE *java_lval = a->java_lval;
791 int number_beginning = a->number_beginning;
792 REAL_VALUE_TYPE value;
793 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
795 SET_REAL_VALUE_ATOF (value,
796 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
798 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
800 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
803 else if (IS_ZERO (value))
805 /* We check to see if the value is really 0 or if we've found an
806 underflow. We do this in the most primitive imaginable way. */
808 char *p = a->literal_token;
/* Only the part before an exponent marker is scanned.  */
811 while (*p && *p != 'e' && *p != 'E')
813 if (*p != '0' && *p != '.')
822 int i = ctxp->c_line->current;
823 ctxp->c_line->current = number_beginning;
824 java_lex_error ("Floating point literal underflow", 0);
825 ctxp->c_line->current = i;
829 SET_LVAL_NODE_TYPE (build_real (type, value), type);
833 static int yylex PARAMS ((YYSTYPE *));
/* NOTE(review): the header of the lexer entry point is not among the
   visible lines; what follows is its body.  It skips white space and
   comments, then recognizes in turn numeric, character and string
   literals, separators and operators, and finally keywords and
   identifiers, storing a node in java_lval where a value is needed.  */
843 unicode_t c, first_unicode;
844 int ascii_index, all_ascii;
847 /* Translation of the Unicode escape in the raw stream of Unicode
848 characters. Takes care of line terminator. */
850 /* Skip white spaces: SP, TAB and FF or ULT */
851 for (c = java_get_unicode ();
852 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
855 ctxp->elc.line = ctxp->c_line->lineno;
856 ctxp->elc.col = ctxp->c_line->char_col-2;
859 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
861 if (c == 0x1a) /* CTRL-Z */
863 if ((c = java_get_unicode ()) == UEOF)
864 return 0; /* Ok here */
866 java_unget_unicode (); /* Caught later at the end of the function */
868 /* Handle EOF here */
869 if (c == UEOF) /* Should probably do something here... */
872 /* Take care of eventual comments. */
875 switch (c = java_get_unicode ())
880 c = java_get_unicode ();
883 /* It is ok to end a `//' comment with EOF, unless
884 we're being pedantic. */
886 java_lex_error ("Comment not terminated at end of input",
890 if (c == '\n') /* ULT */
896 if ((c = java_get_unicode ()) == '*')
898 if ((c = java_get_unicode ()) == '/')
899 goto step1; /* Empty documentation comment */
900 else if (java_parse_doc_section (c))
904 java_parse_end_comment ((c = java_get_unicode ()));
908 java_unget_unicode ();
914 ctxp->elc.line = ctxp->c_line->lineno;
915 ctxp->elc.prev_col = ctxp->elc.col;
916 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
917 if (ctxp->elc.col < 0)
918 fatal ("ctxp->elc.col < 0 - java_lex");
920 /* Numeric literals */
921 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
923 /* This section of code is borrowed from gcc/c-lex.c */
924 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
925 int parts[TOTAL_PARTS];
926 HOST_WIDE_INT high, low;
927 /* End borrowed section */
/* NOTE(review): none of the visible stores into literal_token below is
   preceded by a check of literal_index against this size -- confirm
   that the full source bounds the length of a numeric literal.  */
928 char literal_token [256];
929 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
932 int number_beginning = ctxp->c_line->current;
935 /* We might have a . separator instead of a FP like .[0-9]* */
938 unicode_t peep = java_sneak_unicode ();
940 if (!JAVA_ASCII_DIGIT (peep))
943 BUILD_OPERATOR (DOT_TK);
947 for (i = 0; i < TOTAL_PARTS; i++)
952 c = java_get_unicode ();
953 if (c == 'x' || c == 'X')
956 c = java_get_unicode ();
958 else if (JAVA_ASCII_DIGIT (c))
962 /* Push the '.' back and prepare for a FP parsing... */
963 java_unget_unicode ();
968 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
969 JAVA_LEX_LIT ("0", 10);
973 SET_LVAL_NODE (long_zero_node);
976 SET_LVAL_NODE (float_zero_node);
979 SET_LVAL_NODE (double_zero_node);
982 java_unget_unicode ();
983 SET_LVAL_NODE (integer_zero_node);
988 /* Parse the first part of the literal, until we find something
989 which is not a number. */
990 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
991 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
992 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
994 /* We store in a string (in case it turns out to be a FP) and in
995 PARTS if we have to process an integer literal. */
996 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
999 literal_token [literal_index++] = c;
1000 /* This section of code is borrowed from gcc/c-lex.c */
1001 for (count = 0; count < TOTAL_PARTS; count++)
1003 parts[count] *= radix;
1006 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1007 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1010 parts[0] += numeric;
1012 if (parts [TOTAL_PARTS-1] != 0)
1014 /* End borrowed section. */
1015 c = java_get_unicode ();
1018 /* If we have something from the FP char set but not a digit, parse
1020 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1023 int seen_digit = (literal_index ? 1 : 0);
1024 int seen_exponent = 0;
1025 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1026 double unless specified. */
1028 /* It is ok if the radix is 8 because this just means we've
1029 seen a leading `0'. However, radix==16 is invalid. */
1031 java_lex_error ("Can't express non-decimal FP literal", 0);
1041 literal_token [literal_index++ ] = c;
1042 c = java_get_unicode ();
1045 java_lex_error ("Invalid character in FP literal", 0);
1048 if (c == 'e' || c == 'E')
1052 /* {E,e} must have seen at least a digit */
1054 java_lex_error ("Invalid FP literal", 0);
1058 literal_token [literal_index++] = c;
1059 c = java_get_unicode ();
1062 java_lex_error ("Invalid character in FP literal", 0);
1064 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1066 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1067 stage = 4; /* So we fall through */
1070 if ((c=='-' || c =='+') && stage == 2)
1073 literal_token [literal_index++] = c;
1074 c = java_get_unicode ();
1077 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1078 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1079 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1080 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1082 if (JAVA_ASCII_DIGIT (c))
1084 literal_token [literal_index++ ] = c;
1085 c = java_get_unicode ();
1092 if (stage != 4) /* Don't push back fF/dD */
1093 java_unget_unicode ();
1095 /* An exponent (if any) must have seen a digit. */
1096 if (seen_exponent && !seen_digit)
1097 java_lex_error ("Invalid FP literal", 0);
1099 literal_token [literal_index] = '\0';
1100 JAVA_LEX_LIT (literal_token, radix);
/* The conversion itself runs inside do_float_handler, with its inputs
   and output location passed through the jpa_args record.  */
1103 a.literal_token = literal_token;
1105 a.java_lval = java_lval;
1106 a.number_beginning = number_beginning;
1107 if (do_float_handler (java_perform_atof, (PTR) &a))
1110 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1116 } /* JAVA_ASCII_FPCHAR (c) */
1118 /* Here we get back to converting the integral literal. */
1119 if (c == 'L' || c == 'l')
1121 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1122 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1123 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1124 java_lex_error ("Digit out of range in octal literal", 0);
1125 else if (radix == 16 && !literal_index)
1126 java_lex_error ("No digit specified for hexadecimal literal", 0);
1128 java_unget_unicode ();
1130 #ifdef JAVA_LEX_DEBUG
1131 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1132 JAVA_LEX_LIT (literal_token, radix);
1134 /* This section of code is borrowed from gcc/c-lex.c */
1137 bytes = GET_TYPE_PRECISION (long_type_node);
1138 for (i = bytes; i < TOTAL_PARTS; i++)
1146 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1148 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1149 / HOST_BITS_PER_CHAR)]
1150 << (i * HOST_BITS_PER_CHAR));
1151 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1153 /* End borrowed section. */
1155 /* Range checking */
1158 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1159 9223372036854775807L is the biggest `long' literal that can be
1160 expressed using a 10 radix. For other radixes, everything that
1161 fits within 64 bits is OK. */
1162 int hb = (high >> 31);
1163 if (overflow || (hb && low && radix == 10) ||
1164 (hb && high & 0x7fffffff && radix == 10) ||
1165 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1166 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1170 /* 2147483648 is valid if operand of a '-'. Otherwise,
1171 2147483647 is the biggest `int' literal that can be
1172 expressed using a 10 radix. For other radixes, everything
1173 that fits within 32 bits is OK. As all literals are
1174 signed, we sign extend here. */
1175 int hb = (low >> 31) & 0x1;
1176 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1177 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1178 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1181 ctxp->minus_seen = 0;
1182 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1183 (long_suffix ? long_type_node : int_type_node));
1187 ctxp->minus_seen = 0;
1189 /* Character literals */
1193 if ((c = java_get_unicode ()) == '\\')
1194 char_lit = java_parse_escape_sequence ();
1197 if (c == '\n' || c == '\'')
1198 java_lex_error ("Invalid character literal", 0);
1202 c = java_get_unicode ();
1204 if ((c == '\n') || (c == UEOF))
1205 java_lex_error ("Character literal not terminated at end of line", 0);
1207 java_lex_error ("Syntax error in character literal", 0);
1209 if (c == JAVA_CHAR_ERROR)
1210 char_lit = 0; /* We silently convert it to zero */
1212 JAVA_LEX_CHAR_LIT (char_lit);
1213 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1217 /* String literals */
1223 for (no_error = 1, c = java_get_unicode ();
1224 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1227 c = java_parse_escape_sequence ();
1228 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1229 java_unicode_2_utf8 (c);
1231 if (c == '\n' || c == UEOF) /* ULT */
1233 lineno--; /* Refer to the line the terminator was seen */
1234 java_lex_error ("String not terminated at end of line.", 0);
1238 obstack_1grow (&temporary_obstack, '\0');
1239 string = obstack_finish (&temporary_obstack);
1241 if (!no_error || (c != '"'))
1242 java_lval->node = error_mark_node; /* Requires further testing FIXME */
1244 java_lval->node = build_string (strlen (string), string);
1246 obstack_free (&temporary_obstack, string);
1247 return STRING_LIT_TK;
1255 BUILD_OPERATOR (OP_TK);
1261 if (ctxp->ccb_indent == 1)
1262 ctxp->first_ccb_indent1 = lineno;
1264 BUILD_OPERATOR (OCB_TK);
1268 if (ctxp->ccb_indent == 1)
1269 ctxp->last_ccb_indent1 = lineno;
1270 BUILD_OPERATOR (CCB_TK);
1273 BUILD_OPERATOR (OSB_TK);
1285 BUILD_OPERATOR (DOT_TK);
1286 /* return DOT_TK; */
1293 if ((c = java_get_unicode ()) == '=')
1295 BUILD_OPERATOR (EQ_TK);
1299 /* Equals is used in two different locations. In the
1300 variable_declarator: rule, it has to be seen as '=' as opposed
1301 to being seen as an ordinary assignment operator in
1302 assignment_operators: rule. */
1303 java_unget_unicode ();
1304 BUILD_OPERATOR (ASSIGN_TK);
1308 switch ((c = java_get_unicode ()))
1311 BUILD_OPERATOR (GTE_TK);
1313 switch ((c = java_get_unicode ()))
1316 if ((c = java_get_unicode ()) == '=')
1318 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1322 java_unget_unicode ();
1323 BUILD_OPERATOR (ZRS_TK);
1326 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1328 java_unget_unicode ();
1329 BUILD_OPERATOR (SRS_TK);
1332 java_unget_unicode ();
1333 BUILD_OPERATOR (GT_TK);
1337 switch ((c = java_get_unicode ()))
1340 BUILD_OPERATOR (LTE_TK);
1342 if ((c = java_get_unicode ()) == '=')
1344 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1348 java_unget_unicode ();
1349 BUILD_OPERATOR (LS_TK);
1352 java_unget_unicode ();
1353 BUILD_OPERATOR (LT_TK);
1357 switch ((c = java_get_unicode ()))
1360 BUILD_OPERATOR (BOOL_AND_TK);
1362 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1364 java_unget_unicode ();
1365 BUILD_OPERATOR (AND_TK);
1369 switch ((c = java_get_unicode ()))
1372 BUILD_OPERATOR (BOOL_OR_TK);
1374 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1376 java_unget_unicode ();
1377 BUILD_OPERATOR (OR_TK);
1381 switch ((c = java_get_unicode ()))
1384 BUILD_OPERATOR (INCR_TK);
1386 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1388 java_unget_unicode ();
1389 BUILD_OPERATOR (PLUS_TK);
1393 switch ((c = java_get_unicode ()))
1396 BUILD_OPERATOR (DECR_TK);
1398 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1400 java_unget_unicode ();
/* Remembered so the range checks on the next integer literal can accept
   the one extra magnitude that is only valid after a minus sign.  */
1401 ctxp->minus_seen = 1;
1402 BUILD_OPERATOR (MINUS_TK);
1406 if ((c = java_get_unicode ()) == '=')
1408 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1412 java_unget_unicode ();
1413 BUILD_OPERATOR (MULT_TK);
1417 if ((c = java_get_unicode ()) == '=')
1419 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1423 java_unget_unicode ();
1424 BUILD_OPERATOR (DIV_TK);
1428 if ((c = java_get_unicode ()) == '=')
1430 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1434 java_unget_unicode ();
1435 BUILD_OPERATOR (XOR_TK);
1439 if ((c = java_get_unicode ()) == '=')
1441 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1445 java_unget_unicode ();
1446 BUILD_OPERATOR (REM_TK);
1450 if ((c = java_get_unicode()) == '=')
1452 BUILD_OPERATOR (NEQ_TK);
1456 java_unget_unicode ();
1457 BUILD_OPERATOR (NEG_TK);
1462 BUILD_OPERATOR (REL_QM_TK);
1465 BUILD_OPERATOR (REL_CL_TK);
1467 BUILD_OPERATOR (NOT_TK);
1470 /* Keyword, boolean literal or null literal */
1471 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1472 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1474 java_unicode_2_utf8 (c);
1475 if (all_ascii && c >= 128)
1480 obstack_1grow (&temporary_obstack, '\0');
1481 string = obstack_finish (&temporary_obstack);
1482 java_unget_unicode ();
1484 /* If we have something all ascii, we consider a keyword, a boolean
1485 literal, a null literal or an all ASCII identifier. Otherwise,
1486 this is an identifier (possibly not respecting formation rule). */
1489 struct java_keyword *kw;
1490 if ((kw=java_keyword (string, ascii_index)))
1492 JAVA_LEX_KW (string);
1495 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1496 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1497 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1498 case PRIVATE_TK: case STRICT_TK:
1499 SET_MODIFIER_CTX (kw->token);
1502 SET_LVAL_NODE (float_type_node);
1505 SET_LVAL_NODE (double_type_node);
1508 SET_LVAL_NODE (boolean_type_node);
1511 SET_LVAL_NODE (byte_type_node);
1514 SET_LVAL_NODE (short_type_node);
1517 SET_LVAL_NODE (int_type_node);
1520 SET_LVAL_NODE (long_type_node);
1523 SET_LVAL_NODE (char_type_node);
1526 /* Keyword based literals */
1529 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1530 boolean_true_node : boolean_false_node));
1533 SET_LVAL_NODE (null_pointer_node);
1536 /* Some keyword we want to retain information on the location
1549 BUILD_OPERATOR (kw->token);
1557 /* We may have an ID here */
1558 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1560 JAVA_LEX_ID (string);
1561 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1565 /* Everything else is an invalid character in the input */
1567 char lex_error_buffer [128];
1568 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1569 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1570 java_lex_error (lex_error_buffer, 1);
/* Append UNICODE to temporary_obstack in UTF-8 form: one byte for
   0x01-0x7f, two bytes for 0x80-0x7ff, three bytes for everything
   above.  The value 0 also takes the two-byte form, which yields the
   bytes 0xc0 0x80 rather than a single zero byte.  */
1576 java_unicode_2_utf8 (unicode)
1579 if (RANGE (unicode, 0x01, 0x7f))
1580 obstack_1grow (&temporary_obstack, (char)unicode);
1581 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1583 obstack_1grow (&temporary_obstack,
1584 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1585 obstack_1grow (&temporary_obstack,
1586 (unsigned char)(0x80 | (unicode & 0x3f)));
1588 else /* Range 0x800-0xffff */
1590 obstack_1grow (&temporary_obstack,
1591 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1592 obstack_1grow (&temporary_obstack,
1593 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1594 obstack_1grow (&temporary_obstack,
1595 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, using the file name of the current
   context and the line and column recorded in ctxp->elc.  */
1601 build_wfl_node (node)
1604 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Record the position of a lexical error in ctxp->elc: the line number
   of the current line and its column minus one plus FORWARD.  The
   error flag of the context is cleared afterwards.  Both parameters are
   marked ATTRIBUTE_UNUSED, so some build presumably compiles the body
   away -- confirm against the full source.  */
1609 java_lex_error (msg, forward)
1610 const char *msg ATTRIBUTE_UNUSED;
1611 int forward ATTRIBUTE_UNUSED;
1614 ctxp->elc.line = ctxp->c_line->lineno;
1615 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1617 /* Might be caught in the middle of some error report */
1618 ctxp->java_error_flag = 0;
1635 if (next != '\n' && next != EOF)
/* Build in temporary_obstack the text of line LINE of FILENAME,
   followed by a second line of padding that ends in a caret marker, and
   return the finished obstack object.  The file is opened afresh on
   every call.  COL selects where the marker goes; its special values
   are listed below.  */
1647 java_get_line_col (filename, line, col)
1648 const char *filename ATTRIBUTE_UNUSED;
1649 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1654 /* Dumb implementation. Doesn't try to cache or optimize things. */
1655 /* First line of the file is line 1, first column is 1 */
1657 /* COL == -1 means, at the CR/LF in LINE */
1658 /* COL == -2 means, at the first non space char in LINE */
1661 int c, ccol, cline = 1;
1662 int current_line_col = 0;
1663 int first_non_space = 0;
/* NOTE(review): the message below names java_display_line_col, which
   is not the name of this function.  */
1666 if (!(fp = fopen (filename, "r")))
1667 fatal ("Can't open file - java_display_line_col");
/* Skip forward to the requested line.  */
1669 while (cline != line)
1674 static char msg[] = "<<file too short - unexpected EOF>>";
1675 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1678 if (java_is_eol (fp, c))
1682 /* Gather the chars of the current line in a buffer */
1686 if (c < 0 || java_is_eol (fp, c))
1688 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1689 first_non_space = current_line_col;
1690 obstack_1grow (&temporary_obstack, c);
1695 obstack_1grow (&temporary_obstack, '\n');
1699 col = current_line_col;
1700 first_non_space = 0;
1703 col = first_non_space;
1705 first_non_space = 0;
1707 /* Place the '^' at the right position */
1708 base = obstack_base (&temporary_obstack);
1709 for (ccol = 1; ccol <= col+3; ccol++)
1711 /* Compute \t when reaching first_non_space */
1712 char c = (first_non_space ?
1713 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1714 obstack_1grow (&temporary_obstack, c);
1716 obstack_grow0 (&temporary_obstack, "^", 1);
1719 return obstack_finish (&temporary_obstack);