1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
39 #include "chartables.h"
41 /* Forward declarations.
   NOTE(review): java_store_unicode is declared twice in this list;
   the second declaration is redundant. */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
70 /* This is nonzero if we have initialized `need_byteswap'. */
71 static int byteswap_init = 0;
73 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
74 big-endian order -- not native endian order. We handle this by
75 doing a conversion once at startup and seeing what happens. This
76 flag holds the results of this determination. */
77 static int need_byteswap = 0;
/* Prepare the front end for lexing a new input. The identifiers and
   WFL nodes that are still unset are created, java.lang is put on
   ctxp->import_demand_list, the per-file fields of ctxp are reset and
   a lexer reading FINPUT with encoding ENCODING is stored in
   ctxp->lexer. */
80 java_init_lex (finput, encoding)
85 int java_lang_imported = 0;
88 java_lang_id = get_identifier ("java.lang");
89 if (!java_lang_cloneable)
90 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
91 if (!java_io_serializable)
92 java_io_serializable = get_identifier ("java.io.Serializable");
94 inst_id = get_identifier ("inst$");
96 wpv_id = get_identifier ("write_parm_value$");
/* Prepend java.lang to the list of import-on-demand packages.
   NOTE(review): java_lang_imported looks like a local that is always
   0 at this point, which would make the test always true -- confirm. */
98 if (!java_lang_imported)
100 tree node = build_tree_list
101 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
102 read_import_dir (TREE_PURPOSE (node));
103 TREE_CHAIN (node) = ctxp->import_demand_list;
104 ctxp->import_demand_list = node;
105 java_lang_imported = 1;
109 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
111 label_id = get_identifier ("$L");
113 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
114 if (!wfl_string_buffer)
116 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
118 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Reset the per-file state kept in the parser context. */
120 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
121 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
/* NOTE(review): 11 is a hard-coded element count for modifier_ctx;
   confirm that it matches the declaration of that array. */
123 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
124 memset ((PTR) current_jcf, 0, sizeof (JCF));
125 ctxp->current_parsed_class = NULL;
126 ctxp->package = NULL_TREE;
129 ctxp->filename = input_filename;
/* Line numbering restarts at 0; java_allocate_new_line increments
   lineno before assigning it to a line. */
130 ctxp->lineno = lineno = 0;
133 ctxp->minus_seen = 0;
134 ctxp->java_error_flag = 0;
135 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE into a static buffer, for use in
   diagnostics. Characters that came from a Unicode escape, or whose
   value is above 128, are written as a \uXXXX escape; other characters
   are copied as is. The buffer is static, so its contents are
   overwritten by the next call.
   NOTE(review): the test is > 128, so the value 128 itself takes the
   raw path -- confirm this is intended. */
139 java_sprint_unicode (line, i)
140 struct java_line *line;
143 static char buffer [10];
144 if (line->unicode_escape_p [i] || line->line [i] > 128)
145 sprintf (buffer, "\\u%04x", line->line [i]);
148 buffer [0] = line->line [i];
/* Return the character at the current position of the current line
   without consuming it: the position is read but not advanced. */
155 java_sneak_unicode ()
157 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Step back one character in the current line, so that the character
   is returned again by the next read. Both the position and the
   column counter are moved back. Calling this at the very start of a
   line is a fatal error. */
161 java_unget_unicode ()
163 if (!ctxp->c_line->current)
164 fatal ("can't unget unicode - java_unget_unicode");
165 ctxp->c_line->current--;
166 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Make ctxp->c_line ready to receive a new line of input. When the
   current line holds something other than white space it becomes
   ctxp->p_line (the buffers of the old p_line are freed first) and a
   new line structure is allocated. The line is then emptied, the
   look-ahead character saved from the old line is placed at its start
   together with its escape flag, and the line gets the next line
   number. */
170 java_allocate_new_line ()
/* Save the look-ahead state before the current line is given away. */
172 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
173 char ahead_escape_p = (ctxp->c_line ?
174 ctxp->c_line->unicode_escape_ahead_p : 0);
176 if (ctxp->c_line && !ctxp->c_line->white_space_only)
180 free (ctxp->p_line->unicode_escape_p);
181 free (ctxp->p_line->line);
184 ctxp->p_line = ctxp->c_line;
185 ctxp->c_line = NULL; /* Reallocated */
190 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
191 ctxp->c_line->max = JAVA_LINE_MAX;
192 ctxp->c_line->line = (unicode_t *)xmalloc
193 (sizeof (unicode_t)*ctxp->c_line->max);
194 ctxp->c_line->unicode_escape_p =
195 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
196 ctxp->c_line->white_space_only = 0;
/* Empty the line and rewind the read position and column. */
199 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
200 ctxp->c_line->char_col = ctxp->c_line->current = 0;
203 ctxp->c_line->line [ctxp->c_line->size] = ahead;
204 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
205 ctxp->c_line->size++;
207 ctxp->c_line->ahead [0] = 0;
208 ctxp->c_line->unicode_escape_ahead_p = 0;
209 ctxp->c_line->lineno = ++lineno;
/* A line counts as white space only until a character that is not
   white space is stored in it. */
210 ctxp->c_line->white_space_only = 1;
213 /* Create a new lexer object that reads from FINPUT. ENCODING is the
   name of the input encoding: it is passed to iconv_open, and an
   encoding that cannot be handled is reported with fatal. */
215 java_new_lexer (finput, encoding)
217 const char *encoding;
219 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
222 lex->finput = finput;
224 lex->unget_value = 0;
/* Ask iconv for a converter from ENCODING to UCS-2. */
228 lex->handle = iconv_open ("UCS-2", encoding);
229 if (lex->handle != (iconv_t) -1)
235 lex->read_anything = 0;
236 lex->use_fallback = 0;
238 /* Work around broken iconv() implementations by doing checking at
239 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
240 then all UCS-2 encoders will be broken. Perhaps not a valid
248 handle = iconv_open ("UCS-2", "UTF-8");
249 if (handle != (iconv_t) -1)
256 /* This is the UTF-8 encoding of \ufeff. */
263 outp = (char *) &result;
266 r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
267 /* Conversion must be complete for us to use the result. */
268 if (r != (size_t) -1 && inc == 0 && outc == 0)
269 need_byteswap = (result != 0xfeff);
/* Each lexer keeps its own copy of the byte-swap decision. */
273 lex->byte_swap = need_byteswap;
276 #endif /* HAVE_ICONV */
278 /* If iconv failed, use the internal decoder if the default
279 encoding was requested. This code is used on platforms where
280 iconv() exists but is insufficient for our needs. For
281 instance, on Solaris 2.5 iconv() cannot handle UTF-8 or UCS-2. */
282 if (strcmp (encoding, DEFAULT_ENCODING))
286 lex->use_fallback = 1;
287 #endif /* HAVE_ICONV */
291 fatal ("unknown encoding: `%s'", encoding);
/* Close the iconv handle of LEX, unless LEX was using the fallback
   decoder, in which case the handle is left alone. */
297 java_destroy_lexer (lex)
301 if (! lex->use_fallback)
302 iconv_close (lex->handle);
/* Body of the raw character reader (presumably java_read_char; the
   function header is not visible here). A pending lex->unget_value is
   consumed first. Otherwise, unless the fallback decoder is selected,
   input is read into lex->buffer with fread and converted with iconv
   into lex->out_buffer, from which one character is taken. The code
   after the HAVE_ICONV section decodes UTF-8 by hand with getc. */
311 if (lex->unget_value)
313 unicode_t r = lex->unget_value;
314 lex->unget_value = 0;
319 if (! lex->use_fallback)
321 size_t ir, inbytesleft, in_save, out_count, out_save;
325 /* If there is data which has already been converted, use it. */
326 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
333 /* See if we need to read more data. If FIRST == 0 then
334 the previous conversion attempt ended in the middle of
335 a character at the end of the buffer. Otherwise we
336 only have to read if the buffer is empty. */
337 if (lex->first == 0 || lex->first >= lex->last)
341 if (lex->first >= lex->last)
346 if (feof (lex->finput))
348 r = fread (&lex->buffer[lex->last], 1,
349 sizeof (lex->buffer) - lex->last,
354 inbytesleft = lex->last - lex->first;
355 out_count = sizeof (lex->out_buffer) - lex->out_last;
357 if (inbytesleft == 0)
359 /* We've tried to read and there is nothing left. */
/* Remember the byte counts so that the amounts consumed and produced
   can be computed after the conversion. */
363 in_save = inbytesleft;
364 out_save = out_count;
365 inp = &lex->buffer[lex->first];
366 outp = &lex->out_buffer[lex->out_last];
367 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
370 /* If we haven't read any bytes, then look to see if we
372 if (! lex->read_anything && out_save - out_count >= 2)
374 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
380 else if (uc == 0xfffe)
385 lex->read_anything = 1;
391 for (i = 0; i < out_save - out_count; i += 2)
393 char t = lex->out_buffer[lex->out_last + i];
394 lex->out_buffer[lex->out_last + i]
395 = lex->out_buffer[lex->out_last + i + 1];
396 lex->out_buffer[lex->out_last + i + 1] = t;
400 lex->first += in_save - inbytesleft;
401 lex->out_last += out_save - out_count;
403 /* If we converted anything at all, move along. */
404 if (out_count != out_save)
407 if (ir == (size_t) -1)
411 /* This is ok. This means that the end of our buffer
412 is in the middle of a character sequence. We just
413 move the valid part of the buffer to the beginning
415 /* We use bcopy() because it should work for
416 overlapping strings. Use memmove() instead... */
/* NOTE(review): bcopy is a legacy interface; memmove, as the comment
   above already suggests, is the standard replacement (mind the
   swapped source and destination arguments). */
417 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
418 lex->last - lex->first);
419 lex->last -= lex->first;
424 /* A more serious error. */
425 java_lex_error ("unrecognized character in input stream",
433 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
435 /* Don't have any data. */
440 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
445 #endif /* HAVE_ICONV */
448 c = getc (lex->finput);
/* Two-byte UTF-8 sequence: 110xxxxx 10xxxxxx. */
456 if ((c & 0xe0) == 0xc0)
458 c1 = getc (lex->finput);
459 if ((c1 & 0xc0) == 0x80)
460 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
/* Three-byte UTF-8 sequence: 1110xxxx 10xxxxxx 10xxxxxx. */
463 else if ((c & 0xf0) == 0xe0)
465 c1 = getc (lex->finput);
466 if ((c1 & 0xc0) == 0x80)
468 c2 = getc (lex->finput);
469 if ((c2 & 0xc0) == 0x80)
470 return (unicode_t)(((c & 0xf) << 12) +
471 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
479 /* We simply don't support invalid characters. */
480 java_lex_error ("malformed UTF-8 character", 0);
484 /* We only get here on error. */
/* Append character C to line L and record UNICODE_ESCAPE_P for it in
   the parallel flag array. When the line is full, both arrays are
   enlarged by JAVA_LINE_MAX entries first. */
489 java_store_unicode (l, c, unicode_escape_p)
492 int unicode_escape_p;
494 if (l->size == l->max)
496 l->max += JAVA_LINE_MAX;
497 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
498 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
499 sizeof (char)*l->max);
501 l->line [l->size] = c;
502 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read the next character from LEX, translating a Unicode escape (a
   backslash, any number of u, then four hex digits) into the character
   it denotes. *UNICODE_ESCAPE_P is set to 1 when the result comes from
   such an escape and to 0 otherwise. When the character following a
   backslash does not start an escape, that character is saved in
   lex->unget_value and the backslash itself is returned. */
506 java_read_unicode (lex, unicode_escape_p)
508 int *unicode_escape_p;
512 c = java_read_char (lex);
513 *unicode_escape_p = 0;
522 if ((lex->bs_count) % 2 == 1)
524 /* Odd number of \ seen. */
525 c = java_read_char (lex);
528 unicode_t unicode = 0;
530 /* Next should be 4 hex digits, otherwise it's an error.
531 The hex value is converted into the unicode, pushed into
532 the Unicode stream. */
/* SHIFT walks from the most significant hex digit to the least. */
533 for (shift = 12; shift >= 0; shift -= 4)
535 if ((c = java_read_char (lex)) == UEOF)
537 if (c >= '0' && c <= '9')
538 unicode |= (unicode_t)((c-'0') << shift);
539 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
540 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
543 /* Recognize any number of u in \u. */
547 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
550 *unicode_escape_p = 1;
553 lex->unget_value = c;
555 return (unicode_t) '\\';
/* Read a character from LEX with java_read_unicode, folding a line
   terminator made of two characters into a single newline. This needs
   one character of look-ahead, which may be pushed back through
   lex->unget_value. UNICODE_ESCAPE_P is passed on to
   java_read_unicode for the first character read. */
559 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
561 int *unicode_escape_p;
563 int c = java_read_unicode (lex, unicode_escape_p);
567 /* We have to read ahead to see if we got \r\n. In that case we
568 return a single line terminator. */
570 c = java_read_unicode (lex, &dummy);
572 lex->unget_value = c;
573 /* In either case we must return a newline. */
/* Body of the line-buffered character reader (presumably
   java_get_unicode; the function header is not visible here). It
   returns the character at the current position of ctxp->c_line and
   advances past it. When there is no current line, or the current one
   is used up, a new line is allocated and filled from the lexer until
   a newline or UEOF is seen. */
583 /* It's time to read a line when... */
584 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
589 if (ctxp->lexer->hit_eof)
592 java_allocate_new_line ();
593 if (ctxp->c_line->line[0] != '\n')
597 int unicode_escape_p;
598 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
603 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
604 if (ctxp->c_line->white_space_only
605 && !JAVA_WHITE_SPACE_P (c)
607 ctxp->c_line->white_space_only = 0;
609 if ((c == '\n') || (c == UEOF))
/* Reaching the end of input without having read any character marks
   the lexer as finished. */
613 if (c == UEOF && ! found_chars)
615 ctxp->lexer->hit_eof = 1;
620 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
621 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
622 return ctxp->c_line->line [ctxp->c_line->current++];
625 /* Parse the end of a C style comment.
626 * C is the first character following the '/' and '*'.
   * A lexer error is reported if the input ends before the comment
   * is closed. */
628 java_parse_end_comment (c)
631 for ( ;; c = java_get_unicode ())
636 java_lex_error ("Comment not terminated at end of input", 0);
639 switch (c = java_get_unicode ())
642 java_lex_error ("Comment not terminated at end of input", 0);
646 case '*': /* reparse only '*' */
647 java_unget_unicode ();
653 /* Parse the documentation section. Keywords must be at the beginning
654 of a documentation comment line (ignoring white space and any `*'
655 character). Parsed keyword(s): @deprecated (the comparison below is
   case sensitive). C is the first character of the section. Returns 1
   once the closing star-slash has been read. */
658 java_parse_doc_section (c)
661 int valid_tag = 0, seen_star = 0;
663 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
675 c = java_get_unicode();
679 java_lex_error ("Comment not terminated at end of input", 0);
681 if (seen_star && (c == '/'))
682 return 1; /* Goto step1 in caller */
684 /* We're parsing @deprecated */
685 if (valid_tag && (c == '@'))
/* NOTE(review): tag_index can reach 10 when the loop stops on its
   bound, and the terminator is then written at tag[10]; confirm that
   tag (declared outside this view) holds at least 11 chars. */
690 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
692 c = java_get_unicode ();
693 tag [tag_index++] = c;
697 java_lex_error ("Comment not terminated at end of input", 0);
698 tag [tag_index] = '\0';
700 if (!strcmp (tag, "deprecated"))
701 ctxp->deprecated = 1;
703 java_unget_unicode ();
707 /* Return true if C is a valid start character for a Java identifier.
708 This is only called if C >= 128 -- smaller values are handled
709 inline. However, this function handles all values anyway. */
711 java_start_char_p (c)
/* type_table is indexed by the high byte of C. */
714 unsigned int hi = c / 256;
715 char *page = type_table[hi];
716 unsigned long val = (unsigned long) page;
/* An entry with bits set outside the two flag values is used as a
   pointer to a table indexed by the low byte of C. Otherwise the
   entry is presumably the flag set for the whole page (that branch is
   not visible here). */
719 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
720 flags = page[c & 255];
724 return flags & LETTER_START;
727 /* Return true if C is a valid part character for a Java identifier.
728 This is only called if C >= 128 -- smaller values are handled
729 inline. However, this function handles all values anyway. */
/* type_table is indexed by the high byte of C. */
734 unsigned int hi = c / 256;
735 char *page = type_table[hi];
736 unsigned long val = (unsigned long) page;
/* Same lookup as for start characters: an entry with bits set outside
   the two flag values is used as a pointer to a table indexed by the
   low byte of C. */
739 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
740 flags = page[c & 255];
744 return flags & LETTER_PART;
/* Parse the escape sequence that follows a backslash in a character
   or string literal. Returns the character the escape denotes, or
   JAVA_CHAR_ERROR after reporting an error when the escape is not
   valid. */
748 java_parse_escape_sequence ()
753 switch (c = java_get_unicode ())
/* Simple escapes. The values returned below are, in order: BS, HT,
   LF, FF, CR, double quote, single quote and backslash. */
756 return (unicode_t)0x8;
758 return (unicode_t)0x9;
760 return (unicode_t)0xa;
762 return (unicode_t)0xc;
764 return (unicode_t)0xd;
766 return (unicode_t)0x22;
768 return (unicode_t)0x27;
770 return (unicode_t)0x5c;
/* Octal escapes. */
771 case '0': case '1': case '2': case '3': case '4':
772 case '5': case '6': case '7':
775 int octal_escape_index = 0;
779 for (; octal_escape_index < max && RANGE (c, '0', '7');
780 c = java_get_unicode ())
782 if (octal_escape_index == 0 && c > '3')
784 /* According to the grammar, `\477' has a well-defined
785 meaning -- it is `\47' followed by `7'. */
788 octal_escape [octal_escape_index++] = c;
/* The character that ended the digits belongs to what follows. */
791 java_unget_unicode ();
/* Combine the collected digits, most significant first, three bits
   per digit. */
793 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
794 i < octal_escape_index; i++, shift -= 3)
795 char_lit |= (octal_escape [i] - '0') << shift;
800 java_lex_error ("Invalid character in escape sequence", 0);
801 return JAVA_CHAR_ERROR;
/* Conversion of a floating point literal. java_perform_atof receives
   a struct jpa_args through AV, converts a->literal_token to a real
   value of float or double type depending on a->fflag, reports values
   that are infinite or not a number, reports underflow when the value
   is zero although the literal has a significant digit, and hands the
   resulting node to SET_LVAL_NODE_TYPE. */
805 /* Isolate the code which may raise an arithmetic exception in its
814 int number_beginning;
817 #ifdef REAL_ARITHMETIC
818 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
820 #define IS_ZERO(X) ((X) == 0)
823 static void java_perform_atof PARAMS ((PTR));
826 java_perform_atof (av)
829 struct jpa_args *a = (struct jpa_args *)av;
830 YYSTYPE *java_lval = a->java_lval;
831 int number_beginning = a->number_beginning;
832 REAL_VALUE_TYPE value;
833 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
835 SET_REAL_VALUE_ATOF (value,
836 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
838 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
840 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
843 else if (IS_ZERO (value))
845 /* We check to see if the value is really 0 or if we've found an
846 underflow. We do this in the most primitive imaginable way. */
848 char *p = a->literal_token;
/* Scan the mantissa, stopping at the exponent marker, for a character
   other than 0 and the decimal point. */
851 while (*p && *p != 'e' && *p != 'E')
853 if (*p != '0' && *p != '.')
/* Point the lexer at the start of the literal while the error is
   reported, then restore the read position. */
862 int i = ctxp->c_line->current;
863 ctxp->c_line->current = number_beginning;
864 java_lex_error ("Floating point literal underflow", 0);
865 ctxp->c_line->current = i;
869 SET_LVAL_NODE_TYPE (build_real (type, value), type);
873 static int yylex PARAMS ((YYSTYPE *));
/* Start of the body of the token reader (its header is not visible
   here). White space and comments are skipped first; the position of
   the token that follows is then recorded in ctxp->elc. */
884 unicode_t first_unicode;
885 int ascii_index, all_ascii;
888 /* Translation of the Unicode escape in the raw stream of Unicode
889 characters. Takes care of line terminator. */
891 /* Skip white spaces: SP, TAB and FF or ULT */
892 for (c = java_get_unicode ();
893 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
896 ctxp->elc.line = ctxp->c_line->lineno;
897 ctxp->elc.col = ctxp->c_line->char_col-2;
/* Never record a negative column. */
900 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
/* A CTRL-Z directly followed by the end of input ends the token
   stream: 0 is returned. */
902 if (c == 0x1a) /* CTRL-Z */
904 if ((c = java_get_unicode ()) == UEOF)
905 return 0; /* Ok here */
907 java_unget_unicode (); /* Caught later, at the end of the function */
909 /* Handle EOF here */
910 if (c == UEOF) /* Should probably do something here... */
913 /* Take care of possible comments. */
916 switch (c = java_get_unicode ())
921 c = java_get_unicode ();
924 /* It is ok to end a `//' comment with EOF, unless
925 we're being pedantic. */
927 java_lex_error ("Comment not terminated at end of input",
931 if (c == '\n') /* ULT */
937 if ((c = java_get_unicode ()) == '*')
939 if ((c = java_get_unicode ()) == '/')
940 goto step1; /* Empty documentation comment */
941 else if (java_parse_doc_section (c))
945 java_parse_end_comment ((c = java_get_unicode ()));
949 java_unget_unicode ();
/* Record where the token starts, keeping the previous column in
   prev_col. */
955 ctxp->elc.line = ctxp->c_line->lineno;
956 ctxp->elc.prev_col = ctxp->elc.col;
957 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
958 if (ctxp->elc.col < 0)
959 fatal ("ctxp->elc.col < 0 - java_lex");
961 /* Numeric literals */
962 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
964 /* This section of code is borrowed from gcc/c-lex.c */
965 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
966 int parts[TOTAL_PARTS];
967 HOST_WIDE_INT high, low;
968 /* End borrowed section */
/* NOTE(review): literal_token holds 256 chars and no bound check on
   literal_index is visible in this section; a very long literal could
   overrun the buffer -- confirm. */
969 char literal_token [256];
970 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
973 int number_beginning = ctxp->c_line->current;
976 /* We might have a . separator instead of a FP like .[0-9]* */
979 unicode_t peep = java_sneak_unicode ();
981 if (!JAVA_ASCII_DIGIT (peep))
984 BUILD_OPERATOR (DOT_TK);
988 for (i = 0; i < TOTAL_PARTS; i++)
993 c = java_get_unicode ();
994 if (c == 'x' || c == 'X')
997 c = java_get_unicode ();
999 else if (JAVA_ASCII_DIGIT (c))
1003 /* Push the '.' back and prepare for a FP parsing... */
1004 java_unget_unicode ();
1009 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1010 JAVA_LEX_LIT ("0", 10);
1014 SET_LVAL_NODE (long_zero_node);
1015 return (INT_LIT_TK);
1017 SET_LVAL_NODE (float_zero_node);
1020 SET_LVAL_NODE (double_zero_node);
1023 java_unget_unicode ();
1024 SET_LVAL_NODE (integer_zero_node);
1025 return (INT_LIT_TK);
1029 /* Parse the first part of the literal, until we find something
1030 which is not a number. */
1031 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1032 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1033 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1035 /* We store in a string (in case it turns out to be a FP) and in
1036 PARTS if we have to process an integer literal. */
1037 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
1040 literal_token [literal_index++] = c;
1041 /* This section of code is borrowed from gcc/c-lex.c */
1042 for (count = 0; count < TOTAL_PARTS; count++)
1044 parts[count] *= radix;
1047 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1048 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1051 parts[0] += numeric;
1053 if (parts [TOTAL_PARTS-1] != 0)
1055 /* End borrowed section. */
1056 c = java_get_unicode ();
1059 /* If we have something from the FP char set but not a digit, parse
1061 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1064 int seen_digit = (literal_index ? 1 : 0);
1065 int seen_exponent = 0;
1066 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1067 double unless specified. */
1069 /* It is ok if the radix is 8 because this just means we've
1070 seen a leading `0'. However, radix==16 is invalid. */
1072 java_lex_error ("Can't express non-decimal FP literal", 0);
1082 literal_token [literal_index++ ] = c;
1083 c = java_get_unicode ();
1086 java_lex_error ("Invalid character in FP literal", 0);
1089 if (c == 'e' || c == 'E')
1093 /* {E,e} must have seen at least a digit */
1095 java_lex_error ("Invalid FP literal", 0);
1099 literal_token [literal_index++] = c;
1100 c = java_get_unicode ();
1103 java_lex_error ("Invalid character in FP literal", 0);
1105 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1107 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1108 stage = 4; /* So we fall through */
1111 if ((c=='-' || c =='+') && stage == 2)
1114 literal_token [literal_index++] = c;
1115 c = java_get_unicode ();
1118 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1119 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1120 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1121 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1123 if (JAVA_ASCII_DIGIT (c))
1125 literal_token [literal_index++ ] = c;
1126 c = java_get_unicode ();
1133 if (stage != 4) /* Don't push back fF/dD */
1134 java_unget_unicode ();
1136 /* An exponent (if any) must have seen a digit. */
1137 if (seen_exponent && !seen_digit)
1138 java_lex_error ("Invalid FP literal", 0);
1140 literal_token [literal_index] = '\0';
1141 JAVA_LEX_LIT (literal_token, radix);
1144 a.literal_token = literal_token;
1146 a.java_lval = java_lval;
1147 a.number_beginning = number_beginning;
1148 if (do_float_handler (java_perform_atof, (PTR) &a))
1151 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1157 } /* JAVA_ASCII_FPCHAR (c) */
1159 /* Here we get back to converting the integral literal. */
1160 if (c == 'L' || c == 'l')
1162 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1163 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1164 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1165 java_lex_error ("Digit out of range in octal literal", 0);
1166 else if (radix == 16 && !literal_index)
1167 java_lex_error ("No digit specified for hexadecimal literal", 0);
1169 java_unget_unicode ();
1171 #ifdef JAVA_LEX_DEBUG
1172 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1173 JAVA_LEX_LIT (literal_token, radix);
1175 /* This section of code is borrowed from gcc/c-lex.c */
1178 bytes = GET_TYPE_PRECISION (long_type_node);
1179 for (i = bytes; i < TOTAL_PARTS; i++)
1187 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1189 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1190 / HOST_BITS_PER_CHAR)]
1191 << (i * HOST_BITS_PER_CHAR));
1192 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1194 /* End borrowed section. */
1196 /* Range checking */
1199 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1200 9223372036854775807L is the biggest `long' literal that can be
1201 expressed using a 10 radix. For other radixes, everything that
1202 fits within 64 bits is OK. */
1203 int hb = (high >> 31);
1204 if (overflow || (hb && low && radix == 10) ||
1205 (hb && high & 0x7fffffff && radix == 10) ||
1206 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1207 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1211 /* 2147483648 is valid if operand of a '-'. Otherwise,
1212 2147483647 is the biggest `int' literal that can be
1213 expressed using a 10 radix. For other radixes, everything
1214 that fits within 32 bits is OK. As all literals are
1215 signed, we sign extend here. */
1216 int hb = (low >> 31) & 0x1;
1217 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1218 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1219 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1222 ctxp->minus_seen = 0;
1223 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1224 (long_suffix ? long_type_node : int_type_node));
1228 ctxp->minus_seen = 0;
1230 /* Character literals. An invalid escape is reported by
   java_parse_escape_sequence and the value of the literal then
   becomes 0. */
1234 if ((c = java_get_unicode ()) == '\\')
1235 char_lit = java_parse_escape_sequence ();
1238 if (c == '\n' || c == '\'')
1239 java_lex_error ("Invalid character literal", 0);
1243 c = java_get_unicode ();
1245 if ((c == '\n') || (c == UEOF))
1246 java_lex_error ("Character literal not terminated at end of line", 0);
1248 java_lex_error ("Syntax error in character literal", 0);
1250 if (char_lit == JAVA_CHAR_ERROR)
1251 char_lit = 0; /* We silently convert it to zero */
1253 JAVA_LEX_CHAR_LIT (char_lit);
1254 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1258 /* String literals. The characters are accumulated, UTF-8 encoded,
   on temporary_obstack; that storage is released once the string
   node has been built. */
1264 for (no_error = 1, c = java_get_unicode ();
1265 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1268 c = java_parse_escape_sequence ();
1269 if (c == JAVA_CHAR_ERROR)
1272 c = 0; /* We silently convert it to zero. */
1274 java_unicode_2_utf8 (c);
1276 if (c == '\n' || c == UEOF) /* ULT */
1278 lineno--; /* Refer to the line the terminator was seen */
1279 java_lex_error ("String not terminated at end of line.", 0);
1283 obstack_1grow (&temporary_obstack, '\0');
1284 string = obstack_finish (&temporary_obstack);
1286 if (!no_error || (c != '"'))
1287 java_lval->node = error_mark_node; /* Requires further testing FIXME */
1289 java_lval->node = build_string (strlen (string), string);
1291 obstack_free (&temporary_obstack, string);
1292 return STRING_LIT_TK;
/* Separators and operators. Operators made of several characters are
   recognized by reading ahead; when the extra character does not
   extend the operator it is pushed back with java_unget_unicode. */
1300 BUILD_OPERATOR (OP_TK);
1306 if (ctxp->ccb_indent == 1)
1307 ctxp->first_ccb_indent1 = lineno;
1309 BUILD_OPERATOR (OCB_TK);
1313 if (ctxp->ccb_indent == 1)
1314 ctxp->last_ccb_indent1 = lineno;
1315 BUILD_OPERATOR (CCB_TK);
1318 BUILD_OPERATOR (OSB_TK);
1330 BUILD_OPERATOR (DOT_TK);
1331 /* return DOT_TK; */
1338 if ((c = java_get_unicode ()) == '=')
1340 BUILD_OPERATOR (EQ_TK);
1344 /* Equals is used in two different locations. In the
1345 variable_declarator: rule, it has to be seen as '=' as opposed
1346 to being seen as an ordinary assignment operator in
1347 assignment_operators: rule. */
1348 java_unget_unicode ();
1349 BUILD_OPERATOR (ASSIGN_TK);
1353 switch ((c = java_get_unicode ()))
1356 BUILD_OPERATOR (GTE_TK);
1358 switch ((c = java_get_unicode ()))
1361 if ((c = java_get_unicode ()) == '=')
1363 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1367 java_unget_unicode ();
1368 BUILD_OPERATOR (ZRS_TK);
1371 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1373 java_unget_unicode ();
1374 BUILD_OPERATOR (SRS_TK);
1377 java_unget_unicode ();
1378 BUILD_OPERATOR (GT_TK);
1382 switch ((c = java_get_unicode ()))
1385 BUILD_OPERATOR (LTE_TK);
1387 if ((c = java_get_unicode ()) == '=')
1389 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1393 java_unget_unicode ();
1394 BUILD_OPERATOR (LS_TK);
1397 java_unget_unicode ();
1398 BUILD_OPERATOR (LT_TK);
1402 switch ((c = java_get_unicode ()))
1405 BUILD_OPERATOR (BOOL_AND_TK);
1407 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1409 java_unget_unicode ();
1410 BUILD_OPERATOR (AND_TK);
1414 switch ((c = java_get_unicode ()))
1417 BUILD_OPERATOR (BOOL_OR_TK);
1419 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1421 java_unget_unicode ();
1422 BUILD_OPERATOR (OR_TK);
1426 switch ((c = java_get_unicode ()))
1429 BUILD_OPERATOR (INCR_TK);
1431 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1433 java_unget_unicode ();
1434 BUILD_OPERATOR (PLUS_TK);
1438 switch ((c = java_get_unicode ()))
1441 BUILD_OPERATOR (DECR_TK);
1443 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1445 java_unget_unicode ();
/* Remember the lone minus: the range checks on integer literals
   consult ctxp->minus_seen. */
1446 ctxp->minus_seen = 1;
1447 BUILD_OPERATOR (MINUS_TK);
1451 if ((c = java_get_unicode ()) == '=')
1453 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1457 java_unget_unicode ();
1458 BUILD_OPERATOR (MULT_TK);
1462 if ((c = java_get_unicode ()) == '=')
1464 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1468 java_unget_unicode ();
1469 BUILD_OPERATOR (DIV_TK);
1473 if ((c = java_get_unicode ()) == '=')
1475 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1479 java_unget_unicode ();
1480 BUILD_OPERATOR (XOR_TK);
1484 if ((c = java_get_unicode ()) == '=')
1486 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1490 java_unget_unicode ();
1491 BUILD_OPERATOR (REM_TK);
1495 if ((c = java_get_unicode()) == '=')
1497 BUILD_OPERATOR (NEQ_TK);
1501 java_unget_unicode ();
1502 BUILD_OPERATOR (NEG_TK);
1507 BUILD_OPERATOR (REL_QM_TK);
1510 BUILD_OPERATOR (REL_CL_TK);
1512 BUILD_OPERATOR (NOT_TK);
1515 /* Keyword, boolean literal or null literal */
/* Collect the longest run of identifier part characters, UTF-8
   encoded, on temporary_obstack, noting whether it is all ASCII. */
1516 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1517 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1519 java_unicode_2_utf8 (c);
1520 if (all_ascii && c >= 128)
1525 obstack_1grow (&temporary_obstack, '\0');
1526 string = obstack_finish (&temporary_obstack);
/* The character that ended the run belongs to the next token. */
1527 java_unget_unicode ();
1529 /* If we have something all ascii, we consider a keyword, a boolean
1530 literal, a null literal or an all ASCII identifier. Otherwise,
1531 this is an identifier (possibly not respecting formation rule). */
1534 struct java_keyword *kw;
1535 if ((kw=java_keyword (string, ascii_index)))
1537 JAVA_LEX_KW (string);
1540 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1541 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1542 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1543 case PRIVATE_TK: case STRICT_TK:
1544 SET_MODIFIER_CTX (kw->token);
1547 SET_LVAL_NODE (float_type_node);
1550 SET_LVAL_NODE (double_type_node);
1553 SET_LVAL_NODE (boolean_type_node);
1556 SET_LVAL_NODE (byte_type_node);
1559 SET_LVAL_NODE (short_type_node);
1562 SET_LVAL_NODE (int_type_node);
1565 SET_LVAL_NODE (long_type_node);
1568 SET_LVAL_NODE (char_type_node);
1571 /* Keyword based literals */
1574 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1575 boolean_true_node : boolean_false_node));
1578 SET_LVAL_NODE (null_pointer_node);
1581 /* Some keyword we want to retain information on the location
1594 BUILD_OPERATOR (kw->token);
1602 /* We may have an ID here */
1603 if (JAVA_START_CHAR_P (first_unicode))
1605 JAVA_LEX_ID (string);
1606 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1610 /* Everything else is an invalid character in the input */
/* The offending character is rendered by java_sprint_unicode, which
   writes it as an escape when needed. */
1612 char lex_error_buffer [128];
1613 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1614 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1615 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, using
   one, two or three bytes depending on its value. The value 0 takes
   the two-byte path, so it is written as 0xc0 0x80 and not as a zero
   byte. */
1621 java_unicode_2_utf8 (unicode)
1624 if (RANGE (unicode, 0x01, 0x7f))
1625 obstack_1grow (&temporary_obstack, (char)unicode);
1626 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1628 obstack_1grow (&temporary_obstack,
1629 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1630 obstack_1grow (&temporary_obstack,
1631 (unsigned char)(0x80 | (unicode & 0x3f)));
1633 else /* Range 0x800-0xffff */
1635 obstack_1grow (&temporary_obstack,
1636 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1637 obstack_1grow (&temporary_obstack,
1638 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1639 obstack_1grow (&temporary_obstack,
1640 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, using ctxp->filename and the line
   and column currently recorded in ctxp->elc, then clear the type of
   the result. */
1646 build_wfl_node (node)
1649 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1650 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1651 TREE_TYPE (node) = NULL_TREE;
/* Lexical error MSG. ctxp->elc is set to the current line and to the
   column char_col - 1 + FORWARD, and ctxp->java_error_flag is cleared.
   The call that actually reports MSG is not visible here. */
1657 java_lex_error (msg, forward)
1658 const char *msg ATTRIBUTE_UNUSED;
1659 int forward ATTRIBUTE_UNUSED;
1662 ctxp->elc.line = ctxp->c_line->lineno;
1663 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1665 /* Might be caught in the middle of some error report */
1666 ctxp->java_error_flag = 0;
1683 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, the text used to point at a source
   position: line LINE of FILENAME, a newline, then a second line of
   padding ending with a ^ marker placed according to COL. The file is
   opened and scanned from its beginning on every call. If the file
   ends before LINE is reached, a placeholder message is used instead
   of the line. */
1695 java_get_line_col (filename, line, col)
1696 const char *filename ATTRIBUTE_UNUSED;
1697 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1702 /* Dumb implementation. Doesn't try to cache or optimize things. */
1703 /* First line of the file is line 1, first column is 1 */
1705 /* COL == -1 means, at the CR/LF in LINE */
1706 /* COL == -2 means, at the first non space char in LINE */
1709 int c, ccol, cline = 1;
1710 int current_line_col = 0;
1711 int first_non_space = 0;
1714 if (!(fp = fopen (filename, "r")))
1715 fatal ("Can't open file - java_display_line_col");
/* Skip the lines that come before LINE. */
1717 while (cline != line)
1722 static char msg[] = "<<file too short - unexpected EOF>>";
1723 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1726 if (java_is_eol (fp, c))
1730 /* Gather the chars of the current line in a buffer */
1734 if (c < 0 || java_is_eol (fp, c))
1736 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1737 first_non_space = current_line_col;
1738 obstack_1grow (&temporary_obstack, c);
1743 obstack_1grow (&temporary_obstack, '\n');
1747 col = current_line_col;
1748 first_non_space = 0;
1751 col = first_non_space;
1753 first_non_space = 0;
1755 /* Place the '^' at the right position */
1756 base = obstack_base (&temporary_obstack);
1757 for (ccol = 1; ccol <= col+3; ccol++)
1759 /* Compute \t when reaching first_non_space */
1760 char c = (first_non_space ?
1761 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1762 obstack_1grow (&temporary_obstack, c);
1764 obstack_grow0 (&temporary_obstack, "^", 1);
1767 return obstack_finish (&temporary_obstack);
/* Compare the UTF-8 string STR, LENGTH bytes long, with the
   NUL-terminated string NAME. Characters are decoded from STR with
   UTF8_GET and compared with the characters of NAME one at a time; a
   difference is returned as ch - name[i]. Once NAME is exhausted the
   result is 0 if STR has been consumed entirely and 1 otherwise. */
1773 utf8_cmp (str, length, name)
1774 const unsigned char *str;
1778 const unsigned char *limit = str + length;
1781 for (i = 0; name[i]; ++i)
1783 int ch = UTF8_GET (str, limit);
1785 return ch - name[i];
1788 return str == limit ? 0 : 1;
1791 /* A sorted list of all C++ keywords. */
1793 static const char *cxx_keywords[] =
1828 /* Return true if NAME is a C++ keyword. NAME is LENGTH bytes long.
   The sorted cxx_keywords table is searched by repeatedly halving
   the range between FIRST and LAST, comparing with utf8_cmp. */
1831 cxx_keyword_p (name, length)
1835 int last = ARRAY_SIZE (cxx_keywords);
1837 int mid = (last + first) / 2;
1840 for (mid = (last + first) / 2;
1842 old = mid, mid = (last + first) / 2)
/* Compare only as many bytes as the shorter of NAME and the keyword. */
1844 int kwl = strlen (cxx_keywords[mid]);
1845 int min_length = kwl > length ? length : kwl;
1846 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1851 /* We've found a match if all the remaining characters are
1853 for (i = min_length; i < length && name[i] == '$'; ++i)
1867 #endif /* JC1_LITE */