1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
39 #include "chartables.h"
41 /* Function declarations */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
71 /* This is nonzero if we have initialized `need_byteswap'. */
72 static int byteswap_init = 0;
74 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
75 big-endian order -- not native endian order. We handle this by
76 doing a conversion once at startup and seeing what happens. This
77 flag holds the results of this determination. */
78 static int need_byteswap = 0;
/* Prepare the parser context for lexing a new input.  The visible
   steps intern the identifiers used by the front end, put java.lang
   on the import-on-demand list, build the wfl_* helper nodes, reset
   the per-file fields of CTXP and finally create the lexer that
   reads FINPUT using ENCODING.  */
82 java_init_lex (finput, encoding)
/* NOTE(review): as shown here this flag is an automatic variable that
   starts at zero on every call, so the test on it further down always
   succeeds -- confirm that this is intended.  */
87 int java_lang_imported = 0;
90 java_lang_id = get_identifier ("java.lang");
91 if (!java_lang_cloneable)
92 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
93 if (!java_io_serializable)
94 java_io_serializable = get_identifier ("java.io.Serializable");
96 inst_id = get_identifier ("inst$");
98 wpv_id = get_identifier ("write_parm_value$");
/* Prepend a java.lang entry to the import-on-demand list.  */
100 if (!java_lang_imported)
102 tree node = build_tree_list
103 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
104 read_import_dir (TREE_PURPOSE (node));
105 TREE_CHAIN (node) = ctxp->import_demand_list;
106 ctxp->import_demand_list = node;
107 java_lang_imported = 1;
111 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
113 label_id = get_identifier ("$L");
115 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
116 if (!wfl_string_buffer)
118 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
120 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Clear the three initializer lists of the current parser context.  */
122 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
123 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
/* NOTE(review): 11 is presumably the number of elements in
   modifier_ctx -- verify against its declaration.  */
125 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
126 memset ((PTR) current_jcf, 0, sizeof (JCF));
127 ctxp->current_parsed_class = NULL;
128 ctxp->package = NULL_TREE;
/* Line numbering restarts at zero; java_allocate_new_line increments
   the counter for each line it creates.  */
131 ctxp->filename = input_filename;
132 ctxp->lineno = lineno = 0;
135 ctxp->minus_seen = 0;
136 ctxp->java_error_flag = 0;
137 ctxp->lexer = java_new_lexer (finput, encoding);
/* Render character I of LINE into a static buffer.  A character that
   was flagged as coming from a Unicode escape, or whose value is
   greater than 128, is written as a backslash-u escape with four hex
   digits; any other character is stored as a single byte.  Because
   the buffer is static, its contents are replaced by the next call.  */
141 java_sprint_unicode (line, i)
142 struct java_line *line;
145 static char buffer [10];
/* NOTE(review): the comparison is strict, so the value 128 itself
   takes the single-byte path -- confirm this boundary.  */
146 if (line->unicode_escape_p [i] || line->line [i] > 128)
147 sprintf (buffer, "\\u%04x", line->line [i]);
150 buffer [0] = line->line [i];
/* Return the character at the current read position of the current
   line.  The read position is left unchanged.  */
157 java_sneak_unicode ()
159 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Give back the character most recently taken from the current line:
   the read position moves back by one and the column counter is
   reduced by JAVA_COLUMN_DELTA (0).  */
163 java_unget_unicode ()
165 if (!ctxp->c_line->current)
166 /* Can't unget unicode: the read position is already at the start
   of the line.  */
169 ctxp->c_line->current--;
170 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Start a new current line in CTXP.  The look-ahead character saved
   in the old current line is placed at the start of the new one, and
   the global line counter is advanced.  */
174 java_allocate_new_line ()
/* Capture the pending look-ahead before the old line is replaced.  */
176 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
177 char ahead_escape_p = (ctxp->c_line ?
178 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that holds more than white space becomes the
   previous line; the buffers of the line it displaces are freed.  */
180 if (ctxp->c_line && !ctxp->c_line->white_space_only)
184 free (ctxp->p_line->unicode_escape_p);
185 free (ctxp->p_line->line);
188 ctxp->p_line = ctxp->c_line;
189 ctxp->c_line = NULL; /* Reallocated */
/* Fresh line with room for JAVA_LINE_MAX characters and as many
   escape flags.  */
194 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
195 ctxp->c_line->max = JAVA_LINE_MAX;
196 ctxp->c_line->line = (unicode_t *)xmalloc
197 (sizeof (unicode_t)*ctxp->c_line->max);
198 ctxp->c_line->unicode_escape_p =
199 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
200 ctxp->c_line->white_space_only = 0;
/* Empty the line and rewind its read position and column.  */
203 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Seed the line with the saved look-ahead character and its flag.  */
207 ctxp->c_line->line [ctxp->c_line->size] = ahead;
208 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209 ctxp->c_line->size++;
211 ctxp->c_line->ahead [0] = 0;
212 ctxp->c_line->unicode_escape_ahead_p = 0;
213 ctxp->c_line->lineno = ++lineno;
214 ctxp->c_line->white_space_only = 1;
217 /* Create a new lexer object reading from FINPUT.  ENCODING is the
   name of the input encoding handed to iconv_open, which is asked to
   convert from it to UCS-2.  When no usable converter is obtained the
   lexer is switched to its fallback decoder or a fatal error naming
   ENCODING is raised (see the end of the function).  */
220 java_new_lexer (finput, encoding)
222 const char *encoding;
224 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
227 lex->finput = finput;
/* Nothing has been pushed back yet.  */
229 lex->unget_value = 0;
233 lex->handle = iconv_open ("UCS-2", encoding);
/* iconv_open signals failure with (iconv_t) -1.  */
234 if (lex->handle != (iconv_t) -1)
240 lex->read_anything = 0;
241 lex->use_fallback = 0;
243 /* Work around broken iconv() implementations by doing checking at
244 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245 then all UCS-2 encoders will be broken. Perhaps not a valid
253 handle = iconv_open ("UCS-2", "UTF-8");
254 if (handle != (iconv_t) -1)
261 /* This is the UTF-8 encoding of \ufeff. */
268 outp = (char *) &result;
271 r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
272 iconv_close (handle);
273 /* Conversion must be complete for us to use the result. */
274 if (r != (size_t) -1 && inc == 0 && outc == 0)
/* Any value other than 0xfeff in RESULT means the converter produced
   the two bytes in the other order, so output must be swapped.  */
275 need_byteswap = (result != 0xfeff);
279 lex->byte_swap = need_byteswap;
282 #endif /* HAVE_ICONV */
284 /* If iconv failed, use the internal decoder if the default
285 encoding was requested. This code is used on platforms where
286 iconv exists but is insufficient for our needs. For
287 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. */
/* strcmp is nonzero when ENCODING differs from DEFAULT_ENCODING.  */
288 if (strcmp (encoding, DEFAULT_ENCODING))
292 lex->use_fallback = 1;
293 #endif /* HAVE_ICONV */
297 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option.", encoding);
/* Tear down LEX.  The iconv conversion handle is closed unless LEX
   was using the fallback decoder.  */
303 java_destroy_lexer (lex)
307 if (! lex->use_fallback)
308 iconv_close (lex->handle);
/* A value pushed back in unget_value is consumed, and the slot is
   cleared, before any new input is looked at.  */
317 if (lex->unget_value)
319 unicode_t r = lex->unget_value;
320 lex->unget_value = 0;
/* iconv path: raw bytes from FINPUT are staged in lex->buffer between
   FIRST and LAST and converted into lex->out_buffer between OUT_FIRST
   and OUT_LAST.  */
325 if (! lex->use_fallback)
327 size_t ir, inbytesleft, in_save, out_count, out_save;
331 /* If there is data which has already been converted, use it. */
332 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
339 /* See if we need to read more data. If FIRST == 0 then
340 the previous conversion attempt ended in the middle of
341 a character at the end of the buffer. Otherwise we
342 only have to read if the buffer is empty. */
343 if (lex->first == 0 || lex->first >= lex->last)
347 if (lex->first >= lex->last)
352 if (feof (lex->finput))
/* Fill the free tail of the staging buffer.  */
354 r = fread (&lex->buffer[lex->last], 1,
355 sizeof (lex->buffer) - lex->last,
/* Bytes waiting to be converted, and room left for converted output.  */
360 inbytesleft = lex->last - lex->first;
361 out_count = sizeof (lex->out_buffer) - lex->out_last;
363 if (inbytesleft == 0)
365 /* We've tried to read and there is nothing left. */
/* Remember both counts so the amount consumed and produced by iconv
   can be computed afterwards.  */
369 in_save = inbytesleft;
370 out_save = out_count;
371 inp = &lex->buffer[lex->first];
372 outp = &lex->out_buffer[lex->out_last];
373 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
376 /* If we haven't read any bytes, then look to see if we
378 if (! lex->read_anything && out_save - out_count >= 2)
380 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
386 else if (uc == 0xfffe)
391 lex->read_anything = 1;
397 for (i = 0; i < out_save - out_count; i += 2)
399 char t = lex->out_buffer[lex->out_last + i];
400 lex->out_buffer[lex->out_last + i]
401 = lex->out_buffer[lex->out_last + i + 1];
402 lex->out_buffer[lex->out_last + i + 1] = t;
406 lex->first += in_save - inbytesleft;
407 lex->out_last += out_save - out_count;
409 /* If we converted anything at all, move along. */
410 if (out_count != out_save)
/* iconv reports failure with (size_t) -1.  */
413 if (ir == (size_t) -1)
417 /* This is ok. This means that the end of our buffer
418 is in the middle of a character sequence. We just
419 move the valid part of the buffer to the beginning
421 memmove (&lex->buffer[0], &lex->buffer[lex->first],
422 lex->last - lex->first);
423 lex->last -= lex->first;
428 /* A more serious error. */
429 java_lex_error ("unrecognized character in input stream",
437 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
439 /* Don't have any data. */
/* NOTE(review): the converted character is fetched by reading a
   unicode_t through a cast of the byte buffer; this assumes the
   address is suitably aligned for unicode_t -- confirm.  */
444 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
449 #endif /* HAVE_ICONV */
/* Fallback path: decode UTF-8 by hand, one byte at a time, straight
   from FINPUT.  */
452 c = getc (lex->finput);
/* Lead byte of the form 110xxxxx: two-byte sequence.  */
460 if ((c & 0xe0) == 0xc0)
462 c1 = getc (lex->finput);
/* A continuation byte must have the form 10xxxxxx.  */
463 if ((c1 & 0xc0) == 0x80)
464 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
/* Lead byte of the form 1110xxxx: three-byte sequence.  */
467 else if ((c & 0xf0) == 0xe0)
469 c1 = getc (lex->finput);
470 if ((c1 & 0xc0) == 0x80)
472 c2 = getc (lex->finput);
473 if ((c2 & 0xc0) == 0x80)
474 return (unicode_t)(((c & 0xf) << 12) +
475 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
483 /* We simply don't support invalid characters. */
484 java_lex_error ("malformed UTF-8 character", 0);
488 /* We only get here on error. */
/* Append character C to line L, together with UNICODE_ESCAPE_P, the
   flag recorded for that character in the parallel unicode_escape_p
   array.  Both arrays are enlarged by JAVA_LINE_MAX entries when the
   line is full.  */
493 java_store_unicode (l, c, unicode_escape_p)
496 int unicode_escape_p;
/* Full: grow the character array and the flag array in step.  */
498 if (l->size == l->max)
500 l->max += JAVA_LINE_MAX;
501 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
502 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
503 sizeof (char)*l->max);
/* Store the character and its flag in the same slot, then advance
   the size.  */
505 l->line [l->size] = c;
506 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read the next character from LEX, translating a Unicode escape
   (backslash, one or more u, four hex digits) into the character it
   denotes.  *UNICODE_ESCAPE_P is cleared on entry and set to 1 once
   the hex digits of an escape have been processed.  */
510 java_read_unicode (lex, unicode_escape_p)
512 int *unicode_escape_p;
516 c = java_read_char (lex);
517 *unicode_escape_p = 0;
/* lex->bs_count is tested for parity here; an escape is only looked
   for after an odd count.  */
526 if ((lex->bs_count) % 2 == 1)
528 /* Odd number of \ seen. */
529 c = java_read_char (lex);
532 unicode_t unicode = 0;
535 /* Recognize any number of `u's in \u. */
536 while ((c = java_read_char (lex)) == 'u')
539 /* Unget the most recent character as it is not a `u'. */
542 lex->unget_value = c;
544 /* Next should be 4 hex digits, otherwise it's an error.
545 The hex value is converted into the unicode, pushed into
546 the Unicode stream. */
/* Most significant digit first: shifts of 12, 8, 4 and 0 bits.  */
547 for (shift = 12; shift >= 0; shift -= 4)
549 if ((c = java_read_char (lex)) == UEOF)
551 if (c >= '0' && c <= '9')
552 unicode |= (unicode_t)((c-'0') << shift);
553 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/* OR-ing with 0x20 folds an upper case letter to lower case.  */
554 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
556 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
559 *unicode_escape_p = 1;
/* Not an escape after all: keep the character just read for the next
   call and hand back the backslash itself.  */
562 lex->unget_value = c;
564 return (unicode_t) '\\';
/* Like java_read_unicode, but a carriage return followed by a line
   feed is delivered as a single line terminator.  UNICODE_ESCAPE_P is
   passed on to java_read_unicode for the first character read.  */
568 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
570 int *unicode_escape_p;
572 int c = java_read_unicode (lex, unicode_escape_p);
576 /* We have to read ahead to see if we got \r\n. In that case we
577 return a single line terminator. */
579 c = java_read_unicode (lex, &dummy);
/* The look-ahead character is kept in unget_value for the next read.  */
581 lex->unget_value = c;
582 /* In either case we must return a newline. */
592 /* It's time to read a line when there is no current line yet or
   when every character of the current line has been handed out.  */
593 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
598 if (ctxp->lexer->hit_eof)
601 java_allocate_new_line ();
/* The new line may already start with a carried-over newline, in
   which case it is not filled any further.  */
602 if (ctxp->c_line->line[0] != '\n')
606 int unicode_escape_p;
607 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
612 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
/* The first character that is not white space clears the flag.  */
613 if (ctxp->c_line->white_space_only
614 && !JAVA_WHITE_SPACE_P (c)
616 ctxp->c_line->white_space_only = 0;
/* A line terminator or the end of input completes the line.  */
618 if ((c == '\n') || (c == UEOF))
622 if (c == UEOF && ! found_chars)
624 ctxp->lexer->hit_eof = 1;
/* Hand out the character at the read position and advance past it,
   keeping the column counter in step.  */
629 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
630 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
631 return ctxp->c_line->line [ctxp->c_line->current++];
634 /* Parse the end of a C style comment.
635 * C is the first character following the '/' and '*'.
   Running out of input before the comment is closed is reported as
   an error.  */
637 java_parse_end_comment (c)
/* Each iteration fetches the next character of the comment.  */
640 for ( ;; c = java_get_unicode ())
645 java_lex_error ("Comment not terminated at end of input", 0);
648 switch (c = java_get_unicode ())
651 java_lex_error ("Comment not terminated at end of input", 0);
655 case '*': /* another star: push it back so that it is examined again */
656 java_unget_unicode ();
662 /* Parse the documentation section. Keywords must be at the beginning
663 of a documentation comment line (ignoring white space and any `*'
664 character). Parsed keyword(s): @DEPRECATED.
   C is the first character of the section.  A return value of 1 tells
   the caller that the end of the comment has been consumed.  */
667 java_parse_doc_section (c)
670 int valid_tag = 0, seen_star = 0;
/* Skip the white space, stars and newlines that may precede a tag.  */
672 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
684 c = java_get_unicode();
688 java_lex_error ("Comment not terminated at end of input", 0);
/* A slash right after a star closes the comment.  */
690 if (seen_star && (c == '/'))
691 return 1; /* Goto step1 in caller */
693 /* We're parsing @deprecated */
694 if (valid_tag && (c == '@'))
/* Collect at most ten characters following the at-sign.  Each
   character is stored after it has been read, so the character that
   stops the loop is placed in TAG as well.  */
699 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
701 c = java_get_unicode ();
702 tag [tag_index++] = c;
706 java_lex_error ("Comment not terminated at end of input", 0);
/* NOTE(review): the loop above can leave tag_index equal to 10, in
   which case this terminator is written to tag[10].  The declaration
   of TAG is not visible here -- confirm it has at least 11 elements.  */
707 tag [tag_index] = '\0';
709 if (!strcmp (tag, "deprecated"))
710 ctxp->deprecated = 1;
712 java_unget_unicode ();
716 /* Return true if C is a valid start character for a Java identifier.
717 This is only called if C >= 128 -- smaller values are handled
718 inline. However, this function handles all values anyway. */
720 java_start_char_p (c)
/* type_table is indexed by the high byte of C.  */
723 unsigned int hi = c / 256;
724 char *page = type_table[hi];
725 unsigned long val = (unsigned long) page;
/* An entry with bits set outside LETTER_PART | LETTER_START is used
   as a pointer to a table indexed by the low byte of C.  The other
   case is not visible here; presumably the entry itself then holds
   the flags for the whole page -- TODO confirm.  */
728 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
729 flags = page[c & 255];
733 return flags & LETTER_START;
736 /* Return true if C is a valid part character for a Java identifier.
737 This is only called if C >= 128 -- smaller values are handled
738 inline. However, this function handles all values anyway. */
/* type_table is indexed by the high byte of C.  */
743 unsigned int hi = c / 256;
744 char *page = type_table[hi];
745 unsigned long val = (unsigned long) page;
/* An entry with bits set outside LETTER_PART | LETTER_START is used
   as a pointer to a table indexed by the low byte of C.  The other
   case is not visible here; presumably the entry itself then holds
   the flags for the whole page -- TODO confirm.  */
748 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
749 flags = page[c & 255];
753 return flags & LETTER_PART;
/* Parse the escape sequence whose introducing backslash has already
   been consumed by the caller.  Returns the character value the
   escape stands for; an unrecognized escape is reported and
   JAVA_CHAR_ERROR is returned.  */
757 java_parse_escape_sequence ()
762 switch (c = java_get_unicode ())
765 return (unicode_t)0x8; /* backspace */
767 return (unicode_t)0x9; /* horizontal tab */
769 return (unicode_t)0xa; /* line feed */
771 return (unicode_t)0xc; /* form feed */
773 return (unicode_t)0xd; /* carriage return */
775 return (unicode_t)0x22; /* double quote */
777 return (unicode_t)0x27; /* single quote */
779 return (unicode_t)0x5c; /* backslash */
/* Octal escape: collect up to MAX octal digits.  */
780 case '0': case '1': case '2': case '3': case '4':
781 case '5': case '6': case '7':
784 int octal_escape_index = 0;
788 for (; octal_escape_index < max && RANGE (c, '0', '7');
789 c = java_get_unicode ())
791 if (octal_escape_index == 0 && c > '3')
793 /* According to the grammar, `\477' has a well-defined
794 meaning -- it is `\47' followed by `7'. */
797 octal_escape [octal_escape_index++] = c;
/* The character that ended the digit run is not part of the escape.  */
800 java_unget_unicode ();
/* Assemble the value three bits per digit, most significant first.  */
802 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
803 i < octal_escape_index; i++, shift -= 3)
804 char_lit |= (octal_escape [i] - '0') << shift;
809 java_lex_error ("Invalid character in escape sequence", 0);
810 return JAVA_CHAR_ERROR;
814 /* Isolate the code which may raise an arithmetic exception in its
823 int number_beginning;
826 #ifdef REAL_ARITHMETIC
827 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
829 #define IS_ZERO(X) ((X) == 0)
832 static void java_perform_atof PARAMS ((PTR));
835 java_perform_atof (av)
838 struct jpa_args *a = (struct jpa_args *)av;
839 YYSTYPE *java_lval = a->java_lval;
840 int number_beginning = a->number_beginning;
841 REAL_VALUE_TYPE value;
842 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
844 SET_REAL_VALUE_ATOF (value,
845 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
847 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
849 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
852 else if (IS_ZERO (value))
854 /* We check to see if the value is really 0 or if we've found an
855 underflow. We do this in the most primitive imaginable way. */
857 char *p = a->literal_token;
/* Scan the text of the literal up to an exponent marker, if any,
   looking for a character other than a zero digit or the point.  */
860 while (*p && *p != 'e' && *p != 'E')
862 if (*p != '0' && *p != '.')
/* Temporarily move the read position back to the start of the number
   so the error is reported there, then restore it.  */
871 int i = ctxp->c_line->current;
872 ctxp->c_line->current = number_beginning;
873 java_lex_error ("Floating point literal underflow", 0);
874 ctxp->c_line->current = i;
/* Build the real constant of the chosen type (float when a->fflag is
   set, double otherwise) as the value of the token.  */
878 SET_LVAL_NODE_TYPE (build_real (type, value), type);
882 static int yylex PARAMS ((YYSTYPE *));
/* Body of the lexer entry point.  It skips white space and comments,
   then recognizes in turn numeric, character and string literals,
   separators and operators, and finally keywords and identifiers;
   anything else is reported as an invalid character.  Token values
   are stored through java_lval.  */
893 unicode_t first_unicode;
894 int ascii_index, all_ascii;
897 /* Translation of the Unicode escape in the raw stream of Unicode
898 characters. Takes care of line terminator. */
900 /* Skip white spaces: SP, TAB and FF or ULT */
901 for (c = java_get_unicode ();
902 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
905 ctxp->elc.line = ctxp->c_line->lineno;
906 ctxp->elc.col = ctxp->c_line->char_col-2;
909 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
911 if (c == 0x1a) /* CTRL-Z */
913 if ((c = java_get_unicode ()) == UEOF)
914 return 0; /* Ok here */
916 java_unget_unicode (); /* Caught later, at the end of the function */
918 /* Handle EOF here */
919 if (c == UEOF) /* Should probably do something here... */
922 /* Take care of eventual comments. */
925 switch (c = java_get_unicode ())
930 c = java_get_unicode ();
933 /* It is ok to end a `//' comment with EOF, unless
934 we're being pedantic. */
936 java_lex_error ("Comment not terminated at end of input",
940 if (c == '\n') /* ULT */
946 if ((c = java_get_unicode ()) == '*')
948 if ((c = java_get_unicode ()) == '/')
949 goto step1; /* Empty documentation comment */
950 else if (java_parse_doc_section (c))
954 java_parse_end_comment ((c = java_get_unicode ()));
958 java_unget_unicode ();
/* Record where the token starts, keeping the previous column.  */
964 ctxp->elc.line = ctxp->c_line->lineno;
965 ctxp->elc.prev_col = ctxp->elc.col;
966 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
967 if (ctxp->elc.col < 0)
970 /* Numeric literals */
971 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
973 /* This section of code is borrowed from gcc/c-lex.c */
974 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
975 int parts[TOTAL_PARTS];
976 HOST_WIDE_INT high, low;
977 /* End borrowed section */
/* NOTE(review): LITERAL_TOKEN holds 256 bytes, and the loops below
   store into it with literal_index++ without a bound check that is
   visible here -- confirm that a very long literal cannot run past
   the end of the array.  */
978 char literal_token [256];
979 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
982 int number_beginning = ctxp->c_line->current;
985 /* We might have a . separator instead of a FP like .[0-9]* */
988 unicode_t peep = java_sneak_unicode ();
990 if (!JAVA_ASCII_DIGIT (peep))
993 BUILD_OPERATOR (DOT_TK);
997 for (i = 0; i < TOTAL_PARTS; i++)
1002 c = java_get_unicode ();
1003 if (c == 'x' || c == 'X')
1006 c = java_get_unicode ();
1008 else if (JAVA_ASCII_DIGIT (c))
1012 /* Push the '.' back and prepare for a FP parsing... */
1013 java_unget_unicode ();
1018 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1019 JAVA_LEX_LIT ("0", 10);
1023 SET_LVAL_NODE (long_zero_node);
1024 return (INT_LIT_TK);
1026 SET_LVAL_NODE (float_zero_node);
1029 SET_LVAL_NODE (double_zero_node);
1032 java_unget_unicode ();
1033 SET_LVAL_NODE (integer_zero_node);
1034 return (INT_LIT_TK);
1038 /* Parse the first part of the literal, until we find something
1039 which is not a number. */
1040 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1041 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1042 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1044 /* We store in a string (in case it turns out to be a FP) and in
1045 PARTS if we have to process an integer literal. */
1046 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
1049 literal_token [literal_index++] = c;
1050 /* This section of code is borrowed from gcc/c-lex.c */
1051 for (count = 0; count < TOTAL_PARTS; count++)
1053 parts[count] *= radix;
1056 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1057 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1060 parts[0] += numeric;
1062 if (parts [TOTAL_PARTS-1] != 0)
1064 /* End borrowed section. */
1065 c = java_get_unicode ();
1068 /* If we have something from the FP char set but not a digit, parse
1070 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1073 int seen_digit = (literal_index ? 1 : 0);
1074 int seen_exponent = 0;
1075 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literals are
1076 double unless specified. */
1078 /* It is ok if the radix is 8 because this just means we've
1079 seen a leading `0'. However, radix==16 is invalid. */
1081 java_lex_error ("Can't express non-decimal FP literal", 0);
1091 literal_token [literal_index++ ] = c;
1092 c = java_get_unicode ();
1095 java_lex_error ("Invalid character in FP literal", 0);
1098 if (c == 'e' || c == 'E')
1102 /* {E,e} must have seen at least one digit */
1104 java_lex_error ("Invalid FP literal", 0);
1108 literal_token [literal_index++] = c;
1109 c = java_get_unicode ();
1112 java_lex_error ("Invalid character in FP literal", 0);
1114 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1116 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1117 stage = 4; /* So we fall through */
1120 if ((c=='-' || c =='+') && stage == 2)
1123 literal_token [literal_index++] = c;
1124 c = java_get_unicode ();
1127 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1128 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1129 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1130 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1132 if (JAVA_ASCII_DIGIT (c))
1134 literal_token [literal_index++ ] = c;
1135 c = java_get_unicode ();
1142 if (stage != 4) /* Don't push back fF/dD */
1143 java_unget_unicode ();
1145 /* An exponent (if any) must have seen a digit. */
1146 if (seen_exponent && !seen_digit)
1147 java_lex_error ("Invalid FP literal", 0);
1149 literal_token [literal_index] = '\0';
1150 JAVA_LEX_LIT (literal_token, radix);
/* The conversion itself runs in java_perform_atof, called through
   do_float_handler with its arguments packed in A.  */
1153 a.literal_token = literal_token;
1155 a.java_lval = java_lval;
1156 a.number_beginning = number_beginning;
1157 if (do_float_handler (java_perform_atof, (PTR) &a))
1160 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1166 } /* JAVA_ASCII_FPCHAR (c) */
1168 /* Here we get back to converting the integral literal. */
1169 if (c == 'L' || c == 'l')
1171 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1172 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1173 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1174 java_lex_error ("Digit out of range in octal literal", 0);
1175 else if (radix == 16 && !literal_index)
1176 java_lex_error ("No digit specified for hexadecimal literal", 0);
1178 java_unget_unicode ();
1180 #ifdef JAVA_LEX_DEBUG
1181 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1182 JAVA_LEX_LIT (literal_token, radix);
1184 /* This section of code is borrowed from gcc/c-lex.c */
1187 bytes = GET_TYPE_PRECISION (long_type_node);
1188 for (i = bytes; i < TOTAL_PARTS; i++)
/* Reassemble the per-char PARTS into the HIGH and LOW words.  */
1196 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1198 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1199 / HOST_BITS_PER_CHAR)]
1200 << (i * HOST_BITS_PER_CHAR));
1201 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1203 /* End borrowed section. */
1205 /* Range checking */
1208 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1209 9223372036854775807L is the biggest `long' literal that can be
1210 expressed using a 10 radix. For other radixes, everything that
1211 fits within 64 bits is OK. */
1212 int hb = (high >> 31);
1213 if (overflow || (hb && low && radix == 10) ||
1214 (hb && high & 0x7fffffff && radix == 10) ||
1215 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1216 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1220 /* 2147483648 is valid if operand of a '-'. Otherwise,
1221 2147483647 is the biggest `int' literal that can be
1222 expressed using a 10 radix. For other radixes, everything
1223 that fits within 32 bits is OK. As all literals are
1224 signed, we sign extend here. */
1225 int hb = (low >> 31) & 0x1;
1226 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1227 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1228 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1231 ctxp->minus_seen = 0;
1232 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1233 (long_suffix ? long_type_node : int_type_node));
1237 ctxp->minus_seen = 0;
1239 /* Character literals */
1243 if ((c = java_get_unicode ()) == '\\')
1244 char_lit = java_parse_escape_sequence ();
1247 if (c == '\n' || c == '\'')
1248 java_lex_error ("Invalid character literal", 0);
1252 c = java_get_unicode ();
1254 if ((c == '\n') || (c == UEOF))
1255 java_lex_error ("Character literal not terminated at end of line", 0);
1257 java_lex_error ("Syntax error in character literal", 0);
1259 if (char_lit == JAVA_CHAR_ERROR)
1260 char_lit = 0; /* We silently convert it to zero */
1262 JAVA_LEX_CHAR_LIT (char_lit);
1263 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1267 /* String literals */
/* The characters of the string are accumulated, UTF-8 encoded, on
   temporary_obstack until the closing quote, a newline or the end of
   input is reached.  */
1273 for (no_error = 1, c = java_get_unicode ();
1274 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1277 c = java_parse_escape_sequence ();
1278 if (c == JAVA_CHAR_ERROR)
1281 c = 0; /* We silently convert it to zero. */
1283 java_unicode_2_utf8 (c);
1285 if (c == '\n' || c == UEOF) /* ULT */
1287 lineno--; /* Refer to the line the terminator was seen */
1288 java_lex_error ("String not terminated at end of line.", 0);
1292 obstack_1grow (&temporary_obstack, '\0');
1293 string = obstack_finish (&temporary_obstack);
1295 if (!no_error || (c != '"'))
1296 java_lval->node = error_mark_node; /* Requires further testing FIXME */
1298 java_lval->node = build_string (strlen (string), string);
1300 obstack_free (&temporary_obstack, string);
1301 return STRING_LIT_TK;
/* Separators and operators.  Where an extra character has to be read
   to tell a longer operator from a shorter one, it is pushed back
   when it turns out not to belong to the token.  */
1309 BUILD_OPERATOR (OP_TK);
1315 if (ctxp->ccb_indent == 1)
1316 ctxp->first_ccb_indent1 = lineno;
1318 BUILD_OPERATOR (OCB_TK);
1322 if (ctxp->ccb_indent == 1)
1323 ctxp->last_ccb_indent1 = lineno;
1324 BUILD_OPERATOR (CCB_TK);
1327 BUILD_OPERATOR (OSB_TK);
1339 BUILD_OPERATOR (DOT_TK);
1340 /* return DOT_TK; */
1347 if ((c = java_get_unicode ()) == '=')
1349 BUILD_OPERATOR (EQ_TK);
1353 /* Equals is used in two different locations. In the
1354 variable_declarator: rule, it has to be seen as '=' as opposed
1355 to being seen as an ordinary assignment operator in
1356 assignment_operators: rule. */
1357 java_unget_unicode ();
1358 BUILD_OPERATOR (ASSIGN_TK);
1362 switch ((c = java_get_unicode ()))
1365 BUILD_OPERATOR (GTE_TK);
1367 switch ((c = java_get_unicode ()))
1370 if ((c = java_get_unicode ()) == '=')
1372 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1376 java_unget_unicode ();
1377 BUILD_OPERATOR (ZRS_TK);
1380 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1382 java_unget_unicode ();
1383 BUILD_OPERATOR (SRS_TK);
1386 java_unget_unicode ();
1387 BUILD_OPERATOR (GT_TK);
1391 switch ((c = java_get_unicode ()))
1394 BUILD_OPERATOR (LTE_TK);
1396 if ((c = java_get_unicode ()) == '=')
1398 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1402 java_unget_unicode ();
1403 BUILD_OPERATOR (LS_TK);
1406 java_unget_unicode ();
1407 BUILD_OPERATOR (LT_TK);
1411 switch ((c = java_get_unicode ()))
1414 BUILD_OPERATOR (BOOL_AND_TK);
1416 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1418 java_unget_unicode ();
1419 BUILD_OPERATOR (AND_TK);
1423 switch ((c = java_get_unicode ()))
1426 BUILD_OPERATOR (BOOL_OR_TK);
1428 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1430 java_unget_unicode ();
1431 BUILD_OPERATOR (OR_TK);
1435 switch ((c = java_get_unicode ()))
1438 BUILD_OPERATOR (INCR_TK);
1440 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1442 java_unget_unicode ();
1443 BUILD_OPERATOR (PLUS_TK);
1447 switch ((c = java_get_unicode ()))
1450 BUILD_OPERATOR (DECR_TK);
1452 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1454 java_unget_unicode ();
/* Remembered so that the integer range checks above can accept the
   one literal that is only valid as the operand of a minus.  */
1455 ctxp->minus_seen = 1;
1456 BUILD_OPERATOR (MINUS_TK);
1460 if ((c = java_get_unicode ()) == '=')
1462 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1466 java_unget_unicode ();
1467 BUILD_OPERATOR (MULT_TK);
1471 if ((c = java_get_unicode ()) == '=')
1473 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1477 java_unget_unicode ();
1478 BUILD_OPERATOR (DIV_TK);
1482 if ((c = java_get_unicode ()) == '=')
1484 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1488 java_unget_unicode ();
1489 BUILD_OPERATOR (XOR_TK);
1493 if ((c = java_get_unicode ()) == '=')
1495 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1499 java_unget_unicode ();
1500 BUILD_OPERATOR (REM_TK);
1504 if ((c = java_get_unicode()) == '=')
1506 BUILD_OPERATOR (NEQ_TK);
1510 java_unget_unicode ();
1511 BUILD_OPERATOR (NEG_TK);
1516 BUILD_OPERATOR (REL_QM_TK);
1519 BUILD_OPERATOR (REL_CL_TK);
1521 BUILD_OPERATOR (NOT_TK);
1524 /* Keyword, boolean literal or null literal */
/* Gather every identifier-part character, UTF-8 encoded, on
   temporary_obstack; FIRST_UNICODE keeps the first character for the
   start-character test further down.  */
1525 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1526 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1528 java_unicode_2_utf8 (c);
1529 if (all_ascii && c >= 128)
1534 obstack_1grow (&temporary_obstack, '\0');
1535 string = obstack_finish (&temporary_obstack);
1536 java_unget_unicode ();
1538 /* If we have something all ascii, we consider a keyword, a boolean
1539 literal, a null literal or an all ASCII identifier. Otherwise,
1540 this is an identifier (possibly not respecting formation rule). */
1543 struct java_keyword *kw;
1544 if ((kw=java_keyword (string, ascii_index)))
1546 JAVA_LEX_KW (string);
1549 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1550 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1551 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1552 case PRIVATE_TK: case STRICT_TK:
1553 SET_MODIFIER_CTX (kw->token);
1556 SET_LVAL_NODE (float_type_node);
1559 SET_LVAL_NODE (double_type_node);
1562 SET_LVAL_NODE (boolean_type_node);
1565 SET_LVAL_NODE (byte_type_node);
1568 SET_LVAL_NODE (short_type_node);
1571 SET_LVAL_NODE (int_type_node);
1574 SET_LVAL_NODE (long_type_node);
1577 SET_LVAL_NODE (char_type_node);
1580 /* Keyword based literals */
1583 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1584 boolean_true_node : boolean_false_node));
1587 SET_LVAL_NODE (null_pointer_node);
1590 /* Some keyword we want to retain information on the location
1603 BUILD_OPERATOR (kw->token);
1611 /* We may have an ID here */
1612 if (JAVA_START_CHAR_P (first_unicode))
1614 JAVA_LEX_ID (string);
1615 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1619 /* Everything else is an invalid character in the input */
1621 char lex_error_buffer [128];
1622 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1623 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1624 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, using
   one, two or three bytes depending on its value.  The value zero is
   deliberately routed to the two-byte form (bytes 0xc0 0x80), so no
   zero byte is emitted for it.  */
1630 java_unicode_2_utf8 (unicode)
/* 0x01-0x7f: a single byte holding the value itself.  */
1633 if (RANGE (unicode, 0x01, 0x7f))
1634 obstack_1grow (&temporary_obstack, (char)unicode);
/* 0x80-0x7ff and zero: lead byte 110xxxxx, then one 10xxxxxx byte.  */
1635 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1637 obstack_1grow (&temporary_obstack,
1638 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1639 obstack_1grow (&temporary_obstack,
1640 (unsigned char)(0x80 | (unicode & 0x3f)));
/* Lead byte 1110xxxx, then two 10xxxxxx bytes.  */
1642 else /* Range 0x800-0xffff */
1644 obstack_1grow (&temporary_obstack,
1645 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1646 obstack_1grow (&temporary_obstack,
1647 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1648 obstack_1grow (&temporary_obstack,
1649 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, using the current file name and the
   line and column stored in ctxp->elc, and clear the type of the
   resulting node.  */
1655 build_wfl_node (node)
1658 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1659 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1660 TREE_TYPE (node) = NULL_TREE;
/* Record the position of a lexical error in ctxp->elc and clear
   ctxp->java_error_flag.  MSG is the error message; FORWARD is added
   to the recorded column.  Both parameters are marked
   ATTRIBUTE_UNUSED, presumably because some configurations compile
   this function without using them -- confirm.  */
1666 java_lex_error (msg, forward)
1667 const char *msg ATTRIBUTE_UNUSED;
1668 int forward ATTRIBUTE_UNUSED;
/* Line of the current input line; column one before the current
   column, shifted by FORWARD.  */
1671 ctxp->elc.line = ctxp->c_line->lineno;
1672 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1674 /* Might be caught in the middle of some error report */
1675 ctxp->java_error_flag = 0;
1692 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, the text of line LINE of FILENAME
   followed by a second line made of padding and a caret that marks
   column COL, and return the finished string.  The file is opened and
   read again from the start on each call.  */
1704 java_get_line_col (filename, line, col)
1705 const char *filename ATTRIBUTE_UNUSED;
1706 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1711 /* Dumb implementation. Doesn't try to cache or optimize things. */
1712 /* First line of the file is line 1, first column is 1 */
1714 /* COL == -1 means, at the CR/LF in LINE */
1715 /* COL == -2 means, at the first non space char in LINE */
1718 int c, ccol, cline = 1;
1719 int current_line_col = 0;
1720 int first_non_space = 0;
1723 if (!(fp = fopen (filename, "r")))
1724 fatal_io_error ("can't open %s", filename);
/* Skip ahead to the requested line, counting line terminators.  */
1726 while (cline != line)
1731 static char msg[] = "<<file too short - unexpected EOF>>";
1732 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1735 if (java_is_eol (fp, c))
1739 /* Gather the chars of the current line in a buffer */
1743 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white
   space.  */
1745 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1746 first_non_space = current_line_col;
1747 obstack_1grow (&temporary_obstack, c);
1752 obstack_1grow (&temporary_obstack, '\n');
/* Translate the special COL values into a real column.  */
1756 col = current_line_col;
1757 first_non_space = 0;
1760 col = first_non_space;
1762 first_non_space = 0;
1764 /* Place the '^' at the right position */
1765 base = obstack_base (&temporary_obstack);
1766 for (ccol = 1; ccol <= col+3; ccol++)
1768 /* Compute \t when reaching first_non_space */
1769 char c = (first_non_space ?
1770 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1771 obstack_1grow (&temporary_obstack, c);
1773 obstack_grow0 (&temporary_obstack, "^", 1);
1776 return obstack_finish (&temporary_obstack);
/* Compare the UTF-8 encoded string STR, LENGTH bytes long, with the
   zero-terminated string NAME.  Characters are decoded from STR one
   at a time with UTF8_GET and matched against successive chars of
   NAME.  The result is a difference of characters (presumably taken
   at the first mismatch -- the guarding test is not visible here),
   or, once all of NAME has been matched, 0 if STR was consumed
   exactly and 1 otherwise.  */
1782 utf8_cmp (str, length, name)
1783 const unsigned char *str;
/* One past the last byte of STR.  */
1787 const unsigned char *limit = str + length;
1790 for (i = 0; name[i]; ++i)
1792 int ch = UTF8_GET (str, limit);
1794 return ch - name[i];
/* NAME is exhausted: equal only if STR is exhausted as well.  */
1797 return str == limit ? 0 : 1;
1800 /* A sorted list of all C++ keywords. */
1802 static const char *cxx_keywords[] =
1910 /* Return true if NAME is a C++ keyword.  NAME is LENGTH bytes long
   and is compared against the entries of cxx_keywords with utf8_cmp.
   The search repeatedly probes the midpoint between FIRST and LAST,
   which looks like a bisection of the table -- the updates of FIRST
   and LAST are not visible here, so confirm.  */
1913 cxx_keyword_p (name, length)
1917 int last = ARRAY_SIZE (cxx_keywords);
1919 int mid = (last + first) / 2;
1922 for (mid = (last + first) / 2;
1924 old = mid, mid = (last + first) / 2)
/* Compare only as many bytes as the shorter of NAME and the keyword
   under test.  */
1926 int kwl = strlen (cxx_keywords[mid]);
1927 int min_length = kwl > length ? length : kwl;
1928 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1933 /* We've found a match if all the remaining characters are
1935 for (i = min_length; i < length && name[i] == '$'; ++i)
1949 #endif /* JC1_LITE */