1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
39 #include "chartables.h"
41 /* Function declarations */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
71 /* This is nonzero if we have initialized `need_byteswap'. */
72 static int byteswap_init = 0;
74 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
75 big-endian order -- not native endian order. We handle this by
76 doing a conversion once at startup and seeing what happens. This
77 flag holds the results of this determination. */
78 static int need_byteswap = 0;
82 java_init_lex (finput, encoding)
87 int java_lang_imported = 0;
90 java_lang_id = get_identifier ("java.lang");
91 if (!java_lang_cloneable)
92 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
93 if (!java_io_serializable)
94 java_io_serializable = get_identifier ("java.io.Serializable");
96 inst_id = get_identifier ("inst$");
98 wpv_id = get_identifier ("write_parm_value$");
100 if (!java_lang_imported)
102 tree node = build_tree_list
103 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
104 read_import_dir (TREE_PURPOSE (node));
105 TREE_CHAIN (node) = ctxp->import_demand_list;
106 ctxp->import_demand_list = node;
107 java_lang_imported = 1;
111 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
113 label_id = get_identifier ("$L");
115 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
116 if (!wfl_string_buffer)
118 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
120 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
122 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
123 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
125 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
126 memset ((PTR) current_jcf, 0, sizeof (JCF));
127 ctxp->current_parsed_class = NULL;
128 ctxp->package = NULL_TREE;
131 ctxp->filename = input_filename;
132 ctxp->lineno = lineno = 0;
135 ctxp->minus_seen = 0;
136 ctxp->java_error_flag = 0;
137 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE as text for use in a diagnostic message.
   The result is written into a function-local static buffer, so every
   call overwrites what the previous call produced.  Some lines of this
   function (including its return statement) are elided from this
   listing.  */
141 java_sprint_unicode (line, i)
142 struct java_line *line;
145 static char buffer [10];
/* A character that was written as a Unicode escape, or whose value is
   above 128, is printed as a backslash-u escape with four hex digits.
   NOTE(review): the comparison is `> 128', so the value 128 itself
   takes the raw path below -- confirm whether `>= 128' was meant.  */
146 if (line->unicode_escape_p [i] || line->line [i] > 128)
147 sprintf (buffer, "\\u%04x", line->line [i]);
/* Otherwise the character value is stored directly in the first byte
   of the buffer.  */
150 buffer [0] = line->line [i];
/* Peek at the character at the current read position of the current
   line (ctxp->c_line) without advancing that position.  */
157 java_sneak_unicode ()
159 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Push the most recently read character back: step the read position
   of the current line back by one and subtract JAVA_COLUMN_DELTA (0)
   from its column counter.  */
163 java_unget_unicode ()
/* A read position of zero means there is nothing to step back to.  The
   statement that handles this case is elided from this listing.  */
165 if (!ctxp->c_line->current)
166 /* Can't unget unicode. */
169 ctxp->c_line->current--;
170 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Prepare ctxp->c_line to receive a new source line and bump the
   global line counter.  Several lines of this function (braces and at
   least some conditions) are elided from this listing, so the exact
   guards around the steps below cannot be seen here.  */
174 java_allocate_new_line ()
/* Save the one-character look-ahead of the line being replaced (and
   whether it came from a Unicode escape) so it can be carried over.  */
176 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
177 char ahead_escape_p = (ctxp->c_line ?
178 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that holds more than white space becomes the
   "previous" line (ctxp->p_line); the buffers of the old previous line
   are released first.  Presumably the frees are guarded by a check
   that p_line exists -- that line is not visible here.  */
180 if (ctxp->c_line && !ctxp->c_line->white_space_only)
184 free (ctxp->p_line->unicode_escape_p);
185 free (ctxp->p_line->line);
188 ctxp->p_line = ctxp->c_line;
189 ctxp->c_line = NULL; /* Reallocated */
/* Allocate a fresh line structure with room for JAVA_LINE_MAX
   characters plus the parallel "was a Unicode escape" array.
   Presumably done only when c_line is NULL -- TODO confirm.  */
194 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
195 ctxp->c_line->max = JAVA_LINE_MAX;
196 ctxp->c_line->line = (unicode_t *)xmalloc
197 (sizeof (unicode_t)*ctxp->c_line->max);
198 ctxp->c_line->unicode_escape_p =
199 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
200 ctxp->c_line->white_space_only = 0;
/* Reset the line to empty, with read position and column at zero.  */
203 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Store the saved look-ahead as the first character of the new line.
   Presumably conditional on a look-ahead being present -- the guard is
   not visible in this listing.  */
207 ctxp->c_line->line [ctxp->c_line->size] = ahead;
208 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209 ctxp->c_line->size++;
/* Clear the look-ahead, assign the next line number and start out
   assuming the line contains only white space.  */
211 ctxp->c_line->ahead [0] = 0;
212 ctxp->c_line->unicode_escape_ahead_p = 0;
213 ctxp->c_line->lineno = ++lineno;
214 ctxp->c_line->white_space_only = 1;
217 /* Create a new lexer object. */
220 java_new_lexer (finput, encoding)
222 const char *encoding;
224 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
227 lex->finput = finput;
229 lex->unget_value = 0;
233 lex->handle = iconv_open ("UCS-2", encoding);
234 if (lex->handle != (iconv_t) -1)
240 lex->read_anything = 0;
241 lex->use_fallback = 0;
243 /* Work around broken iconv() implementations by doing checking at
244 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245 then all UCS-2 encoders will be broken. Perhaps not a valid
253 handle = iconv_open ("UCS-2", "UTF-8");
254 if (handle != (iconv_t) -1)
261 /* This is the UTF-8 encoding of \ufeff. */
268 outp = (char *) &result;
271 r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
272 /* Conversion must be complete for us to use the result. */
273 if (r != (size_t) -1 && inc == 0 && outc == 0)
274 need_byteswap = (result != 0xfeff);
278 lex->byte_swap = need_byteswap;
281 #endif /* HAVE_ICONV */
283 /* If iconv failed, use the internal decoder if the default
284 encoding was requested. This code is used on platforms where
285 iconv exists but is insufficient for our needs. For
286 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. */
287 if (strcmp (encoding, DEFAULT_ENCODING))
291 lex->use_fallback = 1;
292 #endif /* HAVE_ICONV */
296 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option.", encoding);
/* Tear down the lexer object LEX.  Only the iconv clean-up is visible
   in this listing; any other release of LEX is in elided lines.  */
302 java_destroy_lexer (lex)
/* The iconv handle is closed only when the lexer is not using the
   fallback decoder (use_fallback is clear).  */
306 if (! lex->use_fallback)
307 iconv_close (lex->handle);
316 if (lex->unget_value)
318 unicode_t r = lex->unget_value;
319 lex->unget_value = 0;
324 if (! lex->use_fallback)
326 size_t ir, inbytesleft, in_save, out_count, out_save;
330 /* If there is data which has already been converted, use it. */
331 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
338 /* See if we need to read more data. If FIRST == 0 then
339 the previous conversion attempt ended in the middle of
340 a character at the end of the buffer. Otherwise we
341 only have to read if the buffer is empty. */
342 if (lex->first == 0 || lex->first >= lex->last)
346 if (lex->first >= lex->last)
351 if (feof (lex->finput))
353 r = fread (&lex->buffer[lex->last], 1,
354 sizeof (lex->buffer) - lex->last,
359 inbytesleft = lex->last - lex->first;
360 out_count = sizeof (lex->out_buffer) - lex->out_last;
362 if (inbytesleft == 0)
364 /* We've tried to read and there is nothing left. */
368 in_save = inbytesleft;
369 out_save = out_count;
370 inp = &lex->buffer[lex->first];
371 outp = &lex->out_buffer[lex->out_last];
372 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
375 /* If we haven't read any bytes, then look to see if we
377 if (! lex->read_anything && out_save - out_count >= 2)
379 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
385 else if (uc == 0xfffe)
390 lex->read_anything = 1;
396 for (i = 0; i < out_save - out_count; i += 2)
398 char t = lex->out_buffer[lex->out_last + i];
399 lex->out_buffer[lex->out_last + i]
400 = lex->out_buffer[lex->out_last + i + 1];
401 lex->out_buffer[lex->out_last + i + 1] = t;
405 lex->first += in_save - inbytesleft;
406 lex->out_last += out_save - out_count;
408 /* If we converted anything at all, move along. */
409 if (out_count != out_save)
412 if (ir == (size_t) -1)
416 /* This is ok. This means that the end of our buffer
417 is in the middle of a character sequence. We just
418 move the valid part of the buffer to the beginning
420 /* We use bcopy() because it should work for
421 overlapping strings. Use memmove() instead... */
422 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
423 lex->last - lex->first);
424 lex->last -= lex->first;
429 /* A more serious error. */
430 java_lex_error ("unrecognized character in input stream",
438 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
440 /* Don't have any data. */
445 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
450 #endif /* HAVE_ICONV */
453 c = getc (lex->finput);
461 if ((c & 0xe0) == 0xc0)
463 c1 = getc (lex->finput);
464 if ((c1 & 0xc0) == 0x80)
465 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
468 else if ((c & 0xf0) == 0xe0)
470 c1 = getc (lex->finput);
471 if ((c1 & 0xc0) == 0x80)
473 c2 = getc (lex->finput);
474 if ((c2 & 0xc0) == 0x80)
475 return (unicode_t)(((c & 0xf) << 12) +
476 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
484 /* We simply don't support invalid characters. */
485 java_lex_error ("malformed UTF-8 character", 0);
489 /* We only get here on error. */
/* Append character C to line L, recording in the parallel
   unicode_escape_p array whether C came from a Unicode escape
   (UNICODE_ESCAPE_P).  */
494 java_store_unicode (l, c, unicode_escape_p)
497 int unicode_escape_p;
/* When the line is full, grow both arrays by JAVA_LINE_MAX entries.  */
499 if (l->size == l->max)
501 l->max += JAVA_LINE_MAX;
502 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
503 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
504 sizeof (char)*l->max);
/* Store the character and its flag, then advance the line size.  */
506 l->line [l->size] = c;
507 l->unicode_escape_p [l->size++] = unicode_escape_p;
511 java_read_unicode (lex, unicode_escape_p)
513 int *unicode_escape_p;
517 c = java_read_char (lex);
518 *unicode_escape_p = 0;
527 if ((lex->bs_count) % 2 == 1)
529 /* Odd number of \ seen. */
530 c = java_read_char (lex);
533 unicode_t unicode = 0;
535 /* Next should be 4 hex digits, otherwise it's an error.
536 The hex value is converted into the unicode, pushed into
537 the Unicode stream. */
538 for (shift = 12; shift >= 0; shift -= 4)
540 if ((c = java_read_char (lex)) == UEOF)
542 if (c >= '0' && c <= '9')
543 unicode |= (unicode_t)((c-'0') << shift);
544 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
545 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
548 /* Recognize any number of u in \u. */
552 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
555 *unicode_escape_p = 1;
558 lex->unget_value = c;
560 return (unicode_t) '\\';
/* Read one character from LEX through java_read_unicode, storing in
   *UNICODE_ESCAPE_P whether it came from a Unicode escape.  As the
   comments below describe, a carriage return followed by a line feed
   is reported as a single line terminator.  The conditions that
   select each step are elided from this listing.  */
564 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
566 int *unicode_escape_p;
568 int c = java_read_unicode (lex, unicode_escape_p);
572 /* We have to read ahead to see if we got \r\n. In that case we
573 return a single line terminator. */
/* The escape flag of the look-ahead character is discarded.  */
575 c = java_read_unicode (lex, &dummy);
/* The look-ahead character is saved in the lexer's unget slot;
   presumably only when it is not the expected line feed -- the
   guarding condition is not visible here.  */
577 lex->unget_value = c;
578 /* In either case we must return a newline. */
588 /* It's time to read a line when... */
589 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
594 if (ctxp->lexer->hit_eof)
597 java_allocate_new_line ();
598 if (ctxp->c_line->line[0] != '\n')
602 int unicode_escape_p;
603 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
608 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
609 if (ctxp->c_line->white_space_only
610 && !JAVA_WHITE_SPACE_P (c)
612 ctxp->c_line->white_space_only = 0;
614 if ((c == '\n') || (c == UEOF))
618 if (c == UEOF && ! found_chars)
620 ctxp->lexer->hit_eof = 1;
625 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
626 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
627 return ctxp->c_line->line [ctxp->c_line->current++];
630 /* Parse the end of a C style comment.
631 * C is the first character following the '/' and '*'. */
633 java_parse_end_comment (c)
636 for ( ;; c = java_get_unicode ())
641 java_lex_error ("Comment not terminated at end of input", 0);
644 switch (c = java_get_unicode ())
647 java_lex_error ("Comment not terminated at end of input", 0);
651 case '*': /* reparse only '*' */
652 java_unget_unicode ();
658 /* Parse the documentation section. Keywords must be at the beginning
659 of a documentation comment line (ignoring white space and any `*'
660 character). Parsed keyword(s): @DEPRECATED. */
663 java_parse_doc_section (c)
666 int valid_tag = 0, seen_star = 0;
668 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
680 c = java_get_unicode();
684 java_lex_error ("Comment not terminated at end of input", 0);
686 if (seen_star && (c == '/'))
687 return 1; /* Goto step1 in caller */
689 /* We're parsing @deprecated */
690 if (valid_tag && (c == '@'))
695 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
697 c = java_get_unicode ();
698 tag [tag_index++] = c;
702 java_lex_error ("Comment not terminated at end of input", 0);
703 tag [tag_index] = '\0';
705 if (!strcmp (tag, "deprecated"))
706 ctxp->deprecated = 1;
708 java_unget_unicode ();
712 /* Return true if C is a valid start character for a Java identifier.
713 This is only called if C >= 128 -- smaller values are handled
714 inline. However, this function handles all values anyway. */
716 java_start_char_p (c)
/* type_table is indexed by the high part of C (C / 256), i.e. one
   entry per block of 256 characters.  */
719 unsigned int hi = c / 256;
720 char *page = type_table[hi];
721 unsigned long val = (unsigned long) page;
/* An entry with bits set outside LETTER_PART | LETTER_START is used as
   a pointer to a per-character flag array indexed by the low 8 bits of
   C.  The other branch is elided from this listing; presumably the
   entry value itself then serves as the flags for the whole block --
   confirm against the full source.  */
724 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
725 flags = page[c & 255];
/* Nonzero exactly when the LETTER_START flag is set for C.  */
729 return flags & LETTER_START;
732 /* Return true if C is a valid part character for a Java identifier.
733 This is only called if C >= 128 -- smaller values are handled
734 inline. However, this function handles all values anyway. */
/* The line carrying the function name is elided from this listing.
   type_table is indexed by the high part of C (C / 256), i.e. one
   entry per block of 256 characters.  */
739 unsigned int hi = c / 256;
740 char *page = type_table[hi];
741 unsigned long val = (unsigned long) page;
/* An entry with bits set outside LETTER_PART | LETTER_START is used as
   a pointer to a per-character flag array indexed by the low 8 bits of
   C.  The other branch is elided from this listing; presumably the
   entry value itself then serves as the flags for the whole block --
   confirm against the full source.  */
744 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
745 flags = page[c & 255];
/* Nonzero exactly when the LETTER_PART flag is set for C.  */
749 return flags & LETTER_PART;
753 java_parse_escape_sequence ()
758 switch (c = java_get_unicode ())
761 return (unicode_t)0x8;
763 return (unicode_t)0x9;
765 return (unicode_t)0xa;
767 return (unicode_t)0xc;
769 return (unicode_t)0xd;
771 return (unicode_t)0x22;
773 return (unicode_t)0x27;
775 return (unicode_t)0x5c;
776 case '0': case '1': case '2': case '3': case '4':
777 case '5': case '6': case '7':
780 int octal_escape_index = 0;
784 for (; octal_escape_index < max && RANGE (c, '0', '7');
785 c = java_get_unicode ())
787 if (octal_escape_index == 0 && c > '3')
789 /* According to the grammar, `\477' has a well-defined
790 meaning -- it is `\47' followed by `7'. */
793 octal_escape [octal_escape_index++] = c;
796 java_unget_unicode ();
798 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
799 i < octal_escape_index; i++, shift -= 3)
800 char_lit |= (octal_escape [i] - '0') << shift;
805 java_lex_error ("Invalid character in escape sequence", 0);
806 return JAVA_CHAR_ERROR;
810 /* Isolate the code which may raise an arithmetic exception in its
819 int number_beginning;
822 #ifdef REAL_ARITHMETIC
823 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
825 #define IS_ZERO(X) ((X) == 0)
828 static void java_perform_atof PARAMS ((PTR));
831 java_perform_atof (av)
834 struct jpa_args *a = (struct jpa_args *)av;
835 YYSTYPE *java_lval = a->java_lval;
836 int number_beginning = a->number_beginning;
837 REAL_VALUE_TYPE value;
838 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
840 SET_REAL_VALUE_ATOF (value,
841 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
843 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
845 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
848 else if (IS_ZERO (value))
850 /* We check to see if the value is really 0 or if we've found an
851 underflow. We do this in the most primitive imaginable way. */
853 char *p = a->literal_token;
856 while (*p && *p != 'e' && *p != 'E')
858 if (*p != '0' && *p != '.')
867 int i = ctxp->c_line->current;
868 ctxp->c_line->current = number_beginning;
869 java_lex_error ("Floating point literal underflow", 0);
870 ctxp->c_line->current = i;
874 SET_LVAL_NODE_TYPE (build_real (type, value), type);
878 static int yylex PARAMS ((YYSTYPE *));
889 unicode_t first_unicode;
890 int ascii_index, all_ascii;
893 /* Translation of the Unicode escape in the raw stream of Unicode
894 characters. Takes care of line terminator. */
896 /* Skip white spaces: SP, TAB and FF or ULT */
897 for (c = java_get_unicode ();
898 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
901 ctxp->elc.line = ctxp->c_line->lineno;
902 ctxp->elc.col = ctxp->c_line->char_col-2;
905 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
907 if (c == 0x1a) /* CTRL-Z */
909 if ((c = java_get_unicode ()) == UEOF)
910 return 0; /* Ok here */
912 java_unget_unicode (); /* Caught later, at the end of the function */
914 /* Handle EOF here */
915 if (c == UEOF) /* Should probably do something here... */
918 /* Take care of possible comments. */
921 switch (c = java_get_unicode ())
926 c = java_get_unicode ();
929 /* It is ok to end a `//' comment with EOF, unless
930 we're being pedantic. */
932 java_lex_error ("Comment not terminated at end of input",
936 if (c == '\n') /* ULT */
942 if ((c = java_get_unicode ()) == '*')
944 if ((c = java_get_unicode ()) == '/')
945 goto step1; /* Empy documentation comment */
946 else if (java_parse_doc_section (c))
950 java_parse_end_comment ((c = java_get_unicode ()));
954 java_unget_unicode ();
960 ctxp->elc.line = ctxp->c_line->lineno;
961 ctxp->elc.prev_col = ctxp->elc.col;
962 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
963 if (ctxp->elc.col < 0)
966 /* Numeric literals */
967 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
969 /* This section of code is borrowed from gcc/c-lex.c */
970 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
971 int parts[TOTAL_PARTS];
972 HOST_WIDE_INT high, low;
973 /* End borrowed section */
974 char literal_token [256];
975 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
978 int number_beginning = ctxp->c_line->current;
981 /* We might have a . separator instead of a FP like .[0-9]* */
984 unicode_t peep = java_sneak_unicode ();
986 if (!JAVA_ASCII_DIGIT (peep))
989 BUILD_OPERATOR (DOT_TK);
993 for (i = 0; i < TOTAL_PARTS; i++)
998 c = java_get_unicode ();
999 if (c == 'x' || c == 'X')
1002 c = java_get_unicode ();
1004 else if (JAVA_ASCII_DIGIT (c))
1008 /* Push the '.' back and prepare for a FP parsing... */
1009 java_unget_unicode ();
1014 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1015 JAVA_LEX_LIT ("0", 10);
1019 SET_LVAL_NODE (long_zero_node);
1020 return (INT_LIT_TK);
1022 SET_LVAL_NODE (float_zero_node);
1025 SET_LVAL_NODE (double_zero_node);
1028 java_unget_unicode ();
1029 SET_LVAL_NODE (integer_zero_node);
1030 return (INT_LIT_TK);
1034 /* Parse the first part of the literal, until we find something
1035 which is not a number. */
1036 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1037 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1038 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1040 /* We store in a string (in case it turns out to be a FP) and in
1041 PARTS if we have to process an integer literal. */
1042 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
1045 literal_token [literal_index++] = c;
1046 /* This section of code is borrowed from gcc/c-lex.c */
1047 for (count = 0; count < TOTAL_PARTS; count++)
1049 parts[count] *= radix;
1052 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1053 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1056 parts[0] += numeric;
1058 if (parts [TOTAL_PARTS-1] != 0)
1060 /* End borrowed section. */
1061 c = java_get_unicode ();
1064 /* If we have something from the FP char set but not a digit, parse
1066 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1069 int seen_digit = (literal_index ? 1 : 0);
1070 int seen_exponent = 0;
1071 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1072 double unless specified. */
1074 /* It is ok if the radix is 8 because this just means we've
1075 seen a leading `0'. However, radix==16 is invalid. */
1077 java_lex_error ("Can't express non-decimal FP literal", 0);
1087 literal_token [literal_index++ ] = c;
1088 c = java_get_unicode ();
1091 java_lex_error ("Invalid character in FP literal", 0);
1094 if (c == 'e' || c == 'E')
1098 /* {E,e} must have seen at least one digit */
1100 java_lex_error ("Invalid FP literal", 0);
1104 literal_token [literal_index++] = c;
1105 c = java_get_unicode ();
1108 java_lex_error ("Invalid character in FP literal", 0);
1110 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1112 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1113 stage = 4; /* So we fall through */
1116 if ((c=='-' || c =='+') && stage == 2)
1119 literal_token [literal_index++] = c;
1120 c = java_get_unicode ();
1123 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1124 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1125 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1126 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1128 if (JAVA_ASCII_DIGIT (c))
1130 literal_token [literal_index++ ] = c;
1131 c = java_get_unicode ();
1138 if (stage != 4) /* Don't push back fF/dD */
1139 java_unget_unicode ();
1141 /* An exponent (if any) must have seen a digit. */
1142 if (seen_exponent && !seen_digit)
1143 java_lex_error ("Invalid FP literal", 0);
1145 literal_token [literal_index] = '\0';
1146 JAVA_LEX_LIT (literal_token, radix);
1149 a.literal_token = literal_token;
1151 a.java_lval = java_lval;
1152 a.number_beginning = number_beginning;
1153 if (do_float_handler (java_perform_atof, (PTR) &a))
1156 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1162 } /* JAVA_ASCCI_FPCHAR (c) */
1164 /* Here we get back to converting the integral literal. */
1165 if (c == 'L' || c == 'l')
1167 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1168 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1169 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1170 java_lex_error ("Digit out of range in octal literal", 0);
1171 else if (radix == 16 && !literal_index)
1172 java_lex_error ("No digit specified for hexadecimal literal", 0);
1174 java_unget_unicode ();
1176 #ifdef JAVA_LEX_DEBUG
1177 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1178 JAVA_LEX_LIT (literal_token, radix);
1180 /* This section of code is borrowed from gcc/c-lex.c */
1183 bytes = GET_TYPE_PRECISION (long_type_node);
1184 for (i = bytes; i < TOTAL_PARTS; i++)
1192 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1194 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1195 / HOST_BITS_PER_CHAR)]
1196 << (i * HOST_BITS_PER_CHAR));
1197 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1199 /* End borrowed section. */
1201 /* Range checking */
1204 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1205 9223372036854775807L is the biggest `long' literal that can be
1206 expressed using a 10 radix. For other radixes, everything that
1207 fits within 64 bits is OK. */
1208 int hb = (high >> 31);
1209 if (overflow || (hb && low && radix == 10) ||
1210 (hb && high & 0x7fffffff && radix == 10) ||
1211 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1212 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1216 /* 2147483648 is valid if operand of a '-'. Otherwise,
1217 2147483647 is the biggest `int' literal that can be
1218 expressed using a 10 radix. For other radixes, everything
1219 that fits within 32 bits is OK. As all literals are
1220 signed, we sign extend here. */
1221 int hb = (low >> 31) & 0x1;
1222 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1223 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1224 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1227 ctxp->minus_seen = 0;
1228 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1229 (long_suffix ? long_type_node : int_type_node));
1233 ctxp->minus_seen = 0;
1235 /* Character literals */
1239 if ((c = java_get_unicode ()) == '\\')
1240 char_lit = java_parse_escape_sequence ();
1243 if (c == '\n' || c == '\'')
1244 java_lex_error ("Invalid character literal", 0);
1248 c = java_get_unicode ();
1250 if ((c == '\n') || (c == UEOF))
1251 java_lex_error ("Character literal not terminated at end of line", 0);
1253 java_lex_error ("Syntax error in character literal", 0);
1255 if (char_lit == JAVA_CHAR_ERROR)
1256 char_lit = 0; /* We silently convert it to zero */
1258 JAVA_LEX_CHAR_LIT (char_lit);
1259 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1263 /* String literals */
1269 for (no_error = 1, c = java_get_unicode ();
1270 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1273 c = java_parse_escape_sequence ();
1274 if (c == JAVA_CHAR_ERROR)
1277 c = 0; /* We silently convert it to zero. */
1279 java_unicode_2_utf8 (c);
1281 if (c == '\n' || c == UEOF) /* ULT */
1283 lineno--; /* Refer to the line the terminator was seen */
1284 java_lex_error ("String not terminated at end of line.", 0);
1288 obstack_1grow (&temporary_obstack, '\0');
1289 string = obstack_finish (&temporary_obstack);
1291 if (!no_error || (c != '"'))
1292 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1294 java_lval->node = build_string (strlen (string), string);
1296 obstack_free (&temporary_obstack, string);
1297 return STRING_LIT_TK;
1305 BUILD_OPERATOR (OP_TK);
1311 if (ctxp->ccb_indent == 1)
1312 ctxp->first_ccb_indent1 = lineno;
1314 BUILD_OPERATOR (OCB_TK);
1318 if (ctxp->ccb_indent == 1)
1319 ctxp->last_ccb_indent1 = lineno;
1320 BUILD_OPERATOR (CCB_TK);
1323 BUILD_OPERATOR (OSB_TK);
1335 BUILD_OPERATOR (DOT_TK);
1336 /* return DOT_TK; */
1343 if ((c = java_get_unicode ()) == '=')
1345 BUILD_OPERATOR (EQ_TK);
1349 /* Equals is used in two different locations. In the
1350 variable_declarator: rule, it has to be seen as '=' as opposed
1351 to being seen as an ordinary assignment operator in
1352 assignment_operators: rule. */
1353 java_unget_unicode ();
1354 BUILD_OPERATOR (ASSIGN_TK);
1358 switch ((c = java_get_unicode ()))
1361 BUILD_OPERATOR (GTE_TK);
1363 switch ((c = java_get_unicode ()))
1366 if ((c = java_get_unicode ()) == '=')
1368 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1372 java_unget_unicode ();
1373 BUILD_OPERATOR (ZRS_TK);
1376 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1378 java_unget_unicode ();
1379 BUILD_OPERATOR (SRS_TK);
1382 java_unget_unicode ();
1383 BUILD_OPERATOR (GT_TK);
1387 switch ((c = java_get_unicode ()))
1390 BUILD_OPERATOR (LTE_TK);
1392 if ((c = java_get_unicode ()) == '=')
1394 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1398 java_unget_unicode ();
1399 BUILD_OPERATOR (LS_TK);
1402 java_unget_unicode ();
1403 BUILD_OPERATOR (LT_TK);
1407 switch ((c = java_get_unicode ()))
1410 BUILD_OPERATOR (BOOL_AND_TK);
1412 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1414 java_unget_unicode ();
1415 BUILD_OPERATOR (AND_TK);
1419 switch ((c = java_get_unicode ()))
1422 BUILD_OPERATOR (BOOL_OR_TK);
1424 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1426 java_unget_unicode ();
1427 BUILD_OPERATOR (OR_TK);
1431 switch ((c = java_get_unicode ()))
1434 BUILD_OPERATOR (INCR_TK);
1436 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1438 java_unget_unicode ();
1439 BUILD_OPERATOR (PLUS_TK);
1443 switch ((c = java_get_unicode ()))
1446 BUILD_OPERATOR (DECR_TK);
1448 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1450 java_unget_unicode ();
1451 ctxp->minus_seen = 1;
1452 BUILD_OPERATOR (MINUS_TK);
1456 if ((c = java_get_unicode ()) == '=')
1458 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1462 java_unget_unicode ();
1463 BUILD_OPERATOR (MULT_TK);
1467 if ((c = java_get_unicode ()) == '=')
1469 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1473 java_unget_unicode ();
1474 BUILD_OPERATOR (DIV_TK);
1478 if ((c = java_get_unicode ()) == '=')
1480 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1484 java_unget_unicode ();
1485 BUILD_OPERATOR (XOR_TK);
1489 if ((c = java_get_unicode ()) == '=')
1491 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1495 java_unget_unicode ();
1496 BUILD_OPERATOR (REM_TK);
1500 if ((c = java_get_unicode()) == '=')
1502 BUILD_OPERATOR (NEQ_TK);
1506 java_unget_unicode ();
1507 BUILD_OPERATOR (NEG_TK);
1512 BUILD_OPERATOR (REL_QM_TK);
1515 BUILD_OPERATOR (REL_CL_TK);
1517 BUILD_OPERATOR (NOT_TK);
1520 /* Keyword, boolean literal or null literal */
1521 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1522 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1524 java_unicode_2_utf8 (c);
1525 if (all_ascii && c >= 128)
1530 obstack_1grow (&temporary_obstack, '\0');
1531 string = obstack_finish (&temporary_obstack);
1532 java_unget_unicode ();
1534 /* If we have something all ascii, we consider a keyword, a boolean
1535 literal, a null literal or an all ASCII identifier. Otherwise,
1536 this is an identifier (possibly not respecting formation rule). */
1539 struct java_keyword *kw;
1540 if ((kw=java_keyword (string, ascii_index)))
1542 JAVA_LEX_KW (string);
1545 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1546 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1547 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1548 case PRIVATE_TK: case STRICT_TK:
1549 SET_MODIFIER_CTX (kw->token);
1552 SET_LVAL_NODE (float_type_node);
1555 SET_LVAL_NODE (double_type_node);
1558 SET_LVAL_NODE (boolean_type_node);
1561 SET_LVAL_NODE (byte_type_node);
1564 SET_LVAL_NODE (short_type_node);
1567 SET_LVAL_NODE (int_type_node);
1570 SET_LVAL_NODE (long_type_node);
1573 SET_LVAL_NODE (char_type_node);
1576 /* Keyword based literals */
1579 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1580 boolean_true_node : boolean_false_node));
1583 SET_LVAL_NODE (null_pointer_node);
1586 /* Some keyword we want to retain information on the location
1599 BUILD_OPERATOR (kw->token);
1607 /* We may have an ID here */
1608 if (JAVA_START_CHAR_P (first_unicode))
1610 JAVA_LEX_ID (string);
1611 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1615 /* Everything else is an invalid character in the input */
1617 char lex_error_buffer [128];
1618 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1619 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1620 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of the character UNICODE to
   temporary_obstack, one byte at a time.  */
1626 java_unicode_2_utf8 (unicode)
/* Values 0x01 through 0x7f are emitted as a single byte.  */
1629 if (RANGE (unicode, 0x01, 0x7f))
1630 obstack_1grow (&temporary_obstack, (char)unicode);
/* Values 0x80 through 0x7ff use the two-byte form.  The value 0 is
   deliberately routed here as well, so a zero character is written as
   the pair 0xc0 0x80 and never puts a zero byte into the buffer.  */
1631 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1633 obstack_1grow (&temporary_obstack,
1634 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1635 obstack_1grow (&temporary_obstack,
1636 (unsigned char)(0x80 | (unicode & 0x3f)));
/* Everything else uses the three-byte form: top 4 bits, middle 6
   bits, low 6 bits.  */
1638 else /* Range 0x800-0xffff */
1640 obstack_1grow (&temporary_obstack,
1641 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1642 obstack_1grow (&temporary_obstack,
1643 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1644 obstack_1grow (&temporary_obstack,
1645 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, attaching the current file name and
   the line/column recorded in ctxp->elc, and clear the type of the
   resulting node.  The return statement is elided from this listing.  */
1651 build_wfl_node (node)
1654 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1655 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1656 TREE_TYPE (node) = NULL_TREE;
/* Report the lexical error MSG.  The visible part records the error
   position in ctxp->elc -- the current line number, and the current
   column minus one plus FORWARD -- and clears ctxp->java_error_flag.
   The statements that actually emit MSG are elided from this listing.
   NOTE(review): both parameters are marked ATTRIBUTE_UNUSED although
   FORWARD is used below; presumably part of the body is conditionally
   compiled -- confirm against the full source.  */
1662 java_lex_error (msg, forward)
1663 const char *msg ATTRIBUTE_UNUSED;
1664 int forward ATTRIBUTE_UNUSED;
1667 ctxp->elc.line = ctxp->c_line->lineno;
1668 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1670 /* Might be caught in the middle of some error report */
1671 ctxp->java_error_flag = 0;
1688 if (next != '\n' && next != EOF)
1700 java_get_line_col (filename, line, col)
1701 const char *filename ATTRIBUTE_UNUSED;
1702 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1707 /* Dumb implementation. Doesn't try to cache or optimize things. */
1708 /* First line of the file is line 1, first column is 1 */
1710 /* COL == -1 means, at the CR/LF in LINE */
1711 /* COL == -2 means, at the first non space char in LINE */
1714 int c, ccol, cline = 1;
1715 int current_line_col = 0;
1716 int first_non_space = 0;
1719 if (!(fp = fopen (filename, "r")))
1720 fatal_io_error ("can't open %s", filename);
1722 while (cline != line)
1727 static char msg[] = "<<file too short - unexpected EOF>>";
1728 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1731 if (java_is_eol (fp, c))
1735 /* Gather the chars of the current line in a buffer */
1739 if (c < 0 || java_is_eol (fp, c))
1741 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1742 first_non_space = current_line_col;
1743 obstack_1grow (&temporary_obstack, c);
1748 obstack_1grow (&temporary_obstack, '\n');
1752 col = current_line_col;
1753 first_non_space = 0;
1756 col = first_non_space;
1758 first_non_space = 0;
1760 /* Place the '^' at the right position */
1761 base = obstack_base (&temporary_obstack);
1762 for (ccol = 1; ccol <= col+3; ccol++)
1764 /* Compute \t when reaching first_non_space */
1765 char c = (first_non_space ?
1766 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1767 obstack_1grow (&temporary_obstack, c);
1769 obstack_grow0 (&temporary_obstack, "^", 1);
1772 return obstack_finish (&temporary_obstack);
/* Compare the UTF-8 text STR, LENGTH bytes long, against the
   NUL-terminated string NAME.  Returns 0 when NAME is exhausted at the
   same point where STR has been fully consumed.  */
1778 utf8_cmp (str, length, name)
1779 const unsigned char *str;
1783 const unsigned char *limit = str + length;
/* Decode one character from STR for each character of NAME.  */
1786 for (i = 0; name[i]; ++i)
1788 int ch = UTF8_GET (str, limit);
/* Return the difference between the decoded character and NAME's.
   Presumably only taken when the two differ -- the guarding condition
   is elided from this listing.  */
1790 return ch - name[i];
/* NAME is exhausted: equal only if STR was consumed up to its limit,
   otherwise 1 is returned.  */
1793 return str == limit ? 0 : 1;
1796 /* A sorted list of all C++ keywords. */
1798 static const char *cxx_keywords[] =
1833 /* Return true if NAME is a C++ keyword. */
1836 cxx_keyword_p (name, length)
1840 int last = ARRAY_SIZE (cxx_keywords);
1842 int mid = (last + first) / 2;
1845 for (mid = (last + first) / 2;
1847 old = mid, mid = (last + first) / 2)
1849 int kwl = strlen (cxx_keywords[mid]);
1850 int min_length = kwl > length ? length : kwl;
1851 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1856 /* We've found a match if all the remaining characters are
1858 for (i = min_length; i < length && name[i] == '$'; ++i)
1872 #endif /* JC1_LITE */