1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* This file defines java_lex (yylex), which reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
/* Obstack defined in another file; the string-literal code below
   allocates the text of each STRING_CST from it.  */
40 extern struct obstack *expression_obstack;
43 /* Function declarations */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
/* NOTE(review): this repeats the java_store_unicode prototype given a
   few lines above; redundant but harmless.  */
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
/* The only prototype in this list that is not `static'.  */
64 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
/* Prepare the global lexer/parser state for a new input: create the
   identifiers and shared WFL nodes used by the parser (several of them
   only when not already set), reset fields of the current parser
   context (ctxp), and create the lexer that reads FINPUT using the
   character encoding named ENCODING.  */
67 java_init_lex (finput, encoding)
72 int java_lang_imported = 0;
75 java_lang_id = get_identifier ("java.lang");
76 if (!java_lang_cloneable)
77 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
78 if (!java_io_serializable)
79 java_io_serializable = get_identifier ("java.io.Serializable");
81 inst_id = get_identifier ("inst$");
83 wpv_id = get_identifier ("write_parm_value$");
/* Put java.lang at the head of the context's import-on-demand list.
   NOTE(review): as visible here java_lang_imported is initialised to 0
   just above, so this test would always succeed; confirm whether the
   flag was meant to persist across calls.  */
85 if (!java_lang_imported)
87 tree node = build_tree_list
88 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
89 read_import_dir (TREE_PURPOSE (node));
90 TREE_CHAIN (node) = ctxp->import_demand_list;
91 ctxp->import_demand_list = node;
92 java_lang_imported = 1;
96 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
98 label_id = get_identifier ("$L");
100 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
101 if (!wfl_string_buffer)
103 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
105 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
107 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
108 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
/* NOTE(review): 11 is a hard-coded element count for modifier_ctx;
   confirm it matches the declared size of that array.  */
110 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
111 bzero ((PTR) current_jcf, sizeof (JCF));
112 ctxp->current_parsed_class = NULL;
113 ctxp->package = NULL_TREE;
/* Line counting restarts at 0 for the new input, both in the context
   and in the global lineno.  */
116 ctxp->filename = input_filename;
117 ctxp->lineno = lineno = 0;
120 ctxp->minus_seen = 0;
121 ctxp->java_error_flag = 0;
122 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE into a static buffer: as a backslash-u
   escape with four hex digits when the character was written as a
   Unicode escape or its code is greater than 128, otherwise as the
   character itself.  Because the buffer is static, each call overwrites
   the text produced by the previous one.
   NOTE(review): the test is `> 128', so code 128 itself takes the
   plain-character path; confirm whether `>= 128' was intended.  */
126 java_sprint_unicode (line, i)
127 struct java_line *line;
130 static char buffer [10];
131 if (line->unicode_escape_p [i] || line->line [i] > 128)
132 sprintf (buffer, "\\u%04x", line->line [i]);
135 buffer [0] = line->line [i];
/* Return the character at the current read position of the current
   line without advancing that position.  */
142 java_sneak_unicode ()
144 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Push the last character back: move the current line's read position
   one character backwards and reduce the column count accordingly.  It
   is a fatal error to call this when the read position is already at
   the start of the line.  */
148 java_unget_unicode ()
150 if (!ctxp->c_line->current)
151 fatal ("can't unget unicode - java_unget_unicode");
152 ctxp->c_line->current--;
153 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Make ctxp->c_line ready to receive a new line of input.  The
   look-ahead character (and its escape flag) left on the old line is
   carried over as the first character of the new one, and the global
   lineno is incremented.  */
157 java_allocate_new_line ()
/* Save the look-ahead before the old line is recycled; both default to
   0 when there is no current line.  */
159 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
160 char ahead_escape_p = (ctxp->c_line ?
161 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that held more than white space becomes the previous
   line (p_line); the arrays of the old previous line are freed.  */
163 if (ctxp->c_line && !ctxp->c_line->white_space_only)
167 free (ctxp->p_line->unicode_escape_p);
168 free (ctxp->p_line->line);
171 ctxp->p_line = ctxp->c_line;
172 ctxp->c_line = NULL; /* Reallocated */
/* Allocate a line record with room for JAVA_LINE_MAX characters and a
   parallel array of escape flags.  */
177 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
178 ctxp->c_line->max = JAVA_LINE_MAX;
179 ctxp->c_line->line = (unicode_t *)xmalloc
180 (sizeof (unicode_t)*ctxp->c_line->max);
181 ctxp->c_line->unicode_escape_p =
182 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
183 ctxp->c_line->white_space_only = 0;
/* Reset the line to empty.  */
186 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
187 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Store the carried-over look-ahead character at the end of the (empty)
   line.  */
190 ctxp->c_line->line [ctxp->c_line->size] = ahead;
191 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
192 ctxp->c_line->size++;
194 ctxp->c_line->ahead [0] = 0;
195 ctxp->c_line->unicode_escape_ahead_p = 0;
196 ctxp->c_line->lineno = ++lineno;
197 ctxp->c_line->white_space_only = 1;
200 /* Create a new lexer object.  The object is allocated with xmalloc,
   records FINPUT as its input stream and starts with no pushed-back
   character.  With HAVE_ICONV a converter from ENCODING to "UCS-2" is
   opened; without it ENCODING is compared against DEFAULT_ENCODING.
   An encoding that cannot be handled is reported with a fatal
   "unknown encoding" error.  */
202 java_new_lexer (finput, encoding)
204 const char *encoding;
206 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
209 lex->finput = finput;
211 lex->unget_value = 0;
214 lex->handle = iconv_open ("UCS-2", encoding);
215 if (lex->handle == (iconv_t) -1)
217 /* FIXME: we should give a nice error based on errno here. */
222 #else /* HAVE_ICONV */
223 if (strcmp (encoding, DEFAULT_ENCODING))
225 #endif /* HAVE_ICONV */
228 fatal ("unknown encoding: `%s'", encoding);
/* Dispose of lexer LEX; the visible part closes its iconv conversion
   handle.  */
234 java_destroy_lexer (lex)
238 iconv_close (lex->handle);
/* Body of the routine that reads one character from lexer LEX
   (presumably java_read_char, whose header line is not visible here).
   A pushed-back value in LEX->unget_value is consumed first.  Otherwise
   the next character is decoded from the input: through iconv when
   HAVE_ICONV is defined, or as UTF-8 read directly from LEX->finput.  */
247 if (lex->unget_value)
249 unicode_t r = lex->unget_value;
250 lex->unget_value = 0;
257 size_t ir, inbytesleft, in_save, out_count;
262 /* See if we need to read more data. If FIRST == 0 then the
263 previous conversion attempt ended in the middle of a
264 character at the end of the buffer. Otherwise we only have
265 to read if the buffer is empty. */
266 if (lex->first == 0 || lex->first >= lex->last)
270 if (lex->first >= lex->last)
275 if (feof (lex->finput))
/* Refill the buffer from LAST up to its end.  */
277 r = fread (&lex->buffer[lex->last], 1,
278 sizeof (lex->buffer) - lex->last,
283 inbytesleft = lex->last - lex->first;
285 if (inbytesleft == 0)
287 /* We've tried to read and there is nothing left. */
291 in_save = inbytesleft;
293 inp = &lex->buffer[lex->first];
/* Convert from the buffered bytes; afterwards FIRST is advanced by the
   number of input bytes the conversion consumed.  */
295 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
297 lex->first += in_save - inbytesleft;
301 /* Success. We assume that UCS-2 is big-endian. This
302 appears to be an ok assumption. */
304 result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
308 if (ir == (size_t) -1)
312 /* This is ok. This means that the end of our buffer
313 is in the middle of a character sequence. We just
314 move the valid part of the buffer to the beginning
316 /* We use bcopy() because it should work for
317 overlapping strings. Use memmove() instead... */
318 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
319 lex->last - lex->first);
320 lex->last -= lex->first;
325 /* A more serious error. */
326 java_lex_error ("unrecognized character in input stream", 0);
332 #else /* HAVE_ICONV */
/* Without iconv the input is decoded as UTF-8: a lead byte of the form
   110xxxxx starts a two-byte sequence and 1110xxxx a three-byte one;
   every continuation byte must have the form 10xxxxxx.  */
335 c = getc (lex->finput);
343 if ((c & 0xe0) == 0xc0)
345 c1 = getc (lex->finput);
346 if ((c1 & 0xc0) == 0x80)
347 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
350 else if ((c & 0xf0) == 0xe0)
352 c1 = getc (lex->finput);
353 if ((c1 & 0xc0) == 0x80)
355 c2 = getc (lex->finput);
356 if ((c2 & 0xc0) == 0x80)
357 return (unicode_t)(((c & 0xf) << 12) +
358 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
366 /* We simply don't support invalid characters. */
367 java_lex_error ("malformed UTF-8 character", 0);
370 #endif /* HAVE_ICONV */
372 /* We only get here on error. */
/* Append character C, together with its UNICODE_ESCAPE_P flag, to line
   L.  When the line is full, both the character array and the flag
   array are grown by JAVA_LINE_MAX entries first.  */
377 java_store_unicode (l, c, unicode_escape_p)
380 int unicode_escape_p;
382 if (l->size == l->max)
384 l->max += JAVA_LINE_MAX;
385 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
386 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
387 sizeof (char)*l->max);
389 l->line [l->size] = c;
/* The flag goes in the same slot as the character; SIZE is bumped
   afterwards.  */
390 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character from LEX, translating a backslash-u escape into
   the character it denotes.  *UNICODE_ESCAPE_P is set to 1 when the
   result came from such an escape and to 0 otherwise.  When
   TERM_CONTEXT is nonzero the character is returned as read; otherwise
   it is first checked with java_lineterminator, and the escape path
   returns '\n' for a line terminator.  */
394 java_read_unicode (lex, term_context, unicode_escape_p)
397 int *unicode_escape_p;
401 c = java_read_char (lex);
402 *unicode_escape_p = 0;
407 return (term_context ? c : (java_lineterminator (c)
/* Only a backslash preceded by an even number of backslashes can start
   an escape, hence the test on the parity of the running count.  */
413 if ((lex->bs_count) % 2 == 1)
415 /* Odd number of \ seen. */
416 c = java_read_char (lex);
419 unicode_t unicode = 0;
421 /* Next should be 4 hex digits, otherwise it's an error.
422 The hex value is converted into the unicode, pushed into
423 the Unicode stream. */
/* Digits arrive most significant first, so the shift goes 12, 8, 4, 0.  */
424 for (shift = 12; shift >= 0; shift -= 4)
426 if ((c = java_read_char (lex)) == UEOF)
428 if (c >= '0' && c <= '9')
429 unicode |= (unicode_t)((c-'0') << shift);
430 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/* `c | 0x20' folds an upper-case hex letter to lower case.  */
431 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
433 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
435 *unicode_escape_p = 1;
437 ? unicode : (java_lineterminator (c) ? '\n' : unicode));
/* Not an escape: keep the character just read for the next call and
   hand back the backslash itself.  */
439 lex->unget_value = c;
441 return (unicode_t) '\\';
/* Body of the routine that returns the next character of the input
   (presumably java_get_unicode, whose header line is not visible
   here).  Input is buffered a line at a time in ctxp->c_line; when
   that line is missing or used up, a new one is allocated and filled
   up to and including the next '\n' or UEOF.  */
447 /* It's time to read a line when... */
448 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
451 java_allocate_new_line ();
/* If the character carried over into the new line is itself a newline,
   the line is already complete and nothing more is read.  */
452 if (ctxp->c_line->line[0] != '\n')
455 int unicode_escape_p;
456 c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
457 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
/* The first character that is neither white space nor a newline clears
   the line's white_space_only flag.  */
458 if (ctxp->c_line->white_space_only
459 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
460 ctxp->c_line->white_space_only = 0;
461 if ((c == '\n') || (c == UEOF))
/* Account for the column, then return the current character and step
   past it.  */
465 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
466 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
467 return ctxp->c_line->line [ctxp->c_line->current++];
/* Test whether C is a line terminator.  LF is recognised directly.
   For CR the following character is read (in terminator context) so
   that a CR LF pair can be recognised; a character that does not
   belong to the terminator is handed back, either through the lexer's
   unget_value or as the current line's look-ahead character.  The
   conditions selecting between these two are not visible in this
   excerpt.  */
471 java_lineterminator (c)
474 if (c == '\n') /* LF */
476 else if (c == '\r') /* CR */
478 int unicode_escape_p;
479 c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
482 /* In this case we will have another terminator. For some
483 reason the lexer has several different unget methods. We
484 can't use the `ahead' method because then the \r will end
485 up in the actual text of the line, causing an error. So
486 instead we choose a very low-level method. FIXME: this
487 is incredibly ugly. */
488 ctxp->lexer->unget_value = c;
/* Otherwise remember the character, with its escape flag, as the
   look-ahead that java_allocate_new_line carries into the next line.  */
492 ctxp->c_line->ahead [0] = c;
493 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
501 /* Parse the end of a C style comment.
502 * C is the first character following the '/' and '*'.
   Characters are consumed until the closing sequence is found; hitting
   the end of input first is reported as an unterminated comment.  */
504 java_parse_end_comment (c)
508 for ( ;; c = java_get_unicode ())
513 java_lex_error ("Comment not terminated at end of input", 0);
/* Look at the character that follows the one just examined.  */
515 switch (c = java_get_unicode ())
518 java_lex_error ("Comment not terminated at end of input", 0);
/* A second '*' may itself begin the closing sequence, so it is pushed
   back and examined again.  */
521 case '*': /* reparse only '*' */
522 java_unget_unicode ();
528 /* Parse the documentation section. Keywords must be at the beginning
529 of a documentation comment line (ignoring white space and any `*'
530 character). Parsed keyword(s): @deprecated.  C is the first
   character of the section.  A return value of 1 tells the caller that
   the end of the comment was reached here.  */
533 java_parse_doc_section (c)
536 int valid_tag = 0, seen_star = 0;
/* Skip the white space, newlines and '*' characters that may precede a
   tag.  */
538 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
550 c = java_get_unicode();
554 java_lex_error ("Comment not terminated at end of input", 0);
556 if (seen_star && (c == '/'))
557 return 1; /* Goto step1 in caller */
559 /* We're parsing @deprecated */
560 if (valid_tag && (c == '@'))
/* Collect up to 10 characters following the '@'.  The character that
   ends the loop (a space or newline) is stored in TAG as well.
   NOTE(review): TAG's declaration is not visible in this excerpt.  A
   10-character tag such as "deprecated" leaves tag_index at 10, so the
   terminating '\0' below is stored at tag[10]; confirm that TAG has at
   least 11 elements.  */
565 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
567 c = java_get_unicode ();
568 tag [tag_index++] = c;
572 java_lex_error ("Comment not terminated at end of input", 0);
573 tag [tag_index] = '\0';
575 if (!strcmp (tag, "deprecated"))
576 ctxp->deprecated = 1;
578 java_unget_unicode ();
582 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
583 will return a wrong result.  It is a function wrapper around the
   _JAVA_LETTER_OR_DIGIT_P macro applied to C.  */
585 java_letter_or_digit_p (c)
588 return _JAVA_LETTER_OR_DIGIT_P (c);
/* Parse the escape sequence that follows a backslash in a character or
   string literal and return the character it denotes.  Single-letter
   escapes map to fixed code points, and a run of up to three digits is
   read as an octal escape.  An unrecognised escape is reported and
   JAVA_CHAR_ERROR is returned.  */
592 java_parse_escape_sequence ()
597 switch (c = java_get_unicode ())
600 return (unicode_t)0x8;
602 return (unicode_t)0x9;
604 return (unicode_t)0xa;
606 return (unicode_t)0xc;
608 return (unicode_t)0xd;
610 return (unicode_t)0x22;
612 return (unicode_t)0x27;
614 return (unicode_t)0x5c;
/* NOTE(review): '8' and '9' are accepted here and by the RANGE test
   below although they are not octal digits; confirm whether the range
   should stop at '7'.  */
615 case '0': case '1': case '2': case '3': case '4':
616 case '5': case '6': case '7': case '8': case '9':
619 int octal_escape_index = 0;
621 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
622 c = java_get_unicode ())
623 octal_escape [octal_escape_index++] = c;
/* The loop read one character past the escape; give it back.  */
625 java_unget_unicode ();
/* A three-digit escape whose first digit is above '3' is out of
   range.  */
627 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
629 java_lex_error ("Literal octal escape out of range", 0);
630 return JAVA_CHAR_ERROR;
/* Combine the digits, three bits each, most significant first.  */
635 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
636 i < octal_escape_index; i++, shift -= 3)
637 char_lit |= (octal_escape [i] - '0') << shift;
644 return '\n'; /* ULT, caught later as a specific error */
646 java_lex_error ("Illegal character in escape sequence", 0);
647 return JAVA_CHAR_ERROR;
/* java_perform_atof receives its arguments packed in a struct jpa_args
   passed as an untyped pointer.  It converts the literal text in
   literal_token to a REAL_VALUE_TYPE in the mode of the float or double
   type (selected by fflag), reports a range error when the result is
   infinite or a NaN, and stores the real constant through
   SET_LVAL_NODE_TYPE.  It is invoked by java_lex through
   do_float_handler.  */
651 /* Isolate the code which may raise an arithmetic exception in its
660 int number_beginning;
663 static void java_perform_atof PARAMS ((PTR));
666 java_perform_atof (av)
669 struct jpa_args *a = (struct jpa_args *)av;
670 YYSTYPE *java_lval = a->java_lval;
671 int number_beginning = a->number_beginning;
672 REAL_VALUE_TYPE value;
673 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
675 SET_REAL_VALUE_ATOF (value,
676 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
678 if (REAL_VALUE_ISINF (value)
679 || REAL_VALUE_ISNAN (value))
681 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
685 SET_LVAL_NODE_TYPE (build_real (type, value), type);
689 static int yylex PARAMS ((YYSTYPE *));
699 unicode_t c, first_unicode;
700 int ascii_index, all_ascii;
703 /* Translation of the Unicode escape in the raw stream of Unicode
704 characters. Takes care of line terminator. */
/* First phase of the lexer (java_lex): skip white space and comments,
   handle end of input, and record in ctxp->elc the position at which
   the next token starts.  */
706 /* Skip white spaces: SP, TAB and FF or ULT */
707 for (c = java_get_unicode ();
708 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
711 ctxp->elc.line = ctxp->c_line->lineno;
712 ctxp->elc.col = ctxp->c_line->char_col-2;
/* Clamp the column so that it is never negative.  */
715 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
/* A CTRL-Z immediately followed by the end of input is accepted as the
   end of the file.  */
717 if (c == 0x1a) /* CTRL-Z */
719 if ((c = java_get_unicode ()) == UEOF)
720 return 0; /* Ok here */
722 java_unget_unicode (); /* Caught later at the end of the function */
724 /* Handle EOF here */
725 if (c == UEOF) /* Should probably do something here... */
728 /* Take care of eventual comments. */
731 switch (c = java_get_unicode ())
/* Line comment: read characters up to the line terminator.  */
736 c = java_get_unicode ();
738 java_lex_error ("Comment not terminated at end of input", 0);
739 if (c == '\n') /* ULT */
/* A second '*' right after the comment opener starts a documentation
   comment.  */
745 if ((c = java_get_unicode ()) == '*')
747 if ((c = java_get_unicode ()) == '/')
748 goto step1; /* Empty documentation comment */
749 else if (java_parse_doc_section (c))
753 java_parse_end_comment ((c = java_get_unicode ()));
/* Not a comment after all: give the character back.  */
757 java_unget_unicode ();
/* Record where the token starts; PREV_COL keeps the previous column.  */
763 ctxp->elc.line = ctxp->c_line->lineno;
764 ctxp->elc.prev_col = ctxp->elc.col;
765 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
766 if (ctxp->elc.col < 0)
767 fatal ("ctxp->elc.col < 0 - java_lex");
/* Numeric-literal section of java_lex.  Digits are accumulated twice:
   as text in literal_token, in case the literal turns out to be
   floating point, and as a multi-part integer in PARTS.  Floating-point
   text is converted by java_perform_atof under do_float_handler; an
   integer is assembled into HIGH/LOW and range-checked as `int' or
   `long'.
   NOTE(review): literal_token holds 256 bytes and no check of
   literal_index against that size is visible in this excerpt; confirm
   that over-long literals cannot overrun the buffer.  */
769 /* Numeric literals */
770 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
772 /* This section of code is borrowed from gcc/c-lex.c */
773 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
774 int parts[TOTAL_PARTS];
775 HOST_WIDE_INT high, low;
776 /* End borrowed section */
777 char literal_token [256];
778 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
781 int number_beginning = ctxp->c_line->current;
784 /* We might have a . separator instead of a FP like .[0-9]* */
/* Peek at the next character without consuming it.  */
787 unicode_t peep = java_sneak_unicode ();
789 if (!JAVA_ASCII_DIGIT (peep))
792 BUILD_OPERATOR (DOT_TK);
796 for (i = 0; i < TOTAL_PARTS; i++)
/* A leading zero: look at the next character to tell a hexadecimal
   prefix, further digits, a floating-point literal and a plain zero
   apart.  */
801 c = java_get_unicode ();
802 if (c == 'x' || c == 'X')
805 c = java_get_unicode ();
807 else if (JAVA_ASCII_DIGIT (c))
811 /* Push the '.' back and prepare for a FP parsing... */
812 java_unget_unicode ();
817 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
818 JAVA_LEX_LIT ("0", 10);
822 SET_LVAL_NODE (long_zero_node);
825 SET_LVAL_NODE (float_zero_node);
828 SET_LVAL_NODE (double_zero_node);
831 java_unget_unicode ();
832 SET_LVAL_NODE (integer_zero_node);
837 /* Parse the first part of the literal, until we find something
838 which is not a number. */
839 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
840 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
841 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
843 /* We store in a string (in case it turns out to be a FP) and in
844 PARTS if we have to process a integer literal. */
/* `c|0x20' folds an upper-case hex letter to lower case.  */
845 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
848 literal_token [literal_index++] = c;
849 /* This section of code is borrowed from gcc/c-lex.c */
850 for (count = 0; count < TOTAL_PARTS; count++)
852 parts[count] *= radix;
/* Carry whatever exceeds HOST_BITS_PER_CHAR bits into this part from
   the one below it.  */
855 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
856 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
861 if (parts [TOTAL_PARTS-1] != 0)
863 /* End borrowed section. */
864 c = java_get_unicode ();
/* Floating-point part.  STAGE tracks how far through the literal the
   scan has progressed; stage 4 means a type suffix has been read.  */
867 /* If we have something from the FP char set but not a digit, parse
869 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
872 int seen_digit = (literal_index ? 1 : 0);
873 int seen_exponent = 0;
874 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
875 double unless specified. */
877 java_lex_error ("Can't express non-decimal FP literal", 0);
886 literal_token [literal_index++ ] = c;
887 c = java_get_unicode ();
890 java_lex_error ("Invalid character in FP literal", 0);
893 if (c == 'e' || c == 'E')
897 /* {E,e} must have seen at least a digit */
899 java_lex_error ("Invalid FP literal", 0);
903 literal_token [literal_index++] = c;
904 c = java_get_unicode ();
907 java_lex_error ("Invalid character in FP literal", 0);
909 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
911 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
912 stage = 4; /* So we fall through */
915 if ((c=='-' || c =='+') && stage == 2)
918 literal_token [literal_index++] = c;
919 c = java_get_unicode ();
922 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
923 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
924 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
925 (stage == 3 && JAVA_ASCII_DIGIT (c)))
927 if (JAVA_ASCII_DIGIT (c))
929 literal_token [literal_index++ ] = c;
930 c = java_get_unicode ();
937 if (stage != 4) /* Don't push back fF/dD */
938 java_unget_unicode ();
940 /* An exponent (if any) must have seen a digit. */
941 if (seen_exponent && !seen_digit)
942 java_lex_error ("Invalid FP literal", 0);
944 literal_token [literal_index] = '\0';
945 JAVA_LEX_LIT (literal_token, radix);
/* Pack the arguments for java_perform_atof and run it under
   do_float_handler.  */
948 a.literal_token = literal_token;
950 a.java_lval = java_lval;
951 a.number_beginning = number_beginning;
952 if (do_float_handler (java_perform_atof, (PTR) &a))
955 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
961 } /* JAVA_ASCII_FPCHAR (c) */
963 /* Here we get back to converting the integral literal. */
964 if (c == 'L' || c == 'l')
966 else if (radix == 16 && JAVA_ASCII_LETTER (c))
967 java_lex_error ("Digit out of range in hexadecimal literal", 0);
968 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
969 java_lex_error ("Digit out of range in octal literal", 0);
970 else if (radix == 16 && !literal_index)
971 java_lex_error ("No digit specified for hexadecimal literal", 0);
973 java_unget_unicode ();
975 #ifdef JAVA_LEX_DEBUG
976 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
977 JAVA_LEX_LIT (literal_token, radix);
979 /* This section of code is borrowed from gcc/c-lex.c */
982 bytes = GET_TYPE_PRECISION (long_type_node);
983 for (i = bytes; i < TOTAL_PARTS; i++)
/* Assemble the parts into the LOW and HIGH words of the value.  */
991 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
993 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
994 / HOST_BITS_PER_CHAR)]
995 << (i * HOST_BITS_PER_CHAR));
996 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
998 /* End borrowed section. */
1000 /* Range checking */
1003 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1004 9223372036854775807L is the biggest `long' literal that can be
1005 expressed using a 10 radix. For other radixes, everything that
1006 fits within 64 bits is OK. */
1007 int hb = (high >> 31);
1008 if (overflow || (hb && low && radix == 10) ||
1009 (hb && high & 0x7fffffff && radix == 10) ||
1010 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1011 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1015 /* 2147483648 is valid if operand of a '-'. Otherwise,
1016 2147483647 is the biggest `int' literal that can be
1017 expressed using a 10 radix. For other radixes, everything
1018 that fits within 32 bits is OK. As all literals are
1019 signed, we sign extend here. */
1020 int hb = (low >> 31) & 0x1;
1021 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1022 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1023 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
/* The minus sign, if any, applied to this literal only.  */
1026 ctxp->minus_seen = 0;
1027 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1028 (long_suffix ? long_type_node : int_type_node));
/* Character-literal section of java_lex.  The token is not a numeric
   literal, so a preceding minus sign no longer matters.  */
1032 ctxp->minus_seen = 0;
1033 /* Character literals */
/* A backslash introduces an escape sequence.  */
1037 if ((c = java_get_unicode ()) == '\\')
1038 char_lit = java_parse_escape_sequence ();
/* Read the character that follows the literal's value.  */
1042 c = java_get_unicode ();
1044 if ((c == '\n') || (c == UEOF))
1045 java_lex_error ("Character literal not terminated at end of line", 0);
1047 java_lex_error ("Syntax error in character literal", 0);
1049 if (c == JAVA_CHAR_ERROR)
1050 char_lit = 0; /* We silently convert it to zero */
1052 JAVA_LEX_CHAR_LIT (char_lit);
1053 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1057 /* String literals */
/* The characters of the literal are collected as UTF-8 on
   temporary_obstack until the closing quote, a line terminator or the
   end of input.  NO_ERROR is cleared as soon as one escape sequence
   fails to parse.  */
1063 for (no_error = 1, c = java_get_unicode ();
1064 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1067 c = java_parse_escape_sequence ();
1068 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1069 java_unicode_2_utf8 (c);
1071 if (c == '\n' || c == UEOF) /* ULT */
1073 lineno--; /* Refer to the line the terminator was seen */
1074 java_lex_error ("String not terminated at end of line.", 0);
1078 obstack_1grow (&temporary_obstack, '\0');
1079 string = obstack_finish (&temporary_obstack);
1081 if (!no_error || (c != '"'))
1082 java_lval->node = error_mark_node; /* Requires further testing FIXME */
/* Build the STRING_CST; its text is copied from the temporary obstack
   into storage taken from expression_obstack.  */
1085 tree s = make_node (STRING_CST);
1086 TREE_STRING_LENGTH (s) = strlen (string);
1087 TREE_STRING_POINTER (s) =
1088 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
1089 strcpy (TREE_STRING_POINTER (s), string);
1090 java_lval->node = s;
1093 return STRING_LIT_TK;
/* Separator and operator section of java_lex.  An operator that may be
   longer than one character is recognised by reading the next
   character; when that character does not extend the operator it is
   pushed back with java_unget_unicode and the shorter token is
   built.  */
1101 BUILD_OPERATOR (OP_TK);
/* At brace nesting level 1 the line numbers of the opening and closing
   braces are recorded in the context.  */
1107 if (ctxp->ccb_indent == 1)
1108 ctxp->first_ccb_indent1 = lineno;
1110 BUILD_OPERATOR (OCB_TK);
1114 if (ctxp->ccb_indent == 1)
1115 ctxp->last_ccb_indent1 = lineno;
1116 BUILD_OPERATOR (CCB_TK);
1119 BUILD_OPERATOR (OSB_TK);
1131 BUILD_OPERATOR (DOT_TK);
1132 /* return DOT_TK; */
1139 if ((c = java_get_unicode ()) == '=')
1141 BUILD_OPERATOR (EQ_TK);
1145 /* Equals is used in two different locations. In the
1146 variable_declarator: rule, it has to be seen as '=' as opposed
1147 to being seen as an ordinary assignment operator in
1148 assignment_operators: rule. */
1149 java_unget_unicode ();
1150 BUILD_OPERATOR (ASSIGN_TK);
1154 switch ((c = java_get_unicode ()))
1157 BUILD_OPERATOR (GTE_TK);
1159 switch ((c = java_get_unicode ()))
1162 if ((c = java_get_unicode ()) == '=')
1164 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1168 java_unget_unicode ();
1169 BUILD_OPERATOR (ZRS_TK);
1172 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1174 java_unget_unicode ();
1175 BUILD_OPERATOR (SRS_TK);
1178 java_unget_unicode ();
1179 BUILD_OPERATOR (GT_TK);
1183 switch ((c = java_get_unicode ()))
1186 BUILD_OPERATOR (LTE_TK);
1188 if ((c = java_get_unicode ()) == '=')
1190 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1194 java_unget_unicode ();
1195 BUILD_OPERATOR (LS_TK);
1198 java_unget_unicode ();
1199 BUILD_OPERATOR (LT_TK);
1203 switch ((c = java_get_unicode ()))
1206 BUILD_OPERATOR (BOOL_AND_TK);
1208 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1210 java_unget_unicode ();
1211 BUILD_OPERATOR (AND_TK);
1215 switch ((c = java_get_unicode ()))
1218 BUILD_OPERATOR (BOOL_OR_TK);
1220 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1222 java_unget_unicode ();
1223 BUILD_OPERATOR (OR_TK);
1227 switch ((c = java_get_unicode ()))
1230 BUILD_OPERATOR (INCR_TK);
1232 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1234 java_unget_unicode ();
1235 BUILD_OPERATOR (PLUS_TK);
1239 switch ((c = java_get_unicode ()))
1242 BUILD_OPERATOR (DECR_TK);
1244 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1246 java_unget_unicode ();
/* Remember that a lone '-' was seen: the integer range checks in the
   numeric-literal code accept the largest-magnitude decimal literals
   only when this flag is set.  */
1247 ctxp->minus_seen = 1;
1248 BUILD_OPERATOR (MINUS_TK);
1252 if ((c = java_get_unicode ()) == '=')
1254 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1258 java_unget_unicode ();
1259 BUILD_OPERATOR (MULT_TK);
1263 if ((c = java_get_unicode ()) == '=')
1265 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1269 java_unget_unicode ();
1270 BUILD_OPERATOR (DIV_TK);
1274 if ((c = java_get_unicode ()) == '=')
1276 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1280 java_unget_unicode ();
1281 BUILD_OPERATOR (XOR_TK);
1285 if ((c = java_get_unicode ()) == '=')
1287 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1291 java_unget_unicode ();
1292 BUILD_OPERATOR (REM_TK);
1296 if ((c = java_get_unicode()) == '=')
1298 BUILD_OPERATOR (NEQ_TK);
1302 java_unget_unicode ();
1303 BUILD_OPERATOR (NEG_TK);
1308 BUILD_OPERATOR (REL_QM_TK);
1311 BUILD_OPERATOR (REL_CL_TK);
1313 BUILD_OPERATOR (NOT_TK);
/* Final section of java_lex: keywords, keyword-based literals and
   identifiers.  Identifier characters are collected as UTF-8 on
   temporary_obstack; whatever cannot start a token at all is reported
   as an invalid character.  */
1316 /* Keyword, boolean literal or null literal */
1317 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1318 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1320 java_unicode_2_utf8 (c);
1321 if (all_ascii && c >= 128)
/* Terminate the collected text and give back the character that ended
   the scan.  */
1326 obstack_1grow (&temporary_obstack, '\0');
1327 string = obstack_finish (&temporary_obstack);
1328 java_unget_unicode ();
1330 /* If we have something all ascii, we consider a keyword, a boolean
1331 literal, a null literal or an all ASCII identifier. Otherwise,
1332 this is an identifier (possibly not respecting formation rule). */
1335 struct java_keyword *kw;
1336 if ((kw=java_keyword (string, ascii_index)))
1338 JAVA_LEX_KW (string);
/* Modifier keywords are recorded through SET_MODIFIER_CTX.  */
1341 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1342 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1343 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1345 SET_MODIFIER_CTX (kw->token);
/* Each primitive type keyword yields the matching type node as its
   semantic value.  */
1348 SET_LVAL_NODE (float_type_node);
1351 SET_LVAL_NODE (double_type_node);
1354 SET_LVAL_NODE (boolean_type_node);
1357 SET_LVAL_NODE (byte_type_node);
1360 SET_LVAL_NODE (short_type_node);
1363 SET_LVAL_NODE (int_type_node);
1366 SET_LVAL_NODE (long_type_node);
1369 SET_LVAL_NODE (char_type_node);
1372 /* Keyword based literals */
1375 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1376 boolean_true_node : boolean_false_node));
1379 SET_LVAL_NODE (null_pointer_node);
1382 /* Some keyword we want to retain information on the location
1395 BUILD_OPERATOR (kw->token);
1403 /* We may have an ID here */
1404 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1406 JAVA_LEX_ID (string);
1407 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1411 /* Everything else is an invalid character in the input */
1413 char lex_error_buffer [128];
/* java_sprint_unicode renders the offending character, as an escape
   when needed.  */
1414 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1415 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1416 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack.  Codes
   0x01-0x7f take one byte, codes 0x80-0x7ff take two bytes, and all
   other codes take three.  The code 0 is deliberately sent down the
   two-byte path, so it is written as the bytes 0xc0 0x80 rather than
   as a single zero byte.  */
1422 java_unicode_2_utf8 (unicode)
1425 if (RANGE (unicode, 0x01, 0x7f))
1426 obstack_1grow (&temporary_obstack, (char)unicode);
1427 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
/* Lead byte 110xxxxx carries bits 6-10, continuation byte 10xxxxxx the
   low six bits.  */
1429 obstack_1grow (&temporary_obstack,
1430 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1431 obstack_1grow (&temporary_obstack,
1432 (unsigned char)(0x80 | (unicode & 0x3f)));
1434 else /* Range 0x800-0xffff */
/* Lead byte 1110xxxx carries bits 12-15, followed by two continuation
   bytes of six bits each.  */
1436 obstack_1grow (&temporary_obstack,
1437 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1438 obstack_1grow (&temporary_obstack,
1439 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1440 obstack_1grow (&temporary_obstack,
1441 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Return the result of build_expr_wfl for NODE, using the current file
   name and the line and column most recently recorded in ctxp->elc.  */
1447 build_wfl_node (node)
1450 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Report the lexical error described by MSG.  The error position
   stored in ctxp->elc is the current line number and the current
   column minus one plus FORWARD; callers pass 1 for FORWARD to point
   one column further right.  The context's error flag is cleared
   before reporting.
   NOTE(review): both parameters are marked ATTRIBUTE_UNUSED although
   FORWARD is used below, presumably because part of this body is
   compiled out in some configuration; the conditional is not visible
   in this excerpt.  */
1455 java_lex_error (msg, forward)
1456 const char *msg ATTRIBUTE_UNUSED;
1457 int forward ATTRIBUTE_UNUSED;
1460 ctxp->elc.line = ctxp->c_line->lineno;
1461 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1463 /* Might be caught in the middle of some error report */
1464 ctxp->java_error_flag = 0;
/* Fragment of a routine whose other lines are not visible here
   (presumably java_is_eol, declared near the top of the file).  This
   test holds when the character read ahead is neither a newline nor
   EOF.  */
1481 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, the text of line LINE of file FILENAME
   followed by a second line that places a '^' under column COL, and
   return the finished obstack object.  The file is opened and scanned
   from its start on every call.  The parameters are marked
   ATTRIBUTE_UNUSED, presumably because the body is compiled out in
   some configuration; that conditional is not visible in this excerpt.
   NOTE(review): no fclose of FP is visible in this excerpt; confirm
   that the stream is closed before returning.  */
1493 java_get_line_col (filename, line, col)
1494 const char *filename ATTRIBUTE_UNUSED;
1495 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1500 /* Dumb implementation. Doesn't try to cache or optimize things. */
1501 /* First line of the file is line 1, first column is 1 */
1503 /* COL == -1 means, at the CR/LF in LINE */
1504 /* COL == -2 means, at the first non space char in LINE */
1507 int c, ccol, cline = 1;
1508 int current_line_col = 0;
1509 int first_non_space = 0;
1512 if (!(fp = fopen (filename, "r")))
1513 fatal ("Can't open file - java_display_line_col");
/* Skip forward to the requested line.  */
1515 while (cline != line)
/* Placeholder text used when the file ends before LINE is reached.  */
1520 static char msg[] = "<<file too short - unexpected EOF>>";
1521 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1524 if (java_is_eol (fp, c))
1528 /* Gather the chars of the current line in a buffer */
1532 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white
   space.  */
1534 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1535 first_non_space = current_line_col;
1536 obstack_1grow (&temporary_obstack, c);
1541 obstack_1grow (&temporary_obstack, '\n');
/* Resolve the special COL values described above.  */
1545 col = current_line_col;
1546 first_non_space = 0;
1549 col = first_non_space;
1551 first_non_space = 0;
1553 /* Place the '^' at the right position */
1554 base = obstack_base (&temporary_obstack);
1555 for (ccol = 1; ccol <= col; ccol++)
1557 /* Compute \t when reaching first_non_space */
/* While first_non_space is set, a tab in the source line is repeated
   as a tab in the marker line; every other position gets a blank.  */
1558 char c = (first_non_space ?
1559 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1560 obstack_1grow (&temporary_obstack, c);
/* obstack_grow0 appends the caret and a terminating null byte.  */
1562 obstack_grow0 (&temporary_obstack, "^", 1);
1565 return obstack_finish (&temporary_obstack);