1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded characters
28 and returns a token for everything found but comments, white spaces
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
40 extern struct obstack *expression_obstack;
43 /* Function declaration */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((int, int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((void));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
69 int java_lang_imported = 0;
72 java_lang_id = get_identifier ("java.lang");
73 if (!java_lang_cloneable)
74 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
76 if (!java_lang_imported)
78 tree node = build_tree_list
79 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
80 read_import_dir (TREE_PURPOSE (node));
81 TREE_CHAIN (node) = ctxp->import_demand_list;
82 ctxp->import_demand_list = node;
83 java_lang_imported = 1;
87 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
89 label_id = get_identifier ("$L");
91 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
92 if (!wfl_string_buffer)
94 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
96 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
98 ctxp->static_initialized = ctxp->non_static_initialized =
99 ctxp->incomplete_class = NULL_TREE;
101 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
102 bzero ((PTR) current_jcf, sizeof (JCF));
103 ctxp->current_parsed_class = NULL;
104 ctxp->package = NULL_TREE;
107 ctxp->filename = input_filename;
108 ctxp->lineno = lineno = 0;
111 ctxp->unget_utf8_value = 0;
112 ctxp->minus_seen = 0;
113 ctxp->java_error_flag = 0;
117 java_sprint_unicode (line, i)
118 struct java_line *line;
121 static char buffer [10];
122 if (line->unicode_escape_p [i] || line->line [i] > 128)
123 sprintf (buffer, "\\u%04x", line->line [i]);
126 buffer [0] = line->line [i];
133 java_sneak_unicode ()
135 return (ctxp->c_line->line [ctxp->c_line->current]);
139 java_unget_unicode ()
141 if (!ctxp->c_line->current)
142 fatal ("can't unget unicode - java_unget_unicode");
143 ctxp->c_line->current--;
144 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
148 java_allocate_new_line ()
150 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
151 char ahead_escape_p = (ctxp->c_line ?
152 ctxp->c_line->unicode_escape_ahead_p : 0);
154 if (ctxp->c_line && !ctxp->c_line->white_space_only)
158 free (ctxp->p_line->unicode_escape_p);
159 free (ctxp->p_line->line);
162 ctxp->p_line = ctxp->c_line;
163 ctxp->c_line = NULL; /* Reallocated */
168 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
169 ctxp->c_line->max = JAVA_LINE_MAX;
170 ctxp->c_line->line = (unicode_t *)xmalloc
171 (sizeof (unicode_t)*ctxp->c_line->max);
172 ctxp->c_line->unicode_escape_p =
173 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
174 ctxp->c_line->white_space_only = 0;
177 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
178 ctxp->c_line->char_col = ctxp->c_line->current = 0;
181 ctxp->c_line->line [ctxp->c_line->size] = ahead;
182 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
183 ctxp->c_line->size++;
185 ctxp->c_line->ahead [0] = 0;
186 ctxp->c_line->unicode_escape_ahead_p = 0;
187 ctxp->c_line->lineno = ++lineno;
188 ctxp->c_line->white_space_only = 1;
191 #define BAD_UTF8_VALUE 0xFFFE
199 if (ctxp->unget_utf8_value)
201 int to_return = ctxp->unget_utf8_value;
202 ctxp->unget_utf8_value = 0;
214 if ((c & 0xe0) == 0xc0)
217 if ((c1 & 0xc0) == 0x80)
218 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
221 else if ((c & 0xf0) == 0xe0)
224 if ((c1 & 0xc0) == 0x80)
227 if ((c2 & 0xc0) == 0x80)
228 return (unicode_t)(((c & 0xf) << 12) +
229 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
236 /* We looked for a UTF8 multi-byte sequence (since we saw an initial
237 byte with the high bit set), but found invalid bytes instead.
238 If the most recent byte was ASCII (and not EOF), we should
239 unget it, in case it was a comment terminator or other delimiter. */
242 return BAD_UTF8_VALUE;
247 java_store_unicode (l, c, unicode_escape_p)
250 int unicode_escape_p;
252 if (l->size == l->max)
254 l->max += JAVA_LINE_MAX;
255 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
256 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
257 sizeof (char)*l->max);
259 l->line [l->size] = c;
260 l->unicode_escape_p [l->size++] = unicode_escape_p;
264 java_read_unicode (term_context, unicode_escape_p)
266 int *unicode_escape_p;
271 c = java_read_char ();
272 *unicode_escape_p = 0;
275 return ((term_context ? c :
276 java_lineterminator (c) ? '\n' : (unicode_t)c));
278 /* Count the number of preceding '\' */
279 for (base = ftell (finput), i = base-2; c == '\\';)
281 fseek (finput, i--, SEEK_SET);
282 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
284 fseek (finput, base, SEEK_SET);
285 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
287 c = java_read_char ();
290 unsigned short unicode = 0;
292 /* Next should be 4 hex digits, otherwise it's an error.
293 The hex value is converted into the unicode, pushed into
294 the Unicode stream. */
295 for (shift = 12; shift >= 0; shift -= 4)
297 if ((c = java_read_char ()) == UEOF)
299 if (c >= '0' && c <= '9')
300 unicode |= (unicode_t)((c-'0') << shift);
301 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
302 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
305 ("Non hex digit in Unicode escape sequence", 0);
307 *unicode_escape_p = 1;
308 return (term_context ? unicode :
309 (java_lineterminator (c) ? '\n' : unicode));
311 ctxp->unget_utf8_value = c;
313 return (unicode_t)'\\';
319 /* It's time to read a line when... */
320 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
323 java_allocate_new_line ();
324 if (ctxp->c_line->line[0] != '\n')
327 int unicode_escape_p;
328 c = java_read_unicode (0, &unicode_escape_p);
329 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
330 if (ctxp->c_line->white_space_only
331 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
332 ctxp->c_line->white_space_only = 0;
333 if ((c == '\n') || (c == UEOF))
337 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
338 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
339 return ctxp->c_line->line [ctxp->c_line->current++];
343 java_lineterminator (c)
346 int unicode_escape_p;
347 if (c == '\n') /* LF */
349 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
351 ctxp->c_line->ahead [0] = c;
352 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
356 else if (c == '\r') /* CR */
358 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
360 ctxp->c_line->ahead [0] = c;
361 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
369 /* Parse the end of a C style comment.
370 * C is the first character following the '/' and '*'. */
372 java_parse_end_comment (c)
376 for ( ;; c = java_get_unicode ())
381 java_lex_error ("Comment not terminated at end of input", 0);
383 switch (c = java_get_unicode ())
386 java_lex_error ("Comment not terminated at end of input", 0);
389 case '*': /* reparse only '*' */
390 java_unget_unicode ();
396 /* Parse the documentation section. Keywords must be at the beginning
397 of a documentation comment line (ignoring white space and any `*'
398 character). Parsed keyword(s): @DEPRECATED. */
401 java_parse_doc_section (c)
404 int valid_tag = 0, seen_star = 0;
406 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
418 c = java_get_unicode();
422 java_lex_error ("Comment not terminated at end of input", 0);
424 if (seen_star && (c == '/'))
425 return 1; /* Goto step1 in caller */
427 /* We're parsing @deprecated */
428 if (valid_tag && (c == '@'))
433 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
435 c = java_get_unicode ();
436 tag [tag_index++] = c;
440 java_lex_error ("Comment not terminated at end of input", 0);
442 java_unget_unicode ();
443 tag [tag_index] = '\0';
445 if (!strcmp (tag, "deprecated"))
446 ctxp->deprecated = 1;
451 /* This function is to be used only by JAVA_ID_CHAR_P (), otherwise it
452 will return a wrong result. */
454 java_letter_or_digit_p (c)
457 return _JAVA_LETTER_OR_DIGIT_P (c);
461 java_parse_escape_sequence ()
466 switch (c = java_get_unicode ())
469 return (unicode_t)0x8;
471 return (unicode_t)0x9;
473 return (unicode_t)0xa;
475 return (unicode_t)0xc;
477 return (unicode_t)0xd;
479 return (unicode_t)0x22;
481 return (unicode_t)0x27;
483 return (unicode_t)0x5c;
484 case '0': case '1': case '2': case '3': case '4':
485 case '5': case '6': case '7': case '8': case '9':
488 int octal_escape_index = 0;
490 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
491 c = java_get_unicode ())
492 octal_escape [octal_escape_index++] = c;
494 java_unget_unicode ();
496 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
498 java_lex_error ("Literal octal escape out of range", 0);
499 return JAVA_CHAR_ERROR;
504 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
505 i < octal_escape_index; i++, shift -= 3)
506 char_lit |= (octal_escape [i] - '0') << shift;
513 return '\n'; /* ULT, caught later as a specific error */
515 java_lex_error ("Illegal character in escape sequence", 0);
516 return JAVA_CHAR_ERROR;
520 static int yylex PARAMS ((YYSTYPE *));
530 unicode_t c, first_unicode;
531 int ascii_index, all_ascii;
534 /* Translation of the Unicode escape in the raw stream of Unicode
535 characters. Takes care of line terminator. */
537 /* Skip white spaces: SP, TAB and FF or ULT */
538 for (c = java_get_unicode ();
539 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
542 ctxp->elc.line = ctxp->c_line->lineno;
543 ctxp->elc.col = ctxp->c_line->char_col-2;
546 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
548 if (c == 0x1a) /* CTRL-Z */
550 if ((c = java_get_unicode ()) == UEOF)
551 return 0; /* Ok here */
553 java_unget_unicode (); /* Caught later at the end of the function */
555 /* Handle EOF here */
556 if (c == UEOF) /* Should probably do something here... */
559 /* Take care of any comments. */
562 switch (c = java_get_unicode ())
567 c = java_get_unicode ();
569 java_lex_error ("Comment not terminated at end of input", 0);
570 if (c == '\n') /* ULT */
576 if ((c = java_get_unicode ()) == '*')
578 if ((c = java_get_unicode ()) == '/')
579 goto step1; /* Empty documentation comment */
580 else if (java_parse_doc_section (c))
584 java_parse_end_comment (c);
588 java_unget_unicode ();
594 ctxp->elc.line = ctxp->c_line->lineno;
595 ctxp->elc.prev_col = ctxp->elc.col;
596 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
597 if (ctxp->elc.col < 0)
598 fatal ("ctxp->elc.col < 0 - java_lex");
600 /* Numeric literals */
601 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
603 /* This section of code is borrowed from gcc/c-lex.c */
604 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
605 int parts[TOTAL_PARTS];
606 HOST_WIDE_INT high, low;
607 /* End borrowed section */
608 char literal_token [256];
609 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
612 int number_beginning = ctxp->c_line->current;
615 /* We might have a . separator instead of a FP like .[0-9]* */
618 unicode_t peep = java_sneak_unicode ();
620 if (!JAVA_ASCII_DIGIT (peep))
623 BUILD_OPERATOR (DOT_TK);
627 for (i = 0; i < TOTAL_PARTS; i++)
632 c = java_get_unicode ();
633 if (c == 'x' || c == 'X')
636 c = java_get_unicode ();
638 else if (JAVA_ASCII_DIGIT (c))
642 /* Push the '.' back and prepare for a FP parsing... */
643 java_unget_unicode ();
648 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
649 JAVA_LEX_LIT ("0", 10);
653 SET_LVAL_NODE (long_zero_node);
656 SET_LVAL_NODE (float_zero_node);
659 SET_LVAL_NODE (double_zero_node);
662 java_unget_unicode ();
663 SET_LVAL_NODE (integer_zero_node);
668 /* Parse the first part of the literal, until we find something
669 which is not a number. */
670 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
671 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
672 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
674 /* We store in a string (in case it turns out to be a FP) and in
675 PARTS if we have to process an integer literal. */
676 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
679 literal_token [literal_index++] = c;
680 /* This section of code is borrowed from gcc/c-lex.c */
681 for (count = 0; count < TOTAL_PARTS; count++)
683 parts[count] *= radix;
686 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
687 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
692 if (parts [TOTAL_PARTS-1] != 0)
694 /* End borrowed section. */
695 c = java_get_unicode ();
698 /* If we have something from the FP char set but not a digit, parse
700 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
703 int seen_digit = (literal_index ? 1 : 0);
704 int seen_exponent = 0;
705 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literals are
706 double unless specified. */
708 java_lex_error ("Can't express non-decimal FP literal", 0);
717 literal_token [literal_index++ ] = c;
718 c = java_get_unicode ();
721 java_lex_error ("Invalid character in FP literal", 0);
724 if (c == 'e' || c == 'E')
728 /* {E,e} must have seen at least one digit */
730 java_lex_error ("Invalid FP literal", 0);
734 literal_token [literal_index++] = c;
735 c = java_get_unicode ();
738 java_lex_error ("Invalid character in FP literal", 0);
740 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
742 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
743 stage = 4; /* So we fall through */
746 if ((c=='-' || c =='+') && stage == 2)
749 literal_token [literal_index++] = c;
750 c = java_get_unicode ();
753 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
754 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
755 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
756 (stage == 3 && JAVA_ASCII_DIGIT (c)))
758 if (JAVA_ASCII_DIGIT (c))
760 literal_token [literal_index++ ] = c;
761 c = java_get_unicode ();
766 REAL_VALUE_TYPE value;
768 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
771 if (stage != 4) /* Don't push back fF/dD */
772 java_unget_unicode ();
774 /* An exponent (if any) must have seen a digit. */
775 if (seen_exponent && !seen_digit)
776 java_lex_error ("Invalid FP literal", 0);
778 literal_token [literal_index] = '\0';
779 JAVA_LEX_LIT (literal_token, radix);
781 if (setjmp (handler))
783 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
788 SET_FLOAT_HANDLER (handler);
790 (value, REAL_VALUE_ATOF (literal_token,
793 if (REAL_VALUE_ISINF (value))
794 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
796 if (REAL_VALUE_ISNAN (value))
797 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
799 SET_LVAL_NODE_TYPE (build_real (type, value), type);
800 SET_FLOAT_HANDLER (NULL_PTR);
805 } /* JAVA_ASCII_FPCHAR (c) */
807 /* Here we get back to converting the integral literal. */
808 if (c == 'L' || c == 'l')
810 else if (radix == 16 && JAVA_ASCII_LETTER (c))
811 java_lex_error ("Digit out of range in hexadecimal literal", 0);
812 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
813 java_lex_error ("Digit out of range in octal literal", 0);
814 else if (radix == 16 && !literal_index)
815 java_lex_error ("No digit specified for hexadecimal literal", 0);
817 java_unget_unicode ();
819 #ifdef JAVA_LEX_DEBUG
820 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
821 JAVA_LEX_LIT (literal_token, radix);
823 /* This section of code is borrowed from gcc/c-lex.c */
826 bytes = GET_TYPE_PRECISION (long_type_node);
827 for (i = bytes; i < TOTAL_PARTS; i++)
835 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
837 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
838 / HOST_BITS_PER_CHAR)]
839 << (i * HOST_BITS_PER_CHAR));
840 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
842 /* End borrowed section. */
847 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
848 9223372036854775807L is the biggest `long' literal that can be
849 expressed using a 10 radix. For other radixes, everything that
850 fits within 64 bits is OK. */
851 int hb = (high >> 31);
852 if (overflow || (hb && low && radix == 10) ||
853 (hb && high & 0x7fffffff && radix == 10) ||
854 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
855 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
859 /* 2147483648 is valid if operand of a '-'. Otherwise,
860 2147483647 is the biggest `int' literal that can be
861 expressed using a 10 radix. For other radixes, everything
862 that fits within 32 bits is OK. As all literals are
863 signed, we sign extend here. */
864 int hb = (low >> 31) & 0x1;
865 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
866 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
867 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
870 ctxp->minus_seen = 0;
871 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
872 (long_suffix ? long_type_node : int_type_node));
876 ctxp->minus_seen = 0;
877 /* Character literals */
881 if ((c = java_get_unicode ()) == '\\')
882 char_lit = java_parse_escape_sequence ();
886 c = java_get_unicode ();
888 if ((c == '\n') || (c == UEOF))
889 java_lex_error ("Character literal not terminated at end of line", 0);
891 java_lex_error ("Syntax error in character literal", 0);
893 if (c == JAVA_CHAR_ERROR)
894 char_lit = 0; /* We silently convert it to zero */
896 JAVA_LEX_CHAR_LIT (char_lit);
897 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
901 /* String literals */
907 for (no_error = 1, c = java_get_unicode ();
908 c != '"' && c != '\n'; c = java_get_unicode ())
911 c = java_parse_escape_sequence ();
912 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
913 java_unicode_2_utf8 (c);
915 if (c == '\n' || c == UEOF) /* ULT */
917 lineno--; /* Refer to the line the terminator was seen */
918 java_lex_error ("String not terminated at end of line.", 0);
922 obstack_1grow (&temporary_obstack, '\0');
923 string = obstack_finish (&temporary_obstack);
925 if (!no_error || (c != '"'))
926 java_lval->node = error_mark_node; /* Requires further testing FIXME */
929 tree s = make_node (STRING_CST);
930 TREE_STRING_LENGTH (s) = strlen (string);
931 TREE_STRING_POINTER (s) =
932 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
933 strcpy (TREE_STRING_POINTER (s), string);
937 return STRING_LIT_TK;
945 BUILD_OPERATOR (OP_TK);
951 if (ctxp->ccb_indent == 1)
952 ctxp->first_ccb_indent1 = lineno;
954 BUILD_OPERATOR (OCB_TK);
958 if (ctxp->ccb_indent == 1)
959 ctxp->last_ccb_indent1 = lineno;
960 BUILD_OPERATOR (CCB_TK);
963 BUILD_OPERATOR (OSB_TK);
975 BUILD_OPERATOR (DOT_TK);
983 if ((c = java_get_unicode ()) == '=')
985 BUILD_OPERATOR (EQ_TK);
989 /* Equals is used in two different locations. In the
990 variable_declarator: rule, it has to be seen as '=' as opposed
991 to being seen as an ordinary assignment operator in
992 assignment_operators: rule. */
993 java_unget_unicode ();
994 BUILD_OPERATOR (ASSIGN_TK);
998 switch ((c = java_get_unicode ()))
1001 BUILD_OPERATOR (GTE_TK);
1003 switch ((c = java_get_unicode ()))
1006 if ((c = java_get_unicode ()) == '=')
1008 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1012 java_unget_unicode ();
1013 BUILD_OPERATOR (ZRS_TK);
1016 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1018 java_unget_unicode ();
1019 BUILD_OPERATOR (SRS_TK);
1022 java_unget_unicode ();
1023 BUILD_OPERATOR (GT_TK);
1027 switch ((c = java_get_unicode ()))
1030 BUILD_OPERATOR (LTE_TK);
1032 if ((c = java_get_unicode ()) == '=')
1034 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1038 java_unget_unicode ();
1039 BUILD_OPERATOR (LS_TK);
1042 java_unget_unicode ();
1043 BUILD_OPERATOR (LT_TK);
1047 switch ((c = java_get_unicode ()))
1050 BUILD_OPERATOR (BOOL_AND_TK);
1052 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1054 java_unget_unicode ();
1055 BUILD_OPERATOR (AND_TK);
1059 switch ((c = java_get_unicode ()))
1062 BUILD_OPERATOR (BOOL_OR_TK);
1064 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1066 java_unget_unicode ();
1067 BUILD_OPERATOR (OR_TK);
1071 switch ((c = java_get_unicode ()))
1074 BUILD_OPERATOR (INCR_TK);
1076 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1078 java_unget_unicode ();
1079 BUILD_OPERATOR (PLUS_TK);
1083 switch ((c = java_get_unicode ()))
1086 BUILD_OPERATOR (DECR_TK);
1088 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1090 java_unget_unicode ();
1091 ctxp->minus_seen = 1;
1092 BUILD_OPERATOR (MINUS_TK);
1096 if ((c = java_get_unicode ()) == '=')
1098 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1102 java_unget_unicode ();
1103 BUILD_OPERATOR (MULT_TK);
1107 if ((c = java_get_unicode ()) == '=')
1109 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1113 java_unget_unicode ();
1114 BUILD_OPERATOR (DIV_TK);
1118 if ((c = java_get_unicode ()) == '=')
1120 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1124 java_unget_unicode ();
1125 BUILD_OPERATOR (XOR_TK);
1129 if ((c = java_get_unicode ()) == '=')
1131 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1135 java_unget_unicode ();
1136 BUILD_OPERATOR (REM_TK);
1140 if ((c = java_get_unicode()) == '=')
1142 BUILD_OPERATOR (NEQ_TK);
1146 java_unget_unicode ();
1147 BUILD_OPERATOR (NEG_TK);
1152 BUILD_OPERATOR (REL_QM_TK);
1155 BUILD_OPERATOR (REL_CL_TK);
1157 BUILD_OPERATOR (NOT_TK);
1160 /* Keyword, boolean literal or null literal */
1161 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1162 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1164 java_unicode_2_utf8 (c);
1165 if (all_ascii && c >= 128)
1170 obstack_1grow (&temporary_obstack, '\0');
1171 string = obstack_finish (&temporary_obstack);
1172 java_unget_unicode ();
1174 /* If we have something all ascii, we consider a keyword, a boolean
1175 literal, a null literal or an all ASCII identifier. Otherwise,
1176 this is an identifier (possibly not respecting formation rule). */
1179 struct java_keyword *kw;
1180 if ((kw=java_keyword (string, ascii_index)))
1182 JAVA_LEX_KW (string);
1185 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1186 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1187 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1189 SET_MODIFIER_CTX (kw->token);
1192 SET_LVAL_NODE (float_type_node);
1195 SET_LVAL_NODE (double_type_node);
1198 SET_LVAL_NODE (boolean_type_node);
1201 SET_LVAL_NODE (byte_type_node);
1204 SET_LVAL_NODE (short_type_node);
1207 SET_LVAL_NODE (int_type_node);
1210 SET_LVAL_NODE (long_type_node);
1213 SET_LVAL_NODE (char_type_node);
1216 /* Keyword based literals */
1219 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1220 boolean_true_node : boolean_false_node));
1223 SET_LVAL_NODE (null_pointer_node);
1226 /* Some keyword we want to retain information on the location
1239 BUILD_OPERATOR (kw->token);
1247 /* We may have an ID here */
1248 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1250 JAVA_LEX_ID (string);
1251 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1255 /* Everything else is an invalid character in the input */
1257 char lex_error_buffer [128];
1258 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1259 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1260 java_lex_error (lex_error_buffer, 1);
1266 java_unicode_2_utf8 (unicode)
1269 if (RANGE (unicode, 0x01, 0x7f))
1270 obstack_1grow (&temporary_obstack, (char)unicode);
1271 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1273 obstack_1grow (&temporary_obstack,
1274 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1275 obstack_1grow (&temporary_obstack,
1276 (unsigned char)(0x80 | (unicode & 0x3f)));
1278 else /* Range 0x800-0xffff */
1280 obstack_1grow (&temporary_obstack,
1281 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1282 obstack_1grow (&temporary_obstack,
1283 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1284 obstack_1grow (&temporary_obstack,
1285 (unsigned char)(0x80 | (unicode & 0x003f)));
1291 build_wfl_node (node)
1294 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1299 java_lex_error (msg, forward)
1300 const char *msg ATTRIBUTE_UNUSED;
1301 int forward ATTRIBUTE_UNUSED;
1304 ctxp->elc.line = ctxp->c_line->lineno;
1305 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1307 /* Might be caught in the middle of some error report */
1308 ctxp->java_error_flag = 0;
1325 if (next != '\n' && next != EOF)
1337 java_get_line_col (filename, line, col)
1338 char *filename ATTRIBUTE_UNUSED;
1339 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1344 /* Dumb implementation. Doesn't try to cache or optimize things. */
1345 /* First line of the file is line 1, first column is 1 */
1347 /* COL == -1 means, at the CR/LF in LINE */
1348 /* COL == -2 means, at the first non space char in LINE */
1351 int c, ccol, cline = 1;
1352 int current_line_col = 0;
1353 int first_non_space = 0;
1356 if (!(fp = fopen (filename, "r")))
1357 fatal ("Can't open file - java_display_line_col");
1359 while (cline != line)
1364 static char msg[] = "<<file too short - unexpected EOF>>";
1365 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1368 if (java_is_eol (fp, c))
1372 /* Gather the chars of the current line in a buffer */
1376 if (c < 0 || java_is_eol (fp, c))
1378 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1379 first_non_space = current_line_col;
1380 obstack_1grow (&temporary_obstack, c);
1385 obstack_1grow (&temporary_obstack, '\n');
1389 col = current_line_col;
1390 first_non_space = 0;
1393 col = first_non_space;
1395 first_non_space = 0;
1397 /* Place the '^' at the right position */
1398 base = obstack_base (&temporary_obstack);
1399 for (ccol = 1; ccol <= col; ccol++)
1401 /* Compute \t when reaching first_non_space */
1402 char c = (first_non_space ?
1403 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1404 obstack_1grow (&temporary_obstack, c);
1406 obstack_grow0 (&temporary_obstack, "^", 1);
1409 return obstack_finish (&temporary_obstack);