1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded characters
28 and returns a token for everything found but comments, white spaces
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
40 extern struct obstack *expression_obstack;
43 /* Function declaration */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((int, int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((void));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
69 int java_lang_imported = 0;
72 java_lang_id = get_identifier ("java.lang");
73 if (!java_lang_cloneable)
74 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
76 inst_id = get_identifier ("inst$");
78 wpv_id = get_identifier ("write_parm_value$");
80 if (!java_lang_imported)
82 tree node = build_tree_list
83 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
84 read_import_dir (TREE_PURPOSE (node));
85 TREE_CHAIN (node) = ctxp->import_demand_list;
86 ctxp->import_demand_list = node;
87 java_lang_imported = 1;
91 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
93 label_id = get_identifier ("$L");
95 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
96 if (!wfl_string_buffer)
98 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
100 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
102 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
103 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
105 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
106 bzero ((PTR) current_jcf, sizeof (JCF));
107 ctxp->current_parsed_class = NULL;
108 ctxp->package = NULL_TREE;
111 ctxp->filename = input_filename;
112 ctxp->lineno = lineno = 0;
115 ctxp->unget_utf8_value = 0;
116 ctxp->minus_seen = 0;
117 ctxp->java_error_flag = 0;
121 java_sprint_unicode (line, i)
122 struct java_line *line;
125 static char buffer [10];
126 if (line->unicode_escape_p [i] || line->line [i] > 128)
127 sprintf (buffer, "\\u%04x", line->line [i]);
130 buffer [0] = line->line [i];
137 java_sneak_unicode ()
139 return (ctxp->c_line->line [ctxp->c_line->current]);
143 java_unget_unicode ()
145 if (!ctxp->c_line->current)
146 fatal ("can't unget unicode - java_unget_unicode");
147 ctxp->c_line->current--;
148 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
152 java_allocate_new_line ()
154 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
155 char ahead_escape_p = (ctxp->c_line ?
156 ctxp->c_line->unicode_escape_ahead_p : 0);
158 if (ctxp->c_line && !ctxp->c_line->white_space_only)
162 free (ctxp->p_line->unicode_escape_p);
163 free (ctxp->p_line->line);
166 ctxp->p_line = ctxp->c_line;
167 ctxp->c_line = NULL; /* Reallocated */
172 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
173 ctxp->c_line->max = JAVA_LINE_MAX;
174 ctxp->c_line->line = (unicode_t *)xmalloc
175 (sizeof (unicode_t)*ctxp->c_line->max);
176 ctxp->c_line->unicode_escape_p =
177 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
178 ctxp->c_line->white_space_only = 0;
181 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
182 ctxp->c_line->char_col = ctxp->c_line->current = 0;
185 ctxp->c_line->line [ctxp->c_line->size] = ahead;
186 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
187 ctxp->c_line->size++;
189 ctxp->c_line->ahead [0] = 0;
190 ctxp->c_line->unicode_escape_ahead_p = 0;
191 ctxp->c_line->lineno = ++lineno;
192 ctxp->c_line->white_space_only = 1;
195 #define BAD_UTF8_VALUE 0xFFFE
203 if (ctxp->unget_utf8_value)
205 int to_return = ctxp->unget_utf8_value;
206 ctxp->unget_utf8_value = 0;
218 if ((c & 0xe0) == 0xc0)
221 if ((c1 & 0xc0) == 0x80)
222 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
225 else if ((c & 0xf0) == 0xe0)
228 if ((c1 & 0xc0) == 0x80)
231 if ((c2 & 0xc0) == 0x80)
232 return (unicode_t)(((c & 0xf) << 12) +
233 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
240 /* We looked for a UTF8 multi-byte sequence (since we saw an initial
241 byte with the high bit set), but found invalid bytes instead.
242 If the most recent byte was Ascii (and not EOF), we should
243 unget it, in case it was a comment terminator or other delimiter. */
246 return BAD_UTF8_VALUE;
251 java_store_unicode (l, c, unicode_escape_p)
254 int unicode_escape_p;
256 if (l->size == l->max)
258 l->max += JAVA_LINE_MAX;
259 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
260 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
261 sizeof (char)*l->max);
263 l->line [l->size] = c;
264 l->unicode_escape_p [l->size++] = unicode_escape_p;
268 java_read_unicode (term_context, unicode_escape_p)
270 int *unicode_escape_p;
275 c = java_read_char ();
276 *unicode_escape_p = 0;
279 return ((term_context ? c :
280 java_lineterminator (c) ? '\n' : (unicode_t)c));
282 /* Count the number of preceding '\' */
283 for (base = ftell (finput), i = base-2; c == '\\';)
285 fseek (finput, i--, SEEK_SET);
286 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
288 fseek (finput, base, SEEK_SET);
289 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
291 c = java_read_char ();
294 unsigned short unicode = 0;
296 /* Next should be 4 hex digits, otherwise it's an error.
297 The hex value is converted into the unicode, pushed into
298 the Unicode stream. */
299 for (shift = 12; shift >= 0; shift -= 4)
301 if ((c = java_read_char ()) == UEOF)
303 if (c >= '0' && c <= '9')
304 unicode |= (unicode_t)((c-'0') << shift);
305 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
306 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
309 ("Non hex digit in Unicode escape sequence", 0);
311 *unicode_escape_p = 1;
312 return (term_context ? unicode :
313 (java_lineterminator (c) ? '\n' : unicode));
315 ctxp->unget_utf8_value = c;
317 return (unicode_t)'\\';
323 /* It's time to read a line when... */
324 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
327 java_allocate_new_line ();
328 if (ctxp->c_line->line[0] != '\n')
331 int unicode_escape_p;
332 c = java_read_unicode (0, &unicode_escape_p);
333 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
334 if (ctxp->c_line->white_space_only
335 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
336 ctxp->c_line->white_space_only = 0;
337 if ((c == '\n') || (c == UEOF))
341 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
342 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
343 return ctxp->c_line->line [ctxp->c_line->current++];
347 java_lineterminator (c)
350 if (c == '\n') /* LF */
352 else if (c == '\r') /* CR */
354 int unicode_escape_p;
355 c = java_read_unicode (1, &unicode_escape_p);
358 /* In this case we will have another terminator. For some
359 reason the lexer has several different unget methods. We
360 can't use the `ahead' method because then the \r will end
361 up in the actual text of the line, causing an error. So
362 instead we choose a very low-level method. FIXME: this
363 is incredibly ugly. */
368 ctxp->c_line->ahead [0] = c;
369 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
377 /* Parse the end of a C style comment.
378 * C is the first character following the '/' and '*'. */
380 java_parse_end_comment (c)
384 for ( ;; c = java_get_unicode ())
389 java_lex_error ("Comment not terminated at end of input", 0);
391 switch (c = java_get_unicode ())
394 java_lex_error ("Comment not terminated at end of input", 0);
397 case '*': /* reparse only '*' */
398 java_unget_unicode ();
404 /* Parse the documentation section. Keywords must be at the beginning
405 of a documentation comment line (ignoring white space and any `*'
406 character). Parsed keyword(s): @DEPRECATED. */
409 java_parse_doc_section (c)
412 int valid_tag = 0, seen_star = 0;
414 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
426 c = java_get_unicode();
430 java_lex_error ("Comment not terminated at end of input", 0);
432 if (seen_star && (c == '/'))
433 return 1; /* Goto step1 in caller */
435 /* We're parsing @deprecated */
436 if (valid_tag && (c == '@'))
441 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
443 c = java_get_unicode ();
444 tag [tag_index++] = c;
448 java_lex_error ("Comment not terminated at end of input", 0);
449 tag [tag_index] = '\0';
451 if (!strcmp (tag, "deprecated"))
452 ctxp->deprecated = 1;
454 java_unget_unicode ();
458 /* This function is to be used only by JAVA_ID_CHAR_P (), otherwise it
459 will return a wrong result. */
461 java_letter_or_digit_p (c)
464 return _JAVA_LETTER_OR_DIGIT_P (c);
468 java_parse_escape_sequence ()
473 switch (c = java_get_unicode ())
476 return (unicode_t)0x8;
478 return (unicode_t)0x9;
480 return (unicode_t)0xa;
482 return (unicode_t)0xc;
484 return (unicode_t)0xd;
486 return (unicode_t)0x22;
488 return (unicode_t)0x27;
490 return (unicode_t)0x5c;
491 case '0': case '1': case '2': case '3': case '4':
492 case '5': case '6': case '7': case '8': case '9':
495 int octal_escape_index = 0;
497 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
498 c = java_get_unicode ())
499 octal_escape [octal_escape_index++] = c;
501 java_unget_unicode ();
503 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
505 java_lex_error ("Literal octal escape out of range", 0);
506 return JAVA_CHAR_ERROR;
511 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
512 i < octal_escape_index; i++, shift -= 3)
513 char_lit |= (octal_escape [i] - '0') << shift;
520 return '\n'; /* ULT, caught latter as a specific error */
522 java_lex_error ("Illegal character in escape sequence", 0);
523 return JAVA_CHAR_ERROR;
527 /* Isolate the code which may raise an arithmetic exception in its
536 int number_beginning;
539 static void java_perform_atof PARAMS ((PTR));
542 java_perform_atof (av)
545 struct jpa_args *a = (struct jpa_args *)av;
546 YYSTYPE *java_lval = a->java_lval;
547 int number_beginning = a->number_beginning;
548 REAL_VALUE_TYPE value;
549 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
551 SET_REAL_VALUE_ATOF (value,
552 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
554 if (REAL_VALUE_ISINF (value)
555 || REAL_VALUE_ISNAN (value))
557 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
561 SET_LVAL_NODE_TYPE (build_real (type, value), type);
565 static int yylex PARAMS ((YYSTYPE *));
575 unicode_t c, first_unicode;
576 int ascii_index, all_ascii;
579 /* Translation of the Unicode escape in the raw stream of Unicode
580 characters. Takes care of line terminator. */
582 /* Skip white spaces: SP, TAB and FF or ULT */
583 for (c = java_get_unicode ();
584 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
587 ctxp->elc.line = ctxp->c_line->lineno;
588 ctxp->elc.col = ctxp->c_line->char_col-2;
591 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
593 if (c == 0x1a) /* CTRL-Z */
595 if ((c = java_get_unicode ()) == UEOF)
596 return 0; /* Ok here */
598 java_unget_unicode (); /* Caught latter at the end the function */
600 /* Handle EOF here */
601 if (c == UEOF) /* Should probably do something here... */
604 /* Take care of eventual comments. */
607 switch (c = java_get_unicode ())
612 c = java_get_unicode ();
614 java_lex_error ("Comment not terminated at end of input", 0);
615 if (c == '\n') /* ULT */
621 if ((c = java_get_unicode ()) == '*')
623 if ((c = java_get_unicode ()) == '/')
624 goto step1; /* Empy documentation comment */
625 else if (java_parse_doc_section (c))
629 java_parse_end_comment ((c = java_get_unicode ()));
633 java_unget_unicode ();
639 ctxp->elc.line = ctxp->c_line->lineno;
640 ctxp->elc.prev_col = ctxp->elc.col;
641 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
642 if (ctxp->elc.col < 0)
643 fatal ("ctxp->elc.col < 0 - java_lex");
645 /* Numeric literals */
646 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
648 /* This section of code is borrowed from gcc/c-lex.c */
649 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
650 int parts[TOTAL_PARTS];
651 HOST_WIDE_INT high, low;
652 /* End borrowed section */
653 char literal_token [256];
654 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
657 int number_beginning = ctxp->c_line->current;
660 /* We might have a . separator instead of a FP like .[0-9]* */
663 unicode_t peep = java_sneak_unicode ();
665 if (!JAVA_ASCII_DIGIT (peep))
668 BUILD_OPERATOR (DOT_TK);
672 for (i = 0; i < TOTAL_PARTS; i++)
677 c = java_get_unicode ();
678 if (c == 'x' || c == 'X')
681 c = java_get_unicode ();
683 else if (JAVA_ASCII_DIGIT (c))
687 /* Push the '.' back and prepare for a FP parsing... */
688 java_unget_unicode ();
693 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
694 JAVA_LEX_LIT ("0", 10);
698 SET_LVAL_NODE (long_zero_node);
701 SET_LVAL_NODE (float_zero_node);
704 SET_LVAL_NODE (double_zero_node);
707 java_unget_unicode ();
708 SET_LVAL_NODE (integer_zero_node);
713 /* Parse the first part of the literal, until we find something
714 which is not a number. */
715 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
716 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
717 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
719 /* We store in a string (in case it turns out to be a FP) and in
720 PARTS if we have to process an integer literal. */
721 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
724 literal_token [literal_index++] = c;
725 /* This section of code is borrowed from gcc/c-lex.c */
726 for (count = 0; count < TOTAL_PARTS; count++)
728 parts[count] *= radix;
731 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
732 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
737 if (parts [TOTAL_PARTS-1] != 0)
739 /* End borrowed section. */
740 c = java_get_unicode ();
743 /* If we have something from the FP char set but not a digit, parse
745 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
748 int seen_digit = (literal_index ? 1 : 0);
749 int seen_exponent = 0;
750 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
751 double unless specified. */
753 java_lex_error ("Can't express non-decimal FP literal", 0);
762 literal_token [literal_index++ ] = c;
763 c = java_get_unicode ();
766 java_lex_error ("Invalid character in FP literal", 0);
769 if (c == 'e' || c == 'E')
773 /* {E,e} must have seen at least a digit */
775 java_lex_error ("Invalid FP literal", 0);
779 literal_token [literal_index++] = c;
780 c = java_get_unicode ();
783 java_lex_error ("Invalid character in FP literal", 0);
785 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
787 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
788 stage = 4; /* So we fall through */
791 if ((c=='-' || c =='+') && stage == 2)
794 literal_token [literal_index++] = c;
795 c = java_get_unicode ();
798 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
799 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
800 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
801 (stage == 3 && JAVA_ASCII_DIGIT (c)))
803 if (JAVA_ASCII_DIGIT (c))
805 literal_token [literal_index++ ] = c;
806 c = java_get_unicode ();
813 if (stage != 4) /* Don't push back fF/dD */
814 java_unget_unicode ();
816 /* An exponent (if any) must have seen a digit. */
817 if (seen_exponent && !seen_digit)
818 java_lex_error ("Invalid FP literal", 0);
820 literal_token [literal_index] = '\0';
821 JAVA_LEX_LIT (literal_token, radix);
824 a.literal_token = literal_token;
826 a.java_lval = java_lval;
827 a.number_beginning = number_beginning;
828 if (do_float_handler (java_perform_atof, (PTR) &a))
831 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
837 } /* JAVA_ASCCI_FPCHAR (c) */
839 /* Here we get back to converting the integral literal. */
840 if (c == 'L' || c == 'l')
842 else if (radix == 16 && JAVA_ASCII_LETTER (c))
843 java_lex_error ("Digit out of range in hexadecimal literal", 0);
844 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
845 java_lex_error ("Digit out of range in octal literal", 0);
846 else if (radix == 16 && !literal_index)
847 java_lex_error ("No digit specified for hexadecimal literal", 0);
849 java_unget_unicode ();
851 #ifdef JAVA_LEX_DEBUG
852 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
853 JAVA_LEX_LIT (literal_token, radix);
855 /* This section of code is borrowed from gcc/c-lex.c */
858 bytes = GET_TYPE_PRECISION (long_type_node);
859 for (i = bytes; i < TOTAL_PARTS; i++)
867 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
869 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
870 / HOST_BITS_PER_CHAR)]
871 << (i * HOST_BITS_PER_CHAR));
872 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
874 /* End borrowed section. */
879 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
880 9223372036854775807L is the biggest `long' literal that can be
881 expressed using a 10 radix. For other radixes, everything that
882 fits within 64 bits is OK. */
883 int hb = (high >> 31);
884 if (overflow || (hb && low && radix == 10) ||
885 (hb && high & 0x7fffffff && radix == 10) ||
886 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
887 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
891 /* 2147483648 is valid if operand of a '-'. Otherwise,
892 2147483647 is the biggest `int' literal that can be
893 expressed using a 10 radix. For other radixes, everything
894 that fits within 32 bits is OK. As all literals are
895 signed, we sign extend here. */
896 int hb = (low >> 31) & 0x1;
897 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
898 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
899 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
902 ctxp->minus_seen = 0;
903 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
904 (long_suffix ? long_type_node : int_type_node));
908 ctxp->minus_seen = 0;
909 /* Character literals */
913 if ((c = java_get_unicode ()) == '\\')
914 char_lit = java_parse_escape_sequence ();
918 c = java_get_unicode ();
920 if ((c == '\n') || (c == UEOF))
921 java_lex_error ("Character literal not terminated at end of line", 0);
923 java_lex_error ("Syntax error in character literal", 0);
925 if (c == JAVA_CHAR_ERROR)
926 char_lit = 0; /* We silently convert it to zero */
928 JAVA_LEX_CHAR_LIT (char_lit);
929 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
933 /* String literals */
939 for (no_error = 1, c = java_get_unicode ();
940 c != '"' && c != '\n'; c = java_get_unicode ())
943 c = java_parse_escape_sequence ();
944 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
945 java_unicode_2_utf8 (c);
947 if (c == '\n' || c == UEOF) /* ULT */
949 lineno--; /* Refer to the line the terminator was seen */
950 java_lex_error ("String not terminated at end of line.", 0);
954 obstack_1grow (&temporary_obstack, '\0');
955 string = obstack_finish (&temporary_obstack);
957 if (!no_error || (c != '"'))
958 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
961 tree s = make_node (STRING_CST);
962 TREE_STRING_LENGTH (s) = strlen (string);
963 TREE_STRING_POINTER (s) =
964 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
965 strcpy (TREE_STRING_POINTER (s), string);
969 return STRING_LIT_TK;
977 BUILD_OPERATOR (OP_TK);
983 if (ctxp->ccb_indent == 1)
984 ctxp->first_ccb_indent1 = lineno;
986 BUILD_OPERATOR (OCB_TK);
990 if (ctxp->ccb_indent == 1)
991 ctxp->last_ccb_indent1 = lineno;
992 BUILD_OPERATOR (CCB_TK);
995 BUILD_OPERATOR (OSB_TK);
1007 BUILD_OPERATOR (DOT_TK);
1008 /* return DOT_TK; */
1015 if ((c = java_get_unicode ()) == '=')
1017 BUILD_OPERATOR (EQ_TK);
1021 /* Equals is used in two different locations. In the
1022 variable_declarator: rule, it has to be seen as '=' as opposed
1023 to being seen as an ordinary assignment operator in
1024 assignment_operators: rule. */
1025 java_unget_unicode ();
1026 BUILD_OPERATOR (ASSIGN_TK);
1030 switch ((c = java_get_unicode ()))
1033 BUILD_OPERATOR (GTE_TK);
1035 switch ((c = java_get_unicode ()))
1038 if ((c = java_get_unicode ()) == '=')
1040 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1044 java_unget_unicode ();
1045 BUILD_OPERATOR (ZRS_TK);
1048 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1050 java_unget_unicode ();
1051 BUILD_OPERATOR (SRS_TK);
1054 java_unget_unicode ();
1055 BUILD_OPERATOR (GT_TK);
1059 switch ((c = java_get_unicode ()))
1062 BUILD_OPERATOR (LTE_TK);
1064 if ((c = java_get_unicode ()) == '=')
1066 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1070 java_unget_unicode ();
1071 BUILD_OPERATOR (LS_TK);
1074 java_unget_unicode ();
1075 BUILD_OPERATOR (LT_TK);
1079 switch ((c = java_get_unicode ()))
1082 BUILD_OPERATOR (BOOL_AND_TK);
1084 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1086 java_unget_unicode ();
1087 BUILD_OPERATOR (AND_TK);
1091 switch ((c = java_get_unicode ()))
1094 BUILD_OPERATOR (BOOL_OR_TK);
1096 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1098 java_unget_unicode ();
1099 BUILD_OPERATOR (OR_TK);
1103 switch ((c = java_get_unicode ()))
1106 BUILD_OPERATOR (INCR_TK);
1108 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1110 java_unget_unicode ();
1111 BUILD_OPERATOR (PLUS_TK);
1115 switch ((c = java_get_unicode ()))
1118 BUILD_OPERATOR (DECR_TK);
1120 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1122 java_unget_unicode ();
1123 ctxp->minus_seen = 1;
1124 BUILD_OPERATOR (MINUS_TK);
1128 if ((c = java_get_unicode ()) == '=')
1130 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1134 java_unget_unicode ();
1135 BUILD_OPERATOR (MULT_TK);
1139 if ((c = java_get_unicode ()) == '=')
1141 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1145 java_unget_unicode ();
1146 BUILD_OPERATOR (DIV_TK);
1150 if ((c = java_get_unicode ()) == '=')
1152 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1156 java_unget_unicode ();
1157 BUILD_OPERATOR (XOR_TK);
1161 if ((c = java_get_unicode ()) == '=')
1163 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1167 java_unget_unicode ();
1168 BUILD_OPERATOR (REM_TK);
1172 if ((c = java_get_unicode()) == '=')
1174 BUILD_OPERATOR (NEQ_TK);
1178 java_unget_unicode ();
1179 BUILD_OPERATOR (NEG_TK);
1184 BUILD_OPERATOR (REL_QM_TK);
1187 BUILD_OPERATOR (REL_CL_TK);
1189 BUILD_OPERATOR (NOT_TK);
1192 /* Keyword, boolean literal or null literal */
1193 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1194 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1196 java_unicode_2_utf8 (c);
1197 if (all_ascii && c >= 128)
1202 obstack_1grow (&temporary_obstack, '\0');
1203 string = obstack_finish (&temporary_obstack);
1204 java_unget_unicode ();
1206 /* If we have something all ascii, we consider a keyword, a boolean
1207 literal, a null literal or an all ASCII identifier. Otherwise,
1208 this is an identifier (possibly not respecting formation rule). */
1211 struct java_keyword *kw;
1212 if ((kw=java_keyword (string, ascii_index)))
1214 JAVA_LEX_KW (string);
1217 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1218 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1219 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1221 SET_MODIFIER_CTX (kw->token);
1224 SET_LVAL_NODE (float_type_node);
1227 SET_LVAL_NODE (double_type_node);
1230 SET_LVAL_NODE (boolean_type_node);
1233 SET_LVAL_NODE (byte_type_node);
1236 SET_LVAL_NODE (short_type_node);
1239 SET_LVAL_NODE (int_type_node);
1242 SET_LVAL_NODE (long_type_node);
1245 SET_LVAL_NODE (char_type_node);
1248 /* Keyword based literals */
1251 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1252 boolean_true_node : boolean_false_node));
1255 SET_LVAL_NODE (null_pointer_node);
1258 /* Some keyword we want to retain information on the location
1271 BUILD_OPERATOR (kw->token);
1279 /* We may have an ID here */
1280 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1282 JAVA_LEX_ID (string);
1283 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1287 /* Everything else is an invalid character in the input */
1289 char lex_error_buffer [128];
1290 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1291 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1292 java_lex_error (lex_error_buffer, 1);
1298 java_unicode_2_utf8 (unicode)
1301 if (RANGE (unicode, 0x01, 0x7f))
1302 obstack_1grow (&temporary_obstack, (char)unicode);
1303 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1305 obstack_1grow (&temporary_obstack,
1306 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1307 obstack_1grow (&temporary_obstack,
1308 (unsigned char)(0x80 | (unicode & 0x3f)));
1310 else /* Range 0x800-0xffff */
1312 obstack_1grow (&temporary_obstack,
1313 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1314 obstack_1grow (&temporary_obstack,
1315 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1316 obstack_1grow (&temporary_obstack,
1317 (unsigned char)(0x80 | (unicode & 0x003f)));
1323 build_wfl_node (node)
1326 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1331 java_lex_error (msg, forward)
1332 const char *msg ATTRIBUTE_UNUSED;
1333 int forward ATTRIBUTE_UNUSED;
1336 ctxp->elc.line = ctxp->c_line->lineno;
1337 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1339 /* Might be caught in the middle of some error report */
1340 ctxp->java_error_flag = 0;
1357 if (next != '\n' && next != EOF)
1369 java_get_line_col (filename, line, col)
1370 const char *filename ATTRIBUTE_UNUSED;
1371 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1376 /* Dumb implementation. Doesn't try to cache or optimize things. */
1377 /* First line of the file is line 1, first column is 1 */
1379 /* COL == -1 means, at the CR/LF in LINE */
1380 /* COL == -2 means, at the first non space char in LINE */
1383 int c, ccol, cline = 1;
1384 int current_line_col = 0;
1385 int first_non_space = 0;
1388 if (!(fp = fopen (filename, "r")))
1389 fatal ("Can't open file - java_display_line_col");
1391 while (cline != line)
1396 static char msg[] = "<<file too short - unexpected EOF>>";
1397 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1400 if (java_is_eol (fp, c))
1404 /* Gather the chars of the current line in a buffer */
1408 if (c < 0 || java_is_eol (fp, c))
1410 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1411 first_non_space = current_line_col;
1412 obstack_1grow (&temporary_obstack, c);
1417 obstack_1grow (&temporary_obstack, '\n');
1421 col = current_line_col;
1422 first_non_space = 0;
1425 col = first_non_space;
1427 first_non_space = 0;
1429 /* Place the '^' at the right position */
1430 base = obstack_base (&temporary_obstack);
1431 for (ccol = 1; ccol <= col; ccol++)
1433 /* Compute \t when reaching first_non_space */
1434 char c = (first_non_space ?
1435 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1436 obstack_1grow (&temporary_obstack, c);
1438 obstack_grow0 (&temporary_obstack, "^", 1);
1441 return obstack_finish (&temporary_obstack);