1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex), which reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded characters
28 and returns a token for everything found except comments, white space
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
/* Obstack defined elsewhere; in this file it is handed to obstack_alloc
   when the lexer stores the text of a string literal.  */
40 extern struct obstack *expression_obstack;
43 /* Forward declarations of the file-local (static) helper functions. */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((int, int *));
/* NOTE(review): java_store_unicode is declared a second time here with
   the same signature as a few lines above; the repetition looks
   redundant.  */
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((void));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
69 int java_lang_imported = 0;
72 java_lang_id = get_identifier ("java.lang");
73 if (!java_lang_cloneable)
74 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
75 if (!java_io_serializable)
76 java_io_serializable = get_identifier ("java.io.Serializable");
78 inst_id = get_identifier ("inst$");
80 wpv_id = get_identifier ("write_parm_value$");
82 if (!java_lang_imported)
84 tree node = build_tree_list
85 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
86 read_import_dir (TREE_PURPOSE (node));
87 TREE_CHAIN (node) = ctxp->import_demand_list;
88 ctxp->import_demand_list = node;
89 java_lang_imported = 1;
93 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
95 label_id = get_identifier ("$L");
97 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
98 if (!wfl_string_buffer)
100 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
102 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
104 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
105 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
107 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
108 bzero ((PTR) current_jcf, sizeof (JCF));
109 ctxp->current_parsed_class = NULL;
110 ctxp->package = NULL_TREE;
113 ctxp->filename = input_filename;
114 ctxp->lineno = lineno = 0;
117 ctxp->unget_utf8_value = 0;
118 ctxp->minus_seen = 0;
119 ctxp->java_error_flag = 0;
/* Format character I of LINE into the function's static BUFFER.  When
   the character is flagged in LINE->unicode_escape_p, or its value is
   greater than 128, BUFFER receives a backslash, `u' and four hex digits.
   The statement storing the raw character in buffer[0] is presumably the
   alternative branch; its `else', the string terminator and the return
   statement are not visible here -- TODO confirm.  Because BUFFER is
   static, each call overwrites the text produced by the previous one.  */
123 java_sprint_unicode (line, i)
124 struct java_line *line;
127 static char buffer [10];
/* NOTE(review): the test is `> 128', so the value 128 itself does not
   take the escape path -- confirm this is intended.  */
128 if (line->unicode_escape_p [i] || line->line [i] > 128)
129 sprintf (buffer, "\\u%04x", line->line [i]);
132 buffer [0] = line->line [i];
/* Peek at the character found at the read position (`current') of the
   current line, without moving that position.  */
139 java_sneak_unicode ()
141 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Push back the last character read from the current line: decrement
   the read position and move the column count back by
   JAVA_COLUMN_DELTA (0).  */
145 java_unget_unicode ()
/* Nothing has been read from this line yet, so there is nothing to unget. */
147 if (!ctxp->c_line->current)
148 fatal ("can't unget unicode - java_unget_unicode");
149 ctxp->c_line->current--;
150 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Prepare ctxp->c_line to receive a new line of input.  The look-ahead
   character of the line being left (and its unicode-escape flag) is saved
   first so that it can be placed at the start of the new line.  Several
   guards and braces of this function are not visible here, so the exact
   nesting of the statements below should be confirmed.  */
154 java_allocate_new_line ()
/* Both values default to 0 when there is no current line yet. */
156 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
157 char ahead_escape_p = (ctxp->c_line ?
158 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that holds more than white space replaces the previous
   line: the previous line's buffers are released and c_line is cleared so
   that a fresh line gets allocated below.  */
160 if (ctxp->c_line && !ctxp->c_line->white_space_only)
164 free (ctxp->p_line->unicode_escape_p);
165 free (ctxp->p_line->line);
168 ctxp->p_line = ctxp->c_line;
169 ctxp->c_line = NULL; /* Reallocated */
/* Allocate the line descriptor and its two parallel arrays (characters
   and per-character escape flags), both sized JAVA_LINE_MAX.  Presumably
   done only when c_line is NULL -- guard not visible, TODO confirm.  */
174 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
175 ctxp->c_line->max = JAVA_LINE_MAX;
176 ctxp->c_line->line = (unicode_t *)xmalloc
177 (sizeof (unicode_t)*ctxp->c_line->max);
178 ctxp->c_line->unicode_escape_p =
179 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
180 ctxp->c_line->white_space_only = 0;
/* Reset the line to empty and rewind the column and read position. */
183 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
184 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Store the saved look-ahead character as the next character of the line
   (presumably only when one was pending -- guard not visible).  */
187 ctxp->c_line->line [ctxp->c_line->size] = ahead;
188 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
189 ctxp->c_line->size++;
/* Clear the look-ahead slot, bump the global line counter, and assume the
   line is white space only until a reader clears the flag.  */
191 ctxp->c_line->ahead [0] = 0;
192 ctxp->c_line->unicode_escape_ahead_p = 0;
193 ctxp->c_line->lineno = ++lineno;
194 ctxp->c_line->white_space_only = 1;
197 #define BAD_UTF8_VALUE 0xFFFE
205 if (ctxp->unget_utf8_value)
207 int to_return = ctxp->unget_utf8_value;
208 ctxp->unget_utf8_value = 0;
220 if ((c & 0xe0) == 0xc0)
223 if ((c1 & 0xc0) == 0x80)
224 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
227 else if ((c & 0xf0) == 0xe0)
230 if ((c1 & 0xc0) == 0x80)
233 if ((c2 & 0xc0) == 0x80)
234 return (unicode_t)(((c & 0xf) << 12) +
235 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
242 /* We looked for a UTF-8 multi-byte sequence (since we saw an initial
243 byte with the high bit set), but found invalid bytes instead.
244 If the most recent byte was ASCII (and not EOF), we should
245 unget it, in case it was a comment terminator or other delimiter. */
248 return BAD_UTF8_VALUE;
/* Append character C to line L, recording UNICODE_ESCAPE_P in the
   parallel flag array at the same index.  When the line is full, both
   arrays are first grown by JAVA_LINE_MAX elements.  */
253 java_store_unicode (l, c, unicode_escape_p)
256 int unicode_escape_p;
258 if (l->size == l->max)
260 l->max += JAVA_LINE_MAX;
261 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
262 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
263 sizeof (char)*l->max);
265 l->line [l->size] = c;
/* The size is advanced here, after both arrays have been written. */
266 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character with java_read_char, decoding a backslash-u escape
   made of four hex digits when one is found.  *UNICODE_ESCAPE_P is set to
   1 when the result came from such an escape and to 0 otherwise.  When
   TERM_CONTEXT is zero, a character for which java_lineterminator answers
   true is returned as '\n'; when it is nonzero, the character is returned
   unchanged.  Some guards of this function are not visible here.  */
270 java_read_unicode (term_context, unicode_escape_p)
272 int *unicode_escape_p;
277 c = java_read_char ();
278 *unicode_escape_p = 0;
/* Presumably the path taken when C is not a backslash (guard not
   visible -- TODO confirm).  */
281 return ((term_context ? c :
282 java_lineterminator (c) ? '\n' : (unicode_t)c));
284 /* Count the number of preceding '\' */
/* Walks backwards through FINPUT from two bytes before the current offset
   for as long as a backslash is read, then restores the offset.  */
285 for (base = ftell (finput), i = base-2; c == '\\';)
287 fseek (finput, i--, SEEK_SET);
288 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
290 fseek (finput, base, SEEK_SET);
291 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
293 c = java_read_char ();
296 unsigned short unicode = 0;
298 /* Next should be 4 hex digits, otherwise it's an error.
299 The hex value is converted into the unicode, pushed into
300 the Unicode stream. */
/* SHIFT runs 12, 8, 4, 0: the first digit read is the most significant. */
301 for (shift = 12; shift >= 0; shift -= 4)
303 if ((c = java_read_char ()) == UEOF)
305 if (c >= '0' && c <= '9')
306 unicode |= (unicode_t)((c-'0') << shift);
307 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/* `c | 0x20' folds an upper-case hex letter to lower case. */
308 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
311 ("Non hex digit in Unicode escape sequence", 0);
313 *unicode_escape_p = 1;
/* NOTE(review): java_lineterminator is applied to C, which at this point
   holds the last hex digit read, not to UNICODE -- confirm whether the
   decoded value was meant to be tested.  */
314 return (term_context ? unicode :
315 (java_lineterminator (c) ? '\n' : unicode));
/* The character read after the backslash is saved in
   ctxp->unget_utf8_value and the backslash itself is returned; presumably
   this is the path for a backslash that does not start an escape.  */
317 ctxp->unget_utf8_value = c;
319 return (unicode_t)'\\';
325 /* It's time to read a line when... */
326 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
329 java_allocate_new_line ();
330 if (ctxp->c_line->line[0] != '\n')
333 int unicode_escape_p;
334 c = java_read_unicode (0, &unicode_escape_p);
335 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
336 if (ctxp->c_line->white_space_only
337 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
338 ctxp->c_line->white_space_only = 0;
339 if ((c == '\n') || (c == UEOF))
343 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
344 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
345 return ctxp->c_line->line [ctxp->c_line->current++];
/* Decide whether C is a line terminator.  LF and CR are recognised.
   After a CR, the following character is read with java_read_unicode in
   terminator context (first argument 1).  The return statements and the
   test applied to that following character are not visible here; the
   prototype shows the result is an int.  */
349 java_lineterminator (c)
352 if (c == '\n') /* LF */
354 else if (c == '\r') /* CR */
356 int unicode_escape_p;
357 c = java_read_unicode (1, &unicode_escape_p);
360 /* In this case we will have another terminator. For some
361 reason the lexer has several different unget methods. We
362 can't use the `ahead' method because then the \r will end
363 up in the actual text of the line, causing an error. So
364 instead we choose a very low-level method. FIXME: this
365 is incredibly ugly. */
/* Keep the character just read, and its escape flag, in the look-ahead
   slot of the current line.  */
370 ctxp->c_line->ahead [0] = c;
371 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
379 /* Parse the end of a C style comment.
380 * C is the first character following the '/' and '*'. */
/* Characters are consumed with java_get_unicode.  An error is reported
   through java_lex_error when the input ends inside the comment.  A '*'
   met while examining the character after a star is pushed back so that
   it is examined again as a possible start of the closing sequence.  The
   case labels other than '*' are not visible here.  */
382 java_parse_end_comment (c)
386 for ( ;; c = java_get_unicode ())
391 java_lex_error ("Comment not terminated at end of input", 0);
393 switch (c = java_get_unicode ())
396 java_lex_error ("Comment not terminated at end of input", 0);
399 case '*': /* reparse only '*' */
400 java_unget_unicode ();
406 /* Parse the documentation section. Keywords must be at the beginning
407 of a documentation comment line (ignoring white space and any `*'
408 character). Parsed keyword(s): @DEPRECATED. */
/* C is the first character to examine.  Returns 1 when the closing
   star-slash of the comment is met while skipping leading characters; the
   other return value is not visible here.  Sets ctxp->deprecated to 1
   when the tag following '@' reads "deprecated".  */
411 java_parse_doc_section (c)
414 int valid_tag = 0, seen_star = 0;
/* Skip white space, stars and newlines at the start of the line. */
416 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
428 c = java_get_unicode();
432 java_lex_error ("Comment not terminated at end of input", 0);
434 if (seen_star && (c == '/'))
435 return 1; /* Goto step1 in caller */
437 /* We're parsing @deprecated */
438 if (valid_tag && (c == '@'))
/* Collect up to 10 characters following the '@'.  */
443 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
445 c = java_get_unicode ();
446 tag [tag_index++] = c;
450 java_lex_error ("Comment not terminated at end of input", 0);
/* NOTE(review): tag_index can be 10 here, so TAG must hold at least 11
   elements; its declaration is not visible -- confirm.  */
451 tag [tag_index] = '\0';
453 if (!strcmp (tag, "deprecated"))
454 ctxp->deprecated = 1;
456 java_unget_unicode ();
460 /* This function is to be used only by JAVA_ID_CHAR_P (); otherwise it
461 will return a wrong result. */
/* Returns the value of the _JAVA_LETTER_OR_DIGIT_P macro applied to C. */
463 java_letter_or_digit_p (c)
466 return _JAVA_LETTER_OR_DIGIT_P (c);
/* Parse what follows the backslash of an escape sequence inside a
   character or string literal and return the character it denotes.  The
   next input character selects the case; the labels of the single
   character escapes are not visible here, only the values they return.
   JAVA_CHAR_ERROR is returned, after an error has been reported, for an
   out of range octal escape or an illegal escape character.  */
470 java_parse_escape_sequence ()
475 switch (c = java_get_unicode ())
478 return (unicode_t)0x8; /* ASCII backspace */
480 return (unicode_t)0x9; /* ASCII horizontal tab */
482 return (unicode_t)0xa; /* ASCII line feed */
484 return (unicode_t)0xc; /* ASCII form feed */
486 return (unicode_t)0xd; /* ASCII carriage return */
488 return (unicode_t)0x22; /* double quote */
490 return (unicode_t)0x27; /* single quote */
492 return (unicode_t)0x5c; /* backslash */
/* NOTE(review): the labels below, and the RANGE test in the loop, also
   admit '8' and '9', which are not octal digits -- confirm against the
   language specification.  */
493 case '0': case '1': case '2': case '3': case '4':
494 case '5': case '6': case '7': case '8': case '9':
497 int octal_escape_index = 0;
/* Gather at most three digits, then push back the character that ended
   the run.  */
499 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
500 c = java_get_unicode ())
501 octal_escape [octal_escape_index++] = c;
503 java_unget_unicode ();
/* A three digit escape whose first digit is above '3' is rejected. */
505 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
507 java_lex_error ("Literal octal escape out of range", 0);
508 return JAVA_CHAR_ERROR;
/* Combine the digits, three bits each, most significant digit first. */
513 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
514 i < octal_escape_index; i++, shift -= 3)
515 char_lit |= (octal_escape [i] - '0') << shift;
522 return '\n'; /* ULT, caught later as a specific error */
524 java_lex_error ("Illegal character in escape sequence", 0);
525 return JAVA_CHAR_ERROR;
529 /* Isolate the code which may raise an arithmetic exception in its own optional function. */
538 int number_beginning;
/* Convert the text of a floating-point literal to a real constant.  AV
   points to a struct jpa_args.  Its fflag member selects FLOAT_TYPE_NODE
   (nonzero) or DOUBLE_TYPE_NODE (zero), and its literal_token member is
   the text to convert.  The lexer runs this function through
   do_float_handler.  */
541 static void java_perform_atof PARAMS ((PTR));
544 java_perform_atof (av)
547 struct jpa_args *a = (struct jpa_args *)av;
/* java_lval and number_beginning are not referenced by name in the
   visible statements; presumably the macros used below expect them to be
   in scope -- TODO confirm.  */
548 YYSTYPE *java_lval = a->java_lval;
549 int number_beginning = a->number_beginning;
550 REAL_VALUE_TYPE value;
551 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
553 SET_REAL_VALUE_ATOF (value,
554 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
/* An infinite or NaN result is reported as a range error. */
556 if (REAL_VALUE_ISINF (value)
557 || REAL_VALUE_ISNAN (value))
559 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
563 SET_LVAL_NODE_TYPE (build_real (type, value), type);
567 static int yylex PARAMS ((YYSTYPE *));
577 unicode_t c, first_unicode;
578 int ascii_index, all_ascii;
581 /* Translation of the Unicode escape in the raw stream of Unicode
582 characters. Takes care of line terminator. */
584 /* Skip white spaces: SP, TAB and FF or ULT */
585 for (c = java_get_unicode ();
586 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
589 ctxp->elc.line = ctxp->c_line->lineno;
590 ctxp->elc.col = ctxp->c_line->char_col-2;
593 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
595 if (c == 0x1a) /* CTRL-Z */
597 if ((c = java_get_unicode ()) == UEOF)
598 return 0; /* Ok here */
600 java_unget_unicode (); /* Caught later at the end of the function */
602 /* Handle EOF here */
603 if (c == UEOF) /* Should probably do something here... */
606 /* Take care of eventual comments. */
609 switch (c = java_get_unicode ())
614 c = java_get_unicode ();
616 java_lex_error ("Comment not terminated at end of input", 0);
617 if (c == '\n') /* ULT */
623 if ((c = java_get_unicode ()) == '*')
625 if ((c = java_get_unicode ()) == '/')
626 goto step1; /* Empty documentation comment */
627 else if (java_parse_doc_section (c))
631 java_parse_end_comment ((c = java_get_unicode ()));
635 java_unget_unicode ();
641 ctxp->elc.line = ctxp->c_line->lineno;
642 ctxp->elc.prev_col = ctxp->elc.col;
643 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
644 if (ctxp->elc.col < 0)
645 fatal ("ctxp->elc.col < 0 - java_lex");
647 /* Numeric literals */
648 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
650 /* This section of code is borrowed from gcc/c-lex.c */
651 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
652 int parts[TOTAL_PARTS];
653 HOST_WIDE_INT high, low;
654 /* End borrowed section */
655 char literal_token [256];
656 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
659 int number_beginning = ctxp->c_line->current;
662 /* We might have a . separator instead of a FP like .[0-9]* */
665 unicode_t peep = java_sneak_unicode ();
667 if (!JAVA_ASCII_DIGIT (peep))
670 BUILD_OPERATOR (DOT_TK);
674 for (i = 0; i < TOTAL_PARTS; i++)
679 c = java_get_unicode ();
680 if (c == 'x' || c == 'X')
683 c = java_get_unicode ();
685 else if (JAVA_ASCII_DIGIT (c))
689 /* Push the '.' back and prepare for a FP parsing... */
690 java_unget_unicode ();
695 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
696 JAVA_LEX_LIT ("0", 10);
700 SET_LVAL_NODE (long_zero_node);
703 SET_LVAL_NODE (float_zero_node);
706 SET_LVAL_NODE (double_zero_node);
709 java_unget_unicode ();
710 SET_LVAL_NODE (integer_zero_node);
715 /* Parse the first part of the literal, until we find something
716 which is not a number. */
717 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
718 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
719 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
721 /* We store in a string (in case it turns out to be a FP) and in
722 PARTS if we have to process a integer literal. */
723 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
726 literal_token [literal_index++] = c;
727 /* This section of code is borrowed from gcc/c-lex.c */
728 for (count = 0; count < TOTAL_PARTS; count++)
730 parts[count] *= radix;
733 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
734 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
739 if (parts [TOTAL_PARTS-1] != 0)
741 /* End borrowed section. */
742 c = java_get_unicode ();
745 /* If we have something from the FP char set but not a digit, parse it as a floating-point literal. */
747 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
750 int seen_digit = (literal_index ? 1 : 0);
751 int seen_exponent = 0;
752 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
753 double unless specified. */
755 java_lex_error ("Can't express non-decimal FP literal", 0);
764 literal_token [literal_index++ ] = c;
765 c = java_get_unicode ();
768 java_lex_error ("Invalid character in FP literal", 0);
771 if (c == 'e' || c == 'E')
775 /* {E,e} must have seen at least one digit */
777 java_lex_error ("Invalid FP literal", 0);
781 literal_token [literal_index++] = c;
782 c = java_get_unicode ();
785 java_lex_error ("Invalid character in FP literal", 0);
787 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
789 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
790 stage = 4; /* So we fall through */
793 if ((c=='-' || c =='+') && stage == 2)
796 literal_token [literal_index++] = c;
797 c = java_get_unicode ();
800 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
801 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
802 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
803 (stage == 3 && JAVA_ASCII_DIGIT (c)))
805 if (JAVA_ASCII_DIGIT (c))
807 literal_token [literal_index++ ] = c;
808 c = java_get_unicode ();
815 if (stage != 4) /* Don't push back fF/dD */
816 java_unget_unicode ();
818 /* An exponent (if any) must have seen a digit. */
819 if (seen_exponent && !seen_digit)
820 java_lex_error ("Invalid FP literal", 0);
822 literal_token [literal_index] = '\0';
823 JAVA_LEX_LIT (literal_token, radix);
826 a.literal_token = literal_token;
828 a.java_lval = java_lval;
829 a.number_beginning = number_beginning;
830 if (do_float_handler (java_perform_atof, (PTR) &a))
833 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
839 } /* JAVA_ASCII_FPCHAR (c) */
841 /* Here we get back to converting the integral literal. */
842 if (c == 'L' || c == 'l')
844 else if (radix == 16 && JAVA_ASCII_LETTER (c))
845 java_lex_error ("Digit out of range in hexadecimal literal", 0);
846 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
847 java_lex_error ("Digit out of range in octal literal", 0);
848 else if (radix == 16 && !literal_index)
849 java_lex_error ("No digit specified for hexadecimal literal", 0);
851 java_unget_unicode ();
853 #ifdef JAVA_LEX_DEBUG
854 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
855 JAVA_LEX_LIT (literal_token, radix);
857 /* This section of code is borrowed from gcc/c-lex.c */
860 bytes = GET_TYPE_PRECISION (long_type_node);
861 for (i = bytes; i < TOTAL_PARTS; i++)
869 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
871 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
872 / HOST_BITS_PER_CHAR)]
873 << (i * HOST_BITS_PER_CHAR));
874 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
876 /* End borrowed section. */
881 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
882 9223372036854775807L is the biggest `long' literal that can be
883 expressed using a 10 radix. For other radixes, everything that
884 fits within 64 bits is OK. */
885 int hb = (high >> 31);
886 if (overflow || (hb && low && radix == 10) ||
887 (hb && high & 0x7fffffff && radix == 10) ||
888 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
889 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
893 /* 2147483648 is valid if operand of a '-'. Otherwise,
894 2147483647 is the biggest `int' literal that can be
895 expressed using a 10 radix. For other radixes, everything
896 that fits within 32 bits is OK. As all literals are
897 signed, we sign extend here. */
898 int hb = (low >> 31) & 0x1;
899 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
900 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
901 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
904 ctxp->minus_seen = 0;
905 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
906 (long_suffix ? long_type_node : int_type_node));
910 ctxp->minus_seen = 0;
911 /* Character literals */
915 if ((c = java_get_unicode ()) == '\\')
916 char_lit = java_parse_escape_sequence ();
920 c = java_get_unicode ();
922 if ((c == '\n') || (c == UEOF))
923 java_lex_error ("Character literal not terminated at end of line", 0);
925 java_lex_error ("Syntax error in character literal", 0);
927 if (c == JAVA_CHAR_ERROR)
928 char_lit = 0; /* We silently convert it to zero */
930 JAVA_LEX_CHAR_LIT (char_lit);
931 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
935 /* String literals */
941 for (no_error = 1, c = java_get_unicode ();
942 c != '"' && c != '\n'; c = java_get_unicode ())
945 c = java_parse_escape_sequence ();
946 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
947 java_unicode_2_utf8 (c);
949 if (c == '\n' || c == UEOF) /* ULT */
951 lineno--; /* Refer to the line the terminator was seen */
952 java_lex_error ("String not terminated at end of line.", 0);
956 obstack_1grow (&temporary_obstack, '\0');
957 string = obstack_finish (&temporary_obstack);
959 if (!no_error || (c != '"'))
960 java_lval->node = error_mark_node; /* Requires further testing FIXME */
963 tree s = make_node (STRING_CST);
964 TREE_STRING_LENGTH (s) = strlen (string);
965 TREE_STRING_POINTER (s) =
966 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
967 strcpy (TREE_STRING_POINTER (s), string);
971 return STRING_LIT_TK;
979 BUILD_OPERATOR (OP_TK);
985 if (ctxp->ccb_indent == 1)
986 ctxp->first_ccb_indent1 = lineno;
988 BUILD_OPERATOR (OCB_TK);
992 if (ctxp->ccb_indent == 1)
993 ctxp->last_ccb_indent1 = lineno;
994 BUILD_OPERATOR (CCB_TK);
997 BUILD_OPERATOR (OSB_TK);
1009 BUILD_OPERATOR (DOT_TK);
1010 /* return DOT_TK; */
1017 if ((c = java_get_unicode ()) == '=')
1019 BUILD_OPERATOR (EQ_TK);
1023 /* Equals is used in two different locations. In the
1024 variable_declarator: rule, it has to be seen as '=' as opposed
1025 to being seen as an ordinary assignment operator in
1026 assignment_operators: rule. */
1027 java_unget_unicode ();
1028 BUILD_OPERATOR (ASSIGN_TK);
1032 switch ((c = java_get_unicode ()))
1035 BUILD_OPERATOR (GTE_TK);
1037 switch ((c = java_get_unicode ()))
1040 if ((c = java_get_unicode ()) == '=')
1042 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1046 java_unget_unicode ();
1047 BUILD_OPERATOR (ZRS_TK);
1050 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1052 java_unget_unicode ();
1053 BUILD_OPERATOR (SRS_TK);
1056 java_unget_unicode ();
1057 BUILD_OPERATOR (GT_TK);
1061 switch ((c = java_get_unicode ()))
1064 BUILD_OPERATOR (LTE_TK);
1066 if ((c = java_get_unicode ()) == '=')
1068 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1072 java_unget_unicode ();
1073 BUILD_OPERATOR (LS_TK);
1076 java_unget_unicode ();
1077 BUILD_OPERATOR (LT_TK);
1081 switch ((c = java_get_unicode ()))
1084 BUILD_OPERATOR (BOOL_AND_TK);
1086 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1088 java_unget_unicode ();
1089 BUILD_OPERATOR (AND_TK);
1093 switch ((c = java_get_unicode ()))
1096 BUILD_OPERATOR (BOOL_OR_TK);
1098 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1100 java_unget_unicode ();
1101 BUILD_OPERATOR (OR_TK);
1105 switch ((c = java_get_unicode ()))
1108 BUILD_OPERATOR (INCR_TK);
1110 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1112 java_unget_unicode ();
1113 BUILD_OPERATOR (PLUS_TK);
1117 switch ((c = java_get_unicode ()))
1120 BUILD_OPERATOR (DECR_TK);
1122 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1124 java_unget_unicode ();
1125 ctxp->minus_seen = 1;
1126 BUILD_OPERATOR (MINUS_TK);
1130 if ((c = java_get_unicode ()) == '=')
1132 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1136 java_unget_unicode ();
1137 BUILD_OPERATOR (MULT_TK);
1141 if ((c = java_get_unicode ()) == '=')
1143 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1147 java_unget_unicode ();
1148 BUILD_OPERATOR (DIV_TK);
1152 if ((c = java_get_unicode ()) == '=')
1154 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1158 java_unget_unicode ();
1159 BUILD_OPERATOR (XOR_TK);
1163 if ((c = java_get_unicode ()) == '=')
1165 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1169 java_unget_unicode ();
1170 BUILD_OPERATOR (REM_TK);
1174 if ((c = java_get_unicode()) == '=')
1176 BUILD_OPERATOR (NEQ_TK);
1180 java_unget_unicode ();
1181 BUILD_OPERATOR (NEG_TK);
1186 BUILD_OPERATOR (REL_QM_TK);
1189 BUILD_OPERATOR (REL_CL_TK);
1191 BUILD_OPERATOR (NOT_TK);
1194 /* Keyword, boolean literal or null literal */
1195 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1196 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1198 java_unicode_2_utf8 (c);
1199 if (all_ascii && c >= 128)
1204 obstack_1grow (&temporary_obstack, '\0');
1205 string = obstack_finish (&temporary_obstack);
1206 java_unget_unicode ();
1208 /* If we have something all ascii, we consider a keyword, a boolean
1209 literal, a null literal or an all ASCII identifier. Otherwise,
1210 this is an identifier (possibly not respecting formation rule). */
1213 struct java_keyword *kw;
1214 if ((kw=java_keyword (string, ascii_index)))
1216 JAVA_LEX_KW (string);
1219 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1220 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1221 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1223 SET_MODIFIER_CTX (kw->token);
1226 SET_LVAL_NODE (float_type_node);
1229 SET_LVAL_NODE (double_type_node);
1232 SET_LVAL_NODE (boolean_type_node);
1235 SET_LVAL_NODE (byte_type_node);
1238 SET_LVAL_NODE (short_type_node);
1241 SET_LVAL_NODE (int_type_node);
1244 SET_LVAL_NODE (long_type_node);
1247 SET_LVAL_NODE (char_type_node);
1250 /* Keyword based literals */
1253 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1254 boolean_true_node : boolean_false_node));
1257 SET_LVAL_NODE (null_pointer_node);
1260 /* For some keywords we want to retain information on the location where they were found. */
1273 BUILD_OPERATOR (kw->token);
1281 /* We may have an ID here */
1282 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1284 JAVA_LEX_ID (string);
1285 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1289 /* Everything else is an invalid character in the input */
1291 char lex_error_buffer [128];
1292 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1293 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1294 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one byte at
   a time: one byte for 0x01-0x7f, two bytes for 0x80-0x7ff and for the
   value 0, three bytes for everything else.  */
1300 java_unicode_2_utf8 (unicode)
1303 if (RANGE (unicode, 0x01, 0x7f))
1304 obstack_1grow (&temporary_obstack, (char)unicode);
/* Zero takes the two byte form (bytes 0xc0 0x80), so no NUL byte is
   written into the string being built.  */
1305 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1307 obstack_1grow (&temporary_obstack,
1308 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1309 obstack_1grow (&temporary_obstack,
1310 (unsigned char)(0x80 | (unicode & 0x3f)));
1312 else /* Range 0x800-0xffff */
/* Lead byte carries the top 4 bits, then two continuation bytes of
   6 bits each.  */
1314 obstack_1grow (&temporary_obstack,
1315 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1316 obstack_1grow (&temporary_obstack,
1317 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1318 obstack_1grow (&temporary_obstack,
1319 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, passing the current file name and the
   line and column recorded in ctxp->elc, and return the result.  */
1325 build_wfl_node (node)
1328 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Record the position of a lexical error in ctxp->elc: the line number of
   the current line, and its current column minus one plus FORWARD.  MSG
   is the error text; the statement that reports it is not visible here.
   Both parameters carry ATTRIBUTE_UNUSED, presumably because part of the
   body is compiled conditionally -- TODO confirm.  */
1333 java_lex_error (msg, forward)
1334 const char *msg ATTRIBUTE_UNUSED;
1335 int forward ATTRIBUTE_UNUSED;
1338 ctxp->elc.line = ctxp->c_line->lineno;
1339 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1341 /* Might be caught in the middle of some error report */
1342 ctxp->java_error_flag = 0;
1359 if (next != '\n' && next != EOF)
/* Build on temporary_obstack the text of line LINE of file FILENAME,
   followed by a newline and by a marker line of blanks (or tabs) ending
   in '^', and return the finished obstack object.  COL selects where the
   '^' goes; see the notes on COL below.  All three parameters carry
   ATTRIBUTE_UNUSED, presumably because the body is compiled
   conditionally -- TODO confirm.  */
1371 java_get_line_col (filename, line, col)
1372 const char *filename ATTRIBUTE_UNUSED;
1373 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1378 /* Dumb implementation. Doesn't try to cache or optimize things. */
1379 /* First line of the file is line 1, first column is 1 */
1381 /* COL == -1 means, at the CR/LF in LINE */
1382 /* COL == -2 means, at the first non space char in LINE */
1385 int c, ccol, cline = 1;
1386 int current_line_col = 0;
1387 int first_non_space = 0;
/* NOTE(review): no matching fclose is visible in this function --
   confirm the stream is closed.  The fatal message names
   java_display_line_col rather than this function.  */
1390 if (!(fp = fopen (filename, "r")))
1391 fatal ("Can't open file - java_display_line_col");
/* Skip the lines that come before LINE.  */
1393 while (cline != line)
/* When the file ends first, this placeholder text is put on the obstack
   in place of the line.  */
1398 static char msg[] = "<<file too short - unexpected EOF>>";
1399 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1402 if (java_is_eol (fp, c))
1406 /* Gather the chars of the current line in a buffer */
1410 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white space. */
1412 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1413 first_non_space = current_line_col;
1414 obstack_1grow (&temporary_obstack, c);
1419 obstack_1grow (&temporary_obstack, '\n');
/* Presumably the COL == -1 and COL == -2 cases described above; their
   guards are not visible here.  */
1423 col = current_line_col;
1424 first_non_space = 0;
1427 col = first_non_space;
1429 first_non_space = 0;
1431 /* Place the '^' at the right position */
1432 base = obstack_base (&temporary_obstack);
1433 for (ccol = 1; ccol <= col; ccol++)
1435 /* Compute \t when reaching first_non_space */
/* A tab in the source line is repeated in the marker line so the '^'
   stays aligned; this inner C shadows the outer int C.  */
1436 char c = (first_non_space ?
1437 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1438 obstack_1grow (&temporary_obstack, c);
1440 obstack_grow0 (&temporary_obstack, "^", 1);
1443 return obstack_finish (&temporary_obstack);