1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequence or utf8 encoded characters
28 and returns a token for everything found but comments, white spaces
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
/* Obstack defined outside this file; the lexer allocates the character
   data of string literal nodes on it.  */
52 extern struct obstack *expression_obstack;
55 /* Function declarations */
56 static int java_lineterminator PROTO ((unicode_t));
57 static char *java_sprint_unicode PROTO ((struct java_line *, int));
58 static void java_unicode_2_utf8 PROTO ((unicode_t));
59 static void java_lex_error PROTO ((char *, int));
60 static int java_is_eol PROTO ((FILE *, int));
61 static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
62 static unicode_t java_parse_escape_sequence PROTO (());
63 static int java_letter_or_digit_p PROTO ((unicode_t));
64 static int java_parse_doc_section PROTO ((unicode_t));
65 static void java_parse_end_comment PROTO (());
66 static unicode_t java_get_unicode PROTO (());
67 static unicode_t java_read_unicode PROTO ((int, int *));
/* NOTE(review): java_store_unicode is declared a second time here, with
   the same signature as above; the repetition is redundant.  */
68 static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
69 static unicode_t java_read_char PROTO (());
70 static void java_allocate_new_line PROTO (());
71 static void java_unget_unicode PROTO (());
72 static unicode_t java_sneak_unicode PROTO (());
/* Set to 1 once java.lang has been pushed on the import-on-demand list
   (see the test on this flag further down in this file).  */
77 int java_lang_imported = 0;
/* Initialisation of the identifiers, wfl nodes and per-context state used
   by the lexer and parser.
   NOTE(review): the function header and several lines of this body are
   not visible in this excerpt; presumably this is java_init_lex --
   confirm against the complete file.  */
81 java_lang_id = get_identifier ("java.lang");
/* The Cloneable identifier is only created when it does not exist yet.  */
82 if (!java_lang_cloneable)
83 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
/* First time through: read the java.lang directory and push java.lang at
   the head of the context import-on-demand list.  */
85 if (!java_lang_imported)
87 tree node = build_tree_list
88 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
89 read_import_dir (TREE_PURPOSE (node));
90 TREE_CHAIN (node) = ctxp->import_demand_list;
91 ctxp->import_demand_list = node;
92 java_lang_imported = 1;
96 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
98 label_id = get_identifier ("$L");
100 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
/* NOTE(review): the assignment target of the next build_expr_wfl call is
   on a line missing from this excerpt; presumably wfl_string_buffer.  */
101 if (!wfl_string_buffer)
103 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
105 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Reset the parsing state held in the current context.  */
107 ctxp->static_initialized = ctxp->non_static_initialized =
108 ctxp->incomplete_class = NULL_TREE;
/* NOTE(review): 11 is a magic count of modifier_ctx entries to clear;
   confirm it matches the declared size of the array.  */
110 bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
111 bzero (current_jcf, sizeof (JCF));
112 ctxp->current_parsed_class = NULL;
113 ctxp->package = NULL_TREE;
116 ctxp->filename = input_filename;
/* Line numbering restarts at 0; java_allocate_new_line pre-increments
   lineno when it sets up a line, so the first line is numbered 1.  */
117 ctxp->lineno = lineno = 0;
120 ctxp->unget_utf8_value = 0;
121 ctxp->minus_seen = 0;
122 ctxp->java_error_flag = 0;
/* Format the character at index I of LINE for display.  A character that
   came from a Unicode escape, or whose value is greater than 128, is
   written in backslash-u form with four hex digits; otherwise the
   character itself is stored in buffer[0].  The text is built in a
   function-local static buffer, so each call overwrites what the previous
   call produced.
   NOTE(review): the comparison is strictly greater than 128, so the value
   128 itself takes the plain-character branch -- possibly greater-or-equal
   was intended; confirm.  */
126 java_sprint_unicode (line, i)
127 struct java_line *line;
130 static char buffer [10];
131 if (line->unicode_escape_p [i] || line->line [i] > 128)
132 sprintf (buffer, "\\u%04x", line->line [i]);
135 buffer [0] = line->line [i];
/* Peek at the character sitting at the read position of the current line.
   The read position (current) is not advanced.  */
142 java_sneak_unicode ()
144 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Push back the last character read from the current line: the read
   position moves back by one and the column counter is reduced by
   JAVA_COLUMN_DELTA (0).  Calls fatal when the read position is already
   0.  */
148 java_unget_unicode ()
150 if (!ctxp->c_line->current)
151 fatal ("can't unget unicode - java_unget_unicode");
152 ctxp->c_line->current--;
153 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Set up ctxp->c_line so that a new input line can be stored in it.
   NOTE(review): several lines of this body (braces and at least some
   conditions) are missing from this excerpt, so the exact guards around
   the free, allocation and look-ahead steps cannot be seen here.  */
157 java_allocate_new_line ()
/* Capture the look-ahead character and its escape flag left in the current
   line (0 when there is no current line) before c_line is changed below.  */
159 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
160 char ahead_escape_p = (ctxp->c_line ?
161 ctxp->c_line->unicode_escape_ahead_p : 0);
/* A current line that held more than white space becomes the previous
   line (p_line); the buffers of the old previous line are released.  */
163 if (ctxp->c_line && !ctxp->c_line->white_space_only)
167 free (ctxp->p_line->unicode_escape_p);
168 free (ctxp->p_line->line);
171 ctxp->p_line = ctxp->c_line;
172 ctxp->c_line = NULL; /* Reallocated */
/* Allocate a line structure with room for JAVA_LINE_MAX characters and as
   many escape flags.  */
177 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
178 ctxp->c_line->max = JAVA_LINE_MAX;
179 ctxp->c_line->line = (unicode_t *)xmalloc
180 (sizeof (unicode_t)*ctxp->c_line->max);
181 ctxp->c_line->unicode_escape_p =
182 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
183 ctxp->c_line->white_space_only = 0;
/* Start with an empty line, read position and column at 0.  */
186 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
187 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Store the saved look-ahead character, with its escape flag, in the
   line.  */
190 ctxp->c_line->line [ctxp->c_line->size] = ahead;
191 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
192 ctxp->c_line->size++;
194 ctxp->c_line->ahead [0] = 0;
195 ctxp->c_line->unicode_escape_ahead_p = 0;
/* The global line counter is advanced and recorded in the line.  */
196 ctxp->c_line->lineno = ++lineno;
197 ctxp->c_line->white_space_only = 1;
/* Value returned by the reader below when the bytes read do not form one
   of the UTF-8 sequences it recognises.  */
200 #define BAD_UTF8_VALUE 0xFFFE
/* UTF-8 reader.
   NOTE(review): the function header and the statements that read the bytes
   c, c1 and c2 are not visible in this excerpt; presumably this is
   java_read_char and the bytes come from the input file -- confirm.  */
/* A value previously pushed back in the context is consumed first, and the
   push-back slot is cleared.  */
208 if (ctxp->unget_utf8_value)
210 int to_return = ctxp->unget_utf8_value;
211 ctxp->unget_utf8_value = 0;
/* Two-byte form: lead byte 110xxxxx followed by 10xxxxxx; the result is the
   5 low bits of the lead byte followed by the 6 low bits of c1.  */
223 if ((c & 0xe0) == 0xc0)
226 if ((c1 & 0xc0) == 0x80)
227 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
/* Three-byte form: lead byte 1110xxxx followed by two 10xxxxxx bytes; the
   result is 4 + 6 + 6 bits.  */
229 else if ((c & 0xf0) == 0xe0)
232 if ((c1 & 0xc0) == 0x80)
235 if ((c2 & 0xc0) == 0x80)
236 return (unicode_t)(((c & 0xf) << 12) +
237 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
240 return BAD_UTF8_VALUE;
/* Append character C, together with its UNICODE_ESCAPE_P flag, at the end
   of line L.  When the line is full, both the character array and the
   flag array are grown by JAVA_LINE_MAX entries first.
   NOTE(review): the results of realloc are stored straight into the line
   without being checked, so an allocation failure would lose the old
   buffer and be followed by a write through a null pointer; the initial
   allocation of these buffers in this file goes through xmalloc.  */
245 java_store_unicode (l, c, unicode_escape_p)
248 int unicode_escape_p;
250 if (l->size == l->max)
252 l->max += JAVA_LINE_MAX;
253 l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
254 l->unicode_escape_p = (char *)realloc (l->unicode_escape_p,
255 sizeof (char)*l->max);
257 l->line [l->size] = c;
258 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character from the input, turning a backslash-u escape followed
   by four hex digits into the character it denotes.  *UNICODE_ESCAPE_P is
   set to 1 when the returned character was produced by such an escape and
   to 0 otherwise.  When TERM_CONTEXT is zero, a character accepted by
   java_lineterminator is returned as a newline; when it is non-zero the
   character is returned unchanged.
   NOTE(review): some conditions of this body are on lines missing from
   this excerpt.  */
262 java_read_unicode (term_context, unicode_escape_p)
264 int *unicode_escape_p;
269 c = java_read_char ();
270 *unicode_escape_p = 0;
273 return ((term_context ? c :
274 java_lineterminator (c) ? '\n' : (unicode_t)c));
276 /* Count the number of preceding '\' */
/* BASE is the file position just after the backslash; the loop walks
   backwards from the character before it for as long as it keeps reading
   backslashes, then the position is restored.  */
277 for (base = ftell (finput), i = base-2; c == '\\';)
279 fseek (finput, i--, SEEK_SET);
280 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
282 fseek (finput, base, SEEK_SET);
/* base-i-3 is the number of backslashes found before the one just read.  */
283 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
285 c = java_read_char ();
288 unsigned short unicode = 0;
290 /* Next should be 4 hex digits, otherwise it's an error.
291 The hex value is converted into the unicode, pushed into
292 the Unicode stream. */
/* Digits are accumulated most significant nibble first (shift 12, 8, 4,
   0); c | 0x20 folds an upper case hex letter to lower case.  */
293 for (shift = 12; shift >= 0; shift -= 4)
295 if ((c = java_read_char ()) == UEOF)
297 if (c >= '0' && c <= '9')
298 unicode |= (unicode_t)((c-'0') << shift);
299 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
300 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
303 ("Non hex digit in Unicode escape sequence", 0);
305 *unicode_escape_p = 1;
/* NOTE(review): java_lineterminator is called with c, which at this point
   holds the last hex digit read, not with the decoded value in unicode;
   this looks unintended -- confirm.  */
306 return (term_context ? unicode :
307 (java_lineterminator (c) ? '\n' : unicode));
311 return (unicode_t)'\\';
/* Return the next character of the current line, reading a whole new line
   into ctxp->c_line first when there is no current line or when it has
   been consumed up to its size.
   NOTE(review): the function header and the loop construct are not visible
   in this excerpt; presumably this is java_get_unicode -- confirm.  */
317 /* It's time to read a line when... */
318 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
321 java_allocate_new_line ();
/* Nothing more is read when the new line already starts with a newline
   (which can only be the carried-over look-ahead character).  */
322 if (ctxp->c_line->line[0] != '\n')
325 int unicode_escape_p;
326 c = java_read_unicode (0, &unicode_escape_p);
327 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
/* The line stops being white-space-only at the first character that is
   neither white space nor a newline.  */
328 if (ctxp->c_line->white_space_only
329 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
330 ctxp->c_line->white_space_only = 0;
331 if ((c == '\n') || (c == UEOF))
/* Hand out the character at the read position and advance the column and
   the read position.  */
335 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
336 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
337 return ctxp->c_line->line [ctxp->c_line->current++];
/* Line terminator handling for character C.  When C is a newline or a
   carriage return, the following character is read in terminator context;
   if it is not the other half of a two-character terminator it is saved,
   with its escape flag, as the look-ahead character of the current line.
   The return statements are not visible in this excerpt; the callers in
   this file treat a non-zero result as meaning that C is a line
   terminator.  */
341 java_lineterminator (c)
344 int unicode_escape_p;
345 if (c == '\n') /* LF */
347 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
349 ctxp->c_line->ahead [0] = c;
350 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
354 else if (c == '\r') /* CR */
356 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
358 ctxp->c_line->ahead [0] = c;
359 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
367 /* Parse the end of a C style comment */
/* Characters are consumed one at a time with java_get_unicode.  An
   unterminated comment is reported through java_lex_error.
   NOTE(review): most case labels and conditions of this body are on lines
   missing from this excerpt.  */
369 java_parse_end_comment ()
373 for (c = java_get_unicode ();; c = java_get_unicode ())
378 java_lex_error ("Comment not terminated at end of input", 0);
380 switch (c = java_get_unicode ())
383 java_lex_error ("Comment not terminated at end of input", 0);
386 case '*': /* reparse only '*' */
387 java_unget_unicode ();
393 /* Parse the documentation section. Keywords must be at the beginning
394 of a documentation comment line (ignoring white space and any `*'
395 character). Parsed keyword(s): @DEPRECATED. */
/* C is the first character of the section.  Returns 1 when the end of the
   comment (a star followed by a slash) has been consumed; the other return
   value is on a line missing from this excerpt.  Sets ctxp->deprecated to
   1 when the deprecated tag is recognised.  */
398 java_parse_doc_section (c)
401 int valid_tag = 0, seen_star;
/* Skip leading white space, stars and newlines.  */
403 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
415 c = java_get_unicode();
419 java_lex_error ("Comment not terminated at end of input", 0);
421 if (seen_star && (c == '/'))
422 return 1; /* Goto step1 in caller */
424 /* We're parsing @deprecated */
425 if (valid_tag && (c == '@'))
/* Collect at most 10 characters following the at-sign into tag.
   NOTE(review): after 10 characters tag_index is 10 and the terminator
   below is written to tag [10]; the declaration of tag is not visible in
   this excerpt -- confirm it has room for at least 11 characters.  */
430 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
432 c = java_get_unicode ();
433 tag [tag_index++] = c;
437 java_lex_error ("Comment not terminated at end of input", 0);
439 java_unget_unicode ();
440 tag [tag_index] = '\0';
442 if (!strcmp (tag, "deprecated"))
443 ctxp->deprecated = 1;
448 /* This function is to be used only by JAVA_ID_CHAR_P (), otherwise it
449 will return a wrong result. */
/* Function wrapper around the _JAVA_LETTER_OR_DIGIT_P macro applied
   to C.  */
451 java_letter_or_digit_p (c)
454 return _JAVA_LETTER_OR_DIGIT_P (c);
/* Translate the escape sequence whose introducing backslash has already
   been consumed, and return the character it denotes.  JAVA_CHAR_ERROR is
   returned, after a call to java_lex_error, for an out of range octal
   escape or an illegal escape character.
   NOTE(review): the case labels of the single-character escapes are on
   lines missing from this excerpt; only the returned values are visible.  */
458 java_parse_escape_sequence ()
463 switch (c = java_get_unicode ())
/* Returned values: 0x8 backspace, 0x9 tab, 0xa line feed, 0xc form feed,
   0xd carriage return, 0x22 double quote, 0x27 single quote, 0x5c
   backslash.  */
466 return (unicode_t)0x8;
468 return (unicode_t)0x9;
470 return (unicode_t)0xa;
472 return (unicode_t)0xc;
474 return (unicode_t)0xd;
476 return (unicode_t)0x22;
478 return (unicode_t)0x27;
480 return (unicode_t)0x5c;
/* Octal escape of up to three digits.
   NOTE(review): the case labels and the RANGE test below accept the digits
   8 and 9, which are not octal digits -- confirm whether this is
   intended.  */
481 case '0': case '1': case '2': case '3': case '4':
482 case '5': case '6': case '7': case '8': case '9':
485 int octal_escape_index = 0;
487 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
488 c = java_get_unicode ())
489 octal_escape [octal_escape_index++] = c;
/* The character that ended the digit run is pushed back.  */
491 java_unget_unicode ();
/* A three digit escape may not start with a digit above 3.  */
493 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
495 java_lex_error ("Literal octal escape out of range", 0);
496 return JAVA_CHAR_ERROR;
/* Combine the digits, three bits each, most significant digit first.  */
501 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
502 i < octal_escape_index; i++, shift -= 3)
503 char_lit |= (octal_escape [i] - '0') << shift;
510 return '\n'; /* ULT, caught later as a specific error */
512 java_lex_error ("Illegal character in escape sequence", 0);
513 return JAVA_CHAR_ERROR;
/* Body of the main token recogniser: white space and comments are
   skipped, then numeric, character and string literals, separators and
   operators, keywords and identifiers are recognised in turn; anything
   else is reported as an invalid character.
   NOTE(review): the function header and many short lines (braces, case
   labels, some conditions and returns) are missing from this excerpt;
   presumably this is java_lex, which the file header comment names as the
   yylex entry point -- confirm against the complete file.  */
525 unicode_t c, first_unicode;
526 int ascii_index, all_ascii;
529 /* Translation of the Unicode escape in the raw stream of Unicode
530 characters. Takes care of line terminator. */
532 /* Skip white spaces: SP, TAB and FF or ULT */
533 for (c = java_get_unicode ();
534 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
537 ctxp->elc.line = ctxp->c_line->lineno;
538 ctxp->elc.col = ctxp->c_line->char_col-2;
541 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
543 if (c == 0x1a) /* CTRL-Z */
545 if ((c = java_get_unicode ()) == UEOF)
546 return 0; /* Ok here */
548 java_unget_unicode (); /* Caught later at the end the function */
550 /* Handle EOF here */
551 if (c == UEOF) /* Should probably do something here... */
554 /* Take care of eventual comments. */
557 switch (c = java_get_unicode ())
560 for (c = java_get_unicode ();;c = java_get_unicode ())
563 java_lex_error ("Comment not terminated at end of input", 0);
564 if (c == '\n') /* ULT */
570 if ((c = java_get_unicode ()) == '*')
572 if ((c = java_get_unicode ()) == '/')
573 goto step1; /* Empty documentation comment */
574 else if (java_parse_doc_section (c))
578 java_unget_unicode ();
580 java_parse_end_comment ();
584 java_unget_unicode ();
/* Record the location of the token that starts here.  */
590 ctxp->elc.line = ctxp->c_line->lineno;
591 ctxp->elc.prev_col = ctxp->elc.col;
592 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
593 if (ctxp->elc.col < 0)
594 fatal ("ctxp->elc.col < 0 - java_lex");
596 /* Numeric literals */
597 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
599 /* This section of code is borrowed from gcc/c-lex.c */
600 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
601 int parts[TOTAL_PARTS];
602 HOST_WIDE_INT high, low;
603 /* End borrowed section */
/* NOTE(review): characters are appended to literal_token below through
   literal_index++ and no bound check on literal_index is visible in this
   excerpt; a numeric literal of 256 characters or more would overrun the
   buffer -- confirm.  */
604 char literal_token [256];
605 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
607 int number_beginning = ctxp->c_line->current;
609 /* We might have a . separator instead of a FP like .[0-9]* */
612 unicode_t peep = java_sneak_unicode ();
614 if (!JAVA_ASCII_DIGIT (peep))
617 BUILD_OPERATOR (DOT_TK);
621 for (i = 0; i < TOTAL_PARTS; i++)
626 c = java_get_unicode ();
627 if (c == 'x' || c == 'X')
630 c = java_get_unicode ();
632 else if (JAVA_ASCII_DIGIT (c))
636 /* Push the '.' back and prepare for a FP parsing... */
637 java_unget_unicode ();
642 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
643 JAVA_LEX_LIT ("0", 10);
647 SET_LVAL_NODE (long_zero_node);
650 SET_LVAL_NODE (float_zero_node);
653 SET_LVAL_NODE (double_zero_node);
656 java_unget_unicode ();
657 SET_LVAL_NODE (integer_zero_node);
662 /* Parse the first part of the literal, until we find something
663 which is not a number. */
664 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
665 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
666 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
668 /* We store in a string (in case it turns out to be a FP) and in
669 PARTS if we have to process a integer literal. */
670 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
673 literal_token [literal_index++] = c;
674 /* This section of code is borrowed from gcc/c-lex.c */
675 for (count = 0; count < TOTAL_PARTS; count++)
677 parts[count] *= radix;
680 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
681 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
686 if (parts [TOTAL_PARTS-1] != 0)
688 /* End borrowed section. */
689 c = java_get_unicode ();
692 /* If we have something from the FP char set but not a digit, parse
694 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
697 int seen_digit = (literal_index ? 1 : 0);
698 int seen_exponent = 0;
699 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
700 double unless specified. */
702 java_lex_error ("Can't express non-decimal FP literal", 0);
711 literal_token [literal_index++ ] = c;
712 c = java_get_unicode ();
715 java_lex_error ("Invalid character in FP literal", 0);
718 if (c == 'e' || c == 'E')
722 /* {E,e} must have seen at least a digit */
724 java_lex_error ("Invalid FP literal", 0);
728 literal_token [literal_index++] = c;
729 c = java_get_unicode ();
732 java_lex_error ("Invalid character in FP literal", 0);
734 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
736 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
737 stage = 4; /* So we fall through */
740 if ((c=='-' || c =='+') && stage < 3)
743 literal_token [literal_index++] = c;
744 c = java_get_unicode ();
747 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
748 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
749 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
750 (stage == 3 && JAVA_ASCII_DIGIT (c)))
752 if (JAVA_ASCII_DIGIT (c))
754 literal_token [literal_index++ ] = c;
755 c = java_get_unicode ();
760 REAL_VALUE_TYPE value;
762 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
765 if (stage != 4) /* Don't push back fF/dD */
766 java_unget_unicode ();
768 /* An exponent (if any) must have seen a digit. */
769 if (seen_exponent && !seen_digit)
770 java_lex_error ("Invalid FP literal", 0);
772 literal_token [literal_index] = '\0';
773 JAVA_LEX_LIT (literal_token, radix);
775 if (setjmp (handler))
777 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
782 SET_FLOAT_HANDLER (handler);
784 (value, REAL_VALUE_ATOF (literal_token,
787 if (REAL_VALUE_ISINF (value))
788 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
790 if (REAL_VALUE_ISNAN (value))
791 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
793 SET_LVAL_NODE_TYPE (build_real (type, value), type);
794 SET_FLOAT_HANDLER (NULL_PTR);
799 } /* JAVA_ASCII_FPCHAR (c) */
801 /* Here we get back to converting the integral literal. */
802 if (c == 'L' || c == 'l')
804 else if (radix == 16 && JAVA_ASCII_LETTER (c))
805 java_lex_error ("Digit out of range in hexadecimal literal", 0);
806 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
807 java_lex_error ("Digit out of range in octal literal", 0);
808 else if (radix == 16 && !literal_index)
809 java_lex_error ("No digit specified for hexadecimal literal", 0);
811 java_unget_unicode ();
813 #ifdef JAVA_LEX_DEBUG
814 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
815 JAVA_LEX_LIT (literal_token, radix);
817 /* This section of code is borrowed from gcc/c-lex.c */
820 bytes = GET_TYPE_PRECISION (long_type_node);
821 for (i = bytes; i < TOTAL_PARTS; i++)
829 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
831 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
832 / HOST_BITS_PER_CHAR)]
833 << (i * HOST_BITS_PER_CHAR));
834 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
836 /* End borrowed section. */
841 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
842 9223372036854775807L is the biggest `long' literal that can be
843 expressed using a 10 radix. For other radixes, everything that
844 fits within 64 bits is OK. */
845 int hb = (high >> 31);
846 if (overflow || (hb && low && radix == 10) ||
847 (hb && high & 0x7fffffff && radix == 10) ||
848 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
849 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
853 /* 2147483648 is valid if operand of a '-'. Otherwise,
854 2147483647 is the biggest `int' literal that can be
855 expressed using a 10 radix. For other radixes, everything
856 that fits within 32 bits is OK. As all literals are
857 signed, we sign extend here. */
858 int hb = (low >> 31) & 0x1;
859 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
860 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
861 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
864 ctxp->minus_seen = 0;
865 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
866 (long_suffix ? long_type_node : int_type_node));
870 ctxp->minus_seen = 0;
871 /* Character literals */
875 if ((c = java_get_unicode ()) == '\\')
876 char_lit = java_parse_escape_sequence ();
880 c = java_get_unicode ();
882 if ((c == '\n') || (c == UEOF))
883 java_lex_error ("Character literal not terminated at end of line", 0);
885 java_lex_error ("Syntax error in character literal", 0);
887 if (c == JAVA_CHAR_ERROR)
888 char_lit = 0; /* We silently convert it to zero */
890 JAVA_LEX_CHAR_LIT (char_lit);
891 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
895 /* String literals */
901 for (no_error = 1, c = java_get_unicode ();
902 c != '"' && c != '\n'; c = java_get_unicode ())
905 c = java_parse_escape_sequence ();
906 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
907 java_unicode_2_utf8 (c);
909 if (c == '\n' || c == UEOF) /* ULT */
911 lineno--; /* Refer to the line the terminator was seen */
912 java_lex_error ("String not terminated at end of line.", 0);
916 obstack_1grow (&temporary_obstack, '\0');
917 string = obstack_finish (&temporary_obstack);
919 if (!no_error || (c != '"'))
920 java_lval->node = error_mark_node; /* Requires further testing FIXME */
923 tree s = make_node (STRING_CST);
924 TREE_STRING_LENGTH (s) = strlen (string);
925 TREE_STRING_POINTER (s) =
926 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
927 strcpy (TREE_STRING_POINTER (s), string);
931 return STRING_LIT_TK;
939 BUILD_OPERATOR (OP_TK);
945 if (ctxp->ccb_indent == 1)
946 ctxp->first_ccb_indent1 = lineno;
948 BUILD_OPERATOR (OCB_TK);
952 if (ctxp->ccb_indent == 1)
953 ctxp->last_ccb_indent1 = lineno;
957 BUILD_OPERATOR (OSB_TK);
969 BUILD_OPERATOR (DOT_TK);
977 if ((c = java_get_unicode ()) == '=')
979 BUILD_OPERATOR (EQ_TK);
983 /* Equals is used in two different locations. In the
984 variable_declarator: rule, it has to be seen as '=' as opposed
985 to being seen as an ordinary assignment operator in
986 assignment_operators: rule. */
987 java_unget_unicode ();
988 BUILD_OPERATOR (ASSIGN_TK);
992 switch ((c = java_get_unicode ()))
995 BUILD_OPERATOR (GTE_TK);
997 switch ((c = java_get_unicode ()))
1000 if ((c = java_get_unicode ()) == '=')
1002 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1006 java_unget_unicode ();
1007 BUILD_OPERATOR (ZRS_TK);
1010 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1012 java_unget_unicode ();
1013 BUILD_OPERATOR (SRS_TK);
1016 java_unget_unicode ();
1017 BUILD_OPERATOR (GT_TK);
1021 switch ((c = java_get_unicode ()))
1024 BUILD_OPERATOR (LTE_TK);
1026 if ((c = java_get_unicode ()) == '=')
1028 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1032 java_unget_unicode ();
1033 BUILD_OPERATOR (LS_TK);
1036 java_unget_unicode ();
1037 BUILD_OPERATOR (LT_TK);
1041 switch ((c = java_get_unicode ()))
1044 BUILD_OPERATOR (BOOL_AND_TK);
1046 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1048 java_unget_unicode ();
1049 BUILD_OPERATOR (AND_TK);
1053 switch ((c = java_get_unicode ()))
1056 BUILD_OPERATOR (BOOL_OR_TK);
1058 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1060 java_unget_unicode ();
1061 BUILD_OPERATOR (OR_TK);
1065 switch ((c = java_get_unicode ()))
1068 BUILD_OPERATOR (INCR_TK);
1070 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1072 java_unget_unicode ();
1073 BUILD_OPERATOR (PLUS_TK);
1077 switch ((c = java_get_unicode ()))
1080 BUILD_OPERATOR (DECR_TK);
1082 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1084 java_unget_unicode ();
/* Remember that a minus sign was just lexed: the integer literal range
   checks above test this flag before rejecting the largest magnitude.  */
1085 ctxp->minus_seen = 1;
1086 BUILD_OPERATOR (MINUS_TK);
1090 if ((c = java_get_unicode ()) == '=')
1092 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1096 java_unget_unicode ();
1097 BUILD_OPERATOR (MULT_TK);
1101 if ((c = java_get_unicode ()) == '=')
1103 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1107 java_unget_unicode ();
1108 BUILD_OPERATOR (DIV_TK);
1112 if ((c = java_get_unicode ()) == '=')
1114 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1118 java_unget_unicode ();
1119 BUILD_OPERATOR (XOR_TK);
1123 if ((c = java_get_unicode ()) == '=')
1125 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1129 java_unget_unicode ();
1130 BUILD_OPERATOR (REM_TK);
1134 if ((c = java_get_unicode()) == '=')
1136 BUILD_OPERATOR (NEQ_TK);
1140 java_unget_unicode ();
1141 BUILD_OPERATOR (NEG_TK);
1146 BUILD_OPERATOR (REL_QM_TK);
1149 BUILD_OPERATOR (REL_CL_TK);
1151 BUILD_OPERATOR (NOT_TK);
1154 /* Keyword, boolean literal or null literal */
1155 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1156 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1158 java_unicode_2_utf8 (c);
1159 if (all_ascii && c >= 128)
1164 obstack_1grow (&temporary_obstack, '\0');
1165 string = obstack_finish (&temporary_obstack);
1166 java_unget_unicode ();
1168 /* If we have something all ascii, we consider a keyword, a boolean
1169 literal, a null literal or an all ASCII identifier. Otherwise,
1170 this is an identifier (possibly not respecting formation rule). */
1173 struct java_keyword *kw;
1174 if ((kw=java_keyword (string, ascii_index)))
1176 JAVA_LEX_KW (string);
1179 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1180 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1181 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1183 SET_MODIFIER_CTX (kw->token);
1186 SET_LVAL_NODE (float_type_node);
1189 SET_LVAL_NODE (double_type_node);
1192 SET_LVAL_NODE (boolean_type_node);
1195 SET_LVAL_NODE (byte_type_node);
1198 SET_LVAL_NODE (short_type_node);
1201 SET_LVAL_NODE (int_type_node);
1204 SET_LVAL_NODE (long_type_node);
1207 SET_LVAL_NODE (char_type_node);
1210 /* Keyword based literals */
1213 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1214 boolean_true_node : boolean_false_node));
1217 SET_LVAL_NODE (null_pointer_node);
1220 /* Some keyword we want to retain information on the location
1233 BUILD_OPERATOR (kw->token);
1241 /* We may have an ID here */
1242 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1244 JAVA_LEX_ID (string);
1245 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1249 /* Everything else is an invalid character in the input */
1251 char lex_error_buffer [128];
1252 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1253 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1254 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack: one byte for
   0x01-0x7f, two bytes for 0x80-0x7ff, three bytes for everything else.
   The value 0 also takes the two-byte branch, so it is emitted as the
   bytes 0xc0 0x80 and never as a zero byte.  */
1260 java_unicode_2_utf8 (unicode)
1263 if (RANGE (unicode, 0x01, 0x7f))
1264 obstack_1grow (&temporary_obstack, (char)unicode);
1265 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
/* 110xxxxx 10xxxxxx: bits 10-6, then bits 5-0.  */
1267 obstack_1grow (&temporary_obstack,
1268 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1269 obstack_1grow (&temporary_obstack,
1270 (unsigned char)(0x80 | (unicode & 0x3f)));
1272 else /* Range 0x800-0xffff */
/* 1110xxxx 10xxxxxx 10xxxxxx: bits 15-12, bits 11-6, bits 5-0.  The shift
   binds tighter than the bitwise or, so each mask is shifted before being
   combined with the marker bits.  */
1274 obstack_1grow (&temporary_obstack,
1275 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1276 obstack_1grow (&temporary_obstack,
1277 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1278 obstack_1grow (&temporary_obstack,
1279 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, using the file name of the current
   context and the line and column recorded in ctxp->elc.  */
1285 build_wfl_node (node)
1288 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Lexical error entry point.  The error location is taken from the current
   line: its line number, and its column minus one plus FORWARD.
   NOTE(review): the statement that actually reports MSG is on a line
   missing from this excerpt.  */
1293 java_lex_error (msg, forward)
1298 ctxp->elc.line = ctxp->c_line->lineno;
1299 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1301 /* Might be caught in the middle of some error report */
1302 ctxp->java_error_flag = 0;
/* NOTE(review): this is the only visible line of its function, whose
   header is missing from this excerpt (presumably java_is_eol, declared
   above and called by java_get_line_col).  The test holds when next is
   neither a newline nor EOF.  */
1318 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, a text made of line LINE of file FILENAME
   followed by a newline and by a second line that places a caret under
   column COL, and return it.  Lines are looked up by re-reading the file
   from its beginning.
   NOTE(review): several short lines of this body (reads, braces, some
   conditions) are missing from this excerpt.  */
1329 java_get_line_col (filename, line, col)
1336 /* Dumb implementation. Doesn't try to cache or optimize things. */
1337 /* First line of the file is line 1, first column is 1 */
1339 /* COL == -1 means, at the CR/LF in LINE */
1340 /* COL == -2 means, at the first non space char in LINE */
1343 int c, ccol, cline = 1;
1344 int current_line_col = 0;
1345 int first_non_space = 0;
/* NOTE(review): the fatal message below names java_display_line_col, not
   this function.  */
1348 if (!(fp = fopen (filename, "r")))
1349 fatal ("Can't open file - java_display_line_col");
/* Skip forward to the requested line; a placeholder text is stored when
   the file ends first.  */
1351 while (cline != line)
1356 static char msg[] = "<<file too short - unexpected EOF>>";
1357 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1360 if (java_is_eol (fp, c))
1364 /* Gather the chars of the current line in a buffer */
1368 if (c < 0 || java_is_eol (fp, c))
1370 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1371 first_non_space = current_line_col;
1372 obstack_1grow (&temporary_obstack, c);
1377 obstack_1grow (&temporary_obstack, '\n');
/* Resolve the special COL values described above.  */
1381 col = current_line_col;
1382 first_non_space = 0;
1385 col = first_non_space;
1387 first_non_space = 0;
1389 /* Place the '^' at the right position */
1390 base = obstack_base (&temporary_obstack);
1391 for (ccol = 1; ccol <= col; ccol++)
1393 /* Compute \t when reaching first_non_space */
/* When first_non_space is set, a tab of the source line is copied as a tab
   in the padding; every other column is padded with a space.  */
1394 char c = (first_non_space ?
1395 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1396 obstack_1grow (&temporary_obstack, c);
/* obstack_grow0 appends the caret together with a terminating zero.  */
1398 obstack_grow0 (&temporary_obstack, "^", 1);
1401 return obstack_finish (&temporary_obstack);