1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex), which reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded
28 characters and returns a token for everything found except comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
/* Declared here; the definition is not in this file.  */
40 extern struct obstack *expression_obstack;
43 /* Function declaration */
/* Prototypes for the routines defined below.  Each argument list is
wrapped in PARAMS, presumably so the file also builds with compilers
that lack prototype support (the definitions use old-style parameter
lists) -- confirm against the header that defines PARAMS.  */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
/* NOTE(review): java_store_unicode is declared again a few lines below
with an identical signature; one of the two prototypes is redundant.  */
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
/* java_new_lexer is the only routine in this list declared without
static.  */
64 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
/* Initialize the lexer and the parser context CTXP for a new input.
FINPUT is the stream to read and ENCODING names its character encoding;
both are handed to java_new_lexer at the end.
NOTE(review): lines are missing from this listing (return type,
parameter declarations, braces and possibly guards), so only the
visible statements are described.  */
67 java_init_lex (finput, encoding)
72 int java_lang_imported = 0;
/* Intern the identifiers used elsewhere and cache them in globals.
The two guarded ones are created only if not already set.  */
75 java_lang_id = get_identifier ("java.lang");
76 if (!java_lang_cloneable)
77 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
78 if (!java_io_serializable)
79 java_io_serializable = get_identifier ("java.io.Serializable");
81 inst_id = get_identifier ("inst$");
83 wpv_id = get_identifier ("write_parm_value$");
/* Put java.lang at the head of the import-on-demand list of the
context, after reading its import directory.  */
85 if (!java_lang_imported)
87 tree node = build_tree_list
88 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
89 read_import_dir (TREE_PURPOSE (node));
90 TREE_CHAIN (node) = ctxp->import_demand_list;
91 ctxp->import_demand_list = node;
92 java_lang_imported = 1;
/* Build the shared nodes produced by build_expr_wfl.  */
96 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
98 label_id = get_identifier ("$L");
100 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
/* NOTE(review): the line between the test and the call below is missing
from this listing; presumably it assigns the result to
wfl_string_buffer -- confirm.  */
101 if (!wfl_string_buffer)
103 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
105 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Reset the per-file state held in the parser context.  */
107 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
108 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
/* Zero the first 11 entries of modifier_ctx and the whole JCF object
that current_jcf points to.  */
110 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
111 bzero ((PTR) current_jcf, sizeof (JCF));
112 ctxp->current_parsed_class = NULL;
113 ctxp->package = NULL_TREE;
/* Line counting restarts at 0 for both the context and the global
lineno.  */
116 ctxp->filename = input_filename;
117 ctxp->lineno = lineno = 0;
120 ctxp->minus_seen = 0;
121 ctxp->java_error_flag = 0;
/* Create the low-level character reader for this input.  */
122 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE into a static buffer, so the text is
overwritten by the next call.  A character flagged in unicode_escape_p,
or whose value is greater than 128, is written as a backslash-u escape
with four hex digits; otherwise its value is stored in buffer [0].
NOTE(review): the test is > 128, so the value 128 itself takes the
plain-character branch although it is outside 7-bit ASCII; >= 128 may
have been intended -- confirm.  The else branch framing, the string
terminator and the return statement are missing from this listing.  */
126 java_sprint_unicode (line, i)
127 struct java_line *line;
130 static char buffer [10];
131 if (line->unicode_escape_p [i] || line->line [i] > 128)
132 sprintf (buffer, "\\u%04x", line->line [i]);
135 buffer [0] = line->line [i];
/* Return the character at the current read position of the current
line without advancing that position.  */
142 java_sneak_unicode ()
144 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Step the current line back by one character: decrement the read
index and subtract JAVA_COLUMN_DELTA (0) from the column counter.
Calling this while the read index is 0 is reported through fatal.  */
148 java_unget_unicode ()
150 if (!ctxp->c_line->current)
151 fatal ("can't unget unicode - java_unget_unicode");
152 ctxp->c_line->current--;
153 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Start a new current line in ctxp->c_line.  Any look-ahead character
recorded on the old line (ahead [0] and its escape flag) is captured
first so that it can be copied into the new line.
NOTE(review): this listing is missing lines, so the exact conditions
guarding the free, allocation and look-ahead steps below are not
visible; the comments describe only the statements that are.  */
157 java_allocate_new_line ()
159 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
160 char ahead_escape_p = (ctxp->c_line ?
161 ctxp->c_line->unicode_escape_ahead_p : 0);
/* The old current line held something other than white space.  */
163 if (ctxp->c_line && !ctxp->c_line->white_space_only)
/* Release the buffers of the previously saved line.  */
167 free (ctxp->p_line->unicode_escape_p);
168 free (ctxp->p_line->line);
/* The old current line becomes the previous line; a fresh current line
is allocated below.  */
171 ctxp->p_line = ctxp->c_line;
172 ctxp->c_line = NULL; /* Reallocated */
/* Allocate the line object and its two parallel arrays, both sized for
JAVA_LINE_MAX entries.  */
177 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
178 ctxp->c_line->max = JAVA_LINE_MAX;
179 ctxp->c_line->line = (unicode_t *)xmalloc
180 (sizeof (unicode_t)*ctxp->c_line->max);
181 ctxp->c_line->unicode_escape_p =
182 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
183 ctxp->c_line->white_space_only = 0;
/* Reset the line to empty with the read position at its start.  */
186 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
187 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Store the captured look-ahead character as the next element.  */
190 ctxp->c_line->line [ctxp->c_line->size] = ahead;
191 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
192 ctxp->c_line->size++;
/* Clear the look-ahead slot, stamp the line with the next line number
and mark it as white space only; java_get_unicode clears that mark when
it stores a non-blank character.  */
194 ctxp->c_line->ahead [0] = 0;
195 ctxp->c_line->unicode_escape_ahead_p = 0;
196 ctxp->c_line->lineno = ++lineno;
197 ctxp->c_line->white_space_only = 1;
200 /* Create a new lexer object. */
/* FINPUT is the stream the lexer reads from and ENCODING names the
character encoding of that stream.  When HAVE_ICONV is in effect a
conversion handle from ENCODING to UCS-2 is opened; otherwise ENCODING
is compared with DEFAULT_ENCODING.  An encoding that cannot be used ends
in the fatal call at the bottom.
NOTE(review): the opening preprocessor test, the braces and the return
statement are missing from this listing.  */
202 java_new_lexer (finput, encoding)
204 const char *encoding;
206 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
209 lex->finput = finput;
/* No pushed-back character yet; java_read_char treats 0 as empty.  */
211 lex->unget_value = 0;
214 lex->handle = iconv_open ("UCS-2", encoding);
/* iconv_open reports failure with (iconv_t) -1.  */
215 if (lex->handle == (iconv_t) -1)
217 /* FIXME: we should give a nice error based on errno here. */
222 #else /* HAVE_ICONV */
/* strcmp is nonzero when ENCODING differs from DEFAULT_ENCODING.  */
223 if (strcmp (encoding, DEFAULT_ENCODING))
225 #endif /* HAVE_ICONV */
228 fatal ("unknown encoding: `%s'", encoding);
/* Release the resources held by LEX.  The only visible statement closes
the iconv conversion handle.
NOTE(review): any preprocessor guard around that call and any free of
LEX itself are among the lines missing from this listing.  */
234 java_destroy_lexer (lex)
238 iconv_close (lex->handle);
247 if (lex->unget_value)
249 unicode_t r = lex->unget_value;
250 lex->unget_value = 0;
257 size_t ir, inbytesleft, in_save, out_count;
262 /* See if we need to read more data. If FIRST == 0 then the
263 previous conversion attempt ended in the middle of a
264 character at the end of the buffer. Otherwise we only have
265 to read if the buffer is empty. */
266 if (lex->first == 0 || lex->first >= lex->last)
270 if (lex->first >= lex->last)
275 if (feof (lex->finput))
277 r = fread (&lex->buffer[lex->last], 1,
278 sizeof (lex->buffer) - lex->last,
283 inbytesleft = lex->last - lex->first;
285 if (inbytesleft == 0)
287 /* We've tried to read and there is nothing left. */
291 in_save = inbytesleft;
293 inp = &lex->buffer[lex->first];
295 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
297 lex->first += in_save - inbytesleft;
301 /* Success. We assume that UCS-2 is big-endian. This
302 appears to be an ok assumption. */
304 result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
308 if (ir == (size_t) -1)
312 /* This is ok. This means that the end of our buffer
313 is in the middle of a character sequence. We just
314 move the valid part of the buffer to the beginning so that more input can be read. */
316 /* We use bcopy() because it should work for
317 overlapping strings. Use memmove() instead... */
318 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
319 lex->last - lex->first);
320 lex->last -= lex->first;
325 /* A more serious error. */
326 java_lex_error ("unrecognized character in input stream", 0);
332 #else /* HAVE_ICONV */
335 c = getc (lex->finput);
343 if ((c & 0xe0) == 0xc0)
345 c1 = getc (lex->finput);
346 if ((c1 & 0xc0) == 0x80)
347 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
350 else if ((c & 0xf0) == 0xe0)
352 c1 = getc (lex->finput);
353 if ((c1 & 0xc0) == 0x80)
355 c2 = getc (lex->finput);
356 if ((c2 & 0xc0) == 0x80)
357 return (unicode_t)(((c & 0xf) << 12) +
358 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
366 /* We simply don't support invalid characters. */
367 java_lex_error ("malformed UTF-8 character", 0);
370 #endif /* HAVE_ICONV */
372 /* We only get here on error. */
/* Append character C to line L and record UNICODE_ESCAPE_P as its
escape flag in the parallel array.  When the line is full, both arrays
are grown by JAVA_LINE_MAX entries first.  */
377 java_store_unicode (l, c, unicode_escape_p)
380 int unicode_escape_p;
382 if (l->size == l->max)
384 l->max += JAVA_LINE_MAX;
385 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
386 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
387 sizeof (char)*l->max);
/* Store the character, then its flag; size is bumped by the second
store.  */
389 l->line [l->size] = c;
390 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read one character from LEX, translating a backslash-u escape made
of four hex digits into the character it denotes.  *UNICODE_ESCAPE_P is
set to 1 when such an escape was decoded and to 0 otherwise.  When
TERM_CONTEXT is zero the result goes through java_lineterminator so a
line terminator can be reported as a newline; when it is nonzero the
character is returned as read.
NOTE(review): several lines are missing from this listing (declarations,
the backslash test, the bs_count bookkeeping and some returns).  */
394 java_read_unicode (lex, term_context, unicode_escape_p)
397 int *unicode_escape_p;
401 c = java_read_char (lex);
402 *unicode_escape_p = 0;
407 return (term_context ? c : (java_lineterminator (c)
413 if ((lex->bs_count) % 2 == 1)
415 /* Odd number of \ seen. */
416 c = java_read_char (lex);
419 unicode_t unicode = 0;
421 /* Next should be 4 hex digits, otherwise it's an error.
422 The hex value is converted into the unicode, pushed into
423 the Unicode stream. */
/* Accumulate four digits, most significant nibble first.  */
424 for (shift = 12; shift >= 0; shift -= 4)
426 if ((c = java_read_char (lex)) == UEOF)
428 if (c >= '0' && c <= '9')
429 unicode |= (unicode_t)((c-'0') << shift);
430 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
/* OR with 0x20 folds an upper case hex letter to lower case.  */
431 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
433 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
436 *unicode_escape_p = 1;
/* NOTE(review): the terminator test below is given C, which at this
point holds the last character read by the loop rather than the decoded
value in UNICODE; passing UNICODE may have been intended -- confirm.  */
438 ? unicode : (java_lineterminator (c) ? '\n' : unicode));
/* Not an escape: push the character back and return the backslash.  */
440 lex->unget_value = c;
442 return (unicode_t) '\\';
448 /* It's time to read a line when... */
449 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
452 java_allocate_new_line ();
453 if (ctxp->c_line->line[0] != '\n')
456 int unicode_escape_p;
457 c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
458 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
459 if (ctxp->c_line->white_space_only
460 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
461 ctxp->c_line->white_space_only = 0;
462 if ((c == '\n') || (c == UEOF))
466 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
467 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
468 return ctxp->c_line->line [ctxp->c_line->current++];
/* Test whether C is a line terminator.  LF is one by itself.  For CR
the next character is read as well so that a following character can be
examined; depending on what it is, it is pushed back either through the
lexer unget slot or through the look-ahead slot of the current line.
NOTE(review): the return statements and the test that selects between
the two push-back paths are missing from this listing.  */
472 java_lineterminator (c)
475 if (c == '\n') /* LF */
477 else if (c == '\r') /* CR */
479 int unicode_escape_p;
/* Read with term_context set to 1 so this call does not recurse into
java_lineterminator.  */
480 c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
483 /* In this case we will have another terminator. For some
484 reason the lexer has several different unget methods. We
485 can't use the `ahead' method because then the \r will end
486 up in the actual text of the line, causing an error. So
487 instead we choose a very low-level method. FIXME: this
488 is incredibly ugly. */
489 ctxp->lexer->unget_value = c;
/* Otherwise remember the character, with its escape flag, as the
look-ahead of the current line; java_allocate_new_line copies it into
the next line.  */
493 ctxp->c_line->ahead [0] = c;
494 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
502 /* Parse the end of a C style comment.
503 * C is the first character following the '/' and '*'. */
/* Characters are consumed with java_get_unicode until the comment
closes.  Reaching the end of input first is reported through
java_lex_error.
NOTE(review): the case labels and the exit from the loop are missing
from this listing.  */
505 java_parse_end_comment (c)
509 for ( ;; c = java_get_unicode ())
514 java_lex_error ("Comment not terminated at end of input", 0);
/* Look at the character that follows.  */
516 switch (c = java_get_unicode ())
519 java_lex_error ("Comment not terminated at end of input", 0);
522 case '*': /* reparse only '*' */
523 java_unget_unicode ();
529 /* Parse the documentation section. Keywords must be at the beginning
530 of a documentation comment line (ignoring white space and any `*'
531 character). Parsed keyword(s): @DEPRECATED. */
/* C is the first character to examine.  Returns 1 when the comment
closes right after the skipped prefix.  A tag equal to deprecated sets
ctxp->deprecated.
NOTE(review): the body of the skipping loop, the declarations of tag
and tag_index and the final return are missing from this listing.  */
534 java_parse_doc_section (c)
537 int valid_tag = 0, seen_star = 0;
/* Skip leading white space, stars and newlines.  */
539 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
551 c = java_get_unicode();
555 java_lex_error ("Comment not terminated at end of input", 0);
557 if (seen_star && (c == '/'))
558 return 1; /* Goto step1 in caller */
560 /* We're parsing @deprecated */
561 if (valid_tag && (c == '@'))
/* Each pass reads the next character and stores it, so the at-sign is
not stored but the character that ends the loop is.
NOTE(review): tag_index can reach 10 here and the terminator is then
written to tag [10]; confirm that tag has room for 11 chars.  */
566 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
568 c = java_get_unicode ();
569 tag [tag_index++] = c;
573 java_lex_error ("Comment not terminated at end of input", 0);
574 tag [tag_index] = '\0';
576 if (!strcmp (tag, "deprecated"))
577 ctxp->deprecated = 1;
/* Give the last character read back to the caller.  */
579 java_unget_unicode ();
583 /* This function is to be used only by JAVA_ID_CHAR_P (), otherwise it
584 will return a wrong result. */
/* Function form of the _JAVA_LETTER_OR_DIGIT_P test applied to C.  */
586 java_letter_or_digit_p (c)
589 return _JAVA_LETTER_OR_DIGIT_P (c);
/* Parse the part of an escape sequence that follows the backslash and
return the character it denotes, or JAVA_CHAR_ERROR after reporting an
invalid sequence.
NOTE(review): the case labels of the simple escapes, the declarations
and the return of the octal value are missing from this listing.  */
593 java_parse_escape_sequence ()
598 switch (c = java_get_unicode ())
/* Simple escapes.  The values are the code points of backspace, tab,
line feed, form feed, carriage return, double-quote, single-quote and
backslash.  */
601 return (unicode_t)0x8;
603 return (unicode_t)0x9;
605 return (unicode_t)0xa;
607 return (unicode_t)0xc;
609 return (unicode_t)0xd;
611 return (unicode_t)0x22;
613 return (unicode_t)0x27;
615 return (unicode_t)0x5c;
/* Octal escape of up to three digits.
NOTE(review): the labels and the RANGE test below also accept the
digits 8 and 9, which are not octal digits -- confirm whether that is
intended.  */
616 case '0': case '1': case '2': case '3': case '4':
617 case '5': case '6': case '7': case '8': case '9':
620 int octal_escape_index = 0;
622 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
623 c = java_get_unicode ())
624 octal_escape [octal_escape_index++] = c;
/* The character that ended the loop is not part of the escape.  */
626 java_unget_unicode ();
/* A three digit escape must start with a digit no greater than 3.  */
628 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
630 java_lex_error ("Literal octal escape out of range", 0);
631 return JAVA_CHAR_ERROR;
/* Fold the digits into a value, three bits per digit, first digit most
significant.  */
636 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
637 i < octal_escape_index; i++, shift -= 3)
638 char_lit |= (octal_escape [i] - '0') << shift;
645 return '\n'; /* ULT, caught later as a specific error */
647 java_lex_error ("Illegal character in escape sequence", 0);
648 return JAVA_CHAR_ERROR;
652 /* Isolate the code which may raise an arithmetic exception in its own function. */
661 int number_beginning;
664 static void java_perform_atof PARAMS ((PTR));
/* Convert the literal text in a struct jpa_args to a real constant.  AV
points to that struct; it is passed as a PTR because the numeric literal
code hands this function to do_float_handler.  The field fflag selects
the float type when nonzero and the double type otherwise.  A result
that is infinite or not a number is reported as a range error; the
constant built by build_real is stored with SET_LVAL_NODE_TYPE.
NOTE(review): java_lval and number_beginning are not referenced by any
visible statement; presumably the macros used below expand to code that
uses them -- confirm.  Some lines are missing from this listing.  */
667 java_perform_atof (av)
670 struct jpa_args *a = (struct jpa_args *)av;
671 YYSTYPE *java_lval = a->java_lval;
672 int number_beginning = a->number_beginning;
673 REAL_VALUE_TYPE value;
674 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
/* Parse the token text in the machine mode of the selected type.  */
676 SET_REAL_VALUE_ATOF (value,
677 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
679 if (REAL_VALUE_ISINF (value)
680 || REAL_VALUE_ISNAN (value))
682 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
686 SET_LVAL_NODE_TYPE (build_real (type, value), type);
690 static int yylex PARAMS ((YYSTYPE *));
700 unicode_t c, first_unicode;
701 int ascii_index, all_ascii;
704 /* Translation of the Unicode escape in the raw stream of Unicode
705 characters. Takes care of line terminator. */
707 /* Skip white spaces: SP, TAB and FF or ULT */
708 for (c = java_get_unicode ();
709 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
712 ctxp->elc.line = ctxp->c_line->lineno;
713 ctxp->elc.col = ctxp->c_line->char_col-2;
716 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
718 if (c == 0x1a) /* CTRL-Z */
720 if ((c = java_get_unicode ()) == UEOF)
721 return 0; /* Ok here */
723 java_unget_unicode (); /* Caught latter at the end the function */
725 /* Handle EOF here */
726 if (c == UEOF) /* Should probably do something here... */
729 /* Take care of eventual comments. */
732 switch (c = java_get_unicode ())
737 c = java_get_unicode ();
739 java_lex_error ("Comment not terminated at end of input", 0);
740 if (c == '\n') /* ULT */
746 if ((c = java_get_unicode ()) == '*')
748 if ((c = java_get_unicode ()) == '/')
749 goto step1; /* Empy documentation comment */
750 else if (java_parse_doc_section (c))
754 java_parse_end_comment ((c = java_get_unicode ()));
758 java_unget_unicode ();
764 ctxp->elc.line = ctxp->c_line->lineno;
765 ctxp->elc.prev_col = ctxp->elc.col;
766 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
767 if (ctxp->elc.col < 0)
768 fatal ("ctxp->elc.col < 0 - java_lex");
770 /* Numeric literals */
771 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
773 /* This section of code is borrowed from gcc/c-lex.c */
774 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
775 int parts[TOTAL_PARTS];
776 HOST_WIDE_INT high, low;
777 /* End borrowed section */
778 char literal_token [256];
779 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
782 int number_beginning = ctxp->c_line->current;
785 /* We might have a . separator instead of a FP like .[0-9]* */
788 unicode_t peep = java_sneak_unicode ();
790 if (!JAVA_ASCII_DIGIT (peep))
793 BUILD_OPERATOR (DOT_TK);
797 for (i = 0; i < TOTAL_PARTS; i++)
802 c = java_get_unicode ();
803 if (c == 'x' || c == 'X')
806 c = java_get_unicode ();
808 else if (JAVA_ASCII_DIGIT (c))
812 /* Push the '.' back and prepare for a FP parsing... */
813 java_unget_unicode ();
818 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
819 JAVA_LEX_LIT ("0", 10);
823 SET_LVAL_NODE (long_zero_node);
826 SET_LVAL_NODE (float_zero_node);
829 SET_LVAL_NODE (double_zero_node);
832 java_unget_unicode ();
833 SET_LVAL_NODE (integer_zero_node);
838 /* Parse the first part of the literal, until we find something
839 which is not a number. */
840 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
841 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
842 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
844 /* We store in a string (in case it turns out to be a FP) and in
845 PARTS if we have to process a integer literal. */
846 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
849 literal_token [literal_index++] = c;
850 /* This section of code is borrowed from gcc/c-lex.c */
851 for (count = 0; count < TOTAL_PARTS; count++)
853 parts[count] *= radix;
856 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
857 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
862 if (parts [TOTAL_PARTS-1] != 0)
864 /* End borrowed section. */
865 c = java_get_unicode ();
868 /* If we have something from the FP char set but not a digit, parse it as a floating point literal. */
870 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
873 int seen_digit = (literal_index ? 1 : 0);
874 int seen_exponent = 0;
875 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
876 double unless specified. */
878 java_lex_error ("Can't express non-decimal FP literal", 0);
887 literal_token [literal_index++ ] = c;
888 c = java_get_unicode ();
891 java_lex_error ("Invalid character in FP literal", 0);
894 if (c == 'e' || c == 'E')
898 /* {E,e} must have seen at least a digit */
900 java_lex_error ("Invalid FP literal", 0);
904 literal_token [literal_index++] = c;
905 c = java_get_unicode ();
908 java_lex_error ("Invalid character in FP literal", 0);
910 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
912 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
913 stage = 4; /* So we fall through */
916 if ((c=='-' || c =='+') && stage == 2)
919 literal_token [literal_index++] = c;
920 c = java_get_unicode ();
923 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
924 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
925 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
926 (stage == 3 && JAVA_ASCII_DIGIT (c)))
928 if (JAVA_ASCII_DIGIT (c))
930 literal_token [literal_index++ ] = c;
931 c = java_get_unicode ();
938 if (stage != 4) /* Don't push back fF/dD */
939 java_unget_unicode ();
941 /* An exponent (if any) must have seen a digit. */
942 if (seen_exponent && !seen_digit)
943 java_lex_error ("Invalid FP literal", 0);
945 literal_token [literal_index] = '\0';
946 JAVA_LEX_LIT (literal_token, radix);
949 a.literal_token = literal_token;
951 a.java_lval = java_lval;
952 a.number_beginning = number_beginning;
953 if (do_float_handler (java_perform_atof, (PTR) &a))
956 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
962 } /* JAVA_ASCCI_FPCHAR (c) */
964 /* Here we get back to converting the integral literal. */
965 if (c == 'L' || c == 'l')
967 else if (radix == 16 && JAVA_ASCII_LETTER (c))
968 java_lex_error ("Digit out of range in hexadecimal literal", 0);
969 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
970 java_lex_error ("Digit out of range in octal literal", 0);
971 else if (radix == 16 && !literal_index)
972 java_lex_error ("No digit specified for hexadecimal literal", 0);
974 java_unget_unicode ();
976 #ifdef JAVA_LEX_DEBUG
977 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
978 JAVA_LEX_LIT (literal_token, radix);
980 /* This section of code is borrowed from gcc/c-lex.c */
983 bytes = GET_TYPE_PRECISION (long_type_node);
984 for (i = bytes; i < TOTAL_PARTS; i++)
992 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
994 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
995 / HOST_BITS_PER_CHAR)]
996 << (i * HOST_BITS_PER_CHAR));
997 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
999 /* End borrowed section. */
1001 /* Range checking */
1004 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1005 9223372036854775807L is the biggest `long' literal that can be
1006 expressed using a 10 radix. For other radixes, everything that
1007 fits within 64 bits is OK. */
1008 int hb = (high >> 31);
1009 if (overflow || (hb && low && radix == 10) ||
1010 (hb && high & 0x7fffffff && radix == 10) ||
1011 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1012 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1016 /* 2147483648 is valid if operand of a '-'. Otherwise,
1017 2147483647 is the biggest `int' literal that can be
1018 expressed using a 10 radix. For other radixes, everything
1019 that fits within 32 bits is OK. As all literals are
1020 signed, we sign extend here. */
1021 int hb = (low >> 31) & 0x1;
1022 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1023 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1024 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1027 ctxp->minus_seen = 0;
1028 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1029 (long_suffix ? long_type_node : int_type_node));
1033 ctxp->minus_seen = 0;
1034 /* Character literals */
1038 if ((c = java_get_unicode ()) == '\\')
1039 char_lit = java_parse_escape_sequence ();
1043 c = java_get_unicode ();
1045 if ((c == '\n') || (c == UEOF))
1046 java_lex_error ("Character literal not terminated at end of line", 0);
1048 java_lex_error ("Syntax error in character literal", 0);
1050 if (c == JAVA_CHAR_ERROR)
1051 char_lit = 0; /* We silently convert it to zero */
1053 JAVA_LEX_CHAR_LIT (char_lit);
1054 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1058 /* String literals */
1064 for (no_error = 1, c = java_get_unicode ();
1065 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1068 c = java_parse_escape_sequence ();
1069 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1070 java_unicode_2_utf8 (c);
1072 if (c == '\n' || c == UEOF) /* ULT */
1074 lineno--; /* Refer to the line the terminator was seen */
1075 java_lex_error ("String not terminated at end of line.", 0);
1079 obstack_1grow (&temporary_obstack, '\0');
1080 string = obstack_finish (&temporary_obstack);
1082 if (!no_error || (c != '"'))
1083 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1085 java_lval->node = build_string (strlen (string), string);
1087 return STRING_LIT_TK;
1095 BUILD_OPERATOR (OP_TK);
1101 if (ctxp->ccb_indent == 1)
1102 ctxp->first_ccb_indent1 = lineno;
1104 BUILD_OPERATOR (OCB_TK);
1108 if (ctxp->ccb_indent == 1)
1109 ctxp->last_ccb_indent1 = lineno;
1110 BUILD_OPERATOR (CCB_TK);
1113 BUILD_OPERATOR (OSB_TK);
1125 BUILD_OPERATOR (DOT_TK);
1126 /* return DOT_TK; */
1133 if ((c = java_get_unicode ()) == '=')
1135 BUILD_OPERATOR (EQ_TK);
1139 /* Equals is used in two different locations. In the
1140 variable_declarator: rule, it has to be seen as '=' as opposed
1141 to being seen as an ordinary assignment operator in
1142 assignment_operators: rule. */
1143 java_unget_unicode ();
1144 BUILD_OPERATOR (ASSIGN_TK);
1148 switch ((c = java_get_unicode ()))
1151 BUILD_OPERATOR (GTE_TK);
1153 switch ((c = java_get_unicode ()))
1156 if ((c = java_get_unicode ()) == '=')
1158 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1162 java_unget_unicode ();
1163 BUILD_OPERATOR (ZRS_TK);
1166 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1168 java_unget_unicode ();
1169 BUILD_OPERATOR (SRS_TK);
1172 java_unget_unicode ();
1173 BUILD_OPERATOR (GT_TK);
1177 switch ((c = java_get_unicode ()))
1180 BUILD_OPERATOR (LTE_TK);
1182 if ((c = java_get_unicode ()) == '=')
1184 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1188 java_unget_unicode ();
1189 BUILD_OPERATOR (LS_TK);
1192 java_unget_unicode ();
1193 BUILD_OPERATOR (LT_TK);
1197 switch ((c = java_get_unicode ()))
1200 BUILD_OPERATOR (BOOL_AND_TK);
1202 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1204 java_unget_unicode ();
1205 BUILD_OPERATOR (AND_TK);
1209 switch ((c = java_get_unicode ()))
1212 BUILD_OPERATOR (BOOL_OR_TK);
1214 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1216 java_unget_unicode ();
1217 BUILD_OPERATOR (OR_TK);
1221 switch ((c = java_get_unicode ()))
1224 BUILD_OPERATOR (INCR_TK);
1226 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1228 java_unget_unicode ();
1229 BUILD_OPERATOR (PLUS_TK);
1233 switch ((c = java_get_unicode ()))
1236 BUILD_OPERATOR (DECR_TK);
1238 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1240 java_unget_unicode ();
1241 ctxp->minus_seen = 1;
1242 BUILD_OPERATOR (MINUS_TK);
1246 if ((c = java_get_unicode ()) == '=')
1248 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1252 java_unget_unicode ();
1253 BUILD_OPERATOR (MULT_TK);
1257 if ((c = java_get_unicode ()) == '=')
1259 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1263 java_unget_unicode ();
1264 BUILD_OPERATOR (DIV_TK);
1268 if ((c = java_get_unicode ()) == '=')
1270 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1274 java_unget_unicode ();
1275 BUILD_OPERATOR (XOR_TK);
1279 if ((c = java_get_unicode ()) == '=')
1281 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1285 java_unget_unicode ();
1286 BUILD_OPERATOR (REM_TK);
1290 if ((c = java_get_unicode()) == '=')
1292 BUILD_OPERATOR (NEQ_TK);
1296 java_unget_unicode ();
1297 BUILD_OPERATOR (NEG_TK);
1302 BUILD_OPERATOR (REL_QM_TK);
1305 BUILD_OPERATOR (REL_CL_TK);
1307 BUILD_OPERATOR (NOT_TK);
1310 /* Keyword, boolean literal or null literal */
1311 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1312 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1314 java_unicode_2_utf8 (c);
1315 if (all_ascii && c >= 128)
1320 obstack_1grow (&temporary_obstack, '\0');
1321 string = obstack_finish (&temporary_obstack);
1322 java_unget_unicode ();
1324 /* If we have something all ascii, we consider a keyword, a boolean
1325 literal, a null literal or an all ASCII identifier. Otherwise,
1326 this is an identifier (possibly not respecting formation rule). */
1329 struct java_keyword *kw;
1330 if ((kw=java_keyword (string, ascii_index)))
1332 JAVA_LEX_KW (string);
1335 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1336 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1337 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1339 SET_MODIFIER_CTX (kw->token);
1342 SET_LVAL_NODE (float_type_node);
1345 SET_LVAL_NODE (double_type_node);
1348 SET_LVAL_NODE (boolean_type_node);
1351 SET_LVAL_NODE (byte_type_node);
1354 SET_LVAL_NODE (short_type_node);
1357 SET_LVAL_NODE (int_type_node);
1360 SET_LVAL_NODE (long_type_node);
1363 SET_LVAL_NODE (char_type_node);
1366 /* Keyword based literals */
1369 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1370 boolean_true_node : boolean_false_node));
1373 SET_LVAL_NODE (null_pointer_node);
1376 /* For some keywords we want to retain information on the location where they were found. */
1389 BUILD_OPERATOR (kw->token);
1397 /* We may have an ID here */
1398 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1400 JAVA_LEX_ID (string);
1401 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1405 /* Everything else is an invalid character in the input */
1407 char lex_error_buffer [128];
1408 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1409 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1410 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one byte
at a time.  Values 0x01 to 0x7f take one byte, values 0x80 to 0x7ff take
two and everything else takes three.  The value 0 is also written in the
two byte form, so a single zero byte is never emitted for it.  */
1416 java_unicode_2_utf8 (unicode)
1419 if (RANGE (unicode, 0x01, 0x7f))
1420 obstack_1grow (&temporary_obstack, (char)unicode);
1421 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
/* Lead byte carries bits 6 to 10, continuation byte bits 0 to 5.  */
1423 obstack_1grow (&temporary_obstack,
1424 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1425 obstack_1grow (&temporary_obstack,
1426 (unsigned char)(0x80 | (unicode & 0x3f)));
1428 else /* Range 0x800-0xffff */
/* Lead byte carries bits 12 to 15, then two continuation bytes with
bits 6 to 11 and bits 0 to 5.  The shifts bind tighter than the OR.  */
1430 obstack_1grow (&temporary_obstack,
1431 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1432 obstack_1grow (&temporary_obstack,
1433 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1434 obstack_1grow (&temporary_obstack,
1435 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE with build_expr_wfl, using the file name of the current
context and the line and column currently stored in ctxp->elc.  */
1441 build_wfl_node (node)
1444 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Record the position of a lexical error in ctxp->elc: the line number
of the current line and the column char_col - 1 + FORWARD.  The error
flag of the context is then cleared.
NOTE(review): MSG is not used by any visible statement, and FORWARD is
marked ATTRIBUTE_UNUSED although it is used; the code that reports MSG
is presumably among the lines missing from this listing -- confirm.  */
1449 java_lex_error (msg, forward)
1450 const char *msg ATTRIBUTE_UNUSED;
1451 int forward ATTRIBUTE_UNUSED;
1454 ctxp->elc.line = ctxp->c_line->lineno;
1455 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1457 /* Might be caught in the middle of some error report */
1458 ctxp->java_error_flag = 0;
1475 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, the text of line LINE of file FILENAME
followed by a second line that places a caret under column COL, and
return the finished obstack object.  The file is opened and scanned from
the start on every call.
NOTE(review): several lines are missing from this listing (the reads
into C, the loop framing, the tests on COL and any close of the file),
so only the visible statements are described.  */
1487 java_get_line_col (filename, line, col)
1488 const char *filename ATTRIBUTE_UNUSED;
1489 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1494 /* Dumb implementation. Doesn't try to cache or optimize things. */
1495 /* First line of the file is line 1, first column is 1 */
1497 /* COL == -1 means, at the CR/LF in LINE */
1498 /* COL == -2 means, at the first non space char in LINE */
1501 int c, ccol, cline = 1;
1502 int current_line_col = 0;
1503 int first_non_space = 0;
/* NOTE(review): the message below names java_display_line_col, not
this function.  */
1506 if (!(fp = fopen (filename, "r")))
1507 fatal ("Can't open file - java_display_line_col");
/* Skip forward until the requested line is reached.  */
1509 while (cline != line)
/* Text used in place of the line when the file ends too early; the
size excludes the terminating zero.  */
1514 static char msg[] = "<<file too short - unexpected EOF>>";
1515 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1518 if (java_is_eol (fp, c))
1522 /* Gather the chars of the current line in a buffer */
1526 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white space.  */
1528 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1529 first_non_space = current_line_col;
1530 obstack_1grow (&temporary_obstack, c);
1535 obstack_1grow (&temporary_obstack, '\n');
/* Resolve the special COL values described above.  */
1539 col = current_line_col;
1540 first_non_space = 0;
1543 col = first_non_space;
1545 first_non_space = 0;
1547 /* Place the '^' at the right position */
1548 base = obstack_base (&temporary_obstack);
/* Emit one padding character per column before the caret.  */
1549 for (ccol = 1; ccol <= col; ccol++)
1551 /* Compute \t when reaching first_non_space */
/* A tab in the gathered line is repeated as a tab, but only while
first_non_space is nonzero; every other column gets a space.  */
1552 char c = (first_non_space ?
1553 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1554 obstack_1grow (&temporary_obstack, c);
/* obstack_grow0 appends the caret followed by a terminating zero.  */
1556 obstack_grow0 (&temporary_obstack, "^", 1);
1559 return obstack_finish (&temporary_obstack);