1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequence or utf8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
39 /* Function declarations.  Every helper listed here is file-local
   (static) except java_new_lexer.  NOTE(review): java_store_unicode is
   declared twice in this list; the second prototype is redundant.  */
40 static int java_lineterminator PARAMS ((unicode_t));
41 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
42 static void java_unicode_2_utf8 PARAMS ((unicode_t));
43 static void java_lex_error PARAMS ((const char *, int));
45 static int java_is_eol PARAMS ((FILE *, int));
46 static tree build_wfl_node PARAMS ((tree));
48 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
49 static unicode_t java_parse_escape_sequence PARAMS ((void));
50 static int java_letter_or_digit_p PARAMS ((unicode_t));
51 static int java_parse_doc_section PARAMS ((unicode_t));
52 static void java_parse_end_comment PARAMS ((unicode_t));
53 static unicode_t java_get_unicode PARAMS ((void));
54 static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
55 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
56 static unicode_t java_read_char PARAMS ((java_lexer *));
57 static void java_allocate_new_line PARAMS ((void));
58 static void java_unget_unicode PARAMS ((void));
59 static unicode_t java_sneak_unicode PARAMS ((void));
60 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
/* Prepare the lexer and the parser context CTXP for a new input.
   Creates the well-known identifiers and WFL nodes used elsewhere
   (several of them only when they are still null), pushes java.lang
   onto ctxp->import_demand_list, resets the per-file fields of CTXP and
   finally attaches a fresh lexer built from FINPUT and ENCODING.  */
63 java_init_lex (finput, encoding)
68 int java_lang_imported = 0;
71 java_lang_id = get_identifier ("java.lang");
72 if (!java_lang_cloneable)
73 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
74 if (!java_io_serializable)
75 java_io_serializable = get_identifier ("java.io.Serializable");
77 inst_id = get_identifier ("inst$");
79 wpv_id = get_identifier ("write_parm_value$");
/* Import java.lang on demand: build a one-element list for it, read
   the corresponding directory and chain it in front of the existing
   import-on-demand list.  */
81 if (!java_lang_imported)
83 tree node = build_tree_list
84 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
85 read_import_dir (TREE_PURPOSE (node));
86 TREE_CHAIN (node) = ctxp->import_demand_list;
87 ctxp->import_demand_list = node;
88 java_lang_imported = 1;
92 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
94 label_id = get_identifier ("$L");
96 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
97 if (!wfl_string_buffer)
99 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
101 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
/* Reset the class-level initializer lists and the incomplete class.  */
103 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
104 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE;
/* NOTE(review): 11 is presumably the number of entries in
   ctxp->modifier_ctx -- confirm against its declaration.  */
106 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
107 bzero ((PTR) current_jcf, sizeof (JCF));
108 ctxp->current_parsed_class = NULL;
109 ctxp->package = NULL_TREE;
/* Line counting restarts at zero for the new input.  */
112 ctxp->filename = input_filename;
113 ctxp->lineno = lineno = 0;
116 ctxp->minus_seen = 0;
117 ctxp->java_error_flag = 0;
118 ctxp->lexer = java_new_lexer (finput, encoding);
/* Format character I of LINE for use in a message.  The text is built
   in a static buffer, so it is overwritten by the next call.  A
   character that came from a Unicode escape, or whose value is above
   128, is written as a backslash-u escape with four hex digits; any
   other character is stored as itself.
   NOTE(review): the test is `> 128', so the value 128 itself takes the
   plain-character path -- confirm that is intended.  */
122 java_sprint_unicode (line, i)
123 struct java_line *line;
126 static char buffer [10];
127 if (line->unicode_escape_p [i] || line->line [i] > 128)
128 sprintf (buffer, "\\u%04x", line->line [i]);
131 buffer [0] = line->line [i];
/* Peek at the character at the current read position of the current
   line (ctxp->c_line) without advancing that position.  */
138 java_sneak_unicode ()
140 return (ctxp->c_line->line [ctxp->c_line->current]);
/* Push the last character back: step the current line's read position
   back by one and subtract JAVA_COLUMN_DELTA (0) from its column
   counter.  Calling this when the read position is 0 is a fatal
   error.  */
144 java_unget_unicode ()
146 if (!ctxp->c_line->current)
147 fatal ("can't unget unicode - java_unget_unicode");
148 ctxp->c_line->current--;
149 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
/* Set up ctxp->c_line to receive a new source line.  The look-ahead
   character and its escape flag are read from the old line first so
   that they can be stored at the start of the new one.  A current line
   that is not white-space-only replaces ctxp->p_line, whose buffers
   are freed.  The global line counter is incremented and recorded in
   the line.  */
153 java_allocate_new_line ()
155 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
156 char ahead_escape_p = (ctxp->c_line ?
157 ctxp->c_line->unicode_escape_ahead_p : 0);
/* Keep the line just finished as the previous line, unless it held
   only white space.  */
159 if (ctxp->c_line && !ctxp->c_line->white_space_only)
163 free (ctxp->p_line->unicode_escape_p);
164 free (ctxp->p_line->line);
167 ctxp->p_line = ctxp->c_line;
168 ctxp->c_line = NULL; /* Reallocated */
/* Allocate a line record with room for JAVA_LINE_MAX characters and
   as many escape flags.  */
173 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
174 ctxp->c_line->max = JAVA_LINE_MAX;
175 ctxp->c_line->line = (unicode_t *)xmalloc
176 (sizeof (unicode_t)*ctxp->c_line->max);
177 ctxp->c_line->unicode_escape_p =
178 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
179 ctxp->c_line->white_space_only = 0;
/* Start empty, at column and read position 0.  */
182 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
183 ctxp->c_line->char_col = ctxp->c_line->current = 0;
/* Seed the line with the look-ahead character saved above.  */
186 ctxp->c_line->line [ctxp->c_line->size] = ahead;
187 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
188 ctxp->c_line->size++;
190 ctxp->c_line->ahead [0] = 0;
191 ctxp->c_line->unicode_escape_ahead_p = 0;
192 ctxp->c_line->lineno = ++lineno;
193 ctxp->c_line->white_space_only = 1;
196 /* Create a new lexer object reading from FINPUT, with no pending
   unget value.  When iconv is available, a conversion from ENCODING to
   "UCS-2" is opened; otherwise ENCODING is compared against
   DEFAULT_ENCODING.  An encoding that cannot be used is reported
   through fatal ().  */
198 java_new_lexer (finput, encoding)
200 const char *encoding;
202 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
205 lex->finput = finput;
207 lex->unget_value = 0;
210 lex->handle = iconv_open ("UCS-2", encoding);
/* iconv_open signals failure by returning (iconv_t) -1.  */
211 if (lex->handle == (iconv_t) -1)
213 /* FIXME: we should give a nice error based on errno here. */
220 #else /* HAVE_ICONV */
/* strcmp is nonzero when ENCODING differs from DEFAULT_ENCODING.  */
221 if (strcmp (encoding, DEFAULT_ENCODING))
223 #endif /* HAVE_ICONV */
226 fatal ("unknown encoding: `%s'", encoding);
/* Tear down LEX.  The code visible here closes the iconv conversion
   handle opened for it.  */
232 java_destroy_lexer (lex)
236 iconv_close (lex->handle);
245 if (lex->unget_value)
247 unicode_t r = lex->unget_value;
248 lex->unget_value = 0;
254 size_t ir, inbytesleft, in_save, out_count, out_save;
258 /* If there is data which has already been converted, use it. */
259 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
266 /* See if we need to read more data. If FIRST == 0 then
267 the previous conversion attempt ended in the middle of
268 a character at the end of the buffer. Otherwise we
269 only have to read if the buffer is empty. */
270 if (lex->first == 0 || lex->first >= lex->last)
274 if (lex->first >= lex->last)
279 if (feof (lex->finput))
281 r = fread (&lex->buffer[lex->last], 1,
282 sizeof (lex->buffer) - lex->last,
287 inbytesleft = lex->last - lex->first;
288 out_count = sizeof (lex->out_buffer) - lex->out_last;
290 if (inbytesleft == 0)
292 /* We've tried to read and there is nothing left. */
296 in_save = inbytesleft;
297 out_save = out_count;
298 inp = &lex->buffer[lex->first];
299 outp = &lex->out_buffer[lex->out_last];
300 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
302 lex->first += in_save - inbytesleft;
303 lex->out_last += out_save - out_count;
305 /* If we converted anything at all, move along. */
306 if (out_count != out_save)
309 if (ir == (size_t) -1)
313 /* This is ok. This means that the end of our buffer
314 is in the middle of a character sequence. We just
315 move the valid part of the buffer to the beginning to force a read. */
317 /* We use bcopy() because it should work for
318 overlapping strings. Use memmove() instead... */
319 bcopy (&lex->buffer[lex->first], &lex->buffer[0],
320 lex->last - lex->first);
321 lex->last -= lex->first;
326 /* A more serious error. */
327 java_lex_error ("unrecognized character in input stream",
335 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
337 /* Don't have any data. */
341 /* Success. We assume that UCS-2 is big-endian. This appears to
342 be an ok assumption. */
343 result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
344 | (unsigned char) lex->out_buffer[lex->out_first + 1]);
348 #else /* HAVE_ICONV */
351 c = getc (lex->finput);
359 if ((c & 0xe0) == 0xc0)
361 c1 = getc (lex->finput);
362 if ((c1 & 0xc0) == 0x80)
363 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
366 else if ((c & 0xf0) == 0xe0)
368 c1 = getc (lex->finput);
369 if ((c1 & 0xc0) == 0x80)
371 c2 = getc (lex->finput);
372 if ((c2 & 0xc0) == 0x80)
373 return (unicode_t)(((c & 0xf) << 12) +
374 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
382 /* We simply don't support invalid characters. */
383 java_lex_error ("malformed UTF-8 character", 0);
386 #endif /* HAVE_ICONV */
388 /* We only get here on error. */
/* Append character C to line L, recording in the parallel array
   whether it came from a Unicode escape (UNICODE_ESCAPE_P).  When the
   line is full, both arrays are grown by JAVA_LINE_MAX entries
   first.  */
393 java_store_unicode (l, c, unicode_escape_p)
396 int unicode_escape_p;
398 if (l->size == l->max)
400 l->max += JAVA_LINE_MAX;
401 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
402 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
403 sizeof (char)*l->max);
405 l->line [l->size] = c;
406 l->unicode_escape_p [l->size++] = unicode_escape_p;
/* Read the next character from LEX, translating a Unicode escape
   (backslash, `u', four hex digits) into the character it denotes.
   *UNICODE_ESCAPE_P is set to 1 when the returned character was
   produced by such an escape and to 0 otherwise.  When TERM_CONTEXT is
   zero the character is also run through java_lineterminator, and
   '\n' is returned for a line terminator.  A backslash that does not
   start an escape is returned as itself, with the character read after
   it saved in lex->unget_value.  */
410 java_read_unicode (lex, term_context, unicode_escape_p)
413 int *unicode_escape_p;
417 c = java_read_char (lex);
418 *unicode_escape_p = 0;
423 return (term_context ? c : (java_lineterminator (c)
/* Only an odd count of backslashes can introduce an escape.  */
429 if ((lex->bs_count) % 2 == 1)
431 /* Odd number of \ seen. */
432 c = java_read_char (lex);
435 unicode_t unicode = 0;
437 /* Next should be 4 hex digits, otherwise it's an error.
438 The hex value is converted into the unicode, pushed into
439 the Unicode stream. */
/* Most significant digit first: shifts of 12, 8, 4 and 0 bits.  */
440 for (shift = 12; shift >= 0; shift -= 4)
442 if ((c = java_read_char (lex)) == UEOF)
444 if (c >= '0' && c <= '9')
445 unicode |= (unicode_t)((c-'0') << shift);
/* `c | 0x20' folds an upper-case hex letter to lower case.  */
446 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
447 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
449 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
/* NOTE(review): the return below tests java_lineterminator (c), and C
   still holds the last hex digit read by the loop, not the decoded
   value UNICODE -- confirm whether UNICODE was intended.  */
452 *unicode_escape_p = 1;
454 ? unicode : (java_lineterminator (c) ? '\n' : unicode));
456 lex->unget_value = c;
458 return (unicode_t) '\\';
464 /* It's time to read a line when... */
465 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
468 java_allocate_new_line ();
469 if (ctxp->c_line->line[0] != '\n')
472 int unicode_escape_p;
473 c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
474 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
475 if (ctxp->c_line->white_space_only
476 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
477 ctxp->c_line->white_space_only = 0;
478 if ((c == '\n') || (c == UEOF))
482 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
483 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
484 return ctxp->c_line->line [ctxp->c_line->current++];
/* Classify C as a line terminator.  LF and CR are recognized
   explicitly.  After a CR the following character is read (with
   TERM_CONTEXT set, so it is not itself translated) and then handed
   back: either through the lexer's low-level unget_value slot or by
   storing it, with its escape flag, in the current line's look-ahead
   slot.  */
488 java_lineterminator (c)
491 if (c == '\n') /* LF */
493 else if (c == '\r') /* CR */
495 int unicode_escape_p;
496 c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
499 /* In this case we will have another terminator. For some
500 reason the lexer has several different unget methods. We
501 can't use the `ahead' method because then the \r will end
502 up in the actual text of the line, causing an error. So
503 instead we choose a very low-level method. FIXME: this
504 is incredibly ugly. */
505 ctxp->lexer->unget_value = c;
509 ctxp->c_line->ahead [0] = c;
510 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
518 /* Parse the end of a C style comment.
519 * C is the first character following the '/' and '*'.
   * Characters are consumed one at a time; reaching the end of the
   * input first is reported as "Comment not terminated at end of
   * input".  A '*' seen after another '*' is pushed back so that it is
   * examined again as a possible start of the closing delimiter. */
521 java_parse_end_comment (c)
525 for ( ;; c = java_get_unicode ())
530 java_lex_error ("Comment not terminated at end of input", 0);
532 switch (c = java_get_unicode ())
535 java_lex_error ("Comment not terminated at end of input", 0);
538 case '*': /* reparse only '*' */
539 java_unget_unicode ();
545 /* Parse the documentation section. Keywords must be at the beginning
546 of a documentation comment line (ignoring white space and any `*'
547 character). Parsed keyword(s): @DEPRECATED.
   C is the first character of the section.  Returns 1 when a '/'
   directly following a '*' has been consumed, i.e. the comment is
   over.  Sets ctxp->deprecated when the tag read after '@' is exactly
   "deprecated".  */
550 java_parse_doc_section (c)
553 int valid_tag = 0, seen_star = 0;
/* Skip the white space, '*' characters and newlines that may precede
   a tag.  */
555 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
567 c = java_get_unicode();
571 java_lex_error ("Comment not terminated at end of input", 0);
573 if (seen_star && (c == '/'))
574 return 1; /* Goto step1 in caller */
576 /* We're parsing @deprecated */
577 if (valid_tag && (c == '@'))
/* Collect at most 10 characters, the length of "deprecated",
   stopping early at a space, a newline or the end of input.  */
582 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
584 c = java_get_unicode ();
585 tag [tag_index++] = c;
589 java_lex_error ("Comment not terminated at end of input", 0);
590 tag [tag_index] = '\0';
592 if (!strcmp (tag, "deprecated"))
593 ctxp->deprecated = 1;
595 java_unget_unicode ();
599 /* Function wrapper around the _JAVA_LETTER_OR_DIGIT_P macro.  This
   function is to be used only by JAVA_ID_CHAR_P (); otherwise it
600 will return a wrong result. */
602 java_letter_or_digit_p (c)
605 return _JAVA_LETTER_OR_DIGIT_P (c);
/* Read the character(s) that follow a backslash in a character or
   string literal and return the character the escape stands for.
   Single-character escapes yield 0x8, 0x9, 0xa, 0xc, 0xd, 0x22, 0x27 or
   0x5c.  A digit starts a numeric escape of up to three digits.
   Errors are reported through java_lex_error and yield
   JAVA_CHAR_ERROR.  */
609 java_parse_escape_sequence ()
614 switch (c = java_get_unicode ())
617 return (unicode_t)0x8;
619 return (unicode_t)0x9;
621 return (unicode_t)0xa;
623 return (unicode_t)0xc;
625 return (unicode_t)0xd;
627 return (unicode_t)0x22;
629 return (unicode_t)0x27;
631 return (unicode_t)0x5c;
/* NOTE(review): '8' and '9' are accepted here and by the RANGE test
   below although they are not octal digits -- confirm this is
   intended.  */
632 case '0': case '1': case '2': case '3': case '4':
633 case '5': case '6': case '7': case '8': case '9':
636 int octal_escape_index = 0;
/* Gather up to three digits, then push back the character that ended
   the run.  */
638 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
639 c = java_get_unicode ())
640 octal_escape [octal_escape_index++] = c;
642 java_unget_unicode ();
/* A three-digit escape must start with a digit no greater than '3'.  */
644 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
646 java_lex_error ("Literal octal escape out of range", 0);
647 return JAVA_CHAR_ERROR;
/* Combine the digits, three bits each, most significant first.  */
652 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
653 i < octal_escape_index; i++, shift -= 3)
654 char_lit |= (octal_escape [i] - '0') << shift;
661 return '\n'; /* ULT, caught later as a specific error */
663 java_lex_error ("Illegal character in escape sequence", 0);
664 return JAVA_CHAR_ERROR;
668 /* Isolate the code which may raise an arithmetic exception in its own function. */
677 int number_beginning;
680 static void java_perform_atof PARAMS ((PTR));
/* Convert a floating-point literal to a tree constant.  AV points to a
   struct jpa_args supplying the literal text, the float/double flag
   (fflag), the semantic-value slot and the literal's starting position.
   A value that converts to infinity or NaN is reported with
   JAVA_FLOAT_RANGE_ERROR; a real constant of the selected type is built
   and stored through SET_LVAL_NODE_TYPE.  */
683 java_perform_atof (av)
686 struct jpa_args *a = (struct jpa_args *)av;
/* java_lval and number_beginning are not named again in the visible
   code; presumably the macros used below refer to them -- TODO
   confirm.  */
687 YYSTYPE *java_lval = a->java_lval;
688 int number_beginning = a->number_beginning;
689 REAL_VALUE_TYPE value;
690 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
692 SET_REAL_VALUE_ATOF (value,
693 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
695 if (REAL_VALUE_ISINF (value)
696 || REAL_VALUE_ISNAN (value))
698 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
702 SET_LVAL_NODE_TYPE (build_real (type, value), type);
706 static int yylex PARAMS ((YYSTYPE *));
716 unicode_t c, first_unicode;
717 int ascii_index, all_ascii;
720 /* Translation of the Unicode escape in the raw stream of Unicode
721 characters. Takes care of line terminator. */
723 /* Skip white spaces: SP, TAB and FF or ULT */
724 for (c = java_get_unicode ();
725 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
728 ctxp->elc.line = ctxp->c_line->lineno;
729 ctxp->elc.col = ctxp->c_line->char_col-2;
732 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
734 if (c == 0x1a) /* CTRL-Z */
736 if ((c = java_get_unicode ()) == UEOF)
737 return 0; /* Ok here */
739 java_unget_unicode (); /* Caught latter at the end the function */
741 /* Handle EOF here */
742 if (c == UEOF) /* Should probably do something here... */
745 /* Take care of eventual comments. */
748 switch (c = java_get_unicode ())
753 c = java_get_unicode ();
755 java_lex_error ("Comment not terminated at end of input", 0);
756 if (c == '\n') /* ULT */
762 if ((c = java_get_unicode ()) == '*')
764 if ((c = java_get_unicode ()) == '/')
765 goto step1; /* Empy documentation comment */
766 else if (java_parse_doc_section (c))
770 java_parse_end_comment ((c = java_get_unicode ()));
774 java_unget_unicode ();
780 ctxp->elc.line = ctxp->c_line->lineno;
781 ctxp->elc.prev_col = ctxp->elc.col;
782 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
783 if (ctxp->elc.col < 0)
784 fatal ("ctxp->elc.col < 0 - java_lex");
786 /* Numeric literals */
787 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
789 /* This section of code is borrowed from gcc/c-lex.c */
790 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
791 int parts[TOTAL_PARTS];
792 HOST_WIDE_INT high, low;
793 /* End borrowed section */
794 char literal_token [256];
795 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
798 int number_beginning = ctxp->c_line->current;
801 /* We might have a . separator instead of a FP like .[0-9]* */
804 unicode_t peep = java_sneak_unicode ();
806 if (!JAVA_ASCII_DIGIT (peep))
809 BUILD_OPERATOR (DOT_TK);
813 for (i = 0; i < TOTAL_PARTS; i++)
818 c = java_get_unicode ();
819 if (c == 'x' || c == 'X')
822 c = java_get_unicode ();
824 else if (JAVA_ASCII_DIGIT (c))
828 /* Push the '.' back and prepare for a FP parsing... */
829 java_unget_unicode ();
834 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
835 JAVA_LEX_LIT ("0", 10);
839 SET_LVAL_NODE (long_zero_node);
842 SET_LVAL_NODE (float_zero_node);
845 SET_LVAL_NODE (double_zero_node);
848 java_unget_unicode ();
849 SET_LVAL_NODE (integer_zero_node);
854 /* Parse the first part of the literal, until we find something
855 which is not a number. */
856 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
857 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
858 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
860 /* We store in a string (in case it turns out to be a FP) and in
861 PARTS if we have to process an integer literal. */
862 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
865 literal_token [literal_index++] = c;
866 /* This section of code is borrowed from gcc/c-lex.c */
867 for (count = 0; count < TOTAL_PARTS; count++)
869 parts[count] *= radix;
872 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
873 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
878 if (parts [TOTAL_PARTS-1] != 0)
880 /* End borrowed section. */
881 c = java_get_unicode ();
884 /* If we have something from the FP char set but not a digit, parse an FP literal. */
886 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
889 int seen_digit = (literal_index ? 1 : 0);
890 int seen_exponent = 0;
891 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
892 double unless specified. */
894 java_lex_error ("Can't express non-decimal FP literal", 0);
903 literal_token [literal_index++ ] = c;
904 c = java_get_unicode ();
907 java_lex_error ("Invalid character in FP literal", 0);
910 if (c == 'e' || c == 'E')
914 /* {E,e} must have seen at least one digit */
916 java_lex_error ("Invalid FP literal", 0);
920 literal_token [literal_index++] = c;
921 c = java_get_unicode ();
924 java_lex_error ("Invalid character in FP literal", 0);
926 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
928 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
929 stage = 4; /* So we fall through */
932 if ((c=='-' || c =='+') && stage == 2)
935 literal_token [literal_index++] = c;
936 c = java_get_unicode ();
939 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
940 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
941 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
942 (stage == 3 && JAVA_ASCII_DIGIT (c)))
944 if (JAVA_ASCII_DIGIT (c))
946 literal_token [literal_index++ ] = c;
947 c = java_get_unicode ();
954 if (stage != 4) /* Don't push back fF/dD */
955 java_unget_unicode ();
957 /* An exponent (if any) must have seen a digit. */
958 if (seen_exponent && !seen_digit)
959 java_lex_error ("Invalid FP literal", 0);
961 literal_token [literal_index] = '\0';
962 JAVA_LEX_LIT (literal_token, radix);
965 a.literal_token = literal_token;
967 a.java_lval = java_lval;
968 a.number_beginning = number_beginning;
969 if (do_float_handler (java_perform_atof, (PTR) &a))
972 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
978 } /* JAVA_ASCCI_FPCHAR (c) */
980 /* Here we get back to converting the integral literal. */
981 if (c == 'L' || c == 'l')
983 else if (radix == 16 && JAVA_ASCII_LETTER (c))
984 java_lex_error ("Digit out of range in hexadecimal literal", 0);
985 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
986 java_lex_error ("Digit out of range in octal literal", 0);
987 else if (radix == 16 && !literal_index)
988 java_lex_error ("No digit specified for hexadecimal literal", 0);
990 java_unget_unicode ();
992 #ifdef JAVA_LEX_DEBUG
993 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
994 JAVA_LEX_LIT (literal_token, radix);
996 /* This section of code is borrowed from gcc/c-lex.c */
999 bytes = GET_TYPE_PRECISION (long_type_node);
1000 for (i = bytes; i < TOTAL_PARTS; i++)
1008 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1010 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1011 / HOST_BITS_PER_CHAR)]
1012 << (i * HOST_BITS_PER_CHAR));
1013 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1015 /* End borrowed section. */
1017 /* Range checking */
1020 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1021 9223372036854775807L is the biggest `long' literal that can be
1022 expressed using a 10 radix. For other radixes, everything that
1023 fits within 64 bits is OK. */
1024 int hb = (high >> 31);
1025 if (overflow || (hb && low && radix == 10) ||
1026 (hb && high & 0x7fffffff && radix == 10) ||
1027 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1028 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1032 /* 2147483648 is valid if operand of a '-'. Otherwise,
1033 2147483647 is the biggest `int' literal that can be
1034 expressed using a 10 radix. For other radixes, everything
1035 that fits within 32 bits is OK. As all literals are
1036 signed, we sign extend here. */
1037 int hb = (low >> 31) & 0x1;
1038 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1039 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1040 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1043 ctxp->minus_seen = 0;
1044 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1045 (long_suffix ? long_type_node : int_type_node));
1049 ctxp->minus_seen = 0;
1050 /* Character literals */
1054 if ((c = java_get_unicode ()) == '\\')
1055 char_lit = java_parse_escape_sequence ();
1059 c = java_get_unicode ();
1061 if ((c == '\n') || (c == UEOF))
1062 java_lex_error ("Character literal not terminated at end of line", 0);
1064 java_lex_error ("Syntax error in character literal", 0);
1066 if (c == JAVA_CHAR_ERROR)
1067 char_lit = 0; /* We silently convert it to zero */
1069 JAVA_LEX_CHAR_LIT (char_lit);
1070 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1074 /* String literals */
1080 for (no_error = 1, c = java_get_unicode ();
1081 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1084 c = java_parse_escape_sequence ();
1085 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
1086 java_unicode_2_utf8 (c);
1088 if (c == '\n' || c == UEOF) /* ULT */
1090 lineno--; /* Refer to the line the terminator was seen */
1091 java_lex_error ("String not terminated at end of line.", 0);
1095 obstack_1grow (&temporary_obstack, '\0');
1096 string = obstack_finish (&temporary_obstack);
1098 if (!no_error || (c != '"'))
1099 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1101 java_lval->node = build_string (strlen (string), string);
1103 obstack_free (&temporary_obstack, string);
1104 return STRING_LIT_TK;
1112 BUILD_OPERATOR (OP_TK);
1118 if (ctxp->ccb_indent == 1)
1119 ctxp->first_ccb_indent1 = lineno;
1121 BUILD_OPERATOR (OCB_TK);
1125 if (ctxp->ccb_indent == 1)
1126 ctxp->last_ccb_indent1 = lineno;
1127 BUILD_OPERATOR (CCB_TK);
1130 BUILD_OPERATOR (OSB_TK);
1142 BUILD_OPERATOR (DOT_TK);
1143 /* return DOT_TK; */
1150 if ((c = java_get_unicode ()) == '=')
1152 BUILD_OPERATOR (EQ_TK);
1156 /* Equals is used in two different locations. In the
1157 variable_declarator: rule, it has to be seen as '=' as opposed
1158 to being seen as an ordinary assignment operator in
1159 assignment_operators: rule. */
1160 java_unget_unicode ();
1161 BUILD_OPERATOR (ASSIGN_TK);
1165 switch ((c = java_get_unicode ()))
1168 BUILD_OPERATOR (GTE_TK);
1170 switch ((c = java_get_unicode ()))
1173 if ((c = java_get_unicode ()) == '=')
1175 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1179 java_unget_unicode ();
1180 BUILD_OPERATOR (ZRS_TK);
1183 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1185 java_unget_unicode ();
1186 BUILD_OPERATOR (SRS_TK);
1189 java_unget_unicode ();
1190 BUILD_OPERATOR (GT_TK);
1194 switch ((c = java_get_unicode ()))
1197 BUILD_OPERATOR (LTE_TK);
1199 if ((c = java_get_unicode ()) == '=')
1201 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1205 java_unget_unicode ();
1206 BUILD_OPERATOR (LS_TK);
1209 java_unget_unicode ();
1210 BUILD_OPERATOR (LT_TK);
1214 switch ((c = java_get_unicode ()))
1217 BUILD_OPERATOR (BOOL_AND_TK);
1219 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1221 java_unget_unicode ();
1222 BUILD_OPERATOR (AND_TK);
1226 switch ((c = java_get_unicode ()))
1229 BUILD_OPERATOR (BOOL_OR_TK);
1231 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1233 java_unget_unicode ();
1234 BUILD_OPERATOR (OR_TK);
1238 switch ((c = java_get_unicode ()))
1241 BUILD_OPERATOR (INCR_TK);
1243 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1245 java_unget_unicode ();
1246 BUILD_OPERATOR (PLUS_TK);
1250 switch ((c = java_get_unicode ()))
1253 BUILD_OPERATOR (DECR_TK);
1255 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1257 java_unget_unicode ();
1258 ctxp->minus_seen = 1;
1259 BUILD_OPERATOR (MINUS_TK);
1263 if ((c = java_get_unicode ()) == '=')
1265 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1269 java_unget_unicode ();
1270 BUILD_OPERATOR (MULT_TK);
1274 if ((c = java_get_unicode ()) == '=')
1276 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1280 java_unget_unicode ();
1281 BUILD_OPERATOR (DIV_TK);
1285 if ((c = java_get_unicode ()) == '=')
1287 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1291 java_unget_unicode ();
1292 BUILD_OPERATOR (XOR_TK);
1296 if ((c = java_get_unicode ()) == '=')
1298 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1302 java_unget_unicode ();
1303 BUILD_OPERATOR (REM_TK);
1307 if ((c = java_get_unicode()) == '=')
1309 BUILD_OPERATOR (NEQ_TK);
1313 java_unget_unicode ();
1314 BUILD_OPERATOR (NEG_TK);
1319 BUILD_OPERATOR (REL_QM_TK);
1322 BUILD_OPERATOR (REL_CL_TK);
1324 BUILD_OPERATOR (NOT_TK);
1327 /* Keyword, boolean literal or null literal */
1328 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1329 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1331 java_unicode_2_utf8 (c);
1332 if (all_ascii && c >= 128)
1337 obstack_1grow (&temporary_obstack, '\0');
1338 string = obstack_finish (&temporary_obstack);
1339 java_unget_unicode ();
1341 /* If we have something all ascii, we consider a keyword, a boolean
1342 literal, a null literal or an all ASCII identifier. Otherwise,
1343 this is an identifier (possibly not respecting formation rule). */
1346 struct java_keyword *kw;
1347 if ((kw=java_keyword (string, ascii_index)))
1349 JAVA_LEX_KW (string);
1352 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1353 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1354 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1356 SET_MODIFIER_CTX (kw->token);
1359 SET_LVAL_NODE (float_type_node);
1362 SET_LVAL_NODE (double_type_node);
1365 SET_LVAL_NODE (boolean_type_node);
1368 SET_LVAL_NODE (byte_type_node);
1371 SET_LVAL_NODE (short_type_node);
1374 SET_LVAL_NODE (int_type_node);
1377 SET_LVAL_NODE (long_type_node);
1380 SET_LVAL_NODE (char_type_node);
1383 /* Keyword based literals */
1386 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1387 boolean_true_node : boolean_false_node));
1390 SET_LVAL_NODE (null_pointer_node);
1393 /* For some keywords we want to retain information on the location where they were found. */
1406 BUILD_OPERATOR (kw->token);
1414 /* We may have an ID here */
1415 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1417 JAVA_LEX_ID (string);
1418 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1422 /* Everything else is an invalid character in the input */
1424 char lex_error_buffer [128];
1425 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1426 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1427 java_lex_error (lex_error_buffer, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack.
   Values 0x01-0x7f take one byte, values 0x80-0x7ff take two bytes and
   everything else takes three.  The value 0 is deliberately given the
   two-byte form (0xc0 0x80), so no zero byte is ever written for
   it.  */
1433 java_unicode_2_utf8 (unicode)
1436 if (RANGE (unicode, 0x01, 0x7f))
1437 obstack_1grow (&temporary_obstack, (char)unicode);
1438 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
/* Lead byte 110xxxxx carries bits 6-10; the trail byte 10xxxxxx
   carries bits 0-5.  */
1440 obstack_1grow (&temporary_obstack,
1441 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1442 obstack_1grow (&temporary_obstack,
1443 (unsigned char)(0x80 | (unicode & 0x3f)));
1445 else /* Range 0x800-0xffff */
/* Lead byte 1110xxxx carries bits 12-15, followed by two trail bytes
   with bits 6-11 and 0-5.  */
1447 obstack_1grow (&temporary_obstack,
1448 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1449 obstack_1grow (&temporary_obstack,
1450 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1451 obstack_1grow (&temporary_obstack,
1452 (unsigned char)(0x80 | (unicode & 0x003f)));
/* Wrap NODE in a WFL (build_expr_wfl) node that carries the current
   file name and the line and column currently recorded in
   ctxp->elc.  */
1458 build_wfl_node (node)
1461 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
/* Record the location of a lexical error.  The line is the current
   line's number and the column is its char_col - 1 + FORWARD, so a
   nonzero FORWARD moves the reported column to the right.
   ctxp->java_error_flag is cleared as well.  MSG is presumably passed
   on to the error reporter -- TODO confirm; both parameters are marked
   ATTRIBUTE_UNUSED.  */
1466 java_lex_error (msg, forward)
1467 const char *msg ATTRIBUTE_UNUSED;
1468 int forward ATTRIBUTE_UNUSED;
1471 ctxp->elc.line = ctxp->c_line->lineno;
1472 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1474 /* Might be caught in the middle of some error report */
1475 ctxp->java_error_flag = 0;
1492 if (next != '\n' && next != EOF)
/* Build, on temporary_obstack, the text of line LINE of file FILENAME
   followed by a second line of padding that ends in a '^' marker, and
   return the finished obstack object.  COL selects where the marker
   goes; the special values -1 and -2 are described below.  A file
   shorter than LINE lines yields the text
   "<<file too short - unexpected EOF>>" instead of the line.  */
1504 java_get_line_col (filename, line, col)
1505 const char *filename ATTRIBUTE_UNUSED;
1506 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1511 /* Dumb implementation. Doesn't try to cache or optimize things. */
1512 /* First line of the file is line 1, first column is 1 */
1514 /* COL == -1 means, at the CR/LF in LINE */
1515 /* COL == -2 means, at the first non space char in LINE */
1518 int c, ccol, cline = 1;
1519 int current_line_col = 0;
1520 int first_non_space = 0;
/* NOTE(review): the message below names java_display_line_col, not
   this function.  */
1523 if (!(fp = fopen (filename, "r")))
1524 fatal ("Can't open file - java_display_line_col");
/* Skip forward to the requested line.  */
1526 while (cline != line)
1531 static char msg[] = "<<file too short - unexpected EOF>>";
1532 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1535 if (java_is_eol (fp, c))
1539 /* Gather the chars of the current line in a buffer */
1543 if (c < 0 || java_is_eol (fp, c))
/* Remember the column of the first character that is not white
   space.  */
1545 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1546 first_non_space = current_line_col;
1547 obstack_1grow (&temporary_obstack, c);
1552 obstack_1grow (&temporary_obstack, '\n');
1556 col = current_line_col;
1557 first_non_space = 0;
1560 col = first_non_space;
1562 first_non_space = 0;
1564 /* Place the '^' at the right position */
1565 base = obstack_base (&temporary_obstack);
/* Pad with col+3 characters before the marker.  */
1566 for (ccol = 1; ccol <= col+3; ccol++)
1568 /* Compute \t when reaching first_non_space */
1569 char c = (first_non_space ?
1570 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1571 obstack_1grow (&temporary_obstack, c);
1573 obstack_grow0 (&temporary_obstack, "^", 1);
1576 return obstack_finish (&temporary_obstack);