X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fjava%2Flex.c;h=77e38f898485707443b1cdc41d1500e4cb43e1c4;hp=be1fcf8e76fa2d349b3031a4b62758d8070cf8ca;hb=8e452f9c0c8f40aeee57cd573a9d638e53872aea;hpb=ab3a735944aa71e2e91e5f9824346756576f6806 diff --git a/gcc/java/lex.c b/gcc/java/lex.c index be1fcf8e76f..77e38f89848 100644 --- a/gcc/java/lex.c +++ b/gcc/java/lex.c @@ -1,21 +1,22 @@ /* Language lexer for the GNU compiler for the Java(TM) language. - Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003 + Free Software Foundation, Inc. Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com) -This file is part of GNU CC. +This file is part of GCC. -GNU CC is free software; you can redistribute it and/or modify +GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. -GNU CC is distributed in the hope that it will be useful, +GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to +along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. @@ -36,32 +37,46 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */ #include "keyword.h" #include "flags.h" +#include "chartables.h" +#ifndef JC1_LITE +#include "timevar.h" +#endif + +/* Function declarations. */ +static char *java_sprint_unicode (struct java_line *, int); +static void java_unicode_2_utf8 (unicode_t); +static void java_lex_error (const char *, int); +#ifndef JC1_LITE +static int do_java_lex (YYSTYPE *); +static int java_lex (YYSTYPE *); +static int java_is_eol (FILE *, int); +static tree build_wfl_node (tree); +#endif +static void java_store_unicode (struct java_line *, unicode_t, int); +static int java_parse_escape_sequence (void); +static int java_start_char_p (unicode_t); +static int java_part_char_p (unicode_t); +static int java_space_char_p (unicode_t); +static void java_parse_doc_section (int); +static void java_parse_end_comment (int); +static int java_get_unicode (void); +static int java_read_unicode (java_lexer *, int *); +static int java_read_unicode_collapsing_terminators (java_lexer *, int *); +static void java_store_unicode (struct java_line *, unicode_t, int); +static int java_read_char (java_lexer *); +static void java_allocate_new_line (void); +static void java_unget_unicode (void); +static unicode_t java_sneak_unicode (void); +#ifndef JC1_LITE +static int utf8_cmp (const unsigned char *, int, const char *); +#endif -/* Function declaration */ -static char *java_sprint_unicode PARAMS ((struct java_line *, int)); -static void java_unicode_2_utf8 PARAMS ((unicode_t)); -static void java_lex_error PARAMS ((const char *, int)); +java_lexer *java_new_lexer (FILE *, const char *); #ifndef JC1_LITE -static int java_is_eol PARAMS ((FILE *, int)); -static tree build_wfl_node PARAMS ((tree)); +static void error_if_numeric_overflow (tree); #endif -static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); -static unicode_t java_parse_escape_sequence PARAMS ((void)); -static int java_letter_or_digit_p PARAMS ((unicode_t)); -static int java_ignorable_control_p PARAMS ((unicode_t)); -static int java_parse_doc_section PARAMS ((unicode_t)); -static void java_parse_end_comment PARAMS ((unicode_t)); -static unicode_t java_get_unicode PARAMS ((void)); -static unicode_t java_read_unicode PARAMS ((java_lexer *, int *)); -static unicode_t java_read_unicode_collapsing_terminators - PARAMS ((java_lexer *, int *)); -static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int)); -static unicode_t java_read_char PARAMS ((java_lexer *)); -static void java_allocate_new_line PARAMS ((void)); -static void java_unget_unicode PARAMS ((void)); -static unicode_t java_sneak_unicode PARAMS ((void)); -java_lexer *java_new_lexer PARAMS ((FILE *, const char *)); +#ifdef HAVE_ICONV /* This is nonzero if we have initialized `need_byteswap'. */ static int byteswap_init = 0; @@ -70,21 +85,16 @@ static int byteswap_init = 0; doing a conversion once at startup and seeing what happens. This flag holds the results of this determination. */ static int need_byteswap = 0; +#endif void -java_init_lex (finput, encoding) - FILE *finput; - const char *encoding; +java_init_lex (FILE *finput, const char *encoding) { #ifndef JC1_LITE int java_lang_imported = 0; if (!java_lang_id) java_lang_id = get_identifier ("java.lang"); - if (!java_lang_cloneable) - java_lang_cloneable = get_identifier ("java.lang.Cloneable"); - if (!java_io_serializable) - java_io_serializable = get_identifier ("java.io.Serializable"); if (!inst_id) inst_id = get_identifier ("inst$"); if (!wpv_id) @@ -108,32 +118,32 @@ java_init_lex (finput, encoding) wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0); if (!wfl_string_buffer) wfl_string_buffer = - build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0); + build_expr_wfl (get_identifier (flag_emit_class_files + ? "java.lang.StringBuffer" + : "gnu.gcj.runtime.StringBuffer"), + NULL, 0, 0); if (!wfl_to_string) wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0); CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) = - CPC_INSTANCE_INITIALIZER_LIST (ctxp) = ctxp->incomplete_class = NULL_TREE; + CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE; - bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0])); - bzero ((PTR) current_jcf, sizeof (JCF)); + memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx)); + current_jcf = ggc_alloc_cleared (sizeof (JCF)); ctxp->current_parsed_class = NULL; ctxp->package = NULL_TREE; #endif ctxp->filename = input_filename; - ctxp->lineno = lineno = 0; + ctxp->lineno = input_line = 0; ctxp->p_line = NULL; ctxp->c_line = NULL; - ctxp->minus_seen = 0; ctxp->java_error_flag = 0; ctxp->lexer = java_new_lexer (finput, encoding); } static char * -java_sprint_unicode (line, i) - struct java_line *line; - int i; +java_sprint_unicode (struct java_line *line, int i) { static char buffer [10]; if (line->unicode_escape_p [i] || line->line [i] > 128) @@ -147,22 +157,24 @@ java_sprint_unicode (line, i) } static unicode_t -java_sneak_unicode () +java_sneak_unicode (void) { return (ctxp->c_line->line [ctxp->c_line->current]); } static void -java_unget_unicode () +java_unget_unicode (void) { if (!ctxp->c_line->current) - fatal ("can't unget unicode - java_unget_unicode"); + /* Can't unget unicode. */ + abort (); + ctxp->c_line->current--; ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0); } static void -java_allocate_new_line () +java_allocate_new_line (void) { unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0'); char ahead_escape_p = (ctxp->c_line ? @@ -177,17 +189,16 @@ java_allocate_new_line () free (ctxp->p_line); } ctxp->p_line = ctxp->c_line; - ctxp->c_line = NULL; /* Reallocated */ + ctxp->c_line = NULL; /* Reallocated. */ } if (!ctxp->c_line) { - ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line)); + ctxp->c_line = xmalloc (sizeof (struct java_line)); ctxp->c_line->max = JAVA_LINE_MAX; - ctxp->c_line->line = (unicode_t *)xmalloc - (sizeof (unicode_t)*ctxp->c_line->max); + ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max); ctxp->c_line->unicode_escape_p = - (char *)xmalloc (sizeof (char)*ctxp->c_line->max); + xmalloc (sizeof (char)*ctxp->c_line->max); ctxp->c_line->white_space_only = 0; } @@ -201,22 +212,23 @@ java_allocate_new_line () } ctxp->c_line->ahead [0] = 0; ctxp->c_line->unicode_escape_ahead_p = 0; - ctxp->c_line->lineno = ++lineno; + ctxp->c_line->lineno = ++input_line; ctxp->c_line->white_space_only = 1; } /* Create a new lexer object. */ + java_lexer * -java_new_lexer (finput, encoding) - FILE *finput; - const char *encoding; +java_new_lexer (FILE *finput, const char *encoding) { - java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer)); + java_lexer *lex = xmalloc (sizeof (java_lexer)); int enc_error = 0; lex->finput = finput; lex->bs_count = 0; lex->unget_value = 0; + lex->hit_eof = 0; + lex->encoding = encoding; #ifdef HAVE_ICONV lex->handle = iconv_open ("UCS-2", encoding); @@ -252,12 +264,14 @@ java_new_lexer (finput, encoding) in[1] = 0xbb; in[2] = 0xbf; - inp = in; + inp = (char *) in; inc = 3; outp = (char *) &result; outc = 2; - r = iconv (handle, (const char **) &inp, &inc, &outp, &outc); + r = iconv (handle, (ICONV_CONST char **) &inp, &inc, + &outp, &outc); + iconv_close (handle); /* Conversion must be complete for us to use the result. */ if (r != (size_t) -1 && inc == 0 && outc == 0) need_byteswap = (result != 0xfeff); @@ -271,25 +285,32 @@ java_new_lexer (finput, encoding) { /* If iconv failed, use the internal decoder if the default encoding was requested. This code is used on platforms where - iconv() exists but is insufficient for our needs. For - instance, on Solaris 2.5 iconv() cannot handle UTF-8 or UCS-2. */ - if (strcmp (encoding, DEFAULT_ENCODING)) + iconv exists but is insufficient for our needs. For + instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. + + On Solaris the default encoding, as returned by nl_langinfo(), + is `646' (aka ASCII), but the Solaris iconv_open() doesn't + understand that. We work around that by pretending + `646' to be the same as UTF-8. */ + if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646")) enc_error = 1; #ifdef HAVE_ICONV else - lex->use_fallback = 1; + { + lex->use_fallback = 1; + lex->encoding = "UTF-8"; + } #endif /* HAVE_ICONV */ } if (enc_error) - fatal ("unknown encoding: `%s'", encoding); + fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding); return lex; } void -java_destroy_lexer (lex) - java_lexer *lex; +java_destroy_lexer (java_lexer *lex) { #ifdef HAVE_ICONV if (! lex->use_fallback) @@ -298,9 +319,8 @@ java_destroy_lexer (lex) free (lex); } -static unicode_t -java_read_char (lex) - java_lexer *lex; +static int +java_read_char (java_lexer *lex) { if (lex->unget_value) { @@ -357,9 +377,9 @@ java_read_char (lex) in_save = inbytesleft; out_save = out_count; inp = &lex->buffer[lex->first]; - outp = &lex->out_buffer[lex->out_last]; - ir = iconv (lex->handle, (const char **) &inp, &inbytesleft, - &outp, &out_count); + outp = (char *) &lex->out_buffer[lex->out_last]; + ir = iconv (lex->handle, (ICONV_CONST char **) &inp, + &inbytesleft, &outp, &out_count); /* If we haven't read any bytes, then look to see if we have read a BOM. */ @@ -406,18 +426,19 @@ java_read_char (lex) is in the middle of a character sequence. We just move the valid part of the buffer to the beginning to force a read. */ - /* We use bcopy() because it should work for - overlapping strings. Use memmove() instead... */ - bcopy (&lex->buffer[lex->first], &lex->buffer[0], - lex->last - lex->first); + memmove (&lex->buffer[0], &lex->buffer[lex->first], + lex->last - lex->first); lex->last -= lex->first; lex->first = 0; } else { /* A more serious error. */ - java_lex_error ("unrecognized character in input stream", - 0); + char buffer[128]; + sprintf (buffer, + "Unrecognized character for encoding '%s'", + lex->encoding); + java_lex_error (buffer, 0); return UEOF; } } @@ -441,18 +462,24 @@ java_read_char (lex) int c, c1, c2; c = getc (lex->finput); - if (c < 128) - return (unicode_t)c; if (c == EOF) return UEOF; + if (c < 128) + return (unicode_t) c; else { if ((c & 0xe0) == 0xc0) { c1 = getc (lex->finput); if ((c1 & 0xc0) == 0x80) - return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f)); - c = c1; + { + unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f)); + /* Check for valid 2-byte characters. We explicitly + allow \0 because this encoding is common in the + Java world. */ + if (r == 0 || (r >= 0x80 && r <= 0x7ff)) + return r; + } } else if ((c & 0xf0) == 0xe0) { @@ -461,16 +488,23 @@ java_read_char (lex) { c2 = getc (lex->finput); if ((c2 & 0xc0) == 0x80) - return (unicode_t)(((c & 0xf) << 12) + - (( c1 & 0x3f) << 6) + (c2 & 0x3f)); - else - c = c2; + { + unicode_t r = (unicode_t)(((c & 0xf) << 12) + + (( c1 & 0x3f) << 6) + + (c2 & 0x3f)); + /* Check for valid 3-byte characters. + Don't allow surrogate, \ufffe or \uffff. */ + if (IN_RANGE (r, 0x800, 0xffff) + && ! IN_RANGE (r, 0xd800, 0xdfff) + && r != 0xfffe && r != 0xffff) + return r; + } } - else - c = c1; } - /* We simply don't support invalid characters. */ + /* We simply don't support invalid characters. We also + don't support 4-, 5-, or 6-byte UTF-8 sequences, as these + cannot be valid Java characters. */ java_lex_error ("malformed UTF-8 character", 0); } } @@ -480,28 +514,23 @@ java_read_char (lex) } static void -java_store_unicode (l, c, unicode_escape_p) - struct java_line *l; - unicode_t c; - int unicode_escape_p; +java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p) { if (l->size == l->max) { l->max += JAVA_LINE_MAX; - l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max); - l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p, - sizeof (char)*l->max); + l->line = xrealloc (l->line, sizeof (unicode_t)*l->max); + l->unicode_escape_p = xrealloc (l->unicode_escape_p, + sizeof (char)*l->max); } l->line [l->size] = c; l->unicode_escape_p [l->size++] = unicode_escape_p; } -static unicode_t -java_read_unicode (lex, unicode_escape_p) - java_lexer *lex; - int *unicode_escape_p; +static int +java_read_unicode (java_lexer *lex, int *unicode_escape_p) { - unicode_t c; + int c; c = java_read_char (lex); *unicode_escape_p = 0; @@ -521,25 +550,36 @@ java_read_unicode (lex, unicode_escape_p) { unicode_t unicode = 0; int shift = 12; - /* Next should be 4 hex digits, otherwise it's an error. - The hex value is converted into the unicode, pushed into - the Unicode stream. */ - for (shift = 12; shift >= 0; shift -= 4) + + /* Recognize any number of `u's in \u. */ + while ((c = java_read_char (lex)) == 'u') + ; + + shift = 12; + do { - if ((c = java_read_char (lex)) == UEOF) - return UEOF; - if (c >= '0' && c <= '9') - unicode |= (unicode_t)((c-'0') << shift); - else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) - unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift); - else if (c == 'u') + if (c == UEOF) { - /* Recognize any number of u in \u. */ - shift += 4; + java_lex_error ("prematurely terminated \\u sequence", 0); + return UEOF; } + + if (hex_p (c)) + unicode |= (unicode_t)(hex_value (c) << shift); else - java_lex_error ("Non hex digit in Unicode escape sequence", 0); + { + java_lex_error ("non-hex digit in \\u sequence", 0); + break; + } + + c = java_read_char (lex); + shift -= 4; } + while (shift >= 0); + + if (c != UEOF) + lex->unget_value = c; + lex->bs_count = 0; *unicode_escape_p = 1; return unicode; @@ -549,12 +589,11 @@ java_read_unicode (lex, unicode_escape_p) return (unicode_t) '\\'; } -static unicode_t -java_read_unicode_collapsing_terminators (lex, unicode_escape_p) - java_lexer *lex; - int *unicode_escape_p; +static int +java_read_unicode_collapsing_terminators (java_lexer *lex, + int *unicode_escape_p) { - unicode_t c = java_read_unicode (lex, unicode_escape_p); + int c = java_read_unicode (lex, unicode_escape_p); if (c == '\r') { @@ -562,7 +601,7 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p) return a single line terminator. */ int dummy; c = java_read_unicode (lex, &dummy); - if (c != '\n') + if (c != '\n' && c != UEOF) lex->unget_value = c; /* In either case we must return a newline. */ c = '\n'; @@ -571,13 +610,18 @@ java_read_unicode_collapsing_terminators (lex, unicode_escape_p) return c; } -static unicode_t -java_get_unicode () +static int +java_get_unicode (void) { - /* It's time to read a line when... */ + /* It's time to read a line when... */ if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size) { - unicode_t c; + int c; + int found_chars = 0; + + if (ctxp->lexer->hit_eof) + return UEOF; + java_allocate_new_line (); if (ctxp->c_line->line[0] != '\n') { @@ -586,15 +630,24 @@ java_get_unicode () int unicode_escape_p; c = java_read_unicode_collapsing_terminators (ctxp->lexer, &unicode_escape_p); - java_store_unicode (ctxp->c_line, c, unicode_escape_p); - if (ctxp->c_line->white_space_only - && !JAVA_WHITE_SPACE_P (c) - && c != '\n' - && c != UEOF) - ctxp->c_line->white_space_only = 0; + if (c != UEOF) + { + found_chars = 1; + java_store_unicode (ctxp->c_line, c, unicode_escape_p); + if (ctxp->c_line->white_space_only + && !JAVA_WHITE_SPACE_P (c) + && c != '\n') + ctxp->c_line->white_space_only = 0; + } if ((c == '\n') || (c == UEOF)) break; } + + if (c == UEOF && ! found_chars) + { + ctxp->lexer->hit_eof = 1; + return UEOF; + } } } ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0); @@ -603,12 +656,10 @@ java_get_unicode () } /* Parse the end of a C style comment. - * C is the first character following the '/' and '*'. */ + * C is the first character following the '/' and '*'. */ static void -java_parse_end_comment (c) - unicode_t c; +java_parse_end_comment (int c) { - for ( ;; c = java_get_unicode ()) { switch (c) @@ -624,7 +675,7 @@ java_parse_end_comment (c) return; case '/': return; - case '*': /* reparse only '*' */ + case '*': /* Reparse only '*'. */ java_unget_unicode (); } } @@ -635,78 +686,154 @@ java_parse_end_comment (c) of a documentation comment line (ignoring white space and any `*' character). Parsed keyword(s): @DEPRECATED. */ -static int -java_parse_doc_section (c) - unicode_t c; +static void +java_parse_doc_section (int c) { - int valid_tag = 0, seen_star = 0; + int last_was_star; + + /* We reset this here, because only the most recent doc comment + applies to the following declaration. */ + ctxp->deprecated = 0; - while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n') + /* We loop over all the lines of the comment. We'll eventually exit + if we hit EOF prematurely, or when we see the comment + terminator. */ + while (1) { - switch (c) + /* These first steps need only be done if we're still looking + for the deprecated tag. If we've already seen it, we might + as well skip looking for it again. */ + if (! ctxp->deprecated) { - case '*': - seen_star = 1; - break; - case '\n': /* ULT */ - valid_tag = 1; - default: - seen_star = 0; - } - c = java_get_unicode(); - } - - if (c == UEOF) - java_lex_error ("Comment not terminated at end of input", 0); - - if (seen_star && (c == '/')) - return 1; /* Goto step1 in caller */ + /* Skip whitespace and '*'s. We must also check for the end + of the comment here. */ + while (JAVA_WHITE_SPACE_P (c) || c == '*') + { + last_was_star = (c == '*'); + c = java_get_unicode (); + if (last_was_star && c == '/') + { + /* We just saw the comment terminator. */ + return; + } + } - /* We're parsing @deprecated */ - if (valid_tag && (c == '@')) - { - char tag [11]; - int tag_index = 0; + if (c == UEOF) + goto eof; - while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n') + if (c == '@') + { + const char *deprecated = "@deprecated"; + int i; + + for (i = 0; deprecated[i]; ++i) + { + if (c != deprecated[i]) + break; + /* We write the code in this way, with the + update at the end, so that after the loop + we're left with the next character in C. */ + c = java_get_unicode (); + } + + if (c == UEOF) + goto eof; + + /* @deprecated must be followed by a space or newline. + We also allow a '*' in case it appears just before + the end of a comment. In this position only we also + must allow any Unicode space character. */ + if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c)) + { + if (! deprecated[i]) + ctxp->deprecated = 1; + } + } + } + + /* We've examined the relevant content from this line. Now we + skip the remaining characters and start over with the next + line. We also check for end of comment here. */ + while (c != '\n' && c != UEOF) { + last_was_star = (c == '*'); c = java_get_unicode (); - tag [tag_index++] = c; + if (last_was_star && c == '/') + return; } - - if (c == UEOF) - java_lex_error ("Comment not terminated at end of input", 0); - tag [tag_index] = '\0'; - if (!strcmp (tag, "deprecated")) - ctxp->deprecated = 1; + if (c == UEOF) + goto eof; + /* We have to advance past the \n. */ + c = java_get_unicode (); + if (c == UEOF) + goto eof; } - java_unget_unicode (); - return 0; + + eof: + java_lex_error ("Comment not terminated at end of input", 0); +} + +/* Return true if C is a valid start character for a Java identifier. + This is only called if C >= 128 -- smaller values are handled + inline. However, this function handles all values anyway. */ +static int +java_start_char_p (unicode_t c) +{ + unsigned int hi = c / 256; + const char *const page = type_table[hi]; + unsigned long val = (unsigned long) page; + int flags; + + if ((val & ~ LETTER_MASK) != 0) + flags = page[c & 255]; + else + flags = val; + + return flags & LETTER_START; } -/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it - will return a wrong result. */ +/* Return true if C is a valid part character for a Java identifier. + This is only called if C >= 128 -- smaller values are handled + inline. However, this function handles all values anyway. */ static int -java_letter_or_digit_p (c) - unicode_t c; +java_part_char_p (unicode_t c) { - return _JAVA_LETTER_OR_DIGIT_P (c); + unsigned int hi = c / 256; + const char *const page = type_table[hi]; + unsigned long val = (unsigned long) page; + int flags; + + if ((val & ~ LETTER_MASK) != 0) + flags = page[c & 255]; + else + flags = val; + + return flags & LETTER_PART; } -/* This function to be used only by JAVA_ID_CHAR_P (). */ +/* Return true if C is whitespace. */ static int -java_ignorable_control_p (c) - unicode_t c; +java_space_char_p (unicode_t c) { - return _JAVA_IDENTIFIER_IGNORABLE (c); + unsigned int hi = c / 256; + const char *const page = type_table[hi]; + unsigned long val = (unsigned long) page; + int flags; + + if ((val & ~ LETTER_MASK) != 0) + flags = page[c & 255]; + else + flags = val; + + return flags & LETTER_SPACE; } -static unicode_t -java_parse_escape_sequence () +static int +java_parse_escape_sequence (void) { unicode_t char_lit; - unicode_t c; + int c; switch (c = java_get_unicode ()) { @@ -754,50 +881,36 @@ java_parse_escape_sequence () return char_lit; } - case '\n': - return '\n'; /* ULT, caught latter as a specific error */ default: java_lex_error ("Invalid character in escape sequence", 0); return JAVA_CHAR_ERROR; } } -/* Isolate the code which may raise an arithmetic exception in its - own function. */ - #ifndef JC1_LITE -struct jpa_args -{ - YYSTYPE *java_lval; - char *literal_token; - int fflag; - int number_beginning; -}; +#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0) -#ifdef REAL_ARITHMETIC -#define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0) -#else -#define IS_ZERO(X) ((X) == 0) -#endif +/* Subroutine of java_lex: converts floating-point literals to tree + nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to + store the result. FFLAG indicates whether the literal was tagged + with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING + is the line number on which to report any error. */ -static void java_perform_atof PARAMS ((PTR)); +static void java_perform_atof (YYSTYPE *, char *, int, int); static void -java_perform_atof (av) - PTR av; +java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag, + int number_beginning) { - struct jpa_args *a = (struct jpa_args *)av; - YYSTYPE *java_lval = a->java_lval; - int number_beginning = a->number_beginning; REAL_VALUE_TYPE value; - tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE); + tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE); SET_REAL_VALUE_ATOF (value, - REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type))); + REAL_VALUE_ATOF (literal_token, TYPE_MODE (type))); if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value)) { - JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double")); + JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double"); value = DCONST0; } else if (IS_ZERO (value)) @@ -805,7 +918,7 @@ java_perform_atof (av) /* We check to see if the value is really 0 or if we've found an underflow. We do this in the most primitive imaginable way. */ int really_zero = 1; - char *p = a->literal_token; + char *p = literal_token; if (*p == '-') ++p; while (*p && *p != 'e' && *p != 'E') @@ -830,24 +943,24 @@ java_perform_atof (av) } #endif -static int yylex PARAMS ((YYSTYPE *)); +static int yylex (YYSTYPE *); static int #ifdef JC1_LITE -yylex (java_lval) +yylex (YYSTYPE *java_lval) #else -java_lex (java_lval) +do_java_lex (YYSTYPE *java_lval) #endif - YYSTYPE *java_lval; { - unicode_t c, first_unicode; + int c; + unicode_t first_unicode; int ascii_index, all_ascii; char *string; /* Translation of the Unicode escape in the raw stream of Unicode characters. Takes care of line terminator. */ step1: - /* Skip white spaces: SP, TAB and FF or ULT */ + /* Skip white spaces: SP, TAB and FF or ULT. */ for (c = java_get_unicode (); c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ()) if (c == '\n') @@ -858,15 +971,16 @@ java_lex (java_lval) ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col); - if (c == 0x1a) /* CTRL-Z */ + if (c == 0x1a) /* CTRL-Z. */ { if ((c = java_get_unicode ()) == UEOF) - return 0; /* Ok here */ + return 0; /* Ok here. */ else - java_unget_unicode (); /* Caught latter at the end the function */ + java_unget_unicode (); /* Caught later, at the end of the + function. */ } - /* Handle EOF here */ - if (c == UEOF) /* Should probably do something here... */ + /* Handle EOF here. */ + if (c == UEOF) /* Should probably do something here... */ return 0; /* Take care of eventual comments. */ @@ -895,13 +1009,19 @@ java_lex (java_lval) case '*': if ((c = java_get_unicode ()) == '*') { - if ((c = java_get_unicode ()) == '/') - goto step1; /* Empy documentation comment */ - else if (java_parse_doc_section (c)) - goto step1; + c = java_get_unicode (); + if (c == '/') + { + /* Empty documentation comment. We have to reset + the deprecation marker as only the most recent + doc comment applies. */ + ctxp->deprecated = 0; + } + else + java_parse_doc_section (c); } - - java_parse_end_comment ((c = java_get_unicode ())); + else + java_parse_end_comment ((c = java_get_unicode ())); goto step1; break; default: @@ -915,24 +1035,26 @@ java_lex (java_lval) ctxp->elc.prev_col = ctxp->elc.col; ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1); if (ctxp->elc.col < 0) - fatal ("ctxp->elc.col < 0 - java_lex"); + abort (); - /* Numeric literals */ + /* Numeric literals. */ if (JAVA_ASCII_DIGIT (c) || (c == '.')) { - /* This section of code is borrowed from gcc/c-lex.c */ + /* This section of code is borrowed from gcc/c-lex.c. */ #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2) int parts[TOTAL_PARTS]; HOST_WIDE_INT high, low; - /* End borrowed section */ + /* End borrowed section. */ char literal_token [256]; int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes; + int found_hex_digits = 0, found_non_octal_digits = 0; int i; #ifndef JC1_LITE int number_beginning = ctxp->c_line->current; + tree value; #endif - /* We might have a . separator instead of a FP like .[0-9]* */ + /* We might have a . separator instead of a FP like .[0-9]*. */ if (c == '.') { unicode_t peep = java_sneak_unicode (); @@ -957,15 +1079,16 @@ java_lex (java_lval) } else if (JAVA_ASCII_DIGIT (c)) radix = 8; - else if (c == '.') + else if (c == '.' || c == 'e' || c =='E') { - /* Push the '.' back and prepare for a FP parsing... */ + /* Push the '.', 'e', or 'E' back and prepare for a FP + parsing... */ java_unget_unicode (); c = '0'; } else { - /* We have a zero literal: 0, 0{f,F}, 0{d,D} */ + /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */ JAVA_LEX_LIT ("0", 10); switch (c) { @@ -987,17 +1110,23 @@ java_lex (java_lval) } /* Parse the first part of the literal, until we find something which is not a number. */ - while ((radix == 10 && JAVA_ASCII_DIGIT (c)) || - (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) || - (radix == 8 && JAVA_ASCII_OCTDIGIT (c))) + while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) || + JAVA_ASCII_DIGIT (c)) { /* We store in a string (in case it turns out to be a FP) and in PARTS if we have to process a integer literal. */ - int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a'); + int numeric = hex_value (c); int count; + /* Remember when we find a valid hexadecimal digit. */ + if (radix == 16) + found_hex_digits = 1; + /* Remember when we find an invalid octal digit. */ + else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c)) + found_non_octal_digits = 1; + literal_token [literal_index++] = c; - /* This section of code if borrowed from gcc/c-lex.c */ + /* This section of code if borrowed from gcc/c-lex.c. */ for (count = 0; count < TOTAL_PARTS; count++) { parts[count] *= radix; @@ -1023,9 +1152,13 @@ java_lex (java_lval) int seen_digit = (literal_index ? 1 : 0); int seen_exponent = 0; int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are - double unless specified. */ - if (radix != 10) + double unless specified. */ + + /* It is ok if the radix is 8 because this just means we've + seen a leading `0'. However, radix==16 is invalid. */ + if (radix == 16) java_lex_error ("Can't express non-decimal FP literal", 0); + radix = 10; for (;;) { @@ -1045,9 +1178,10 @@ java_lex (java_lval) { if (stage < 2) { - /* {E,e} must have seen at list a digit */ + /* {E,e} must have seen at least a digit. */ if (!seen_digit) - java_lex_error ("Invalid FP literal", 0); + java_lex_error + ("Invalid FP literal, mantissa must have digit", 0); seen_digit = 0; seen_exponent = 1; stage = 2; @@ -1060,7 +1194,7 @@ java_lex (java_lval) if ( c == 'f' || c == 'F' || c == 'd' || c == 'D') { fflag = ((c == 'd') || (c == 'D')) ? 0 : 1; - stage = 4; /* So we fall through */ + stage = 4; /* So we fall through. */ } if ((c=='-' || c =='+') && stage == 2) @@ -1077,57 +1211,49 @@ java_lex (java_lval) { if (JAVA_ASCII_DIGIT (c)) seen_digit = 1; + if (stage == 2) + stage = 3; literal_token [literal_index++ ] = c; c = java_get_unicode (); } else { -#ifndef JC1_LITE - struct jpa_args a; -#endif - if (stage != 4) /* Don't push back fF/dD */ + if (stage != 4) /* Don't push back fF/dD. */ java_unget_unicode (); /* An exponent (if any) must have seen a digit. */ if (seen_exponent && !seen_digit) - java_lex_error ("Invalid FP literal", 0); + java_lex_error + ("Invalid FP literal, exponent must have digit", 0); literal_token [literal_index] = '\0'; JAVA_LEX_LIT (literal_token, radix); #ifndef JC1_LITE - a.literal_token = literal_token; - a.fflag = fflag; - a.java_lval = java_lval; - a.number_beginning = number_beginning; - if (do_float_handler (java_perform_atof, (PTR) &a)) - return FP_LIT_TK; - - JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double")); -#else - return FP_LIT_TK; + java_perform_atof (java_lval, literal_token, + fflag, number_beginning); #endif + return FP_LIT_TK; } } - } /* JAVA_ASCCI_FPCHAR (c) */ + } /* JAVA_ASCII_FPCHAR (c) */ /* Here we get back to converting the integral literal. */ - if (c == 'L' || c == 'l') + if (radix == 16 && ! found_hex_digits) + java_lex_error + ("0x must be followed by at least one hexadecimal digit", 0); + else if (radix == 8 && found_non_octal_digits) + java_lex_error ("Octal literal contains digit out of range", 0); + else if (c == 'L' || c == 'l') long_suffix = 1; - else if (radix == 16 && JAVA_ASCII_LETTER (c)) - java_lex_error ("Digit out of range in hexadecimal literal", 0); - else if (radix == 8 && JAVA_ASCII_DIGIT (c)) - java_lex_error ("Digit out of range in octal literal", 0); - else if (radix == 16 && !literal_index) - java_lex_error ("No digit specified for hexadecimal literal", 0); else java_unget_unicode (); #ifdef JAVA_LEX_DEBUG - literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */ + literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */ JAVA_LEX_LIT (literal_token, radix); #endif - /* This section of code is borrowed from gcc/c-lex.c */ + /* This section of code is borrowed from gcc/c-lex.c. */ if (!overflow) { bytes = GET_TYPE_PRECISION (long_type_node); @@ -1148,44 +1274,46 @@ java_lex (java_lval) } /* End borrowed section. */ - /* Range checking */ - if (long_suffix) +#ifndef JC1_LITE + /* Range checking. */ + value = build_int_2 (low, high); + /* Temporarily set type to unsigned. */ + SET_LVAL_NODE_TYPE (value, (long_suffix + ? unsigned_long_type_node + : unsigned_int_type_node)); + + /* For base 10 numbers, only values up to the highest value + (plus one) can be written. For instance, only ints up to + 2147483648 can be written. The special case of the largest + negative value is handled elsewhere. For other bases, any + number can be represented. */ + if (overflow || (radix == 10 + && tree_int_cst_lt (long_suffix + ? decimal_long_max + : decimal_int_max, + value))) { - /* 9223372036854775808L is valid if operand of a '-'. Otherwise - 9223372036854775807L is the biggest `long' literal that can be - expressed using a 10 radix. For other radixes, everything that - fits withing 64 bits is OK. */ - int hb = (high >> 31); - if (overflow || (hb && low && radix == 10) || - (hb && high & 0x7fffffff && radix == 10) || - (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10)) + if (long_suffix) JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal"); - } - else - { - /* 2147483648 is valid if operand of a '-'. Otherwise, - 2147483647 is the biggest `int' literal that can be - expressed using a 10 radix. For other radixes, everything - that fits within 32 bits is OK. As all literals are - signed, we sign extend here. */ - int hb = (low >> 31) & 0x1; - if (overflow || high || (hb && low & 0x7fffffff && radix == 10) || - (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10)) + else JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal"); - high = -hb; } - ctxp->minus_seen = 0; + + /* Sign extend the value. */ + SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node)); + force_fit_type (value, 0); + JAVA_RADIX10_FLAG (value) = radix == 10; +#else SET_LVAL_NODE_TYPE (build_int_2 (low, high), - (long_suffix ? long_type_node : int_type_node)); + long_suffix ? long_type_node : int_type_node); +#endif return INT_LIT_TK; } - ctxp->minus_seen = 0; - - /* Character literals */ + /* Character literals. */ if (c == '\'') { - unicode_t char_lit; + int char_lit; if ((c = java_get_unicode ()) == '\\') char_lit = java_parse_escape_sequence (); else @@ -1202,15 +1330,15 @@ java_lex (java_lval) if (c != '\'') java_lex_error ("Syntax error in character literal", 0); - if (c == JAVA_CHAR_ERROR) - char_lit = 0; /* We silently convert it to zero */ + if (char_lit == JAVA_CHAR_ERROR) + char_lit = 0; /* We silently convert it to zero. */ JAVA_LEX_CHAR_LIT (char_lit); SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node); return CHAR_LIT_TK; } - /* String literals */ + /* String literals. */ if (c == '"') { int no_error; @@ -1221,21 +1349,26 @@ java_lex (java_lval) { if (c == '\\') c = java_parse_escape_sequence (); - no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0); + if (c == JAVA_CHAR_ERROR) + { + no_error = 0; + c = 0; /* We silently convert it to zero. */ + } java_unicode_2_utf8 (c); } - if (c == '\n' || c == UEOF) /* ULT */ + if (c == '\n' || c == UEOF) /* ULT. */ { - lineno--; /* Refer to the line the terminator was seen */ - java_lex_error ("String not terminated at end of line.", 0); - lineno++; + input_line--; /* Refer to the line where the terminator was seen. */ + java_lex_error ("String not terminated at end of line", 0); + input_line++; } obstack_1grow (&temporary_obstack, '\0'); string = obstack_finish (&temporary_obstack); #ifndef JC1_LITE if (!no_error || (c != '"')) - java_lval->node = error_mark_node; /* Requires futher testing FIXME */ + java_lval->node = error_mark_node; /* FIXME: Requires further + testing. */ else java_lval->node = build_string (strlen (string), string); #endif @@ -1243,7 +1376,7 @@ java_lex (java_lval) return STRING_LIT_TK; } - /* Separator */ + /* Separator. */ switch (c) { case '(': @@ -1255,14 +1388,14 @@ java_lex (java_lval) case '{': JAVA_LEX_SEP (c); if (ctxp->ccb_indent == 1) - ctxp->first_ccb_indent1 = lineno; + ctxp->first_ccb_indent1 = input_line; ctxp->ccb_indent++; BUILD_OPERATOR (OCB_TK); case '}': JAVA_LEX_SEP (c); ctxp->ccb_indent--; if (ctxp->ccb_indent == 1) - ctxp->last_ccb_indent1 = lineno; + ctxp->last_ccb_indent1 = input_line; BUILD_OPERATOR (CCB_TK); case '[': JAVA_LEX_SEP (c); @@ -1282,7 +1415,7 @@ java_lex (java_lval) /* return DOT_TK; */ } - /* Operators */ + /* Operators. */ switch (c) { case '=': @@ -1394,7 +1527,6 @@ java_lex (java_lval) BUILD_OPERATOR2 (MINUS_ASSIGN_TK); default: java_unget_unicode (); - ctxp->minus_seen = 1; BUILD_OPERATOR (MINUS_TK); } @@ -1463,9 +1595,9 @@ java_lex (java_lval) BUILD_OPERATOR (NOT_TK); } - /* Keyword, boolean literal or null literal */ + /* Keyword, boolean literal or null literal. */ for (first_unicode = c, all_ascii = 1, ascii_index = 0; - JAVA_ID_CHAR_P (c); c = java_get_unicode ()) + c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ()) { java_unicode_2_utf8 (c); if (all_ascii && c >= 128) @@ -1475,14 +1607,15 @@ java_lex (java_lval) obstack_1grow (&temporary_obstack, '\0'); string = obstack_finish (&temporary_obstack); - java_unget_unicode (); + if (c != UEOF) + java_unget_unicode (); /* If we have something all ascii, we consider a keyword, a boolean literal, a null literal or an all ASCII identifier. Otherwise, this is an identifier (possibly not respecting formation rule). */ if (all_ascii) { - struct java_keyword *kw; + const struct java_keyword *kw; if ((kw=java_keyword (string, ascii_index))) { JAVA_LEX_KW (string); @@ -1491,7 +1624,7 @@ java_lex (java_lval) case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK: case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK: case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK: - case PRIVATE_TK: + case PRIVATE_TK: case STRICT_TK: SET_MODIFIER_CTX (kw->token); return MODIFIER_TK; case FLOAT_TK: @@ -1519,7 +1652,7 @@ java_lex (java_lval) SET_LVAL_NODE (char_type_node); return INTEGRAL_TK; - /* Keyword based literals */ + /* Keyword based literals. */ case TRUE_TK: case FALSE_TK: SET_LVAL_NODE ((kw->token == TRUE_TK ? @@ -1529,8 +1662,17 @@ java_lex (java_lval) SET_LVAL_NODE (null_pointer_node); return NULL_TK; + case ASSERT_TK: + if (flag_assert) + { + BUILD_OPERATOR (kw->token); + return kw->token; + } + else + break; + /* Some keyword we want to retain information on the location - they where found */ + they where found. */ case CASE_TK: case DEFAULT_TK: case SUPER_TK: @@ -1550,15 +1692,15 @@ java_lex (java_lval) } } - /* We may have and ID here */ - if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode)) + /* We may have an ID here. */ + if (JAVA_START_CHAR_P (first_unicode)) { JAVA_LEX_ID (string); java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string)); return ID_TK; } - /* Everything else is an invalid character in the input */ + /* Everything else is an invalid character in the input. */ { char lex_error_buffer [128]; sprintf (lex_error_buffer, "Invalid character `%s' in input", @@ -1568,9 +1710,42 @@ java_lex (java_lval) return 0; } +#ifndef JC1_LITE + +/* The exported interface to the lexer. */ +static int +java_lex (YYSTYPE *java_lval) +{ + int r; + + timevar_push (TV_LEX); + r = do_java_lex (java_lval); + timevar_pop (TV_LEX); + return r; +} + +/* This is called by the parser to see if an error should be generated + due to numeric overflow. This function only handles the particular + case of the largest negative value, and is only called in the case + where this value is not preceded by `-'. */ static void -java_unicode_2_utf8 (unicode) - unicode_t unicode; +error_if_numeric_overflow (tree value) +{ + if (TREE_CODE (value) == INTEGER_CST + && JAVA_RADIX10_FLAG (value) + && tree_int_cst_sgn (value) < 0) + { + if (TREE_TYPE (value) == long_type_node) + java_lex_error ("Numeric overflow for `long' literal", 0); + else + java_lex_error ("Numeric overflow for `int' literal", 0); + } +} + +#endif /* JC1_LITE */ + +static void +java_unicode_2_utf8 (unicode_t unicode) { if (RANGE (unicode, 0x01, 0x7f)) obstack_1grow (&temporary_obstack, (char)unicode); @@ -1581,7 +1756,7 @@ java_unicode_2_utf8 (unicode) obstack_1grow (&temporary_obstack, (unsigned char)(0x80 | (unicode & 0x3f))); } - else /* Range 0x800-0xffff */ + else /* Range 0x800-0xffff. */ { obstack_1grow (&temporary_obstack, (unsigned char)(0xe0 | (unicode & 0xf000) >> 12)); @@ -1594,23 +1769,23 @@ java_unicode_2_utf8 (unicode) #ifndef JC1_LITE static tree -build_wfl_node (node) - tree node; +build_wfl_node (tree node) { - return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col); + node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col); + /* Prevent java_complete_lhs from short-circuiting node (if constant). */ + TREE_TYPE (node) = NULL_TREE; + return node; } #endif static void -java_lex_error (msg, forward) - const char *msg ATTRIBUTE_UNUSED; - int forward ATTRIBUTE_UNUSED; +java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED) { #ifndef JC1_LITE ctxp->elc.line = ctxp->c_line->lineno; ctxp->elc.col = ctxp->c_line->char_col-1+forward; - /* Might be caught in the middle of some error report */ + /* Might be caught in the middle of some error report. */ ctxp->java_error_flag = 0; java_error (NULL); java_error (msg); @@ -1619,9 +1794,7 @@ java_lex_error (msg, forward) #ifndef JC1_LITE static int -java_is_eol (fp, c) - FILE *fp; - int c; +java_is_eol (FILE *fp, int c) { int next; switch (c) @@ -1640,18 +1813,17 @@ java_is_eol (fp, c) #endif char * -java_get_line_col (filename, line, col) - const char *filename ATTRIBUTE_UNUSED; - int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED; +java_get_line_col (const char *filename ATTRIBUTE_UNUSED, + int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED) { #ifdef JC1_LITE return 0; #else - /* Dumb implementation. Doesn't try to cache or optimize things. */ - /* First line of the file is line 1, first column is 1 */ + /* Dumb implementation. Doesn't try to cache or optimize things. */ + /* First line of the file is line 1, first column is 1. */ - /* COL == -1 means, at the CR/LF in LINE */ - /* COL == -2 means, at the first non space char in LINE */ + /* COL == -1 means, at the CR/LF in LINE. */ + /* COL == -2 means, at the first non space char in LINE. */ FILE *fp; int c, ccol, cline = 1; @@ -1660,14 +1832,14 @@ java_get_line_col (filename, line, col) char *base; if (!(fp = fopen (filename, "r"))) - fatal ("Can't open file - java_display_line_col"); + fatal_error ("can't open %s: %m", filename); while (cline != line) { c = getc (fp); if (c == EOF) { - static char msg[] = "<>"; + static const char msg[] = "<>"; obstack_grow (&temporary_obstack, msg, sizeof(msg)-1); goto have_line; } @@ -1675,7 +1847,7 @@ java_get_line_col (filename, line, col) cline++; } - /* Gather the chars of the current line in a buffer */ + /* Gather the chars of the current line in a buffer. */ for (;;) { c = getc (fp); @@ -1700,11 +1872,11 @@ java_get_line_col (filename, line, col) else first_non_space = 0; - /* Place the '^' a the right position */ + /* Place the '^' a the right position. */ base = obstack_base (&temporary_obstack); for (ccol = 1; ccol <= col+3; ccol++) { - /* Compute \t when reaching first_non_space */ + /* Compute \t when reaching first_non_space. */ char c = (first_non_space ? (base [ccol-1] == '\t' ? '\t' : ' ') : ' '); obstack_1grow (&temporary_obstack, c); @@ -1715,3 +1887,168 @@ java_get_line_col (filename, line, col) return obstack_finish (&temporary_obstack); #endif } + +#ifndef JC1_LITE +static int +utf8_cmp (const unsigned char *str, int length, const char *name) +{ + const unsigned char *limit = str + length; + int i; + + for (i = 0; name[i]; ++i) + { + int ch = UTF8_GET (str, limit); + if (ch != name[i]) + return ch - name[i]; + } + + return str == limit ? 0 : 1; +} + +/* A sorted list of all C++ keywords. */ + +static const char *const cxx_keywords[] = +{ + "_Complex", + "__alignof", + "__alignof__", + "__asm", + "__asm__", + "__attribute", + "__attribute__", + "__builtin_va_arg", + "__complex", + "__complex__", + "__const", + "__const__", + "__extension__", + "__imag", + "__imag__", + "__inline", + "__inline__", + "__label__", + "__null", + "__real", + "__real__", + "__restrict", + "__restrict__", + "__signed", + "__signed__", + "__typeof", + "__typeof__", + "__volatile", + "__volatile__", + "and", + "and_eq", + "asm", + "auto", + "bitand", + "bitor", + "bool", + "break", + "case", + "catch", + "char", + "class", + "compl", + "const", + "const_cast", + "continue", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "mutable", + "namespace", + "new", + "not", + "not_eq", + "operator", + "or", + "or_eq", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "return", + "short", + "signed", + "sizeof", + "static", + "static_cast", + "struct", + "switch", + "template", + "this", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "typeof", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while", + "xor", + "xor_eq" +}; + +/* Return true if NAME is a C++ keyword. */ + +int +cxx_keyword_p (const char *name, int length) +{ + int last = ARRAY_SIZE (cxx_keywords); + int first = 0; + int mid = (last + first) / 2; + int old = -1; + + for (mid = (last + first) / 2; + mid != old; + old = mid, mid = (last + first) / 2) + { + int kwl = strlen (cxx_keywords[mid]); + int min_length = kwl > length ? length : kwl; + int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); + + if (r == 0) + { + int i; + /* We've found a match if all the remaining characters are `$'. */ + for (i = min_length; i < length && name[i] == '$'; ++i) + ; + if (i == length) + return 1; + r = 1; + } + + if (r < 0) + last = mid; + else + first = mid; + } + return 0; +} +#endif /* JC1_LITE */