/* Language lexer for the GNU compiler for the Java(TM) language.
- Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+ Free Software Foundation, Inc.
Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
-This file is part of GNU CC.
+This file is part of GCC.
-GNU CC is free software; you can redistribute it and/or modify
+GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
-GNU CC is distributed in the hope that it will be useful,
+GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING. If not, write to
+along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex) that reads a Java ASCII source file
-possibly containing Unicode escape sequence or utf8 encoded characters
-and returns a token for everything found but comments, white spaces
-and line terminators. When necessary, it also fills the java_lval
-(yylval) union. It's implemented to be called by a re-entrant parser
-generated by Bison.
+ possibly containing Unicode escape sequences or UTF-8 encoded
+ characters and returns a token for everything found except comments,
+ white space and line terminators. When necessary, it also fills
+ the java_lval (yylval) union. It's implemented to be called by a
+ re-entrant parser generated by Bison.
-The lexical analysis conforms to the Java grammar described in "The
-Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
-Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
+ The lexical analysis conforms to the Java grammar described in "The
+ Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
+ Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
-#include <stdio.h>
-#include <string.h>
-#include <strings.h>
-
-#ifdef JAVA_LEX_DEBUG
-#include <ctype.h>
+#include "keyword.h"
+#include "flags.h"
+#include "chartables.h"
+#ifndef JC1_LITE
+#include "timevar.h"
#endif
-#ifdef inline /* javaop.h redefines inline as static */
-#undef inline
+/* Function declarations. */
+static char *java_sprint_unicode (struct java_line *, int);
+static void java_unicode_2_utf8 (unicode_t);
+static void java_lex_error (const char *, int);
+#ifndef JC1_LITE
+static int do_java_lex (YYSTYPE *);
+static int java_lex (YYSTYPE *);
+static int java_is_eol (FILE *, int);
+static tree build_wfl_node (tree);
#endif
-#include "keyword.h"
-
-#ifndef SEEK_SET
-#include <unistd.h>
+static void java_store_unicode (struct java_line *, unicode_t, int);
+static int java_parse_escape_sequence (void);
+static int java_start_char_p (unicode_t);
+static int java_part_char_p (unicode_t);
+static int java_space_char_p (unicode_t);
+static void java_parse_doc_section (int);
+static void java_parse_end_comment (int);
+static int java_get_unicode (void);
+static int java_read_unicode (java_lexer *, int *);
+static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
+static void java_store_unicode (struct java_line *, unicode_t, int);
+static int java_read_char (java_lexer *);
+static void java_allocate_new_line (void);
+static void java_unget_unicode (void);
+static unicode_t java_sneak_unicode (void);
+#ifndef JC1_LITE
+static int utf8_cmp (const unsigned char *, int, const char *);
#endif
+java_lexer *java_new_lexer (FILE *, const char *);
#ifndef JC1_LITE
-extern struct obstack *expression_obstack;
+static void error_if_numeric_overflow (tree);
#endif
-/* Function declaration */
-static int java_lineterminator PROTO ((unicode_t));
-static char *java_sprint_unicode PROTO ((struct java_line *, int));
-static void java_unicode_2_utf8 PROTO ((unicode_t));
-static void java_lex_error PROTO ((char *, int));
-static int java_is_eol PROTO ((FILE *, int));
-static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
-static unicode_t java_parse_escape_sequence PROTO (());
-static int java_letter_or_digit_p PROTO ((unicode_t));
-static int java_parse_doc_section PROTO ((unicode_t));
-static void java_parse_end_comment PROTO (());
-static unicode_t java_get_unicode PROTO (());
-static unicode_t java_read_unicode PROTO ((int, int *));
-static void java_store_unicode PROTO ((struct java_line *, unicode_t, int));
-static unicode_t java_read_char PROTO (());
-static void java_allocate_new_line PROTO (());
-static void java_unget_unicode PROTO (());
-static unicode_t java_sneak_unicode PROTO (());
+#ifdef HAVE_ICONV
+/* This is nonzero if we have initialized `need_byteswap'. */
+static int byteswap_init = 0;
+
+/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
+ big-endian order -- not native endian order. We handle this by
+ doing a conversion once at startup and seeing what happens. This
+ flag holds the results of this determination. */
+static int need_byteswap = 0;
+#endif
void
-java_init_lex ()
+java_init_lex (FILE *finput, const char *encoding)
{
+#ifndef JC1_LITE
int java_lang_imported = 0;
-#ifndef JC1_LITE
if (!java_lang_id)
java_lang_id = get_identifier ("java.lang");
+ if (!inst_id)
+ inst_id = get_identifier ("inst$");
+ if (!wpv_id)
+ wpv_id = get_identifier ("write_parm_value$");
if (!java_lang_imported)
{
wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
if (!wfl_string_buffer)
wfl_string_buffer =
- build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
+ build_expr_wfl (get_identifier (flag_emit_class_files
+ ? "java.lang.StringBuffer"
+ : "gnu.gcj.runtime.StringBuffer"),
+ NULL, 0, 0);
if (!wfl_to_string)
wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
- ctxp->static_initialized = ctxp->non_static_initialized =
- ctxp->incomplete_class = NULL_TREE;
-
- bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
- bzero (current_jcf, sizeof (JCF));
+ CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
+ CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
+
+ memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
+ current_jcf = ggc_alloc_cleared (sizeof (JCF));
ctxp->current_parsed_class = NULL;
ctxp->package = NULL_TREE;
#endif
ctxp->filename = input_filename;
- ctxp->lineno = lineno = 0;
+ ctxp->lineno = input_line = 0;
ctxp->p_line = NULL;
ctxp->c_line = NULL;
- ctxp->unget_utf8_value = 0;
- ctxp->minus_seen = 0;
ctxp->java_error_flag = 0;
+ ctxp->lexer = java_new_lexer (finput, encoding);
}
static char *
-java_sprint_unicode (line, i)
- struct java_line *line;
- int i;
+java_sprint_unicode (struct java_line *line, int i)
{
static char buffer [10];
if (line->unicode_escape_p [i] || line->line [i] > 128)
}
static unicode_t
-java_sneak_unicode ()
+java_sneak_unicode (void)
{
return (ctxp->c_line->line [ctxp->c_line->current]);
}
static void
-java_unget_unicode ()
+java_unget_unicode (void)
{
if (!ctxp->c_line->current)
- fatal ("can't unget unicode - java_unget_unicode");
+ /* Can't unget unicode. */
+ abort ();
+
ctxp->c_line->current--;
ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
}
-void
-java_allocate_new_line ()
+static void
+java_allocate_new_line (void)
{
unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
char ahead_escape_p = (ctxp->c_line ?
free (ctxp->p_line);
}
ctxp->p_line = ctxp->c_line;
- ctxp->c_line = NULL; /* Reallocated */
+ ctxp->c_line = NULL; /* Reallocated. */
}
if (!ctxp->c_line)
{
- ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
+ ctxp->c_line = xmalloc (sizeof (struct java_line));
ctxp->c_line->max = JAVA_LINE_MAX;
- ctxp->c_line->line = (unicode_t *)xmalloc
- (sizeof (unicode_t)*ctxp->c_line->max);
+ ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
ctxp->c_line->unicode_escape_p =
- (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
+ xmalloc (sizeof (char)*ctxp->c_line->max);
ctxp->c_line->white_space_only = 0;
}
}
ctxp->c_line->ahead [0] = 0;
ctxp->c_line->unicode_escape_ahead_p = 0;
- ctxp->c_line->lineno = ++lineno;
+ ctxp->c_line->lineno = ++input_line;
ctxp->c_line->white_space_only = 1;
}
-static unicode_t
-java_read_char ()
-{
- int c;
- int c1, c2;
+/* Create a new lexer object. */
- if (ctxp->unget_utf8_value)
+java_lexer *
+java_new_lexer (FILE *finput, const char *encoding)
+{
+ java_lexer *lex = xmalloc (sizeof (java_lexer));
+ int enc_error = 0;
+
+ lex->finput = finput;
+ lex->bs_count = 0;
+ lex->unget_value = 0;
+ lex->hit_eof = 0;
+ lex->encoding = encoding;
+
+#ifdef HAVE_ICONV
+ lex->handle = iconv_open ("UCS-2", encoding);
+ if (lex->handle != (iconv_t) -1)
{
- int to_return = ctxp->unget_utf8_value;
- ctxp->unget_utf8_value = 0;
- return (to_return);
- }
+ lex->first = -1;
+ lex->last = -1;
+ lex->out_first = -1;
+ lex->out_last = -1;
+ lex->read_anything = 0;
+ lex->use_fallback = 0;
+
+ /* Work around broken iconv() implementations by doing checking at
+ runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
+ then all UCS-2 encoders will be broken. Perhaps not a valid
+ assumption. */
+ if (! byteswap_init)
+ {
+ iconv_t handle;
- c = GETC ();
+ byteswap_init = 1;
- if (c < 128)
- return (unicode_t)c;
- if (c == EOF)
- return UEOF;
+ handle = iconv_open ("UCS-2", "UTF-8");
+ if (handle != (iconv_t) -1)
+ {
+ unicode_t result;
+ unsigned char in[3];
+ char *inp, *outp;
+ size_t inc, outc, r;
+
+ /* This is the UTF-8 encoding of \ufeff. */
+ in[0] = 0xef;
+ in[1] = 0xbb;
+ in[2] = 0xbf;
+
+ inp = (char *) in;
+ inc = 3;
+ outp = (char *) &result;
+ outc = 2;
+
+ r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
+ &outp, &outc);
+ iconv_close (handle);
+ /* Conversion must be complete for us to use the result. */
+ if (r != (size_t) -1 && inc == 0 && outc == 0)
+ need_byteswap = (result != 0xfeff);
+ }
+ }
+
+ lex->byte_swap = need_byteswap;
+ }
else
+#endif /* HAVE_ICONV */
{
- if (c & (0xe0 == 0xc0))
+ /* If iconv failed, use the internal decoder if the default
+ encoding was requested. This code is used on platforms where
+ iconv exists but is insufficient for our needs. For
+ instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
+
+ On Solaris the default encoding, as returned by nl_langinfo(),
+ is `646' (aka ASCII), but the Solaris iconv_open() doesn't
+ understand that. We work around that by pretending
+ `646' to be the same as UTF-8. */
+ if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
+ enc_error = 1;
+#ifdef HAVE_ICONV
+ else
{
- c1 = GETC ();
- if (c1 & (0xc0 == 0x80))
- return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+ lex->use_fallback = 1;
+ lex->encoding = "UTF-8";
}
- else if (c & (0xf0 == 0xe0))
- {
- c1 = GETC ();
- if (c1 & (0xc0 == 0x80))
+#endif /* HAVE_ICONV */
+ }
+
+ if (enc_error)
+ fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
+
+ return lex;
+}
+
+/* Release LEX, a lexer object created by java_new_lexer.  When iconv
+   support is compiled in and LEX is not using the internal fallback
+   decoder, its conversion handle is closed before the object itself
+   is freed.  A null LEX is accepted and ignored, as with free ().  */
+void
+java_destroy_lexer (java_lexer *lex)
+{
+  if (lex == NULL)
+    return;
+
+#ifdef HAVE_ICONV
+  /* java_new_lexer only sets use_fallback when iconv_open failed, so
+     the handle is open exactly when the fallback is not in use.  */
+  if (! lex->use_fallback)
+    iconv_close (lex->handle);
+#endif
+  free (lex);
+}
+
+static int
+java_read_char (java_lexer *lex)
+{
+ if (lex->unget_value)
+ {
+ unicode_t r = lex->unget_value;
+ lex->unget_value = 0;
+ return r;
+ }
+
+#ifdef HAVE_ICONV
+ if (! lex->use_fallback)
+ {
+ size_t ir, inbytesleft, in_save, out_count, out_save;
+ char *inp, *outp;
+ unicode_t result;
+
+ /* If there is data which has already been converted, use it. */
+ if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+ {
+ lex->out_first = 0;
+ lex->out_last = 0;
+
+ while (1)
{
- c2 = GETC ();
- if (c2 & (0xc0 == 0x80))
- return (unicode_t)(((c & 0xf) << 12) +
- (( c1 & 0x3f) << 6) + (c2 & 0x3f));
+ /* See if we need to read more data. If FIRST == 0 then
+ the previous conversion attempt ended in the middle of
+ a character at the end of the buffer. Otherwise we
+ only have to read if the buffer is empty. */
+ if (lex->first == 0 || lex->first >= lex->last)
+ {
+ int r;
+
+ if (lex->first >= lex->last)
+ {
+ lex->first = 0;
+ lex->last = 0;
+ }
+ if (feof (lex->finput))
+ return UEOF;
+ r = fread (&lex->buffer[lex->last], 1,
+ sizeof (lex->buffer) - lex->last,
+ lex->finput);
+ lex->last += r;
+ }
+
+ inbytesleft = lex->last - lex->first;
+ out_count = sizeof (lex->out_buffer) - lex->out_last;
+
+ if (inbytesleft == 0)
+ {
+ /* We've tried to read and there is nothing left. */
+ return UEOF;
+ }
+
+ in_save = inbytesleft;
+ out_save = out_count;
+ inp = &lex->buffer[lex->first];
+ outp = (char *) &lex->out_buffer[lex->out_last];
+ ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
+ &inbytesleft, &outp, &out_count);
+
+ /* If we haven't read any bytes, then look to see if we
+ have read a BOM. */
+ if (! lex->read_anything && out_save - out_count >= 2)
+ {
+ unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
+ if (uc == 0xfeff)
+ {
+ lex->byte_swap = 0;
+ lex->out_first += 2;
+ }
+ else if (uc == 0xfffe)
+ {
+ lex->byte_swap = 1;
+ lex->out_first += 2;
+ }
+ lex->read_anything = 1;
+ }
+
+ if (lex->byte_swap)
+ {
+ unsigned int i;
+ for (i = 0; i < out_save - out_count; i += 2)
+ {
+ char t = lex->out_buffer[lex->out_last + i];
+ lex->out_buffer[lex->out_last + i]
+ = lex->out_buffer[lex->out_last + i + 1];
+ lex->out_buffer[lex->out_last + i + 1] = t;
+ }
+ }
+
+ lex->first += in_save - inbytesleft;
+ lex->out_last += out_save - out_count;
+
+ /* If we converted anything at all, move along. */
+ if (out_count != out_save)
+ break;
+
+ if (ir == (size_t) -1)
+ {
+ if (errno == EINVAL)
+ {
+ /* This is ok. This means that the end of our buffer
+ is in the middle of a character sequence. We just
+ move the valid part of the buffer to the beginning
+ to force a read. */
+ memmove (&lex->buffer[0], &lex->buffer[lex->first],
+ lex->last - lex->first);
+ lex->last -= lex->first;
+ lex->first = 0;
+ }
+ else
+ {
+ /* A more serious error. */
+ char buffer[128];
+ sprintf (buffer,
+ "Unrecognized character for encoding '%s'",
+ lex->encoding);
+ java_lex_error (buffer, 0);
+ return UEOF;
+ }
+ }
}
}
- java_lex_error ("Bad utf8 encoding", 0);
+
+ if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+ {
+ /* Don't have any data. */
+ return UEOF;
+ }
+
+ /* Success. */
+ result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
+ lex->out_first += 2;
+ return result;
}
- return 0;
+ else
+#endif /* HAVE_ICONV */
+ {
+ int c, c1, c2;
+ c = getc (lex->finput);
+
+ if (c == EOF)
+ return UEOF;
+ if (c < 128)
+ return (unicode_t) c;
+ else
+ {
+ if ((c & 0xe0) == 0xc0)
+ {
+ c1 = getc (lex->finput);
+ if ((c1 & 0xc0) == 0x80)
+ {
+ unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+ /* Check for valid 2-byte characters. We explicitly
+ allow \0 because this encoding is common in the
+ Java world. */
+ if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+ return r;
+ }
+ }
+ else if ((c & 0xf0) == 0xe0)
+ {
+ c1 = getc (lex->finput);
+ if ((c1 & 0xc0) == 0x80)
+ {
+ c2 = getc (lex->finput);
+ if ((c2 & 0xc0) == 0x80)
+ {
+ unicode_t r = (unicode_t)(((c & 0xf) << 12) +
+ (( c1 & 0x3f) << 6)
+ + (c2 & 0x3f));
+ /* Check for valid 3-byte characters.
+ Don't allow surrogate, \ufffe or \uffff. */
+ if (IN_RANGE (r, 0x800, 0xffff)
+ && ! IN_RANGE (r, 0xd800, 0xdfff)
+ && r != 0xfffe && r != 0xffff)
+ return r;
+ }
+ }
+ }
+
+ /* We simply don't support invalid characters. We also
+ don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+ cannot be valid Java characters. */
+ java_lex_error ("malformed UTF-8 character", 0);
+ }
+ }
+
+ /* We only get here on error. */
+ return UEOF;
}
static void
-java_store_unicode (l, c, unicode_escape_p)
- struct java_line *l;
- unicode_t c;
- int unicode_escape_p;
+java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
{
if (l->size == l->max)
{
l->max += JAVA_LINE_MAX;
- l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
- l->unicode_escape_p = (char *)realloc (l->unicode_escape_p,
- sizeof (char)*l->max);
+ l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
+ l->unicode_escape_p = xrealloc (l->unicode_escape_p,
+ sizeof (char)*l->max);
}
l->line [l->size] = c;
l->unicode_escape_p [l->size++] = unicode_escape_p;
}
-static unicode_t
-java_read_unicode (term_context, unicode_escape_p)
- int term_context;
- int *unicode_escape_p;
+static int
+java_read_unicode (java_lexer *lex, int *unicode_escape_p)
{
- unicode_t c;
- long i, base;
+ int c;
- c = java_read_char ();
+ c = java_read_char (lex);
*unicode_escape_p = 0;
if (c != '\\')
- return ((term_context ? c :
- java_lineterminator (c) ? '\n' : (unicode_t)c));
-
- /* Count the number of preceeding '\' */
- for (base = ftell (finput), i = base-2; c == '\\';)
- {
- fseek (finput, i--, SEEK_SET);
- c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
+ {
+ lex->bs_count = 0;
+ return c;
}
- fseek (finput, base, SEEK_SET);
- if ((base-i-3)%2 == 0) /* If odd number of \ seen */
+
+ ++lex->bs_count;
+ if ((lex->bs_count) % 2 == 1)
{
- c = java_read_char ();
+ /* Odd number of \ seen. */
+ c = java_read_char (lex);
if (c == 'u')
{
- unsigned short unicode = 0;
+ unicode_t unicode = 0;
int shift = 12;
- /* Next should be 4 hex digits, otherwise it's an error.
- The hex value is converted into the unicode, pushed into
- the Unicode stream. */
- for (shift = 12; shift >= 0; shift -= 4)
+
+ /* Recognize any number of `u's in \u. */
+ while ((c = java_read_char (lex)) == 'u')
+ ;
+
+ shift = 12;
+ do
{
- if ((c = java_read_char ()) == UEOF)
- return UEOF;
- if (c >= '0' && c <= '9')
- unicode |= (unicode_t)((c-'0') << shift);
- else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
- unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
+ if (c == UEOF)
+ {
+ java_lex_error ("prematurely terminated \\u sequence", 0);
+ return UEOF;
+ }
+
+ if (hex_p (c))
+ unicode |= (unicode_t)(hex_value (c) << shift);
else
- java_lex_error
- ("Non hex digit in Unicode escape sequence", 0);
+ {
+ java_lex_error ("non-hex digit in \\u sequence", 0);
+ break;
+ }
+
+ c = java_read_char (lex);
+ shift -= 4;
}
+ while (shift >= 0);
+
+ if (c != UEOF)
+ lex->unget_value = c;
+
+ lex->bs_count = 0;
*unicode_escape_p = 1;
- return (term_context ? unicode :
- (java_lineterminator (c) ? '\n' : unicode));
+ return unicode;
}
- UNGETC (c);
+ lex->unget_value = c;
}
- return (unicode_t)'\\';
+ return (unicode_t) '\\';
}
-static unicode_t
-java_get_unicode ()
+static int
+java_read_unicode_collapsing_terminators (java_lexer *lex,
+ int *unicode_escape_p)
{
- /* It's time to read a line when... */
- if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
+ int c = java_read_unicode (lex, unicode_escape_p);
+
+ if (c == '\r')
{
- unicode_t c;
- java_allocate_new_line ();
- if (ctxp->c_line->line[0] != '\n')
- for (;;)
- {
- int unicode_escape_p;
- c = java_read_unicode (0, &unicode_escape_p);
- java_store_unicode (ctxp->c_line, c, unicode_escape_p);
- if (ctxp->c_line->white_space_only
- && !JAVA_WHITE_SPACE_P (c) && c!='\n')
- ctxp->c_line->white_space_only = 0;
- if ((c == '\n') || (c == UEOF))
- break;
- }
+ /* We have to read ahead to see if we got \r\n. In that case we
+ return a single line terminator. */
+ int dummy;
+ c = java_read_unicode (lex, &dummy);
+ if (c != '\n' && c != UEOF)
+ lex->unget_value = c;
+ /* In either case we must return a newline. */
+ c = '\n';
}
- ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
- JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
- return ctxp->c_line->line [ctxp->c_line->current++];
+
+ return c;
}
static int
-java_lineterminator (c)
- unicode_t c;
+java_get_unicode (void)
{
- int unicode_escape_p;
- if (c == '\n') /* CR */
- {
- if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
- {
- ctxp->c_line->ahead [0] = c;
- ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
- }
- return 1;
- }
- else if (c == '\r') /* LF */
+ /* It's time to read a line when... */
+ if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
{
- if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
+ int c;
+ int found_chars = 0;
+
+ if (ctxp->lexer->hit_eof)
+ return UEOF;
+
+ java_allocate_new_line ();
+ if (ctxp->c_line->line[0] != '\n')
{
- ctxp->c_line->ahead [0] = c;
- ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
+ for (;;)
+ {
+ int unicode_escape_p;
+ c = java_read_unicode_collapsing_terminators (ctxp->lexer,
+ &unicode_escape_p);
+ if (c != UEOF)
+ {
+ found_chars = 1;
+ java_store_unicode (ctxp->c_line, c, unicode_escape_p);
+ if (ctxp->c_line->white_space_only
+ && !JAVA_WHITE_SPACE_P (c)
+ && c != '\n')
+ ctxp->c_line->white_space_only = 0;
+ }
+ if ((c == '\n') || (c == UEOF))
+ break;
+ }
+
+ if (c == UEOF && ! found_chars)
+ {
+ ctxp->lexer->hit_eof = 1;
+ return UEOF;
+ }
}
- return 1;
}
- else
- return 0;
+ ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
+ JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
+ return ctxp->c_line->line [ctxp->c_line->current++];
}
-/* Parse the end of a C style comment */
+/* Parse the end of a C style comment.
+ * C is the first character following the '/' and '*'. */
static void
-java_parse_end_comment ()
+java_parse_end_comment (int c)
{
- unicode_t c;
-
- for (c = java_get_unicode ();; c = java_get_unicode ())
+ for ( ;; c = java_get_unicode ())
{
switch (c)
{
case UEOF:
java_lex_error ("Comment not terminated at end of input", 0);
+ return;
case '*':
switch (c = java_get_unicode ())
{
case UEOF:
java_lex_error ("Comment not terminated at end of input", 0);
+ return;
case '/':
return;
- case '*': /* reparse only '*' */
+ case '*': /* Reparse only '*'. */
java_unget_unicode ();
}
}
of a documentation comment line (ignoring white space and any `*'
character). Parsed keyword(s): @DEPRECATED. */
-static int
-java_parse_doc_section (c)
- unicode_t c;
+static void
+java_parse_doc_section (int c)
{
- int valid_tag = 0, seen_star;
+ int last_was_star;
+
+ /* We reset this here, because only the most recent doc comment
+ applies to the following declaration. */
+ ctxp->deprecated = 0;
- while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
+ /* We loop over all the lines of the comment. We'll eventually exit
+ if we hit EOF prematurely, or when we see the comment
+ terminator. */
+ while (1)
{
- switch (c)
+ /* These first steps need only be done if we're still looking
+ for the deprecated tag. If we've already seen it, we might
+ as well skip looking for it again. */
+ if (! ctxp->deprecated)
{
- case '*':
- seen_star = 1;
- break;
- case '\n': /* ULT */
- valid_tag = 1;
- default:
- seen_star = 0;
- }
- c = java_get_unicode();
- }
-
- if (c == UEOF)
- java_lex_error ("Comment not terminated at end of input", 0);
-
- if (seen_star && (c == '/'))
- return 1; /* Goto step1 in caller */
+ /* Skip whitespace and '*'s. We must also check for the end
+ of the comment here. */
+ while (JAVA_WHITE_SPACE_P (c) || c == '*')
+ {
+ last_was_star = (c == '*');
+ c = java_get_unicode ();
+ if (last_was_star && c == '/')
+ {
+ /* We just saw the comment terminator. */
+ return;
+ }
+ }
- /* We're parsing @deprecated */
- if (valid_tag && (c == '@'))
- {
- char tag [10];
- int tag_index = 0;
+ if (c == UEOF)
+ goto eof;
+
+ if (c == '@')
+ {
+ const char *deprecated = "@deprecated";
+ int i;
+
+ for (i = 0; deprecated[i]; ++i)
+ {
+ if (c != deprecated[i])
+ break;
+ /* We write the code in this way, with the
+ update at the end, so that after the loop
+ we're left with the next character in C. */
+ c = java_get_unicode ();
+ }
+
+ if (c == UEOF)
+ goto eof;
- while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
+ /* @deprecated must be followed by a space or newline.
+ We also allow a '*' in case it appears just before
+ the end of a comment. In this position only we also
+ must allow any Unicode space character. */
+ if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
+ {
+ if (! deprecated[i])
+ ctxp->deprecated = 1;
+ }
+ }
+ }
+
+ /* We've examined the relevant content from this line. Now we
+ skip the remaining characters and start over with the next
+ line. We also check for end of comment here. */
+ while (c != '\n' && c != UEOF)
{
+ last_was_star = (c == '*');
c = java_get_unicode ();
- tag [tag_index++] = c;
+ if (last_was_star && c == '/')
+ return;
}
-
- if (c == UEOF)
- java_lex_error ("Comment not terminated at end of input", 0);
-
- java_unget_unicode ();
- tag [tag_index] = '\0';
- if (!strcmp (tag, "deprecated"))
- ctxp->deprecated = 1;
+ if (c == UEOF)
+ goto eof;
+ /* We have to advance past the \n. */
+ c = java_get_unicode ();
+ if (c == UEOF)
+ goto eof;
}
- return 0;
+
+ eof:
+ java_lex_error ("Comment not terminated at end of input", 0);
}
-/* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
- will return a wrong result. */
+/* Return true if C is a valid start character for a Java identifier.
+ This is only called if C >= 128 -- smaller values are handled
+ inline. However, this function handles all values anyway. */
static int
-java_letter_or_digit_p (c)
- unicode_t c;
+java_start_char_p (unicode_t c)
{
- return _JAVA_LETTER_OR_DIGIT_P (c);
+ unsigned int hi = c / 256;
+ const char *const page = type_table[hi];
+ unsigned long val = (unsigned long) page;
+ int flags;
+
+ if ((val & ~ LETTER_MASK) != 0)
+ flags = page[c & 255];
+ else
+ flags = val;
+
+ return flags & LETTER_START;
}
-static unicode_t
-java_parse_escape_sequence ()
+/* Tell whether C may appear inside a Java identifier after its first
+   character.  Callers only use this for C >= 128 (smaller values are
+   tested inline), but every value is handled here.  The result is
+   nonzero when the LETTER_PART flag is set for C.  */
+static int
+java_part_char_p (unicode_t c)
+{
+  const char *const entry = type_table[c / 256];
+  const unsigned long bits = (unsigned long) entry;
+
+  /* An entry with no bits set outside LETTER_MASK is itself the flag
+     word used for the whole page; any other entry is indexed by the
+     low byte of C to fetch the flags.  */
+  if ((bits & ~ LETTER_MASK) == 0)
+    return (int) bits & LETTER_PART;
+
+  return entry[c & 255] & LETTER_PART;
+}
+
+/* Return nonzero if C is whitespace, i.e. if the flags found for C
+   through type_table include LETTER_SPACE.  */
+static int
+java_space_char_p (unicode_t c)
+{
+  const unsigned int page_index = c / 256;
+  const char *const page = type_table[page_index];
+  const unsigned long raw = (unsigned long) page;
+  /* RAW is used directly as the flags when it has no bits outside
+     LETTER_MASK; otherwise PAGE is indexed by the low byte of C.  */
+  const int flags = (raw & ~ LETTER_MASK) != 0 ? page[c & 255] : (int) raw;
+
+  return flags & LETTER_SPACE;
+}
+
+static int
+java_parse_escape_sequence (void)
{
unicode_t char_lit;
- unicode_t c;
+ int c;
switch (c = java_get_unicode ())
{
case '\\':
return (unicode_t)0x5c;
case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
+ case '5': case '6': case '7':
{
int octal_escape[3];
int octal_escape_index = 0;
-
- for (; octal_escape_index < 3 && RANGE (c, '0', '9');
+ int max = 3;
+ int i, shift;
+
+ for (; octal_escape_index < max && RANGE (c, '0', '7');
c = java_get_unicode ())
- octal_escape [octal_escape_index++] = c;
+ {
+ if (octal_escape_index == 0 && c > '3')
+ {
+ /* According to the grammar, `\477' has a well-defined
+ meaning -- it is `\47' followed by `7'. */
+ --max;
+ }
+ octal_escape [octal_escape_index++] = c;
+ }
java_unget_unicode ();
- if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
- {
- java_lex_error ("Literal octal escape out of range", 0);
- return JAVA_CHAR_ERROR;
- }
- else
- {
- int i, shift;
- for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
- i < octal_escape_index; i++, shift -= 3)
- char_lit |= (octal_escape [i] - '0') << shift;
+ for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
+ i < octal_escape_index; i++, shift -= 3)
+ char_lit |= (octal_escape [i] - '0') << shift;
- return (char_lit);
- }
- break;
+ return char_lit;
}
- case '\n':
- return '\n'; /* ULT, caught latter as a specific error */
default:
- java_lex_error ("Illegal character in escape sequence", 0);
+ java_lex_error ("Invalid character in escape sequence", 0);
return JAVA_CHAR_ERROR;
}
}
-int
+#ifndef JC1_LITE
+#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
+
+/* Subroutine of java_lex: converts floating-point literals to tree
+ nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
+ store the result. FFLAG indicates whether the literal was tagged
+ with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
+ is the position in the current line (a value for
+ ctxp->c_line->current) that is installed while an error is
+ reported. Out-of-range literals (infinity or NaN after
+ conversion) are replaced by zero; literals that convert to zero
+ although their mantissa has a nonzero digit are reported as
+ underflow. */
+
+static void java_perform_atof (YYSTYPE *, char *, int, int);
+
+static void
+java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
+ int number_beginning)
+{
+ REAL_VALUE_TYPE value;
+ tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
+
+ /* Convert the literal text using the machine mode of the chosen
+ type (float or double). */
+ SET_REAL_VALUE_ATOF (value,
+ REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
+
+ if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
+ {
+ /* NOTE(review): JAVA_FLOAT_RANGE_ERROR is defined elsewhere;
+ presumably it reports the range error -- confirm. Zero is
+ used as the value afterwards. */
+ JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
+ value = DCONST0;
+ }
+ else if (IS_ZERO (value))
+ {
+ /* We check to see if the value is really 0 or if we've found an
+ underflow. We do this in the most primitive imaginable way. */
+ int really_zero = 1;
+ char *p = literal_token;
+ /* Skip a leading minus sign, then scan up to the exponent
+ marker (or the end of the string): anything other than '0'
+ or '.' means the written value was not zero. */
+ if (*p == '-')
+ ++p;
+ while (*p && *p != 'e' && *p != 'E')
+ {
+ if (*p != '0' && *p != '.')
+ {
+ really_zero = 0;
+ break;
+ }
+ ++p;
+ }
+ if (! really_zero)
+ {
+ /* Temporarily move the line cursor to NUMBER_BEGINNING around
+ the call to java_lex_error, then restore it; presumably so
+ the error points at the start of the literal. */
+ int i = ctxp->c_line->current;
+ ctxp->c_line->current = number_beginning;
+ java_lex_error ("Floating point literal underflow", 0);
+ ctxp->c_line->current = i;
+ }
+ }
+
+ /* Build the real constant of the chosen type and hand it to
+ SET_LVAL_NODE_TYPE (defined elsewhere; presumably it stores the
+ node through JAVA_LVAL). */
+ SET_LVAL_NODE_TYPE (build_real (type, value), type);
+}
+#endif
+
+static int yylex (YYSTYPE *);
+
+static int
#ifdef JC1_LITE
-yylex (java_lval)
+yylex (YYSTYPE *java_lval)
#else
-java_lex (java_lval)
+do_java_lex (YYSTYPE *java_lval)
#endif
- YYSTYPE *java_lval;
{
- unicode_t c, first_unicode;
+ int c;
+ unicode_t first_unicode;
int ascii_index, all_ascii;
char *string;
/* Translation of the Unicode escape in the raw stream of Unicode
characters. Takes care of line terminator. */
step1:
- /* Skip white spaces: SP, TAB and FF or ULT */
+ /* Skip white spaces: SP, TAB and FF or ULT. */
for (c = java_get_unicode ();
c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
if (c == '\n')
ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
- if (c == 0x1a) /* CTRL-Z */
+ if (c == 0x1a) /* CTRL-Z. */
{
if ((c = java_get_unicode ()) == UEOF)
- return 0; /* Ok here */
+ return 0; /* Ok here. */
else
- java_unget_unicode (); /* Caught latter at the end the function */
+ java_unget_unicode (); /* Caught later, at the end of the
+ function. */
}
- /* Handle EOF here */
- if (c == UEOF) /* Should probably do something here... */
+ /* Handle EOF here. */
+ if (c == UEOF) /* Should probably do something here... */
return 0;
/* Take care of eventual comments. */
switch (c = java_get_unicode ())
{
case '/':
- for (c = java_get_unicode ();;c = java_get_unicode ())
+ for (;;)
{
+ c = java_get_unicode ();
if (c == UEOF)
- java_lex_error ("Comment not terminated at end of input", 0);
+ {
+ /* It is ok to end a `//' comment with EOF, unless
+ we're being pedantic. */
+ if (pedantic)
+ java_lex_error ("Comment not terminated at end of input",
+ 0);
+ return 0;
+ }
if (c == '\n') /* ULT */
goto step1;
}
case '*':
if ((c = java_get_unicode ()) == '*')
{
- if ((c = java_get_unicode ()) == '/')
- goto step1; /* Empy documentation comment */
- else if (java_parse_doc_section (c))
- goto step1;
+ c = java_get_unicode ();
+ if (c == '/')
+ {
+ /* Empty documentation comment. We have to reset
+ the deprecation marker as only the most recent
+ doc comment applies. */
+ ctxp->deprecated = 0;
+ }
+ else
+ java_parse_doc_section (c);
}
else
- java_unget_unicode ();
-
- java_parse_end_comment ();
+ java_parse_end_comment ((c = java_get_unicode ()));
goto step1;
break;
default:
ctxp->elc.prev_col = ctxp->elc.col;
ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
if (ctxp->elc.col < 0)
- fatal ("ctxp->elc.col < 0 - java_lex");
+ abort ();
- /* Numeric literals */
+ /* Numeric literals. */
if (JAVA_ASCII_DIGIT (c) || (c == '.'))
{
- /* This section of code is borrowed from gcc/c-lex.c */
+ /* This section of code is borrowed from gcc/c-lex.c. */
#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
int parts[TOTAL_PARTS];
HOST_WIDE_INT high, low;
- /* End borrowed section */
+ /* End borrowed section. */
char literal_token [256];
int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
+ int found_hex_digits = 0, found_non_octal_digits = 0;
int i;
+#ifndef JC1_LITE
int number_beginning = ctxp->c_line->current;
+ tree value;
+#endif
- /* We might have a . separator instead of a FP like .[0-9]* */
+ /* We might have a . separator instead of a FP like .[0-9]*. */
if (c == '.')
{
unicode_t peep = java_sneak_unicode ();
}
else if (JAVA_ASCII_DIGIT (c))
radix = 8;
- else if (c == '.')
+ else if (c == '.' || c == 'e' || c =='E')
{
- /* Push the '.' back and prepare for a FP parsing... */
+ /* Push the '.', 'e', or 'E' back and prepare for a FP
+ parsing... */
java_unget_unicode ();
c = '0';
}
else
{
- /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
+ /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
JAVA_LEX_LIT ("0", 10);
switch (c)
{
}
/* Parse the first part of the literal, until we find something
which is not a number. */
- while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
- (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
- (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
+ while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
+ JAVA_ASCII_DIGIT (c))
{
/* We store in a string (in case it turns out to be an FP) and in
PARTS if we have to process an integer literal. */
- int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
+ int numeric = hex_value (c);
int count;
+ /* Remember when we find a valid hexadecimal digit. */
+ if (radix == 16)
+ found_hex_digits = 1;
+ /* Remember when we find an invalid octal digit. */
+ else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
+ found_non_octal_digits = 1;
+
literal_token [literal_index++] = c;
- /* This section of code if borrowed from gcc/c-lex.c */
+ /* This section of code is borrowed from gcc/c-lex.c. */
for (count = 0; count < TOTAL_PARTS; count++)
{
parts[count] *= radix;
int seen_digit = (literal_index ? 1 : 0);
int seen_exponent = 0;
int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
- double unless specified. */
- if (radix != 10)
+ double unless specified. */
+
+ /* It is ok if the radix is 8 because this just means we've
+ seen a leading `0'. However, radix==16 is invalid. */
+ if (radix == 16)
java_lex_error ("Can't express non-decimal FP literal", 0);
+ radix = 10;
for (;;)
{
{
if (stage < 2)
{
- /* {E,e} must have seen at list a digit */
+ /* {E,e} must have seen at least a digit. */
if (!seen_digit)
- java_lex_error ("Invalid FP literal", 0);
+ java_lex_error
+ ("Invalid FP literal, mantissa must have digit", 0);
seen_digit = 0;
seen_exponent = 1;
stage = 2;
if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
{
fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
- stage = 4; /* So we fall through */
+ stage = 4; /* So we fall through. */
}
- if ((c=='-' || c =='+') && stage < 3)
+ if ((c=='-' || c =='+') && stage == 2)
{
stage = 3;
literal_token [literal_index++] = c;
{
if (JAVA_ASCII_DIGIT (c))
seen_digit = 1;
+ if (stage == 2)
+ stage = 3;
literal_token [literal_index++ ] = c;
c = java_get_unicode ();
}
else
{
- jmp_buf handler;
- REAL_VALUE_TYPE value;
-#ifndef JC1_LITE
- tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
-#endif
-
- if (stage != 4) /* Don't push back fF/dD */
+ if (stage != 4) /* Don't push back fF/dD. */
java_unget_unicode ();
/* An exponent (if any) must have seen a digit. */
if (seen_exponent && !seen_digit)
- java_lex_error ("Invalid FP literal", 0);
+ java_lex_error
+ ("Invalid FP literal, exponent must have digit", 0);
literal_token [literal_index] = '\0';
JAVA_LEX_LIT (literal_token, radix);
- if (setjmp (handler))
- {
- JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
- value = DCONST0;
- }
- else
- {
- SET_FLOAT_HANDLER (handler);
- SET_REAL_VALUE_ATOF
- (value, REAL_VALUE_ATOF (literal_token,
- TYPE_MODE (type)));
-
- if (REAL_VALUE_ISINF (value))
- JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
-
- if (REAL_VALUE_ISNAN (value))
- JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
-
- SET_LVAL_NODE_TYPE (build_real (type, value), type);
- SET_FLOAT_HANDLER (NULL_PTR);
- return FP_LIT_TK;
- }
+#ifndef JC1_LITE
+ java_perform_atof (java_lval, literal_token,
+ fflag, number_beginning);
+#endif
+ return FP_LIT_TK;
}
}
- } /* JAVA_ASCCI_FPCHAR (c) */
+ } /* JAVA_ASCII_FPCHAR (c) */
/* Here we get back to converting the integral literal. */
- if (c == 'L' || c == 'l')
+ if (radix == 16 && ! found_hex_digits)
+ java_lex_error
+ ("0x must be followed by at least one hexadecimal digit", 0);
+ else if (radix == 8 && found_non_octal_digits)
+ java_lex_error ("Octal literal contains digit out of range", 0);
+ else if (c == 'L' || c == 'l')
long_suffix = 1;
- else if (radix == 16 && JAVA_ASCII_LETTER (c))
- java_lex_error ("Digit out of range in hexadecimal literal", 0);
- else if (radix == 8 && JAVA_ASCII_DIGIT (c))
- java_lex_error ("Digit out of range in octal literal", 0);
- else if (radix == 16 && !literal_index)
- java_lex_error ("No digit specified for hexadecimal literal", 0);
else
java_unget_unicode ();
#ifdef JAVA_LEX_DEBUG
- literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
+ literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
JAVA_LEX_LIT (literal_token, radix);
#endif
- /* This section of code is borrowed from gcc/c-lex.c */
+ /* This section of code is borrowed from gcc/c-lex.c. */
if (!overflow)
{
bytes = GET_TYPE_PRECISION (long_type_node);
}
/* End borrowed section. */
- /* Range checking */
- if (long_suffix)
+#ifndef JC1_LITE
+ /* Range checking. */
+ value = build_int_2 (low, high);
+ /* Temporarily set type to unsigned. */
+ SET_LVAL_NODE_TYPE (value, (long_suffix
+ ? unsigned_long_type_node
+ : unsigned_int_type_node));
+
+ /* For base 10 numbers, only values up to the highest value
+ (plus one) can be written. For instance, only ints up to
+ 2147483648 can be written. The special case of the largest
+ negative value is handled elsewhere. For other bases, any
+ number can be represented. */
+ if (overflow || (radix == 10
+ && tree_int_cst_lt (long_suffix
+ ? decimal_long_max
+ : decimal_int_max,
+ value)))
{
- /* 9223372036854775808L is valid if operand of a '-'. Otherwise
- 9223372036854775807L is the biggest `long' literal that can be
- expressed using a 10 radix. For other radixes, everything that
- fits withing 64 bits is OK. */
- int hb = (high >> 31);
- if (overflow || (hb && low && radix == 10) ||
- (hb && high & 0x7fffffff && radix == 10) ||
- (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
+ if (long_suffix)
JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
- }
- else
- {
- /* 2147483648 is valid if operand of a '-'. Otherwise,
- 2147483647 is the biggest `int' literal that can be
- expressed using a 10 radix. For other radixes, everything
- that fits within 32 bits is OK. */
- int hb = (low >> 31) & 0x1;
- if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
- (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
+ else
JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
}
- ctxp->minus_seen = 0;
+
+ /* Sign extend the value. */
+ SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
+ force_fit_type (value, 0);
+ JAVA_RADIX10_FLAG (value) = radix == 10;
+#else
SET_LVAL_NODE_TYPE (build_int_2 (low, high),
- (long_suffix ? long_type_node : int_type_node));
+ long_suffix ? long_type_node : int_type_node);
+#endif
return INT_LIT_TK;
}
- ctxp->minus_seen = 0;
- /* Character literals */
+ /* Character literals. */
if (c == '\'')
{
- unicode_t char_lit;
+ int char_lit;
if ((c = java_get_unicode ()) == '\\')
char_lit = java_parse_escape_sequence ();
else
- char_lit = c;
+ {
+ if (c == '\n' || c == '\'')
+ java_lex_error ("Invalid character literal", 0);
+ char_lit = c;
+ }
c = java_get_unicode ();
-
+
if ((c == '\n') || (c == UEOF))
java_lex_error ("Character literal not terminated at end of line", 0);
if (c != '\'')
java_lex_error ("Syntax error in character literal", 0);
- if (c == JAVA_CHAR_ERROR)
- char_lit = 0; /* We silently convert it to zero */
+ if (char_lit == JAVA_CHAR_ERROR)
+ char_lit = 0; /* We silently convert it to zero. */
JAVA_LEX_CHAR_LIT (char_lit);
SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
return CHAR_LIT_TK;
}
- /* String literals */
+ /* String literals. */
if (c == '"')
{
int no_error;
char *string;
for (no_error = 1, c = java_get_unicode ();
- c != '"' && c != '\n'; c = java_get_unicode ())
+ c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
{
if (c == '\\')
c = java_parse_escape_sequence ();
- no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
- if (c)
- java_unicode_2_utf8 (c);
+ if (c == JAVA_CHAR_ERROR)
+ {
+ no_error = 0;
+ c = 0; /* We silently convert it to zero. */
+ }
+ java_unicode_2_utf8 (c);
}
- if (c == '\n' || c == UEOF) /* ULT */
+ if (c == '\n' || c == UEOF) /* ULT. */
{
- lineno--; /* Refer to the line the terminator was seen */
- java_lex_error ("String not terminated at end of line.", 0);
- lineno++;
+ input_line--; /* Refer to the line where the terminator was seen. */
+ java_lex_error ("String not terminated at end of line", 0);
+ input_line++;
}
obstack_1grow (&temporary_obstack, '\0');
string = obstack_finish (&temporary_obstack);
#ifndef JC1_LITE
if (!no_error || (c != '"'))
- java_lval->node = error_mark_node; /* Requires futher testing FIXME */
+ java_lval->node = error_mark_node; /* FIXME: Requires further
+ testing. */
else
- {
- tree s = make_node (STRING_CST);
- TREE_STRING_LENGTH (s) = strlen (string);
- TREE_STRING_POINTER (s) =
- obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
- strcpy (TREE_STRING_POINTER (s), string);
- java_lval->node = s;
- }
+ java_lval->node = build_string (strlen (string), string);
#endif
+ obstack_free (&temporary_obstack, string);
return STRING_LIT_TK;
}
- /* Separator */
+ /* Separator. */
switch (c)
{
case '(':
case '{':
JAVA_LEX_SEP (c);
if (ctxp->ccb_indent == 1)
- ctxp->first_ccb_indent1 = lineno;
+ ctxp->first_ccb_indent1 = input_line;
ctxp->ccb_indent++;
BUILD_OPERATOR (OCB_TK);
case '}':
JAVA_LEX_SEP (c);
ctxp->ccb_indent--;
if (ctxp->ccb_indent == 1)
- ctxp->last_ccb_indent1 = lineno;
- return CCB_TK;
+ ctxp->last_ccb_indent1 = input_line;
+ BUILD_OPERATOR (CCB_TK);
case '[':
JAVA_LEX_SEP (c);
BUILD_OPERATOR (OSB_TK);
/* return DOT_TK; */
}
- /* Operators */
+ /* Operators. */
switch (c)
{
case '=':
BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
default:
java_unget_unicode ();
- ctxp->minus_seen = 1;
BUILD_OPERATOR (MINUS_TK);
}
BUILD_OPERATOR (NOT_TK);
}
- /* Keyword, boolean literal or null literal */
+ /* Keyword, boolean literal or null literal. */
for (first_unicode = c, all_ascii = 1, ascii_index = 0;
- JAVA_ID_CHAR_P (c); c = java_get_unicode ())
+ c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
{
java_unicode_2_utf8 (c);
if (all_ascii && c >= 128)
obstack_1grow (&temporary_obstack, '\0');
string = obstack_finish (&temporary_obstack);
- java_unget_unicode ();
+ if (c != UEOF)
+ java_unget_unicode ();
/* If we have something all ascii, we consider a keyword, a boolean
literal, a null literal or an all ASCII identifier. Otherwise,
this is an identifier (possibly not respecting formation rule). */
if (all_ascii)
{
- struct java_keyword *kw;
+ const struct java_keyword *kw;
if ((kw=java_keyword (string, ascii_index)))
{
JAVA_LEX_KW (string);
case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
- case PRIVATE_TK:
+ case PRIVATE_TK: case STRICT_TK:
SET_MODIFIER_CTX (kw->token);
return MODIFIER_TK;
case FLOAT_TK:
SET_LVAL_NODE (char_type_node);
return INTEGRAL_TK;
- /* Keyword based literals */
+ /* Keyword based literals. */
case TRUE_TK:
case FALSE_TK:
SET_LVAL_NODE ((kw->token == TRUE_TK ?
SET_LVAL_NODE (null_pointer_node);
return NULL_TK;
+ case ASSERT_TK:
+ if (flag_assert)
+ {
+ BUILD_OPERATOR (kw->token);
+ return kw->token;
+ }
+ else
+ break;
+
/* Some keyword we want to retain information on the location
- they where found */
+ they were found. */
case CASE_TK:
case DEFAULT_TK:
case SUPER_TK:
}
}
- /* We may have and ID here */
- if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
+ /* We may have an ID here. */
+ if (JAVA_START_CHAR_P (first_unicode))
{
JAVA_LEX_ID (string);
java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
return ID_TK;
}
- /* Everything else is an invalid character in the input */
+ /* Everything else is an invalid character in the input. */
{
char lex_error_buffer [128];
- sprintf (lex_error_buffer, "Invalid character '%s' in input",
+ sprintf (lex_error_buffer, "Invalid character `%s' in input",
java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
java_lex_error (lex_error_buffer, 1);
}
return 0;
}
+#ifndef JC1_LITE
+
+/* The exported interface to the lexer. Hands JAVA_LVAL to do_java_lex
+ and returns its result unchanged; the call is bracketed by
+ timevar_push/timevar_pop on TV_LEX. */
+static int
+java_lex (YYSTYPE *java_lval)
+{
+ int r;
+
+ timevar_push (TV_LEX);
+ r = do_java_lex (java_lval);
+ timevar_pop (TV_LEX);
+ return r;
+}
+
+/* This is called by the parser to see if an error should be generated
+ due to numeric overflow. This function only handles the particular
+ case of the largest negative value, and is only called in the case
+ where this value is not preceded by `-'.
+
+ VALUE is reported only when it is an INTEGER_CST that carries
+ JAVA_RADIX10_FLAG and whose sign is negative; the message names
+ `long' when its type is long_type_node and `int' otherwise. Any
+ other VALUE is silently accepted. */
+static void
+error_if_numeric_overflow (tree value)
+{
+ if (TREE_CODE (value) == INTEGER_CST
+ && JAVA_RADIX10_FLAG (value)
+ && tree_int_cst_sgn (value) < 0)
+ {
+ if (TREE_TYPE (value) == long_type_node)
+ java_lex_error ("Numeric overflow for `long' literal", 0);
+ else
+ java_lex_error ("Numeric overflow for `int' literal", 0);
+ }
+}
+
+#endif /* JC1_LITE */
+
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one
   obstack_1grow call per output byte. Values 0x01-0x7f are emitted
   as a single byte. Values in the 0x800-0xffff range are emitted as
   three bytes: 0xe0 plus bits 12-15, then 0x80 plus bits 6-11, then
   0x80 plus bits 0-5. */
static void
-java_unicode_2_utf8 (unicode)
- unicode_t unicode;
+java_unicode_2_utf8 (unicode_t unicode)
{
if (RANGE (unicode, 0x01, 0x7f))
obstack_1grow (&temporary_obstack, (char)unicode);
obstack_1grow (&temporary_obstack,
(unsigned char)(0x80 | (unicode & 0x3f)));
}
- else /* Range 0x800-0xffff */
+ else /* Range 0x800-0xffff. */
{
obstack_1grow (&temporary_obstack,
(unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
obstack_1grow (&temporary_obstack,
(unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
/* The low six bits are already in position, so the last byte takes
   them unshifted. */
obstack_1grow (&temporary_obstack,
- (unsigned char)(0x80 | (unicode & 0x003f) >> 12));
+ (unsigned char)(0x80 | (unicode & 0x003f)));
}
}
#ifndef JC1_LITE
/* Wrap NODE with build_expr_wfl, using the current context's file name
   and the line/column recorded in ctxp->elc, and return the wrapper
   with its TREE_TYPE cleared. */
static tree
-build_wfl_node (node)
- tree node;
+build_wfl_node (tree node)
{
- return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
+ node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
+ /* Prevent java_complete_lhs from short-circuiting node (if constant). */
+ TREE_TYPE (node) = NULL_TREE;
+ return node;
}
#endif
/* Report the lexical error MSG. The error location is set to the
   current input line's lineno and to its char_col - 1 + FORWARD, so
   FORWARD shifts the reported column to the right. java_error is
   then called twice: first with NULL, then with MSG. When JC1_LITE
   is defined the body is empty, which is why both parameters are
   marked ATTRIBUTE_UNUSED. */
static void
-java_lex_error (msg, forward)
- char *msg;
- int forward;
+java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
{
#ifndef JC1_LITE
ctxp->elc.line = ctxp->c_line->lineno;
ctxp->elc.col = ctxp->c_line->char_col-1+forward;
- /* Might be caught in the middle of some error report */
+ /* Might be caught in the middle of some error report. */
ctxp->java_error_flag = 0;
java_error (NULL);
java_error (msg);
#endif
}
+#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is a line terminator,
   and 0 otherwise. For '\r' one more character is read from FP: a
   following '\n' is consumed so that CR LF counts as a single
   terminator, while anything else other than EOF is pushed back. */
static int
-java_is_eol (fp, c)
- FILE *fp;
- int c;
+java_is_eol (FILE *fp, int c)
{
int next;
switch (c)
{
- case '\n':
+ case '\r':
next = getc (fp);
- if (next != '\r' && next != EOF)
+ if (next != '\n' && next != EOF)
ungetc (next, fp);
return 1;
- case '\r':
+ case '\n':
return 1;
default:
return 0;
}
}
+#endif
/* Build, in temporary_obstack, the text used to point at column COL of
   line LINE in FILENAME, and return it via obstack_finish. The file
   is reopened and read from the start on every call. If it ends
   before LINE is reached, a "<<file too short - unexpected EOF>>"
   placeholder stands in for the line. When JC1_LITE is defined this
   simply returns 0, which is why the parameters are marked
   ATTRIBUTE_UNUSED. */
char *
-java_get_line_col (filename, line, col)
- char *filename;
- int line, col;
+java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
+ int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
{
#ifdef JC1_LITE
return 0;
#else
- /* Dumb implementation. Doesn't try to cache or optimize things. */
- /* First line of the file is line 1, first column is 1 */
+ /* Dumb implementation. Doesn't try to cache or optimize things. */
+ /* First line of the file is line 1, first column is 1. */
- /* COL == -1 means, at the CR/LF in LINE */
- /* COL == -2 means, at the first non space char in LINE */
+ /* COL == -1 means, at the CR/LF in LINE. */
+ /* COL == -2 means, at the first non space char in LINE. */
FILE *fp;
int c, ccol, cline = 1;
char *base;
if (!(fp = fopen (filename, "r")))
- fatal ("Can't open file - java_display_line_col");
+ fatal_error ("can't open %s: %m", filename);
/* Skip forward until the requested line is the current one. */
while (cline != line)
{
c = getc (fp);
- if (c < 0)
+ if (c == EOF)
{
- static char msg[] = "<<file too short - unexpected EOF>>";
+ static const char msg[] = "<<file too short - unexpected EOF>>";
obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
goto have_line;
}
cline++;
}
- /* Gather the chars of the current line in a buffer */
+ /* Gather the chars of the current line in a buffer. */
for (;;)
{
c = getc (fp);
else
first_non_space = 0;
- /* Place the '^' a the right position */
+ /* Place the '^' at the right position. */
base = obstack_base (&temporary_obstack);
/* NOTE(review): the reason for the extra 3 columns is not stated
   here; presumably it matches a prefix the caller prints before the
   line -- confirm against the caller. */
- for (ccol = 1; ccol <= col; ccol++)
+ for (ccol = 1; ccol <= col+3; ccol++)
{
- /* Compute \t when reaching first_non_space */
+ /* Compute \t when reaching first_non_space. */
char c = (first_non_space ?
(base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
obstack_1grow (&temporary_obstack, c);
return obstack_finish (&temporary_obstack);
#endif
}
+
+#ifndef JC1_LITE
+static int
+utf8_cmp (const unsigned char *str, int length, const char *name)
+{
+ const unsigned char *limit = str + length;
+ int i;
+ /* Compare the LENGTH bytes at STR against the NUL-terminated NAME,
+ one character per step. UTF8_GET is defined elsewhere; it
+ presumably decodes the next character and advances STR, stopping
+ at LIMIT -- confirm against its definition. The first mismatch
+ yields the difference between the two characters. */
+ for (i = 0; name[i]; ++i)
+ {
+ int ch = UTF8_GET (str, limit);
+ if (ch != name[i])
+ return ch - name[i];
+ }
+ /* All of NAME matched: 0 if STR is used up too, otherwise 1. */
+ return str == limit ? 0 : 1;
+}
+
+/* A sorted list of all C++ keywords. cxx_keyword_p binary-searches
+ this table, so entries must stay in ascending byte order (uppercase
+ letters sort before `_', which sorts before lowercase letters).
+ The `__' spellings are listed alongside the plain keywords. */
+
+static const char *const cxx_keywords[] =
+{
+ "_Complex",
+ "__alignof",
+ "__alignof__",
+ "__asm",
+ "__asm__",
+ "__attribute",
+ "__attribute__",
+ "__builtin_va_arg",
+ "__complex",
+ "__complex__",
+ "__const",
+ "__const__",
+ "__extension__",
+ "__imag",
+ "__imag__",
+ "__inline",
+ "__inline__",
+ "__label__",
+ "__null",
+ "__real",
+ "__real__",
+ "__restrict",
+ "__restrict__",
+ "__signed",
+ "__signed__",
+ "__typeof",
+ "__typeof__",
+ "__volatile",
+ "__volatile__",
+ "and",
+ "and_eq",
+ "asm",
+ "auto",
+ "bitand",
+ "bitor",
+ "bool",
+ "break",
+ "case",
+ "catch",
+ "char",
+ "class",
+ "compl",
+ "const",
+ "const_cast",
+ "continue",
+ "default",
+ "delete",
+ "do",
+ "double",
+ "dynamic_cast",
+ "else",
+ "enum",
+ "explicit",
+ "export",
+ "extern",
+ "false",
+ "float",
+ "for",
+ "friend",
+ "goto",
+ "if",
+ "inline",
+ "int",
+ "long",
+ "mutable",
+ "namespace",
+ "new",
+ "not",
+ "not_eq",
+ "operator",
+ "or",
+ "or_eq",
+ "private",
+ "protected",
+ "public",
+ "register",
+ "reinterpret_cast",
+ "return",
+ "short",
+ "signed",
+ "sizeof",
+ "static",
+ "static_cast",
+ "struct",
+ "switch",
+ "template",
+ "this",
+ "throw",
+ "true",
+ "try",
+ "typedef",
+ "typeid",
+ "typename",
+ "typeof",
+ "union",
+ "unsigned",
+ "using",
+ "virtual",
+ "void",
+ "volatile",
+ "wchar_t",
+ "while",
+ "xor",
+ "xor_eq"
+};
+
+/* Return true if NAME is a C++ keyword. NAME is LENGTH bytes long
+ and need not be NUL-terminated. A keyword followed only by `$'
+ characters also counts as a match. The lookup is a binary search
+ over cxx_keywords using utf8_cmp, and stops once the midpoint no
+ longer moves between iterations. Returns 1 on a match, 0
+ otherwise. */
+
+int
+cxx_keyword_p (const char *name, int length)
+{
+ int last = ARRAY_SIZE (cxx_keywords);
+ int first = 0;
+ int mid = (last + first) / 2;
+ int old = -1;
+
+ for (mid = (last + first) / 2;
+ mid != old;
+ old = mid, mid = (last + first) / 2)
+ {
+ int kwl = strlen (cxx_keywords[mid]);
+ int min_length = kwl > length ? length : kwl;
+ int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
+ /* Only MIN_LENGTH bytes of NAME were compared, so R == 0 means
+ NAME begins with this keyword, not that it equals it. */
+ if (r == 0)
+ {
+ int i;
+ /* We've found a match if all the remaining characters are `$'. */
+ for (i = min_length; i < length && name[i] == '$'; ++i)
+ ;
+ if (i == length)
+ return 1;
+ r = 1;
+ }
+ /* Keep the half of the table that can still hold NAME. */
+ if (r < 0)
+ last = mid;
+ else
+ first = mid;
+ }
+ return 0;
+}
+#endif /* JC1_LITE */