#include "keyword.h"
#include "flags.h"
#include "chartables.h"
+#ifndef JC1_LITE
+#include "timevar.h"
+#endif
/* Function declarations. */
static char *java_sprint_unicode (struct java_line *, int);
static void java_unicode_2_utf8 (unicode_t);
static void java_lex_error (const char *, int);
#ifndef JC1_LITE
+static int do_java_lex (YYSTYPE *);
+static int java_lex (YYSTYPE *);
static int java_is_eol (FILE *, int);
static tree build_wfl_node (tree);
#endif
static int java_parse_escape_sequence (void);
static int java_start_char_p (unicode_t);
static int java_part_char_p (unicode_t);
-static int java_parse_doc_section (int);
+static int java_space_char_p (unicode_t);
+static void java_parse_doc_section (int);
static void java_parse_end_comment (int);
static int java_get_unicode (void);
static int java_read_unicode (java_lexer *, int *);
#endif
ctxp->filename = input_filename;
- ctxp->lineno = lineno = 0;
+ ctxp->lineno = input_line = 0;
ctxp->p_line = NULL;
ctxp->c_line = NULL;
ctxp->java_error_flag = 0;
}
ctxp->c_line->ahead [0] = 0;
ctxp->c_line->unicode_escape_ahead_p = 0;
- ctxp->c_line->lineno = ++lineno;
+ ctxp->c_line->lineno = ++input_line;
ctxp->c_line->white_space_only = 1;
}
lex->bs_count = 0;
lex->unget_value = 0;
lex->hit_eof = 0;
+ lex->encoding = encoding;
#ifdef HAVE_ICONV
lex->handle = iconv_open ("UCS-2", encoding);
in[1] = 0xbb;
in[2] = 0xbf;
- inp = in;
+ inp = (char *) in;
inc = 3;
outp = (char *) &result;
outc = 2;
enc_error = 1;
#ifdef HAVE_ICONV
else
- lex->use_fallback = 1;
+ {
+ lex->use_fallback = 1;
+ lex->encoding = "UTF-8";
+ }
#endif /* HAVE_ICONV */
}
in_save = inbytesleft;
out_save = out_count;
inp = &lex->buffer[lex->first];
- outp = &lex->out_buffer[lex->out_last];
+ outp = (char *) &lex->out_buffer[lex->out_last];
ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
&inbytesleft, &outp, &out_count);
else
{
/* A more serious error. */
- java_lex_error ("unrecognized character in input stream",
- 0);
+ char buffer[128];
+ sprintf (buffer,
+ "Unrecognized character for encoding '%s'",
+ lex->encoding);
+ java_lex_error (buffer, 0);
return UEOF;
}
}
while ((c = java_read_char (lex)) == 'u')
;
- /* Unget the most recent character as it is not a `u'. */
- if (c == UEOF)
- return UEOF;
- lex->unget_value = c;
-
- /* Next should be 4 hex digits, otherwise it's an error.
- The hex value is converted into the unicode, pushed into
- the Unicode stream. */
- for (shift = 12; shift >= 0; shift -= 4)
+ shift = 12;
+ do
{
- if ((c = java_read_char (lex)) == UEOF)
- return UEOF;
+ if (c == UEOF)
+ {
+ java_lex_error ("prematurely terminated \\u sequence", 0);
+ return UEOF;
+ }
+
if (hex_p (c))
unicode |= (unicode_t)(hex_value (c) << shift);
else
- java_lex_error ("Non hex digit in Unicode escape sequence", 0);
+ {
+ java_lex_error ("non-hex digit in \\u sequence", 0);
+ break;
+ }
+
+ c = java_read_char (lex);
+ shift -= 4;
}
+ while (shift >= 0);
+
+ if (c != UEOF)
+ lex->unget_value = c;
+
lex->bs_count = 0;
*unicode_escape_p = 1;
return unicode;
of a documentation comment line (ignoring white space and any `*'
character). Parsed keyword(s): @DEPRECATED. */
-static int
+static void
java_parse_doc_section (int c)
{
- int valid_tag = 0, seen_star = 0;
+ int last_was_star;
- while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
+ /* We reset the deprecated flag here, because only the most recent
+ doc comment applies to the following declaration. It is set
+ again below only if this comment contains an @deprecated tag. */
+ ctxp->deprecated = 0;
+
+ /* We loop over all the lines of the comment. We'll eventually exit
+ if we hit EOF prematurely, or when we see the comment
+ terminator. */
+ while (1)
{
- switch (c)
+ /* These first steps need only be done if we're still looking
+ for the deprecated tag. If we've already seen it, we might
+ as well skip looking for it again. */
+ if (! ctxp->deprecated)
{
- case '*':
- seen_star = 1;
- break;
- case '\n': /* ULT */
- valid_tag = 1;
- default:
- seen_star = 0;
- }
- c = java_get_unicode();
- }
+ /* Skip whitespace and '*'s. We must also check for the end
+ of the comment here. */
+ while (JAVA_WHITE_SPACE_P (c) || c == '*')
+ {
+ last_was_star = (c == '*');
+ c = java_get_unicode ();
+ if (last_was_star && c == '/')
+ {
+ /* We just saw the comment terminator. */
+ return;
+ }
+ }
- if (c == UEOF)
- java_lex_error ("Comment not terminated at end of input", 0);
+ if (c == UEOF)
+ goto eof;
- if (seen_star && (c == '/'))
- return 1; /* Goto step1 in caller. */
+ if (c == '@')
+ {
+ const char *deprecated = "@deprecated";
+ int i;
- /* We're parsing `@deprecated'. */
- if (valid_tag && (c == '@'))
- {
- char tag [11];
- int tag_index = 0;
+ for (i = 0; deprecated[i]; ++i)
+ {
+ if (c != deprecated[i])
+ break;
+ /* We write the code in this way, with the
+ update at the end, so that after the loop
+ we're left with the next character in C. */
+ c = java_get_unicode ();
+ }
+
+ if (c == UEOF)
+ goto eof;
- while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
+ /* @deprecated must be followed by a space or newline.
+ We also allow a '*' in case it appears just before
+ the end of a comment. In this position only we also
+ must allow any Unicode space character. */
+ if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
+ {
+ if (! deprecated[i])
+ ctxp->deprecated = 1;
+ }
+ }
+ }
+
+ /* We've examined the relevant content from this line. Now we
+ skip the remaining characters and start over with the next
+ line. We also check for end of comment here. */
+ while (c != '\n' && c != UEOF)
{
+ last_was_star = (c == '*');
c = java_get_unicode ();
- tag [tag_index++] = c;
+ if (last_was_star && c == '/')
+ return;
}
if (c == UEOF)
- java_lex_error ("Comment not terminated at end of input", 0);
- tag [tag_index] = '\0';
-
- if (!strcmp (tag, "deprecated"))
- ctxp->deprecated = 1;
+ goto eof;
+ /* We have to advance past the \n so that the next iteration
+ starts on the first character of the following line. */
+ c = java_get_unicode ();
+ if (c == UEOF)
+ goto eof;
}
- java_unget_unicode ();
- return 0;
+
+ eof:
+ java_lex_error ("Comment not terminated at end of input", 0);
}
/* Return true if C is a valid start character for a Java identifier.
unsigned long val = (unsigned long) page;
int flags;
- if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
+ if ((val & ~ LETTER_MASK) != 0)
flags = page[c & 255];
else
flags = val;
unsigned long val = (unsigned long) page;
int flags;
- if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
+ if ((val & ~ LETTER_MASK) != 0)
flags = page[c & 255];
else
flags = val;
return flags & LETTER_PART;
}
+/* Return nonzero if C is whitespace, i.e. if the flags found for C
+   in type_table have the LETTER_SPACE bit set.  type_table is
+   indexed by the high part of C (c / 256).  If the entry has any
+   bit set outside LETTER_MASK it is used as a pointer to a page
+   indexed by the low byte of C (c & 255); otherwise the entry value
+   itself is used as the flags.  */
+static int
+java_space_char_p (unicode_t c)
+{
+ unsigned int hi = c / 256;
+ const char *const page = type_table[hi];
+ unsigned long val = (unsigned long) page;
+ int flags;
+
+ if ((val & ~ LETTER_MASK) != 0)
+ flags = page[c & 255];
+ else
+ flags = val;
+
+ return flags & LETTER_SPACE;
+}
+
static int
java_parse_escape_sequence (void)
{
#ifdef JC1_LITE
yylex (YYSTYPE *java_lval)
#else
-java_lex (YYSTYPE *java_lval)
+do_java_lex (YYSTYPE *java_lval)
#endif
{
int c;
case '*':
if ((c = java_get_unicode ()) == '*')
{
- if ((c = java_get_unicode ()) == '/')
- goto step1; /* Empty documentation comment. */
- else if (java_parse_doc_section (c))
- goto step1;
+ c = java_get_unicode ();
+ if (c == '/')
+ {
+ /* Empty documentation comment. We have to reset
+ the deprecation marker as only the most recent
+ doc comment applies. */
+ ctxp->deprecated = 0;
+ }
+ else
+ java_parse_doc_section (c);
}
-
- java_parse_end_comment ((c = java_get_unicode ()));
+ else
+ java_parse_end_comment ((c = java_get_unicode ()));
goto step1;
break;
default:
}
if (c == '\n' || c == UEOF) /* ULT. */
{
- lineno--; /* Refer to the line where the terminator was seen. */
+ input_line--; /* Refer to the line where the terminator was seen. */
java_lex_error ("String not terminated at end of line", 0);
- lineno++;
+ input_line++;
}
obstack_1grow (&temporary_obstack, '\0');
string = obstack_finish (&temporary_obstack);
#ifndef JC1_LITE
if (!no_error || (c != '"'))
- java_lval->node = error_mark_node; /* FIXME: Requires futher
+ java_lval->node = error_mark_node; /* FIXME: Requires further
testing. */
else
java_lval->node = build_string (strlen (string), string);
case '{':
JAVA_LEX_SEP (c);
if (ctxp->ccb_indent == 1)
- ctxp->first_ccb_indent1 = lineno;
+ ctxp->first_ccb_indent1 = input_line;
ctxp->ccb_indent++;
BUILD_OPERATOR (OCB_TK);
case '}':
JAVA_LEX_SEP (c);
ctxp->ccb_indent--;
if (ctxp->ccb_indent == 1)
- ctxp->last_ccb_indent1 = lineno;
+ ctxp->last_ccb_indent1 = input_line;
BUILD_OPERATOR (CCB_TK);
case '[':
JAVA_LEX_SEP (c);
/* Keyword, boolean literal or null literal. */
for (first_unicode = c, all_ascii = 1, ascii_index = 0;
- JAVA_PART_CHAR_P (c); c = java_get_unicode ())
+ c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
{
java_unicode_2_utf8 (c);
if (all_ascii && c >= 128)
obstack_1grow (&temporary_obstack, '\0');
string = obstack_finish (&temporary_obstack);
- java_unget_unicode ();
+ if (c != UEOF)
+ java_unget_unicode ();
/* If we have something all ascii, we consider a keyword, a boolean
literal, a null literal or an all ASCII identifier. Otherwise,
}
#ifndef JC1_LITE
+
+/* The interface to the lexer for the non-JC1_LITE build.  This is a
+   thin wrapper: it calls do_java_lex on JAVA_LVAL between a
+   timevar_push and a timevar_pop of TV_LEX, and returns the value
+   do_java_lex produced unchanged.  */
+static int
+java_lex (YYSTYPE *java_lval)
+{
+ int r;
+
+ timevar_push (TV_LEX);
+ r = do_java_lex (java_lval);
+ timevar_pop (TV_LEX);
+ return r;
+}
+
/* This is called by the parser to see if an error should be generated
due to numeric overflow. This function only handles the particular
case of the largest negative value, and is only called in the case
java_lex_error ("Numeric overflow for `int' literal", 0);
}
}
+
#endif /* JC1_LITE */
static void
char *base;
if (!(fp = fopen (filename, "r")))
- fatal_io_error ("can't open %s", filename);
+ fatal_error ("can't open %s: %m", filename);
while (cline != line)
{
{
int kwl = strlen (cxx_keywords[mid]);
int min_length = kwl > length ? length : kwl;
- int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
+ int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
if (r == 0)
{