static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
-static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
- const unsigned char *, HOST_WIDE_INT,
- int));
-static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
- const unsigned char *, unsigned int));
+static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
+ const unsigned char *, unsigned int *));
static cpp_chunk *new_chunk PARAMS ((unsigned int));
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
if (token->type != CPP_NAME)
return 0;
- return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
+ return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
}
/* Call when meeting a newline. Returns the character after the newline
{
cpp_hashnode *result;
cpp_buffer *buffer = pfile->buffer;
- unsigned char *dest, *limit;
- unsigned int r = 0, saw_dollar = 0;
-
- dest = POOL_FRONT (&pfile->ident_pool);
- limit = POOL_LIMIT (&pfile->ident_pool);
+ unsigned int saw_dollar = 0, len;
+ struct obstack *stack = &pfile->hash_table->stack;
do
{
do
{
- /* Need room for terminating null. */
- if (dest + 1 >= limit)
- limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
-
- *dest++ = c;
- r = HASHSTEP (r, c);
+ obstack_1grow (stack, c);
if (c == '$')
saw_dollar++;
cpp_pedwarn (pfile, "'$' character(s) in identifier");
/* Identifiers are null-terminated. */
- *dest = '\0';
+ len = obstack_object_size (stack);
+ obstack_1grow (stack, '\0');
/* This routine commits the memory if necessary. */
- result = _cpp_lookup_with_hash (pfile,
- dest - POOL_FRONT (&pfile->ident_pool), r);
+ result = (cpp_hashnode *)
+ ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
/* Some identifiers require diagnostics when lexed. */
if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
{
/* It is allowed to poison the same identifier twice. */
if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
- cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
+ cpp_error (pfile, "attempt to use poisoned \"%s\"",
+ NODE_NAME (result));
/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
replacement list of a variadic macro. */
switch (c)
{
case EOF:
- /* Non-empty files should end in a newline. Ignore for command
- line and _Pragma buffers. */
- if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
+ /* Non-empty files should end in a newline. Checking "bol" too
+ prevents multiple warnings when hitting the EOF more than
+ once, like in a directive. Don't warn for command line and
+ _Pragma buffers. */
+ if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
cpp_pedwarn (pfile, "no newline at end of file");
pfile->state.next_bol = 1;
pfile->skipping = 0; /* In case missing #endif. */
ACCEPT_CHAR (CPP_DIV_EQ);
if (c != '/' && c != '*')
break;
-
+
if (c == '*')
{
if (skip_block_comment (pfile))
switch (TOKEN_SPELL (token))
{
- default: len = 0; break;
- case SPELL_STRING: len = token->val.str.len; break;
- case SPELL_IDENT: len = token->val.node->length; break;
+ default: len = 0; break;
+ case SPELL_STRING: len = token->val.str.len; break;
+ case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
}
/* 1 for whitespace, 4 for comment delimeters. */
return len + 5;
case SPELL_IDENT:
spell_ident:
- memcpy (buffer, token->val.node->name, token->val.node->length);
- buffer += token->val.node->length;
+ memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
+ buffer += NODE_LEN (token->val.node);
break;
case SPELL_STRING:
spell_ident:
case SPELL_IDENT:
- ufputs (token->val.node->name, fp);
+ ufputs (NODE_NAME (token->val.node), fp);
break;
case SPELL_STRING:
abort ();
}
-/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
+/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
+ failure if cpplib is not parsing C++ or C99. Such failure is
+ silent, and no variables are updated. Otherwise returns 0, and
+ warns if -Wtraditional.
[lex.charset]: The character designated by the universal character
name \UNNNNNNNN is that character whose character short name in
program is ill-formed.
We assume that wchar_t is Unicode, so we don't need to do any
- mapping. Is this ever wrong? */
+ mapping. Is this ever wrong?
-static unsigned int
-read_ucs (pfile, pstr, limit, length)
+ PC points to the 'u' or 'U', PSTR points to the byte after PC,
+ LIMIT is the end of the string or charconst. PSTR is updated to
+ point after the UCS on return, and the UCS is written into PC. */
+
+static int
+maybe_read_ucs (pfile, pstr, limit, pc)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
- unsigned int length;
+ unsigned int *pc;
{
const unsigned char *p = *pstr;
- unsigned int c, code = 0;
+ unsigned int code = 0;
+ unsigned int c = *pc, length;
- for (; length; --length)
- {
- if (p >= limit)
- {
- cpp_error (pfile, "incomplete universal-character-name");
- break;
- }
+ /* Only attempt to interpret a UCS for C++ and C99. */
+ if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
+ return 1;
- c = *p;
- if (ISXDIGIT (c))
- {
- code = (code << 4) + hex_digit_value (c);
- p++;
- }
- else
+ if (CPP_WTRADITIONAL (pfile))
+ cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
+
+ length = (c == 'u' ? 4: 8);
+
+ if ((size_t) (limit - p) < length)
+ {
+ cpp_error (pfile, "incomplete universal-character-name");
+ /* Skip to the end to avoid more diagnostics. */
+ p = limit;
+ }
+ else
+ {
+ for (; length; length--, p++)
{
- cpp_error (pfile,
- "non-hex digit '%c' in universal-character-name", c);
- break;
+ c = *p;
+ if (ISXDIGIT (c))
+ code = (code << 4) + hex_digit_value (c);
+ else
+ {
+ cpp_error (pfile,
+ "non-hex digit '%c' in universal-character-name", c);
+ /* We shouldn't skip in case there are multibyte chars. */
+ break;
+ }
}
-
}
#ifdef TARGET_EBCDIC
cpp_error (pfile, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
#else
- if (code > 0x9f && !(code & 0x80000000))
- ; /* True extended character, OK. */
- else if (code >= 0x20 && code < 0x7f)
- {
- /* ASCII printable character. The C character set consists of all of
- these except $, @ and `. We use hex escapes so that this also
- works with EBCDIC hosts. */
- if (code != 0x24 && code != 0x40 && code != 0x60)
- cpp_error (pfile, "universal-character-name used for '%c'", code);
- }
- else
- cpp_error (pfile, "invalid universal-character-name");
+ /* True extended characters are OK. */
+ if (code >= 0xa0
+ && !(code & 0x80000000)
+ && !(code >= 0xD800 && code <= 0xDFFF))
+ ;
+ /* The standard permits $, @ and ` to be specified as UCNs. We use
+ hex escapes so that this also works with EBCDIC hosts. */
+ else if (code == 0x24 || code == 0x40 || code == 0x60)
+ ;
+ /* Don't give another error if one occurred above. */
+ else if (length == 0)
+ cpp_error (pfile, "universal-character-name out of range");
#endif
*pstr = p;
- return code;
+ *pc = code;
+ return 0;
}
/* Interpret an escape sequence, and return its value. PSTR points to
the input pointer, which is just after the backslash. LIMIT is how
- much text we have. MASK is the precision for the target type (char
- or wchar_t). TRADITIONAL, if true, does not interpret escapes that
- did not exist in traditional C. */
+ much text we have. MASK is a bitmask for the precision for the
+ destination type (char or wchar_t). TRADITIONAL, if true, does not
+ interpret escapes that did not exist in traditional C.
-static unsigned int
-parse_escape (pfile, pstr, limit, mask, traditional)
+ Handles all relevant diagnostics. */
+
+unsigned int
+cpp_parse_escape (pfile, pstr, limit, mask, traditional)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
- HOST_WIDE_INT mask;
+ unsigned HOST_WIDE_INT mask;
int traditional;
{
int unknown = 0;
c = TARGET_ESC;
break;
- /* Warnings and support checks handled by read_ucs(). */
case 'u': case 'U':
- if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
- {
- if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile,
- "the meaning of '\\%c' varies with -traditional", c);
- c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
- }
- else
- unknown = 1;
+ unknown = maybe_read_ucs (pfile, &str, limit, &c);
break;
case 'x':
cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
}
+ if (c > mask)
+ cpp_pedwarn (pfile, "escape sequence out of range for character");
+
*pstr = str;
return c;
}
const unsigned char *limit = str + token->val.str.len;
unsigned int chars_seen = 0;
unsigned int width, max_chars, c;
- HOST_WIDE_INT result = 0, mask;
+ unsigned HOST_WIDE_INT mask;
+ HOST_WIDE_INT result = 0;
#ifdef MULTIBYTE_CHARS
(void) local_mbtowc (NULL, NULL, 0);
#endif
if (c == '\\')
- {
- c = parse_escape (pfile, &str, limit, mask, traditional);
- if (width < HOST_BITS_PER_WIDE_INT && c > mask)
- cpp_pedwarn (pfile, "escape sequence out of range for character");
- }
+ c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
else if (chars_seen > max_chars)
{
chars_seen = max_chars;
- cpp_error (pfile, "character constant too long");
+ cpp_warning (pfile, "character constant too long");
}
else if (chars_seen > 1 && !traditional && warn_multi)
cpp_warning (pfile, "multi-character character constant");