/* CPP Library - lexical analysis.
- Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
Contributed by Per Bothner, 1994-95.
Based on CCCP program by Paul Rubin, June 1986
Adapted to ANSI C, Richard Stallman, Jan 1987
static void adjust_column PARAMS ((cpp_reader *));
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
-static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
- const U_CHAR *));
-static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
+static U_CHAR *parse_slow PARAMS ((cpp_reader *, const U_CHAR *, int,
+ unsigned int *));
+static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
-static void unterminated PARAMS ((cpp_reader *, int));
static bool trigraph_p PARAMS ((cpp_reader *));
-static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
+static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *,
+ cppchar_t));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
const unsigned char *, unsigned int *));
Compares, the token TOKEN to the NUL-terminated string STRING.
TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
-
int
cpp_ideq (token, string)
const cpp_token *token;
seen:unseen identifiers in normal code; the distribution is
Poisson-like). Second most common case is a new identifier, not
split and no dollar sign. The other possibilities are rare and
- have been relegated to parse_identifier_slow. */
-
+ have been relegated to parse_slow. */
static cpp_hashnode *
parse_identifier (pfile)
cpp_reader *pfile;
{
cpp_hashnode *result;
- const U_CHAR *cur;
+ const U_CHAR *cur, *base;
/* Fast-path loop. Skim over a normal identifier.
N.B. ISIDNUM does not include $. */
/* Check for slow-path cases. */
if (*cur == '?' || *cur == '\\' || *cur == '$')
- result = parse_identifier_slow (pfile, cur);
+ {
+ unsigned int len;
+
+ base = parse_slow (pfile, cur, 0, &len);
+ result = (cpp_hashnode *)
+ ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
+ }
else
{
- const U_CHAR *base = pfile->buffer->cur - 1;
+ base = pfile->buffer->cur - 1;
+ pfile->buffer->cur = cur;
result = (cpp_hashnode *)
ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
- pfile->buffer->cur = cur;
}
/* Rarely, identifiers require diagnostics when lexed.
return result;
}
-/* Slow path. This handles identifiers which have been split, and
- identifiers which contain dollar signs. The part of the identifier
- from PFILE->buffer->cur-1 to CUR has already been scanned. */
-static cpp_hashnode *
-parse_identifier_slow (pfile, cur)
+/* Slow path. This handles numbers and identifiers which have been
+ split, or contain dollar signs. The part of the token from
+ PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
+ 0 for an identifier, 1 for a number, and 2 for a number with a
+ leading period. Returns a pointer to the token's NUL-terminated
+ spelling in permanent storage, and sets *PLEN to its length, not
+ counting the terminating NUL. */
+static U_CHAR *
+parse_slow (pfile, cur, number_p, plen)
cpp_reader *pfile;
const U_CHAR *cur;
+ int number_p;
+ unsigned int *plen;
{
cpp_buffer *buffer = pfile->buffer;
const U_CHAR *base = buffer->cur - 1;
struct obstack *stack = &pfile->hash_table->stack;
- unsigned int c, saw_dollar = 0, len;
+ unsigned int c, prevc, saw_dollar = 0;
+
+ /* Place any leading period. */
+ if (number_p == 2)
+ obstack_1grow (stack, '.');
/* Copy the part of the token which is known to be okay. */
obstack_grow (stack, base, cur - base);
/* Now process the part which isn't. We are looking at one of
'$', '\\', or '?' on entry to this loop. */
+ prevc = cur[-1];
c = *cur++;
buffer->cur = cur;
- do
+ for (;;)
{
- while (is_idchar (c))
+ /* Potential escaped newline? */
+ buffer->backup_to = buffer->cur - 1;
+ if (c == '?' || c == '\\')
+ c = skip_escaped_newlines (pfile);
+
+ if (!is_idchar (c))
+ {
+ if (!number_p)
+ break;
+ if (c != '.' && !VALID_SIGN (c, prevc))
+ break;
+ }
+
+ /* Handle normal identifier characters in this loop. */
+ do
{
+ prevc = c;
obstack_1grow (stack, c);
if (c == '$')
c = *buffer->cur++;
}
-
- /* Potential escaped newline? */
- buffer->backup_to = buffer->cur - 1;
- if (c != '?' && c != '\\')
- break;
- c = skip_escaped_newlines (pfile);
+ while (is_idchar (c));
}
- while (is_idchar (c));
/* Step back over the unwanted char. */
BACKUP ();
accepted as an extension. Don't warn about it in skipped
conditional blocks. */
if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
- cpp_pedwarn (pfile, "'$' character(s) in identifier");
+ cpp_pedwarn (pfile, "'$' character(s) in identifier or number");
- /* Identifiers are null-terminated. */
- len = obstack_object_size (stack);
+ /* Identifiers and numbers are null-terminated. */
+ *plen = obstack_object_size (stack);
obstack_1grow (stack, '\0');
-
- return (cpp_hashnode *)
- ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
+ return obstack_finish (stack);
}
-/* Parse a number, skipping embedded backslash-newlines. */
+/* Parse a number whose first character has already been read (it is
+ at PFILE->buffer->cur - 1), skipping embedded backslash-newlines.
+ LEADING_PERIOD is 1 if there was a "." before that character, and
+ 0 otherwise. Place the result in NUMBER. */
static void
-parse_number (pfile, number, c, leading_period)
+parse_number (pfile, number, leading_period)
cpp_reader *pfile;
cpp_string *number;
- cppchar_t c;
int leading_period;
{
- cpp_buffer *buffer = pfile->buffer;
- unsigned char *dest, *limit;
+ const U_CHAR *cur;
- dest = BUFF_FRONT (pfile->u_buff);
- limit = BUFF_LIMIT (pfile->u_buff);
+ /* Fast-path loop. Skim over a normal number.
+ N.B. ISIDNUM does not include $. */
+ cur = pfile->buffer->cur;
+ while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
+ cur++;
- /* Place a leading period. */
- if (leading_period)
- {
- if (dest == limit)
- {
- _cpp_extend_buff (pfile, &pfile->u_buff, 1);
- dest = BUFF_FRONT (pfile->u_buff);
- limit = BUFF_LIMIT (pfile->u_buff);
- }
- *dest++ = '.';
- }
-
- do
+ /* Check for slow-path cases. */
+ if (*cur == '?' || *cur == '\\' || *cur == '$')
+ number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
+ else
{
- do
- {
- /* Need room for terminating null. */
- if ((size_t) (limit - dest) < 2)
- {
- size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
- _cpp_extend_buff (pfile, &pfile->u_buff, 2);
- dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
- limit = BUFF_LIMIT (pfile->u_buff);
- }
- *dest++ = c;
-
- c = *buffer->cur++;
- }
- while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
-
- /* Potential escaped newline? */
- buffer->backup_to = buffer->cur - 1;
- if (c != '?' && c != '\\')
- break;
- c = skip_escaped_newlines (pfile);
- }
- while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
-
- /* Step back over the unwanted char. */
- BACKUP ();
-
- /* Null-terminate the number. */
- *dest = '\0';
-
- number->text = BUFF_FRONT (pfile->u_buff);
- number->len = dest - number->text;
- BUFF_FRONT (pfile->u_buff) = dest + 1;
-}
+ const U_CHAR *base = pfile->buffer->cur - 1;
+ U_CHAR *dest;
-/* Subroutine of parse_string. Emits error for unterminated strings. */
-static void
-unterminated (pfile, term)
- cpp_reader *pfile;
- int term;
-{
- cpp_error (pfile, "missing terminating %c character", term);
+ number->len = cur - base + leading_period;
+ dest = _cpp_unaligned_alloc (pfile, number->len + 1);
+ dest[number->len] = '\0';
+ number->text = dest;
- if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
- {
- cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
- "possible start of unterminated string literal");
- pfile->mls_line = 0;
+ if (leading_period)
+ *dest++ = '.';
+ memcpy (dest, base, cur - base);
+ pfile->buffer->cur = cur;
}
}
name. Handles embedded trigraphs and escaped newlines. The stored
string is guaranteed NUL-terminated, but it is not guaranteed that
this is the first NUL since embedded NULs are preserved.
- Multi-line strings are allowed, but they are deprecated.
When this function returns, buffer->cur points to the next
character to be processed. */
cpp_buffer *buffer = pfile->buffer;
unsigned char *dest, *limit;
cppchar_t c;
- bool warned_nulls = false, warned_multi = false;
+ bool warned_nulls = false;
dest = BUFF_FRONT (pfile->u_buff);
limit = BUFF_LIMIT (pfile->u_buff);
}
else if (is_vspace (c))
{
- /* In assembly language, silently terminate string and
- character literals at end of line. This is a kludge
- around not knowing where comments are. */
- if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
- {
- buffer->cur--;
- break;
- }
-
- /* Character constants and header names may not extend over
- multiple lines. In Standard C, neither may strings.
- Unfortunately, we accept multiline strings as an
- extension, except in #include family directives. */
- if (terminator != '"' || pfile->state.angled_headers)
- {
- unterminated (pfile, terminator);
- buffer->cur--;
- break;
- }
-
- if (!warned_multi)
- {
- warned_multi = true;
- cpp_pedwarn (pfile, "multi-line string literals are deprecated");
- }
-
- if (pfile->mls_line == 0)
- {
- pfile->mls_line = token->line;
- pfile->mls_col = token->col;
- }
-
- handle_newline (pfile);
- c = '\n';
+ /* No string literal may extend over multiple lines. In
+ assembly language, suppress the error except for <>
+ includes. This is a kludge around not knowing where
+ comments are. */
+ unterminated:
+ if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
+ cpp_error (pfile, "missing terminating %c character", terminator);
+ buffer->cur--;
+ break;
}
else if (c == '\0')
{
if (buffer->cur - 1 == buffer->rlimit)
- {
- unterminated (pfile, terminator);
- buffer->cur--;
- break;
- }
+ goto unterminated;
if (!warned_nulls)
{
warned_nulls = true;
/* The stored comment includes the comment start and any terminator. */
static void
-save_comment (pfile, token, from)
+save_comment (pfile, token, from, type)
cpp_reader *pfile;
cpp_token *token;
const unsigned char *from;
+ cppchar_t type;
{
unsigned char *buffer;
- unsigned int len;
+ unsigned int len, clen;
len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
line, which we don't want to save in the comment. */
if (is_vspace (pfile->buffer->cur[-1]))
len--;
- buffer = _cpp_unaligned_alloc (pfile, len);
+
+ /* If we are currently in a directive, then we need to store all
+ C++ comments as C comments internally, and so we need to
+ allocate a little extra space in that case.
+
+ Note that the only time we encounter a directive here is
+ when we are saving comments in a "#define". */
+ clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
+
+ buffer = _cpp_unaligned_alloc (pfile, clen);
token->type = CPP_COMMENT;
- token->val.str.len = len;
+ token->val.str.len = clen;
token->val.str.text = buffer;
buffer[0] = '/';
memcpy (buffer + 1, from, len - 1);
+
+ /* Finish conversion to a C comment, if necessary. */
+ if (pfile->state.in_directive && type == '/')
+ {
+ buffer[1] = '*';
+ buffer[clen - 2] = '*';
+ buffer[clen - 1] = '/';
+ }
}
/* Allocate COUNT tokens for RUN. */
/* Is this a directive. If _cpp_handle_directive returns
false, it is an assembler #. */
if (result->type == CPP_HASH
- && !pfile->state.parsing_args
+ /* 6.10.3 p 11: Directives in a list of macro arguments
+ give undefined behavior. This implementation
+ handles the directive as normal. */
+ && pfile->state.parsing_args != 1
&& _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
continue;
if (pfile->cb.line_change && !pfile->state.skipping)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
result->type = CPP_NUMBER;
- parse_number (pfile, &result->val.str, c, 0);
+ parse_number (pfile, &result->val.str, 0);
break;
- case '$':
- if (!CPP_OPTION (pfile, dollars_in_ident))
- goto random_char;
- /* Fall through... */
+ case 'L':
+ /* 'L' may introduce wide characters or strings. */
+ {
+ const unsigned char *pos = buffer->cur;
+
+ c = get_effective_char (pfile);
+ if (c == '\'' || c == '"')
+ {
+ result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
+ parse_string (pfile, result, c);
+ break;
+ }
+ buffer->cur = pos;
+ }
+ /* Fall through. */
+ start_ident:
case '_':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'G': case 'H': case 'I': case 'J': case 'K':
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;
result->val.node = parse_identifier (pfile);
- /* 'L' may introduce wide characters or strings. */
- if (result->val.node == pfile->spec_nodes.n_L)
- {
- c = *buffer->cur;
- if (c == '\'' || c == '"')
- {
- buffer->cur++;
- result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
- parse_string (pfile, result, c);
- }
- }
/* Convert named operators to their proper types. */
- else if (result->val.node->flags & NODE_OPERATOR)
+ if (result->val.node->flags & NODE_OPERATOR)
{
result->flags |= NAMED_OP;
result->type = result->val.node->value.operator;
}
/* Save the comment as a token in its own right. */
- save_comment (pfile, result, comment_start);
+ save_comment (pfile, result, comment_start, c);
break;
case '<':
else if (ISDIGIT (c))
{
result->type = CPP_NUMBER;
- parse_number (pfile, &result->val.str, c, 1);
+ parse_number (pfile, &result->val.str, 1);
}
else if (c == '*' && CPP_OPTION (pfile, cplusplus))
result->type = CPP_DOT_STAR;
/* @ is a punctuator in Objective C. */
case '@': result->type = CPP_ATSIGN; break;
+ case '$':
+ if (CPP_OPTION (pfile, dollars_in_ident))
+ goto start_ident;
+ /* Fall through... */
+
random_char:
default:
result->type = CPP_OTHER;
return result;
}
-/* An upper bound on the number of bytes needed to spell a token,
+/* An upper bound on the number of bytes needed to spell TOKEN,
including preceding whitespace. */
unsigned int
cpp_token_len (token)
break;
case SPELL_NONE:
- cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
+ cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
break;
}
return buffer;
}
-/* Returns a token as a null-terminated string. The string is
- temporary, and automatically freed later. Useful for diagnostics. */
+/* Returns TOKEN spelt as a null-terminated string. The string is
+ freed when the reader is destroyed. Useful for diagnostics. */
unsigned char *
cpp_token_as_text (pfile, token)
cpp_reader *pfile;
return start;
}
-/* Used by C front ends. Should really move to using cpp_token_as_text. */
+/* Used by C front ends, which really should move to using
+ cpp_token_as_text. */
const char *
cpp_type2name (type)
enum cpp_ttype type;
accidental token paste for output. For simplicity, it is
conservative, and occasionally advises a space where one is not
needed, e.g. "." and ".2". */
-
int
cpp_avoid_paste (pfile, token1, token2)
cpp_reader *pfile;
return 1;
if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
+ cpp_warning (pfile, "the meaning of '\\%c' is different in traditional C", c);
length = (c == 'u' ? 4: 8);
/* Interpret an escape sequence, and return its value. PSTR points to
the input pointer, which is just after the backslash. LIMIT is how
much text we have. MASK is a bitmask for the precision for the
- destination type (char or wchar_t). TRADITIONAL, if true, does not
- interpret escapes that did not exist in traditional C.
+ destination type (char or wchar_t).
Handles all relevant diagnostics. */
-
unsigned int
-cpp_parse_escape (pfile, pstr, limit, mask, traditional)
+cpp_parse_escape (pfile, pstr, limit, mask)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
unsigned HOST_WIDE_INT mask;
- int traditional;
{
int unknown = 0;
const unsigned char *str = *pstr;
case 'a':
if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
- if (!traditional)
- c = TARGET_BELL;
+ cpp_warning (pfile, "the meaning of '\\a' is different in traditional C");
+ c = TARGET_BELL;
break;
case 'e': case 'E':
case 'x':
if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
+ cpp_warning (pfile, "the meaning of '\\x' is different in traditional C");
- if (!traditional)
{
unsigned int i = 0, overflow = 0;
int digits_found = 0;
#endif
/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts, if not
- TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
- that did not exist in traditional C. PCHARS_SEEN points to a
- variable that is filled in with the number of characters seen. */
+ WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
+ to a variable that is filled in with the number of characters seen. */
HOST_WIDE_INT
-cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
+cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
cpp_reader *pfile;
const cpp_token *token;
int warn_multi;
- int traditional;
unsigned int *pchars_seen;
{
const unsigned char *str = token->val.str.text;
unsigned int width, max_chars, c;
unsigned HOST_WIDE_INT mask;
HOST_WIDE_INT result = 0;
+ bool unsigned_p;
#ifdef MULTIBYTE_CHARS
(void) local_mbtowc (NULL, NULL, 0);
/* Width in bits. */
if (token->type == CPP_CHAR)
- width = MAX_CHAR_TYPE_SIZE;
+ {
+ width = MAX_CHAR_TYPE_SIZE;
+ unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
+ }
else
- width = MAX_WCHAR_TYPE_SIZE;
+ {
+ width = MAX_WCHAR_TYPE_SIZE;
+ unsigned_p = WCHAR_UNSIGNED;
+ }
if (width < HOST_BITS_PER_WIDE_INT)
mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
#endif
if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
+ c = cpp_parse_escape (pfile, &str, limit, mask);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
chars_seen = max_chars;
cpp_warning (pfile, "character constant too long");
}
- else if (chars_seen > 1 && !traditional && warn_multi)
+ else if (chars_seen > 1 && warn_multi)
cpp_warning (pfile, "multi-character character constant");
- /* If char type is signed, sign-extend the constant. The
- __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
- if (token->type == CPP_CHAR && chars_seen)
+ /* If the relevant type is signed, sign-extend the constant. */
+ if (chars_seen)
{
unsigned int nbits = chars_seen * width;
mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
- if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
- || ((result >> (nbits - 1)) & 1) == 0)
+ if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
result &= mask;
else
result |= ~mask;