#include "config.h"
#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
#include "cpplib.h"
#include "cpphash.h"
static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
- unsigned int *));
+ unsigned int *));
static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
cppchar_t));
+static bool continue_after_nul PARAMS ((cpp_reader *));
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
- const unsigned char *, unsigned int *));
+ const unsigned char *, cppchar_t *));
static tokenrun *next_tokenrun PARAMS ((tokenrun *));
static unsigned int hex_digit_value PARAMS ((unsigned int));
cpp_buffer *buffer = pfile->buffer;
/* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
- only accept CR-LF; maybe we should fall back to that behaviour? */
+ only accept CR-LF; maybe we should fall back to that behavior? */
if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
buffer->cur++;
the second '?'.
Warn if necessary, and returns true if the sequence forms a
- trigraph and the trigraph should be honoured. */
+ trigraph and the trigraph should be honored. */
static bool
trigraph_p (pfile)
cpp_reader *pfile;
if (__builtin_expect (next == '?' || next == '\\', 0))
next = skip_escaped_newlines (pfile);
- return next;
+ return next;
}
/* Skip a C-style block comment. We find the end of the comment by
seeing if an asterisk is before every '/' we encounter. Returns
- non-zero if comment terminated by EOF, zero otherwise. */
+ nonzero if comment terminated by EOF, zero otherwise. */
static int
skip_block_comment (pfile)
cpp_reader *pfile;
}
/* Skip a C++ line comment, leaving buffer->cur pointing to the
- terminating newline. Handles escaped newlines. Returns non-zero
+ terminating newline. Handles escaped newlines. Returns nonzero
if a multiline comment. */
static int
skip_line_comment (pfile)
if (!is_idchar (string->text[i]))
return 0;
- return 1;
+ return 1;
}
/* Parse an identifier, skipping embedded backslash-newlines. This is
/* Handle normal identifier characters in this loop. */
do
- {
+ {
prevc = c;
- obstack_1grow (stack, c);
+ obstack_1grow (stack, c);
- if (c == '$')
- saw_dollar++;
+ if (c == '$')
+ saw_dollar++;
- c = *buffer->cur++;
- }
+ c = *buffer->cur++;
+ }
while (is_idchar (c));
}
}
/* Parse a number, beginning with character C, skipping embedded
- backslash-newlines. LEADING_PERIOD is non-zero if there was a "."
+ backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
before C. Place the result in NUMBER. */
static void
parse_number (pfile, number, leading_period)
if (char_len == -1)
{
cpp_error (pfile, DL_WARNING,
- "ignoring invalid multibyte character");
+ "ignoring invalid multibyte character");
char_len = 1;
c = *buffer->cur++;
}
unterminated:
if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
cpp_error (pfile, DL_ERROR, "missing terminating %c character",
- terminator);
+ (int) terminator);
buffer->cur--;
break;
}
{
unsigned char *buffer;
unsigned int len, clen;
-
+
len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
/* C++ comments probably (not definitely) have moved past a new
clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
buffer = _cpp_unaligned_alloc (pfile, clen);
-
+
token->type = CPP_COMMENT;
token->val.str.len = clen;
token->val.str.text = buffer;
return result;
}
+/* A NUL terminates the current buffer. For ISO preprocessing this is
+ EOF, but for traditional preprocessing it indicates we need a line
+ refill. Returns TRUE to continue preprocessing a new buffer, FALSE
+ to return a CPP_EOF to the caller.
+
+ PFILE is the reader whose current buffer (pfile->buffer) has just
+ yielded the NUL. The buffer's saved_flags is reset to BOL on every
+ path. In traditional mode the result is whatever
+ _cpp_read_logical_line_trad reports, except that FALSE is returned
+ immediately while in a directive. Otherwise the result is TRUE only
+ when a buffer was popped and that buffer did not have return_at_eof
+ set. */
+static bool
+continue_after_nul (pfile)
+ cpp_reader *pfile;
+{
+ cpp_buffer *buffer = pfile->buffer;
+ bool more = false; /* Default: hand a CPP_EOF back to the caller. */
+
+ buffer->saved_flags = BOL;
+ if (CPP_OPTION (pfile, traditional))
+ {
+ if (pfile->state.in_directive) /* No refill inside a directive. */
+ return false;
+
+ _cpp_remove_overlay (pfile);
+ more = _cpp_read_logical_line_trad (pfile); /* Decides the result. */
+ _cpp_overlay_buffer (pfile, pfile->out.base,
+ pfile->out.cur - pfile->out.base);
+ pfile->line = pfile->out.first_line;
+ }
+ else
+ {
+ /* Stop parsing arguments with a CPP_EOF. When we finally come
+ back here, do the work of popping the buffer. */
+ if (!pfile->state.parsing_args)
+ {
+ if (buffer->cur != buffer->line_base)
+ {
+ /* Non-empty files should end in a newline. Don't warn
+ for command line and _Pragma buffers. */
+ if (!buffer->from_stage3)
+ cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
+ handle_newline (pfile);
+ }
+
+ /* Similarly, finish an in-progress directive with CPP_EOF
+ before popping the buffer. The last buffer (one with no
+ prev) is never popped. */
+ if (!pfile->state.in_directive && buffer->prev)
+ {
+ more = !buffer->return_at_eof; /* Sample the flag before the pop;
+ presumably BUFFER is not valid afterwards. */
+ _cpp_pop_buffer (pfile);
+ }
+ }
+ }
+
+ return more;
+}
+
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
do { \
if (get_effective_char (pfile) == CHAR) \
if (skip_whitespace (pfile, c))
goto skipped_white;
- /* EOF. */
+ /* End of buffer. */
buffer->cur--;
- buffer->saved_flags = BOL;
- if (!pfile->state.parsing_args && !pfile->state.in_directive)
- {
- if (buffer->cur != buffer->line_base)
- {
- /* Non-empty files should end in a newline. Don't warn
- for command line and _Pragma buffers. */
- if (!buffer->from_stage3)
- cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
- handle_newline (pfile);
- }
-
- /* Don't pop the last buffer. */
- if (buffer->prev)
- {
- unsigned char stop = buffer->return_at_eof;
-
- _cpp_pop_buffer (pfile);
- if (!stop)
- goto fresh_line;
- }
- }
+ if (continue_after_nul (pfile))
+ goto fresh_line;
result->type = CPP_EOF;
break;
case 'L':
/* 'L' may introduce wide characters or strings. */
- {
- const unsigned char *pos = buffer->cur;
+ {
+ const unsigned char *pos = buffer->cur;
- c = get_effective_char (pfile);
- if (c == '\'' || c == '"')
- {
- result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
- parse_string (pfile, result, c);
- break;
- }
- buffer->cur = pos;
- }
- /* Fall through. */
+ c = get_effective_char (pfile);
+ if (c == '\'' || c == '"')
+ {
+ result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
+ parse_string (pfile, result, c);
+ break;
+ }
+ buffer->cur = pos;
+ }
+ /* Fall through. */
start_ident:
case '_':
if (result->val.node->flags & NODE_OPERATOR)
{
result->flags |= NAMED_OP;
- result->type = result->val.node->value.operator;
+ result->type = result->val.node->directive_index;
}
break;
&& ! buffer->warned_cplusplus_comments)
{
cpp_error (pfile, DL_PEDWARN,
- "C++ style comments are not allowed in ISO C89");
+ "C++ style comments are not allowed in ISO C90");
cpp_error (pfile, DL_PEDWARN,
"(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
result->type = CPP_AND;
}
break;
-
+
case '|':
c = get_effective_char (pfile);
if (c == '|')
case '}': result->type = CPP_CLOSE_BRACE; break;
case ';': result->type = CPP_SEMICOLON; break;
- /* @ is a punctuator in Objective C. */
+ /* @ is a punctuator in Objective-C. */
case '@': result->type = CPP_ATSIGN; break;
case '$':
goto spell_ident;
else
spelling = TOKEN_NAME (token);
-
+
while ((c = *spelling++) != '\0')
*buffer++ = c;
}
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
- unsigned int *pc;
+ cppchar_t *pc;
{
const unsigned char *p = *pstr;
unsigned int code = 0;
return 0;
}
-/* Interpret an escape sequence, and return its value. PSTR points to
- the input pointer, which is just after the backslash. LIMIT is how
- much text we have. MASK is a bitmask for the precision for the
- destination type (char or wchar_t).
-
- Handles all relevant diagnostics. */
-unsigned int
-cpp_parse_escape (pfile, pstr, limit, mask)
+/* Returns the value of an escape sequence, truncated to the correct
+ target precision. PSTR points to the input pointer, which is just
+ after the backslash. LIMIT is how much text we have. WIDE is true
+ if the escape sequence is part of a wide character constant or
+ string literal. Handles all relevant diagnostics. */
+cppchar_t
+cpp_parse_escape (pfile, pstr, limit, wide)
cpp_reader *pfile;
const unsigned char **pstr;
const unsigned char *limit;
- unsigned HOST_WIDE_INT mask;
+ int wide;
{
int unknown = 0;
const unsigned char *str = *pstr;
- unsigned int c = *str++;
+ cppchar_t c, mask;
+ unsigned int width;
+ if (wide)
+ width = CPP_OPTION (pfile, wchar_precision);
+ else
+ width = CPP_OPTION (pfile, char_precision);
+ if (width < BITS_PER_CPPCHAR_T)
+ mask = ((cppchar_t) 1 << width) - 1;
+ else
+ mask = ~0;
+
+ c = *str++;
switch (c)
{
case '\\': case '\'': case '"': case '?': break;
case 'e': case 'E':
if (CPP_PEDANTIC (pfile))
cpp_error (pfile, DL_PEDWARN,
- "non-ISO-standard escape sequence, '\\%c'", c);
+ "non-ISO-standard escape sequence, '\\%c'", (int) c);
c = TARGET_ESC;
break;
-
+
case 'u': case 'U':
unknown = maybe_read_ucs (pfile, &str, limit, &c);
break;
cpp_error (pfile, DL_WARNING,
"the meaning of '\\x' is different in traditional C");
- {
- unsigned int i = 0, overflow = 0;
- int digits_found = 0;
+ {
+ cppchar_t i = 0, overflow = 0;
+ int digits_found = 0;
- while (str < limit)
- {
- c = *str;
- if (! ISXDIGIT (c))
- break;
- str++;
- overflow |= i ^ (i << 4 >> 4);
- i = (i << 4) + hex_digit_value (c);
- digits_found = 1;
- }
+ while (str < limit)
+ {
+ c = *str;
+ if (! ISXDIGIT (c))
+ break;
+ str++;
+ overflow |= i ^ (i << 4 >> 4);
+ i = (i << 4) + hex_digit_value (c);
+ digits_found = 1;
+ }
- if (!digits_found)
- cpp_error (pfile, DL_ERROR,
+ if (!digits_found)
+ cpp_error (pfile, DL_ERROR,
"\\x used with no following hex digits");
- if (overflow | (i != (i & mask)))
- {
- cpp_error (pfile, DL_PEDWARN,
- "hex escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
+ if (overflow | (i != (i & mask)))
+ {
+ cpp_error (pfile, DL_PEDWARN,
+ "hex escape sequence out of range");
+ i &= mask;
+ }
+ c = i;
+ }
break;
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
- unsigned int i = c - '0';
- int count = 0;
+ size_t count = 0;
+ cppchar_t i = c - '0';
while (str < limit && ++count < 3)
{
if (unknown)
{
if (ISGRAPH (c))
- cpp_error (pfile, DL_PEDWARN, "unknown escape sequence '\\%c'", c);
+ cpp_error (pfile, DL_PEDWARN,
+ "unknown escape sequence '\\%c'", (int) c);
else
- cpp_error (pfile, DL_PEDWARN, "unknown escape sequence: '\\%03o'", c);
+ cpp_error (pfile, DL_PEDWARN,
+ "unknown escape sequence: '\\%03o'", (int) c);
}
if (c > mask)
- cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
+ {
+ cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
+ c &= mask;
+ }
*pstr = str;
return c;
}
-#ifndef MAX_CHAR_TYPE_SIZE
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-
-#ifndef MAX_WCHAR_TYPE_SIZE
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
-
/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
- to a variable that is filled in with the number of characters seen. */
-HOST_WIDE_INT
-cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
+ Multi-character charconsts draw a warning when the warn_multichar
+ option is set. PCHARS_SEEN points to a variable that is filled in
+ with the number of characters seen, and UNSIGNEDP to a variable that
+ indicates whether the result has unsigned type. */
+cppchar_t
+cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
cpp_reader *pfile;
const cpp_token *token;
- int warn_multi;
unsigned int *pchars_seen;
+ int *unsignedp;
{
const unsigned char *str = token->val.str.text;
const unsigned char *limit = str + token->val.str.len;
unsigned int chars_seen = 0;
- unsigned int width, max_chars, c;
- unsigned HOST_WIDE_INT mask;
- HOST_WIDE_INT result = 0;
+ size_t width, max_chars;
+ cppchar_t c, mask, result = 0;
bool unsigned_p;
#ifdef MULTIBYTE_CHARS
/* Width in bits. */
if (token->type == CPP_CHAR)
{
- width = MAX_CHAR_TYPE_SIZE;
- unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
+ width = CPP_OPTION (pfile, char_precision);
+ max_chars = CPP_OPTION (pfile, int_precision) / width;
+ unsigned_p = CPP_OPTION (pfile, unsigned_char);
}
else
{
- width = MAX_WCHAR_TYPE_SIZE;
- unsigned_p = WCHAR_UNSIGNED;
+ width = CPP_OPTION (pfile, wchar_precision);
+ max_chars = 1;
+ unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
}
- if (width < HOST_BITS_PER_WIDE_INT)
- mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
+ if (width < BITS_PER_CPPCHAR_T)
+ mask = ((cppchar_t) 1 << width) - 1;
else
mask = ~0;
- max_chars = HOST_BITS_PER_WIDE_INT / width;
while (str < limit)
{
#endif
if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, mask);
+ c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
#ifdef MAP_CHARACTER
if (ISPRINT (c))
c = MAP_CHARACTER (c);
#endif
-
- /* Merge character into result; ignore excess chars. */
- if (++chars_seen <= max_chars)
- {
- if (width < HOST_BITS_PER_WIDE_INT)
- result = (result << width) | (c & mask);
- else
- result = c;
- }
+
+ chars_seen++;
+
+ /* Truncate the character, scale the result and merge the two. */
+ c &= mask;
+ if (width < BITS_PER_CPPCHAR_T)
+ result = (result << width) | c;
+ else
+ result = c;
}
if (chars_seen == 0)
cpp_error (pfile, DL_ERROR, "empty character constant");
- else if (chars_seen > max_chars)
+ else if (chars_seen > 1)
{
- chars_seen = max_chars;
- cpp_error (pfile, DL_WARNING, "character constant too long");
+ /* Multichar charconsts are of type int and therefore signed. */
+ unsigned_p = 0;
+
+ if (chars_seen > max_chars)
+ {
+ chars_seen = max_chars;
+ cpp_error (pfile, DL_WARNING,
+ "character constant too long for its type");
+ }
+ else if (CPP_OPTION (pfile, warn_multichar))
+ cpp_error (pfile, DL_WARNING, "multi-character character constant");
}
- else if (chars_seen > 1 && warn_multi)
- cpp_error (pfile, DL_WARNING, "multi-character character constant");
- /* If relevant type is signed, sign-extend the constant. */
- if (chars_seen)
+ /* Sign-extend or truncate the constant to cppchar_t. The value is
+ in WIDTH bits, but for multi-char charconsts its value spans the
+ full target type's width. */
+ if (chars_seen > 1)
+ width *= max_chars;
+ if (width < BITS_PER_CPPCHAR_T)
{
- unsigned int nbits = chars_seen * width;
-
- mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
- if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
+ mask = ((cppchar_t) 1 << width) - 1;
+ if (unsigned_p || !(result & (1 << (width - 1))))
result &= mask;
else
result |= ~mask;
}
*pchars_seen = chars_seen;
+ *unsignedp = unsigned_p;
return result;
}
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
-struct dummy
-{
- char c;
- union
- {
- double d;
- int *p;
- } u;
-};
-
-#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
-#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
-
/* Create a new allocation buffer. Place the control block at the end
of the buffer, so that buffer overflows will cause immediate chaos. */
static _cpp_buff *
if (len < MIN_BUFF_SIZE)
len = MIN_BUFF_SIZE;
- len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
+ len = CPP_ALIGN (len);
base = xmalloc (len + sizeof (_cpp_buff));
result = (_cpp_buff *) (base + len);