#include "config.h"
#include "system.h"
-#include "coretypes.h"
-#include "tm.h"
#include "cpplib.h"
#include "cpphash.h"
unsigned int, enum cpp_ttype);
static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
static int name_p (cpp_reader *, const cpp_string *);
-static cppchar_t maybe_read_ucn (cpp_reader *, const uchar **);
static tokenrun *next_tokenrun (tokenrun *);
-static unsigned int hex_digit_value (unsigned int);
static _cpp_buff *new_buff (size_t);
if (buffer->notes_used == buffer->notes_cap)
{
buffer->notes_cap = buffer->notes_cap * 2 + 200;
- buffer->notes = (_cpp_line_note *)
- xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
+ buffer->notes = xrealloc (buffer->notes,
+ buffer->notes_cap * sizeof (_cpp_line_note));
}
buffer->notes[buffer->notes_used].pos = pos;
if (!buffer->from_stage3)
{
- d = (uchar *) s;
+ /* Short circuit for the common case of an un-escaped line with
+ no trigraphs. The primary win here is by not writing any
+ data back to memory until we have to. */
+ for (;;)
+ {
+ c = *++s;
+ if (c == '\n' || c == '\r')
+ {
+ d = (uchar *) s;
+
+ if (s == buffer->rlimit)
+ goto done;
+
+ /* DOS line ending? */
+ if (c == '\r' && s[1] == '\n')
+ s++;
+
+ if (s == buffer->rlimit)
+ goto done;
+
+ /* check for escaped newline */
+ p = d;
+ while (p != buffer->next_line && is_nvspace (p[-1]))
+ p--;
+ if (p == buffer->next_line || p[-1] != '\\')
+ goto done;
+
+ /* Have an escaped newline; process it and proceed to
+ the slow path. */
+ add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
+ d = p - 2;
+ buffer->next_line = p - 1;
+ break;
+ }
+ if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
+ {
+ /* Have a trigraph. We may or may not have to convert
+ it. Add a line note regardless, for -Wtrigraphs. */
+ add_line_note (buffer, s, s[2]);
+ if (CPP_OPTION (pfile, trigraphs))
+ {
+ /* We do, and that means we have to switch to the
+ slow path. */
+ d = (uchar *) s;
+ *d = _cpp_trigraph_map[s[2]];
+ s += 2;
+ break;
+ }
+ }
+ }
+
for (;;)
{
s++;
}
+ done:
*d = '\n';
/* A sentinel note that should never be processed. */
add_line_note (buffer, d + 1, '\n');
if (note->type == '\\' || note->type == ' ')
{
if (note->type == ' ' && !in_comment)
- cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
+ cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
"backslash and newline separated by space");
if (buffer->next_line > buffer->rlimit)
{
- cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
+ cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line, col,
"backslash-newline at end of file");
/* Prevent "no newline at end of file" warning. */
buffer->next_line = buffer->rlimit;
&& (!in_comment || warn_in_comment (pfile, note)))
{
if (CPP_OPTION (pfile, trigraphs))
- cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
+ cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line, col,
"trigraph ??%c converted to %c",
note->type,
(int) _cpp_trigraph_map[note->type]);
else
- cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
- "trigraph ??%c ignored",
- note->type);
+ {
+ cpp_error_with_line
+ (pfile, CPP_DL_WARNING, pfile->line, col,
+ "trigraph ??%c ignored, use -trigraphs to enable",
+ note->type);
+ }
}
}
else
_cpp_skip_block_comment (cpp_reader *pfile)
{
cpp_buffer *buffer = pfile->buffer;
- cppchar_t c;
+ const uchar *cur = buffer->cur;
+ uchar c;
- buffer->cur++;
- if (*buffer->cur == '/')
- buffer->cur++;
+ cur++;
+ if (*cur == '/')
+ cur++;
for (;;)
{
- c = *buffer->cur++;
-
/* People like decorating comments with '*', so check for '/'
instead for efficiency. */
+ c = *cur++;
+
if (c == '/')
{
- if (buffer->cur[-2] == '*')
+ if (cur[-2] == '*')
break;
/* Warn about potential nested comments, but not if the '/'
comes immediately before the true comment delimiter.
Don't bother to get it right across escaped newlines. */
if (CPP_OPTION (pfile, warn_comments)
- && buffer->cur[0] == '*' && buffer->cur[1] != '/')
- cpp_error_with_line (pfile, DL_WARNING,
- pfile->line, CPP_BUF_COL (buffer),
- "\"/*\" within comment");
+ && cur[0] == '*' && cur[1] != '/')
+ {
+ buffer->cur = cur;
+ cpp_error_with_line (pfile, CPP_DL_WARNING,
+ pfile->line, CPP_BUF_COL (buffer),
+ "\"/*\" within comment");
+ }
}
else if (c == '\n')
{
- buffer->cur--;
+ buffer->cur = cur - 1;
_cpp_process_line_notes (pfile, true);
if (buffer->next_line >= buffer->rlimit)
return true;
_cpp_clean_line (pfile);
pfile->line++;
+ cur = buffer->cur;
}
}
+ buffer->cur = cur;
_cpp_process_line_notes (pfile, true);
return false;
}
else if (c == '\0')
saw_NUL = true;
else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
- cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
+ cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line,
CPP_BUF_COL (buffer),
"%s in preprocessing directive",
c == '\f' ? "form feed" : "vertical tab");
while (is_nvspace (c));
if (saw_NUL)
- cpp_error (pfile, DL_WARNING, "null character(s) ignored");
+ cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
buffer->cur--;
}
if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
{
CPP_OPTION (pfile, warn_dollars) = 0;
- cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
+ cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
}
return true;
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
buffer->cur += 2;
- if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
+ if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
return true;
buffer->cur -= 2;
}
{
/* It is allowed to poison the same identifier twice. */
if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
- cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
+ cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
NODE_NAME (result));
/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
replacement list of a variadic macro. */
if (result == pfile->spec_nodes.n__VA_ARGS__
&& !pfile->state.va_args_ok)
- cpp_error (pfile, DL_PEDWARN,
+ cpp_error (pfile, CPP_DL_PEDWARN,
"__VA_ARGS__ can only appear in the expansion"
" of a C99 variadic macro");
}
}
if (saw_NUL && !pfile->state.skipping)
- cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
+ cpp_error (pfile, CPP_DL_WARNING,
+ "null character(s) preserved in literal");
pfile->buffer->cur = cur;
create_literal (pfile, token, base, cur - base, type);
{
/* Only warn once. */
buffer->next_line = buffer->rlimit;
- cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
+ cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line - 1,
CPP_BUF_COLUMN (buffer, buffer->cur),
"no newline at end of file");
}
- if (!buffer->prev)
- return false;
-
- if (buffer->return_at_eof)
- {
- _cpp_pop_buffer (pfile);
- return false;
- }
-
_cpp_pop_buffer (pfile);
+ if (pfile->buffer == NULL)
+ return false;
}
}
fresh_line:
result->flags = 0;
- if (pfile->buffer->need_line)
+ buffer = pfile->buffer;
+ if (buffer->need_line)
{
if (!_cpp_get_fresh_line (pfile))
{
if (c == '*')
{
if (_cpp_skip_block_comment (pfile))
- cpp_error (pfile, DL_ERROR, "unterminated comment");
+ cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
}
else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
|| CPP_IN_SYSTEM_HEADER (pfile)))
if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
&& ! buffer->warned_cplusplus_comments)
{
- cpp_error (pfile, DL_PEDWARN,
+ cpp_error (pfile, CPP_DL_PEDWARN,
"C++ style comments are not allowed in ISO C90");
- cpp_error (pfile, DL_PEDWARN,
+ cpp_error (pfile, CPP_DL_PEDWARN,
"(this will be reported only once per input file)");
buffer->warned_cplusplus_comments = 1;
}
if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
- cpp_error (pfile, DL_WARNING, "multi-line comment");
+ cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
}
else if (c == '=')
{
break;
case SPELL_NONE:
- cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
+ cpp_error (pfile, CPP_DL_ICE,
+ "unspellable token %s", TOKEN_NAME (token));
break;
}
putc ('\n', fp);
}
-/* Returns the value of a hexadecimal digit. */
-static unsigned int
-hex_digit_value (unsigned int c)
-{
- if (hex_p (c))
- return hex_value (c);
- else
- abort ();
-}
-
-/* Read a possible universal character name starting at *PSTR. */
-static cppchar_t
-maybe_read_ucn (cpp_reader *pfile, const uchar **pstr)
-{
- cppchar_t result, c = (*pstr)[-1];
-
- result = _cpp_valid_ucn (pfile, pstr, false);
- if (result)
- {
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\%c' is different in traditional C",
- (int) c);
-
- if (CPP_OPTION (pfile, EBCDIC))
- {
- cpp_error (pfile, DL_ERROR,
- "universal character with an EBCDIC target");
- result = 0x3f; /* EBCDIC invalid character */
- }
- }
-
- return result;
-}
-
-/* Returns the value of an escape sequence, truncated to the correct
- target precision. PSTR points to the input pointer, which is just
- after the backslash. LIMIT is how much text we have. WIDE is true
- if the escape sequence is part of a wide character constant or
- string literal. Handles all relevant diagnostics. */
-cppchar_t
-cpp_parse_escape (cpp_reader *pfile, const unsigned char **pstr,
- const unsigned char *limit, int wide)
-{
- /* Values of \a \b \e \f \n \r \t \v respectively. */
- static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
- static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
-
- int unknown = 0;
- const unsigned char *str = *pstr, *charconsts;
- cppchar_t c, ucn, mask;
- unsigned int width;
-
- if (CPP_OPTION (pfile, EBCDIC))
- charconsts = ebcdic;
- else
- charconsts = ascii;
-
- if (wide)
- width = CPP_OPTION (pfile, wchar_precision);
- else
- width = CPP_OPTION (pfile, char_precision);
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- c = *str++;
- switch (c)
- {
- case '\\': case '\'': case '"': case '?': break;
- case 'b': c = charconsts[1]; break;
- case 'f': c = charconsts[3]; break;
- case 'n': c = charconsts[4]; break;
- case 'r': c = charconsts[5]; break;
- case 't': c = charconsts[6]; break;
- case 'v': c = charconsts[7]; break;
-
- case '(': case '{': case '[': case '%':
- /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
- '\%' is used to prevent SCCS from getting confused. */
- unknown = CPP_PEDANTIC (pfile);
- break;
-
- case 'a':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\a' is different in traditional C");
- c = charconsts[0];
- break;
-
- case 'e': case 'E':
- if (CPP_PEDANTIC (pfile))
- cpp_error (pfile, DL_PEDWARN,
- "non-ISO-standard escape sequence, '\\%c'", (int) c);
- c = charconsts[2];
- break;
-
- case 'u': case 'U':
- ucn = maybe_read_ucn (pfile, &str);
- if (ucn)
- c = ucn;
- else
- unknown = true;
- break;
-
- case 'x':
- if (CPP_WTRADITIONAL (pfile))
- cpp_error (pfile, DL_WARNING,
- "the meaning of '\\x' is different in traditional C");
-
- {
- cppchar_t i = 0, overflow = 0;
- int digits_found = 0;
-
- while (str < limit)
- {
- c = *str;
- if (! ISXDIGIT (c))
- break;
- str++;
- overflow |= i ^ (i << 4 >> 4);
- i = (i << 4) + hex_digit_value (c);
- digits_found = 1;
- }
-
- if (!digits_found)
- cpp_error (pfile, DL_ERROR,
- "\\x used with no following hex digits");
-
- if (overflow | (i != (i & mask)))
- {
- cpp_error (pfile, DL_PEDWARN,
- "hex escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- {
- size_t count = 0;
- cppchar_t i = c - '0';
-
- while (str < limit && ++count < 3)
- {
- c = *str;
- if (c < '0' || c > '7')
- break;
- str++;
- i = (i << 3) + c - '0';
- }
-
- if (i != (i & mask))
- {
- cpp_error (pfile, DL_PEDWARN,
- "octal escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- default:
- unknown = 1;
- break;
- }
-
- if (unknown)
- {
- if (ISGRAPH (c))
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence '\\%c'", (int) c);
- else
- cpp_error (pfile, DL_PEDWARN,
- "unknown escape sequence: '\\%03o'", (int) c);
- }
-
- if (c > mask)
- {
- cpp_error (pfile, DL_PEDWARN,
- "escape sequence out of range for its type");
- c &= mask;
- }
-
- *pstr = str;
- return c;
-}
-
-/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
- points to a variable that is filled in with the number of
- characters seen, and UNSIGNEDP to a variable that indicates whether
- the result has signed type. */
-cppchar_t
-cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
- unsigned int *pchars_seen, int *unsignedp)
-{
- const unsigned char *str, *limit;
- unsigned int chars_seen = 0;
- size_t width, max_chars;
- cppchar_t c, mask, result = 0;
- bool unsigned_p;
-
- str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
- limit = token->val.str.text + token->val.str.len - 1;
-
- if (token->type == CPP_CHAR)
- {
- width = CPP_OPTION (pfile, char_precision);
- max_chars = CPP_OPTION (pfile, int_precision) / width;
- unsigned_p = CPP_OPTION (pfile, unsigned_char);
- }
- else
- {
- width = CPP_OPTION (pfile, wchar_precision);
- max_chars = 1;
- unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
- }
-
- if (width < BITS_PER_CPPCHAR_T)
- mask = ((cppchar_t) 1 << width) - 1;
- else
- mask = ~0;
-
- while (str < limit)
- {
- c = *str++;
-
- if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
-
-#ifdef MAP_CHARACTER
- if (ISPRINT (c))
- c = MAP_CHARACTER (c);
-#endif
-
- chars_seen++;
-
- /* Truncate the character, scale the result and merge the two. */
- c &= mask;
- if (width < BITS_PER_CPPCHAR_T)
- result = (result << width) | c;
- else
- result = c;
- }
-
- if (chars_seen == 0)
- cpp_error (pfile, DL_ERROR, "empty character constant");
- else if (chars_seen > 1)
- {
- /* Multichar charconsts are of type int and therefore signed. */
- unsigned_p = 0;
-
- if (chars_seen > max_chars)
- {
- chars_seen = max_chars;
- cpp_error (pfile, DL_WARNING,
- "character constant too long for its type");
- }
- else if (CPP_OPTION (pfile, warn_multichar))
- cpp_error (pfile, DL_WARNING, "multi-character character constant");
- }
-
- /* Sign-extend or truncate the constant to cppchar_t. The value is
- in WIDTH bits, but for multi-char charconsts it's value is the
- full target type's width. */
- if (chars_seen > 1)
- width *= max_chars;
- if (width < BITS_PER_CPPCHAR_T)
- {
- mask = ((cppchar_t) 1 << width) - 1;
- if (unsigned_p || !(result & (1 << (width - 1))))
- result &= mask;
- else
- result |= ~mask;
- }
-
- *pchars_seen = chars_seen;
- *unsignedp = unsigned_p;
- return result;
-}
-
/* Memory buffers. Changing these three constants can have a dramatic
effect on performance. The values here are reasonable defaults,
but might be tuned. If you adjust them, be sure to test across a
/* Free a chain of buffers starting at BUFF. */
void
-_cpp_free_buff (buff)
- _cpp_buff *buff;
+_cpp_free_buff (_cpp_buff *buff)
{
_cpp_buff *next;