#include "cpphash.h"
/* The replacement text of a function-like macro is stored as a
- contiguous sequence of aligned blocks. Each block represents the
- portion of text from the start of the previous block (or the start
- of the macro replacement text in the case of the first block) to
- the next parameter, or the end of the replacement list if there
- are none left.
-
- Each block consists of an unsigned int, which is the length of text
- contained in the third part, an unsigned short, which is the
+ contiguous sequence of aligned blocks, each representing the text
+ between subsequent parameters in that text.
+
+ Each block comprises the length of text contained therein, the
one-based index of the argument that immediately follows that text,
and the text itself. The final block in the macro expansion is
- recognizable as it has an argument index of zero. */
+ easily recognizable as it has an argument index of zero. */
struct block
{
unsigned int argc;
};
-/* Lexing TODO: Handle -C, maybe -CC, and space in escaped newlines.
- Stop cpplex.c from recognizing comments and directives during its
- lexing pass. Get rid of line_base usage - seems pointless? Do we
- get escaped newline at EOF correct? */
+/* Lexing state. It is mostly used to prevent macro expansion. */
+enum ls {ls_none = 0, /* Normal state. */
+ ls_fun_macro, /* When looking for '('. */
+ ls_defined, /* After defined. */
+ ls_defined_close, /* Looking for ')' of defined(). */
+ ls_hash, /* After # in preprocessor conditional. */
+ ls_predicate, /* After the predicate, maybe paren? */
+ ls_answer}; /* In answer to predicate. */
+
+/* Lexing TODO: Maybe handle space in escaped newlines. Stop cpplex.c
+ from recognizing comments and directives during its lexing pass. */
static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
const uchar *));
-static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
+static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *,
+ int));
static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
-static const uchar *skip_comment PARAMS ((cpp_reader *, const uchar *));
+static const uchar *copy_comment PARAMS ((cpp_reader *, const uchar *, int));
static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
static void check_output_buffer PARAMS ((cpp_reader *, size_t));
static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
cpp_reader *pfile;
size_t n;
{
+ /* We might need two bytes to terminate an unterminated comment, and
+ one more to terminate the line with a NUL. */
+ n += 2 + 1;
+
if (n > (size_t) (pfile->out.limit - pfile->out.cur))
{
size_t size = pfile->out.cur - pfile->out.base;
}
/* To be called whenever a newline character is encountered in the
- input file, at CUR. Handles DOS, MAC and Unix ends of line, and
- returns the character after the newline sequence. */
+ input file, at CUR. Handles DOS, Mac and Unix ends of line, and
+ increments pfile->line.
+
+ Returns a pointer the character after the newline sequence. */
static const uchar *
handle_newline (pfile, cur)
cpp_reader *pfile;
pfile->line++;
if (cur[0] + cur[1] == '\r' + '\n')
cur++;
- pfile->buffer->line_base = cur + 1;
return cur + 1;
}
/* CUR points to any character in the buffer, not necessarily a
backslash. Advances CUR until all escaped newlines are skipped,
- and returns the new position. */
+ and returns the new position.
+
+ Warns if a file buffer ends in an escaped newline. */
static const uchar *
skip_escaped_newlines (pfile, cur)
cpp_reader *pfile;
const uchar *cur;
{
+ const uchar *orig_cur = cur;
+
while (*cur == '\\' && is_vspace (cur[1]))
cur = handle_newline (pfile, cur + 1);
+ if (cur != orig_cur && cur == RLIMIT (pfile->context) && pfile->buffer->inc)
+ cpp_error (pfile, DL_PEDWARN, "backslash-newline at end of file");
+
return cur;
}
-/* CUR points to the character after the asterisk introducing a
- comment. Returns the position after the comment. */
+/* CUR points to the asterisk introducing a comment in the input
+ buffer. IN_DEFINE is true if we are in the replacement text
+ of a macro.
+
+ The asterisk and following comment is copied to the buffer pointed
+ to by pfile->out.cur, which must be of sufficient size.
+ Unterminated comments are diagnosed, and correctly terminated in
+ the output. pfile->out.cur is updated depending upon IN_DEFINE,
+ -C, -CC and pfile->state.in_directive.
+
+ Returns a pointer to the first character after the comment in the
+ input buffer. */
static const uchar *
-skip_comment (pfile, cur)
+copy_comment (pfile, cur, in_define)
cpp_reader *pfile;
const uchar *cur;
+ int in_define;
{
unsigned int from_line = pfile->line;
- unsigned int c = 0, prevc = 0;
const uchar *limit = RLIMIT (pfile->context);
+ uchar *out = pfile->out.cur;
- while (cur < limit)
+ do
{
- prevc = c;
- c = *cur++;
+ unsigned int c = *cur++;
+ *out++ = c;
if (c == '/')
{
- if (prevc == '*')
- break;
+ /* An immediate slash does not terminate the comment. */
+ if (out[-2] == '*' && out - 2 > pfile->out.cur)
+ goto done;
+
if (*cur == '*' && cur[1] != '/'
&& CPP_OPTION (pfile, warn_comments))
cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
"\"/*\" within comment");
}
else if (is_vspace (c))
- cur = handle_newline (pfile, cur - 1);
+ {
+ cur = handle_newline (pfile, cur - 1);
+ /* Canonicalize newline sequences and skip escaped ones. */
+ if (out[-2] == '\\')
+ out -= 2;
+ else
+ out[-1] = '\n';
+ }
}
+ while (cur < limit);
+
+ cpp_error_with_line (pfile, DL_ERROR, from_line, 0, "unterminated comment");
+ *out++ = '*';
+ *out++ = '/';
- if (c != '/' || prevc != '*')
- cpp_error_with_line (pfile, DL_ERROR, from_line, 0,
- "unterminated comment");
+ done:
+ /* Comments in directives become spaces so that tokens are properly
+ separated when the ISO preprocessor re-lexes the line. The
+ exception is #define. */
+ if (pfile->state.in_directive)
+ {
+ if (in_define)
+ {
+ if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
+ pfile->out.cur--;
+ else
+ pfile->out.cur = out;
+ }
+ else
+ pfile->out.cur[-1] = ' ';
+ }
+ else if (CPP_OPTION (pfile, discard_comments))
+ pfile->out.cur--;
+ else
+ pfile->out.cur = out;
return cur;
}
-/* Skip any horizontal whitespace and comments beginning at CUR,
- returning the following character. */
+/* CUR points to any character in the input buffer. Skips over all
+ contiguous horizontal white space and NULs, including comments if
+ SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
+ character or the end of the current context. Escaped newlines are
+ removed.
+
+ The whitespace is copied verbatim to the output buffer, except that
+ comments are handled as described in copy_comment().
+ pfile->out.cur is updated.
+
+ Returns a pointer to the first character after the whitespace in
+ the input buffer. */
static const uchar *
-skip_whitespace (pfile, cur)
+skip_whitespace (pfile, cur, skip_comments)
cpp_reader *pfile;
const uchar *cur;
+ int skip_comments;
{
- const uchar *tmp;
+ uchar *out = pfile->out.cur;
for (;;)
{
- while (is_nvspace (*cur) && *cur != 0)
- cur++;
+ unsigned int c = *cur++;
+ *out++ = c;
- if (*cur == '\0' && cur != RLIMIT (pfile->context))
+ if (is_nvspace (c) && c)
continue;
- if (*cur == '\\')
- {
- tmp = cur;
- cur = skip_escaped_newlines (pfile, cur);
- if (tmp != cur)
- continue;
- }
+ if (!c && cur != RLIMIT (pfile->context))
+ continue;
- if (*cur == '/')
+ if (*cur == '/' && skip_comments)
{
- tmp = skip_escaped_newlines (pfile, cur + 1);
+ const uchar *tmp = skip_escaped_newlines (pfile, cur);
if (*tmp == '*')
{
- cur = skip_comment (pfile, tmp + 1);
+ pfile->out.cur = out;
+ cur = copy_comment (pfile, tmp, false /* in_define */);
+ out = pfile->out.cur;
continue;
}
}
+ out--;
+ if (c == '\\' && is_vspace (*cur))
+ {
+ cur = skip_escaped_newlines (pfile, cur);
+ continue;
+ }
+
break;
}
- return cur;
+ pfile->out.cur = out;
+ return cur - 1;
}
/* Lexes and outputs an identifier starting at CUR, which is assumed
return result;
}
-/* Reads an identifier, returning its hashnode. If the next token is
- not an identifier, returns NULL. */
-cpp_hashnode *
-_cpp_lex_identifier_trad (pfile)
- cpp_reader *pfile;
-{
- const uchar *cur = skip_whitespace (pfile, CUR (pfile->context));
-
- if (!is_idstart (*cur))
- {
- CUR (pfile->context) = cur;
- return NULL;
- }
-
- return lex_identifier (pfile, cur);
-}
-
/* Overlays the true file buffer temporarily with text of length LEN
starting at START. The true buffer is restored upon calling
restore_buff(). */
{
cpp_buffer *buffer = pfile->buffer;
+ pfile->overlaid_buffer = buffer;
buffer->saved_cur = buffer->cur;
buffer->saved_rlimit = buffer->rlimit;
- buffer->saved_line_base = buffer->line_base;
buffer->cur = start;
- buffer->line_base = start;
buffer->rlimit = start + len;
pfile->saved_line = pfile->line;
_cpp_remove_overlay (pfile)
cpp_reader *pfile;
{
- cpp_buffer *buffer = pfile->buffer;
+ cpp_buffer *buffer = pfile->overlaid_buffer;
buffer->cur = buffer->saved_cur;
buffer->rlimit = buffer->saved_rlimit;
- buffer->line_base = buffer->saved_line_base;
pfile->line = pfile->saved_line;
}
{
stop = buffer->return_at_eof;
_cpp_pop_buffer (pfile);
+ buffer = pfile->buffer;
}
if (stop)
CUR (pfile->context) = buffer->cur;
RLIMIT (pfile->context) = buffer->rlimit;
scan_out_logical_line (pfile, NULL);
+ buffer = pfile->buffer;
buffer->cur = CUR (pfile->context);
}
while (pfile->state.skipping);
{
cpp_context *context;
const uchar *cur;
- unsigned int c, paren_depth, quote = 0;
uchar *out;
struct fun_macro fmacro;
+ unsigned int c, paren_depth = 0, quote = 0;
+ enum ls lex_state = ls_none;
fmacro.buff = NULL;
c = *cur++;
*out++ = c;
- /* There are only a few entities we need to catch: comments,
- identifiers, newlines, escaped newlines, # and '\0'. */
+ /* Whitespace should "continue" out of the switch,
+ non-whitespace should "break" out of it. */
switch (c)
{
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\v':
+ continue;
+
case '\0':
if (cur - 1 != RLIMIT (context))
- break;
+ continue;
/* If this is a macro's expansion, pop it. */
if (context->prev)
case '\r': case '\n':
cur = handle_newline (pfile, cur - 1);
- if (pfile->state.parsing_args == 2)
+ if (pfile->state.parsing_args == 2 && !pfile->state.in_directive)
{
/* Newlines in arguments become a space. */
out[-1] = ' ';
}
goto done;
+ case '<':
+ if (pfile->state.angled_headers && !quote)
+ quote = '>';
+ break;
+ case '>':
+ if (pfile->state.angled_headers && c == quote)
+ {
+ pfile->state.angled_headers = false;
+ quote = 0;
+ }
+ break;
+
case '"':
case '\'':
if (c == quote)
case '\\':
if (is_vspace (*cur))
- out--, cur = skip_escaped_newlines (pfile, cur - 1);
+ {
+ out--;
+ cur = skip_escaped_newlines (pfile, cur - 1);
+ continue;
+ }
else
{
/* Skip escaped quotes here, it's easier than above, but
{
cur = skip_escaped_newlines (pfile, cur);
if (*cur == '*')
- out--, cur = skip_comment (pfile, cur + 1);
+ {
+ pfile->out.cur = out;
+ cur = copy_comment (pfile, cur, macro != 0);
+ out = pfile->out.cur;
+ continue;
+ }
}
break;
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
- if (quote == 0 || macro)
+ if (!pfile->state.skipping && (quote == 0 || macro))
{
cpp_hashnode *node;
+ uchar *out_start = out - 1;
- pfile->out.cur = --out;
+ pfile->out.cur = out_start;
node = lex_identifier (pfile, cur - 1);
+ out = pfile->out.cur;
+ cur = CUR (context);
if (node->type == NT_MACRO
- && !pfile->state.skipping
- && pfile->state.parsing_args != 2
+ /* Should we expand for ls_answer? */
+ && lex_state == ls_none
&& !pfile->state.prevent_expansion
&& !recursive_macro (pfile, node))
{
if (node->value.macro->fun_like)
- maybe_start_funlike (pfile, node, out, &fmacro);
+ {
+ maybe_start_funlike (pfile, node, out_start, &fmacro);
+ lex_state = ls_fun_macro;
+ continue;
+ }
else
{
/* Remove the object-like macro's name from the
output, and push its replacement text. */
- pfile->out.cur = out;
+ pfile->out.cur = out_start;
push_replacement_text (pfile, node);
goto new_context;
}
{
/* Found a parameter in the replacement text of a
#define. Remove its name from the output. */
- pfile->out.cur = out;
+ out = pfile->out.cur = out_start;
save_replacement_text (pfile, macro, node->arg_index);
}
-
- out = pfile->out.cur;
- cur = CUR (context);
+ else if (lex_state == ls_hash)
+ {
+ lex_state = ls_predicate;
+ continue;
+ }
+ else if (pfile->state.in_expression
+ && node == pfile->spec_nodes.n_defined)
+ {
+ lex_state = ls_defined;
+ continue;
+ }
}
break;
if (quote == 0)
{
paren_depth++;
- if (pfile->state.parsing_args == 1)
+ if (lex_state == ls_fun_macro)
{
- const uchar *p = pfile->out.base + fmacro.offset;
-
- /* Invoke a prior function-like macro if there is only
- white space in-between. */
- while (is_numchar (*p))
- p++;
- while (is_space (*p))
- p++;
-
- if (p == out - 1)
- {
- pfile->state.parsing_args = 2;
- paren_depth = 1;
- out = pfile->out.base + fmacro.offset;
- fmacro.args[0] = fmacro.offset;
- }
- else
- pfile->state.parsing_args = 0;
+ lex_state = ls_none;
+ pfile->state.parsing_args = 2;
+ paren_depth = 1;
+ out = pfile->out.base + fmacro.offset;
+ fmacro.args[0] = fmacro.offset;
}
+ else if (lex_state == ls_predicate)
+ lex_state = ls_answer;
+ else if (lex_state == ls_defined)
+ lex_state = ls_defined_close;
}
break;
/* A single zero-length argument is no argument. */
if (fmacro.argc == 1
&& m->paramc == 0
- && out == pfile->out.base + 1)
+ && out == pfile->out.base + fmacro.offset + 1)
fmacro.argc = 0;
if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
goto new_context;
}
}
+ else if (lex_state == ls_answer || lex_state == ls_defined_close)
+ lex_state = ls_none;
}
break;
if (_cpp_handle_directive (pfile, false /* indented */))
goto start_logical_line;
}
+ if (pfile->state.in_expression)
+ {
+ lex_state = ls_hash;
+ continue;
+ }
break;
default:
break;
}
+
+ if (lex_state == ls_none)
+ continue;
+
+ /* Some of these transitions of state are syntax errors. The
+ ISO preprocessor will issue errors later. */
+ if (lex_state == ls_fun_macro)
+ {
+ /* Missing '('. */
+ lex_state = ls_none;
+ pfile->state.parsing_args = 0;
+ }
+ else if (lex_state == ls_hash
+ || lex_state == ls_predicate
+ || lex_state == ls_defined)
+ lex_state = ls_none;
+
+ /* ls_answer and ls_defined_close keep going until ')'. */
}
done:
pfile->out.cur = out - 1;
if (fmacro.buff)
_cpp_release_buff (pfile, fmacro.buff);
+
+ if (pfile->state.parsing_args == 2)
+ cpp_error (pfile, DL_ERROR,
+ "unterminated argument list invoking macro \"%s\"",
+ NODE_NAME (fmacro.node));
+ pfile->state.parsing_args = 0;
}
/* Push a context holding the replacement text of the macro NODE on
for (;;)
{
- cur = skip_whitespace (pfile, cur);
+ cur = skip_whitespace (pfile, cur, true /* skip_comments */);
if (is_idstart (*cur))
{
ok = false;
if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
break;
- cur = skip_whitespace (pfile, CUR (pfile->context));
+ cur = skip_whitespace (pfile, CUR (pfile->context),
+ true /* skip_comments */);
if (*cur == ',')
{
cur++;
}
/* Skip leading whitespace in the replacement text. */
- CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
+ CUR (pfile->context)
+ = skip_whitespace (pfile, CUR (pfile->context),
+ CPP_OPTION (pfile, discard_comments_in_macro_exp));
pfile->state.prevent_expansion++;
scan_out_logical_line (pfile, macro);