extern "C" {
#endif
+/* For complex reasons, cpp_reader is also typedefed in c-pragma.h. */
+#ifndef _C_PRAGMA_H
typedef struct cpp_reader cpp_reader;
+#endif
typedef struct cpp_buffer cpp_buffer;
typedef struct cpp_options cpp_options;
-typedef struct cpp_printer cpp_printer;
typedef struct cpp_token cpp_token;
-typedef struct cpp_toklist cpp_toklist;
-typedef struct cpp_name cpp_name;
+typedef struct cpp_string cpp_string;
+typedef struct cpp_hashnode cpp_hashnode;
+typedef struct cpp_pool cpp_pool;
+typedef struct cpp_macro cpp_macro;
+typedef struct cpp_lexer_pos cpp_lexer_pos;
+typedef struct cpp_lookahead cpp_lookahead;
+
+struct directive; /* These are deliberately incomplete. */
+struct answer;
+struct cpp_macro;
+struct macro_args;
+struct cpp_chunk;
+struct file_name_map_list;
+struct htab;
/* The first two groups, apart from '=', can appear in preprocessor
expressions. This allows a lookup table to be implemented in
the same order as their counterparts without the '=', like ">>". */
/* Positions in the table. */
-#define CPP_LAST_EQ CPP_LSHIFT
+#define CPP_LAST_EQ CPP_MAX
#define CPP_FIRST_DIGRAPH CPP_HASH
+#define CPP_LAST_PUNCTUATOR CPP_DOT_STAR
#define TTYPE_TABLE \
- T(CPP_EQ = 0, "=") \
- T(CPP_NOT, "!") \
- T(CPP_GREATER, ">") /* compare */ \
- T(CPP_LESS, "<") \
- T(CPP_PLUS, "+") /* math */ \
- T(CPP_MINUS, "-") \
- T(CPP_MULT, "*") \
- T(CPP_DIV, "/") \
- T(CPP_MOD, "%") \
- T(CPP_AND, "&") /* bit ops */ \
- T(CPP_OR, "|") \
- T(CPP_XOR, "^") \
- T(CPP_COMPL, "~") \
- T(CPP_RSHIFT, ">>") \
- T(CPP_LSHIFT, "<<") \
+ OP(CPP_EQ = 0, "=") \
+ OP(CPP_NOT, "!") \
+ OP(CPP_GREATER, ">") /* compare */ \
+ OP(CPP_LESS, "<") \
+ OP(CPP_PLUS, "+") /* math */ \
+ OP(CPP_MINUS, "-") \
+ OP(CPP_MULT, "*") \
+ OP(CPP_DIV, "/") \
+ OP(CPP_MOD, "%") \
+ OP(CPP_AND, "&") /* bit ops */ \
+ OP(CPP_OR, "|") \
+ OP(CPP_XOR, "^") \
+ OP(CPP_RSHIFT, ">>") \
+ OP(CPP_LSHIFT, "<<") \
+ OP(CPP_MIN, "<?") /* extension */ \
+ OP(CPP_MAX, ">?") \
\
- T(CPP_AND_AND, "&&") /* logical */ \
- T(CPP_OR_OR, "||") \
- T(CPP_QUERY, "?") \
- T(CPP_COLON, ":") \
- T(CPP_COMMA, ",") /* grouping */ \
- T(CPP_OPEN_PAREN, "(") \
- T(CPP_CLOSE_PAREN, ")") \
- T(CPP_EQ_EQ, "==") /* compare */ \
- T(CPP_NOT_EQ, "!=") \
- T(CPP_GREATER_EQ, ">=") \
- T(CPP_LESS_EQ, "<=") \
+ OP(CPP_COMPL, "~") \
+ OP(CPP_AND_AND, "&&") /* logical */ \
+ OP(CPP_OR_OR, "||") \
+ OP(CPP_QUERY, "?") \
+ OP(CPP_COLON, ":") \
+ OP(CPP_COMMA, ",") /* grouping */ \
+ OP(CPP_OPEN_PAREN, "(") \
+ OP(CPP_CLOSE_PAREN, ")") \
+ OP(CPP_EQ_EQ, "==") /* compare */ \
+ OP(CPP_NOT_EQ, "!=") \
+ OP(CPP_GREATER_EQ, ">=") \
+ OP(CPP_LESS_EQ, "<=") \
\
- T(CPP_PLUS_EQ, "+=") /* math */ \
- T(CPP_MINUS_EQ, "-=") \
- T(CPP_MULT_EQ, "*=") \
- T(CPP_DIV_EQ, "/=") \
- T(CPP_MOD_EQ, "%=") \
- T(CPP_AND_EQ, "&=") /* bit ops */ \
- T(CPP_OR_EQ, "|=") \
- T(CPP_XOR_EQ, "^=") \
- T(CPP_COMPL_EQ, "~=") \
- T(CPP_RSHIFT_EQ, ">>=") \
- T(CPP_LSHIFT_EQ, "<<=") \
+ OP(CPP_PLUS_EQ, "+=") /* math */ \
+ OP(CPP_MINUS_EQ, "-=") \
+ OP(CPP_MULT_EQ, "*=") \
+ OP(CPP_DIV_EQ, "/=") \
+ OP(CPP_MOD_EQ, "%=") \
+ OP(CPP_AND_EQ, "&=") /* bit ops */ \
+ OP(CPP_OR_EQ, "|=") \
+ OP(CPP_XOR_EQ, "^=") \
+ OP(CPP_RSHIFT_EQ, ">>=") \
+ OP(CPP_LSHIFT_EQ, "<<=") \
+ OP(CPP_MIN_EQ, "<?=") /* extension */ \
+ OP(CPP_MAX_EQ, ">?=") \
/* Digraphs together, beginning with CPP_FIRST_DIGRAPH. */ \
- T(CPP_HASH, "#") /* digraphs */ \
- T(CPP_PASTE, "##") \
- T(CPP_OPEN_SQUARE, "[") \
- T(CPP_CLOSE_SQUARE, "]") \
- T(CPP_OPEN_BRACE, "{") \
- T(CPP_CLOSE_BRACE, "}") \
- /* The remainder of the punctuation. Order is not significant. */ \
- T(CPP_SEMICOLON, ";") /* structure */ \
- T(CPP_ELLIPSIS, "...") \
- T(CPP_BACKSLASH, "\\") \
- T(CPP_PLUS_PLUS, "++") /* increment */ \
- T(CPP_MINUS_MINUS, "--") \
- T(CPP_DEREF, "->") /* accessors */ \
- T(CPP_DOT, ".") \
- T(CPP_SCOPE, "::") \
- T(CPP_DEREF_STAR, "->*") \
- T(CPP_DOT_STAR, ".*") \
- T(CPP_MIN, "<?") /* extension */ \
- T(CPP_MAX, ">?") \
- C(CPP_OTHER, 0) /* stray punctuation */ \
+ OP(CPP_HASH, "#") /* digraphs */ \
+ OP(CPP_PASTE, "##") \
+ OP(CPP_OPEN_SQUARE, "[") \
+ OP(CPP_CLOSE_SQUARE, "]") \
+ OP(CPP_OPEN_BRACE, "{") \
+ OP(CPP_CLOSE_BRACE, "}") \
+ /* The remainder of the punctuation. Order is not significant. */ \
+ OP(CPP_SEMICOLON, ";") /* structure */ \
+ OP(CPP_ELLIPSIS, "...") \
+ OP(CPP_BACKSLASH, "\\") \
+ OP(CPP_PLUS_PLUS, "++") /* increment */ \
+ OP(CPP_MINUS_MINUS, "--") \
+ OP(CPP_DEREF, "->") /* accessors */ \
+ OP(CPP_DOT, ".") \
+ OP(CPP_SCOPE, "::") \
+ OP(CPP_DEREF_STAR, "->*") \
+ OP(CPP_DOT_STAR, ".*") \
+\
+ TK(CPP_NAME, SPELL_IDENT) /* word */ \
+ TK(CPP_INT, SPELL_STRING) /* 23 */ \
+ TK(CPP_FLOAT, SPELL_STRING) /* 3.14159 */ \
+ TK(CPP_NUMBER, SPELL_STRING) /* 34_be+ta */ \
\
- H(CPP_NAME, spell_name) /* word */ \
- N(CPP_INT, 0) /* 23 */ \
- N(CPP_FLOAT, 0) /* 3.14159 */ \
- H(CPP_NUMBER, spell_name) /* 34_be+ta */ \
- H(CPP_CHAR, spell_string) /* 'char' */ \
- H(CPP_WCHAR, spell_string) /* L'char' */ \
- H(CPP_STRING, spell_string) /* "string" */ \
- H(CPP_WSTRING, spell_string) /* L"string" */ \
+ TK(CPP_CHAR, SPELL_STRING) /* 'char' */ \
+ TK(CPP_WCHAR, SPELL_STRING) /* L'char' */ \
+ TK(CPP_OTHER, SPELL_CHAR) /* stray punctuation */ \
\
- H(CPP_C_COMMENT, spell_comment) /* Only if output comments. */ \
- H(CPP_CPP_COMMENT, spell_comment) /* Only if output comments. */ \
- H(CPP_CHILL_COMMENT, spell_comment) /* Only if output comments. */ \
- N(CPP_MACRO_ARG, 0) /* Macro argument. */ \
- N(CPP_SUBLIST, 0) /* Sublist. */ \
- E(CPP_VSPACE, "\n") /* End of line. */ \
- N(CPP_EOF, 0) /* End of file. */ \
- N(CPP_HEADER_NAME, 0) /* <stdio.h> in #include */ \
- N(CPP_ASSERTION, 0) /* (...) in #assert */ \
+ TK(CPP_STRING, SPELL_STRING) /* "string" */ \
+ TK(CPP_WSTRING, SPELL_STRING) /* L"string" */ \
+ TK(CPP_OSTRING, SPELL_STRING) /* @"string" - Objective C */ \
+ TK(CPP_HEADER_NAME, SPELL_STRING) /* <stdio.h> in #include */ \
\
- /* Obsolete - will be removed when no code uses them still. */ \
- H(CPP_COMMENT, 0) /* Only if output comments. */ \
- N(CPP_HSPACE, 0) /* Horizontal white space. */ \
- N(CPP_POP, 0) /* End of buffer. */ \
- N(CPP_DIRECTIVE, 0) /* #define and the like */ \
- N(CPP_MACRO, 0) /* Like a NAME, but expanded. */
-
-#define T(e, s) e,
-#define H(e, s) e,
-#define C(e, s) e,
-#define N(e, s) e,
-#define E(e, s) e,
+ TK(CPP_COMMENT, SPELL_STRING) /* Only if output comments. */ \
+ TK(CPP_DHASH, SPELL_NONE) /* The # of a directive. */ \
+ TK(CPP_MACRO_ARG, SPELL_NONE) /* Macro argument. */ \
+ TK(CPP_PLACEMARKER, SPELL_NONE) /* Placemarker token. */ \
+ OP(CPP_EOF, "EOL") /* End of line or file. */
+
+#define OP(e, s) e,
+#define TK(e, s) e,
enum cpp_ttype
{
TTYPE_TABLE
N_TTYPES
};
-#undef T
-#undef H
-#undef C
-#undef N
-#undef E
-
-/* Payload of a NAME, NUMBER, FLOAT, STRING, or COMMENT token. */
-struct cpp_name
+#undef OP
+#undef TK
+
+/* Multiple-include optimisation. */
+enum mi_state {MI_FAILED = 0, MI_OUTSIDE};
+enum mi_ind {MI_IND_NONE = 0, MI_IND_NOT};
+
+/* Payload of a NUMBER, FLOAT, STRING, or COMMENT token. */
+struct cpp_string
{
unsigned int len;
- unsigned int offset; /* from list->namebuf */
+ const unsigned char *text;
};
-#define TOK_NAME(list, token) ((list)->namebuf + (token)->val.name.offset)
-
/* Flags for the cpp_token structure. */
-#define PREV_WHITESPACE 1 /* If whitespace before this token. */
-#define DIGRAPH 2 /* If it was a digraph. */
-#define UNSIGNED_INT 4 /* If int preprocessing token unsigned. */
-
-/* A preprocessing token.
- This has been carefully packed and should occupy 16 bytes on
- both 32- and 64-bit hosts. */
+#define PREV_WHITE (1 << 0) /* If whitespace before this token. */
+#define DIGRAPH (1 << 1) /* If it was a digraph. */
+#define STRINGIFY_ARG (1 << 2) /* If macro argument to be stringified. */
+#define PASTE_LEFT (1 << 3) /* If on LHS of a ## operator. */
+#define NAMED_OP (1 << 4) /* C++ named operators, also "defined". */
+#define NO_EXPAND (1 << 5) /* Do not macro-expand this token. */
+#define VARARGS_FIRST STRINGIFY_ARG /* First token of varargs expansion. */
+
+/* A preprocessing token. This has been carefully packed and should
+ occupy 12 bytes on 32-bit hosts and 16 bytes on 64-bit hosts. */
struct cpp_token
{
- unsigned short col; /* starting column of this token */
-#ifdef ENUM_BITFIELDS_ARE_UNSIGNED
- enum cpp_ttype type : CHAR_BIT; /* node type */
-#else
- unsigned char type;
-#endif
- unsigned char flags; /* flags - see above */
- unsigned int aux; /* CPP_OTHER character. Hash of a
- NAME, or something - see uses
- in the code */
+ ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */
+ unsigned char flags; /* flags - see above */
+
union
{
- struct cpp_name name; /* a string */
- HOST_WIDEST_INT integer; /* an integer */
+ HOST_WIDEST_INT integer; /* an integer */
+ struct cpp_hashnode *node; /* an identifier */
+ struct cpp_string str; /* a string, or number */
+ unsigned int aux; /* argument no. for a CPP_MACRO_ARG, or
+ character represented by CPP_OTHER. */
} val;
};
-/* Directive flags. */
-#define SYNTAX_INCLUDE (1 << 8)
-#define SYNTAX_ASSERT (1 << 9)
+/* The position of a token in the current file. */
+struct cpp_lexer_pos
+{
+ unsigned int line;
+ unsigned int output_line;
+ unsigned short col;
+};
-typedef int (*directive_handler) PARAMS ((cpp_reader *));
-typedef int (*parse_cleanup_t) PARAMS ((cpp_buffer *, cpp_reader *));
+/* A preprocessing token bundled with a lexer position. The tokens
+ member of struct cpp_lookahead points to objects of this type. */
+typedef struct cpp_token_with_pos cpp_token_with_pos;
+struct cpp_token_with_pos
+{
+ cpp_token token; /* the token itself */
+ cpp_lexer_pos pos; /* position recorded alongside the token */
+};
-struct cpp_toklist
+/* Token lookahead. */
+struct cpp_lookahead
{
- cpp_token *tokens; /* actual tokens as an array */
- unsigned int tokens_used; /* tokens used */
- unsigned int tokens_cap; /* tokens allocated */
-
- unsigned char *namebuf; /* names buffer */
- unsigned int name_used; /* _bytes_ used */
- unsigned int name_cap; /* _bytes_ allocated */
-
- unsigned int line; /* starting line number */
-
- /* Comment copying. */
- cpp_token *comments; /* comment tokens. */
- unsigned int comments_used; /* comment tokens used. */
- unsigned int comments_cap; /* comment token capacity. */
-
- /* Only used if tokens[0].type == CPP_DIRECTIVE. This is the
- handler to call after lexing the rest of this line. The flags
- indicate whether the rest of the line gets special treatment
- during lexing (#include, #if, #assert, #unassert). */
- directive_handler dir_handler;
- unsigned short dir_flags;
+ struct cpp_lookahead *next;
+ cpp_token_with_pos *tokens;
+ cpp_lexer_pos pos;
+ unsigned int cur, count, cap;
};
+/* Memory pools. */
+struct cpp_pool
+{
+ struct cpp_chunk *cur, *locked;
+ unsigned char *pos; /* Current position. */
+ unsigned int align;
+ unsigned int locks;
+};
+
+/* A run of tokens delimited by two pointers. Used as the list member
+ of struct cpp_context.
+ NOTE(review): limit is presumably one past the last token rather
+ than the last token itself - confirm against the users. */
+typedef struct toklist toklist;
+struct toklist
+{
+ cpp_token *first; /* first token of the run */
+ cpp_token *limit; /* end marker of the run - see note above */
+};
+
+typedef struct cpp_context cpp_context;
+struct cpp_context
+{
+ /* Doubly-linked list. */
+ cpp_context *next, *prev;
+
+ /* Contexts other than the base context are contiguous tokens.
+ e.g. macro expansions, expanded argument tokens. */
+ struct toklist list;
+
+ /* For a macro context, these are the macro and its arguments. */
+ cpp_macro *macro;
+};
+
+/* A standalone character. We may want to make it unsigned for the
+ same reason we use unsigned char - to avoid signedness issues. */
+typedef int cppchar_t;
+
struct cpp_buffer
{
const unsigned char *cur; /* current position */
const unsigned char *rlimit; /* end of valid data */
- const unsigned char *buf; /* entire buffer */
const unsigned char *line_base; /* start of current line */
- const unsigned char *mark; /* Saved position for lengthy backtrack. */
+ cppchar_t read_ahead; /* read ahead character */
+ cppchar_t extra_char; /* extra read-ahead for long tokens. */
+ struct cpp_reader *pfile; /* Owns this buffer. */
struct cpp_buffer *prev;
+ const unsigned char *buf; /* entire buffer */
+
/* Filename specified with #line command. */
const char *nominal_fname;
+
/* Actual directory of this file, used only for "" includes */
struct file_name_list *actual_dir;
- /* Pointer into the include hash table. Used for include_next and
+ /* Pointer into the include table. Used for include_next and
to record control macros. */
- struct ihash *ihash;
-
- parse_cleanup_t cleanup;
-
- /* If the buffer is the expansion of a macro, this points to the
- macro's hash table entry. */
- struct hashnode *macro;
+ struct include_file *inc;
/* Value of if_stack at start of this file.
Used to prohibit unmatched #endif (etc) in an include file. */
struct if_stack *if_stack;
+ /* Token column position adjustment owing to tabs in whitespace. */
+ unsigned int col_adjust;
+
/* Line number at line_base (above). */
unsigned int lineno;
- /* True if this is a header file included using <FILENAME>. */
- char system_header_p;
-
- /* True if end-of-file has already been hit once in this buffer. */
- char seen_eof;
-
- /* True if buffer contains escape sequences.
- Currently there are two kinds:
- "\r-" means following identifier should not be macro-expanded.
- "\r " means a token-separator. This turns into " " in final output
- if not stringizing and needed to separate tokens; otherwise nothing.
- Any other two-character sequence beginning with \r is an error.
-
- If this is NOT set, then \r is a one-character escape meaning backslash
- newline. This is guaranteed not to occur in the middle of a token.
- The two interpretations of \r do not conflict, because the two-character
- escapes are used only in macro buffers, and backslash-newline is removed
- from macro expansion text in collect_expansion and/or macarg. */
- char has_escapes;
-
- /* Used by the C++ frontend to implement redirected input (such as for
- default argument and/or template parsing). */
- char manual_pop;
-
/* True if we have already warned about C++ comments in this file.
The warning happens only for C89 extended mode with -pedantic on,
or for -Wtraditional, and only once per file (otherwise it would
be far too noisy). */
- char warned_cplusplus_comments;
+ unsigned char warned_cplusplus_comments;
- /* True if this buffer's data is mmapped. */
- char mapped;
+ /* True if we don't process trigraphs and escaped newlines. True
+ for preprocessed input, command line directives, and _Pragma
+ buffers. */
+ unsigned char from_stage3;
};
-struct file_name_map_list;
-struct htab;
-
/* Maximum nesting of cpp_buffers. We use a static limit, partly for
efficiency, and partly to limit runaway recursion. */
#define CPP_STACK_MAX 200
const char *in_fname;
const char *out_fname;
+ /* Characters between tab stops. */
+ unsigned int tabstop;
+
/* Pending options - -D, -U, -A, -I, -ixxx. */
struct cpp_pending *pending;
const char *include_prefix;
unsigned int include_prefix_len;
+ /* -fleading_underscore sets this to "_". */
+ const char *user_label_prefix;
+
/* Non-0 means -v, so print the full set of include dirs. */
unsigned char verbose;
likely to be in comments). */
unsigned char lang_asm;
- /* Nonzero means this is Fortran, and we don't know where the
- comments are, so permit unbalanced ' strings. Unlike lang_asm,
- this does not ignore unrecognized directives. */
- unsigned char lang_fortran;
-
- /* Nonzero means handle CHILL comment syntax and output CHILL string
- delimiters for __DATE__ etc. */
- unsigned char chill;
-
/* Nonzero means don't copy comments into the output file. */
unsigned char discard_comments;
- /* Nonzero means process the ANSI trigraph sequences. */
+ /* Nonzero means process the ISO trigraph sequences. */
unsigned char trigraphs;
+ /* Nonzero means process the ISO digraph sequences. */
+ unsigned char digraphs;
+
/* Nonzero means print the names of included files rather than the
preprocessed output. 1 means just the #include "...", 2 means
#include <...> as well. */
/* Nonzero means don't print warning messages. */
unsigned char inhibit_warnings;
+ /* Nonzero means don't suppress warnings from system headers. */
+ unsigned char warn_system_headers;
+
/* Nonzero means don't print error messages. Has no option to
select it, but can be set by a user of cpplib (e.g. fix-header). */
unsigned char inhibit_errors;
/* Nonzero means warn if #import is used. */
unsigned char warn_import;
- /* Nonzero means warn if a macro argument is (or would be)
- stringified with -traditional, and warn about directives
- with the # indented from the beginning of the line. */
+ /* Nonzero means warn about various incompatibilities with
+ traditional C. */
unsigned char warn_traditional;
+ /* Nonzero means warn if ## is applied to two tokens that cannot be
+ pasted together. */
+ unsigned char warn_paste;
+
/* Nonzero means turn warnings into errors. */
unsigned char warnings_are_errors;
/* Zero means dollar signs are punctuation. */
unsigned char dollars_in_ident;
- /* Nonzero means try to imitate old fashioned non-ANSI preprocessor. */
- unsigned char traditional;
-
/* Nonzero means warn if undefined identifiers are evaluated in an #if. */
unsigned char warn_undef;
unsigned char show_column;
};
+struct lexer_state
+{
+ /* Nonzero if first token on line is CPP_HASH. */
+ unsigned char in_directive;
+
+ /* Nonzero if in a directive that takes angle-bracketed headers. */
+ unsigned char angled_headers;
+
+ /* Nonzero to save comments. Turned off if discard_comments, and in
+ all directives apart from #define. */
+ unsigned char save_comments;
+
+ /* If nonzero the lexer skips newlines. Internal to the lexer. */
+ unsigned char skip_newlines;
+
+ /* Nonzero if we're mid-comment. */
+ unsigned char lexing_comment;
+
+ /* Nonzero if lexing __VA_ARGS__ is valid. */
+ unsigned char va_args_ok;
+
+ /* Nonzero if lexing poisoned identifiers is valid. */
+ unsigned char poisoned_ok;
+
+ /* Nonzero to prevent macro expansion. */
+ unsigned char prevent_expansion;
+
+ /* Nonzero when parsing arguments to a function-like macro. */
+ unsigned char parsing_args;
+};
+
+/* Special nodes - identifiers with predefined significance. */
+struct spec_nodes
+{
+ cpp_hashnode *n_L; /* L"str" */
+ cpp_hashnode *n_defined; /* defined operator */
+ cpp_hashnode *n__Pragma; /* _Pragma operator */
+ cpp_hashnode *n__STRICT_ANSI__; /* STDC_0_IN_SYSTEM_HEADERS */
+ cpp_hashnode *n__CHAR_UNSIGNED__; /* plain char is unsigned */
+ cpp_hashnode *n__VA_ARGS__; /* C99 vararg macros */
+};
-/* A cpp_reader encapsulates the "state" of a pre-processor run.
+/* A cpp_reader encapsulates the "state" of a pre-processor run.
Applying cpp_get_token repeatedly yields a stream of pre-processor
- tokens. Usually, there is only one cpp_reader object active. */
+ tokens. Usually, there is only one cpp_reader object active. */
struct cpp_reader
{
/* Top of buffer stack. */
cpp_buffer *buffer;
- /* Token list used by get_directive_token. */
- cpp_toklist directbuf;
-
- /* A buffer used for both for cpp_get_token's output, and also internally. */
- unsigned char *token_buffer;
- /* Allocated size of token_buffer. CPP_RESERVE allocates space. */
- unsigned int token_buffer_size;
- /* End of the written part of token_buffer. */
- unsigned char *limit;
-
- /* Error counter for exit code */
+ /* Lexer state. */
+ struct lexer_state state;
+
+ /* The position of the last lexed token, last lexed directive, and
+ last macro invocation. */
+ cpp_lexer_pos lexer_pos;
+ cpp_lexer_pos macro_pos;
+ cpp_lexer_pos directive_pos;
+
+ /* Memory pools. */
+ cpp_pool ident_pool; /* For all identifiers, and permanent
+ numbers and strings. */
+ cpp_pool temp_string_pool; /* For temporary numbers and strings. */
+ cpp_pool macro_pool; /* For macro definitions. Permanent. */
+ cpp_pool argument_pool; /* For macro arguments. Temporary. */
+ cpp_pool* string_pool; /* Either temp_string_pool or ident_pool. */
+
+ /* Context stack. */
+ struct cpp_context base_context;
+ struct cpp_context *context;
+
+ /* If in_directive, the directive if known. */
+ const struct directive *directive;
+
+ /* Multiple include optimisation. */
+ enum mi_state mi_state;
+ enum mi_ind mi_if_not_defined;
+ unsigned int mi_lexed;
+ const cpp_hashnode *mi_cmacro;
+ const cpp_hashnode *mi_ind_cmacro;
+
+ /* Token lookahead. */
+ struct cpp_lookahead *la_read; /* Read from this lookahead. */
+ struct cpp_lookahead *la_write; /* Write to this lookahead. */
+ struct cpp_lookahead *la_unused; /* Free store. */
+
+ /* Error counter for exit code. */
unsigned int errors;
- /* Line where a newline was first seen in a string constant. */
- unsigned int multiline_string_line;
+ /* Line and column where a newline was first seen in a string
+ constant (multi-line strings). */
+ cpp_lexer_pos mlstring_pos;
+
+ /* Buffer to hold macro definition string. */
+ unsigned char *macro_buffer;
+ unsigned int macro_buffer_len;
/* Current depth in #include directives that use <...>. */
unsigned int system_include_depth;
- /* Current depth of buffer stack. */
+ /* Current depth of buffer stack. */
unsigned int buffer_stack_depth;
- /* Hash table of macros and assertions. See cpphash.c */
+ /* Current depth in #include directives. */
+ unsigned int include_depth;
+
+ /* Hash table of macros and assertions. See cpphash.c. */
struct htab *hashtab;
- /* Hash table of other included files. See cppfiles.c */
- struct htab *all_include_files;
+ /* Tree of other included files. See cppfiles.c. */
+ struct splay_tree_s *all_include_files;
- /* Chain of `actual directory' file_name_list entries,
- for "" inclusion. */
+ /* Chain of `actual directory' file_name_list entries, for ""
+ inclusion. */
struct file_name_list *actual_dirs;
/* Current maximum length of directory names in the search path
for include files. (Altered as we get more of them.) */
unsigned int max_include_len;
- struct if_stack *if_stack;
- const unsigned char *potential_control_macro;
+ /* Date and time tokens. Calculated together if either is requested. */
+ cpp_token date;
+ cpp_token time;
/* Buffer of -M output. */
struct deps *deps;
- /* A buffer used only by read_and_prescan (in cppfiles.c), which is
- allocated once per cpp_reader object to keep it off the stack. */
- unsigned char *input_buffer;
- size_t input_buffer_len;
+ /* Obstack holding all macro hash nodes. This never shrinks.
+ See cpphash.c */
+ struct obstack *hash_ob;
+
+ /* Obstack holding buffer and conditional structures. This is a
+ real stack. See cpplib.c */
+ struct obstack *buffer_ob;
+
+ /* Pragma table - dynamic, because a library user can add to the
+ list of recognized pragmas. */
+ struct pragma_entry *pragmas;
+
+ /* Call backs. */
+ struct {
+ void (*enter_file) PARAMS ((cpp_reader *));
+ void (*leave_file) PARAMS ((cpp_reader *));
+ void (*rename_file) PARAMS ((cpp_reader *));
+ void (*include) PARAMS ((cpp_reader *, const unsigned char *,
+ const cpp_token *));
+ void (*define) PARAMS ((cpp_reader *, cpp_hashnode *));
+ void (*undef) PARAMS ((cpp_reader *, cpp_hashnode *));
+ void (*poison) PARAMS ((cpp_reader *));
+ void (*ident) PARAMS ((cpp_reader *, const cpp_string *));
+ void (*def_pragma) PARAMS ((cpp_reader *));
+ } cb;
/* User visible options. */
struct cpp_options opts;
+ /* Special nodes - identifiers with predefined significance to the
+ preprocessor. */
+ struct spec_nodes spec_nodes;
+
/* Nonzero means we have printed (while error reporting) a list of
containing files that matches the current status. */
unsigned char input_stack_listing_current;
- /* If non-zero, macros are not expanded. */
- unsigned char no_macro_expand;
-
- /* If non-zero, directives cause a hard error. Used when parsing
- macro arguments. */
- unsigned char no_directives;
-
/* We're printed a warning recommending against using #import. */
unsigned char import_warning;
- /* If true, characters between '<' and '>' are a single (string) token. */
- unsigned char parsing_include_directive;
-
- /* If true, # introduces an assertion (see do_assert) */
- unsigned char parsing_if_directive;
-
- /* If true, # and ## are the STRINGIZE and TOKPASTE operators */
- unsigned char parsing_define_directive;
-
- /* True if escape sequences (as described for has_escapes in
- parse_buffer) should be emitted. */
- unsigned char output_escapes;
-
- /* 0: Have seen non-white-space on this line.
- 1: Only seen white space so far on this line.
- 2: Only seen white space so far in this file. */
- unsigned char only_seen_white;
-
/* True after cpp_start_read completes. Used to inhibit some
warnings while parsing the command line. */
unsigned char done_initializing;
-};
-
-/* struct cpp_printer encapsulates state used to convert the stream of
- tokens coming from cpp_get_token back into a text file. Not
- everyone wants to do that, hence we separate the function. */
-struct cpp_printer
-{
- FILE *outf; /* stream to write to */
- const char *last_fname; /* previous file name */
- unsigned int last_bsd; /* did we just push? */
- unsigned int lineno; /* line currently being written */
- unsigned int written; /* low water mark in token buffer */
+ /* True if we are skipping a failed conditional group. */
+ unsigned char skipping;
};
#define CPP_FATAL_LIMIT 1000
/* True if we have seen a "fatal" error. */
#define CPP_FATAL_ERRORS(READER) ((READER)->errors >= CPP_FATAL_LIMIT)
-/* Macros for manipulating the token_buffer. */
-
-/* Number of characters currently in PFILE's output buffer. */
-#define CPP_WRITTEN(PFILE) ((size_t)((PFILE)->limit - (PFILE)->token_buffer))
-#define CPP_PWRITTEN(PFILE) ((PFILE)->limit)
-#define CPP_ADJUST_WRITTEN(PFILE,DELTA) ((PFILE)->limit += (DELTA))
-#define CPP_SET_WRITTEN(PFILE,N) ((PFILE)->limit = (PFILE)->token_buffer + (N))
-
#define CPP_OPTION(PFILE, OPTION) ((PFILE)->opts.OPTION)
#define CPP_BUFFER(PFILE) ((PFILE)->buffer)
#define CPP_BUF_LINE(BUF) ((BUF)->lineno)
-#define CPP_BUF_COL(BUF) ((BUF)->cur - (BUF)->line_base)
+#define CPP_BUF_COLUMN(BUF, CUR) ((CUR) - (BUF)->line_base + (BUF)->col_adjust)
+#define CPP_BUF_COL(BUF) CPP_BUF_COLUMN(BUF, (BUF)->cur)
/* Name under which this program was invoked. */
extern const char *progname;
-extern void _cpp_lex_file PARAMS((cpp_reader *));
-extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
-extern enum cpp_ttype cpp_get_token PARAMS ((cpp_reader *));
-extern enum cpp_ttype cpp_get_non_space_token PARAMS ((cpp_reader *));
+/* The structure of a node in the hash table. The hash table has
+ entries for all identifiers: either macros defined by #define
+ commands (type NT_MACRO), assertions created with #assert
+ (NT_ASSERTION), or neither of the above (NT_VOID). Builtin macros
+ like __LINE__ are flagged NODE_BUILTIN. Poisoned identifiers are
+ flagged NODE_POISONED. NODE_OPERATOR (C++ only) indicates an
+ identifier that behaves like an operator such as "xor".
+ NODE_DIAGNOSTIC is for speed in lex_token: it indicates a
+ diagnostic may be required for this node. Currently this only
+ applies to __VA_ARGS__ and poisoned identifiers. */
+
+/* Hash node flags. */
+#define NODE_OPERATOR (1 << 0) /* C++ named operator. */
+#define NODE_POISONED (1 << 1) /* Poisoned identifier. */
+#define NODE_BUILTIN (1 << 2) /* Builtin macro. */
+#define NODE_DIAGNOSTIC (1 << 3) /* Possible diagnostic when lexed. */
+
+/* Different flavors of hash node. */
+enum node_type
+{
+ NT_VOID = 0, /* No definition yet. */
+ NT_MACRO, /* A macro of some form. */
+ NT_ASSERTION /* Predicate for #assert. */
+};
+
+/* Different flavors of builtin macro. */
+enum builtin_type
+{
+ BT_SPECLINE = 0, /* `__LINE__' */
+ BT_DATE, /* `__DATE__' */
+ BT_FILE, /* `__FILE__' */
+ BT_BASE_FILE, /* `__BASE_FILE__' */
+ BT_INCLUDE_LEVEL, /* `__INCLUDE_LEVEL__' */
+ BT_TIME, /* `__TIME__' */
+ BT_STDC /* `__STDC__' */
+};
+
+/* There is a slot in the hashnode for use by front ends when integrated
+ with cpplib. It holds a tree (see tree.h) but we mustn't drag that
+ header into every user of cpplib.h. cpplib does not do anything with
+ this slot except clear it when a new node is created. */
+union tree_node;
+struct cpp_hashnode
+{
+ const unsigned char *name; /* null-terminated name */
+ unsigned int hash; /* cached hash value */
+ unsigned short length; /* length of name excluding null */
+ unsigned short arg_index; /* macro argument index */
+ unsigned char directive_index; /* index into directive table. */
+ ENUM_BITFIELD(node_type) type : 8; /* node type. */
+ unsigned char flags; /* node flags. */
+
+ union
+ {
+ cpp_macro *macro; /* a macro. */
+ struct answer *answers; /* answers to an assertion. */
+ enum cpp_ttype operator; /* code for a named operator. */
+ enum builtin_type builtin; /* code for a builtin macro. */
+ } value;
+
+ union tree_node *fe_value; /* front end value */
+};
+
+extern unsigned int cpp_token_len PARAMS ((const cpp_token *));
+extern unsigned char *cpp_token_as_text PARAMS ((cpp_reader *, const cpp_token *));
+extern unsigned char *cpp_spell_token PARAMS ((cpp_reader *, const cpp_token *,
+ unsigned char *));
+extern void cpp_init PARAMS ((void));
+extern int cpp_handle_options PARAMS ((cpp_reader *, int, char **));
+extern int cpp_handle_option PARAMS ((cpp_reader *, int, char **));
extern void cpp_reader_init PARAMS ((cpp_reader *));
-extern cpp_printer *cpp_printer_init PARAMS ((cpp_reader *, cpp_printer *));
-extern int cpp_start_read PARAMS ((cpp_reader *, cpp_printer *, const char *));
-extern void cpp_output_tokens PARAMS ((cpp_reader *, cpp_printer *));
-extern void cpp_output_list PARAMS ((cpp_reader *, cpp_printer *,
- const cpp_toklist *));
-extern void cpp_finish PARAMS ((cpp_reader *, cpp_printer *));
+
+extern void cpp_register_pragma PARAMS ((cpp_reader *,
+ const char *, const char *,
+ void (*) PARAMS ((cpp_reader *))));
+extern void cpp_register_pragma_space PARAMS ((cpp_reader *, const char *));
+
+extern int cpp_start_read PARAMS ((cpp_reader *, const char *));
+extern void cpp_finish PARAMS ((cpp_reader *));
extern void cpp_cleanup PARAMS ((cpp_reader *));
+extern int cpp_avoid_paste PARAMS ((cpp_reader *, const cpp_token *,
+ const cpp_token *));
+extern enum cpp_ttype cpp_can_paste PARAMS ((cpp_reader *, const cpp_token *,
+ const cpp_token *, int *));
+extern void cpp_get_token PARAMS ((cpp_reader *, cpp_token *));
+extern const cpp_lexer_pos *cpp_get_line PARAMS ((cpp_reader *));
+extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
+ const cpp_hashnode *));
-extern cpp_buffer *cpp_file_buffer PARAMS((cpp_reader *));
extern void cpp_define PARAMS ((cpp_reader *, const char *));
extern void cpp_assert PARAMS ((cpp_reader *, const char *));
extern void cpp_undef PARAMS ((cpp_reader *, const char *));
extern void cpp_unassert PARAMS ((cpp_reader *, const char *));
-extern void cpp_free_token_list PARAMS ((cpp_toklist *));
+extern cpp_buffer *cpp_push_buffer PARAMS ((cpp_reader *,
+ const unsigned char *, long));
+extern cpp_buffer *cpp_pop_buffer PARAMS ((cpp_reader *));
+extern int cpp_defined PARAMS ((cpp_reader *, const unsigned char *, int));
/* N.B. The error-message-printer prototypes have not been nicely
formatted because exgettext needs to see 'msgid' on the same line
ATTRIBUTE_PRINTF_4;
extern void cpp_pedwarn_with_file_and_line PARAMS ((cpp_reader *, const char *, int, int, const char *msgid, ...))
ATTRIBUTE_PRINTF_5;
-extern void cpp_error_from_errno PARAMS ((cpp_reader *, const char *));
-extern void cpp_notice_from_errno PARAMS ((cpp_reader *, const char *));
+extern void cpp_error_from_errno PARAMS ((cpp_reader *, const char *));
+extern void cpp_notice_from_errno PARAMS ((cpp_reader *, const char *));
/* In cpplex.c */
-extern cpp_buffer *cpp_push_buffer PARAMS ((cpp_reader *,
- const unsigned char *, long));
-extern cpp_buffer *cpp_pop_buffer PARAMS ((cpp_reader *));
-extern void cpp_scan_buffer PARAMS ((cpp_reader *, cpp_printer *));
+extern int cpp_ideq PARAMS ((const cpp_token *,
+ const char *));
+extern void cpp_output_line PARAMS ((cpp_reader *, FILE *));
+extern void cpp_output_token PARAMS ((const cpp_token *, FILE *));
+extern const char *cpp_type2name PARAMS ((enum cpp_ttype));
+
+/* In cpphash.c */
+extern cpp_hashnode *cpp_lookup PARAMS ((cpp_reader *,
+ const unsigned char *, size_t));
+extern void cpp_forall_identifiers PARAMS ((cpp_reader *,
+ int (*) PARAMS ((cpp_reader *,
+ cpp_hashnode *,
+ void *)),
+ void *));
+
+/* In cppmacro.c */
extern void cpp_scan_buffer_nooutput PARAMS ((cpp_reader *));
+extern void cpp_start_lookahead PARAMS ((cpp_reader *));
+extern void cpp_stop_lookahead PARAMS ((cpp_reader *, int));
+/* In cppfiles.c */
+extern int cpp_included PARAMS ((cpp_reader *, const char *));
+extern int cpp_read_file PARAMS ((cpp_reader *, const char *));
+extern void cpp_make_system_header PARAMS ((cpp_reader *, cpp_buffer *, int));
+extern const char *cpp_syshdr_flags PARAMS ((cpp_reader *, cpp_buffer *));
+
+/* These are inline functions instead of macros so we can get type
+ checking. */
+typedef unsigned char U_CHAR;
+#define U (const U_CHAR *) /* Intended use: U"string" */
+
+static inline int ustrcmp PARAMS ((const U_CHAR *, const U_CHAR *));
+static inline int ustrncmp PARAMS ((const U_CHAR *, const U_CHAR *,
+ size_t));
+static inline size_t ustrlen PARAMS ((const U_CHAR *));
+static inline U_CHAR *uxstrdup PARAMS ((const U_CHAR *));
+static inline U_CHAR *ustrchr PARAMS ((const U_CHAR *, int));
+static inline int ufputs PARAMS ((const U_CHAR *, FILE *));
+
+/* strcmp for U_CHAR strings: casts both arguments to const char *
+ and returns whatever strcmp returns. */
+static inline int
+ustrcmp (s1, s2)
+ const U_CHAR *s1, *s2;
+{
+ return strcmp ((const char *)s1, (const char *)s2);
+}
+/* strncmp for U_CHAR strings: compares at most N characters of S1
+ and S2 by casting both to const char * and calling strncmp. */
+static inline int
+ustrncmp (s1, s2, n)
+ const U_CHAR *s1, *s2;
+ size_t n;
+{
+ return strncmp ((const char *)s1, (const char *)s2, n);
+}
-/* In cpphash.c */
-extern int cpp_defined PARAMS ((cpp_reader *,
- const unsigned char *, int));
+/* strlen for a U_CHAR string: casts S1 to const char * and returns
+ the result of strlen. */
+static inline size_t
+ustrlen (s1)
+ const U_CHAR *s1;
+{
+ return strlen ((const char *)s1);
+}
-/* In cppfiles.c */
-extern int cpp_included PARAMS ((cpp_reader *, const char *));
-extern int cpp_read_file PARAMS ((cpp_reader *, const char *));
+/* xstrdup for a U_CHAR string: passes S1 to xstrdup as a const char *
+ and casts the result back to U_CHAR *.
+ NOTE(review): xstrdup is declared elsewhere; presumably it returns
+ freshly allocated storage owned by the caller - confirm. */
+static inline U_CHAR *
+uxstrdup (s1)
+ const U_CHAR *s1;
+{
+ return (U_CHAR *) xstrdup ((const char *)s1);
+}
+
+/* strchr for a U_CHAR string: searches S1 for C via strchr and casts
+ the result to U_CHAR *. As with strchr itself, the returned pointer
+ is not const-qualified even though S1 is. */
+static inline U_CHAR *
+ustrchr (s1, c)
+ const U_CHAR *s1;
+ int c;
+{
+ return (U_CHAR *) strchr ((const char *)s1, c);
+}
+
+/* fputs for a U_CHAR string: writes S to stream F by casting it to
+ const char * and returns the result of fputs. */
+static inline int
+ufputs (s, f)
+ const U_CHAR *s;
+ FILE *f;
+{
+ return fputs ((const char *)s, f);
+}
#ifdef __cplusplus
}