1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 o Distinguish integers, floats, and 'other' pp-numbers.
28 o Store ints and char constants as binary values.
29 o New command-line assertion syntax.
30 o Comment all functions, and describe macro expansion algorithm.
31 o Move as much out of header files as possible.
32 o Remove single quote pairs `', and some '', from diagnostics.
33 o Correct pastability test for CPP_NAME and CPP_NUMBER.
44 const unsigned char *_cpp_digraph_spellings [] = {U"%:", U"%:%:", U"<:",
46 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
48 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
50 /* Flags for cpp_context. */
51 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
52 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
53 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
54 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
55 #define CONTEXT_VARARGS (1 << 4) /* If a varargs argument context. */
57 typedef struct cpp_context cpp_context;
62 const cpp_toklist *list; /* Used for macro contexts only. */
63 const cpp_token **arg; /* Used for arg contexts only. */
66 /* Pushed token to be returned by next call to get_raw_token. */
67 const cpp_token *pushed_token;
69 struct macro_args *args; /* The arguments for a function-like
70 macro. NULL otherwise. */
71 unsigned short posn; /* Current posn, index into u. */
72 unsigned short count; /* No. of tokens in u. */
77 typedef struct macro_args macro_args;
81 const cpp_token **tokens;
82 unsigned int capacity;
87 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
88 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
89 macro_args *, unsigned int *));
90 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
91 static void save_token PARAMS ((macro_args *, const cpp_token *));
92 static int pop_context PARAMS ((cpp_reader *));
93 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
94 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
95 static void free_macro_args PARAMS ((macro_args *));
97 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
98 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
99 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
101 static int skip_block_comment PARAMS ((cpp_reader *));
102 static int skip_line_comment PARAMS ((cpp_reader *));
103 static void adjust_column PARAMS ((cpp_reader *));
104 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
105 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
106 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
107 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
108 static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
109 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
110 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
111 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
112 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
113 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
114 static void lex_token PARAMS ((cpp_reader *, cpp_token *));
115 static int lex_next PARAMS ((cpp_reader *, int));
117 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
120 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
121 static void expand_context_stack PARAMS ((cpp_reader *));
122 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
124 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
126 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
128 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
129 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
131 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
132 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
134 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
135 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
136 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
137 static void release_temp_tokens PARAMS ((cpp_reader *));
138 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
/* Nonzero when C is '+' or '-' and PREVC is an exponent marker:
   'e' or 'E' always, 'p' or 'P' only when the c89 option is not set.
   The expansion refers to pfile, so a variable of that name must be
   in scope wherever this macro is used.  */
140 #define VALID_SIGN(c, prevc) \
141 (((c) == '+' || (c) == '-') && \
142 ((prevc) == 'e' || (prevc) == 'E' \
143 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
145 /* An upper bound on the number of bytes needed to spell a token,
146 including preceding whitespace. */
147 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
150 const cpp_token *token;
154 switch (TOKEN_SPELL (token))
156 default: len = 0; break;
157 case SPELL_STRING: len = token->val.str.len; break;
158 case SPELL_IDENT: len = token->val.node->length; break;
/* Nonzero if the context C has the CONTEXT_ARG flag set.  */
163 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Pointer to the entry of PFILE->contexts at offset PFILE->cur_context.  */
164 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
166 #define ASSIGN_FLAGS_AND_POS(d, s) \
167 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
168 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
171 /* f is flags, just consisting of PREV_WHITE | BOL. */
172 #define MODIFY_FLAGS_AND_POS(d, s, f) \
173 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
174 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
177 #define OP(e, s) { SPELL_OPERATOR, U s },
178 #define TK(e, s) { s, U STRINGX (e) },
180 const struct token_spelling
181 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
186 /* Helper routine used by parse_include, which can't see spell_token.
187 Reinterpret the current line as an h-char-sequence (< ... >); we are
188 looking at the first token after the <. */
190 _cpp_glue_header_name (pfile)
200 buf = xmalloc (avail);
204 t = _cpp_get_token (pfile);
205 if (t->type == CPP_GREATER || t->type == CPP_EOF)
208 if (len + TOKEN_LEN (t) > avail)
210 avail = len + TOKEN_LEN (t) + 40;
211 buf = xrealloc (buf, avail);
214 if (t->flags & PREV_WHITE)
217 p = spell_token (pfile, t, buf + len);
218 len = (size_t) (p - buf); /* p known >= buf */
221 if (t->type == CPP_EOF)
222 cpp_error (pfile, "missing terminating > character");
224 buf = xrealloc (buf, len);
226 hdr = get_temp_token (pfile);
227 hdr->type = CPP_HEADER_NAME;
229 hdr->val.str.text = buf;
230 hdr->val.str.len = len;
234 /* Token-buffer helper functions. */
236 /* Expand a token list's string space. It is *vital* that
237 list->tokens_used is correct, to get pointer fix-up right. */
239 _cpp_expand_name_space (list, len)
243 const U_CHAR *old_namebuf;
245 old_namebuf = list->namebuf;
246 list->name_cap += len;
247 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
249 /* Fix up token text pointers. */
250 if (list->namebuf != old_namebuf)
254 for (i = 0; i < list->tokens_used; i++)
255 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
256 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
260 /* If there is not enough room for LEN more characters, expand the
261 list by just enough to have room for LEN characters. */
263 _cpp_reserve_name_space (list, len)
267 unsigned int room = list->name_cap - list->name_used;
270 _cpp_expand_name_space (list, len - room);
273 /* Expand the number of tokens in a list. */
275 _cpp_expand_token_space (list, count)
279 list->tokens_cap += count;
280 list->tokens = (cpp_token *)
281 xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token));
284 /* Initialize a token list. If EMPTY is false, some token and name
285 space is provided. */
287 _cpp_init_toklist (list, empty)
293 list->tokens_cap = 0;
300 /* Initialize token space. */
301 list->tokens_cap = 256; /* 4K's worth. */
302 list->tokens = (cpp_token *)
303 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
305 /* Initialize name space. */
306 list->name_cap = 1024;
307 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
310 _cpp_clear_toklist (list);
313 /* Clear a token list. */
315 _cpp_clear_toklist (list)
318 list->tokens_used = 0;
322 list->params_len = 0;
326 /* Free a token list. Does not free the list itself, which may be
327 embedded in a larger structure. */
329 _cpp_free_toklist (list)
330 const cpp_toklist *list;
333 free (list->namebuf);
336 /* Compare two tokens. */
338 _cpp_equiv_tokens (a, b)
339 const cpp_token *a, *b;
341 if (a->type == b->type && a->flags == b->flags)
342 switch (TOKEN_SPELL (a))
344 default: /* Keep compiler happy. */
349 return a->val.aux == b->val.aux; /* arg_no or character. */
351 return a->val.node == b->val.node;
353 return (a->val.str.len == b->val.str.len
354 && !memcmp (a->val.str.text, b->val.str.text,
361 /* Compare two token lists. */
363 _cpp_equiv_toklists (a, b)
364 const cpp_toklist *a, *b;
368 if (a->tokens_used != b->tokens_used
369 || a->flags != b->flags
370 || a->paramc != b->paramc)
373 for (i = 0; i < a->tokens_used; i++)
374 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
381 Compares the token TOKEN to the NUL-terminated string STRING.
382 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
385 cpp_ideq (token, string)
386 const cpp_token *token;
389 if (token->type != CPP_NAME)
392 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
395 /* Call when meeting a newline. Returns the character after the newline
396 (or carriage-return newline combination), or EOF. */
398 handle_newline (buffer, newline_char)
400 cppchar_t newline_char;
402 cppchar_t next = EOF;
404 buffer->col_adjust = 0;
406 buffer->line_base = buffer->cur;
408 /* Handle CR-LF and LF-CR combinations, get the next character. */
409 if (buffer->cur < buffer->rlimit)
411 next = *buffer->cur++;
412 if (next + newline_char == '\r' + '\n')
414 buffer->line_base = buffer->cur;
415 if (buffer->cur < buffer->rlimit)
416 next = *buffer->cur++;
422 buffer->read_ahead = next;
426 /* Subroutine of skip_escaped_newlines; called when a trigraph is
427 encountered. It warns if necessary, and returns true if the
428 trigraph should be honoured. FROM_CHAR is the third character of a
429 trigraph, and presumed to be the previous character for position
432 trigraph_ok (pfile, from_char)
436 int accept = CPP_OPTION (pfile, trigraphs);
438 /* Don't warn about trigraphs in comments. */
439 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
441 cpp_buffer *buffer = pfile->buffer;
443 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
444 "trigraph ??%c converted to %c",
446 (int) _cpp_trigraph_map[from_char]);
448 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
449 "trigraph ??%c ignored", (int) from_char);
455 /* Assumes local variables buffer and result. */
456 #define ACCEPT_CHAR(t) \
457 do { result->type = t; buffer->read_ahead = EOF; } while (0)
459 /* When we move to multibyte character sets, add to these something
460 that saves and restores the state of the multibyte conversion
461 library. This probably involves saving and restoring a "cookie".
462 In the case of glibc it is an 8-byte structure, so is not a high
463 overhead operation. In any case, it's out of the fast path. */
464 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
465 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
467 /* Skips any escaped newlines introduced by NEXT, which is either a
468 '?' or a '\\'. Returns the next character, which will also have
469 been placed in buffer->read_ahead. */
471 skip_escaped_newlines (buffer, next)
476 const unsigned char *saved_cur;
481 if (buffer->cur == buffer->rlimit)
487 next1 = *buffer->cur++;
488 if (next1 != '?' || buffer->cur == buffer->rlimit)
494 next1 = *buffer->cur++;
495 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
501 /* We have a full trigraph here. */
502 next = _cpp_trigraph_map[next1];
503 if (next != '\\' || buffer->cur == buffer->rlimit)
508 /* We have a backslash, and room for at least one more character. */
512 next1 = *buffer->cur++;
513 if (!is_nvspace (next1))
517 while (buffer->cur < buffer->rlimit);
519 if (!is_vspace (next1))
526 cpp_warning (buffer->pfile,
527 "backslash and newline separated by space");
529 next = handle_newline (buffer, next1);
531 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
533 while (next == '\\' || next == '?');
535 buffer->read_ahead = next;
539 /* Obtain the next character, after trigraph conversion and skipping
540 an arbitrary string of escaped newlines. The common case of no
541 trigraphs or escaped newlines falls through quickly. */
543 get_effective_char (buffer)
546 cppchar_t next = EOF;
548 if (buffer->cur < buffer->rlimit)
550 next = *buffer->cur++;
552 /* '?' can introduce trigraphs (and therefore backslash); '\\'
553 can introduce escaped newlines, which we want to skip, or
554 UCNs, which, depending upon lexer state, we will handle in
556 if (next == '?' || next == '\\')
557 next = skip_escaped_newlines (buffer, next);
560 buffer->read_ahead = next;
564 /* Skip a C-style block comment. We find the end of the comment by
565 seeing if an asterisk is before every '/' we encounter. Returns
566 non-zero if comment terminated by EOF, zero otherwise. */
568 skip_block_comment (pfile)
571 cpp_buffer *buffer = pfile->buffer;
572 cppchar_t c = EOF, prevc = EOF;
574 pfile->state.lexing_comment = 1;
575 while (buffer->cur != buffer->rlimit)
577 prevc = c, c = *buffer->cur++;
580 /* FIXME: For speed, create a new character class of characters
581 of no interest inside block comments. */
582 if (c == '?' || c == '\\')
583 c = skip_escaped_newlines (buffer, c);
585 /* People like decorating comments with '*', so check for '/'
586 instead for efficiency. */
592 /* Warn about potential nested comments, but not if the '/'
593 comes immediately before the true comment delimiter.
594 Don't bother to get it right across escaped newlines. */
595 if (CPP_OPTION (pfile, warn_comments)
596 && buffer->cur != buffer->rlimit)
598 prevc = c, c = *buffer->cur++;
599 if (c == '*' && buffer->cur != buffer->rlimit)
601 prevc = c, c = *buffer->cur++;
603 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
604 CPP_BUF_COL (buffer),
605 "\"/*\" within comment");
610 else if (is_vspace (c))
612 prevc = c, c = handle_newline (buffer, c);
616 adjust_column (pfile);
619 pfile->state.lexing_comment = 0;
620 buffer->read_ahead = EOF;
621 return c != '/' || prevc != '*';
624 /* Skip a C++ line comment. Handles escaped newlines. Returns
625 non-zero if a multiline comment. The following new line, if any,
626 is left in buffer->read_ahead. */
628 skip_line_comment (pfile)
631 cpp_buffer *buffer = pfile->buffer;
632 unsigned int orig_lineno = buffer->lineno;
635 pfile->state.lexing_comment = 1;
639 if (buffer->cur == buffer->rlimit)
643 if (c == '?' || c == '\\')
644 c = skip_escaped_newlines (buffer, c);
646 while (!is_vspace (c));
648 pfile->state.lexing_comment = 0;
649 buffer->read_ahead = c; /* Leave any newline for caller. */
650 return orig_lineno != buffer->lineno;
653 /* pfile->buffer->cur is one beyond the \t character. Update
654 col_adjust so we track the column correctly. */
656 adjust_column (pfile)
659 cpp_buffer *buffer = pfile->buffer;
660 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
662 /* Round it up to multiple of the tabstop, but subtract 1 since the
663 tab itself occupies a character position. */
664 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
665 - col % CPP_OPTION (pfile, tabstop)) - 1;
668 /* Skips whitespace, saving the next non-whitespace character.
669 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
670 tokens might be assigned an incorrect column. */
672 skip_whitespace (pfile, c)
676 cpp_buffer *buffer = pfile->buffer;
677 unsigned int warned = 0;
681 /* Horizontal space always OK. */
685 adjust_column (pfile);
686 /* Just \f \v or \0 left. */
691 cpp_warning (pfile, "null character(s) ignored");
695 else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
696 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
697 CPP_BUF_COL (buffer),
698 "%s in preprocessing directive",
699 c == '\f' ? "form feed" : "vertical tab");
702 if (buffer->cur == buffer->rlimit)
706 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
707 while (is_nvspace (c));
709 /* Remember the next character. */
710 buffer->read_ahead = c;
713 /* Parse an identifier, skipping embedded backslash-newlines.
714 Calculate the hash value of the token while parsing, for improved
715 performance. The hashing algorithm *must* match cpp_lookup(). */
717 static cpp_hashnode *
718 parse_identifier (pfile, c)
722 cpp_buffer *buffer = pfile->buffer;
723 unsigned int r = 0, saw_dollar = 0;
724 unsigned int orig_used = pfile->token_list.name_used;
730 if (pfile->token_list.name_used == pfile->token_list.name_cap)
731 _cpp_expand_name_space (&pfile->token_list,
732 pfile->token_list.name_used + 256);
733 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
740 if (buffer->cur == buffer->rlimit)
745 while (is_idchar (c));
747 /* Potential escaped newline? */
748 if (c != '?' && c != '\\')
750 c = skip_escaped_newlines (buffer, c);
752 while (is_idchar (c));
754 /* $ is not an identifier character in the standard, but is commonly
755 accepted as an extension. Don't warn about it in skipped
756 conditional blocks. */
757 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
758 cpp_pedwarn (pfile, "'$' character(s) in identifier");
760 /* Remember the next character. */
761 buffer->read_ahead = c;
762 return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
763 pfile->token_list.name_used - orig_used, r);
766 /* Parse a number, skipping embedded backslash-newlines. */
768 parse_number (pfile, number, c)
774 cpp_buffer *buffer = pfile->buffer;
775 unsigned int orig_used = pfile->token_list.name_used;
777 /* Reserve space for a leading period. */
778 if (pfile->state.seen_dot)
779 pfile->token_list.name_used++;
785 if (pfile->token_list.name_used >= pfile->token_list.name_cap)
786 _cpp_expand_name_space (&pfile->token_list,
787 pfile->token_list.name_used + 256);
788 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
792 if (buffer->cur == buffer->rlimit)
797 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
799 /* Potential escaped newline? */
800 if (c != '?' && c != '\\')
802 c = skip_escaped_newlines (buffer, c);
804 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
806 /* Put any leading period in place, now we have the room. */
807 if (pfile->state.seen_dot)
808 pfile->token_list.namebuf[orig_used] = '.';
810 /* Remember the next character. */
811 buffer->read_ahead = c;
813 number->text = &pfile->token_list.namebuf[orig_used];
814 number->len = pfile->token_list.name_used - orig_used;
817 /* Subroutine of parse_string. Emits error for unterminated strings. */
819 unterminated (pfile, line, term)
824 cpp_error (pfile, "missing terminating %c character", term);
826 if (term == '\"' && pfile->mls_line && pfile->mls_line != line)
828 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
829 "possible start of unterminated string literal");
834 /* Parses a string, character constant, or angle-bracketed header file
835 name. Handles embedded trigraphs and escaped newlines.
837 Multi-line strings are allowed, but they are deprecated within
840 parse_string (pfile, token, terminator)
843 cppchar_t terminator;
845 cpp_buffer *buffer = pfile->buffer;
846 unsigned int orig_used = pfile->token_list.name_used;
848 unsigned int nulls = 0;
852 if (buffer->cur == buffer->rlimit)
855 unterminated (pfile, token->line, terminator);
861 /* Handle trigraphs, escaped newlines etc. */
862 if (c == '?' || c == '\\')
863 c = skip_escaped_newlines (buffer, c);
867 unsigned int u = pfile->token_list.name_used;
869 /* An odd number of consecutive backslashes represents an
870 escaped terminator. */
871 while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
874 if ((pfile->token_list.name_used - u) % 2 == 0)
880 else if (is_vspace (c))
882 /* In assembly language, silently terminate string and
883 character literals at end of line. This is a kludge
884 around not knowing where comments are. */
885 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
888 /* Character constants and header names may not extend over
889 multiple lines. In Standard C, neither may strings.
890 Unfortunately, we accept multiline strings as an
891 extension. (Deprecatedly even in directives - otherwise,
892 glibc's longlong.h breaks.) */
893 if (terminator != '"')
895 unterminated (pfile, token->line, terminator);
899 if (pfile->mls_line == 0)
901 pfile->mls_line = token->line;
902 pfile->mls_column = token->col;
903 if (CPP_PEDANTIC (pfile))
904 cpp_pedwarn (pfile, "multi-line string constant");
907 handle_newline (buffer, c); /* Stores to read_ahead. */
913 cpp_warning (pfile, "null character(s) preserved in literal");
916 if (pfile->token_list.name_used == pfile->token_list.name_cap)
917 _cpp_expand_name_space (&pfile->token_list,
918 pfile->token_list.name_used + 256);
920 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
921 /* If we had a new line, the next character is in read_ahead. */
924 c = buffer->read_ahead;
929 buffer->read_ahead = c;
931 token->val.str.text = &pfile->token_list.namebuf[orig_used];
932 token->val.str.len = pfile->token_list.name_used - orig_used;
935 /* For output routine simplicity, the stored comment includes the
936 comment start and any terminator. */
938 save_comment (pfile, token, from)
941 const unsigned char *from;
943 unsigned char *buffer;
945 cpp_toklist *list = &pfile->token_list;
947 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
948 _cpp_reserve_name_space (list, len);
949 buffer = list->namebuf + list->name_used;
950 list->name_used += len;
952 token->type = CPP_COMMENT;
953 token->val.str.len = len;
954 token->val.str.text = buffer;
957 memcpy (buffer + 1, from, len - 1);
960 /* Subroutine of lex_token to handle '%'. A little tricky, since we
961 want to avoid stepping back when lexing %:%X. */
963 lex_percent (buffer, result)
969 result->type = CPP_MOD;
970 /* Parsing %:%X could leave an extra character. */
971 if (buffer->extra_char == EOF)
972 c = get_effective_char (buffer);
975 c = buffer->read_ahead = buffer->extra_char;
976 buffer->extra_char = EOF;
980 ACCEPT_CHAR (CPP_MOD_EQ);
981 else if (CPP_OPTION (buffer->pfile, digraphs))
985 result->flags |= DIGRAPH;
986 ACCEPT_CHAR (CPP_HASH);
987 if (get_effective_char (buffer) == '%')
989 buffer->extra_char = get_effective_char (buffer);
990 if (buffer->extra_char == ':')
992 buffer->extra_char = EOF;
993 ACCEPT_CHAR (CPP_PASTE);
996 /* We'll catch the extra_char when we're called back. */
997 buffer->read_ahead = '%';
1002 result->flags |= DIGRAPH;
1003 ACCEPT_CHAR (CPP_CLOSE_BRACE);
1008 /* Subroutine of lex_token to handle '.'. This is tricky, since we
1009 want to avoid stepping back when lexing '...' or '.123'. In the
1010 latter case we should also set a flag for parse_number. */
1012 lex_dot (pfile, result)
1016 cpp_buffer *buffer = pfile->buffer;
1019 /* Parsing ..X could leave an extra character. */
1020 if (buffer->extra_char == EOF)
1021 c = get_effective_char (buffer);
1024 c = buffer->read_ahead = buffer->extra_char;
1025 buffer->extra_char = EOF;
1028 /* All known character sets have 0...9 contiguous. */
1029 if (c >= '0' && c <= '9')
1031 result->type = CPP_NUMBER;
1032 buffer->pfile->state.seen_dot = 1;
1033 parse_number (pfile, &result->val.str, c);
1034 buffer->pfile->state.seen_dot = 0;
1038 result->type = CPP_DOT;
1041 buffer->extra_char = get_effective_char (buffer);
1042 if (buffer->extra_char == '.')
1044 buffer->extra_char = EOF;
1045 ACCEPT_CHAR (CPP_ELLIPSIS);
1048 /* We'll catch the extra_char when we're called back. */
1049 buffer->read_ahead = '.';
1051 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1052 ACCEPT_CHAR (CPP_DOT_STAR);
1057 lex_token (pfile, result)
1062 cpp_buffer *buffer = pfile->buffer;
1063 const unsigned char *comment_start;
1067 result->line = CPP_BUF_LINE (buffer);
1069 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1071 c = buffer->read_ahead;
1072 if (c == EOF && buffer->cur < buffer->rlimit)
1079 buffer->read_ahead = EOF;
1083 /* Non-empty files should end in a newline. Testing
1084 skip_newlines ensures we only emit the warning once. */
1085 if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1086 && pfile->state.skip_newlines)
1087 cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1088 "no newline at end of file");
1089 result->type = CPP_EOF;
1092 case ' ': case '\t': case '\f': case '\v': case '\0':
1093 skip_whitespace (pfile, c);
1094 result->flags |= PREV_WHITE;
1097 case '\n': case '\r':
1098 result->type = CPP_EOF;
1099 handle_newline (buffer, c);
1100 /* Handling here will change significantly when moving to
1102 if (pfile->state.skip_newlines)
1104 result->flags &= ~PREV_WHITE; /* Clear any whitespace flag. */
1111 /* These could start an escaped newline, or '?' a trigraph. Let
1112 skip_escaped_newlines do all the work. */
1114 unsigned int lineno = buffer->lineno;
1116 c = skip_escaped_newlines (buffer, c);
1117 if (lineno != buffer->lineno)
1118 /* We had at least one escaped newline of some sort, and the
1119 next character is in buffer->read_ahead. Update the
1120 token's line and column. */
1123 /* We are either the original '?' or '\\', or a trigraph. */
1124 result->type = CPP_QUERY;
1125 buffer->read_ahead = EOF;
1127 result->type = CPP_BACKSLASH;
1133 case '0': case '1': case '2': case '3': case '4':
1134 case '5': case '6': case '7': case '8': case '9':
1135 result->type = CPP_NUMBER;
1136 parse_number (pfile, &result->val.str, c);
1140 if (!CPP_OPTION (pfile, dollars_in_ident))
1142 /* Fall through... */
1145 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1146 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1147 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1148 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1150 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1151 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1152 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1153 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1155 result->type = CPP_NAME;
1156 result->val.node = parse_identifier (pfile, c);
1158 /* 'L' may introduce wide characters or strings. */
1159 if (result->val.node == pfile->spec_nodes->n_L)
1161 c = buffer->read_ahead; /* For make_string. */
1162 if (c == '\'' || c == '"')
1164 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1168 /* Convert named operators to their proper types. */
1169 else if (result->val.node->type == T_OPERATOR)
1171 result->flags |= NAMED_OP;
1172 result->type = result->val.node->value.code;
1178 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1180 parse_string (pfile, result, c);
1184 /* A potential block or line comment. */
1185 comment_start = buffer->cur;
1186 result->type = CPP_DIV;
1187 c = get_effective_char (buffer);
1189 ACCEPT_CHAR (CPP_DIV_EQ);
1190 if (c != '/' && c != '*')
1195 if (skip_block_comment (pfile))
1196 cpp_error_with_line (pfile, result->line, result->col,
1197 "unterminated comment");
1201 if (!CPP_OPTION (pfile, cplusplus_comments)
1202 && !CPP_IN_SYSTEM_HEADER (pfile))
1205 /* We silently allow C++ comments in system headers,
1206 irrespective of conformance mode, because lots of
1207 broken systems do that and trying to clean it up in
1208 fixincludes is a nightmare. */
1209 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1210 && ! buffer->warned_cplusplus_comments)
1213 "C++ style comments are not allowed in ISO C89");
1215 "(this will be reported only once per input file)");
1216 buffer->warned_cplusplus_comments = 1;
1219 /* Skip_line_comment updates buffer->read_ahead. */
1220 if (skip_line_comment (pfile))
1221 cpp_warning_with_line (pfile, result->line, result->col,
1222 "multi-line comment");
1225 /* Skipping the comment has updated buffer->read_ahead. */
1226 if (!pfile->state.save_comments)
1228 result->flags |= PREV_WHITE;
1232 /* Save the comment as a token in its own right. */
1233 save_comment (pfile, result, comment_start);
1237 if (pfile->state.angled_headers)
1239 result->type = CPP_HEADER_NAME;
1240 c = '>'; /* terminator. */
1244 result->type = CPP_LESS;
1245 c = get_effective_char (buffer);
1247 ACCEPT_CHAR (CPP_LESS_EQ);
1250 ACCEPT_CHAR (CPP_LSHIFT);
1251 if (get_effective_char (buffer) == '=')
1252 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1254 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1256 ACCEPT_CHAR (CPP_MIN);
1257 if (get_effective_char (buffer) == '=')
1258 ACCEPT_CHAR (CPP_MIN_EQ);
1260 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1262 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1263 result->flags |= DIGRAPH;
1265 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1267 ACCEPT_CHAR (CPP_OPEN_BRACE);
1268 result->flags |= DIGRAPH;
1273 result->type = CPP_GREATER;
1274 c = get_effective_char (buffer);
1276 ACCEPT_CHAR (CPP_GREATER_EQ);
1279 ACCEPT_CHAR (CPP_RSHIFT);
1280 if (get_effective_char (buffer) == '=')
1281 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1283 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1285 ACCEPT_CHAR (CPP_MAX);
1286 if (get_effective_char (buffer) == '=')
1287 ACCEPT_CHAR (CPP_MAX_EQ);
1292 lex_percent (buffer, result);
1296 lex_dot (pfile, result);
1300 result->type = CPP_PLUS;
1301 c = get_effective_char (buffer);
1303 ACCEPT_CHAR (CPP_PLUS_EQ);
1305 ACCEPT_CHAR (CPP_PLUS_PLUS);
1309 result->type = CPP_MINUS;
1310 c = get_effective_char (buffer);
1313 ACCEPT_CHAR (CPP_DEREF);
1314 if (CPP_OPTION (pfile, cplusplus)
1315 && get_effective_char (buffer) == '*')
1316 ACCEPT_CHAR (CPP_DEREF_STAR);
1319 ACCEPT_CHAR (CPP_MINUS_EQ);
1321 ACCEPT_CHAR (CPP_MINUS_MINUS);
1325 result->type = CPP_MULT;
1326 if (get_effective_char (buffer) == '=')
1327 ACCEPT_CHAR (CPP_MULT_EQ);
1331 result->type = CPP_EQ;
1332 if (get_effective_char (buffer) == '=')
1333 ACCEPT_CHAR (CPP_EQ_EQ);
1337 result->type = CPP_NOT;
1338 if (get_effective_char (buffer) == '=')
1339 ACCEPT_CHAR (CPP_NOT_EQ);
1343 result->type = CPP_AND;
1344 c = get_effective_char (buffer);
1346 ACCEPT_CHAR (CPP_AND_EQ);
1348 ACCEPT_CHAR (CPP_AND_AND);
1352 result->type = CPP_HASH;
1353 if (get_effective_char (buffer) == '#')
1354 ACCEPT_CHAR (CPP_PASTE);
1358 result->type = CPP_OR;
1359 c = get_effective_char (buffer);
1361 ACCEPT_CHAR (CPP_OR_EQ);
1363 ACCEPT_CHAR (CPP_OR_OR);
1367 result->type = CPP_XOR;
1368 if (get_effective_char (buffer) == '=')
1369 ACCEPT_CHAR (CPP_XOR_EQ);
1373 result->type = CPP_COLON;
1374 c = get_effective_char (buffer);
1375 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1376 ACCEPT_CHAR (CPP_SCOPE);
1377 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1379 result->flags |= DIGRAPH;
1380 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1384 case '~': result->type = CPP_COMPL; break;
1385 case ',': result->type = CPP_COMMA; break;
1386 case '(': result->type = CPP_OPEN_PAREN; break;
1387 case ')': result->type = CPP_CLOSE_PAREN; break;
1388 case '[': result->type = CPP_OPEN_SQUARE; break;
1389 case ']': result->type = CPP_CLOSE_SQUARE; break;
1390 case '{': result->type = CPP_OPEN_BRACE; break;
1391 case '}': result->type = CPP_CLOSE_BRACE; break;
1392 case ';': result->type = CPP_SEMICOLON; break;
1395 if (CPP_OPTION (pfile, objc))
1397 /* In Objective C, '@' may begin keywords or strings, like
1398 @keyword or @"string". It would be nice to call
1399 get_effective_char here and test the result. However, we
1400 would then need to pass 2 characters to parse_identifier,
1401 making it ugly and slowing down its main loop. Instead,
1402 we assume we have an identifier, and recover if not. */
1403 result->type = CPP_NAME;
1404 result->val.node = parse_identifier (pfile, c);
1405 if (result->val.node->length != 1)
1408 /* OK, so it wasn't an identifier. Maybe a string? */
1409 if (buffer->read_ahead == '"')
1412 ACCEPT_CHAR (CPP_OSTRING);
1420 result->type = CPP_OTHER;
1421 result->val.aux = c;
/* lex_line (PFILE, LIST): append to LIST the tokens produced by
   repeated calls to lex_token on PFILE's current buffer, stopping once
   a CPP_EOF token has been lexed.  PFILE->state.in_lex_line is 1 while
   this runs and is cleared again at the end.  */
1427 * The tokenizer's main loop. Returns a token list, representing a
1428 * logical line in the input file. On EOF after some tokens have
1429 * been processed, we return immediately. Then in next call, or if
1430 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1431 * token is placed in the list.
1435 lex_line (pfile, list)
1439 unsigned int first_token;
1440 cpp_token *cur_token, *first;
1441 cpp_buffer *buffer = pfile->buffer;
1443 pfile->state.in_lex_line = 1;
/* Nothing has been consumed from the buffer yet: flag the list.  */
1444 if (pfile->buffer->cur == pfile->buffer->buf)
1445 list->flags |= BEG_OF_FILE;
/* Reset the per-line lexer state.  Comments are saved only when the
   discard_comments option is off.  FIRST_TOKEN remembers where this
   line's tokens begin within LIST.  */
1448 pfile->state.in_directive = 0;
1449 pfile->state.angled_headers = 0;
1450 pfile->state.skip_newlines = 1;
1451 pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1452 first_token = list->tokens_used;
1453 list->file = buffer->nominal_fname;
/* Make room when every allocated token slot is already in use.  */
1457 if (list->tokens_used >= list->tokens_cap)
1458 _cpp_expand_token_space (list, 256);
1460 cur_token = list->tokens + list->tokens_used;
1461 lex_token (pfile, cur_token);
/* skip_newlines is still set only for the first token lexed on this
   call: record the line number and check for a leading '#', which
   switches the lexer into directive mode with comment saving off.  */
1463 if (pfile->state.skip_newlines)
1465 pfile->state.skip_newlines = 0;
1466 list->line = buffer->lineno;
1467 if (cur_token->type == CPP_HASH)
1469 pfile->state.in_directive = 1;
1470 pfile->state.save_comments = 0;
1471 pfile->state.indented = cur_token->flags & PREV_WHITE;
1473 /* 6.10.3.10: Within the sequence of preprocessing tokens
1474 making up the invocation of a function-like macro, new
1475 line is considered a normal white-space character. */
1476 else if (first_token != 0)
1477 cur_token->flags |= PREV_WHITE;
/* Second token of a directive line: a number is checked as a
   linemarker, anything else as a directive name.  */
1479 else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1481 if (cur_token->type == CPP_NUMBER)
1482 list->directive = _cpp_check_linemarker (pfile, cur_token);
1484 list->directive = _cpp_check_directive (pfile, cur_token);
1487 /* _cpp_get_line assumes list->tokens_used refers to the current
1488 token being lexed. So do this after _cpp_check_directive to
1489 get the warnings therein correct. */
1490 list->tokens_used++;
1492 while (cur_token->type != CPP_EOF);
1494 /* All tokens are allocated, so the memory location is fixed. */
1495 first = &list->tokens[first_token];
1496 first->flags |= BOL;
1497 pfile->first_directive_token = first;
1499 /* Don't complain about the null directive, nor directives in
1500 assembly source: we don't know where the comments are, and # may
1501 introduce assembler pseudo-ops. Don't complain about invalid
1502 directives in skipped conditional groups (6.10 p4). */
1503 if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1504 && !CPP_OPTION (pfile, lang_asm))
/* More than just '#' and the EOF were lexed.  When the token after
   '#' is a name, it is quoted in the diagnostic.  */
1506 if (cur_token > first + 1)
1508 if (first[1].type == CPP_NAME)
1509 cpp_error_with_line (pfile, first->line, first->col,
1510 "invalid preprocessing directive #%s",
1511 first[1].val.node->name);
1513 cpp_error_with_line (pfile, first->line, first->col,
1514 "invalid preprocessing directive");
1517 /* Discard this line to prevent further errors from cc1. */
1518 _cpp_clear_toklist (list);
1522 /* Drop the EOF unless really at EOF or in a directive. */
1523 if (cur_token != first && !KNOWN_DIRECTIVE (list)
1524 && pfile->done_initializing)
1525 list->tokens_used--;
1527 pfile->state.in_lex_line = 0;
/* spell_token (PFILE, TOKEN, BUFFER): copy TOKEN's spelling to BUFFER,
   advancing BUFFER as characters are written.  How the spelling is
   found depends on TOKEN_SPELL (TOKEN).  PFILE is passed to cpp_ice
   when the token cannot be spelled.  */
1530 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1531 already contain enough space to hold the token's spelling.
1532 Returns a pointer to the character after the last character
1535 static unsigned char *
1536 spell_token (pfile, token, buffer)
1537 cpp_reader *pfile; /* Would be nice to be rid of this... */
1538 const cpp_token *token;
1539 unsigned char *buffer;
1541 switch (TOKEN_SPELL (token))
1543 case SPELL_OPERATOR:
1545 const unsigned char *spelling;
/* Digraph-flagged operators are spelled from _cpp_digraph_spellings,
   indexed by the token type's offset from CPP_FIRST_DIGRAPH.  */
1548 if (token->flags & DIGRAPH)
1549 spelling = _cpp_digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1550 else if (token->flags & NAMED_OP)
1553 spelling = TOKEN_NAME (token);
/* The chosen spelling is NUL-terminated; walk it up to the NUL.  */
1555 while ((c = *spelling++) != '\0')
/* Identifier spelling: copy the hash node's name, LENGTH bytes, and
   step BUFFER past it.  */
1562 memcpy (buffer, token->val.node->name, token->val.node->length);
1563 buffer += token->val.node->length;
/* String-like tokens: TAG is an optional prefix character, LEFT and
   RIGHT are the delimiters; a value of '\0' means "emit nothing".  */
1568 int left, right, tag;
1569 switch (token->type)
1571 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1572 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1573 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1574 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1575 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1576 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1577 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Emit prefix, opening delimiter, the stored text (val.str.len bytes,
   copied verbatim), then the closing delimiter.  */
1579 if (tag) *buffer++ = tag;
1580 if (left) *buffer++ = left;
1581 memcpy (buffer, token->val.str.text, token->val.str.len);
1582 buffer += token->val.str.len;
1583 if (right) *buffer++ = right;
/* Single-character spelling: the character is held in val.aux.  */
1588 *buffer++ = token->val.aux;
/* No way to spell this token: report it through cpp_ice.  */
1592 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1599 /* Macro expansion algorithm.
1601 Macro expansion is implemented by a single-pass algorithm; there are
1602 no rescan passes involved. cpp_get_token expands just enough to be
1603 able to return a token to the caller; a consequence is that when it
1604 returns the preprocessor can be in a state of mid-expansion. The
1605 algorithm does not work by fully expanding a macro invocation into
1606 some kind of token list, and then returning them one by one.
1608 Our expansion state is recorded in a context stack. We start out with
1609 a single context on the stack, let's call it base context. This
1610 consists of the token list returned by lex_line that forms the next
1611 logical line in the source file.
1613 The current level in the context stack is stored in the cur_context
1614 member of the cpp_reader structure. The context it references keeps,
1615 amongst other things, a count of how many tokens form that context and
1616 our position within those tokens.
1618 Fundamentally, calling cpp_get_token will return the next token from
1619 the current context. If we're at the end of the current context, that
1620 context is popped from the stack first, unless it is the base context,
1621 in which case the next logical line is lexed from the source file.
1623 However, before returning the token, if it is a CPP_NAME token
1624 _cpp_get_token checks to see if it is a macro and if it is enabled.
1625 Each time it encounters a macro name, it calls push_macro_context.
1626 This function checks that the macro should be expanded (with
1627 is_macro_enabled), and if so pushes a new macro context on the stack
1628 which becomes the current context. It then loops back to read the
1629 first token of the macro context.
1631 A macro context basically consists of the token list representing the
1632 macro's replacement list, which was saved in the hash table by
1633 save_macro_expansion when its #define statement was parsed. If the
1634 macro is function-like, it also contains the tokens that form the
1635 arguments to the macro. I say more about macro arguments below, but
1636 for now just saying that each argument is a set of pointers to tokens
1639 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1640 token. This represents an argument passed to the macro, with the
1641 argument number stored in the token's AUX field. The argument should
1642 be substituted, this is achieved by pushing an "argument context". An
1643 argument context just refers to the tokens forming the argument,
1644 which are obtained directly from the macro context. The STRINGIFY
1645 flag on a CPP_MACRO_ARG token indicates that the argument should be
1648 Here are a few simple rules the context stack obeys:-
1650 1) The lex_line token list is always context zero.
1652 2) Context 1, if it exists, must be a macro context.
1654 3) An argument context can only appear above a macro context.
1656 4) A macro context can appear above the base context, another macro
1657 context, or an argument context.
1659 5) These imply that the minimal level of an argument context is 2.
1661 The only tricky thing left is ensuring that macros are enabled and
1662 disabled correctly. The algorithm controls macro expansion by the
1663 level of the context a token is taken from in the context stack. If a
1664 token is taken from a level equal to no_expand_level (a member of
1665 struct cpp_reader), no expansion is performed.
1667 When popping a context off the stack, if no_expand_level equals the
1668 level of the popped context, it is reduced by one to match the new
1669 context level, so that expansion is still disabled. It does not
1670 increase if a context is pushed, though. It starts out life as
1671 UINT_MAX, which has the effect that initially macro expansion is
1672 enabled. I explain how this mechanism works below.
1674 The standard requires:-
1676 1) Arguments to be fully expanded before substitution.
1678 2) Stringified arguments to not be expanded, nor the tokens
1679 immediately surrounding a ## operator.
1681 3) Continual rescanning until there are no more macros left to
1684 4) Once a macro has been expanded in stage 1) or 3), it cannot be
1685 expanded again during later rescans. This prevents infinite
1688 The first thing to observe is that stage 3) is mostly redundant.
1689 Since a macro is disabled once it has been expanded, how can a rescan
1690 find an unexpanded macro name? There are only two cases where this is
1693 a) If the macro name results from a token paste operation.
1695 b) If the macro in question is a function-like macro that hasn't
1696 already been expanded because previously there was not the required
1697 '(' token immediately following it. This is only possible when an
1698 argument is substituted, and after substitution the last token of
1699 the argument can bind with a parenthesis appearing in the tokens
1700 following the substitution. Note that if the '(' appears within the
1701 argument, the ')' must too, as expanding macro arguments cannot
1702 "suck in" tokens outside the argument.
1704 So we tackle this as follows. When parsing the macro invocation for
1705 arguments, we record the tokens forming each argument as a list of
1706 pointers to those tokens. We do not expand any tokens that are "raw",
1707 i.e. directly from the macro invocation, but other tokens that come
1708 from (nested) argument substitution are fully expanded.
1710 This is achieved by setting the no_expand_level to that of the macro
1711 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
1712 forming an argument, because parse_args (indirectly) calls
1713 get_raw_token which automatically pushes argument contexts and traces
1714 into them. Since these contexts are at a higher level than the
1715 no_expand_level, they get fully macro expanded.
1717 "Raw" and non-raw tokens are separated in arguments by null pointers,
1718 with the policy that the initial state of an argument is raw. If the
1719 first token is not raw, it should be preceded by a null pointer. When
1720 tracing through the tokens of an argument context, each time
1721 get_raw_token encounters a null pointer, it toggles the flag
1724 This flag, when set, indicates to is_macro_disabled that we are
1725 reading raw tokens which should be macro-expanded. Similarly, if
1726 clear, is_macro_disabled suppresses re-expansion.
1728 It's probably time for an example.
1732 #define xstr(y) str(y hash)
1734 xstr(hash) // "# hash"
1736 In the invocation of str, parse_args turns off macro expansion and so
1737 parses the argument as <hash>. This is the only token (pointer)
1738 passed as the argument to str. Since <hash> is raw there is no need
1739 for an initial null pointer. stringify_arg is called from
1740 get_raw_token when tracing through the expansion of str, since the
1741 argument has the STRINGIFY flag set. stringify_arg turns off
1742 macro_expansion by setting the no_expand_level to that of the argument
1743 context. Thus it gets the token <hash> and stringifies it to "hash"
1746 Similarly xstr is passed <hash>. However, when parse_args is parsing
1747 the invocation of str() in xstr's expansion, get_raw_token encounters
1748 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
1749 an argument context, and enters the tokens of the argument,
1750 i.e. <hash>. This is at a higher context level than parse_args
1751 disabled, and so is_macro_disabled permits expansion of it and a macro
1752 context is pushed on top of the argument context. This contains the
1753 <#> token, and the end result is that <hash> is macro expanded.
1754 However, after popping off the argument context, the <hash> of xstr's
1755 expansion does not get macro expanded because we're back at the
1756 no_expand_level. The end result is that the argument passed to str is
1757 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
1758 raw, <#> is not raw, but then <hash> is.
1763 /* Free the storage allocated for macro arguments. */
1765 free_macro_args (args)
/* Release the array of token pointers that save_token grows with
   xrealloc.  */
1769 free ((PTR) args->tokens);
/* is_macro_disabled (PFILE, EXPANSION, TOKEN): TOKEN is a name token
   whose macro has replacement list EXPANSION.  The checks below look at
   the current context, at the contexts beneath it, and (for macros with
   paramc >= 0) at whether the next token is an opening parenthesis.  */
1774 /* Determines if a macro has been already used (and is therefore
1777 is_macro_disabled (pfile, expansion, token)
1779 const cpp_toklist *expansion;
1780 const cpp_token *token;
1782 cpp_context *context = CURRENT_CONTEXT (pfile);
1784 /* Arguments on either side of ## are inserted in place without
1785 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
1786 occurs during a later rescan pass. The effect is that we expand
1787 iff we would as part of the macro's expansion list, so we should
1788 drop to the macro's context. */
/* Three cases are distinguished inside an argument context: the token
   is the result of a paste, the context is not currently reading raw
   tokens, or the argument sits next to a ## operator.  */
1789 if (IS_ARG_CONTEXT (context))
1791 if (token->flags & PASTED)
1793 else if (!(context->flags & CONTEXT_RAW))
1795 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
1799 /* Have we already used this macro? */
/* Walk the context stack; a macro context whose token list is
   EXPANSION means this macro is already being expanded.  */
1800 while (context->level > 0)
1802 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
1804 /* Raw argument tokens are judged based on the token list they
1806 if (context->flags & CONTEXT_RAW)
1807 context = pfile->contexts + context->level;
1812 /* Function-like macros may be disabled if the '(' is not in the
1813 current context. We check this without disrupting the context
1815 if (expansion->paramc >= 0)
1817 const cpp_token *next;
1818 unsigned int prev_nme;
1820 context = CURRENT_CONTEXT (pfile);
1821 /* Drop down any contexts we're at the end of: the '(' may
1822 appear in lower macro expansions, or in the rest of the file. */
1823 while (context->posn == context->count && context > pfile->contexts)
1826 /* If we matched, we are disabled, as we appear in the
1827 expansion of each macro we meet. */
1828 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
/* Peek at the next token with no_expand_level temporarily set to the
   index of the context found above, then restore the saved level.  */
1832 prev_nme = pfile->no_expand_level;
1833 pfile->no_expand_level = context - pfile->contexts;
1834 next = _cpp_get_token (pfile);
1835 restore_macro_expansion (pfile, prev_nme);
/* Not followed by '(': push the token back so it is read again, and
   issue the traditional-C warning when CPP_WTRADITIONAL is set.  */
1837 if (next->type != CPP_OPEN_PAREN)
1839 _cpp_push_token (pfile, next);
1840 if (CPP_WTRADITIONAL (pfile))
1842 "function macro %s must be used with arguments in traditional C",
1843 token->val.node->name);
1851 /* Add a token to the set of tokens forming the arguments to the macro
1852 being parsed in parse_args. */
1854 save_token (args, token)
1856 const cpp_token *token;
/* Array full: grow the capacity to twice its current value plus 100
   entries before storing.  Only the pointer TOKEN is stored; the token
   itself is not copied.  */
1858 if (args->used == args->capacity)
1860 args->capacity += args->capacity + 100;
1861 args->tokens = (const cpp_token **)
1862 xrealloc ((PTR) args->tokens,
1863 args->capacity * sizeof (const cpp_token *));
1865 args->tokens[args->used++] = token;
1868 /* Take and save raw tokens until we finish one argument. Empty
1869 arguments are saved as a single CPP_PLACEMARKER token. */
/* VAR_ARGS is nonzero when this argument is a variable argument, in
   which case commas do not end it.  PAREN_CONTEXT is the context level
   that tokens are compared against to decide whether they are raw.
   Tokens are appended to ARGS with save_token.  PCOUNT presumably
   receives the token count; its assignment is not in these lines.  */
1870 static const cpp_token *
1871 parse_arg (pfile, var_args, paren_context, args, pcount)
1874 unsigned int paren_context;
1876 unsigned int *pcount;
1878 const cpp_token *token;
1879 unsigned int paren = 0, count = 0;
1880 int raw, was_raw = 1;
1882 for (count = 0;; count++)
1884 token = _cpp_get_token (pfile);
1886 switch (token->type)
1891 case CPP_OPEN_PAREN:
1895 case CPP_CLOSE_PAREN:
1901 /* Commas are not terminators within parentheses or var_args. */
1902 if (paren || var_args)
1906 case CPP_EOF: /* Error reported by caller. */
/* A token counts as raw when the current context level is not above
   PAREN_CONTEXT.  */
1910 raw = pfile->cur_context <= paren_context;
1914 save_token (args, 0);
1917 save_token (args, token);
1923 /* Duplicate the placemarker. Then we can set its flags and
1924 position and safely be using more than one. */
1925 save_token (args, duplicate_token (pfile, &placemarker_token));
1933 /* This macro returns true if the argument starting at offset O of arglist
1934 A is empty - that is, it's either a single PLACEMARKER token, or a null
1935 pointer followed by a PLACEMARKER. */
/* Both A and O appear more than once in the expansion, so pass
   expressions without side effects.  When tokens[O] is null the macro
   reads tokens[O + 1] without a bounds check.  */
1937 #define empty_argument(A, O) \
1938 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
1939 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
1941 /* Parse the arguments making up a macro invocation. Nested arguments
1942 are automatically macro expanded, but immediate macros are not
1943 expanded; this enables e.g. operator # to work correctly. Returns
1944 non-zero on error. */
/* HP is the hash node of the macro being invoked; its replacement list
   supplies the parameter count and the VAR_ARGS flag.  ARGS receives
   the argument tokens, with ARGS->ends[i] holding the running token
   total at the end of argument i.  */
1946 parse_args (pfile, hp, args)
1951 const cpp_token *token;
1952 const cpp_toklist *macro;
1953 unsigned int total = 0;
/* Context level at the point of invocation; parse_arg uses it to tell
   raw tokens from those read out of higher contexts.  */
1954 unsigned int paren_context = pfile->cur_context;
1957 macro = hp->value.expansion;
/* Only the last parameter of a VAR_ARGS macro is parsed as a variable
   argument.  */
1962 token = parse_arg (pfile, (argc + 1 == macro->paramc
1963 && (macro->flags & VAR_ARGS)),
1964 paren_context, args, &count);
/* End offsets are recorded only for arguments the macro declares.  */
1965 if (argc < macro->paramc)
1968 args->ends[argc] = total;
1972 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
/* Input ran out before the closing parenthesis was seen.  */
1974 if (token->type == CPP_EOF)
1976 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
1979 else if (argc < macro->paramc)
1981 /* A rest argument is allowed to not appear in the invocation at all.
1982 e.g. #define debug(format, args...) ...
1984 This is exactly the same as if the rest argument had received no
1985 tokens - debug("string",); This extension is deprecated. */
1987 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
1989 /* Duplicate the placemarker. Then we can set its flags and
1990 position and safely be using more than one. */
1991 save_token (args, duplicate_token (pfile, &placemarker_token));
1992 args->ends[argc] = total + 1;
1994 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
1995 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2001 cpp_error (pfile, "%u arguments is not enough for macro \"%s\"",
2006 /* An empty argument to an empty function-like macro is fine. */
2007 else if (argc > macro->paramc
2008 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2010 cpp_error (pfile, "%u arguments is too many for macro \"%s\"",
2018 /* Adds backslashes before all backslashes and double quotes appearing
2019 in strings. Non-printable characters are converted to octal. */
/* DEST must have room for the worst case.  The octal escape below is
   four characters, and sprintf stores a terminating NUL after them, so
   one extra byte of slack is needed beyond the converted text.  */
2021 quote_string (dest, src, len)
2030 if (c == '\\' || c == '"')
2041 sprintf ((char *) dest, "\\%03o", c);
2050 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2051 CPP_STRING token containing TEXT in quoted form. */
2053 make_string_token (token, text, len)
/* quote_string can emit four characters (a backslash and three octal
   digits) per input byte, and it writes them with sprintf, which also
   stores a terminating NUL.  Allocate LEN * 4 + 1 bytes so that NUL
   stays inside the buffer even when every byte of TEXT, including the
   last, is non-printable.  The token's length is taken from the pointer
   quote_string returns, so the extra byte is never counted.  */
2060 buf = (U_CHAR *) xmalloc (len * 4 + 1);
2061 token->type = CPP_STRING;
2063 token->val.str.text = buf;
2064 token->val.str.len = quote_string (buf, text, len) - buf;
2068 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2069 evaluating to NUMBER. */
2071 alloc_number_token (pfile, number)
/* The token comes from PFILE's temporary token store.  */
2078 result = get_temp_token (pfile);
/* NUMBER is formatted in decimal; that text and its length become the
   token's string value.
   NOTE(review): confirm buf is large enough for the longest "%d"
   output plus the terminating NUL.  */
2080 sprintf (buf, "%d", number);
2082 result->type = CPP_NUMBER;
2084 result->val.str.text = (U_CHAR *) buf;
2085 result->val.str.len = strlen (buf);
2089 /* Returns a temporary token from the temporary token store of PFILE. */
2091 get_temp_token (pfile)
/* Every token allocated so far is in use, so allocate one more.  */
2094 if (pfile->temp_used == pfile->temp_alloced)
/* The pointer array itself is full too: grow it to twice its capacity
   plus 20 entries.  */
2096 if (pfile->temp_used == pfile->temp_cap)
2098 pfile->temp_cap += pfile->temp_cap + 20;
2099 pfile->temp_tokens = (cpp_token **) xrealloc
2100 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2102 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2103 (sizeof (cpp_token));
/* Hand out the next unused token and count it as used.  Its contents
   are not initialised here.  */
2106 return pfile->temp_tokens[pfile->temp_used++];
2109 /* Release (not free) for re-use the temporary tokens of PFILE. */
2111 release_temp_tokens (pfile)
/* Walk the used tokens from last to first, leaving temp_used at zero.
   The token structures themselves stay allocated.  */
2114 while (pfile->temp_used)
2116 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
/* A string-spelled token has its text freed and the pointer cleared,
   so no stale pointer is left in the reused slot.  */
2118 if (TOKEN_SPELL (token) == SPELL_STRING)
2120 free ((char *) token->val.str.text);
2121 token->val.str.text = 0;
2126 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2128 _cpp_free_temp_tokens (pfile)
2131 if (pfile->temp_tokens)
2133 /* It is possible, though unlikely (looking for '(' of a funlike
2134 macro into EOF), that we haven't released the tokens yet. */
2135 release_temp_tokens (pfile);
/* Free each allocated token structure, then the pointer array.  */
2136 while (pfile->temp_alloced)
2137 free (pfile->temp_tokens[--pfile->temp_alloced]);
2138 free (pfile->temp_tokens);
/* The string text held by PFILE's date and time tokens is freed too.  */
2143 free ((char *) pfile->date->val.str.text);
2145 free ((char *) pfile->time->val.str.text);
2150 /* Copy TOKEN into a temporary token from PFILE's store. */
2152 duplicate_token (pfile, token)
2154 const cpp_token *token;
2156 cpp_token *result = get_temp_token (pfile);
/* A string-spelled token gets its own copy of the text, val.str.len
   bytes with no terminator added, so the duplicate does not share the
   source token's buffer.  */
2159 if (TOKEN_SPELL (token) == SPELL_STRING)
2161 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2162 memcpy (buff, token->val.str.text, token->val.str.len);
2163 result->val.str.text = buff;
/* _cpp_can_paste (PFILE, TOKEN1, TOKEN2, DIGRAPH): compute the token
   type that results from pasting TOKEN1 and TOKEN2, based on their
   types A and B.  *DIGRAPH is set non-zero on the paths that produce a
   digraph spelling.  C++-only results are gated on the cplusplus
   option and digraph results on the digraphs option.  */
2168 /* Determine whether two tokens can be pasted together, and if so,
2169 what the resulting token is. Returns CPP_EOF if the tokens cannot
2170 be pasted, or the appropriate type for the merged token if they
2173 _cpp_can_paste (pfile, token1, token2, digraph)
2175 const cpp_token *token1, *token2;
2178 enum cpp_ttype a = token1->type, b = token2->type;
2179 int cxx = CPP_OPTION (pfile, cplusplus);
2181 /* Treat named operators as if they were ordinary NAMEs. */
2182 if (token1->flags & NAMED_OP)
2184 if (token2->flags & NAMED_OP)
/* Any operator up to CPP_LAST_EQ followed by '=' becomes its '=' form,
   which lies a fixed distance (CPP_EQ_EQ - CPP_EQ) further along the
   token enumeration.  */
2187 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2188 return a + (CPP_EQ_EQ - CPP_EQ);
/* Results when the left operand is '>'.  */
2193 if (b == a) return CPP_RSHIFT;
2194 if (b == CPP_QUERY && cxx) return CPP_MAX;
2195 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Results when the left operand is '<', including the digraphs that
   begin with it.  */
2198 if (b == a) return CPP_LSHIFT;
2199 if (b == CPP_QUERY && cxx) return CPP_MIN;
2200 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2201 if (CPP_OPTION (pfile, digraphs))
2204 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2206 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Operators that double up when pasted with themselves.  */
2210 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2211 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2212 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2215 if (b == a) return CPP_MINUS_MINUS;
2216 if (b == CPP_GREATER) return CPP_DEREF;
2219 if (b == a && cxx) return CPP_SCOPE;
2220 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2221 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2225 if (CPP_OPTION (pfile, digraphs))
2227 if (b == CPP_GREATER)
2228 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2230 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2234 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2237 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2238 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two identical tokens paste to CPP_PASTE only when both or neither
   are digraph-spelled; the result keeps that spelling.  */
2242 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2244 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* A name on the left: it can absorb another name or a number, and the
   identifier L turns a following character or string constant into its
   wide form.  */
2248 if (b == CPP_NAME) return CPP_NAME;
2250 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2252 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2254 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
/* A number on the left: it can absorb numbers, names and dots, and a
   sign when VALID_SIGN accepts the number's final character.  */
2258 if (b == CPP_NUMBER) return CPP_NUMBER;
2259 if (b == CPP_NAME) return CPP_NUMBER;
2260 if (b == CPP_DOT) return CPP_NUMBER;
2261 /* Numbers cannot have length zero, so this is safe. */
2262 if ((b == CPP_PLUS || b == CPP_MINUS)
2263 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
/* With the objc option on, a token whose val.aux is '@' pastes with a
   name or a string.  */
2268 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2270 if (b == CPP_NAME) return CPP_NAME;
2271 if (b == CPP_STRING) return CPP_OSTRING;
2281 /* Check if TOKEN is to be ##-pasted with the token after it. */
/* While TOKEN carries PASTE_LEFT, or the current context is a
   CONTEXT_PASTEL argument that has been fully read, the next token is
   fetched and combined with TOKEN.  Every combined result is a
   temporary token from PFILE's store.  */
2282 static const cpp_token *
2283 maybe_paste_with_next (pfile, token)
2285 const cpp_token *token;
2288 const cpp_token *second;
2289 cpp_context *context = CURRENT_CONTEXT (pfile);
2291 /* Is this token on the LHS of ## ? */
2293 while ((token->flags & PASTE_LEFT)
2294 || ((context->flags & CONTEXT_PASTEL)
2295 && context->posn == context->count))
2297 /* Suppress macro expansion for next token, but don't conflict
2298 with the other method of suppression. If it is an argument,
2299 macro expansion within the argument will still occur. */
2300 pfile->paste_level = pfile->cur_context;
2301 second = _cpp_get_token (pfile);
2302 pfile->paste_level = 0;
/* Fetching a token may have changed the current context; re-read it.  */
2303 context = CURRENT_CONTEXT (pfile);
2305 /* Ignore placemarker argument tokens (cannot be from an empty
2306 macro since macros are not expanded). */
2307 if (token->type == CPP_PLACEMARKER)
2308 pasted = duplicate_token (pfile, second);
2309 else if (second->type == CPP_PLACEMARKER)
2311 /* GCC has special extended semantics for , ## b where b is
2312 a varargs parameter: the comma disappears if b was given
2313 no actual arguments (not merely if b is an empty
2315 if (token->type == CPP_COMMA && (context->flags & CONTEXT_VARARGS))
2316 pasted = duplicate_token (pfile, second);
2318 pasted = duplicate_token (pfile, token);
2323 enum cpp_ttype type = _cpp_can_paste (pfile, token, second, &digraph);
2325 if (type == CPP_EOF)
2327 if (CPP_OPTION (pfile, warn_paste))
2329 /* Do not complain about , ## <whatever> if
2330 <whatever> came from a variable argument, because
2331 the author probably intended the ## to trigger
2332 the special extended semantics (see above). */
2333 if (token->type == CPP_COMMA
2334 && (context->flags & CONTEXT_VARARGS))
2338 "pasting would not give a valid preprocessing token");
2340 _cpp_push_token (pfile, second);
2341 /* A short term hack to safely clear the PASTE_LEFT flag. */
2342 pasted = duplicate_token (pfile, token);
2343 pasted->flags &= ~PASTE_LEFT;
2347 if (type == CPP_NAME || type == CPP_NUMBER)
2349 /* Join spellings. */
/* Both spellings are written back to back into a stack buffer sized
   from the two TOKEN_LEN values.  */
2352 pasted = get_temp_token (pfile);
2353 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2354 end = spell_token (pfile, token, buf);
2355 end = spell_token (pfile, second, end);
/* A name is looked up in the hash table; a number keeps a heap copy of
   the joined text.  */
2358 if (type == CPP_NAME)
2359 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2362 pasted->val.str.text = uxstrdup (buf);
2363 pasted->val.str.len = end - buf;
/* Wide and Objective-C strings: the result is a copy of the second
   token, which is given the new type below.  */
2366 else if (type == CPP_WCHAR || type == CPP_WSTRING
2367 || type == CPP_OSTRING)
2368 pasted = duplicate_token (pfile, second);
/* Remaining result types: take a fresh temporary token and zero its
   value field.  */
2371 pasted = get_temp_token (pfile);
2372 pasted->val.integer = 0;
2375 pasted->type = type;
2376 pasted->flags = digraph ? DIGRAPH : 0;
/* A pasted name whose node is a named operator takes that operator's
   token type and is flagged NAMED_OP.  */
2378 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2380 pasted->type = pasted->val.node->value.code;
2381 pasted->flags |= NAMED_OP;
2385 /* The pasted token gets the whitespace flags and position of the
2386 first token, the PASTE_LEFT flag of the second token, plus the
2387 PASTED flag to indicate it is the result of a paste. However, we
2388 want to preserve the DIGRAPH flag. */
2389 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2390 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2391 | (second->flags & PASTE_LEFT) | PASTED);
2392 pasted->col = token->col;
2393 pasted->line = token->line;
2395 /* See if there is another token to be pasted onto the one we just
2403 /* Convert a token sequence to a single string token according to the
2404 rules of the ISO C #-operator. */
/* INIT_SIZE is both the initial capacity of the spelling buffer and
   the headroom added each time the buffer is enlarged.  */
2405 #define INIT_SIZE 200
2407 stringify_arg (pfile, token)
2409 const cpp_token *token;
2412 unsigned char *main_buf;
2413 unsigned int prev_value, backslash_count = 0;
2414 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
/* Push the argument named by TOKEN as a context and read it with
   macro expansion prevented; the previous setting is restored before
   returning.  */
2416 push_arg_context (pfile, token);
2417 prev_value = prevent_macro_expansion (pfile);
2418 main_buf = (unsigned char *) xmalloc (buf_cap);
/* The result is a temporary token that takes its flags and position
   from the argument token.  */
2420 result = get_temp_token (pfile);
2421 ASSIGN_FLAGS_AND_POS (result, token);
2423 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2427 unsigned int len = TOKEN_LEN (token);
2429 if (token->type == CPP_PLACEMARKER)
/* String and character constants need their quotes and backslashes
   escaped when embedded in the result.  */
2432 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2433 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
/* Enlarge the buffer when this token's spelling might not fit.  */
2437 if (buf_used + len > buf_cap)
2439 buf_cap = buf_used + len + INIT_SIZE;
2440 main_buf = xrealloc (main_buf, buf_cap);
/* Whitespace before a token is rendered as a single space.  */
2443 if (whitespace && (token->flags & PREV_WHITE))
2444 main_buf[buf_used++] = ' ';
/* The token is spelled either into a scratch buffer or directly at the
   end of MAIN_BUF.  In the former case the text is then copied across
   through quote_string.  */
2447 buf = (unsigned char *) xmalloc (len);
2449 buf = main_buf + buf_used;
2451 len = spell_token (pfile, token, buf) - buf;
2454 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
/* Track backslash tokens so that a dangling one at the end can be
   detected below.  */
2461 if (token->type == CPP_BACKSLASH)
2464 backslash_count = 0;
2467 /* Ignore the final \ of invalid string literals. */
2468 if (backslash_count & 1)
2470 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
/* MAIN_BUF becomes the text of the resulting CPP_STRING token.  */
2474 result->type = CPP_STRING;
2475 result->val.str.text = main_buf;
2476 result->val.str.len = buf_used;
2477 restore_macro_expansion (pfile, prev_value);
2481 /* Allocate more room on the context stack of PFILE. */
2483 expand_context_stack (pfile)
/* The new capacity is twice the old plus 20 entries.
   NOTE(review): xrealloc presumably may move the array, so callers
   holding a cpp_context pointer should re-fetch it afterwards.  */
2486 pfile->context_cap += pfile->context_cap + 20;
2487 pfile->contexts = (cpp_context *)
2488 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2491 /* Push the context of macro NODE onto the context stack. TOKEN is
2492 the CPP_NAME token invoking the macro. */
2494 push_macro_context (pfile, token)
2496 const cpp_token *token;
2498 unsigned char orig_flags;
2500 cpp_context *context;
2501 cpp_hashnode *node = token->val.node;
2503 /* Token's flags may change when parsing args containing a nested
2504 invocation of this macro. */
2505 orig_flags = token->flags & (PREV_WHITE | BOL);
/* paramc >= 0 is the function-like case: the invocation's arguments
   must be collected before the macro's context is pushed.  */
2507 if (node->value.expansion->paramc >= 0)
2509 unsigned int error, prev_nme;
2511 /* Allocate room for the argument contexts, and parse them. */
2512 args = (macro_args *) xmalloc (sizeof (macro_args));
2513 args->ends = (unsigned int *)
2514 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
/* Arguments are parsed with macro expansion prevented; the previous
   setting is restored straight afterwards.  */
2519 prev_nme = prevent_macro_expansion (pfile);
2521 error = parse_args (pfile, node, args);
2523 restore_macro_expansion (pfile, prev_nme);
2526 free_macro_args (args);
2529 /* Set the level after the call to parse_args. */
2530 args->level = pfile->cur_context;
2533 /* Now push its context. */
2534 pfile->cur_context++;
2535 if (pfile->cur_context == pfile->context_cap)
2536 expand_context_stack (pfile);
/* Fill in the new top context: it reads from the macro's replacement
   list and carries the parsed arguments.  */
2538 context = CURRENT_CONTEXT (pfile);
2539 context->u.list = node->value.expansion;
2540 context->args = args;
2542 context->count = context->u.list->tokens_used;
2543 context->level = pfile->cur_context;
2545 context->pushed_token = 0;
2547 /* Set the flags of the first token. We know there must
2548 be one, empty macros are a single placemarker token. */
2549 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2554 /* Push an argument to the current macro onto the context stack.
2555 TOKEN is the MACRO_ARG token representing the argument expansion.
TOKEN's val.aux is used as the index of the argument.  The argument's
tokens are located through the macro_args of the context below the
new one, using the per-argument end offsets in args->ends. */
2557 push_arg_context (pfile, token)
2559 const cpp_token *token;
2561 cpp_context *context;
2564 pfile->cur_context++;
/* Grow the stack when the new index reaches its capacity. */
2565 if (pfile->cur_context == pfile->context_cap)
2566 expand_context_stack (pfile);
2568 context = CURRENT_CONTEXT (pfile);
/* The arguments belong to the context just below the new one. */
2569 args = context[-1].args;
/* count temporarily holds the start offset of this argument: the end
   offset of the previous argument, or 0 for the first one. */
2571 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2572 context->u.arg = args->tokens + context->count;
/* Now turn count into a length: this argument's end offset minus its
   start offset. */
2573 context->count = args->ends[token->val.aux] - context->count;
2576 context->level = args->level;
2577 context->flags = CONTEXT_ARG | CONTEXT_RAW;
/* Flag the context as varargs when the macro has VAR_ARGS set and
   this is its last parameter. */
2578 if ((context[-1].u.list->flags & VAR_ARGS)
2579 && token->val.aux + 1 == (unsigned) context[-1].u.list->paramc)
2580 context->flags |= CONTEXT_VARARGS;
2581 context->pushed_token = 0;
2583 /* Set the flags of the first token. There is one. */
2585 const cpp_token *first = context->u.arg[0];
/* NOTE(review): the condition under which the second slot is used
   instead is not visible in this listing -- confirm against the full
   file. */
2587 first = context->u.arg[1];
2589 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2590 token->flags & (PREV_WHITE | BOL));
/* Record on which side of a paste this argument appears: PASTE_LEFT
   on TOKEN marks CONTEXT_PASTEL, a non-zero paste_level marks
   CONTEXT_PASTER. */
2593 if (token->flags & PASTE_LEFT)
2594 context->flags |= CONTEXT_PASTEL;
2595 if (pfile->paste_level)
2596 context->flags |= CONTEXT_PASTER;
2599 /* "Unget" a token. It is effectively inserted in the token queue and
2600 will be returned by the next call to get_raw_token.  At most one
token can be held per context; pushing a second one before the first
has been read is reported as an internal error.  A CPP_EOF is never
stored. */
2602 _cpp_push_token (pfile, token)
2604 const cpp_token *token;
2606 cpp_context *context = CURRENT_CONTEXT (pfile);
/* Look up the token most recently read from this context, if any.
   NOTE(review): what is then done with PREV is not visible in this
   listing. */
2608 if (context->posn > 0)
2610 const cpp_token *prev;
2611 if (IS_ARG_CONTEXT (context))
2612 prev = context->u.arg[context->posn - 1];
2614 prev = &context->u.list->tokens[context->posn - 1];
2623 if (context->pushed_token)
2624 cpp_ice (pfile, "two tokens pushed in a row");
2625 if (token->type != CPP_EOF)
2626 context->pushed_token = token;
2627 /* Don't push back a directive's CPP_EOF, step back instead. */
2628 else if (pfile->cur_context == 0)
2629 pfile->contexts[0].posn--;
2632 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2633 introducing the directive; token[1] is the token that follows it.
The directive descriptor is read from pfile->token_list.directive.
Unless the descriptor has the EXPAND flag, macro expansion is
prevented while its handler runs and restored afterwards.  The rest
of the line is then skipped. */
2635 _cpp_process_directive (pfile, token)
2637 const cpp_token *token;
2639 const struct directive *d = pfile->token_list.directive;
2642 /* Skip over the directive name. */
2643 if (token[1].type == CPP_NAME)
2644 _cpp_get_raw_token (pfile);
/* Anything other than a name or a number after the '#' is an internal
   error.  Report the offending token, token[1], rather than the
   CPP_HASH itself. */
2645 else if (token[1].type != CPP_NUMBER)
2646 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token + 1));
2648 if (! (d->flags & EXPAND))
2649 prev_nme = prevent_macro_expansion (pfile);
/* The handler's result is deliberately discarded. */
2650 (void) (*d->handler) (pfile);
2651 if (! (d->flags & EXPAND))
2652 restore_macro_expansion (pfile, prev_nme);
2653 _cpp_skip_rest_of_line (pfile);
2656 /* The external interface to return the next token. All macro
2657 expansion and directive processing is handled internally, the
2658 caller only ever sees the output after preprocessing.  Tokens are
fetched with _cpp_get_token. */
2660 cpp_get_token (pfile)
2663 const cpp_token *token;
2664 /* Loop till we hit a non-directive, non-placemarker token. */
2667 token = _cpp_get_token (pfile);
/* NOTE(review): the statement executed for a placemarker is not
   visible in this listing; per the loop comment above it is
   presumably skipped. */
2669 if (token->type == CPP_PLACEMARKER)
/* A CPP_HASH token carrying the BOL flag, on a line for which a
   directive has been recorded in the token list, is passed to
   _cpp_process_directive. */
2672 if (token->type == CPP_HASH && token->flags & BOL
2673 && pfile->token_list.directive)
2675 _cpp_process_directive (pfile, token);
2683 /* The internal interface to return the next token. There are two
2684 differences between the internal and external interfaces: the
2685 internal interface may return a PLACEMARKER token, and it does not
2686 process directives.
NOTE(review): this listing omits the short lines of the function
(braces, most return/continue/break statements and the assignment to
old_token), so the action taken by several of the tests below is not
visible here. */
2688 _cpp_get_token (pfile)
2691 const cpp_token *token, *old_token;
2694 /* Loop until we hit a non-macro token. */
2697 token = get_raw_token (pfile);
2699 /* Short circuit EOF. */
2700 if (token->type == CPP_EOF)
2703 /* If we are skipping... */
2704 if (pfile->skipping)
2706 /* we still have to process directives, */
2707 if (pfile->token_list.directive)
2710 /* but everything else is ignored. */
2711 _cpp_skip_rest_of_line (pfile);
2715 /* If there's a potential control macro and we get here, then that
2716 #ifndef didn't cover the entire file and its argument shouldn't
2717 be taken as a control macro. */
2718 pfile->potential_control_macro = 0;
2720 /* If we are rescanning preprocessed input, no macro expansion or
2721 token pasting may occur. */
2722 if (CPP_OPTION (pfile, preprocessed))
2727 /* See if there's a token to paste with this one. */
2728 if (!pfile->paste_level)
2729 token = maybe_paste_with_next (pfile, token);
2731 /* If it isn't a macro, return it now. */
2732 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
2735 /* Is macro expansion disabled in general, or are we in the
2736 middle of a token paste, or was this token just pasted?
2737 (Note we don't check token->flags & PASTED, because that
2738 counts tokens that were pasted at some point in the past,
2739 we're only interested in tokens that were pasted by this call
2740 to maybe_paste_with_next.) */
2741 if (pfile->no_expand_level == pfile->cur_context
2742 || pfile->paste_level
2743 || (token != old_token
2744 && pfile->no_expand_level + 1 == pfile->cur_context))
/* Names whose hash node is not of type T_MACRO are handed to
   special_symbol, whose result is returned directly. */
2747 node = token->val.node;
2748 if (node->type != T_MACRO)
2749 return special_symbol (pfile, node, token);
/* The remaining names are T_MACRO nodes: first ask whether the macro
   is currently disabled, then try to push its context. */
2751 if (is_macro_disabled (pfile, node->value.expansion, token))
2754 if (push_macro_context (pfile, token))
2760 /* Returns the next raw token, i.e. without performing macro
2761 expansion. Argument contexts are automatically entered.
Tokens are taken, in order of preference, from the current context's
pushed-back token, from its argument array, or from its token list.
When the context is exhausted pop_context is called.
NOTE(review): the enclosing loop, the braces and some short
statements are omitted from this listing. */
2762 static const cpp_token *
2763 get_raw_token (pfile)
2766 const cpp_token *result;
2767 cpp_context *context;
2771 context = CURRENT_CONTEXT (pfile);
/* A pushed-back token is handed out first, and the slot is cleared. */
2772 if (context->pushed_token)
2774 result = context->pushed_token;
2775 context->pushed_token = 0;
2776 return result; /* Cannot be a CPP_MACRO_ARG */
/* Every token of this context has been read. */
2778 else if (context->posn == context->count)
2780 if (pop_context (pfile))
2784 else if (IS_ARG_CONTEXT (context))
2786 result = context->u.arg[context->posn++];
/* NOTE(review): the condition under which CONTEXT_RAW is toggled and
   a second entry fetched is not visible in this listing -- confirm
   against the full file. */
2789 context->flags ^= CONTEXT_RAW;
2790 result = context->u.arg[context->posn++];
2792 return result; /* Cannot be a CPP_MACRO_ARG */
/* Otherwise read from the context's token list. */
2795 result = &context->u.list->tokens[context->posn++];
2797 if (result->type != CPP_MACRO_ARG)
/* A CPP_MACRO_ARG token is either stringified or has its argument
   context pushed. */
2800 if (result->flags & STRINGIFY_ARG)
2801 return stringify_arg (pfile, result);
2803 push_arg_context (pfile, result);
2807 /* Internal interface to get the token without macro expanding.
Macro expansion is prevented at the current context level for the
duration of a single _cpp_get_token call, then restored to its
previous setting.  The saved setting is held as unsigned int, the
type used for it elsewhere in this file; it can be UINT_MAX, which
does not fit in an int. */
2809 _cpp_get_raw_token (pfile)
2812 unsigned int prev_nme = prevent_macro_expansion (pfile);
2813 const cpp_token *result = _cpp_get_token (pfile);
2814 restore_macro_expansion (pfile, prev_nme);
2818 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2819 list should be overwritten, or zero if we need to append
2820 (typically, if we are within the arguments to a macro, or looking
2821 for the '(' to start a function-like macro invocation).
After lexing, the base context's count is set to the number of
tokens in the list. */
2823 lex_next (pfile, clear)
2827 cpp_toklist *list = &pfile->token_list;
/* Remember the token array and its length so that pointers into it
   can be adjusted if the array has moved after lex_line. */
2828 const cpp_token *old_list = list->tokens;
2829 unsigned int old_used = list->tokens_used;
2833 /* Release all temporary tokens. */
2834 _cpp_clear_toklist (list);
2835 pfile->contexts[0].posn = 0;
2836 if (pfile->temp_used)
2837 release_temp_tokens (pfile);
2839 lex_line (pfile, list);
2840 pfile->contexts[0].count = list->tokens_used;
2842 if (!clear && pfile->args)
2844 /* Fix up argument token pointers. */
2845 if (old_list != list->tokens)
2849 for (i = 0; i < pfile->args->used; i++)
2851 const cpp_token *token = pfile->args->tokens[i];
/* Only pointers that referred into the old array are rebased, by the
   byte distance between the old and the new array. */
2852 if (token >= old_list && token < old_list + old_used)
2853 pfile->args->tokens[i] = (const cpp_token *)
2854 ((char *) token + ((char *) list->tokens - (char *) old_list));
2858 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
2859 tokens within the list of arguments that would otherwise act as
2860 preprocessing directives, the behavior is undefined.
2862 This implementation will report a hard error and treat the
2863 'sequence of preprocessing tokens' as part of the macro argument,
not as a directive.
2866 Note if pfile->args == 0, we're OK since we're only inside a
2867 macro argument after a '('. */
2868 if (list->directive)
/* The error is located at the first token appended by this call,
   i.e. the token at index old_used. */
2870 cpp_error_with_line (pfile, list->tokens[old_used].line,
2871 list->tokens[old_used].col,
2872 "#%s may not be used inside a macro argument",
2873 list->directive->name);
2881 /* Pops a context off the context stack. If we're at the bottom, lexes
2882 the next logical line. Returns EOF if we're at the end of the
2883 argument list to the # operator, or we should not "overflow"
2884 into the rest of the file (e.g. 6.10.3.1.1).
NOTE(review): the line carrying the function name and parameter list
is missing from this listing, as are the braces and most return
statements; get_raw_token calls this function as pop_context (pfile)
and tests its result. */
2889 cpp_context *context;
2891 if (pfile->cur_context == 0)
2893 /* If we are currently processing a directive, do not advance. 6.10
2894 paragraph 2: A new-line character ends the directive even if it
2895 occurs within what would otherwise be an invocation of a
2896 function-like macro. */
2897 if (pfile->token_list.directive)
/* The token list is overwritten (CLEAR non-zero) only when
   no_expand_level holds UINT_MAX, the value it is initialised to;
   otherwise the new line is appended. */
2900 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
2903 /* Argument contexts, when parsing args or handling # operator
2904 return CPP_EOF at the end. */
2905 context = CURRENT_CONTEXT (pfile);
2906 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
2909 /* Free resources when leaving macro contexts. */
2911 free_macro_args (context->args);
/* Keep the no-expansion level from pointing above the new top of the
   stack. */
2913 if (pfile->cur_context == pfile->no_expand_level)
2914 pfile->no_expand_level--;
2915 pfile->cur_context--;
2920 /* Turn off macro expansion at the current context level.  The old
no_expand_level is saved in prev_value before being replaced by
cur_context; callers in this file pass the function's result to
restore_macro_expansion.  (The return statement itself is not
visible in this listing.) */
2922 prevent_macro_expansion (pfile)
2925 unsigned int prev_value = pfile->no_expand_level;
2926 pfile->no_expand_level = pfile->cur_context;
2930 /* Restore macro expansion to its previous state.  PREV_VALUE is
stored back into pfile->no_expand_level; callers in this file pass
the value obtained from the matching prevent_macro_expansion
call. */
2932 restore_macro_expansion (pfile, prev_value)
2934 unsigned int prev_value;
2936 pfile->no_expand_level = prev_value;
2939 /* Used by cpperror.c to obtain the correct line and column to report in a diagnostic. */
2942 _cpp_get_line (pfile, pcol)
/* Returns the line of a token in pfile->token_list and stores that
   token's column through PCOL.  While a line is being lexed
   (state.in_lex_line) the index used is tokens_used; otherwise it
   starts from the base context's read position.
   NOTE(review): the lines between the index selection and the token
   lookup are omitted from this listing, so any adjustment of the
   index is not visible.  special_symbol passes NULL for PCOL, so the
   store through PCOL is presumably guarded by a test that is also
   omitted -- confirm against the full file. */
2947 const cpp_token *cur_token;
2949 if (pfile->state.in_lex_line)
2950 index = pfile->token_list.tokens_used;
2953 index = pfile->contexts[0].posn;
2964 cur_token = &pfile->token_list.tokens[index];
2966 *pcol = cur_token->col;
2967 return cur_token->line;
/* DSC expands to two comma-separated arguments: STR cast to
   const U_CHAR *, and sizeof STR - 1, which for a string literal is
   its length without the terminating NUL. */
2970 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
/* Three-letter month abbreviations; special_symbol indexes this table
   with tm_mon. */
2971 static const char * const monthnames[] =
2973 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
2974 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
2977 /* Handle builtin macros like __FILE__.  NODE is the hash node of the
builtin and TOKEN the token that named it.  For most builtins a new
token is produced and given TOKEN's flags and position with
ASSIGN_FLAGS_AND_POS.  The __DATE__ and __TIME__ tokens are created
once and cached in pfile->date and pfile->time.
NOTE(review): this listing omits several short lines of the function
(the switch and most case labels, braces, declarations, returns and
the last argument of the make_string_token call for the file name);
only the visible lines are described here. */
2978 static const cpp_token *
2979 special_symbol (pfile, node, token)
2982 const cpp_token *token;
2994 ip = CPP_BUFFER (pfile);
/* For T_BASE_FILE, walk back to the buffer that has no previous
   buffer. */
2999 if (node->type == T_BASE_FILE)
3000 while (CPP_PREV_BUFFER (ip) != NULL)
3001 ip = CPP_PREV_BUFFER (ip);
3003 file = ip->nominal_fname;
3005 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3010 case T_INCLUDE_LEVEL:
3011 /* pfile->include_depth counts the primary source as level 1,
3012 but historically __INCLUDE_LEVEL__ has called the primary
source level 0. */
3014 result = alloc_number_token (pfile, pfile->include_depth - 1);
3018 /* If __LINE__ is embedded in a macro, it must expand to the
3019 line of the macro's invocation, not its definition.
3020 Otherwise things like assert() will not work properly. */
3021 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3028 #ifdef STDC_0_IN_SYSTEM_HEADERS
3029 if (CPP_IN_SYSTEM_HEADER (pfile)
3030 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3033 result = alloc_number_token (pfile, stdc);
/* A zero pfile->date means the date and time tokens have not been
   built yet. */
3039 if (pfile->date == 0)
3041 /* Allocate __DATE__ and __TIME__ from permanent storage,
3042 and save them in pfile so we don't have to do this again.
3043 We don't generate these strings at init time because
3044 time() and localtime() are very slow on some systems. */
3045 time_t tt = time (NULL);
3046 struct tm *tb = localtime (&tt);
/* The placeholder strings have exactly the length of the formatted
   output; their text is overwritten in place by the sprintf calls
   below.
   NOTE(review): this relies on make_string_token storing a writable
   copy of the placeholder, and on localtime not returning NULL --
   confirm against the full file. */
3048 pfile->date = make_string_token
3049 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3050 pfile->time = make_string_token
3051 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3053 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3054 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3055 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3056 tb->tm_hour, tb->tm_min, tb->tm_sec);
3058 result = node->type == T_DATE ? pfile->date: pfile->time;
3062 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3066 cpp_ice (pfile, "invalid special hash type");
/* Give the new token the flags and position of the token it
   replaces. */
3070 ASSIGN_FLAGS_AND_POS (result, token);
3075 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3076 if it hasn't happened already.
NOTE(review): the lines visible in this listing do neither of those
things.  They initialise the token list, set no_expand_level to
UINT_MAX, allocate a context stack of 20 entries with cur_context 0,
and set up the base context to read from pfile->token_list.  The
description above may be out of date -- confirm against the full
file. */
3079 _cpp_init_input_buffer (pfile)
3084 _cpp_init_toklist (&pfile->token_list, 0);
3085 pfile->no_expand_level = UINT_MAX;
3086 pfile->context_cap = 20;
3087 pfile->cur_context = 0;
3089 pfile->contexts = (cpp_context *)
3090 xmalloc (pfile->context_cap * sizeof (cpp_context));
3092 /* Clear the base context. */
3093 base = &pfile->contexts[0];
3094 base->u.list = &pfile->token_list;
3100 base->pushed_token = 0;
3103 /* Moves to the end of the directive line, popping contexts as necessary. */
3106 _cpp_skip_rest_of_line (pfile)
/* Drops every context above the base one, freeing any macro
   arguments they hold, and then empties the base context and clears
   the directive pointer of the token list. */
3109 /* Discard all stacked contexts. */
3111 for (i = pfile->cur_context; i > 0; i--)
3112 if (pfile->contexts[i].args)
3113 free_macro_args (pfile->contexts[i].args);
/* A no-expansion level at or below the old top of the stack is
   reset to 0, the base context. */
3115 if (pfile->no_expand_level <= pfile->cur_context)
3116 pfile->no_expand_level = 0;
3117 pfile->cur_context = 0;
3119 /* Clear the base context, and clear the directive pointer so that
3120 get_raw_token will advance to the next line. */
3121 pfile->contexts[0].count = 0;
3122 pfile->contexts[0].posn = 0;
3123 pfile->token_list.directive = 0;
3126 /* Directive handler wrapper used by the command line option processor. */
3129 _cpp_run_directive (pfile, dir, buf, count, name)
/* Runs the handler of directive DIR over the COUNT bytes at BUF,
   which are pushed as a new buffer and popped again at the end.
   Unless DIR has the EXPAND flag, macro expansion is prevented while
   the handler runs.
   NOTE(review): this listing omits braces and some short lines, and
   the chunk ends at the last visible statement; only the visible
   lines are described. */
3131 const struct directive *dir;
/* The visible work follows a successful cpp_push_buffer. */
3136 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3138 unsigned int prev_lvl = 0;
/* The buffer's nominal file name is either NAME or the translated
   string "<command line>"; the test choosing between the two
   assignments is not visible in this listing. */
3141 CPP_BUFFER (pfile)->nominal_fname = name;
3143 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3144 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3146 /* Scan the line now, else prevent_macro_expansion won't work. */
3147 lex_next (pfile, 1);
3148 if (! (dir->flags & EXPAND))
3149 prev_lvl = prevent_macro_expansion (pfile);
/* The handler's result is deliberately discarded. */
3151 (void) (*dir->handler) (pfile);
3153 if (! (dir->flags & EXPAND))
3154 restore_macro_expansion (pfile, prev_lvl);
3156 _cpp_skip_rest_of_line (pfile);
3157 cpp_pop_buffer (pfile);