1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 /* o Distinguish integers, floats, and 'other' pp-numbers.
28 o Store ints and char constants as binary values.
29 o New command-line assertion syntax.
30 o Comment all functions, and describe macro expansion algorithm.
31 o Move as much out of header files as possible.
32 o Remove single quote pairs `', and some '', from diagnostics.
33 o Correct pastability test for CPP_NAME and CPP_NUMBER. */
/* Constant data: the spellings of the digraph tokens, and two constant
   tokens whose types are CPP_PLACEMARKER and CPP_EOF.
   NOTE(review): the first two initializers are cut short in this
   listing - confirm them against the complete file.  */
44 const unsigned char *_cpp_digraph_spellings [] = {U"%:", U"%:%:", U"<:",
46 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
48 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
50 /* Flags for cpp_context. */
51 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
52 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
53 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
54 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
/* One entry of the context array that CURRENT_CONTEXT indexes through
   pfile->contexts.  A context supplies tokens either from a macro token
   list or from an array of argument tokens, and tracks a position and a
   token count within that source.
   NOTE(review): the struct header and some members are not visible in
   this listing - confirm the full layout against the complete file.  */
56 typedef struct cpp_context cpp_context;
61 const cpp_toklist *list; /* Used for macro contexts only. */
62 const cpp_token **arg; /* Used for arg contexts only. */
65 /* Pushed token to be returned by next call to get_raw_token. */
66 const cpp_token *pushed_token;
68 struct macro_args *args; /* The arguments for a function-like
69 macro. NULL otherwise. */
70 unsigned short posn; /* Current posn, index into u. */
71 unsigned short count; /* No. of tokens in u. */
/* Storage for the arguments of a function-like macro (a cpp_context
   points at one through its args member).  The visible members are an
   array of token pointers and its capacity.
   NOTE(review): further members may exist on lines missing from this
   listing.  */
76 typedef struct macro_args macro_args;
80 const cpp_token **tokens;
81 unsigned int capacity;
/* Forward declarations of the static functions of this file, each
   parameter list wrapped in PARAMS.
   NOTE(review): several declarations continue on lines that are missing
   from this listing.  */

/* Raw token access, macro arguments and the context stack.  */
86 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
87 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
88 macro_args *, unsigned int *));
89 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
90 static void save_token PARAMS ((macro_args *, const cpp_token *));
91 static int pop_context PARAMS ((cpp_reader *));
92 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
93 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
94 static void free_macro_args PARAMS ((macro_args *));
/* Character-level helpers that work directly on a cpp_buffer.  */
96 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
97 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
98 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
/* Comment, whitespace and token lexing helpers.  */
100 static int skip_block_comment PARAMS ((cpp_reader *));
101 static int skip_line_comment PARAMS ((cpp_reader *));
102 static void adjust_column PARAMS ((cpp_reader *));
103 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
104 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
105 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
106 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
107 static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
108 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
109 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
110 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
111 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
112 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
113 static void lex_token PARAMS ((cpp_reader *, cpp_token *));
114 static int lex_next PARAMS ((cpp_reader *, int));
/* Remaining helpers; their definitions are not part of this listing.  */
116 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
119 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
120 static void expand_context_stack PARAMS ((cpp_reader *));
121 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
123 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
125 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
127 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
128 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
130 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
131 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
133 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
134 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
135 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
136 static void release_temp_tokens PARAMS ((cpp_reader *));
137 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
/* Nonzero if C is a sign character that may continue a pp-number: a
   + or - whose previous character PREVC is e or E, or is p or P while
   the c89 option is off.  The expansion refers to a variable named
   pfile, which must be in scope wherever the macro is used.  */
139 #define VALID_SIGN(c, prevc) \
140 (((c) == '+' || (c) == '-') && \
141 ((prevc) == 'e' || (prevc) == 'E' \
142 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
144 /* An upper bound on the number of bytes needed to spell a token,
145 including preceding whitespace.  The variable part is chosen by the
   spelling class of the token: the string length for SPELL_STRING, the
   node length for SPELL_IDENT, and zero for every other class.
   NOTE(review): the fixed allowance added to that value and the return
   statement are not visible in this listing.  */
146 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
149 const cpp_token *token;
153 switch (TOKEN_SPELL (token))
155 default: len = 0; break;
156 case SPELL_STRING: len = token->val.str.len; break;
157 case SPELL_IDENT: len = token->val.node->length; break;
/* Nonzero if context C has the CONTEXT_ARG flag set.  */
162 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* The entry at index cur_context of the context array of PFILE.  */
163 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
/* Nonzero if the token list of context C has VAR_ARGS set and the token
   just before the current position refers, through val.aux, to the last
   parameter (index paramc - 1).  */
164 #define ON_REST_ARG(c) \
165 (((c)->u.list->flags & VAR_ARGS) \
166 && (c)->u.list->tokens[(c)->posn - 1].val.aux \
167 == (unsigned int) ((c)->u.list->paramc - 1))
/* Set the flags of D to the PREV_WHITE, BOL and PASTE_LEFT bits of S;
   when BOL ends up set in D, copy the column and line of S as well.  */
169 #define ASSIGN_FLAGS_AND_POS(d, s) \
170 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
171 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
174 /* f is flags, just consisting of PREV_WHITE | BOL.  Those two bits of
   D are replaced by F, and the column and line of S are copied to D when
   F includes BOL. */
175 #define MODIFY_FLAGS_AND_POS(d, s, f) \
176 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
177 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
/* Table of token spellings with N_TTYPES entries, generated from
   TTYPE_TABLE.  An OP entry has spelling class SPELL_OPERATOR and the
   spelling S; a TK entry uses S as its spelling class and the
   stringified enumerator name E as its spelling.  */
180 #define OP(e, s) { SPELL_OPERATOR, U s },
181 #define TK(e, s) { s, U STRINGX (e) },
183 const struct token_spelling
184 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
189 /* Helper routine used by parse_include, which can't see spell_token.
190 Reinterpret the current line as an h-char-sequence (< ... >); we are
191 looking at the first token after the <.
   Tokens are fetched with _cpp_get_token and spelled into a growable
   xmalloc buffer until a CPP_GREATER or CPP_EOF token is seen; the
   collected text then becomes the string value of a temporary token of
   type CPP_HEADER_NAME.  */
193 _cpp_glue_header_name (pfile)
203 buf = xmalloc (avail);
207 t = _cpp_get_token (pfile);
/* The closing > or the end of the line ends the header name.  */
208 if (t->type == CPP_GREATER || t->type == CPP_EOF)
/* Grow the buffer, with 40 bytes of slack, when the upper bound on the
   spelling of this token would not fit.  */
211 if (len + TOKEN_LEN (t) > avail)
213 avail = len + TOKEN_LEN (t) + 40;
214 buf = xrealloc (buf, avail);
217 if (t->flags & PREV_WHITE)
/* spell_token returns the position just past what it wrote, which gives
   the new length.  */
220 p = spell_token (pfile, t, buf + len);
221 len = (size_t) (p - buf); /* p known >= buf */
/* Running into CPP_EOF means the closing > was never found.  */
224 if (t->type == CPP_EOF)
225 cpp_error (pfile, "missing terminating > character");
/* Trim the buffer to the length actually used.  */
227 buf = xrealloc (buf, len);
/* Hand the text over to a temporary token.  */
229 hdr = get_temp_token (pfile);
230 hdr->type = CPP_HEADER_NAME;
232 hdr->val.str.text = buf;
233 hdr->val.str.len = len;
237 /* Token-buffer helper functions. */
239 /* Expand a token list's string space. It is *vital* that
240 list->tokens_used is correct, to get pointer fix-up right.
   LEN is added to the current capacity list->name_cap; it is the
   growth amount, not the new total size.  */
242 _cpp_expand_name_space (list, len)
246 const U_CHAR *old_namebuf;
248 old_namebuf = list->namebuf;
249 list->name_cap += len;
250 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
252 /* Fix up token text pointers. */
/* Needed only if xrealloc moved the buffer.  The first tokens_used
   tokens are visited, and those with spelling class SPELL_STRING have
   their text pointer shifted by the distance the buffer moved.  */
253 if (list->namebuf != old_namebuf)
257 for (i = 0; i < list->tokens_used; i++)
258 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
259 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
263 /* If there is not enough room for LEN more characters, expand the
264 list by just enough to have room for LEN characters. */
266 _cpp_reserve_name_space (list, len)
/* Unused name space: capacity minus the bytes already in use.  */
270 unsigned int room = list->name_cap - list->name_used;
/* Grow by the shortfall only.
   NOTE(review): the test guarding this call is not visible in this
   listing.  */
273 _cpp_expand_name_space (list, len - room);
276 /* Expand the number of tokens in a list.  COUNT is added to the
   current capacity list->tokens_cap, and the token array is reallocated
   to hold that many cpp_token objects.
   NOTE(review): _cpp_init_toklist allocates tokens_cap + 1 tokens,
   whereas this reallocates exactly tokens_cap of them - confirm whether
   the extra slot must survive a resize.  */
278 _cpp_expand_token_space (list, count)
282 list->tokens_cap += count;
283 list->tokens = (cpp_token *)
284 xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token));
287 /* Initialize a token list. If EMPTY is false, some token and name
288 space is provided. */
290 _cpp_init_toklist (list, empty)
/* NOTE(review): the test on EMPTY that selects between the zero
   capacity below and the allocations that follow is not visible in this
   listing.  */
296 list->tokens_cap = 0;
303 /* Initialize token space. */
304 list->tokens_cap = 256; /* 4K's worth. */
/* One token more than tokens_cap is allocated.  */
305 list->tokens = (cpp_token *)
306 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
308 /* Initialize name space. */
309 list->name_cap = 1024;
310 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
/* Finish by resetting the list through _cpp_clear_toklist.  */
313 _cpp_clear_toklist (list);
316 /* Clear a token list.  The visible assignments zero
   list->tokens_used and list->params_len.
   NOTE(review): other fields are presumably reset on lines missing from
   this listing.  */
318 _cpp_clear_toklist (list)
321 list->tokens_used = 0;
325 list->params_len = 0;
329 /* Free a token list. Does not free the list itself, which may be
330 embedded in a larger structure. */
332 _cpp_free_toklist (list)
333 const cpp_toklist *list;
/* Release the name buffer.
   NOTE(review): the release of the token array is presumably on a line
   missing from this listing.  */
336 free (list->namebuf);
339 /* Compare two tokens.  The value comparison is attempted only when
   the type and the flags of A and B match; it then depends on the
   spelling class of A: the aux value, the hash node pointer, or the
   string length together with the string bytes.
   NOTE(review): the case labels and the result for mismatching tokens
   are not visible in this listing.  */
341 _cpp_equiv_tokens (a, b)
342 const cpp_token *a, *b;
344 if (a->type == b->type && a->flags == b->flags)
345 switch (TOKEN_SPELL (a))
347 default: /* Keep compiler happy. */
352 return a->val.aux == b->val.aux; /* arg_no or character. */
354 return a->val.node == b->val.node;
356 return (a->val.str.len == b->val.str.len
357 && !memcmp (a->val.str.text, b->val.str.text,
364 /* Compare two token lists.  The lists are first compared on their
   number of used tokens, their flags and their parameter count; the
   tokens are then compared pairwise, in order, with _cpp_equiv_tokens.
   NOTE(review): the return statements are not visible in this
   listing.  */
366 _cpp_equiv_toklists (a, b)
367 const cpp_toklist *a, *b;
371 if (a->tokens_used != b->tokens_used
372 || a->flags != b->flags
373 || a->paramc != b->paramc)
376 for (i = 0; i < a->tokens_used; i++)
377 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
384 /* Compares the token TOKEN to the NUL-terminated string STRING.
385 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
388 cpp_ideq (token, string)
389 const cpp_token *token;
/* A token of any type other than CPP_NAME is dealt with before its
   node is examined.  */
392 if (token->type != CPP_NAME)
/* Equal when ustrcmp finds no difference between the name of the
   identifier node and STRING.  */
395 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
398 /* Call when meeting a newline. Returns the character after the newline
399 (or carriage-return newline combination), or EOF. */
401 handle_newline (buffer, newline_char)
403 cppchar_t newline_char;
405 cppchar_t next = EOF;
/* A new line starts here: drop the tab column adjustment and measure
   columns from the current position.  */
407 buffer->col_adjust = 0;
409 buffer->line_base = buffer->cur;
411 /* Handle CR-LF and LF-CR combinations, get the next character. */
412 if (buffer->cur < buffer->rlimit)
414 next = *buffer->cur++;
/* Assuming NEWLINE_CHAR is itself CR or LF, the two sums are equal only
   when NEXT is the other character of the pair.  */
415 if (next + newline_char == '\r' + '\n')
/* The second character of the pair belongs to the line ending, so the
   line base moves past it before the following character is read.  */
417 buffer->line_base = buffer->cur;
418 if (buffer->cur < buffer->rlimit)
419 next = *buffer->cur++;
/* The character after the newline is left in read_ahead as well.  */
425 buffer->read_ahead = next;
429 /* Subroutine of skip_escaped_newlines; called when a trigraph is
430 encountered. It warns if necessary, and returns true if the
431 trigraph should be honoured. FROM_CHAR is the third character of a
432 trigraph, and presumed to be the previous character for position reporting. */
435 trigraph_ok (pfile, from_char)
439 int accept = CPP_OPTION (pfile, trigraphs);
441 /* Don't warn about trigraphs in comments. */
442 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
444 cpp_buffer *buffer = pfile->buffer;
/* Two wordings follow: one for a trigraph that is converted, naming the
   replacement from _cpp_trigraph_map, and one for a trigraph that is
   ignored.  Both are reported two columns before the current column.
   NOTE(review): the test choosing between them is not visible in this
   listing; presumably it is ACCEPT.  */
446 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
447 "trigraph ??%c converted to %c",
449 (int) _cpp_trigraph_map[from_char]);
451 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
452 "trigraph ??%c ignored", (int) from_char);
458 /* Set the type of the token being built to T and discard the
   read-ahead character by resetting it to EOF.  Assumes local variables
   buffer and result. */
459 #define ACCEPT_CHAR(t) \
460 do { result->type = t; buffer->read_ahead = EOF; } while (0)
462 /* When we move to multibyte character sets, add to these something
463 that saves and restores the state of the multibyte conversion
464 library. This probably involves saving and restoring a "cookie".
465 In the case of glibc it is an 8-byte structure, so is not a high
466 overhead operation. In any case, it's out of the fast path. */
/* Save and restore the read position of the buffer.  Both assume local
   variables buffer and saved_cur.  */
467 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
468 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
470 /* Skips any escaped newlines introduced by NEXT, which is either a
471 '?' or a '\\'. Returns the next character, which will also have
472 been placed in buffer->read_ahead. */
474 skip_escaped_newlines (buffer, next)
479 const unsigned char *saved_cur;
484 if (buffer->cur == buffer->rlimit)
/* A trigraph needs a second ? and room for a third character.  */
490 next1 = *buffer->cur++;
491 if (next1 != '?' || buffer->cur == buffer->rlimit)
/* The third character must have an entry in _cpp_trigraph_map and the
   trigraph must be accepted by trigraph_ok.  */
497 next1 = *buffer->cur++;
498 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
504 /* We have a full trigraph here. */
505 next = _cpp_trigraph_map[next1];
/* Only a trigraph that maps to a backslash can start an escaped
   newline.  */
506 if (next != '\\' || buffer->cur == buffer->rlimit)
511 /* We have a backslash, and room for at least one more character. */
/* Step over horizontal space between the backslash and the newline.  */
515 next1 = *buffer->cur++;
516 if (!is_nvspace (next1))
520 while (buffer->cur < buffer->rlimit);
/* Without a newline here the backslash does not escape anything.
   NOTE(review): the recovery, presumably through RESTORE_STATE, is not
   visible in this listing.  */
522 if (!is_vspace (next1))
529 cpp_warning (buffer->pfile,
530 "backslash and newline separated by space");
532 next = handle_newline (buffer, next1);
534 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
/* The character after the newline may begin another escaped newline,
   so go round again.  */
536 while (next == '\\' || next == '?');
538 buffer->read_ahead = next;
542 /* Obtain the next character, after trigraph conversion and skipping
543 an arbitrary string of escaped newlines. The common case of no
544 trigraphs or escaped newlines falls through quickly.
   The result starts out as EOF and stays EOF when the buffer has no
   characters left; whatever the result is, it is also stored in
   buffer->read_ahead.  */
546 get_effective_char (buffer)
549 cppchar_t next = EOF;
551 if (buffer->cur < buffer->rlimit)
553 next = *buffer->cur++;
555 /* '?' can introduce trigraphs (and therefore backslash); '\\'
556 can introduce escaped newlines, which we want to skip, or
557 UCNs, which, depending upon lexer state, we will handle in
559 if (next == '?' || next == '\\')
560 next = skip_escaped_newlines (buffer, next);
563 buffer->read_ahead = next;
567 /* Skip a C-style block comment. We find the end of the comment by
568 seeing if an asterisk is before every '/' we encounter. Returns
569 non-zero if comment terminated by EOF, zero otherwise. */
571 skip_block_comment (pfile)
574 cpp_buffer *buffer = pfile->buffer;
575 cppchar_t c = EOF, prevc;
/* trigraph_ok tests this flag and stays silent about trigraphs while it
   is set.  */
577 pfile->state.lexing_comment = 1;
578 while (buffer->cur != buffer->rlimit)
/* PREVC trails C by one character, so that the closing pair of
   characters can be recognised.  */
580 prevc = c, c = *buffer->cur++;
583 /* FIXME: For speed, create a new character class of characters
584 of no interest inside block comments. */
585 if (c == '?' || c == '\\')
586 c = skip_escaped_newlines (buffer, c);
588 /* People like decorating comments with '*', so check for '/'
589 instead for efficiency. */
595 /* Warn about potential nested comments, but not if the '/'
596 comes immediately before the true comment delimiter.
597 Don't bother to get it right across escaped newlines. */
598 if (CPP_OPTION (pfile, warn_comments)
599 && buffer->cur != buffer->rlimit)
601 prevc = c, c = *buffer->cur++;
602 if (c == '*' && buffer->cur != buffer->rlimit)
604 prevc = c, c = *buffer->cur++;
606 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
607 CPP_BUF_COL (buffer),
608 "\"/*\" within comment");
613 else if (is_vspace (c))
615 prevc = c, c = handle_newline (buffer, c);
619 adjust_column (pfile);
622 pfile->state.lexing_comment = 0;
623 buffer->read_ahead = EOF;
624 return c != '/' || prevc != '*';
627 /* Skip a C++ line comment. Handles escaped newlines. Returns
628 non-zero if a multiline comment. The following new line, if any,
629 is left in buffer->read_ahead. */
631 skip_line_comment (pfile)
634 cpp_buffer *buffer = pfile->buffer;
/* Remember the starting line, so that a comment continued over an
   escaped newline can be detected at the end.  */
635 unsigned int orig_lineno = buffer->lineno;
/* trigraph_ok tests this flag and stays silent about trigraphs while it
   is set.  */
638 pfile->state.lexing_comment = 1;
642 if (buffer->cur == buffer->rlimit)
646 if (c == '?' || c == '\\')
647 c = skip_escaped_newlines (buffer, c);
/* The comment runs up to the first vertical space character.  */
649 while (!is_vspace (c));
651 pfile->state.lexing_comment = 0;
652 buffer->read_ahead = c; /* Leave any newline for caller. */
/* A changed line number means the comment covered more than one
   physical line.  */
653 return orig_lineno != buffer->lineno;
656 /* pfile->buffer->cur is one beyond the \t character. Update
657 col_adjust so we track the column correctly. */
659 adjust_column (pfile)
662 cpp_buffer *buffer = pfile->buffer;
663 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
665 /* Round it up to multiple of the tabstop, but subtract 1 since the
666 tab itself occupies a character position.  For example, with a
   tabstop of 8 and COL equal to 3 the adjustment grows by
   (8 - 3) - 1 = 4. */
667 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
668 - col % CPP_OPTION (pfile, tabstop)) - 1;
671 /* Skips whitespace, saving the next non-whitespace character.
672 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
673 tokens might be assigned an incorrect column. */
675 skip_whitespace (pfile, c)
679 cpp_buffer *buffer = pfile->buffer;
/* NOTE(review): WARNED presumably limits the null character warning to
   one per call; the lines that test and set it are not visible in this
   listing.  */
680 unsigned int warned = 0;
684 /* Horizontal space always OK. */
688 adjust_column (pfile);
689 /* Just \f \v or \0 left. */
694 cpp_warning (pfile, "null character(s) ignored");
/* Form feed and vertical tab draw a pedantic warning only inside a
   directive.  */
698 else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
699 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
700 CPP_BUF_COL (buffer),
701 "%s in preprocessing directive",
702 c == '\f' ? "form feed" : "vertical tab");
705 if (buffer->cur == buffer->rlimit)
709 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
710 while (is_nvspace (c));
712 /* Remember the next character. */
713 buffer->read_ahead = c;
716 /* Parse an identifier, skipping embedded backslash-newlines.
717 Calculate the hash value of the token while parsing, for improved
718 performance. The hashing algorithm *must* match cpp_lookup().
   C is the first character of the identifier.  The spelling is appended
   to the name buffer of pfile->token_list, and the hash node found by
   _cpp_lookup_with_hash for that spelling is returned.  */
720 static cpp_hashnode *
721 parse_identifier (pfile, c)
725 cpp_buffer *buffer = pfile->buffer;
726 unsigned int r = 0, saw_dollar = 0;
/* Start of this identifier within the shared name buffer.  */
727 unsigned int orig_used = pfile->token_list.name_used;
/* Name buffer full: grow it.  The second argument of
   _cpp_expand_name_space is an increment, so the capacity grows by
   name_used + 256 bytes.  */
733 if (pfile->token_list.name_used == pfile->token_list.name_cap)
734 _cpp_expand_name_space (&pfile->token_list,
735 pfile->token_list.name_used + 256);
736 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
743 if (buffer->cur == buffer->rlimit)
748 while (is_idchar (c));
750 /* Potential escaped newline? */
751 if (c != '?' && c != '\\')
753 c = skip_escaped_newlines (buffer, c);
/* The identifier carries on if an identifier character follows the
   escaped newline.  */
755 while (is_idchar (c));
757 /* $ is not an identifier character in the standard, but is commonly
758 accepted as an extension. Don't warn about it in skipped
759 conditional blocks. */
760 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
761 cpp_pedwarn (pfile, "'$' character(s) in identifier");
763 /* Remember the next character. */
764 buffer->read_ahead = c;
/* Look the collected spelling up, passing the hash value R along.  */
765 return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
766 pfile->token_list.name_used - orig_used, r);
769 /* Parse a number, skipping embedded backslash-newlines.  C is the
   first character after any leading period.  The spelling is appended
   to the name buffer of pfile->token_list and described by NUMBER (text
   pointer and length).  When pfile->state.seen_dot is set, the spelling
   is given a leading period.  */
771 parse_number (pfile, number, c)
777 cpp_buffer *buffer = pfile->buffer;
778 unsigned int orig_used = pfile->token_list.name_used;
780 /* Reserve space for a leading period. */
781 if (pfile->state.seen_dot)
782 pfile->token_list.name_used++;
/* The comparison is >= rather than == because the slot reserved above
   can push name_used one past the capacity.  The second argument of
   _cpp_expand_name_space is an increment.  */
788 if (pfile->token_list.name_used >= pfile->token_list.name_cap)
789 _cpp_expand_name_space (&pfile->token_list,
790 pfile->token_list.name_used + 256);
791 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
795 if (buffer->cur == buffer->rlimit)
/* A number continues over number characters, periods and signs that
   VALID_SIGN accepts after the previous character.  */
800 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
802 /* Potential escaped newline? */
803 if (c != '?' && c != '\\')
805 c = skip_escaped_newlines (buffer, c);
807 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
809 /* Put any leading period in place, now we have the room. */
810 if (pfile->state.seen_dot)
811 pfile->token_list.namebuf[orig_used] = '.';
813 /* Remember the next character. */
814 buffer->read_ahead = c;
/* Describe the collected spelling to the caller.  */
816 number->text = &pfile->token_list.namebuf[orig_used];
817 number->len = pfile->token_list.name_used - orig_used;
820 /* Subroutine of parse_string. Emits error for unterminated strings.
   TERM is the missing terminator and LINE the line of the token being
   lexed.  */
822 unterminated (pfile, line, term)
827 cpp_error (pfile, "missing terminating %c character", term);
/* For a double-quoted string, when the start of a multi-line string was
   recorded on a line other than LINE, point at that recorded position
   as well.
   NOTE(review): any reset of mls_line afterwards is not visible in this
   listing.  */
829 if (term == '\"' && pfile->mls_line && pfile->mls_line != line)
831 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
832 "possible start of unterminated string literal");
837 /* Parses a string, character constant, or angle-bracketed header file
838 name. Handles embedded trigraphs and escaped newlines.
840 Multi-line strings are allowed, but they are deprecated within directives. */
843 parse_string (pfile, token, terminator)
846 cppchar_t terminator;
848 cpp_buffer *buffer = pfile->buffer;
849 unsigned int orig_used = pfile->token_list.name_used;
851 unsigned int nulls = 0;
855 if (buffer->cur == buffer->rlimit)
858 unterminated (pfile, token->line, terminator);
864 /* Handle trigraphs, escaped newlines etc. */
865 if (c == '?' || c == '\\')
866 c = skip_escaped_newlines (buffer, c);
/* U walks back over the backslashes that directly precede the candidate
   terminator; the difference from name_used is their count.  */
870 unsigned int u = pfile->token_list.name_used;
872 /* An odd number of consecutive backslashes represents an
873 escaped terminator. */
874 while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
877 if ((pfile->token_list.name_used - u) % 2 == 0)
883 else if (is_vspace (c))
885 /* In assembly language, silently terminate string and
886 character literals at end of line. This is a kludge
887 around not knowing where comments are. */
888 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
891 /* Character constants and header names may not extend over
892 multiple lines. In Standard C, neither may strings.
893 Unfortunately, we accept multiline strings as an
894 extension. (Deprecatedly even in directives - otherwise,
895 glibc's longlong.h breaks.) */
896 if (terminator != '"')
898 unterminated (pfile, token->line, terminator);
902 if (pfile->mls_line == 0)
904 pfile->mls_line = token->line;
905 pfile->mls_column = token->col;
906 if (CPP_PEDANTIC (pfile))
907 cpp_pedwarn (pfile, "multi-line string constant");
910 handle_newline (buffer, c); /* Stores to read_ahead. */
916 cpp_warning (pfile, "null character(s) preserved in literal");
919 if (pfile->token_list.name_used == pfile->token_list.name_cap)
920 _cpp_expand_name_space (&pfile->token_list,
921 pfile->token_list.name_used + 256);
923 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
924 /* If we had a new line, the next character is in read_ahead. */
927 c = buffer->read_ahead;
932 buffer->read_ahead = c;
/* Describe the collected spelling in TOKEN; it lives in the name buffer
   of pfile->token_list, starting at ORIG_USED.  */
934 token->val.str.text = &pfile->token_list.namebuf[orig_used];
935 token->val.str.len = pfile->token_list.name_used - orig_used;
938 /* For output routine simplicity, the stored comment includes the
939 comment start and any terminator.  FROM points just past the initial
   / of the comment; the text up to the current buffer position is
   copied into the name buffer of pfile->token_list, and TOKEN becomes a
   CPP_COMMENT token referring to that copy.  */
941 save_comment (pfile, token, from)
944 const unsigned char *from;
946 unsigned char *buffer;
948 cpp_toklist *list = &pfile->token_list;
950 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
/* Claim LEN bytes at the end of the name buffer.  */
951 _cpp_reserve_name_space (list, len);
952 buffer = list->namebuf + list->name_used;
953 list->name_used += len;
955 token->type = CPP_COMMENT;
956 token->val.str.len = len;
957 token->val.str.text = buffer;
/* Byte 0 is kept for the initial / and the rest is copied from FROM.
   NOTE(review): the store to byte 0 is not visible in this listing.  */
960 memcpy (buffer + 1, from, len - 1);
963 /* Subroutine of lex_token to handle '%'. A little tricky, since we
964 want to avoid stepping back when lexing %:%X. */
966 lex_percent (buffer, result)
/* A plain CPP_MOD, unless one of the longer tokens below is found.  */
972 result->type = CPP_MOD;
973 /* Parsing %:%X could leave an extra character. */
974 if (buffer->extra_char == EOF)
975 c = get_effective_char (buffer);
/* Otherwise reuse the character left over by an earlier call instead of
   reading a new one.  */
978 c = buffer->read_ahead = buffer->extra_char;
979 buffer->extra_char = EOF;
983 ACCEPT_CHAR (CPP_MOD_EQ);
984 else if (CPP_OPTION (buffer->pfile, digraphs))
988 result->flags |= DIGRAPH;
989 ACCEPT_CHAR (CPP_HASH);
/* After the digraph for CPP_HASH, a further % followed by : makes the
   digraph for CPP_PASTE.  The character after that % is parked in
   extra_char while it is examined.  */
990 if (get_effective_char (buffer) == '%')
992 buffer->extra_char = get_effective_char (buffer);
993 if (buffer->extra_char == ':')
995 buffer->extra_char = EOF;
996 ACCEPT_CHAR (CPP_PASTE);
999 /* We'll catch the extra_char when we're called back. */
1000 buffer->read_ahead = '%';
1005 result->flags |= DIGRAPH;
1006 ACCEPT_CHAR (CPP_CLOSE_BRACE);
1011 /* Subroutine of lex_token to handle '.'. This is tricky, since we
1012 want to avoid stepping back when lexing '...' or '.123'. In the
1013 latter case we should also set a flag for parse_number. */
1015 lex_dot (pfile, result)
1019 cpp_buffer *buffer = pfile->buffer;
1022 /* Parsing ..X could leave an extra character. */
1023 if (buffer->extra_char == EOF)
1024 c = get_effective_char (buffer);
/* Otherwise reuse the character left over by an earlier call instead of
   reading a new one.  */
1027 c = buffer->read_ahead = buffer->extra_char;
1028 buffer->extra_char = EOF;
1031 /* All known character sets have 0...9 contiguous. */
1032 if (c >= '0' && c <= '9')
1034 result->type = CPP_NUMBER;
/* seen_dot is set only for the duration of the parse_number call, which
   uses it to reserve and fill in the leading period.  */
1035 buffer->pfile->state.seen_dot = 1;
1036 parse_number (pfile, &result->val.str, c);
1037 buffer->pfile->state.seen_dot = 0;
1041 result->type = CPP_DOT;
/* The character after a second period is parked in extra_char while it
   is examined; a third period makes CPP_ELLIPSIS.  */
1044 buffer->extra_char = get_effective_char (buffer);
1045 if (buffer->extra_char == '.')
1047 buffer->extra_char = EOF;
1048 ACCEPT_CHAR (CPP_ELLIPSIS);
1051 /* We'll catch the extra_char when we're called back. */
1052 buffer->read_ahead = '.';
1054 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1055 ACCEPT_CHAR (CPP_DOT_STAR);
/* Lex one token from pfile->buffer into RESULT.  The token is stamped
   with the current line and column, and its kind is then decided from
   its first character C: whitespace, newline, number, identifier,
   string or character constant, comment, or operator, with CPP_OTHER
   as the fallback.
   NOTE(review): many structural lines (the switch header, several case
   labels, braces and jumps) are missing from this listing, so the notes
   below describe only what the visible lines show.  */
1060 lex_token (pfile, result)
1065 cpp_buffer *buffer = pfile->buffer;
1066 const unsigned char *comment_start;
/* Stamp the token with the position at which lexing starts.  */
1070 result->line = CPP_BUF_LINE (buffer);
1072 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
/* A character left in read_ahead by an earlier step is used first.  */
1074 c = buffer->read_ahead;
1075 if (c == EOF && buffer->cur < buffer->rlimit)
1082 buffer->read_ahead = EOF;
1086 /* Non-empty files should end in a newline. Testing
1087 skip_newlines ensures we only emit the warning once. */
1088 if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1089 && pfile->state.skip_newlines)
1090 cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1091 "no newline at end of file");
1092 result->type = CPP_EOF;
1095 case ' ': case '\t': case '\f': case '\v': case '\0':
1096 skip_whitespace (pfile, c);
1097 result->flags |= PREV_WHITE;
1100 case '\n': case '\r':
1101 result->type = CPP_EOF;
1102 handle_newline (buffer, c);
1103 /* Handling here will change significantly when moving to
1105 if (pfile->state.skip_newlines)
1107 result->flags &= ~PREV_WHITE; /* Clear any whitespace flag. */
1114 /* These could start an escaped newline, or '?' a trigraph. Let
1115 skip_escaped_newlines do all the work. */
/* Remember the line number, so that an escaped newline can be detected
   once skip_escaped_newlines returns.  */
1117 unsigned int lineno = buffer->lineno;
1119 c = skip_escaped_newlines (buffer, c);
1120 if (lineno != buffer->lineno)
1121 /* We had at least one escaped newline of some sort, and the
1122 next character is in buffer->read_ahead. Update the
1123 token's line and column. */
1126 /* We are either the original '?' or '\\', or a trigraph. */
1127 result->type = CPP_QUERY;
1128 buffer->read_ahead = EOF;
1130 result->type = CPP_BACKSLASH;
1136 case '0': case '1': case '2': case '3': case '4':
1137 case '5': case '6': case '7': case '8': case '9':
1138 result->type = CPP_NUMBER;
1139 parse_number (pfile, &result->val.str, c);
1143 if (!CPP_OPTION (pfile, dollars_in_ident))
1145 /* Fall through... */
1148 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1149 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1150 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1151 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1153 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1154 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1155 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1156 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1158 result->type = CPP_NAME;
1159 result->val.node = parse_identifier (pfile, c);
1161 /* 'L' may introduce wide characters or strings. */
1162 if (result->val.node == pfile->spec_nodes->n_L)
1164 c = buffer->read_ahead; /* For make_string. */
1165 if (c == '\'' || c == '"')
1167 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1171 /* Convert named operators to their proper types. */
1172 else if (result->val.node->type == T_OPERATOR)
1174 result->flags |= NAMED_OP;
1175 result->type = result->val.node->value.code;
1181 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1183 parse_string (pfile, result, c);
1187 /* A potential block or line comment. */
1188 comment_start = buffer->cur;
1189 result->type = CPP_DIV;
1190 c = get_effective_char (buffer);
1192 ACCEPT_CHAR (CPP_DIV_EQ);
1193 if (c != '/' && c != '*')
1198 if (skip_block_comment (pfile))
1199 cpp_error_with_line (pfile, result->line, result->col,
1200 "unterminated comment");
/* NOTE(review): the line comment handling below appears twice in this
   listing (the option tests, both diagnostic strings and the call to
   skip_line_comment are repeated), as if two revisions of the code had
   been interleaved - confirm against the complete file.  */
1204 if (!CPP_OPTION (pfile, cplusplus_comments)
1205 && !CPP_IN_SYSTEM_HEADER (pfile))
1208 /* We silently allow C++ comments in system headers,
1209 irrespective of conformance mode, because lots of
1210 broken systems do that and trying to clean it up in
1211 fixincludes is a nightmare. */
1212 if (CPP_OPTION (pfile, cplusplus_comments)
1213 || CPP_IN_SYSTEM_HEADER (pfile))
1215 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1216 && ! buffer->warned_cplusplus_comments)
1219 "C++ style comments are not allowed in ISO C89");
1221 "(this will be reported only once per input file)");
1222 buffer->warned_cplusplus_comments = 1;
1224 comment_start = buffer->cur;
1226 /* Skip_line_comment updates buffer->read_ahead. */
1227 if (skip_line_comment (pfile))
1228 cpp_warning_with_line (pfile, result->line, result->col,
1229 "multi-line comment");
1231 "C++ style comments are not allowed in ISO C89");
1233 "(this will be reported only once per input file)");
1234 buffer->warned_cplusplus_comments = 1;
1237 if (skip_line_comment (pfile))
1238 cpp_warning_with_line (pfile, result->line, result->col,
1239 "multi-line comment");
1242 /* Skipping the comment has updated buffer->read_ahead. */
1243 if (!pfile->state.save_comments)
1245 result->flags |= PREV_WHITE;
1249 /* Save the comment as a token in its own right. */
1250 save_comment (pfile, result, comment_start);
/* With angled_headers set, < starts a header name whose terminator is
   the closing >.  */
1254 if (pfile->state.angled_headers)
1256 result->type = CPP_HEADER_NAME;
1257 c = '>'; /* terminator. */
1261 result->type = CPP_LESS;
1262 c = get_effective_char (buffer);
1264 ACCEPT_CHAR (CPP_LESS_EQ);
1267 ACCEPT_CHAR (CPP_LSHIFT);
1268 if (get_effective_char (buffer) == '=')
1269 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1271 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1273 ACCEPT_CHAR (CPP_MIN);
1274 if (get_effective_char (buffer) == '=')
1275 ACCEPT_CHAR (CPP_MIN_EQ);
1277 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1279 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1280 result->flags |= DIGRAPH;
1282 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1284 ACCEPT_CHAR (CPP_OPEN_BRACE);
1285 result->flags |= DIGRAPH;
1290 result->type = CPP_GREATER;
1291 c = get_effective_char (buffer);
1293 ACCEPT_CHAR (CPP_GREATER_EQ);
1296 ACCEPT_CHAR (CPP_RSHIFT);
1297 if (get_effective_char (buffer) == '=')
1298 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1300 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1302 ACCEPT_CHAR (CPP_MAX);
1303 if (get_effective_char (buffer) == '=')
1304 ACCEPT_CHAR (CPP_MAX_EQ);
1309 lex_percent (buffer, result);
1313 lex_dot (pfile, result);
1317 result->type = CPP_PLUS;
1318 c = get_effective_char (buffer);
1320 ACCEPT_CHAR (CPP_PLUS_EQ);
1322 ACCEPT_CHAR (CPP_PLUS_PLUS);
1326 result->type = CPP_MINUS;
1327 c = get_effective_char (buffer);
1330 ACCEPT_CHAR (CPP_DEREF);
1331 if (CPP_OPTION (pfile, cplusplus)
1332 && get_effective_char (buffer) == '*')
1333 ACCEPT_CHAR (CPP_DEREF_STAR);
1336 ACCEPT_CHAR (CPP_MINUS_EQ);
1338 ACCEPT_CHAR (CPP_MINUS_MINUS);
1342 result->type = CPP_MULT;
1343 if (get_effective_char (buffer) == '=')
1344 ACCEPT_CHAR (CPP_MULT_EQ);
1348 result->type = CPP_EQ;
1349 if (get_effective_char (buffer) == '=')
1350 ACCEPT_CHAR (CPP_EQ_EQ);
1354 result->type = CPP_NOT;
1355 if (get_effective_char (buffer) == '=')
1356 ACCEPT_CHAR (CPP_NOT_EQ);
1360 result->type = CPP_AND;
1361 c = get_effective_char (buffer);
1363 ACCEPT_CHAR (CPP_AND_EQ);
1365 ACCEPT_CHAR (CPP_AND_AND);
1369 result->type = CPP_HASH;
1370 if (get_effective_char (buffer) == '#')
1371 ACCEPT_CHAR (CPP_PASTE);
1375 result->type = CPP_OR;
1376 c = get_effective_char (buffer);
1378 ACCEPT_CHAR (CPP_OR_EQ);
1380 ACCEPT_CHAR (CPP_OR_OR);
1384 result->type = CPP_XOR;
1385 if (get_effective_char (buffer) == '=')
1386 ACCEPT_CHAR (CPP_XOR_EQ);
1390 result->type = CPP_COLON;
1391 c = get_effective_char (buffer);
1392 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1393 ACCEPT_CHAR (CPP_SCOPE);
1394 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1396 result->flags |= DIGRAPH;
1397 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1401 case '~': result->type = CPP_COMPL; break;
1402 case ',': result->type = CPP_COMMA; break;
1403 case '(': result->type = CPP_OPEN_PAREN; break;
1404 case ')': result->type = CPP_CLOSE_PAREN; break;
1405 case '[': result->type = CPP_OPEN_SQUARE; break;
1406 case ']': result->type = CPP_CLOSE_SQUARE; break;
1407 case '{': result->type = CPP_OPEN_BRACE; break;
1408 case '}': result->type = CPP_CLOSE_BRACE; break;
1409 case ';': result->type = CPP_SEMICOLON; break;
1412 if (CPP_OPTION (pfile, objc))
1414 /* In Objective C, '@' may begin keywords or strings, like
1415 @keyword or @"string". It would be nice to call
1416 get_effective_char here and test the result. However, we
1417 would then need to pass 2 characters to parse_identifier,
1418 making it ugly and slowing down its main loop. Instead,
1419 we assume we have an identifier, and recover if not. */
1420 result->type = CPP_NAME;
1421 result->val.node = parse_identifier (pfile, c);
/* A length of 1 means that nothing but the @ itself was collected.  */
1422 if (result->val.node->length != 1)
1425 /* OK, so it wasn't an identifier. Maybe a string? */
1426 if (buffer->read_ahead == '"')
1429 ACCEPT_CHAR (CPP_OSTRING);
/* Fallback: the character itself is carried in val.aux of a CPP_OTHER
   token.  */
1437 result->type = CPP_OTHER;
1438 result->val.aux = c;
/* lex_line (PFILE, LIST): lex one logical line from PFILE->buffer,
   appending the tokens to LIST.  PFILE->state.in_lex_line is set on
   entry and cleared by the last visible statement.  LIST gets the
   BEG_OF_FILE flag when the buffer has not been read from yet.
   While state.skip_newlines is still set (it is cleared when the
   first token arrives) a CPP_HASH token switches the line into
   directive mode and turns comment saving off.  In a directive, the
   token lexed when LIST->tokens_used == first_token + 1 is passed to
   _cpp_check_linemarker when it is a CPP_NUMBER (the other visible
   call is _cpp_check_directive) and the result is stored in
   LIST->directive.  Token space is grown by 256 whenever LIST is
   full, and lexing stops at the first CPP_EOF token.
   Afterwards the first token of the line is flagged BOL and recorded
   in PFILE->first_directive_token.  An unknown directive is
   diagnosed, and the line cleared, unless PFILE->skipping or the
   lang_asm option is set.  The trailing CPP_EOF is dropped from LIST
   when the line is not empty, is not a known directive and
   PFILE->done_initializing is set.
   NOTE(review): this listing omits some lines of the function
   (braces, declarations, else keywords), so the exact nesting of
   the statements below should be confirmed against the full file.  */
1444 * The tokenizer's main loop. Returns a token list, representing a
1445 * logical line in the input file. On EOF after some tokens have
1446 * been processed, we return immediately. Then in next call, or if
1447 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1448 * token is placed in the list.
1452 lex_line (pfile, list)
1456 unsigned int first_token;
1457 cpp_token *cur_token, *first;
1458 cpp_buffer *buffer = pfile->buffer;
1460 pfile->state.in_lex_line = 1;
1461 if (pfile->buffer->cur == pfile->buffer->buf)
1462 list->flags |= BEG_OF_FILE;
1465 pfile->state.in_directive = 0;
1466 pfile->state.angled_headers = 0;
1467 pfile->state.skip_newlines = 1;
1468 pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1469 first_token = list->tokens_used;
1470 list->file = buffer->nominal_fname;
1474 if (list->tokens_used >= list->tokens_cap)
1475 _cpp_expand_token_space (list, 256);
1477 cur_token = list->tokens + list->tokens_used;
1478 lex_token (pfile, cur_token);
1480 if (pfile->state.skip_newlines)
1482 pfile->state.skip_newlines = 0;
1483 list->line = buffer->lineno;
1484 if (cur_token->type == CPP_HASH)
1486 pfile->state.in_directive = 1;
1487 pfile->state.save_comments = 0;
1488 pfile->state.indented = cur_token->flags & PREV_WHITE;
1490 /* 6.10.3.10: Within the sequence of preprocessing tokens
1491 making up the invocation of a function-like macro, new
1492 line is considered a normal white-space character. */
1493 else if (first_token != 0)
1494 cur_token->flags |= PREV_WHITE;
1496 else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1498 if (cur_token->type == CPP_NUMBER)
1499 list->directive = _cpp_check_linemarker (pfile, cur_token);
1501 list->directive = _cpp_check_directive (pfile, cur_token);
1504 /* _cpp_get_line assumes list->tokens_used refers to the current
1505 token being lexed. So do this after _cpp_check_directive to
1506 get the warnings therein correct. */
1507 list->tokens_used++;
1509 while (cur_token->type != CPP_EOF);
1511 /* All tokens are allocated, so the memory location is fixed. */
1512 first = &list->tokens[first_token];
1513 first->flags |= BOL;
1514 pfile->first_directive_token = first;
1516 /* Don't complain about the null directive, nor directives in
1517 assembly source: we don't know where the comments are, and # may
1518 introduce assembler pseudo-ops. Don't complain about invalid
1519 directives in skipped conditional groups (6.10 p4). */
1520 if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1521 && !CPP_OPTION (pfile, lang_asm))
1523 if (cur_token > first + 1)
1525 if (first[1].type == CPP_NAME)
1526 cpp_error_with_line (pfile, first->line, first->col,
1527 "invalid preprocessing directive #%s",
1528 first[1].val.node->name);
1530 cpp_error_with_line (pfile, first->line, first->col,
1531 "invalid preprocessing directive");
1534 /* Discard this line to prevent further errors from cc1. */
1535 _cpp_clear_toklist (list);
1539 /* Drop the EOF unless really at EOF or in a directive. */
1540 if (cur_token != first && !KNOWN_DIRECTIVE (list)
1541 && pfile->done_initializing)
1542 list->tokens_used--;
1544 pfile->state.in_lex_line = 0;
/* spell_token dispatches on TOKEN_SPELL (TOKEN).  In the lines shown
   here: an operator is spelled from _cpp_digraph_spellings when the
   DIGRAPH flag is set, and TOKEN_NAME is the other visible source
   (the body of the NAMED_OP branch is not shown); one case copies
   the identifier name held in TOKEN->val.node; string-like tokens
   are written as an optional tag character, an opening delimiter,
   the raw text and a closing delimiter; one case emits the single
   character in TOKEN->val.aux; anything else is reported through
   cpp_ice, the only visible use of PFILE.  The visible statements
   never write a terminating NUL.
   NOTE(review): several case labels and braces are missing from this
   listing; confirm which case each statement belongs to.  */
1547 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1548 already contain enough space to hold the token's spelling.
1549 Returns a pointer to the character after the last character
1552 static unsigned char *
1553 spell_token (pfile, token, buffer)
1554 cpp_reader *pfile; /* Would be nice to be rid of this... */
1555 const cpp_token *token;
1556 unsigned char *buffer;
1558 switch (TOKEN_SPELL (token))
1560 case SPELL_OPERATOR:
1562 const unsigned char *spelling;
1565 if (token->flags & DIGRAPH)
1566 spelling = _cpp_digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1567 else if (token->flags & NAMED_OP)
1570 spelling = TOKEN_NAME (token);
1572 while ((c = *spelling++) != '\0')
1579 memcpy (buffer, token->val.node->name, token->val.node->length);
1580 buffer += token->val.node->length;
1585 int left, right, tag;
1586 switch (token->type)
1588 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1589 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1590 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1591 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1592 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1593 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1594 default: left = '\0'; right = '\0'; tag = '\0'; break;
1596 if (tag) *buffer++ = tag;
1597 if (left) *buffer++ = left;
1598 memcpy (buffer, token->val.str.text, token->val.str.len);
1599 buffer += token->val.str.len;
1600 if (right) *buffer++ = right;
1605 *buffer++ = token->val.aux;
1609 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1616 /* Macro expansion algorithm.
1618 Macro expansion is implemented by a single-pass algorithm; there are
1619 no rescan passes involved. cpp_get_token expands just enough to be
1620 able to return a token to the caller, a consequence is that when it
1621 returns the preprocessor can be in a state of mid-expansion. The
1622 algorithm does not work by fully expanding a macro invocation into
1623 some kind of token list, and then returning them one by one.
1625 Our expansion state is recorded in a context stack. We start out with
1626 a single context on the stack, let's call it base context. This
1627 consists of the token list returned by lex_line that forms the next
1628 logical line in the source file.
1630 The current level in the context stack is stored in the cur_context
1631 member of the cpp_reader structure. The context it references keeps,
1632 amongst other things, a count of how many tokens form that context and
1633 our position within those tokens.
1635 Fundamentally, calling cpp_get_token will return the next token from
1636 the current context. If we're at the end of the current context, that
1637 context is popped from the stack first, unless it is the base context,
1638 in which case the next logical line is lexed from the source file.
1640 However, before returning the token, if it is a CPP_NAME token
1641 _cpp_get_token checks to see if it is a macro and if it is enabled.
1642 Each time it encounters a macro name, it calls push_macro_context.
1643 This function checks that the macro should be expanded (with
1644 is_macro_enabled), and if so pushes a new macro context on the stack
1645 which becomes the current context. It then loops back to read the
1646 first token of the macro context.
1648 A macro context basically consists of the token list representing the
1649 macro's replacement list, which was saved in the hash table by
1650 save_macro_expansion when its #define statement was parsed. If the
1651 macro is function-like, it also contains the tokens that form the
1652 arguments to the macro. I say more about macro arguments below, but
1653 for now just saying that each argument is a set of pointers to tokens
1656 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1657 token. This represents an argument passed to the macro, with the
1658 argument number stored in the token's AUX field. The argument should
1659 be substituted; this is achieved by pushing an "argument context". An
1660 argument context just refers to the tokens forming the argument,
1661 which are obtained directly from the macro context. The STRINGIFY
1662 flag on a CPP_MACRO_ARG token indicates that the argument should be
1665 Here are a few simple rules the context stack obeys:-
1667 1) The lex_line token list is always context zero.
1669 2) Context 1, if it exists, must be a macro context.
1671 3) An argument context can only appear above a macro context.
1673 4) A macro context can appear above the base context, another macro
1674 context, or an argument context.
1676 5) These imply that the minimal level of an argument context is 2.
1678 The only tricky thing left is ensuring that macros are enabled and
1679 disabled correctly. The algorithm controls macro expansion by the
1680 level of the context a token is taken from in the context stack. If a
1681 token is taken from a level equal to no_expand_level (a member of
1682 struct cpp_reader), no expansion is performed.
1684 When popping a context off the stack, if no_expand_level equals the
1685 level of the popped context, it is reduced by one to match the new
1686 context level, so that expansion is still disabled. It does not
1687 increase if a context is pushed, though. It starts out life as
1688 UINT_MAX, which has the effect that initially macro expansion is
1689 enabled. I explain how this mechanism works below.
1691 The standard requires:-
1693 1) Arguments to be fully expanded before substitution.
1695 2) Stringified arguments to not be expanded, nor the tokens
1696 immediately surrounding a ## operator.
1698 3) Continual rescanning until there are no more macros left to
1701 4) Once a macro has been expanded in stage 1) or 3), it cannot be
1702 expanded again during later rescans. This prevents infinite
1705 The first thing to observe is that stage 3) is mostly redundant.
1706 Since a macro is disabled once it has been expanded, how can a rescan
1707 find an unexpanded macro name? There are only two cases where this is
1710 a) If the macro name results from a token paste operation.
1712 b) If the macro in question is a function-like macro that hasn't
1713 already been expanded because previously there was not the required
1714 '(' token immediately following it. This is only possible when an
1715 argument is substituted, and after substitution the last token of
1716 the argument can bind with a parenthesis appearing in the tokens
1717 following the substitution. Note that if the '(' appears within the
1718 argument, the ')' must too, as expanding macro arguments cannot
1719 "suck in" tokens outside the argument.
1721 So we tackle this as follows. When parsing the macro invocation for
1722 arguments, we record the tokens forming each argument as a list of
1723 pointers to those tokens. We do not expand any tokens that are "raw",
1724 i.e. directly from the macro invocation, but other tokens that come
1725 from (nested) argument substitution are fully expanded.
1727 This is achieved by setting the no_expand_level to that of the macro
1728 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
1729 forming an argument, because parse_args (indirectly) calls
1730 get_raw_token which automatically pushes argument contexts and traces
1731 into them. Since these contexts are at a higher level than the
1732 no_expand_level, they get fully macro expanded.
1734 "Raw" and non-raw tokens are separated in arguments by null pointers,
1735 with the policy that the initial state of an argument is raw. If the
1736 first token is not raw, it should be preceded by a null pointer. When
1737 tracing through the tokens of an argument context, each time
1738 get_raw_token encounters a null pointer, it toggles the flag
1741 This flag, when set, indicates to is_macro_disabled that we are
1742 reading raw tokens which should be macro-expanded. Similarly, if
1743 clear, is_macro_disabled suppresses re-expansion.
1745 It's probably time for an example.
1749 #define xstr(y) str(y hash)
1751 xstr(hash) // "# hash"
1753 In the invocation of str, parse_args turns off macro expansion and so
1754 parses the argument as <hash>. This is the only token (pointer)
1755 passed as the argument to str. Since <hash> is raw there is no need
1756 for an initial null pointer. stringify_arg is called from
1757 get_raw_token when tracing through the expansion of str, since the
1758 argument has the STRINGIFY flag set. stringify_arg turns off
1759 macro_expansion by setting the no_expand_level to that of the argument
1760 context. Thus it gets the token <hash> and stringifies it to "hash"
1763 Similarly xstr is passed <hash>. However, when parse_args is parsing
1764 the invocation of str() in xstr's expansion, get_raw_token encounters
1765 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
1766 an argument context, and enters the tokens of the argument,
1767 i.e. <hash>. This is at a higher context level than parse_args
1768 disabled, and so is_macro_disabled permits expansion of it and a macro
1769 context is pushed on top of the argument context. This contains the
1770 <#> token, and the end result is that <hash> is macro expanded.
1771 However, after popping off the argument context, the <hash> of xstr's
1772 expansion does not get macro expanded because we're back at the
1773 no_expand_level. The end result is that the argument passed to str is
1774 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
1775 raw, <#> is not raw, but then <hash> is.
1780 /* Free the storage allocated for macro arguments. */
/* NOTE(review): only the release of ARGS->tokens is shown in this
   listing; push_macro_context also allocates ARGS itself and
   ARGS->ends with xmalloc, so confirm those are released here too.  */
1782 free_macro_args (args)
1786 free ((PTR) args->tokens);
/* is_macro_disabled (PFILE, EXPANSION, TOKEN): decide whether the
   macro whose replacement list is EXPANSION must not be expanded at
   the point where TOKEN was read.  Visible checks, in order:
   - when the current context is an argument context, the PASTED flag
     of TOKEN and the CONTEXT_RAW, CONTEXT_PASTEL and CONTEXT_PASTER
     flags of the context are examined;
   - the context stack is searched for a non-argument context whose
     token list is EXPANSION, i.e. the macro is already being expanded;
   - for a function-like macro (EXPANSION->paramc >= 0) the next token
     is fetched with PFILE->no_expand_level temporarily set to the
     index of the context it will come from; when that token is not
     CPP_OPEN_PAREN it is pushed back and, under CPP_WTRADITIONAL, a
     diagnostic naming the macro is issued.
   NOTE(review): the return statements are not part of this listing,
   so the result of each check should be confirmed in the full file.  */
1791 /* Determines if a macro has been already used (and is therefore
1794 is_macro_disabled (pfile, expansion, token)
1796 const cpp_toklist *expansion;
1797 const cpp_token *token;
1799 cpp_context *context = CURRENT_CONTEXT (pfile);
1801 /* Arguments on either side of ## are inserted in place without
1802 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
1803 occurs during a later rescan pass. The effect is that we expand
1804 iff we would as part of the macro's expansion list, so we should
1805 drop to the macro's context. */
1806 if (IS_ARG_CONTEXT (context))
1808 if (token->flags & PASTED)
1810 else if (!(context->flags & CONTEXT_RAW))
1812 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
1816 /* Have we already used this macro? */
1817 while (context->level > 0)
1819 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
1821 /* Raw argument tokens are judged based on the token list they
1823 if (context->flags & CONTEXT_RAW)
1824 context = pfile->contexts + context->level;
1829 /* Function-like macros may be disabled if the '(' is not in the
1830 current context. We check this without disrupting the context
1832 if (expansion->paramc >= 0)
1834 const cpp_token *next;
1835 unsigned int prev_nme;
1837 context = CURRENT_CONTEXT (pfile);
1838 /* Drop down any contexts we're at the end of: the '(' may
1839 appear in lower macro expansions, or in the rest of the file. */
1840 while (context->posn == context->count && context > pfile->contexts)
1843 /* If we matched, we are disabled, as we appear in the
1844 expansion of each macro we meet. */
1845 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
1849 prev_nme = pfile->no_expand_level;
1850 pfile->no_expand_level = context - pfile->contexts;
1851 next = _cpp_get_token (pfile);
1852 restore_macro_expansion (pfile, prev_nme);
1853 if (next->type != CPP_OPEN_PAREN)
1855 _cpp_push_token (pfile, next);
1856 if (CPP_WTRADITIONAL (pfile))
1858 "function macro %s must be used with arguments in traditional C",
1859 token->val.node->name);
1867 /* Add a token to the set of tokens forming the arguments to the macro
1868 being parsed in parse_args. */
/* ARGS->tokens is grown with xrealloc when it is full; each growth
   adds the old capacity plus 100 slots.  TOKEN is stored as given and
   may be a null pointer (parse_arg passes 0 as a separator).  */
1870 save_token (args, token)
1872 const cpp_token *token;
1874 if (args->used == args->capacity)
1876 args->capacity += args->capacity + 100;
1877 args->tokens = (const cpp_token **)
1878 xrealloc ((PTR) args->tokens,
1879 args->capacity * sizeof (const cpp_token *));
1881 args->tokens[args->used++] = token;
1884 /* Take and save raw tokens until we finish one argument. Empty
1885 arguments are saved as a single CPP_PLACEMARKER token. */
/* Tokens come from _cpp_get_token.  VAR_ARGS and PAREN together
   decide whether a comma ends the argument (see the comment in the
   switch).  RAW is true when the token was read at or below context
   level PAREN_CONTEXT; save_token (args, 0) stores a null pointer,
   presumably when RAW differs from WAS_RAW, to separate raw from
   non-raw runs.
   NOTE(review): the statements updating PAREN and *PCOUNT and the
   return are missing from this listing; presumably the terminating
   token is returned, since parse_args tests the result against
   CPP_CLOSE_PAREN and CPP_EOF.  */
1886 static const cpp_token *
1887 parse_arg (pfile, var_args, paren_context, args, pcount)
1890 unsigned int paren_context;
1892 unsigned int *pcount;
1894 const cpp_token *token;
1895 unsigned int paren = 0, count = 0;
1896 int raw, was_raw = 1;
1898 for (count = 0;; count++)
1900 token = _cpp_get_token (pfile);
1902 switch (token->type)
1907 case CPP_OPEN_PAREN:
1911 case CPP_CLOSE_PAREN:
1917 /* Commas are not terminators within parentheses or var_args. */
1918 if (paren || var_args)
1922 case CPP_EOF: /* Error reported by caller. */
1926 raw = pfile->cur_context <= paren_context;
1930 save_token (args, 0);
1933 save_token (args, token);
1939 /* Duplicate the placemarker. Then we can set its flags and
1940 position and safely be using more than one. */
1941 save_token (args, duplicate_token (pfile, &placemarker_token));
1949 /* This macro returns true if the argument starting at offset O of arglist
1950 A is empty - that is, it's either a single PLACEMARKER token, or a null
1951 pointer followed by a PLACEMARKER. */
/* A and O are each evaluated more than once, so neither argument
   should have side effects.  When the slot at O is a null pointer the
   slot at O + 1 is dereferenced without a further check.  */
1953 #define empty_argument(A, O) \
1954 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
1955 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
1957 /* Parse the arguments making up a macro invocation. Nested arguments
1958 are automatically macro expanded, but immediate macros are not
1959 expanded; this enables e.g. operator # to work correctly. Returns
1960 non-zero on error. */
/* HP is the hash node of the macro: its replacement list is
   HP->value.expansion and HP->name appears in the diagnostics.  Each
   argument is read with parse_arg, which is told whether the argument
   being read is the final parameter of a VAR_ARGS macro.  While ARGC
   is below the parameter count, ARGS->ends[ARGC] records the running
   token total.  Reading stops at CPP_CLOSE_PAREN or CPP_EOF.  A
   missing final rest argument is replaced by a placemarker flagged
   VOID_REST.  Visible diagnostics: unterminated argument list (when
   CPP_EOF was reached), not enough arguments, too many arguments.
   NOTE(review): the updates of ARGC and TOTAL and the return
   statements are not shown in this listing.  */
1962 parse_args (pfile, hp, args)
1967 const cpp_token *token;
1968 const cpp_toklist *macro;
1969 unsigned int total = 0;
1970 unsigned int paren_context = pfile->cur_context;
1973 macro = hp->value.expansion;
1978 token = parse_arg (pfile, (argc + 1 == macro->paramc
1979 && (macro->flags & VAR_ARGS)),
1980 paren_context, args, &count);
1981 if (argc < macro->paramc)
1984 args->ends[argc] = total;
1988 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
1990 if (token->type == CPP_EOF)
1992 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
1995 else if (argc < macro->paramc)
1997 /* A rest argument is allowed to not appear in the invocation at all.
1998 e.g. #define debug(format, args...) ...
2000 This is exactly the same as if the rest argument had received no
2001 tokens - debug("string",); This extension is deprecated. */
2003 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2005 /* Duplicate the placemarker. Then we can set its flags and
2006 position and safely be using more than one. */
2007 cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2008 pm->flags = VOID_REST;
2009 save_token (args, pm);
2010 args->ends[argc] = total + 1;
2012 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2013 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2019 cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2023 /* An empty argument to an empty function-like macro is fine. */
2024 else if (argc > macro->paramc
2025 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2027 cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2034 /* Adds backslashes before all backslashes and double quotes appearing
2035 in strings. Non-printable characters are converted to octal. */
/* The escaped text is written to DEST.  Callers subtract the start of
   the buffer from the result to obtain the output length, so the
   result is presumably a pointer just past the last byte written.
   An octal escape is produced by sprintf as a backslash followed by
   three digits; sprintf also stores a terminating NUL after them, so
   DEST needs one spare byte beyond the escape itself.  */
2037 quote_string (dest, src, len)
2046 if (c == '\\' || c == '"')
2057 sprintf ((char *) dest, "\\%03o", c);
2066 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2067 CPP_STRING token containing TEXT in quoted form. */
/* The buffer is sized at four bytes per input byte, the length of one
   octal escape, and becomes TOKEN->val.str.text; the stored length is
   the number of bytes quote_string produced.
   NOTE(review): quote_string emits octal escapes with sprintf, which
   appends a NUL; if the last input byte needs such an escape, that
   NUL would land one byte past a buffer of exactly LEN * 4 bytes.
   LEN == 0 also requests a zero-byte allocation.  Confirm both.  */
2069 make_string_token (token, text, len)
2076 buf = (U_CHAR *) xmalloc (len * 4);
2077 token->type = CPP_STRING;
2079 token->val.str.text = buf;
2080 token->val.str.len = quote_string (buf, text, len) - buf;
2084 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2085 evaluating to NUMBER. */
/* NUMBER is formatted in decimal with the %d conversion; the digits
   become the token text and strlen gives the token length.
   NOTE(review): the allocation of BUF is not shown in this listing;
   confirm it is heap storage large enough for any int, because
   release_temp_tokens frees the text of string-spelled temporary
   tokens.  */
2087 alloc_number_token (pfile, number)
2094 result = get_temp_token (pfile);
2096 sprintf (buf, "%d", number);
2098 result->type = CPP_NUMBER;
2100 result->val.str.text = (U_CHAR *) buf;
2101 result->val.str.len = strlen (buf);
2105 /* Returns a temporary token from the temporary token store of PFILE. */
/* Tokens are allocated lazily: temp_used counts tokens handed out,
   temp_alloced counts token objects that exist, and temp_cap is the
   size of the pointer array, which grows by its old size plus 20.
   A token object that already exists is handed out again as it is;
   the visible code does not clear it.  */
2107 get_temp_token (pfile)
2110 if (pfile->temp_used == pfile->temp_alloced)
2112 if (pfile->temp_used == pfile->temp_cap)
2114 pfile->temp_cap += pfile->temp_cap + 20;
2115 pfile->temp_tokens = (cpp_token **) xrealloc
2116 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2118 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2119 (sizeof (cpp_token));
2122 return pfile->temp_tokens[pfile->temp_used++];
2125 /* Release (not free) for re-use the temporary tokens of PFILE. */
/* Counts temp_used down to zero.  For each token whose spelling class
   is SPELL_STRING the text is freed and the pointer cleared; the
   token objects themselves stay allocated for get_temp_token.  */
2127 release_temp_tokens (pfile)
2130 while (pfile->temp_used)
2132 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2134 if (TOKEN_SPELL (token) == SPELL_STRING)
2136 free ((char *) token->val.str.text);
2137 token->val.str.text = 0;
2142 /* Free all of PFILE's dynamically-allocated temporary tokens. */
/* When a token array exists, outstanding tokens are released first,
   then every allocated token object and finally the array itself are
   freed.  The string text of PFILE->date and PFILE->time is freed
   as well.
   NOTE(review): the guards around the date and time frees are not
   shown in this listing; confirm they test for null tokens.  */
2144 _cpp_free_temp_tokens (pfile)
2147 if (pfile->temp_tokens)
2149 /* It is possible, though unlikely (looking for '(' of a funlike
2150 macro into EOF), that we haven't released the tokens yet. */
2151 release_temp_tokens (pfile);
2152 while (pfile->temp_alloced)
2153 free (pfile->temp_tokens[--pfile->temp_alloced]);
2154 free (pfile->temp_tokens);
2159 free ((char *) pfile->date->val.str.text);
2161 free ((char *) pfile->time->val.str.text);
2166 /* Copy TOKEN into a temporary token from PFILE's store. */
/* For a SPELL_STRING token the text is copied into a fresh xmalloc
   buffer of exactly val.str.len bytes (no terminating NUL is added),
   so the copy owns its text and release_temp_tokens can free it
   without touching the original.
   NOTE(review): the structure copy and the return statement are not
   shown in this listing.  */
2168 duplicate_token (pfile, token)
2170 const cpp_token *token;
2172 cpp_token *result = get_temp_token (pfile);
2175 if (TOKEN_SPELL (token) == SPELL_STRING)
2177 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2178 memcpy (buff, token->val.str.text, token->val.str.len);
2179 result->val.str.text = buff;
/* _cpp_can_paste (PFILE, TOKEN1, TOKEN2, DIGRAPH): compute the token
   type that results from pasting TOKEN1 and TOKEN2.  *DIGRAPH is
   written by the branches that produce a digraph spelling, and by
   the CPP_PASTE branch, which copies the DIGRAPH flag of TOKEN1.
   C++-only results (CPP_MAX, CPP_MIN, CPP_SCOPE, CPP_DEREF_STAR,
   CPP_DOT_STAR) require the cplusplus option, digraph results the
   digraphs option, and pasting after an at-sign the objc option.
   A first token type up to CPP_LAST_EQ followed by CPP_EQ maps to
   the type at a fixed offset (CPP_EQ_EQ - CPP_EQ), which relies on
   the ordering of enum cpp_ttype.
   NOTE(review): the case labels of the switch are missing from this
   listing; confirm which first-token type each group belongs to.  */
2184 /* Determine whether two tokens can be pasted together, and if so,
2185 what the resulting token is. Returns CPP_EOF if the tokens cannot
2186 be pasted, or the appropriate type for the merged token if they
2189 _cpp_can_paste (pfile, token1, token2, digraph)
2191 const cpp_token *token1, *token2;
2194 enum cpp_ttype a = token1->type, b = token2->type;
2195 int cxx = CPP_OPTION (pfile, cplusplus);
2197 /* Treat named operators as if they were ordinary NAMEs. */
2198 if (token1->flags & NAMED_OP)
2200 if (token2->flags & NAMED_OP)
2203 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2204 return a + (CPP_EQ_EQ - CPP_EQ);
2209 if (b == a) return CPP_RSHIFT;
2210 if (b == CPP_QUERY && cxx) return CPP_MAX;
2211 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2214 if (b == a) return CPP_LSHIFT;
2215 if (b == CPP_QUERY && cxx) return CPP_MIN;
2216 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2217 if (CPP_OPTION (pfile, digraphs))
2220 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2222 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2226 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2227 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2228 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2231 if (b == a) return CPP_MINUS_MINUS;
2232 if (b == CPP_GREATER) return CPP_DEREF;
2235 if (b == a && cxx) return CPP_SCOPE;
2236 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2237 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2241 if (CPP_OPTION (pfile, digraphs))
2243 if (b == CPP_GREATER)
2244 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2246 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2250 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2253 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2254 if (b == CPP_NUMBER) return CPP_NUMBER;
2258 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2260 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2264 if (b == CPP_NAME) return CPP_NAME;
2266 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2268 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2270 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2274 if (b == CPP_NUMBER) return CPP_NUMBER;
2275 if (b == CPP_NAME) return CPP_NUMBER;
2276 if (b == CPP_DOT) return CPP_NUMBER;
2277 /* Numbers cannot have length zero, so this is safe. */
2278 if ((b == CPP_PLUS || b == CPP_MINUS)
2279 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2284 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2286 if (b == CPP_NAME) return CPP_NAME;
2287 if (b == CPP_STRING) return CPP_OSTRING;
2297 /* Check if TOKEN is to be ##-pasted with the token after it. */
/* Loops while TOKEN carries PASTE_LEFT, or while the current context
   is flagged CONTEXT_PASTEL and has been read to its end.  The next
   token is fetched with PFILE->paste_level set to the current context
   level and reset to 0 afterwards.  A placemarker on either side
   yields a copy of the other token, except that a comma followed by
   a VOID_REST placemarker yields a copy of the placemarker.
   Otherwise _cpp_can_paste picks the result type: CPP_EOF means the
   paste is invalid, in which case the second token is pushed back
   and a copy of TOKEN without PASTE_LEFT is used; names and numbers
   are built by spelling both tokens into one alloca buffer; wide
   character, wide string and Objective C string results are a copy
   of the second token.
   NOTE(review): uxstrdup is handed BUF; confirm that the buffer is
   NUL terminated and that TOKEN_LEN leaves room for the terminator.
   Lines are missing from this listing, including the return.  */
2298 static const cpp_token *
2299 maybe_paste_with_next (pfile, token)
2301 const cpp_token *token;
2304 const cpp_token *second;
2305 cpp_context *context = CURRENT_CONTEXT (pfile);
2307 /* Is this token on the LHS of ## ? */
2309 while ((token->flags & PASTE_LEFT)
2310 || ((context->flags & CONTEXT_PASTEL)
2311 && context->posn == context->count))
2313 /* Suppress macro expansion for next token, but don't conflict
2314 with the other method of suppression. If it is an argument,
2315 macro expansion within the argument will still occur. */
2316 pfile->paste_level = pfile->cur_context;
2317 second = _cpp_get_token (pfile);
2318 pfile->paste_level = 0;
2320 /* Ignore placemarker argument tokens (cannot be from an empty
2321 macro since macros are not expanded). */
2322 if (token->type == CPP_PLACEMARKER)
2323 pasted = duplicate_token (pfile, second);
2324 else if (second->type == CPP_PLACEMARKER)
2326 /* GCC has special extended semantics for , ## b where b is
2327 a varargs parameter: the comma disappears if b was given
2328 no actual arguments (not merely if b is an empty
2330 if (token->type == CPP_COMMA && second->flags & VOID_REST)
2331 pasted = duplicate_token (pfile, second);
2333 pasted = duplicate_token (pfile, token);
2338 enum cpp_ttype type = _cpp_can_paste (pfile, token, second, &digraph);
2340 if (type == CPP_EOF)
2342 if (CPP_OPTION (pfile, warn_paste))
2344 /* Do not complain about , ## <whatever> if
2345 <whatever> came from a variable argument, because
2346 the author probably intended the ## to trigger
2347 the special extended semantics (see above). */
2348 if (token->type == CPP_COMMA
2349 && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2350 && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2354 "pasting would not give a valid preprocessing token");
2356 _cpp_push_token (pfile, second);
2357 /* A short term hack to safely clear the PASTE_LEFT flag. */
2358 pasted = duplicate_token (pfile, token);
2359 pasted->flags &= ~PASTE_LEFT;
2363 if (type == CPP_NAME || type == CPP_NUMBER)
2365 /* Join spellings. */
2368 pasted = get_temp_token (pfile);
2369 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2370 end = spell_token (pfile, token, buf);
2371 end = spell_token (pfile, second, end);
2374 if (type == CPP_NAME)
2375 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2378 pasted->val.str.text = uxstrdup (buf);
2379 pasted->val.str.len = end - buf;
2382 else if (type == CPP_WCHAR || type == CPP_WSTRING
2383 || type == CPP_OSTRING)
2384 pasted = duplicate_token (pfile, second);
2387 pasted = get_temp_token (pfile);
2388 pasted->val.integer = 0;
2391 pasted->type = type;
2392 pasted->flags = digraph ? DIGRAPH : 0;
2394 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2396 pasted->type = pasted->val.node->value.code;
2397 pasted->flags |= NAMED_OP;
2401 /* The pasted token gets the whitespace flags and position of the
2402 first token, the PASTE_LEFT flag of the second token, plus the
2403 PASTED flag to indicate it is the result of a paste. However, we
2404 want to preserve the DIGRAPH flag. */
2405 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2406 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2407 | (second->flags & PASTE_LEFT) | PASTED);
2408 pasted->col = token->col;
2409 pasted->line = token->line;
2411 /* See if there is another token to be pasted onto the one we just
2414 context = CURRENT_CONTEXT (pfile);
2420 /* Convert a token sequence to a single string token according to the
2421 rules of the ISO C #-operator. */
/* TOKEN is the macro argument token.  Its argument is pushed as a
   context and read back, with macro expansion prevented, until
   CPP_EOF.  Placemarkers are tested for, string and character
   constants are marked for escaping (quote_string is the visible
   consumer of the spelled text), and the result is a temporary
   CPP_STRING token whose text is the heap-allocated MAIN_BUF, with
   the flags and position of TOKEN.  An odd BACKSLASH_COUNT at the end
   triggers the warning about a final backslash.  The previous
   expansion state is restored before returning.  */
2422 #define INIT_SIZE 200
2424 stringify_arg (pfile, token)
2426 const cpp_token *token;
2429 unsigned char *main_buf;
2430 unsigned int prev_value, backslash_count = 0;
2431 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2433 push_arg_context (pfile, token);
2434 prev_value = prevent_macro_expansion (pfile);
2435 main_buf = (unsigned char *) xmalloc (buf_cap);
2437 result = get_temp_token (pfile);
2438 ASSIGN_FLAGS_AND_POS (result, token);
2440 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2444 unsigned int len = TOKEN_LEN (token);
2446 if (token->type == CPP_PLACEMARKER)
2449 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2450 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2454 if (buf_used + len > buf_cap)
2456 buf_cap = buf_used + len + INIT_SIZE;
2457 main_buf = xrealloc (main_buf, buf_cap);
/* NOTE(review): the capacity check above compares only BUF_USED + LEN
   with BUF_CAP; confirm that LEN has slack for this separator.  */
2460 if (whitespace && (token->flags & PREV_WHITE))
2461 main_buf[buf_used++] = ' ';
2464 buf = (unsigned char *) xmalloc (len);
2466 buf = main_buf + buf_used;
2468 len = spell_token (pfile, token, buf) - buf;
2471 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2478 if (token->type == CPP_BACKSLASH)
2481 backslash_count = 0;
2484 /* Ignore the final \ of invalid string literals. */
2485 if (backslash_count & 1)
2487 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2491 result->type = CPP_STRING;
2492 result->val.str.text = main_buf;
2493 result->val.str.len = buf_used;
2494 restore_macro_expansion (pfile, prev_value);
2498 /* Allocate more room on the context stack of PFILE. */
/* The capacity grows by its old value plus 20.  The array is resized
   with xrealloc, which presumably may move it, so cpp_context
   pointers obtained before the call should be re-read; the visible
   callers call CURRENT_CONTEXT only after expanding.  */
2500 expand_context_stack (pfile)
2503 pfile->context_cap += pfile->context_cap + 20;
2504 pfile->contexts = (cpp_context *)
2505 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2508 /* Push the context of macro NODE onto the context stack. TOKEN is
2509 the CPP_NAME token invoking the macro. */
/* NODE is not a parameter: it is the hash node TOKEN->val.node.  For
   a function-like macro (paramc >= 0) a macro_args record and its
   ends array are allocated and filled by parse_args while macro
   expansion is prevented; free_macro_args releases the record,
   presumably only when parse_args reports an error.  ARGS->level is
   set to the context level in effect after parse_args returns.  The
   new context takes the replacement list as its token list and ARGS
   as its arguments, and the first replacement token receives the
   position of TOKEN together with the saved ORIG_FLAGS.
   NOTE(review): the value given to ARGS for an object-like macro and
   the return statements are not shown in this listing.  */
2511 push_macro_context (pfile, token)
2513 const cpp_token *token;
2515 unsigned char orig_flags;
2517 cpp_context *context;
2518 cpp_hashnode *node = token->val.node;
2520 /* Token's flags may change when parsing args containing a nested
2521 invocation of this macro. */
2522 orig_flags = token->flags & (PREV_WHITE | BOL);
2524 if (node->value.expansion->paramc >= 0)
2526 unsigned int error, prev_nme;
2528 /* Allocate room for the argument contexts, and parse them. */
2529 args = (macro_args *) xmalloc (sizeof (macro_args));
2530 args->ends = (unsigned int *)
2531 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2536 prev_nme = prevent_macro_expansion (pfile);
2538 error = parse_args (pfile, node, args);
2540 restore_macro_expansion (pfile, prev_nme);
2543 free_macro_args (args);
2546 /* Set the level after the call to parse_args. */
2547 args->level = pfile->cur_context;
2550 /* Now push its context. */
2551 pfile->cur_context++;
2552 if (pfile->cur_context == pfile->context_cap)
2553 expand_context_stack (pfile);
2555 context = CURRENT_CONTEXT (pfile);
2556 context->u.list = node->value.expansion;
2557 context->args = args;
2559 context->count = context->u.list->tokens_used;
2560 context->level = pfile->cur_context;
2562 context->pushed_token = 0;
2564 /* Set the flags of the first token. We know there must
2565 be one, empty macros are a single placemarker token. */
2566 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2571 /* Push an argument to the current macro onto the context stack.
2572 TOKEN is the MACRO_ARG token representing the argument expansion.
   TOKEN->val.aux selects the argument. The argument tokens are found
   through the args of the context directly below the new one. */
2574 push_arg_context (pfile, token)
2576 const cpp_token *token;
2578 cpp_context *context;
2581 pfile->cur_context++;
2582 if (pfile->cur_context == pfile->context_cap)
2583 expand_context_stack (pfile);
2585 context = CURRENT_CONTEXT (pfile);
/* The context below the new one carries the parsed macro arguments. */
2586 args = context[-1].args;
/* context->count is first used as the start index of this argument in
   args->tokens: 0 for argument 0, otherwise the end index recorded for
   the previous argument. It is then replaced by the argument's token
   count (its own end index minus that start). */
2588 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2589 context->u.arg = args->tokens + context->count;
2590 context->count = args->ends[token->val.aux] - context->count;
2593 context->level = args->level;
2594 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2595 context->pushed_token = 0;
2597 /* Set the flags of the first token. There is one. */
2599 const cpp_token *first = context->u.arg[0];
/* NOTE(review): the condition under which the second token is used
   instead of the first is not visible in this listing - confirm. */
2601 first = context->u.arg[1];
/* The const qualifier is cast away so the flags and position of the
   stored token can be updated from TOKEN. */
2603 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2604 token->flags & (PREV_WHITE | BOL));
/* Record on which side of ## this argument sits: CONTEXT_PASTEL when
   TOKEN itself has PASTE_LEFT set, CONTEXT_PASTER when a paste is
   already in progress (pfile->paste_level is non-zero). */
2607 if (token->flags & PASTE_LEFT)
2608 context->flags |= CONTEXT_PASTEL;
2609 if (pfile->paste_level)
2610 context->flags |= CONTEXT_PASTER;
2613 /* "Unget" a token. It is effectively inserted in the token queue and
2614 will be returned by the next call to get_raw_token. The token is
   stored in the pushed_token slot of the current context, which holds
   a single token: a second push before the first has been read back
   is reported through cpp_ice. */
2616 _cpp_push_token (pfile, token)
2618 const cpp_token *token;
2620 cpp_context *context = CURRENT_CONTEXT (pfile);
2622 if (context->posn > 0)
2624 const cpp_token *prev;
/* PREV is the token most recently read from this context: argument
   contexts store pointers to tokens, list contexts store the tokens
   themselves. */
2625 if (IS_ARG_CONTEXT (context))
2626 prev = context->u.arg[context->posn - 1];
2628 prev = &context->u.list->tokens[context->posn - 1];
/* NOTE(review): the lines that use PREV are not visible in this
   listing - confirm what is done with it. */
2637 if (context->pushed_token)
2638 cpp_ice (pfile, "two tokens pushed in a row");
2639 if (token->type != CPP_EOF)
2640 context->pushed_token = token;
2641 /* Don't push back a directive's CPP_EOF, step back instead. */
2642 else if (pfile->cur_context == 0)
2643 pfile->contexts[0].posn--;
2646 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2647 introducing the directive. The directive to run is taken from
   pfile->token_list.directive. Its handler is called with macro
   expansion prevented unless the directive's flags include EXPAND,
   and afterwards the rest of the line is discarded with
   _cpp_skip_rest_of_line. */
2649 _cpp_process_directive (pfile, token)
2651 const cpp_token *token;
2653 const struct directive *d = pfile->token_list.directive;
2656 /* Skip over the directive name. */
2657 if (token[1].type == CPP_NAME)
2658 _cpp_get_raw_token (pfile);
2659 else if (token[1].type != CPP_NUMBER)
/* Name the token that follows the '#': it is the one tested above and
   the one the directive "begins with". Naming TOKEN itself would
   always report the '#'. */
2660 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (&token[1]));
/* Directives without EXPAND run with macro expansion prevented; the
   previous level is put back once the handler returns. */
2662 if (! (d->flags & EXPAND))
2663 prev_nme = prevent_macro_expansion (pfile);
2664 (void) (*d->handler) (pfile);
2665 if (! (d->flags & EXPAND))
2666 restore_macro_expansion (pfile, prev_nme);
2667 _cpp_skip_rest_of_line (pfile);
2670 /* The external interface to return the next token. All macro
2671 expansion and directive processing is handled internally, the
2672 caller only ever sees the output after preprocessing. Tokens are
   obtained from _cpp_get_token. A CPP_HASH token that has BOL set
   while pfile->token_list.directive is non-null is passed to
   _cpp_process_directive. */
2674 cpp_get_token (pfile)
2677 const cpp_token *token;
2678 /* Loop till we hit a non-directive, non-placemarker token. */
2681 token = _cpp_get_token (pfile);
2683 if (token->type == CPP_PLACEMARKER)
/* A '#' counts as a directive only when it is flagged BOL and the
   token list has a directive recorded. */
2686 if (token->type == CPP_HASH && token->flags & BOL
2687 && pfile->token_list.directive)
2689 _cpp_process_directive (pfile, token);
2697 /* The internal interface to return the next token. There are two
2698 differences between the internal and external interfaces: the
2699 internal interface may return a PLACEMARKER token, and it does not
2700 process directives. Raw tokens come from get_raw_token. Names
   whose node type is T_MACRO are entered with push_macro_context;
   names of any other non-T_VOID type go to special_symbol.
   NOTE(review): several return/continue/break lines are not visible
   in this listing, so the exact exit of each test below should be
   confirmed against the full source. */
2702 _cpp_get_token (pfile)
/* NOTE(review): old_token is compared with TOKEN after the paste step
   below; its assignment is not shown - presumably the token as it was
   before maybe_paste_with_next. */
2705 const cpp_token *token, *old_token;
2708 /* Loop until we hit a non-macro token. */
2711 token = get_raw_token (pfile);
2713 /* Short circuit EOF. */
2714 if (token->type == CPP_EOF)
2717 /* If we are skipping... */
2718 if (pfile->skipping)
2720 /* we still have to process directives, */
2721 if (pfile->token_list.directive)
2724 /* but everything else is ignored. */
2725 _cpp_skip_rest_of_line (pfile);
2729 /* If there's a potential control macro and we get here, then that
2730 #ifndef didn't cover the entire file and its argument shouldn't
2731 be taken as a control macro. */
2732 pfile->potential_control_macro = 0;
2734 /* If we are rescanning preprocessed input, no macro expansion or
2735 token pasting may occur. */
2736 if (CPP_OPTION (pfile, preprocessed))
2741 /* See if there's a token to paste with this one. */
2742 if (!pfile->paste_level)
2743 token = maybe_paste_with_next (pfile, token);
2745 /* If it isn't a macro, return it now. */
2746 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
2749 /* Is macro expansion disabled in general, or are we in the
2750 middle of a token paste, or was this token just pasted?
2751 (Note we don't check token->flags & PASTED, because that
2752 counts tokens that were pasted at some point in the past,
2753 we're only interested in tokens that were pasted by this call
2754 to maybe_paste_with_next.) */
2755 if (pfile->no_expand_level == pfile->cur_context
2756 || pfile->paste_level
2757 || (token != old_token
2758 && pfile->no_expand_level + 1 == pfile->cur_context))
/* Node types other than T_MACRO are handled by special_symbol, whose
   result is returned directly. */
2761 node = token->val.node;
2762 if (node->type != T_MACRO)
2763 return special_symbol (pfile, node, token);
2765 if (is_macro_disabled (pfile, node->value.expansion, token))
2768 if (push_macro_context (pfile, token))
2774 /* Returns the next raw token, i.e. without performing macro
2775 expansion. Argument contexts are automatically entered. The
   tests below are tried in this order on the current context: a
   pushed-back token, an exhausted context (pop_context), an argument
   context, and finally a token list. A CPP_MACRO_ARG read from a
   token list is either stringified (STRINGIFY_ARG set) or entered
   with push_arg_context. */
2776 static const cpp_token *
2777 get_raw_token (pfile)
2780 const cpp_token *result;
2781 cpp_context *context;
2785 context = CURRENT_CONTEXT (pfile);
/* A token handed back through _cpp_push_token takes priority and
   empties the slot. */
2786 if (context->pushed_token)
2788 result = context->pushed_token;
2789 context->pushed_token = 0;
2790 return result; /* Cannot be a CPP_MACRO_ARG */
/* All tokens of this context have been consumed. */
2792 else if (context->posn == context->count)
2794 if (pop_context (pfile))
/* Argument contexts hold pointers to tokens. */
2798 else if (IS_ARG_CONTEXT (context))
2800 result = context->u.arg[context->posn++];
/* NOTE(review): the condition for toggling CONTEXT_RAW and reading a
   further token is not visible in this listing - confirm. */
2803 context->flags ^= CONTEXT_RAW;
2804 result = context->u.arg[context->posn++];
2806 return result; /* Cannot be a CPP_MACRO_ARG */
/* List contexts hold the tokens themselves. */
2809 result = &context->u.list->tokens[context->posn++];
2811 if (result->type != CPP_MACRO_ARG)
2814 if (result->flags & STRINGIFY_ARG)
2815 return stringify_arg (pfile, result);
2817 push_arg_context (pfile, result);
2821 /* Internal interface to get the token without macro expanding.
   Macro expansion is prevented at the current context level around a
   single call to _cpp_get_token and then put back to its previous
   setting. */
2823 _cpp_get_raw_token (pfile)
/* The saved level is unsigned, like prev_value in
   prevent_macro_expansion and restore_macro_expansion. It can be
   UINT_MAX, which a plain int cannot represent. */
2826 unsigned int prev_nme = prevent_macro_expansion (pfile);
2827 const cpp_token *result = _cpp_get_token (pfile);
2828 restore_macro_expansion (pfile, prev_nme);
2832 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2833 list should be overwritten, or zero if we need to append
2834 (typically, if we are within the arguments to a macro, or looking
2835 for the '(' to start a function-like macro invocation). After
   lexing, the base context's count is set to the list's tokens_used.
   When appending while pfile->args is set, argument token pointers
   are rebased if the token array moved, and a directive found on the
   appended line is reported as an error.
   NOTE(review): pop_context returns this function's result; the
   return statements themselves are not visible in this listing. */
2837 lex_next (pfile, clear)
/* Remember where the token array was and how many tokens it held, so
   that pointers into it can be rebased below. */
2841 cpp_toklist *list = &pfile->token_list;
2842 const cpp_token *old_list = list->tokens;
2843 unsigned int old_used = list->tokens_used;
/* NOTE(review): the test guarding the next four statements is not
   shown; presumably they run only when CLEAR is non-zero - confirm. */
2847 /* Release all temporary tokens. */
2848 _cpp_clear_toklist (list);
2849 pfile->contexts[0].posn = 0;
2850 if (pfile->temp_used)
2851 release_temp_tokens (pfile);
2853 lex_line (pfile, list);
2854 pfile->contexts[0].count = list->tokens_used;
2856 if (!clear && pfile->args)
2858 /* Fix up argument token pointers. */
2859 if (old_list != list->tokens)
/* Only pointers that fell inside the old array are shifted, by the
   byte distance between the old and new array starts. */
2863 for (i = 0; i < pfile->args->used; i++)
2865 const cpp_token *token = pfile->args->tokens[i];
2866 if (token >= old_list && token < old_list + old_used)
2867 pfile->args->tokens[i] = (const cpp_token *)
2868 ((char *) token + ((char *) list->tokens - (char *) old_list));
2872 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
2873 tokens within the list of arguments that would otherwise act as
2874 preprocessing directives, the behavior is undefined.
2876 This implementation will report a hard error and treat the
2877 'sequence of preprocessing tokens' as part of the macro argument,
2880 Note if pfile->args == 0, we're OK since we're only inside a
2881 macro argument after a '('. */
2882 if (list->directive)
/* tokens[old_used] is the first token appended by this call; its line
   and column locate the diagnostic. */
2884 cpp_error_with_line (pfile, list->tokens[old_used].line,
2885 list->tokens[old_used].col,
2886 "#%s may not be used inside a macro argument",
2887 list->directive->name);
2895 /* Pops a context off the context stack. If we're at the bottom, lexes
2896 the next logical line. Returns EOF if we're at the end of the
2897 argument list to the # operator, or we should not "overflow"
2898 into the rest of the file (e.g. 6.10.3.1.1).
   NOTE(review): the function's name line and most of its return
   statements are not visible in this listing; get_raw_token tests the
   result of pop_context, so a non-zero result presumably stands for
   the EOF case described above - confirm. */
2903 cpp_context *context;
2905 if (pfile->cur_context == 0)
2907 /* If we are currently processing a directive, do not advance. 6.10
2908 paragraph 2: A new-line character ends the directive even if it
2909 occurs within what would otherwise be an invocation of a
2910 function-like macro. */
2911 if (pfile->token_list.directive)
/* At the base context outside a directive, lex another line. The
   token list is overwritten only when no_expand_level is UINT_MAX,
   the value assigned in _cpp_init_input_buffer; otherwise the new
   line is appended. */
2914 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
2917 /* Argument contexts, when parsing args or handling # operator
2918 return CPP_EOF at the end. */
2919 context = CURRENT_CONTEXT (pfile);
2920 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
2923 /* Free resources when leaving macro contexts. */
/* NOTE(review): the test guarding this free is not shown - confirm. */
2925 free_macro_args (context->args);
/* Keep the no-expand level from pointing at the context being
   dropped: it moves down together with cur_context. */
2927 if (pfile->cur_context == pfile->no_expand_level)
2928 pfile->no_expand_level--;
2929 pfile->cur_context--;
2934 /* Turn off macro expansion at the current context level. This is
   done by setting pfile->no_expand_level to pfile->cur_context. The
   previous level is saved in prev_value; callers in this file store
   this function's result and later pass it to
   restore_macro_expansion.
   NOTE(review): the return statement is not visible in this listing;
   presumably it returns prev_value - confirm. */
2936 prevent_macro_expansion (pfile)
2939 unsigned int prev_value = pfile->no_expand_level;
2940 pfile->no_expand_level = pfile->cur_context;
2944 /* Restore macro expansion to its previous state. PREV_VALUE is
   assigned to pfile->no_expand_level; callers in this file pass the
   value they saved from prevent_macro_expansion. */
2946 restore_macro_expansion (pfile, prev_value)
2948 unsigned int prev_value;
2950 pfile->no_expand_level = prev_value;
2953 /* Used by cpperror.c to obtain the correct line and column to report in a diagnostic. */
/* Returns the line of a token in pfile->token_list and stores that
   token's column through PCOL. While a line is being lexed
   (pfile->state.in_lex_line), the token index starts from the list's
   tokens_used; otherwise it starts from the base context's posn.
   NOTE(review): special_symbol calls this with PCOL == NULL, and the
   lines between the index selection and the token lookup are not
   visible in this listing; presumably the store through PCOL is
   guarded and the index is adjusted there - confirm. */
2956 _cpp_get_line (pfile, pcol)
2961 const cpp_token *cur_token;
2963 if (pfile->state.in_lex_line)
2964 index = pfile->token_list.tokens_used;
2967 index = pfile->contexts[0].posn;
2978 cur_token = &pfile->token_list.tokens[index];
2980 *pcol = cur_token->col;
2981 return cur_token->line;
/* DSC (STR) expands to two comma-separated arguments: STR cast to
   const U_CHAR *, and sizeof STR - 1. For a string literal the second
   is its length without the terminating NUL. */
2984 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
/* Three-letter month abbreviations, indexed by tm_mon when
   special_symbol formats the date string. */
2985 static const char * const monthnames[] =
2987 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
2988 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
2991 /* Handle builtin macros like __FILE__. NODE is the hash node of
   the builtin and TOKEN the token that named it. The result token
   depends on NODE->type, and ASSIGN_FLAGS_AND_POS is applied to it
   with TOKEN at the end.
   NOTE(review): the switch statement, most case labels and the
   return are not visible in this listing. */
2992 static const cpp_token *
2993 special_symbol (pfile, node, token)
2996 const cpp_token *token;
/* Start from the current buffer; for T_BASE_FILE follow the
   CPP_PREV_BUFFER links until there is no previous buffer. The
   nominal file name of the buffer reached becomes a string token. */
3008 ip = CPP_BUFFER (pfile);
3013 if (node->type == T_BASE_FILE)
3014 while (CPP_PREV_BUFFER (ip) != NULL)
3015 ip = CPP_PREV_BUFFER (ip);
3017 file = ip->nominal_fname;
3019 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3024 case T_INCLUDE_LEVEL:
3025 /* pfile->include_depth counts the primary source as level 1,
3026 but historically __INCLUDE_DEPTH__ has called the primary
3028 result = alloc_number_token (pfile, pfile->include_depth - 1);
3032 /* If __LINE__ is embedded in a macro, it must expand to the
3033 line of the macro's invocation, not its definition.
3034 Otherwise things like assert() will not work properly. */
3035 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3042 #ifdef STDC_0_IN_SYSTEM_HEADERS
3043 if (CPP_IN_SYSTEM_HEADER (pfile)
3044 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3047 result = alloc_number_token (pfile, stdc);
/* The date and time tokens are built once, on first use, and cached
   in pfile->date and pfile->time. */
3053 if (pfile->date == 0)
3055 /* Allocate __DATE__ and __TIME__ from permanent storage,
3056 and save them in pfile so we don't have to do this again.
3057 We don't generate these strings at init time because
3058 time() and localtime() are very slow on some systems. */
3059 time_t tt = time (NULL);
3060 struct tm *tb = localtime (&tt);
/* The literals passed through DSC are placeholders. They fix the
   string lengths at 11 and 8 characters; the real values are printed
   over the text below. */
3062 pfile->date = make_string_token
3063 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3064 pfile->time = make_string_token
3065 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
/* NOTE(review): writing through val.str.text assumes that
   make_string_token gave the token its own writable copy of the
   text - confirm. No check of localtime's result for NULL is visible
   here. "%s %2d %4d" yields 11 characters for a four-digit year, the
   placeholder's length. */
3067 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3068 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3069 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3070 tb->tm_hour, tb->tm_min, tb->tm_sec);
3072 result = node->type == T_DATE ? pfile->date: pfile->time;
3076 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3080 cpp_ice (pfile, "invalid special hash type");
/* NOTE(review): presumably carries TOKEN's flags and position over to
   RESULT - confirm against the macro's definition. */
3084 ASSIGN_FLAGS_AND_POS (result, token);
3089 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3090 if it hasn't happened already.
   The statements visible here initialize the token list, set
   no_expand_level to UINT_MAX, allocate a context stack of 20
   entries, and point the base context (contexts[0]) at
   pfile->token_list.
   NOTE(review): nothing shown in this listing touches
   pfile->input_buffer or _cpp_trigraph_map; the first sentence of
   this comment may be stale - confirm against the full source. */
3093 _cpp_init_input_buffer (pfile)
3098 _cpp_init_toklist (&pfile->token_list, 0);
/* pop_context compares no_expand_level with UINT_MAX to decide
   whether lex_next overwrites the token list. */
3099 pfile->no_expand_level = UINT_MAX;
3100 pfile->context_cap = 20;
3101 pfile->cur_context = 0;
3103 pfile->contexts = (cpp_context *)
3104 xmalloc (pfile->context_cap * sizeof (cpp_context));
3106 /* Clear the base context. */
3107 base = &pfile->contexts[0];
3108 base->u.list = &pfile->token_list;
3114 base->pushed_token = 0;
3117 /* Moves to the end of the directive line, popping contexts as necessary. */
/* Drops every context above the base one, freeing any macro args they
   carry, then empties the base context and clears
   pfile->token_list.directive. */
3120 _cpp_skip_rest_of_line (pfile)
3123 /* Discard all stacked contexts. */
3125 for (i = pfile->cur_context; i > 0; i--)
3126 if (pfile->contexts[i].args)
3127 free_macro_args (pfile->contexts[i].args);
/* A no-expand level that referred to the current context or one below
   it is reset to 0, the base context. */
3129 if (pfile->no_expand_level <= pfile->cur_context)
3130 pfile->no_expand_level = 0;
3131 pfile->cur_context = 0;
3133 /* Clear the base context, and clear the directive pointer so that
3134 get_raw_token will advance to the next line. */
3135 pfile->contexts[0].count = 0;
3136 pfile->contexts[0].posn = 0;
3137 pfile->token_list.directive = 0;
3140 /* Directive handler wrapper used by the command line option processor. */
/* Runs the handler of directive DIR on the COUNT bytes at BUF. The
   text is pushed as a new buffer and lexed as one line. The handler
   is called with macro expansion prevented unless DIR's flags include
   EXPAND; then the rest of the line is discarded and the buffer is
   popped. Nothing is done when cpp_push_buffer returns NULL. */
3143 _cpp_run_directive (pfile, dir, buf, count, name)
3145 const struct directive *dir;
3150 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3152 unsigned int prev_lvl = 0;
/* NOTE(review): the test choosing between NAME and the
   "<command line>" placeholder is not visible in this listing;
   presumably NAME is used when it is non-null - confirm. */
3155 CPP_BUFFER (pfile)->nominal_fname = name;
3157 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3158 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3160 /* Scan the line now, else prevent_macro_expansion won't work. */
3161 lex_next (pfile, 1);
3162 if (! (dir->flags & EXPAND))
3163 prev_lvl = prevent_macro_expansion (pfile);
3165 (void) (*dir->handler) (pfile);
3167 if (! (dir->flags & EXPAND))
3168 restore_macro_expansion (pfile, prev_lvl);
3170 _cpp_skip_rest_of_line (pfile);
3171 cpp_pop_buffer (pfile);