1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
56 /* Flags for cpp_context. */
57 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
62 typedef struct cpp_context cpp_context;
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
74 struct macro_args *args; /* The arguments for a function-like
75 macro. NULL otherwise. */
76 unsigned short posn; /* Current posn, index into u. */
77 unsigned short count; /* No. of tokens in u. */
82 typedef struct macro_args macro_args;
86 const cpp_token **tokens;
87 unsigned int capacity;
92 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
93 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
94 macro_args *, unsigned int *));
95 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
96 static void save_token PARAMS ((macro_args *, const cpp_token *));
97 static int pop_context PARAMS ((cpp_reader *));
98 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
99 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
100 static void free_macro_args PARAMS ((macro_args *));
102 #define auto_expand_name_space(list) \
103 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
104 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
106 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
108 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
111 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
112 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
114 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
115 const unsigned char *));
116 static int skip_block_comment PARAMS ((cpp_reader *));
117 static int skip_line_comment PARAMS ((cpp_reader *));
118 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
119 static void skip_whitespace PARAMS ((cpp_reader *, int));
120 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
121 const U_CHAR *, const U_CHAR *));
122 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
123 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
125 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
126 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
127 const unsigned char *,
128 unsigned int, unsigned int));
129 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
130 static int lex_next PARAMS ((cpp_reader *, int));
131 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
134 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
135 static void expand_context_stack PARAMS ((cpp_reader *));
136 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
138 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
140 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
142 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
144 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
145 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
147 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
148 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
150 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
151 const cpp_token *, int *));
152 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
153 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
154 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
155 static void release_temp_tokens PARAMS ((cpp_reader *));
156 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
157 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
159 #define INIT_TOKEN_STR(list, token) \
160 do {(token)->val.str.len = 0; \
161 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
164 #define VALID_SIGN(c, prevc) \
165 (((c) == '+' || (c) == '-') && \
166 ((prevc) == 'e' || (prevc) == 'E' \
167 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
169 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
170 character, if any, is in buffer. */
172 #define handle_newline(cur, limit, c) \
174 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
176 pfile->buffer->lineno++; \
177 pfile->buffer->line_base = (cur); \
178 pfile->col_adjust = 0; \
181 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
182 #define PREV_TOKEN_TYPE (cur_token[-1].type)
184 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
185 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
186 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
187 #define BACKUP_DIGRAPH(ttype) do { \
188 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
190 /* An upper bound on the number of bytes needed to spell a token,
191 including preceding whitespace. */
192 #define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
193 ? (token)->val.str.len \
194 : (TOKEN_SPELL(token) == SPELL_IDENT \
195 ? (token)->val.node->length \
198 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
199 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
201 #define ASSIGN_FLAGS_AND_POS(d, s) \
202 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
203 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
206 /* f is flags, just consisting of PREV_WHITE | BOL. */
207 #define MODIFY_FLAGS_AND_POS(d, s, f) \
208 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
209 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
212 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
213 #define I(e, s) {SPELL_IDENT, s},
214 #define S(e, s) {SPELL_STRING, s},
215 #define C(e, s) {SPELL_CHAR, s},
216 #define N(e, s) {SPELL_NONE, s},
218 const struct token_spelling
219 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
227 /* For debugging: the internal names of the tokens. */
228 #define T(e, s) U STRINGX(e),
229 #define I(e, s) U STRINGX(e),
230 #define S(e, s) U STRINGX(e),
231 #define C(e, s) U STRINGX(e),
232 #define N(e, s) U STRINGX(e),
234 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
242 /* The following table is used by trigraph_ok/trigraph_replace. If we
243 have designated initializers, it can be constant data; otherwise,
244 it is set up at runtime by _cpp_init_input_buffer. */
246 #if (GCC_VERSION >= 2007)
247 #define init_trigraph_map() /* nothing */
248 #define TRIGRAPH_MAP \
249 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
251 #define s(p, v) [p] = v,
253 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
254 static void init_trigraph_map PARAMS ((void)) { \
255 unsigned char *x = trigraph_map;
257 #define s(p, v) x[p] = v;
261 s('=', '#') s(')', ']') s('!', '|')
262 s('(', '[') s('\'', '^') s('>', '}')
263 s('/', '\\') s('<', '{') s('-', '~')
270 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
273 _cpp_grow_token_buffer (pfile, n)
277 long old_written = CPP_WRITTEN (pfile);
278 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
279 pfile->token_buffer = (U_CHAR *)
280 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
281 CPP_SET_WRITTEN (pfile, old_written);
284 /* Deal with the annoying semantics of fwrite. */
286 safe_fwrite (pfile, buf, len, fp)
296 count = fwrite (buf, 1, len, fp);
305 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
308 /* Notify the compiler proper that the current line number has jumped,
309 or the current file name has changed. */
312 output_line_command (pfile, print, line)
317 cpp_buffer *ip = CPP_BUFFER (pfile);
318 enum { same = 0, enter, leave, rname } change;
319 static const char * const codes[] = { "", " 1", " 2", "" };
324 /* End the previous line of text. */
325 if (pfile->need_newline)
326 putc ('\n', print->outf);
327 pfile->need_newline = 0;
329 if (CPP_OPTION (pfile, no_line_commands))
332 /* If ip is null, we've been called from cpp_finish, and they just
333 needed the final flush and trailing newline. */
337 if (pfile->include_depth == print->last_id)
339 /* Determine whether the current filename has changed, and if so,
340 how. 'nominal_fname' values are unique, so they can be compared
341 by comparing pointers. */
342 if (ip->nominal_fname == print->last_fname)
349 if (pfile->include_depth > print->last_id)
353 print->last_id = pfile->include_depth;
355 print->last_fname = ip->nominal_fname;
357 /* If the current file has not changed, we can output a few newlines
358 instead if we want to increase the line number by a small amount.
359 We cannot do this if print->lineno is zero, because that means we
360 haven't output any line commands yet. (The very first line
361 command output is a `same_file' command.) */
362 if (change == same && print->lineno > 0
363 && line >= print->lineno && line < print->lineno + 8)
365 while (line > print->lineno)
367 putc ('\n', print->outf);
373 #ifndef NO_IMPLICIT_EXTERN_C
374 if (CPP_OPTION (pfile, cplusplus))
375 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
377 ip->inc->sysp ? " 3" : "",
378 (ip->inc->sysp == 2) ? " 4" : "");
381 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
383 ip->inc->sysp ? " 3" : "");
384 print->lineno = line;
387 /* Write the contents of the token_buffer to the output stream, and
388 clear the token_buffer. Also handles generating line commands and
389 keeping track of file transitions. */
392 cpp_output_tokens (pfile, print, line)
397 if (CPP_WRITTEN (pfile) - print->written)
399 safe_fwrite (pfile, pfile->token_buffer,
400 CPP_WRITTEN (pfile) - print->written, print->outf);
401 pfile->need_newline = 1;
405 CPP_SET_WRITTEN (pfile, print->written);
407 output_line_command (pfile, print, line);
410 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
413 cpp_scan_buffer_nooutput (pfile)
416 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
417 const cpp_token *token;
419 /* In no-output mode, we can ignore everything but directives. */
422 token = _cpp_get_token (pfile);
424 if (token->type == CPP_EOF)
426 cpp_pop_buffer (pfile);
427 if (CPP_BUFFER (pfile) == stop)
431 if (token->type == CPP_HASH && token->flags & BOL
432 && pfile->token_list.directive)
434 process_directive (pfile, token);
438 _cpp_skip_rest_of_line (pfile);
442 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
444 cpp_scan_buffer (pfile, print)
448 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
449 const cpp_token *token, *prev = 0;
453 token = _cpp_get_token (pfile);
454 if (token->type == CPP_EOF)
456 cpp_pop_buffer (pfile);
458 if (CPP_BUFFER (pfile))
459 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
461 if (CPP_BUFFER (pfile) == stop)
468 if (token->flags & BOL)
470 if (token->type == CPP_HASH && pfile->token_list.directive)
472 process_directive (pfile, token);
476 cpp_output_tokens (pfile, print, pfile->token_list.line);
480 if (token->type != CPP_PLACEMARKER)
481 output_token (pfile, token, prev);
487 /* Scan a single line of the input into the token_buffer. */
489 cpp_scan_line (pfile)
492 const cpp_token *token, *prev = 0;
494 if (pfile->buffer == NULL)
499 token = cpp_get_token (pfile);
500 if (token->type == CPP_EOF)
502 cpp_pop_buffer (pfile);
506 /* If the last token on a line results from a macro expansion,
507 the check below will fail to stop us from proceeding to the
508 next line - so make sure we stick in a newline, at least. */
509 if (token->flags & BOL)
510 CPP_PUTC (pfile, '\n');
512 output_token (pfile, token, prev);
515 while (pfile->cur_context > 0
516 || pfile->contexts[0].posn < pfile->contexts[0].count);
520 /* Helper routine used by parse_include, which can't see spell_token.
521 Reinterpret the current line as an h-char-sequence (< ... >); we are
522 looking at the first token after the <. */
524 _cpp_glue_header_name (pfile)
527 unsigned int written = CPP_WRITTEN (pfile);
535 t = _cpp_get_token (pfile);
536 if (t->type == CPP_GREATER || t->type == CPP_EOF)
539 CPP_RESERVE (pfile, TOKEN_LEN (t));
540 if (t->flags & PREV_WHITE)
541 CPP_PUTC_Q (pfile, ' ');
542 pfile->limit = spell_token (pfile, t, pfile->limit);
545 if (t->type == CPP_EOF)
546 cpp_error (pfile, "missing terminating > character");
548 len = CPP_WRITTEN (pfile) - written;
550 memcpy (buf, pfile->token_buffer + written, len);
551 CPP_SET_WRITTEN (pfile, written);
553 hdr = get_temp_token (pfile);
554 hdr->type = CPP_HEADER_NAME;
556 hdr->val.str.text = buf;
557 hdr->val.str.len = len;
561 /* Token-buffer helper functions. */
563 /* Expand a token list's string space. It is *vital* that
564 list->tokens_used is correct, to get pointer fix-up right. */
566 _cpp_expand_name_space (list, len)
570 const U_CHAR *old_namebuf;
572 old_namebuf = list->namebuf;
573 list->name_cap += len;
574 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
576 /* Fix up token text pointers. */
577 if (list->namebuf != old_namebuf)
581 for (i = 0; i < list->tokens_used; i++)
582 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
583 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
587 /* If there is not enough room for LEN more characters, expand the
588 list by just enough to have room for LEN characters. */
590 _cpp_reserve_name_space (list, len)
594 unsigned int room = list->name_cap - list->name_used;
597 _cpp_expand_name_space (list, len - room);
600 /* Expand the number of tokens in a list. */
602 _cpp_expand_token_space (list, count)
608 list->tokens_cap += count;
609 n = list->tokens_cap;
610 if (list->flags & LIST_OFFSET)
612 list->tokens = (cpp_token *)
613 xrealloc (list->tokens, n * sizeof (cpp_token));
614 if (list->flags & LIST_OFFSET)
615 list->tokens++; /* Skip the dummy. */
618 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
619 an extra token in front of the token list, as this allows the lexer
620 to always peek at the previous token without worrying about
621 underflowing the list, and some initial space. Otherwise, no
622 token- or name-space is allocated, and there is no dummy token. */
624 _cpp_init_toklist (list, flags)
628 if (flags == NO_DUMMY_TOKEN)
630 list->tokens_cap = 0;
638 /* Initialize token space. Put a dummy token before the start
639 that will fail matches. */
640 list->tokens_cap = 256; /* 4K's worth. */
641 list->tokens = (cpp_token *)
642 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
643 list->tokens[0].type = CPP_EOF;
646 /* Initialize name space. */
647 list->name_cap = 1024;
648 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
649 list->flags = LIST_OFFSET;
652 _cpp_clear_toklist (list);
655 /* Clear a token list. */
657 _cpp_clear_toklist (list)
660 list->tokens_used = 0;
664 list->params_len = 0;
665 list->flags &= LIST_OFFSET; /* clear all but that one */
668 /* Free a token list. Does not free the list itself, which may be
669 embedded in a larger structure. */
671 _cpp_free_toklist (list)
672 const cpp_toklist *list;
674 if (list->flags & LIST_OFFSET)
675 free (list->tokens - 1); /* Backup over dummy token. */
678 free (list->namebuf);
681 /* Compare two tokens. */
683 _cpp_equiv_tokens (a, b)
684 const cpp_token *a, *b;
686 if (a->type == b->type && a->flags == b->flags)
687 switch (token_spellings[a->type].type)
689 default: /* Keep compiler happy. */
694 return a->val.aux == b->val.aux; /* arg_no or character. */
696 return a->val.node == b->val.node;
698 return (a->val.str.len == b->val.str.len
699 && !memcmp (a->val.str.text, b->val.str.text,
706 /* Compare two token lists. */
708 _cpp_equiv_toklists (a, b)
709 const cpp_toklist *a, *b;
713 if (a->tokens_used != b->tokens_used
714 || a->flags != b->flags
715 || a->paramc != b->paramc)
718 for (i = 0; i < a->tokens_used; i++)
719 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
726 Compares the token TOKEN to the NUL-terminated string STRING.
727 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
730 cpp_ideq (token, string)
731 const cpp_token *token;
734 if (token->type != CPP_NAME)
737 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
742 The original lexer in cpplib was made up of two passes: a first pass
743 that replaced trigraphs and deleted escaped newlines, and a second
744 pass that tokenized the result of the first pass. Tokenisation was
745 performed by peeking at the next character in the input stream. For
746 example, if the input stream contained "!=", the handler for the !
747 character would peek at the next character, and if it were a '='
748 would skip over it, and return a "!=" token, otherwise it would
749 return just the "!" token.
751 To implement a single-pass lexer, this peeking ahead is unworkable.
752 An arbitrary number of escaped newlines, and trigraphs (in particular
753 ??/ which translates to the escape \), could separate the '!' and '='
754 in the input stream, yet the next token is still a "!=".
756 Suppose instead that we lex by one logical line at a time, producing
757 a token list or stack for each logical line, and when seeing the '!'
758 push a CPP_NOT token on the list. Then if the '!' is part of a
759 longer token ("!=") we know we must see the remainder of the token by
760 the time we reach the end of the logical line. Thus we can have the
761 '=' handler look at the previous token (at the end of the list / top
762 of the stack) and see if it is a "!" token, and if so, instead of
763 pushing a "=" token revise the existing token to be a "!=" token.
765 This works in the presence of escaped newlines, because the '\' would
766 have been pushed on the top of the stack as a CPP_BACKSLASH. The
767 newline ('\n' or '\r') handler looks at the token at the top of the
768 stack to see if it is a CPP_BACKSLASH, and if so discards both.
769 Hence the '=' handler would never see any intervening tokens.
771 To make trigraphs work in this context (trigraphs have the highest
772 precedence and are converted before anything else), the '?' handler
773 looks ahead to see if it is a trigraph, and if so skips the trigraph
774 and pushes the token it represents onto the top of the stack. This
775 also works in the particular case of a CPP_BACKSLASH trigraph.
777 To the preprocessor, whitespace is only significant to the point of
778 knowing whether whitespace precedes a particular token. For example,
779 the '=' handler needs to know whether there was whitespace between it
780 and a "!" token on the top of the stack, to make the token conversion
781 decision correctly. So each token has a PREV_WHITE flag to
782 indicate this - the standard permits consecutive whitespace to be
783 regarded as a single space. The compiler front ends are not
784 interested in whitespace at all; they just require a token stream.
785 Another place where whitespace is significant to the preprocessor is
786 a #define statement - if there is whitespace between the macro name
787 and an initial "(" token the macro is "object-like", otherwise it is
788 a function-like macro that takes arguments.
790 However, all is not rosy. Parsing of identifiers, numbers, comments
791 and strings becomes trickier because of the possibility of raw
792 trigraphs and escaped newlines in the input stream.
794 The trigraphs are three consecutive characters beginning with two
795 question marks. A question mark is not valid as part of a number or
796 identifier, so parsing of a number or identifier terminates normally
797 upon reaching it, returning to the mainloop which handles the
798 trigraph just like it would in any other position. Similarly for the
799 backslash of a backslash-newline combination. So we just need the
800 escaped-newline dropper in the mainloop to check if the token on the
801 top of the stack after dropping the escaped newline is a number or
802 identifier, and if so to continue processing it as if nothing had
805 For strings, we replace trigraphs whenever we reach a quote or
806 newline, because there might be a backslash trigraph escaping them.
807 We need to be careful that we start trigraph replacing from where we
808 left off previously, because it is possible for a first scan to leave
809 "fake" trigraphs that a second scan would pick up as real (e.g. the
810 sequence "????/\n=" would find a fake ??= trigraph after removing the
813 For line comments, on reaching a newline we scan the previous
814 character(s) to see if it is escaped, and continue if it is. Block
815 comments ignore everything and just focus on finding the comment
816 termination mark. The only difficult thing, and it is surprisingly
817 tricky, is checking if an asterisk precedes the final slash since
818 they could be separated by escaped newlines. If the preprocessor is
819 invoked with the output comments option, we don't bother removing
820 escaped newlines and replacing trigraphs for output.
822 Finally, numbers can begin with a period, which is pushed initially
823 as a CPP_DOT token in its own right. The digit handler checks if the
824 previous token was a CPP_DOT not separated by whitespace, and if so
825 pops it off the stack and pushes a period into the number's buffer
826 before calling the number parser.
830 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
831 U":>", U"<%", U"%>"};
833 /* Call when a trigraph is encountered. It warns if necessary, and
834 returns true if the trigraph should be honoured. END is the third
835 character of a trigraph in the input stream. */
837 trigraph_ok (pfile, end)
839 const unsigned char *end;
841 int accept = CPP_OPTION (pfile, trigraphs);
843 if (CPP_OPTION (pfile, warn_trigraphs))
845 unsigned int col = end - 1 - pfile->buffer->line_base;
847 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
848 "trigraph ??%c converted to %c",
849 (int) *end, (int) trigraph_map[*end]);
851 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
852 "trigraph ??%c ignored", (int) *end);
857 /* Scan a string for trigraphs, warning or replacing them inline as
858 appropriate. When parsing a string, we must call this routine
859 before processing a newline character (if trigraphs are enabled),
860 since the newline might be escaped by a preceding backslash
861 trigraph sequence. Returns a pointer to the end of the name after
864 static unsigned char *
865 trigraph_replace (pfile, src, limit)
868 unsigned char *limit;
872 /* Starting with src[1], find two consecutive '?'. The case of no
873 trigraphs is streamlined. */
875 for (src++; src + 1 < limit; src += 2)
880 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
883 else if (src + 2 == limit || src[1] != '?')
886 /* Check if it really is a trigraph. */
887 if (trigraph_map[src[2]] == 0)
895 /* Now we have a trigraph, we need to scan the remaining buffer, and
896 copy-shifting its contents left if replacement is enabled. */
897 for (; src + 2 < limit; dest++, src++)
898 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
902 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
903 *dest = trigraph_map[*src];
906 /* Copy remaining (at most 2) characters. */
912 /* If CUR is a backslash or the end of a trigraphed backslash, return
913 a pointer to its beginning, otherwise NULL. We don't read beyond
914 the buffer start, because there is the start of the comment in the
916 static const unsigned char *
917 backslash_start (pfile, cur)
919 const unsigned char *cur;
923 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
924 && trigraph_ok (pfile, cur))
929 /* Skip a C-style block comment. This is probably the trickiest
930 handler. We find the end of the comment by seeing if an asterisk
931 is before every '/' we encounter. The nasty complication is that a
932 previous asterisk may be separated by one or more escaped newlines.
933 Returns non-zero if comment terminated by EOF, zero otherwise. */
935 skip_block_comment (pfile)
938 cpp_buffer *buffer = pfile->buffer;
939 const unsigned char *char_after_star = 0;
940 const unsigned char *cur = buffer->cur;
942 for (; cur < buffer->rlimit; )
944 unsigned char c = *cur++;
946 /* People like decorating comments with '*', so check for
947 '/' instead for efficiency. */
950 /* Don't view / then * then / as finishing the comment. */
951 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
952 || cur - 1 == char_after_star)
958 /* Warn about potential nested comments, but not when
959 the final character inside the comment is a '/'.
960 Don't bother to get it right across escaped newlines. */
961 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
962 && cur[0] == '*' && cur[1] != '/')
965 cpp_warning (pfile, "'/*' within comment");
968 else if (is_vspace (c))
970 const unsigned char* bslash = backslash_start (pfile, cur - 2);
972 handle_newline (cur, buffer->rlimit, c);
973 /* Work correctly if there is an asterisk before an
974 arbitrarily long sequence of escaped newlines. */
975 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
976 char_after_star = cur;
981 adjust_column (pfile, cur - 1);
988 /* Skip a C++ line comment. Handles escaped newlines. Returns
989 non-zero if a multiline comment. */
991 skip_line_comment (pfile)
994 cpp_buffer *buffer = pfile->buffer;
995 register const unsigned char *cur = buffer->cur;
998 for (; cur < buffer->rlimit; )
1000 unsigned char c = *cur++;
1004 /* Check for a (trigraph?) backslash escaping the newline. */
1005 if (!backslash_start (pfile, cur - 2))
1008 handle_newline (cur, buffer->rlimit, c);
1014 buffer->cur = cur - 1; /* Leave newline for caller. */
1018 /* TAB points to a \t character. Update col_adjust so we track the
1019 column correctly. */
1021 adjust_column (pfile, tab)
1025 /* Zero-based column. */
1026 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1028 /* Round it up to multiple of the tabstop, but subtract 1 since the
1029 tab itself occupies a character position. */
1030 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1031 - col % CPP_OPTION (pfile, tabstop)) - 1;
1034 /* Skips whitespace, stopping at next non-whitespace character.
1035 Adjusts pfile->col_adjust to account for tabs. This enables tokens
1036 to be assigned the correct column. */
1038 skip_whitespace (pfile, in_directive)
1042 cpp_buffer *buffer = pfile->buffer;
1043 unsigned short warned = 0;
1045 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1046 while (buffer->cur < buffer->rlimit)
1048 unsigned char c = *buffer->cur;
1050 if (!is_nvspace (c))
1054 /* Horizontal space always OK. */
1058 adjust_column (pfile, buffer->cur - 1);
1059 /* Must be \f \v or \0. */
1063 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1064 CPP_BUF_COL (buffer),
1065 "embedded null character ignored");
1068 else if (in_directive && CPP_PEDANTIC (pfile))
1069 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1070 CPP_BUF_COL (buffer),
1071 "%s in preprocessing directive",
1072 c == '\f' ? "form feed" : "vertical tab");
1076 /* Parse (append) an identifier. Calculates the hash value of the
1077 token while parsing, for performance. The algorithm *must* match
1079 static const U_CHAR *
1080 parse_name (pfile, tok, cur, rlimit)
1083 const U_CHAR *cur, *rlimit;
1091 while (cur < rlimit)
1093 if (! is_idchar (*cur))
1095 /* $ is not a legal identifier character in the standard, but is
1096 commonly accepted as an extension. Don't warn about it in
1097 skipped conditional blocks. */
1098 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1100 CPP_BUFFER (pfile)->cur = cur;
1101 cpp_pedwarn (pfile, "'$' character in identifier");
1104 r = HASHSTEP (r, cur);
1109 if (tok->val.node == 0)
1110 tok->val.node = _cpp_lookup_with_hash (pfile, name, len, r);
1113 unsigned int oldlen = tok->val.node->length;
1114 U_CHAR *newname = alloca (oldlen + len);
1115 memcpy (newname, tok->val.node->name, oldlen);
1116 memcpy (newname + oldlen, name, len);
1117 tok->val.node = cpp_lookup (pfile, newname, len + oldlen);
1123 /* Parse (append) a number. */
1125 parse_number (pfile, list, name)
1130 const unsigned char *name_limit;
1131 unsigned char *namebuf;
1132 cpp_buffer *buffer = pfile->buffer;
1133 register const unsigned char *cur = buffer->cur;
1136 name_limit = list->namebuf + list->name_cap;
1137 namebuf = list->namebuf + list->name_used;
1139 for (; cur < buffer->rlimit && namebuf < name_limit; )
1141 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1143 /* Perhaps we should accept '$' here if we accept it for
1144 identifiers. We know namebuf[-1] is safe, because for c to
1145 be a sign we must have pushed at least one character. */
1146 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1153 /* Run out of name space? */
1154 if (cur < buffer->rlimit)
1156 list->name_used = namebuf - list->namebuf;
1157 auto_expand_name_space (list);
1163 name->len = namebuf - name->text;
1164 list->name_used = namebuf - list->namebuf;
1167 /* Places a string terminated by an unescaped TERMINATOR into a
1168 cpp_string, which should be expandable and thus at the top of the
1169 list's stack. Handles embedded trigraphs, if necessary, and
drops backslash-newlines.
1172 Can be used for character constants (terminator = '\''), string
1173 constants ('"') and angled headers ('>'). Multi-line string
constants are accepted as an extension (even in directives, as the
comment at the multi-line check below explains); character constants
and header names may not extend over multiple lines.

The text is accumulated in LIST's name buffer and described by
TOKEN's val.str. A string that is never terminated is reported as
an error. */
1177 parse_string (pfile, list, token, terminator)
1181 unsigned int terminator;
1183 cpp_buffer *buffer = pfile->buffer;
1184 cpp_string *name = &token->val.str;
1185 register const unsigned char *cur = buffer->cur;
1186 const unsigned char *name_limit;
1187 unsigned char *namebuf;
/* Number of null characters seen; selects the warning at the end. */
1188 unsigned int null_count = 0;
/* Offset in the name buffer from which trigraph replacement is next
applied. */
1189 unsigned int trigraphed = list->name_used;
1192 name_limit = list->namebuf + list->name_cap;
1193 namebuf = list->namebuf + list->name_used;
1195 for (; cur < buffer->rlimit && namebuf < name_limit; )
1197 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1201 else if (c == terminator || is_vspace (c))
1203 /* Needed for trigraph_replace and multiline string warning. */
1206 /* Scan for trigraphs before checking if backslash-escaped. */
1207 if ((CPP_OPTION (pfile, trigraphs)
1208 || CPP_OPTION (pfile, warn_trigraphs))
1209 && namebuf - (list->namebuf + trigraphed) >= 3)
1211 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1213 /* The test above guarantees trigraphed will be positive. */
1214 trigraphed = namebuf - list->namebuf - 2;
1217 namebuf--; /* Drop the newline / terminator from the name. */
1220 /* Drop a backslash newline, and continue. */
1221 if (namebuf[-1] == '\\')
1223 handle_newline (cur, buffer->rlimit, c);
1230 /* In assembly language, silently terminate strings of
1231 either variety at end of line. This is a kludge
1232 around not knowing where comments are. */
1233 if (CPP_OPTION (pfile, lang_asm))
1236 /* Character constants and header names may not extend
1237 over multiple lines. In Standard C, neither may
1238 strings. We accept multiline strings as an
1239 extension. (Even in directives - otherwise, glibc's
1240 longlong.h breaks.) */
1241 if (terminator != '"')
1244 cur++; /* Move forwards again. */
/* Remember where the first multi-line string started, so that a
later unterminated-string error can point back at it. */
1246 if (pfile->multiline_string_line == 0)
1248 pfile->multiline_string_line = token->line;
1249 pfile->multiline_string_column = token->col;
1250 if (CPP_PEDANTIC (pfile))
1251 cpp_pedwarn (pfile, "multi-line string constant");
1255 handle_newline (cur, buffer->rlimit, c);
1259 unsigned char *temp;
1261 /* An odd number of consecutive backslashes represents
1262 an escaped terminator. */
1264 while (temp >= name->text && *temp == '\\')
1267 if ((namebuf - temp) & 1)
1274 /* Run out of name space? */
1275 if (cur < buffer->rlimit)
1277 list->name_used = namebuf - list->namebuf;
1278 auto_expand_name_space (list);
1282 /* We may not have trigraph-replaced the input for this code path,
1283 but as the input is in error by being unterminated we don't
1284 bother. Prevent warnings about no newlines at EOF. */
1285 if (is_vspace (cur[-1]))
1289 cpp_error (pfile, "missing terminating %c character", (int) terminator);
/* If a multi-line string began on a different line, report that line
as the possible start of the unterminated literal, then clear the
record. */
1291 if (terminator == '\"' && pfile->multiline_string_line != list->line
1292 && pfile->multiline_string_line != 0)
1294 cpp_error_with_line (pfile, pfile->multiline_string_line,
1295 pfile->multiline_string_column,
1296 "possible start of unterminated string literal");
1297 pfile->multiline_string_line = 0;
/* Publish the final length of the string and of the name buffer. */
1302 name->len = namebuf - name->text;
1303 list->name_used = namebuf - list->namebuf;
/* The wording is singular or plural according to how many nulls
were counted. */
1306 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1307 : "null character preserved"));
1310 /* Store a comment in LIST as a CPP_COMMENT token in TOKEN. FROM
points at LEN bytes of comment text to copy.
The character TYPE helps us distinguish comment types: '*' = C
1311 style, '/' = C++ style. For code simplicity, the stored comment
1312 includes the comment start and any terminator. */
/* Number of bytes added to a saved comment's length for its start. */
1314 #define COMMENT_START_LEN 2
1316 save_comment (list, token, from, len, type)
1319 const unsigned char *from;
1323 unsigned char *buffer;
/* From here on LEN is the stored length, comment start included. */
1325 len += COMMENT_START_LEN;
/* Grow the name buffer if the stored comment would not fit. */
1327 if (list->name_used + len > list->name_cap)
1328 _cpp_expand_name_space (list, len);
1330 INIT_TOKEN_STR (list, token);
1331 token->type = CPP_COMMENT;
1332 token->val.str.len = len;
/* Claim LEN bytes of name space; BUFFER is where they begin. */
1334 buffer = list->namebuf + list->name_used;
1335 list->name_used += len;
1337 /* Copy the comment. */
/* LEN - COMMENT_START_LEN is the length the caller passed in. */
1348 memcpy (buffer, from, len - COMMENT_START_LEN);
1352 * The tokenizer's main loop. Returns a token list, representing a
1353 * logical line in the input file. On EOF after some tokens have
1354 * been processed, we return immediately. Then in next call, or if
1355 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1356 * token is placed in the list.
1358 * Implementation relies almost entirely on lookback, rather than
1359 * looking forwards. This means that tokenization requires just
1360 * a single pass of the file, even in the presence of trigraphs and
1361 * escaped newlines, providing significant performance benefits.
1362 * Trigraph overhead is negligible if they are disabled, and low
1363 * even when enabled.
/* Helpers for lex_line; both refer to its locals LIST, CUR_TOKEN and
FIRST_TOKEN. KNOWN_DIRECTIVE is true once LIST's directive field has
been set. MIGHT_BE_DIRECTIVE is true when the token being lexed is
the second one of this logical line and the first one was a CPP_HASH. */
1366 #define KNOWN_DIRECTIVE() (list->directive != 0)
1367 #define MIGHT_BE_DIRECTIVE() \
1368 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
/* Tokenize from PFILE's current buffer, appending tokens to LIST
after any it already holds. pfile->in_lex_line is set for the
duration of the call, and LIST's tokens_used is updated on exit. */
1371 lex_line (pfile, list)
1375 cpp_token *cur_token, *token_limit, *first;
1376 cpp_buffer *buffer = pfile->buffer;
1377 const unsigned char *cur = buffer->cur;
1378 unsigned char flags = 0;
/* Index of the first token this call will produce. */
1379 unsigned int first_token = list->tokens_used;
1381 if (!(list->flags & LIST_OFFSET))
1384 list->file = buffer->nominal_fname;
1385 list->line = CPP_BUF_LINE (buffer);
1386 pfile->col_adjust = 0;
1387 pfile->in_lex_line = 1;
1388 if (cur == buffer->buf)
1389 list->flags |= BEG_OF_FILE;
/* TOKEN_LIMIT is one past the last allocated token; lexing resumes
at the first unused one. */
1392 token_limit = list->tokens + list->tokens_cap;
1393 cur_token = list->tokens + list->tokens_used;
1395 for (; cur < buffer->rlimit && cur_token < token_limit;)
1399 /* Optimize non-vertical whitespace skipping; most tokens are
1400 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1405 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1406 && cur_token > &list->tokens[first_token]));
1410 if (cur == buffer->rlimit)
1416 /* Initialize current token. CPP_EOF will not be fixed up by
1417 expand_name_space. */
1418 list->tokens_used = cur_token - list->tokens + 1;
1419 cur_token->type = CPP_EOF;
1420 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1421 cur_token->line = CPP_BUF_LINE (buffer);
1422 cur_token->flags = flags;
1427 case '0': case '1': case '2': case '3': case '4':
1428 case '5': case '6': case '7': case '8': case '9':
1432 cur--; /* Backup character. */
1433 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1436 INIT_TOKEN_STR (list, cur_token);
1437 /* Prepend an immediately previous CPP_DOT token. */
1440 if (list->name_cap == list->name_used)
1441 auto_expand_name_space (list);
1443 cur_token->val.str.len = 1;
1444 list->namebuf[list->name_used++] = '.';
1448 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1450 parse_number (pfile, list, &cur_token->val.str);
1453 /* Check for # 123 form of #line. */
1454 if (MIGHT_BE_DIRECTIVE ())
1455 list->directive = _cpp_check_linemarker (pfile, cur_token,
1456 !(cur_token[-1].flags
1463 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1464 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1465 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1466 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1468 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1469 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1470 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1471 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1473 cur--; /* Backup character. */
1474 cur_token->val.node = 0;
1475 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1478 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1480 if (MIGHT_BE_DIRECTIVE ())
1481 list->directive = _cpp_check_directive (pfile, cur_token,
1482 !(list->tokens[0].flags
1489 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1490 /* Do we have a wide string? */
1491 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1492 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1493 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1496 /* Here c is one of ' " or >. */
1497 INIT_TOKEN_STR (list, cur_token);
1499 parse_string (pfile, list, cur_token, c);
1505 cur_token->type = CPP_DIV;
1508 if (PREV_TOKEN_TYPE == CPP_DIV)
1510 /* We silently allow C++ comments in system headers,
1511 irrespective of conformance mode, because lots of
1512 broken systems do that and trying to clean it up
1513 in fixincludes is a nightmare. */
1514 if (CPP_IN_SYSTEM_HEADER (pfile))
1515 goto do_line_comment;
1516 else if (CPP_OPTION (pfile, cplusplus_comments))
1518 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1519 && ! buffer->warned_cplusplus_comments)
1523 "C++ style comments are not allowed in ISO C89");
1525 "(this will be reported only once per input file)");
1526 buffer->warned_cplusplus_comments = 1;
1530 #if 0 /* Leave until new lexer in place. */
1533 "comment start split across lines");
1535 if (skip_line_comment (pfile))
1536 cpp_warning (pfile, "multi-line comment");
1538 /* Back-up to first '-' or '/'. */
1540 if (!CPP_OPTION (pfile, discard_comments)
1541 && (!KNOWN_DIRECTIVE()
1542 || (list->directive->flags & COMMENTS)))
1543 save_comment (list, cur_token++, cur,
1544 buffer->cur - cur, c);
1557 cur_token->type = CPP_MULT;
1560 if (PREV_TOKEN_TYPE == CPP_DIV)
1563 #if 0 /* Leave until new lexer in place. */
1566 "comment start '/*' split across lines");
1568 if (skip_block_comment (pfile))
1569 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1570 "unterminated comment");
1571 #if 0 /* Leave until new lexer in place. */
1572 else if (buffer->cur[-2] != '*')
1574 "comment end '*/' split across lines");
1576 /* Back up to opening '/'. */
1578 if (!CPP_OPTION (pfile, discard_comments)
1579 && (!KNOWN_DIRECTIVE()
1580 || (list->directive->flags & COMMENTS)))
1581 save_comment (list, cur_token++, cur,
1582 buffer->cur - cur, c);
1589 else if (CPP_OPTION (pfile, cplusplus))
1591 /* In C++, there are .* and ->* operators. */
1592 if (PREV_TOKEN_TYPE == CPP_DEREF)
1593 BACKUP_TOKEN (CPP_DEREF_STAR);
1594 else if (PREV_TOKEN_TYPE == CPP_DOT)
1595 BACKUP_TOKEN (CPP_DOT_STAR);
1603 handle_newline (cur, buffer->rlimit, c);
1604 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1608 /* Remove the escaped newline. Then continue to process
1609 any interrupted name or number. */
1611 /* Backslash-newline may not be immediately followed by
1612 EOF (C99 5.1.1.2). */
1613 if (cur >= buffer->rlimit)
1615 cpp_pedwarn (pfile, "backslash-newline at end of file");
1621 if (cur_token->type == CPP_NAME)
1623 else if (cur_token->type == CPP_NUMBER)
1624 goto continue_number;
1627 /* Remember whitespace setting. */
1628 flags = cur_token->flags;
1635 "backslash and newline separated by space");
1638 else if (MIGHT_BE_DIRECTIVE ())
1640 /* "Null directive." C99 6.10.7: A preprocessing
1641 directive of the form # <new-line> has no effect.
1643 But it is still a directive, and therefore disappears
1646 if (cur_token->flags & PREV_WHITE
1647 && CPP_WTRADITIONAL (pfile))
1648 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1651 /* Skip vertical space until we have at least one token to
1653 if (cur_token != &list->tokens[first_token])
1655 list->line = CPP_BUF_LINE (buffer);
1659 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1660 REVISE_TOKEN (CPP_MINUS_MINUS);
1662 PUSH_TOKEN (CPP_MINUS);
1667 /* The digraph flag checking ensures that ## and %:%:
1668 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1669 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1670 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1671 REVISE_TOKEN (CPP_PASTE);
1673 PUSH_TOKEN (CPP_HASH);
1677 cur_token->type = CPP_COLON;
1680 if (PREV_TOKEN_TYPE == CPP_COLON
1681 && CPP_OPTION (pfile, cplusplus))
1682 BACKUP_TOKEN (CPP_SCOPE);
1683 else if (CPP_OPTION (pfile, digraphs))
1685 /* Digraph: "<:" is a '[' */
1686 if (PREV_TOKEN_TYPE == CPP_LESS)
1687 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1688 /* Digraph: "%:" is a '#' */
1689 else if (PREV_TOKEN_TYPE == CPP_MOD)
1691 (--cur_token)->flags |= DIGRAPH;
1700 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1701 REVISE_TOKEN (CPP_AND_AND);
1703 PUSH_TOKEN (CPP_AND);
1708 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1709 REVISE_TOKEN (CPP_OR_OR);
1711 PUSH_TOKEN (CPP_OR);
1715 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1716 REVISE_TOKEN (CPP_PLUS_PLUS);
1718 PUSH_TOKEN (CPP_PLUS);
1722 /* This relies on equidistance of "?=" and "?" tokens. */
1723 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1724 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1726 PUSH_TOKEN (CPP_EQ);
1730 cur_token->type = CPP_GREATER;
1733 if (PREV_TOKEN_TYPE == CPP_GREATER)
1734 BACKUP_TOKEN (CPP_RSHIFT);
1735 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1736 BACKUP_TOKEN (CPP_DEREF);
1737 else if (CPP_OPTION (pfile, digraphs))
1739 /* Digraph: ":>" is a ']' */
1740 if (PREV_TOKEN_TYPE == CPP_COLON)
1741 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1742 /* Digraph: "%>" is a '}' */
1743 else if (PREV_TOKEN_TYPE == CPP_MOD)
1744 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1751 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1753 REVISE_TOKEN (CPP_LSHIFT);
1756 /* Is this the beginning of a header name? */
1757 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1759 c = '>'; /* Terminator. */
1760 cur_token->type = CPP_HEADER_NAME;
1761 goto do_parse_string;
1763 PUSH_TOKEN (CPP_LESS);
1767 /* Digraph: "<%" is a '{' */
1768 cur_token->type = CPP_MOD;
1769 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1770 && CPP_OPTION (pfile, digraphs))
1771 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1776 if (cur + 1 < buffer->rlimit && *cur == '?'
1777 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1779 /* Handle trigraph. */
1783 case '(': goto make_open_square;
1784 case ')': goto make_close_square;
1785 case '<': goto make_open_brace;
1786 case '>': goto make_close_brace;
1787 case '=': goto make_hash;
1788 case '!': goto make_or;
1789 case '-': goto make_complement;
1790 case '/': goto make_backslash;
1791 case '\'': goto make_xor;
1794 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1796 /* GNU C++ defines <? and >? operators. */
1797 if (PREV_TOKEN_TYPE == CPP_LESS)
1799 REVISE_TOKEN (CPP_MIN);
1802 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1804 REVISE_TOKEN (CPP_MAX);
1808 PUSH_TOKEN (CPP_QUERY);
1812 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1814 && !(cur_token[-1].flags & PREV_WHITE))
1817 PUSH_TOKEN (CPP_ELLIPSIS);
1820 PUSH_TOKEN (CPP_DOT);
1824 case '~': PUSH_TOKEN (CPP_COMPL); break;
1826 case '^': PUSH_TOKEN (CPP_XOR); break;
1828 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1830 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1832 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1834 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1836 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1837 case '!': PUSH_TOKEN (CPP_NOT); break;
1838 case ',': PUSH_TOKEN (CPP_COMMA); break;
1839 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1840 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1841 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1844 if (CPP_OPTION (pfile, dollars_in_ident))
1848 cur_token->val.aux = c;
1849 PUSH_TOKEN (CPP_OTHER);
1854 /* Run out of token space? */
1855 if (cur_token == token_limit)
1857 list->tokens_used = cur_token - list->tokens;
1858 _cpp_expand_token_space (list, 256);
1862 cur_token->flags = flags;
1863 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1865 if (cur > buffer->buf && !is_vspace (cur[-1]))
1866 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1867 CPP_BUF_COLUMN (buffer, cur),
1868 "no newline at end of file");
1869 cur_token++->type = CPP_EOF;
1873 /* All tokens are allocated, so the memory location is fixed. */
1874 first = &list->tokens[first_token];
1876 /* Don't complain about the null directive, nor directives in
1877 assembly source: we don't know where the comments are, and # may
1878 introduce assembler pseudo-ops. Don't complain about invalid
1879 directives in skipped conditional groups (6.10 p4). */
1880 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1881 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1883 if (first[1].type == CPP_NAME)
1884 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1885 (int) first[1].val.node->length, first[1].val.node->name);
1887 cpp_error (pfile, "invalid preprocessing directive");
1890 /* Put EOF at end of known directives. This covers "directives do
1891 not extend beyond the end of the line (description 6.10 part 2)". */
1892 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1894 pfile->first_directive_token = first;
1895 cur_token++->type = CPP_EOF;
1898 /* Directives, known or not, always start a new line. */
1899 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1900 first->flags |= BOL;
1902 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1903 up the invocation of a function-like macro, new line is
1904 considered a normal white-space character. */
1905 first->flags |= PREV_WHITE;
1908 list->tokens_used = cur_token - list->tokens;
1909 pfile->in_lex_line = 0;
1912 /* Write the spelling of a token TOKEN, with any appropriate
1913 whitespace before it, to the token_buffer. PREV is the previous
1914 token, which is used to determine if we need to shove in an extra
1915 space in order to avoid accidental token paste. The spelling itself
is produced by spell_token and appended at pfile->limit. */
1917 output_token (pfile, token, prev)
1919 const cpp_token *token, *prev;
/* A token flagged BOL that has a column number gets leading
whitespace; one flagged only PREV_WHITE gets a single space. */
1923 if (token->col && (token->flags & BOL))
1925 /* Supply enough whitespace to put this token in its original
1926 column. Don't bother trying to reconstruct tabs; we can't
1927 get it right in general, and nothing ought to care. (Yes,
1928 some things do care; the fault lies with them.) */
1929 unsigned char *buffer;
1930 unsigned int spaces = token->col - 1;
1932 CPP_RESERVE (pfile, token->col);
1933 buffer = pfile->limit;
1937 pfile->limit = buffer;
1939 else if (token->flags & PREV_WHITE)
1940 CPP_PUTC (pfile, ' ');
1943 /* Check for and prevent accidental token pasting. */
1944 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1945 CPP_PUTC (pfile, ' ');
1946 /* can_paste doesn't catch all the accidental pastes.
1947 Consider a + ++b - if there is not a space between the + and ++, it
1948 will be misparsed as a++ + b. */
1949 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1950 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1951 CPP_PUTC (pfile, ' ');
/* Reserve room for the spelling, then append it and advance the
output limit to the position spell_token returns. */
1954 CPP_RESERVE (pfile, TOKEN_LEN (token));
1955 pfile->limit = spell_token (pfile, token, pfile->limit);
1958 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1959 already contain enough space to hold the token's spelling.
1960 Returns a pointer to the character after the last character
1963 static unsigned char *
1964 spell_token (pfile, token, buffer)
1965 cpp_reader *pfile; /* Would be nice to be rid of this... */
1966 const cpp_token *token;
1967 unsigned char *buffer;
/* The spelling class recorded for the token's type selects how the
token is written. */
1969 switch (token_spellings[token->type].type)
1971 case SPELL_OPERATOR:
1973 const unsigned char *spelling;
/* Operators flagged DIGRAPH are written with their digraph spelling;
digraph_spellings is indexed relative to CPP_FIRST_DIGRAPH. */
1976 if (token->flags & DIGRAPH)
1977 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1979 spelling = token_spellings[token->type].spelling;
1981 while ((c = *spelling++) != '\0')
/* Copy the name stored in the token's node. */
1987 memcpy (buffer, token->val.node->name, token->val.node->length);
1988 buffer += token->val.node->length;
1993 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1996 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1998 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
/* Copy the text held in val.str. */
2001 memcpy (buffer, token->val.str.text, token->val.str.len);
2002 buffer += token->val.str.len;
2004 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
2006 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
/* A token spelled by the single character kept in val.aux. */
2012 *buffer++ = token->val.aux;
/* Report a token that cannot be spelled, by its token name. */
2016 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2023 /* Return the spelling of a token known to be an operator.
2024 Does not distinguish digraphs from their counterparts. If TYPE is
not spelled as an operator after all, the entry for TYPE in
token_names is returned instead. */
2025 const unsigned char *
2026 _cpp_spell_operator (type)
2027 enum cpp_ttype type;
/* Use the operator spelling only when TYPE's spelling class is
SPELL_OPERATOR. */
2029 if (token_spellings[type].type == SPELL_OPERATOR)
2030 return token_spellings[type].spelling;
2032 return token_names[type];
2036 /* Macro expansion algorithm.
2038 Macro expansion is implemented by a single-pass algorithm; there are
2039 no rescan passes involved. cpp_get_token expands just enough to be
2040 able to return a token to the caller, a consequence is that when it
2041 returns the preprocessor can be in a state of mid-expansion. The
2042 algorithm does not work by fully expanding a macro invocation into
2043 some kind of token list, and then returning them one by one.
2045 Our expansion state is recorded in a context stack. We start out with
2046 a single context on the stack, let's call it base context. This
2047 consists of the token list returned by lex_line that forms the next
2048 logical line in the source file.
2050 The current level in the context stack is stored in the cur_context
2051 member of the cpp_reader structure. The context it references keeps,
2052 amongst other things, a count of how many tokens form that context and
2053 our position within those tokens.
2055 Fundamentally, calling cpp_get_token will return the next token from
2056 the current context. If we're at the end of the current context, that
2057 context is popped from the stack first, unless it is the base context,
2058 in which case the next logical line is lexed from the source file.
2060 However, before returning the token, if it is a CPP_NAME token
2061 _cpp_get_token checks to see if it is a macro and if it is enabled.
2062 Each time it encounters a macro name, it calls push_macro_context.
2063 This function checks that the macro should be expanded (with
2064 is_macro_enabled), and if so pushes a new macro context on the stack
2065 which becomes the current context. It then loops back to read the
2066 first token of the macro context.
2068 A macro context basically consists of the token list representing the
2069 macro's replacement list, which was saved in the hash table by
2070 save_macro_expansion when its #define statement was parsed. If the
2071 macro is function-like, it also contains the tokens that form the
2072 arguments to the macro. I say more about macro arguments below, but
2073 for now it is enough to say that each argument is a set of pointers to tokens.
2076 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
2077 token. This represents an argument passed to the macro, with the
2078 argument number stored in the token's AUX field. The argument should
2079 be substituted, this is achieved by pushing an "argument context". An
2080 argument context just refers to the tokens forming the argument,
2081 which are obtained directly from the macro context. The STRINGIFY
2082 flag on a CPP_MACRO_ARG token indicates that the argument should be
stringified.
2085 Here are a few simple rules that the context stack obeys:
2087 1) The lex_line token list is always context zero.
2089 2) Context 1, if it exists, must be a macro context.
2091 3) An argument context can only appear above a macro context.
2093 4) A macro context can appear above the base context, another macro
2094 context, or an argument context.
2096 5) These imply that the minimal level of an argument context is 2.
2098 The only tricky thing left is ensuring that macros are enabled and
2099 disabled correctly. The algorithm controls macro expansion by the
2100 level of the context a token is taken from in the context stack. If a
2101 token is taken from a level equal to no_expand_level (a member of
2102 struct cpp_reader), no expansion is performed.
2104 When popping a context off the stack, if no_expand_level equals the
2105 level of the popped context, it is reduced by one to match the new
2106 context level, so that expansion is still disabled. It does not
2107 increase if a context is pushed, though. It starts out life as
2108 UINT_MAX, which has the effect that initially macro expansion is
2109 enabled. I explain how this mechanism works below.
2111 The standard requires:-
2113 1) Arguments to be fully expanded before substitution.
2115 2) Stringified arguments to not be expanded, nor the tokens
2116 immediately surrounding a ## operator.
2118 3) Continual rescanning until there are no more macros left to
expand.
2121 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2122 expanded again during later rescans. This prevents infinite
recursion.
2125 The first thing to observe is that stage 3) is mostly redundant.
2126 Since a macro is disabled once it has been expanded, how can a rescan
2127 find an unexpanded macro name? There are only two cases where this is
possible:
2130 a) If the macro name results from a token paste operation.
2132 b) If the macro in question is a function-like macro that hasn't
2133 already been expanded because previously there was not the required
2134 '(' token immediately following it. This is only possible when an
2135 argument is substituted, and after substitution the last token of
2136 the argument can bind with a parenthesis appearing in the tokens
2137 following the substitution. Note that if the '(' appears within the
2138 argument, the ')' must too, as expanding macro arguments cannot
2139 "suck in" tokens outside the argument.
2141 So we tackle this as follows. When parsing the macro invocation for
2142 arguments, we record the tokens forming each argument as a list of
2143 pointers to those tokens. We do not expand any tokens that are "raw",
2144 i.e. directly from the macro invocation, but other tokens that come
2145 from (nested) argument substitution are fully expanded.
2147 This is achieved by setting the no_expand_level to that of the macro
2148 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2149 forming an argument, because parse_args (indirectly) calls
2150 get_raw_token which automatically pushes argument contexts and traces
2151 into them. Since these contexts are at a higher level than the
2152 no_expand_level, they get fully macro expanded.
2154 "Raw" and non-raw tokens are separated in arguments by null pointers,
2155 with the policy that the initial state of an argument is raw. If the
2156 first token is not raw, it should be preceded by a null pointer. When
2157 tracing through the tokens of an argument context, each time
2158 get_raw_token encounters a null pointer, it toggles the flag
CONTEXT_RAW.
2161 This flag, when set, indicates to is_macro_disabled that we are
2162 reading raw tokens which should be macro-expanded. Similarly, if
2163 clear, is_macro_disabled suppresses re-expansion.
2165 It's probably time for an example.
#define hash #
#define str(x) #x
2169 #define xstr(y) str(y hash)
str(hash) // "hash"
2171 xstr(hash) // "# hash"
2173 In the invocation of str, parse_args turns off macro expansion and so
2174 parses the argument as <hash>. This is the only token (pointer)
2175 passed as the argument to str. Since <hash> is raw there is no need
2176 for an initial null pointer. stringify_arg is called from
2177 get_raw_token when tracing through the expansion of str, since the
2178 argument has the STRINGIFY flag set. stringify_arg turns off
2179 macro_expansion by setting the no_expand_level to that of the argument
2180 context. Thus it gets the token <hash> and stringifies it to "hash".
2183 Similarly xstr is passed <hash>. However, when parse_args is parsing
2184 the invocation of str() in xstr's expansion, get_raw_token encounters
2185 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2186 an argument context, and enters the tokens of the argument,
2187 i.e. <hash>. This is at a higher context level than parse_args
2188 disabled, and so is_macro_disabled permits expansion of it and a macro
2189 context is pushed on top of the argument context. This contains the
2190 <#> token, and the end result is that <hash> is macro expanded.
2191 However, after popping off the argument context, the <hash> of xstr's
2192 expansion does not get macro expanded because we're back at the
2193 no_expand_level. The end result is that the argument passed to str is
2194 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2195 raw, <#> is not raw, but then <hash> is.
2200 /* Free the storage allocated for macro arguments. ARGS is the
argument record whose storage is released. */
2202 free_macro_args (args)
/* Release the array of token pointers that save_token grows. */
2206 free ((PTR) args->tokens);
2211 /* Determines if a macro has already been used (and is therefore
disabled). EXPANSION is the macro's token list and TOKEN is the
name token under consideration.
2214 is_macro_disabled (pfile, expansion, token)
2216 const cpp_toklist *expansion;
2217 const cpp_token *token;
2219 cpp_context *context = CURRENT_CONTEXT (pfile);
2221 /* Don't expand anything if this file has already been preprocessed. */
2222 if (CPP_OPTION (pfile, preprocessed))
2225 /* Arguments on either side of ## are inserted in place without
2226 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2227 occurs during a later rescan pass. The effect is that we expand
2228 iff we would as part of the macro's expansion list, so we should
2229 drop to the macro's context. */
2230 if (IS_ARG_CONTEXT (context))
2232 if (token->flags & PASTED)
2234 else if (!(context->flags & CONTEXT_RAW))
2236 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2240 /* Have we already used this macro? */
2241 while (context->level > 0)
2243 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2245 /* Raw argument tokens are judged based on the token list they
2247 if (context->flags & CONTEXT_RAW)
2248 context = pfile->contexts + context->level;
2253 /* Function-like macros may be disabled if the '(' is not in the
2254 current context. We check this without disrupting the context
2256 if (expansion->paramc >= 0)
2258 const cpp_token *next;
2259 unsigned int prev_nme;
2261 context = CURRENT_CONTEXT (pfile);
2262 /* Drop down any contexts we're at the end of: the '(' may
2263 appear in lower macro expansions, or in the rest of the file. */
2264 while (context->posn == context->count && context > pfile->contexts)
2267 /* If we matched, we are disabled, as we appear in the
2268 expansion of each macro we meet. */
2269 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
/* Peek at the next token with no_expand_level set to this context's
index, then put the previous no_expand_level back. */
2273 prev_nme = pfile->no_expand_level;
2274 pfile->no_expand_level = context - pfile->contexts;
2275 next = _cpp_get_token (pfile);
2276 restore_macro_expansion (pfile, prev_nme);
2277 if (next->type != CPP_OPEN_PAREN)
2279 _cpp_push_token (pfile, next);
2280 if (CPP_WTRADITIONAL (pfile))
2282 "function macro %.*s must be used with arguments in traditional C",
2283 (int) token->val.node->length, token->val.node->name);
2291 /* Add a token to the set of tokens forming the arguments to the macro
2292 being parsed in parse_args. ARGS->tokens is an array of token
pointers with ARGS->used entries in use out of ARGS->capacity. TOKEN
may be a null pointer; parse_arg stores one with save_token (args, 0). */
2294 save_token (args, token)
2296 const cpp_token *token;
/* When the array is full, grow it to twice its capacity plus 100
entries before appending. */
2298 if (args->used == args->capacity)
2300 args->capacity += args->capacity + 100;
2301 args->tokens = (const cpp_token **)
2302 xrealloc ((PTR) args->tokens,
2303 args->capacity * sizeof (const cpp_token *));
2305 args->tokens[args->used++] = token;
2308 /* Take and save raw tokens until we finish one argument. Empty
2309 arguments are saved as a single CPP_PLACEMARKER token.
Tokens are read with _cpp_get_token and stored in ARGS with
save_token. VAR_ARGS is nonzero when commas must not end the
argument. PAREN_CONTEXT is the context level against which each
token's rawness is judged. The returned token is the one that
parse_args tests for CPP_CLOSE_PAREN or CPP_EOF. */
2310 static const cpp_token *
2311 parse_arg (pfile, var_args, paren_context, args, pcount)
2314 unsigned int paren_context;
2316 unsigned int *pcount;
2318 const cpp_token *token;
2319 unsigned int paren = 0, count = 0;
2320 int raw, was_raw = 1;
2322 for (count = 0;; count++)
2324 token = _cpp_get_token (pfile);
2326 switch (token->type)
2331 case CPP_OPEN_PAREN:
2335 case CPP_CLOSE_PAREN:
2341 /* Commas are not terminators within parentheses or var_args. */
2342 if (paren || var_args)
2346 case CPP_EOF: /* Error reported by caller. */
/* A token is raw when it was read at or below the context level
given by PAREN_CONTEXT. */
2350 raw = pfile->cur_context <= paren_context;
/* NOTE(review): the null pointer is presumably the separator stored
when RAW differs from WAS_RAW - confirm. */
2354 save_token (args, 0);
2357 save_token (args, token);
2363 /* Duplicate the placemarker. Then we can set its flags and
2364 position and safely be using more than one. */
2365 save_token (args, duplicate_token (pfile, &placemarker_token));
2373 /* This macro returns true if the argument starting at offset O of arglist
2374 A is empty - that is, it's either a single PLACEMARKER token, or a null
2375 pointer followed by a PLACEMARKER. A and O are each evaluated more
than once, so they must not have side effects. When the entry at O
is a null pointer, the entry at O + 1 is dereferenced. */
2377 #define empty_argument(A, O) \
2378 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2379 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2381 /* Parse the arguments making up a macro invocation. Nested arguments
2382 are automatically macro expanded, but immediate macros are not
2383 expanded; this enables e.g. operator # to work correctly. Returns
2384 non-zero on error. */
/* HP is the macro's hash node: its value.expansion, length and name are
   used below.  ARGS receives the argument tokens (through parse_arg) and
   the per-argument end offsets in ARGS->ends.  */
2386 parse_args (pfile, hp, args)
2391 const cpp_token *token;
2392 const cpp_toklist *macro;
2393 unsigned int total = 0;
/* Remember the context level the invocation started at; parse_arg
   compares later tokens' levels against it.  */
2394 unsigned int paren_context = pfile->cur_context;
2397 macro = hp->value.expansion;
/* The var_args flag passed to parse_arg is set only while parsing the
   final parameter of a macro flagged VAR_ARGS.  */
2402 token = parse_arg (pfile, (argc + 1 == macro->paramc
2403 && (macro->flags & VAR_ARGS)),
2404 paren_context, args, &count);
/* End offsets are recorded only for arguments the macro declares;
   push_arg_context later reads ends[n - 1] and ends[n] as the bounds of
   argument n.  */
2405 if (argc < macro->paramc)
2408 args->ends[argc] = total;
/* Keep taking arguments until the closing parenthesis or end of input.  */
2412 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2414 if (token->type == CPP_EOF)
/* NOTE(review): "%.*s" consumes an int precision.  Another diagnostic in
   this file casts the node length to int before passing it; the three
   calls in this function pass hp->length uncast.  Its declared type is
   not visible here -- confirm it promotes to int.  */
2416 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2417 hp->length, hp->name);
2420 else if (argc < macro->paramc)
2422 /* A rest argument is allowed to not appear in the invocation at all.
2423 e.g. #define debug(format, args...) ...
2425 This is exactly the same as if the rest argument had received no
2426 tokens - debug("string",); This extension is deprecated. */
2428 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2430 /* Duplicate the placemarker. Then we can set its flags and
2431 position and safely be using more than one. */
2432 save_token (args, duplicate_token (pfile, &placemarker_token));
/* The synthesised argument is the single placemarker just saved, so it
   ends one token past the current total.  */
2433 args->ends[argc] = total + 1;
2439 "insufficient arguments in invocation of macro \"%.*s\"",
2440 hp->length, hp->name);
2444 /* An empty argument to an empty function-like macro is fine. */
2445 else if (argc > macro->paramc
2446 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2449 "too many arguments in invocation of macro \"%.*s\"",
2450 hp->length, hp->name);
2457 /* Adds backslashes before all backslashes and double quotes appearing
2458 in strings. Non-printable characters are converted to octal. */
/* Reads LEN bytes from SRC and writes the quoted form to DEST.  Callers in
   this file subtract the start of the destination from the return value
   to get the quoted length.  DEST must be sized by the caller; nothing
   here checks for room.  */
2460 quote_string (dest, src, len)
/* Backslash and double quote are the characters given a '\' prefix.  */
2469 if (c == '\\' || c == '"')
/* Octal escape: sprintf emits a backslash and at least three octal digits
   and then also stores a terminating NUL after them, so one byte beyond
   the escape itself is written at DEST.  */
2480 sprintf ((char *) dest, "\\%03o", c);
2489 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2490 CPP_STRING token containing TEXT in quoted form. */
/* TOKEN is modified in place.  LEN is the number of bytes of TEXT to
   quote.  The new buffer becomes TOKEN's val.str.text.  */
2492 make_string_token (token, text, len)
/* Four bytes per input byte covers the longest escape quote_string
   produces.  NOTE(review): quote_string's sprintf also stores a NUL after
   an octal escape; if every input byte is escaped that way the NUL lands
   at buf[len * 4], one past this allocation.  Consider len * 4 + 1.  */
2499 buf = (U_CHAR *) xmalloc (len * 4);
2500 token->type = CPP_STRING;
2502 token->val.str.text = buf;
/* quote_string returns the end of what it wrote; the difference from BUF
   is the quoted length.  */
2503 token->val.str.len = quote_string (buf, text, len) - buf;
2507 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2508 evaluating to NUMBER. */
/* The token is taken from PFILE's temporary token store with
   get_temp_token.  */
2510 alloc_number_token (pfile, number)
2517 result = get_temp_token (pfile);
/* NUMBER is formatted as signed decimal.  NOTE(review): BUF's allocation
   is not visible in this listing; it must hold the longest "%d" output
   plus the terminating NUL -- confirm its size.  */
2519 sprintf (buf, "%d", number);
2521 result->type = CPP_NUMBER;
/* The decimal text becomes the token's spelling; the stored length
   excludes the NUL.  */
2523 result->val.str.text = (U_CHAR *) buf;
2524 result->val.str.len = strlen (buf);
2528 /* Returns a temporary token from the temporary token store of PFILE. */
/* Three counters are involved: temp_used is the number of tokens handed
   out, temp_alloced the number of token objects that exist, and temp_cap
   the size of the temp_tokens pointer array.  */
2530 get_temp_token (pfile)
/* Every existing token object is in use, so a new one must be made.  */
2533 if (pfile->temp_used == pfile->temp_alloced)
/* The pointer array is full as well: grow it to 2 * temp_cap + 20.  */
2535 if (pfile->temp_used == pfile->temp_cap)
2537 pfile->temp_cap += pfile->temp_cap + 20;
2538 pfile->temp_tokens = (cpp_token **) xrealloc
2539 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2541 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2542 (sizeof (cpp_token));
/* Hand out the next unused token.  Nothing in the visible code
   initialises its fields; callers fill them in.  */
2545 return pfile->temp_tokens[pfile->temp_used++];
2548 /* Release (not free) for re-use the temporary tokens of PFILE. */
/* The token objects stay allocated; only temp_used is wound back to zero
   so get_temp_token can hand them out again.  */
2550 release_temp_tokens (pfile)
2553 while (pfile->temp_used)
2555 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
/* Tokens whose spelling class is SPELL_STRING own their text buffer:
   free it and clear the pointer.  */
2557 if (token_spellings[token->type].type == SPELL_STRING)
2559 free ((char *) token->val.str.text);
2560 token->val.str.text = 0;
2565 /* Free all of PFILE's dynamically-allocated temporary tokens. */
/* Also frees the text of the cached pfile->date and pfile->time tokens.
   Any guards around those two calls are not visible in this listing.  */
2567 _cpp_free_temp_tokens (pfile)
2570 if (pfile->temp_tokens)
2572 /* It is possible, though unlikely (looking for '(' of a funlike
2573 macro into EOF), that we haven't released the tokens yet. */
2574 release_temp_tokens (pfile);
/* Free each token object, then the pointer array that held them.  */
2575 while (pfile->temp_alloced)
2576 free (pfile->temp_tokens[--pfile->temp_alloced]);
2577 free (pfile->temp_tokens);
2582 free ((char *) pfile->date->val.str.text);
2584 free ((char *) pfile->time->val.str.text);
2589 /* Copy TOKEN into a temporary token from PFILE's store. */
/* The copy lives in the temporary store and is recycled by
   release_temp_tokens.  The statement that copies the token structure
   itself is not visible in this listing.  */
2591 duplicate_token (pfile, token)
2593 const cpp_token *token;
2595 cpp_token *result = get_temp_token (pfile);
/* String-spelled tokens get a private copy of their text, because
   release_temp_tokens frees the text of such temporary tokens.  Exactly
   val.str.len bytes are copied; no terminator is added.  */
2598 if (token_spellings[token->type].type == SPELL_STRING)
2600 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2601 memcpy (buff, token->val.str.text, token->val.str.len);
2602 result->val.str.text = buff;
2607 /* Determine whether two tokens can be pasted together, and if so,
2608 what the resulting token is. Returns CPP_EOF if the tokens cannot
2609 be pasted, or the appropriate type for the merged token if they can. */
/* TOKEN1 and TOKEN2 are the left and right operands of ##.  *DIGRAPH is
   set on the paths that produce a digraph-spelled result.  Most `case'
   labels are missing from this listing; the operand named in the notes
   below is inferred from the result type and should be confirmed.  */
2611 static enum cpp_ttype
2612 can_paste (pfile, token1, token2, digraph)
2614 const cpp_token *token1, *token2;
2617 enum cpp_ttype a = token1->type, b = token2->type;
2618 int cxx = CPP_OPTION (pfile, cplusplus);
/* Any type up to CPP_LAST_EQ followed by '=' maps to its '='-suffixed
   form, found by adding the enum distance between CPP_EQ and CPP_EQ_EQ.  */
2620 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2621 return a + (CPP_EQ_EQ - CPP_EQ);
/* Left operand presumably '>': gives >>, the C++-only >? and >>=.  */
2626 if (b == a) return CPP_RSHIFT;
2627 if (b == CPP_QUERY && cxx) return CPP_MAX;
2628 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Left operand presumably '<': gives <<, the C++-only <?, <<= and, when
   digraphs are enabled, the <: and <% digraphs.  */
2631 if (b == a) return CPP_LSHIFT;
2632 if (b == CPP_QUERY && cxx) return CPP_MIN;
2633 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2634 if (CPP_OPTION (pfile, digraphs))
2637 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2639 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Doubled operators.  */
2643 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2644 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2645 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2648 if (b == a) return CPP_MINUS_MINUS;
2649 if (b == CPP_GREATER) return CPP_DEREF;
/* The scope operator is only formed when the cplusplus option is set.  */
2652 if (b == a && cxx) return CPP_SCOPE;
2653 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2654 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2658 if (CPP_OPTION (pfile, digraphs))
2660 if (b == CPP_GREATER)
2661 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2663 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2667 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2670 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2671 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to ## only when their DIGRAPH flags
   agree; the result carries that flag.  */
2675 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2677 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* Left operand presumably a name.  A name whose node is the special `L'
   node yields the wide character or wide string type.  */
2681 if (b == CPP_NAME) return CPP_NAME;
2683 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2685 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2687 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
/* Left operand presumably a number: it absorbs a number, a name or a
   '.'; a sign is tested against the number's last character.  */
2691 if (b == CPP_NUMBER) return CPP_NUMBER;
2692 if (b == CPP_NAME) return CPP_NUMBER;
2693 if (b == CPP_DOT) return CPP_NUMBER;
2694 /* Numbers cannot have length zero, so this is safe. */
2695 if ((b == CPP_PLUS || b == CPP_MINUS)
2696 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2707 /* Check if TOKEN is to be ##-pasted with the token after it. */
/* Called from _cpp_get_token when pfile->paste_level is zero.  The visible
   code builds the pasted result in a temporary token (get_temp_token or
   duplicate_token).  */
2708 static const cpp_token *
2709 maybe_paste_with_next (pfile, token)
2711 const cpp_token *token;
2714 const cpp_token *second;
2715 cpp_context *context = CURRENT_CONTEXT (pfile);
2717 /* Is this token on the LHS of ## ? */
/* Either the token itself carries PASTE_LEFT, or it was the last token of
   a context flagged CONTEXT_PASTEL (posn has reached count).  Being a
   loop, a pasted result that is again on the left of ## is pasted again.  */
2719 while ((token->flags & PASTE_LEFT)
2720 || ((context->flags & CONTEXT_PASTEL)
2721 && context->posn == context->count))
2723 /* Suppress macro expansion for next token, but don't conflict
2724 with the other method of suppression. If it is an argument,
2725 macro expansion within the argument will still occur. */
2726 pfile->paste_level = pfile->cur_context;
2727 second = _cpp_get_token (pfile);
2728 pfile->paste_level = 0;
2730 /* Ignore placemarker argument tokens (cannot be from an empty
2731 macro since macros are not expanded). */
2732 if (token->type == CPP_PLACEMARKER)
2733 pasted = duplicate_token (pfile, second);
2734 else if (second->type == CPP_PLACEMARKER)
/* The context just below the current one is taken to be the macro whose
   expansion contains the ## being processed.  */
2736 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2737 /* GCC has special extended semantics for a ## b where b is
2738 a varargs parameter: a disappears if b consists of no
2739 tokens. This extension is deprecated. */
/* The test reads the token just consumed from the macro's list and checks
   that its val.aux + 1 equals paramc, i.e. it names the last parameter.  */
2740 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2741 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2742 == (unsigned) mac_context->u.list->paramc))
2744 cpp_warning (pfile, "deprecated GNU ## extension used");
2745 pasted = duplicate_token (pfile, second);
2748 pasted = duplicate_token (pfile, token);
2753 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
/* CPP_EOF from can_paste means the pair cannot be pasted: optionally warn
   and push SECOND back so it is returned on its own.  */
2755 if (type == CPP_EOF)
2757 if (CPP_OPTION (pfile, warn_paste))
2759 "pasting would not give a valid preprocessing token");
2760 _cpp_push_token (pfile, second);
2764 if (type == CPP_NAME || type == CPP_NUMBER)
2766 /* Join spellings. */
2769 pasted = get_temp_token (pfile);
/* NOTE(review): the scratch buffer is sized as the sum of the two
   TOKEN_LENs.  Whether it is NUL-terminated before the uxstrdup below is
   not visible in this listing -- confirm.  */
2770 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2771 end = spell_token (pfile, token, buf);
2772 end = spell_token (pfile, second, end);
/* A pasted name is looked up to get its hash node; a pasted number keeps
   a heap copy of the joined spelling.  */
2775 if (type == CPP_NAME)
2776 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2779 pasted->val.str.text = uxstrdup (buf);
2780 pasted->val.str.len = end - buf;
/* Wide character or string: start from a copy of SECOND.  */
2783 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2784 pasted = duplicate_token (pfile, second);
/* Remaining results get a fresh temporary token with a zero value.  */
2787 pasted = get_temp_token (pfile);
2788 pasted->val.integer = 0;
2791 pasted->type = type;
2792 pasted->flags = digraph ? DIGRAPH : 0;
2795 /* The pasted token gets the whitespace flags and position of the
2796 first token, the PASTE_LEFT flag of the second token, plus the
2797 PASTED flag to indicate it is the result of a paste. However, we
2798 want to preserve the DIGRAPH flag. */
2799 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2800 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2801 | (second->flags & PASTE_LEFT) | PASTED);
2802 pasted->col = token->col;
2803 pasted->line = token->line;
2805 /* See if there is another token to be pasted onto the one we just
2808 context = CURRENT_CONTEXT (pfile);
2814 /* Convert a token sequence to a single string token according to the
2815 rules of the ISO C #-operator. */
/* INIT_SIZE is both the initial capacity of the output buffer and the
   slack added each time the buffer is regrown.  */
2816 #define INIT_SIZE 200
/* TOKEN is the macro-argument token being stringified; it is pushed as an
   argument context and its tokens are read back with expansion disabled.
   The result is a temporary CPP_STRING token that takes ownership of the
   heap buffer built here.  */
2818 stringify_arg (pfile, token)
2820 const cpp_token *token;
2823 unsigned char *main_buf;
2824 unsigned int prev_value, backslash_count = 0;
2825 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2827 push_arg_context (pfile, token);
2828 prev_value = prevent_macro_expansion (pfile);
2829 main_buf = (unsigned char *) xmalloc (buf_cap);
2831 result = get_temp_token (pfile);
2832 ASSIGN_FLAGS_AND_POS (result, token);
/* TOKEN is reused as the loop variable; from here on it is each token of
   the argument in turn, until the argument context reports CPP_EOF.  */
2834 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2838 unsigned int len = TOKEN_LEN (token);
/* Only string and character literals go through quote_string.  */
2840 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2841 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
/* NOTE(review): this check does not count the separator space that may be
   appended just below; it is only safe if LEN over-estimates the bytes
   actually written -- confirm against TOKEN_LEN.  */
2845 if (buf_used + len > buf_cap)
2847 buf_cap = buf_used + len + INIT_SIZE;
2848 main_buf = xrealloc (main_buf, buf_cap);
2851 if (whitespace && (token->flags & PREV_WHITE))
2852 main_buf[buf_used++] = ' ';
/* Two destinations appear: a separate scratch buffer, or the tail of
   MAIN_BUF directly.  The selecting condition is not visible in this
   listing (presumably ESCAPE -- confirm).  */
2855 buf = (unsigned char *) xmalloc (len);
2857 buf = main_buf + buf_used;
2859 len = spell_token (pfile, token, buf) - buf;
/* quote_string returns the end of its output; subtracting MAIN_BUF gives
   the new fill level.  */
2862 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2869 if (token->type == CPP_BACKSLASH)
2872 backslash_count = 0;
2875 /* Ignore the final \ of invalid string literals. */
2876 if (backslash_count & 1)
2878 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2882 result->type = CPP_STRING;
2883 result->val.str.text = main_buf;
2884 result->val.str.len = buf_used;
2885 restore_macro_expansion (pfile, prev_value);
2889 /* Allocate more room on the context stack of PFILE. */
/* The capacity becomes 2 * context_cap + 20.  xrealloc may move the
   array, so cpp_context pointers obtained before this call must be
   re-fetched afterwards.  */
2891 expand_context_stack (pfile)
2894 pfile->context_cap += pfile->context_cap + 20;
2895 pfile->contexts = (cpp_context *)
2896 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2899 /* Push the context of macro NODE onto the context stack. TOKEN is
2900 the CPP_NAME token invoking the macro. */
/* _cpp_get_token tests this function's return value; the return
   statements themselves are not visible in this listing.  */
2902 push_macro_context (pfile, token)
2904 const cpp_token *token;
2906 unsigned char orig_flags;
2908 cpp_context *context;
2909 cpp_hashnode *node = token->val.node;
2911 /* Token's flags may change when parsing args containing a nested
2912 invocation of this macro. */
2913 orig_flags = token->flags & (PREV_WHITE | BOL);
/* A non-negative paramc selects the argument-parsing path; zero
   parameters still counts.  */
2915 if (node->value.expansion->paramc >= 0)
2917 unsigned int error, prev_nme;
2919 /* Allocate room for the argument contexts, and parse them. */
2920 args = (macro_args *) xmalloc (sizeof (macro_args));
/* One end offset per declared parameter.  NOTE(review): with paramc == 0
   this requests zero bytes; the result depends on how xmalloc treats a
   zero size -- confirm.  */
2921 args->ends = (unsigned int *)
2922 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2926 args->level = pfile->cur_context;
/* Expansion is switched off while the arguments are collected and put
   back to its previous setting straight afterwards.  */
2928 prev_nme = prevent_macro_expansion (pfile);
2930 error = parse_args (pfile, node, args);
2932 restore_macro_expansion (pfile, prev_nme);
2935 free_macro_args (args);
2940 /* Now push its context. */
2941 pfile->cur_context++;
2942 if (pfile->cur_context == pfile->context_cap)
2943 expand_context_stack (pfile);
/* Fetched only after the possible reallocation above.  */
2945 context = CURRENT_CONTEXT (pfile);
2946 context->u.list = node->value.expansion;
2947 context->args = args;
2949 context->count = context->u.list->tokens_used;
2950 context->level = pfile->cur_context;
2952 context->pushed_token = 0;
2954 /* Set the flags of the first token. We know there must
2955 be one, empty macros are a single placemarker token. */
2956 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2961 /* Push an argument to the current macro onto the context stack.
2962 TOKEN is the MACRO_ARG token representing the argument expansion. */
/* TOKEN->val.aux is used as the argument's index into the enclosing
   macro's args->ends array.  */
2964 push_arg_context (pfile, token)
2966 const cpp_token *token;
2968 cpp_context *context;
2971 pfile->cur_context++;
2972 if (pfile->cur_context == pfile->context_cap)
2973 expand_context_stack (pfile);
2975 context = CURRENT_CONTEXT (pfile);
/* The argument tokens belong to the context directly below the new one.  */
2976 args = context[-1].args;
/* COUNT first holds the argument's start offset (the previous argument's
   end, or 0 for the first argument), is used to point u.arg at the first
   token, and is then replaced by the argument's token count.  */
2978 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2979 context->u.arg = args->tokens + context->count;
2980 context->count = args->ends[token->val.aux] - context->count;
2983 context->level = args->level;
2984 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2985 context->pushed_token = 0;
2987 /* Set the flags of the first token. There is one. */
/* The first slot may be replaced by the second one below; the condition
   is not visible in this listing (presumably a null first slot, which
   parse_arg can store -- confirm).  */
2989 const cpp_token *first = context->u.arg[0];
2991 first = context->u.arg[1];
/* The const qualifier is cast away so the stored token can take over the
   MACRO_ARG token's whitespace flags and position.  */
2993 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2994 token->flags & (PREV_WHITE | BOL));
/* Record on the context whether the argument sits on the left of ##, and
   whether a paste is in progress (paste_level non-zero).  */
2997 if (token->flags & PASTE_LEFT)
2998 context->flags |= CONTEXT_PASTEL;
2999 if (pfile->paste_level)
3000 context->flags |= CONTEXT_PASTER;
3003 /* "Unget" a token. It is effectively inserted in the token queue and
3004 will be returned by the next call to get_raw_token. */
/* Each context has a single pushed_token slot, so only one token can be
   pending per context.  */
3006 _cpp_push_token (pfile, token)
3008 const cpp_token *token;
3010 cpp_context *context = CURRENT_CONTEXT (pfile);
/* A second push before the first is consumed is an internal error.  */
3011 if (context->pushed_token)
3012 cpp_ice (pfile, "two tokens pushed in a row");
3013 if (token->type != CPP_EOF)
3014 context->pushed_token = token;
3015 /* Don't push back a directive's CPP_EOF, step back instead. */
/* A CPP_EOF pushed while in any other context is dropped.  */
3016 else if (pfile->cur_context == 0)
3017 pfile->contexts[0].posn--;
3020 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
3021 introducing the directive. */
/* The directive is the one recorded in pfile->token_list.directive.
   After the name has been skipped the visible code flushes pending
   printer output, runs the handler (with macro expansion disabled unless
   the directive has the EXPAND flag), restores the expansion setting, and
   discards the rest of the line.  prev_nme is written and read under the
   same !(d->flags & EXPAND) test.  */
3023 process_directive (pfile, token)
3025 const cpp_token *token;
3027 const struct directive *d = pfile->token_list.directive;
3030 /* Skip over the directive name. */
/* token[1] is the token after the '#'.  A name is consumed; a number is
   left in place; anything else is an internal error.  */
3031 if (token[1].type == CPP_NAME)
3032 _cpp_get_raw_token (pfile);
3033 else if (token[1].type != CPP_NUMBER)
3034 cpp_ice (pfile, "directive begins with %s?!",
3035 token_names[token[1].type]);
3037 /* Flush pending tokens at this point, in case the directive produces
3038 output. XXX Directive output won't be visible to a direct caller of
3040 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
3041 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3043 if (! (d->flags & EXPAND))
3044 prev_nme = prevent_macro_expansion (pfile);
3045 (void) (*d->handler) (pfile);
3046 if (! (d->flags & EXPAND))
3047 restore_macro_expansion (pfile, prev_nme);
3048 _cpp_skip_rest_of_line (pfile);
3051 /* The external interface to return the next token. All macro
3052 expansion and directive processing is handled internally, the
3053 caller only ever sees the output after preprocessing. */
3055 cpp_get_token (pfile)
3058 const cpp_token *token;
3059 /* Loop till we hit a non-directive, non-placemarker token. */
3062 token = _cpp_get_token (pfile);
/* Placemarkers get special handling here; per the loop comment above they
   are not returned to the caller.  */
3064 if (token->type == CPP_PLACEMARKER)
/* A '#' is a directive only when it begins a line (BOL) and the lexer
   recorded a directive for the current token list.  */
3067 if (token->type == CPP_HASH && token->flags & BOL
3068 && pfile->token_list.directive)
3070 process_directive (pfile, token);
3078 /* The internal interface to return the next token. There are two
3079 differences between the internal and external interfaces: the
3080 internal interface may return a PLACEMARKER token, and it does not
3081 process directives. */
3083 _cpp_get_token (pfile)
3086 const cpp_token *token;
3089 /* Loop until we hit a non-macro token. */
3092 token = get_raw_token (pfile);
3094 /* Short circuit EOF. */
3095 if (token->type == CPP_EOF)
3098 /* If we are skipping... */
3099 if (pfile->skipping)
3101 /* we still have to process directives, */
3102 if (pfile->token_list.directive)
3105 /* but everything else is ignored. */
3106 _cpp_skip_rest_of_line (pfile);
3110 /* If there's a potential control macro and we get here, then that
3111 #ifndef didn't cover the entire file and its argument shouldn't
3112 be taken as a control macro. */
3113 pfile->potential_control_macro = 0;
3115 /* See if there's a token to paste with this one. */
/* Skipped while a paste is already in progress (paste_level non-zero).  */
3116 if (!pfile->paste_level)
3117 token = maybe_paste_with_next (pfile, token);
3119 /* If it isn't a macro, return it now. */
/* Only names are candidates, and a name whose node type is T_VOID is
   treated as not being a macro.  */
3120 if (token->type != CPP_NAME
3121 || token->val.node->type == T_VOID)
3124 /* Is macro expansion disabled in general? */
/* Expansion is off when no_expand_level names the current context, or
   while the right-hand side of a paste is being fetched.  */
3125 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
3128 node = token->val.node;
/* Node types other than T_MACRO are handed to special_symbol.  */
3129 if (node->type != T_MACRO)
3130 return special_symbol (pfile, node, token);
3132 if (is_macro_disabled (pfile, node->value.expansion, token))
/* Bound the nesting depth of macro contexts.  NOTE(review): this message
   prints node->name with "%s" while other diagnostics in this file use
   "%.*s" with the node length; that assumes the name is NUL-terminated --
   confirm.  */
3135 if (pfile->cur_context > CPP_STACK_MAX)
3137 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
3141 if (push_macro_context (pfile, token))
3147 /* Returns the next raw token, i.e. without performing macro
3148 expansion. Argument contexts are automatically entered. */
3149 static const cpp_token *
3150 get_raw_token (pfile)
3153 const cpp_token *result;
3154 cpp_context *context;
3158 context = CURRENT_CONTEXT (pfile);
/* A token pushed back with _cpp_push_token takes priority, and the slot
   is emptied as it is consumed.  */
3159 if (context->pushed_token)
3161 result = context->pushed_token;
3162 context->pushed_token = 0;
/* The current context is exhausted: pop it.  The handling of pop_context's
   result is not visible in this listing.  */
3164 else if (context->posn == context->count)
3166 if (pop_context (pfile))
/* Argument contexts hold an array of token pointers (u.arg).  */
3172 if (IS_ARG_CONTEXT (context))
3174 result = context->u.arg[context->posn++];
/* The CONTEXT_RAW flag is toggled and the following slot is taken.  The
   guarding condition is not visible here; presumably it is the null
   marker that parse_arg stores -- confirm.  */
3177 context->flags ^= CONTEXT_RAW;
3178 result = context->u.arg[context->posn++];
3180 return result; /* Cannot be a CPP_MACRO_ARG */
/* Other contexts hold an array of tokens (u.list->tokens).  */
3182 result = &context->u.list->tokens[context->posn++];
3185 if (result->type != CPP_MACRO_ARG)
/* A macro-argument token is either stringified in one step, or entered as
   a new argument context.  */
3188 if (result->flags & STRINGIFY_ARG)
3189 return stringify_arg (pfile, result);
3191 push_arg_context (pfile, result);
3195 /* Internal interface to get the token without macro expanding. */
/* Wraps one _cpp_get_token call between prevent_macro_expansion and
   restore_macro_expansion, so the previous expansion setting is put back
   afterwards.  */
3197 _cpp_get_raw_token (pfile)
3200 int prev_nme = prevent_macro_expansion (pfile);
3201 const cpp_token *result = _cpp_get_token (pfile);
3202 restore_macro_expansion (pfile, prev_nme);
3206 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3207 list should be overwritten, or zero if we need to append
3208 (typically, if we are within the arguments to a macro, or looking
3209 for the '(' to start a function-like macro invocation). */
3211 lex_next (pfile, clear)
3215 cpp_toklist *list = &pfile->token_list;
/* Remember the token array's old address and fill level; both are needed
   below to relocate saved argument pointers if lex_line moves the array.  */
3216 const cpp_token *old_list = list->tokens;
3217 unsigned int old_used = list->tokens_used;
3221 /* Release all temporary tokens. */
3222 _cpp_clear_toklist (list);
3223 pfile->contexts[0].posn = 0;
3224 if (pfile->temp_used)
3225 release_temp_tokens (pfile);
3228 lex_line (pfile, list);
/* The base context's token count tracks the list.  */
3229 pfile->contexts[0].count = list->tokens_used;
3231 if (!clear && pfile->args)
3233 /* Fix up argument token pointers. */
3234 if (old_list != list->tokens)
3238 for (i = 0; i < pfile->args->used; i++)
3240 const cpp_token *token = pfile->args->tokens[i];
/* Only pointers into the old array are adjusted; they are shifted by the
   byte distance between the old and new arrays.  Other saved pointers are
   left alone.  */
3241 if (token >= old_list && token < old_list + old_used)
3242 pfile->args->tokens[i] = (const cpp_token *)
3243 ((char *) token + ((char *) list->tokens - (char *) old_list));
3247 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3248 tokens within the list of arguments that would otherwise act as
3249 preprocessing directives, the behavior is undefined.
3251 This implementation will report a hard error and treat the
3252 'sequence of preprocessing tokens' as part of the macro argument,
3255 Note if pfile->args == 0, we're OK since we're only inside a
3256 macro argument after a '('. */
3257 if (list->directive)
/* tokens[old_used] is the first token appended by this call, so the error
   is reported at the start of the newly lexed line.  */
3259 cpp_error_with_line (pfile, list->tokens[old_used].line,
3260 list->tokens[old_used].col,
3261 "#%s may not be used inside a macro argument",
3262 list->directive->name);
3270 /* Pops a context off the context stack. If we're at the bottom, lexes
3271 the next logical line. Returns EOF if we're at the end of the
3272 argument list to the # operator, or if it is illegal to "overflow"
3273 into the rest of the file (e.g. 6.10.3.1.1). */
/* NOTE: the function's name and parameter lines are missing from this
   listing; get_raw_token calls it as pop_context (pfile).  */
3278 cpp_context *context;
3280 if (pfile->cur_context == 0)
3282 /* If we are currently processing a directive, do not advance. 6.10
3283 paragraph 2: A new-line character ends the directive even if it
3284 occurs within what would otherwise be an invocation of a
3285 function-like macro. */
3286 if (pfile->token_list.directive)
/* lex_next's CLEAR argument is true only when no_expand_level still holds
   UINT_MAX, the value _cpp_init_input_buffer installs; otherwise the new
   line is appended to the current token list.  */
3289 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3292 /* Argument contexts, when parsing args or handling # operator
3293 return CPP_EOF at the end. */
3294 context = CURRENT_CONTEXT (pfile);
3295 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3298 /* Free resources when leaving macro contexts. */
3300 free_macro_args (context->args);
/* If expansion was disabled at exactly the level being popped, move the
   disabled level down along with the stack.  */
3302 if (pfile->cur_context == pfile->no_expand_level)
3303 pfile->no_expand_level--;
3304 pfile->cur_context--;
3309 /* Turn off macro expansion at the current context level. */
/* The previous no_expand_level is captured first; callers in this file
   keep the function's result and later hand it to
   restore_macro_expansion.  */
3311 prevent_macro_expansion (pfile)
3314 unsigned int prev_value = pfile->no_expand_level;
3315 pfile->no_expand_level = pfile->cur_context;
3319 /* Restore macro expansion to its previous state. */
/* PREV_VALUE is written straight back to pfile->no_expand_level; callers
   pass the value obtained from prevent_macro_expansion.  */
3321 restore_macro_expansion (pfile, prev_value)
3323 unsigned int prev_value;
3325 pfile->no_expand_level = prev_value;
3328 /* Used by cpperror.c to obtain the correct line and column to report in a diagnostic. */
/* Returns the line of the most recent token and stores its column through
   PCOL.  special_symbol calls this with a null PCOL, so the store must be
   guarded; that guard is not visible in this listing.  */
3331 _cpp_get_line (pfile, pcol)
3336 const cpp_token *cur_token;
/* While a line is being lexed the newest token is the last one in the
   list; otherwise it is the last one read from the base context.  */
3338 if (pfile->in_lex_line)
3339 index = pfile->token_list.tokens_used;
3341 index = pfile->contexts[0].posn;
/* NOTE(review): INDEX - 1 is out of range when INDEX is zero; no guard is
   visible in this listing -- confirm one exists.  */
3343 cur_token = &pfile->token_list.tokens[index - 1];
3345 *pcol = cur_token->col;
3346 return cur_token->line;
/* DSC expands a string literal into two arguments: the literal cast to
   const U_CHAR *, and its length without the terminating NUL.  Because it
   relies on sizeof, it only works on literals and arrays, not pointers.  */
3349 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
/* Month abbreviations, indexed by tm_mon when special_symbol formats the
   date string.  */
3350 static const char * const monthnames[] =
3352 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3353 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3356 /* Handle builtin macros like __FILE__. */
/* NODE is the hash node of the builtin and TOKEN the name token that
   invoked it; the result takes TOKEN's flags and position at the end.
   Most `case' labels are missing from this listing.  */
3357 static const cpp_token *
3358 special_symbol (pfile, node, token)
3361 const cpp_token *token;
3373 ip = CPP_BUFFER (pfile);
/* For T_BASE_FILE, walk back to the outermost buffer so that its file
   name is the one reported.  */
3378 if (node->type == T_BASE_FILE)
3379 while (CPP_PREV_BUFFER (ip) != NULL)
3380 ip = CPP_PREV_BUFFER (ip);
3382 file = ip->nominal_fname;
/* The file name is quoted into a temporary string token.  */
3384 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3389 case T_INCLUDE_LEVEL:
3390 /* pfile->include_depth counts the primary source as level 1,
3391 but historically __INCLUDE_DEPTH__ has called the primary
3393 result = alloc_number_token (pfile, pfile->include_depth - 1);
3397 /* If __LINE__ is embedded in a macro, it must expand to the
3398 line of the macro's invocation, not its definition.
3399 Otherwise things like assert() will not work properly. */
/* The column is not wanted, hence the null second argument.  */
3400 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3407 #ifdef STDC_0_IN_SYSTEM_HEADERS
3408 if (CPP_IN_SYSTEM_HEADER (pfile)
3409 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3412 result = alloc_number_token (pfile, stdc);
/* The date and time tokens are built together on first use and cached in
   PFILE.  */
3418 if (pfile->date == 0)
3420 /* Allocate __DATE__ and __TIME__ from permanent storage,
3421 and save them in pfile so we don't have to do this again.
3422 We don't generate these strings at init time because
3423 time() and localtime() are very slow on some systems. */
3424 time_t tt = time (NULL);
3425 struct tm *tb = localtime (&tt);
/* The placeholder literals only fix the buffer sizes and string lengths;
   the sprintf calls below overwrite them in place with text of the same
   length.  */
3427 pfile->date = make_string_token
3428 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3429 pfile->time = make_string_token
3430 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3432 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3433 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3434 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3435 tb->tm_hour, tb->tm_min, tb->tm_sec);
/* The cached token itself is returned, so the flag and position
   assignment at the end of the function modifies the shared token.  */
3437 result = node->type == T_DATE ? pfile->date: pfile->time;
3441 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3445 cpp_ice (pfile, "invalid special hash type");
3449 ASSIGN_FLAGS_AND_POS (result, token);
3454 /* Dump the original user's spelling of argument index ARG_NO to the
3455 macro whose expansion is LIST. */
/* LIST->namebuf is walked as a run of NUL-terminated names laid end to
   end.  The loop that applies the skip below is not visible in this
   listing (presumably it runs ARG_NO times -- confirm).  */
3457 dump_param_spelling (pfile, list, arg_no)
3459 const cpp_toklist *list;
3460 unsigned int arg_no;
3462 const U_CHAR *param = list->namebuf;
/* Step over one name and its terminating NUL.  */
3465 param += ustrlen (param) + 1;
3466 CPP_PUTS (pfile, param, ustrlen (param));
3469 /* Dump a token list to the output. */
/* Output starts at TOKEN and runs to the end of LIST's used tokens.  A
   non-zero FLUSH also pushes the output to pfile->printer, if there is
   one.  */
3471 _cpp_dump_list (pfile, list, token, flush)
3473 const cpp_toklist *list;
3474 const cpp_token *token;
3477 const cpp_token *limit = list->tokens + list->tokens_used;
3478 const cpp_token *prev = 0;
3480 /* Avoid the CPP_EOF. */
3481 if (list->directive)
3484 while (token < limit)
/* Macro parameters are written by their original name, preceded by a
   space and/or '#' when the token's flags call for them.  */
3486 if (token->type == CPP_MACRO_ARG)
3488 if (token->flags & PREV_WHITE)
3489 CPP_PUTC (pfile, ' ');
3490 if (token->flags & STRINGIFY_ARG)
3491 CPP_PUTC (pfile, '#');
3492 dump_param_spelling (pfile, list, token->val.aux);
3495 output_token (pfile, token, prev);
/* A token on the left of a paste is followed by " ##".  */
3496 if (token->flags & PASTE_LEFT)
3497 CPP_PUTS (pfile, " ##", 3);
3502 if (flush && pfile->printer)
3503 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3506 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3507 if it hasn't happened already. */
/* The visible code also initialises the token list and the context stack.
   No input_buffer allocation is visible in this listing.  */
3510 _cpp_init_input_buffer (pfile)
3515 init_trigraph_map ();
3516 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
/* UINT_MAX cannot equal a real context index, so macro expansion starts
   out enabled; pop_context tests for this same value.  */
3517 pfile->no_expand_level = UINT_MAX;
/* The context stack starts with room for 20 entries and grows through
   expand_context_stack.  */
3518 pfile->context_cap = 20;
3519 pfile->cur_context = 0;
3521 pfile->contexts = (cpp_context *)
3522 xmalloc (pfile->context_cap * sizeof (cpp_context));
3524 /* Clear the base context. */
/* The base context reads directly from PFILE's own token list.  */
3525 base = &pfile->contexts[0];
3526 base->u.list = &pfile->token_list;
3532 base->pushed_token = 0;
3535 /* Moves to the end of the directive line, popping contexts as necessary. */
/* Drops every stacked context above the base one, freeing their macro
   arguments, and empties the base context so that the next token request
   lexes a fresh line.  */
3538 _cpp_skip_rest_of_line (pfile)
3541 /* Discard all stacked contexts. */
/* Index 0 is the base context and is not visited.  */
3543 for (i = pfile->cur_context; i > 0; i--)
3544 if (pfile->contexts[i].args)
3545 free_macro_args (pfile->contexts[i].args);
/* A disabled-expansion level at or below the current depth is reset to
   the base level; the UINT_MAX "not disabled" value is larger than any
   depth and is left alone.  */
3547 if (pfile->no_expand_level <= pfile->cur_context)
3548 pfile->no_expand_level = 0;
3549 pfile->cur_context = 0;
3551 /* Clear the base context, and clear the directive pointer so that
3552 get_raw_token will advance to the next line. */
3553 pfile->contexts[0].count = 0;
3554 pfile->contexts[0].posn = 0;
3555 pfile->token_list.directive = 0;
3558 /* Directive handler wrapper used by the command line option processor. */
/* Runs directive DIR on the COUNT bytes at BUF: the text is pushed as a
   buffer, lexed, handed to DIR's handler, and the buffer is popped again.
   Nothing is done if the buffer cannot be pushed.  */
3561 _cpp_run_directive (pfile, dir, buf, count)
3563 const struct directive *dir;
3567 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
/* Starts at zero; it is only given a meaningful value, and only read,
   when the directive lacks the EXPAND flag.  */
3569 unsigned int prev_lvl = 0;
3571 /* Scan the line now, else prevent_macro_expansion won't work. */
3572 lex_next (pfile, 1);
3573 if (! (dir->flags & EXPAND))
3574 prev_lvl = prevent_macro_expansion (pfile);
/* The handler's return value is deliberately ignored.  */
3576 (void) (*dir->handler) (pfile);
3578 if (! (dir->flags & EXPAND))
3579 restore_macro_expansion (pfile, prev_lvl);
3581 _cpp_skip_rest_of_line (pfile);
3582 cpp_pop_buffer (pfile);