1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 o Check line numbers assigned to all errors.
28 o Distinguish integers, floats, and 'other' pp-numbers.
29 o Store ints and char constants as binary values.
30 o New command-line assertion syntax.
31 o Work towards functions in cpperror.c taking a message level parameter.
32 If we do this, merge the common code of do_warning and do_error.
33 o Comment all functions, and describe macro expansion algorithm.
34 o Move as much out of header files as possible.
35 o Remove single quote pairs `', and some '', from diagnostics.
36 o Correct pastability test for CPP_NAME and CPP_NUMBER.
47 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
49 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
51 /* Flags for cpp_context. */
52 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
53 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
54 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
55 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
57 typedef struct cpp_context cpp_context;
62 const cpp_toklist *list; /* Used for macro contexts only. */
63 const cpp_token **arg; /* Used for arg contexts only. */
66 /* Pushed token to be returned by next call to get_raw_token. */
67 const cpp_token *pushed_token;
69 struct macro_args *args; /* The arguments for a function-like
70 macro. NULL otherwise. */
71 unsigned short posn; /* Current posn, index into u. */
72 unsigned short count; /* No. of tokens in u. */
77 typedef struct macro_args macro_args;
81 const cpp_token **tokens;
82 unsigned int capacity;
87 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
88 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
89 macro_args *, unsigned int *));
90 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
91 static void save_token PARAMS ((macro_args *, const cpp_token *));
92 static int pop_context PARAMS ((cpp_reader *));
93 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
94 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
95 static void free_macro_args PARAMS ((macro_args *));
96 static void dump_param_spelling PARAMS ((FILE *, const cpp_toklist *,
98 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
101 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
102 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
103 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
105 static int skip_block_comment PARAMS ((cpp_reader *));
106 static int skip_line_comment PARAMS ((cpp_buffer *));
107 static void adjust_column PARAMS ((cpp_reader *));
108 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
109 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
110 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
111 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
112 static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
113 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
114 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
115 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
116 static void check_long_token PARAMS ((cpp_buffer *,
120 static void lex_token PARAMS ((cpp_reader *, cpp_token *));
121 static int lex_next PARAMS ((cpp_reader *, int));
123 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
124 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
127 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
128 static void expand_context_stack PARAMS ((cpp_reader *));
129 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
131 static void output_token PARAMS ((cpp_reader *, FILE *, const cpp_token *,
132 const cpp_token *, int));
133 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
135 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
137 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
138 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
140 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
141 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
143 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
144 const cpp_token *, int *));
145 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
146 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
147 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
148 static void release_temp_tokens PARAMS ((cpp_reader *));
149 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
150 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
/* Non-zero when C is a plus or minus sign directly following an
   exponent letter PREVC: e or E always, p or P only when the c89
   option is off.  The expansion reads a variable named pfile, so one
   must be in scope wherever this macro is used.  */
152 #define VALID_SIGN(c, prevc) \
153 (((c) == '+' || (c) == '-') && \
154 ((prevc) == 'e' || (prevc) == 'E' \
155 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
157 /* An upper bound on the number of bytes needed to spell a token,
158 including preceding whitespace. */
159 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
162 const cpp_token *token;
166 switch (TOKEN_SPELL (token))
168 default: len = 0; break;
169 case SPELL_STRING: len = token->val.str.len; break;
170 case SPELL_IDENT: len = token->val.node->length; break;
175 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
176 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
177 #define ON_REST_ARG(c) \
178 (((c)->u.list->flags & VAR_ARGS) \
179 && (c)->u.list->tokens[(c)->posn - 1].val.aux \
180 == (unsigned int) ((c)->u.list->paramc - 1))
182 #define ASSIGN_FLAGS_AND_POS(d, s) \
183 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
184 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
187 /* f is flags, just consisting of PREV_WHITE | BOL. */
188 #define MODIFY_FLAGS_AND_POS(d, s, f) \
189 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
190 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
193 #define OP(e, s) { SPELL_OPERATOR, U s },
194 #define TK(e, s) { s, U STRINGX (e) },
196 const struct token_spelling
197 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
202 /* Notify the compiler proper that the current line number has jumped,
203 or the current file name has changed. */
/* All output goes to PRINT->outf.  LINE is the line number to report;
   the file name is taken from the current buffer of PFILE.  */
206 output_line_command (pfile, print, line)
211 cpp_buffer *ip = CPP_BUFFER (pfile);
216 /* End the previous line of text. */
217 if (pfile->need_newline)
219 putc ('\n', print->outf);
222 pfile->need_newline = 0;
/* NOTE(review): the statement controlled by the next test is not
   visible in this excerpt; presumably nothing further is emitted when
   line commands are disabled.  Confirm against the full source.  */
224 if (CPP_OPTION (pfile, no_line_commands))
227 /* If the current file has not changed, we can output a few newlines
228 instead if we want to increase the line number by a small amount.
229 We cannot do this if print->lineno is zero, because that means we
230 haven't output any line commands yet. (The very first line
231 command output is a `same_file' command.)
233 'nominal_fname' values are unique, so they can be compared by
234 comparing pointers. */
235 if (ip->nominal_fname == print->last_fname && print->lineno > 0
236 && line >= print->lineno && line < print->lineno + 8)
/* Small forward jump (fewer than 8 lines) in the same file: pad with
   newlines.  */
238 while (line > print->lineno)
240 putc ('\n', print->outf);
/* Full marker: the line number, the quoted file name, then whatever
   text cpp_syshdr_flags returns for this buffer.  */
246 fprintf (print->outf, "# %u \"%s\"%s\n", line, ip->nominal_fname,
247 cpp_syshdr_flags (pfile, ip));
/* Record what was last reported so the next call can compare.  */
249 print->last_fname = ip->nominal_fname;
250 print->lineno = line;
253 /* Like fprintf, but writes to a printer object. You should be sure
254 always to generate a complete line when you use this function. */
/* PFILE supplies the need_newline state, PRINT->outf is the stream
   written to, and FMT together with the variadic arguments is handed
   to vfprintf.  */
256 cpp_printf VPARAMS ((cpp_reader *pfile, cpp_printer *print,
257 const char *fmt, ...))
260 #ifndef ANSI_PROTOTYPES
/* Without prototypes the fixed arguments also arrive through the
   va_list, so they are fetched by hand before use.  */
268 #ifndef ANSI_PROTOTYPES
269 pfile = va_arg (ap, cpp_reader *);
270 print = va_arg (ap, cpp_printer *);
271 fmt = va_arg (ap, const char *);
274 /* End the previous line of text. */
275 if (pfile->need_newline)
277 putc ('\n', print->outf);
280 pfile->need_newline = 0;
282 vfprintf (print->outf, fmt, ap);
286 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
289 cpp_scan_buffer_nooutput (pfile)
/* STOP is the buffer underneath the one being scanned; the current
   buffer is compared against it after each pop below.  */
292 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
293 const cpp_token *token;
295 /* In no-output mode, we can ignore everything but directives. */
298 token = _cpp_get_token (pfile);
/* End of the current buffer: pop it and check whether STOP has been
   reached.  NOTE(review): the action taken when it has is not visible
   in this excerpt.  */
300 if (token->type == CPP_EOF)
302 cpp_pop_buffer (pfile);
303 if (CPP_BUFFER (pfile) == stop)
/* A hash token flagged BOL is handed to process_directive only when
   the token list is marked as a directive.  */
307 if (token->type == CPP_HASH && token->flags & BOL
308 && pfile->token_list.directive)
310 process_directive (pfile, token);
314 _cpp_skip_rest_of_line (pfile);
318 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
320 cpp_scan_buffer (pfile, print)
/* STOP is the buffer underneath the one being scanned.  PREV is passed
   to output_token along with the current token.  */
324 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
325 const cpp_token *token, *prev = 0;
329 token = _cpp_get_token (pfile);
330 if (token->type == CPP_EOF)
332 cpp_pop_buffer (pfile);
334 if (CPP_BUFFER (pfile) == stop)
/* A token flagged BOL starts a new source line, so the line marker
   state is brought up to date before anything is printed.  */
341 if (token->flags & BOL)
343 output_line_command (pfile, print, token->line);
346 if (token->type == CPP_HASH && pfile->token_list.directive)
348 process_directive (pfile, token);
/* Placemarkers are not written.  Setting need_newline makes the next
   output_line_command or cpp_printf call end the current output line
   before writing anything else.  */
353 if (token->type != CPP_PLACEMARKER)
355 output_token (pfile, print->outf, token, prev, 1);
356 pfile->need_newline = 1;
363 /* Helper routine used by parse_include, which can't see spell_token.
364 Reinterpret the current line as an h-char-sequence (< ... >); we are
365 looking at the first token after the <. */
/* The spelling is accumulated in a heap buffer BUF of capacity AVAIL
   with LEN bytes in use, and is finally attached to a temporary
   CPP_HEADER_NAME token.  */
367 _cpp_glue_header_name (pfile)
377 buf = xmalloc (avail);
/* Tokens are consumed until a closing angle bracket or end of line.  */
381 t = _cpp_get_token (pfile);
382 if (t->type == CPP_GREATER || t->type == CPP_EOF)
/* Grow BUF before spelling T if the upper bound on its length would
   not fit; 40 extra bytes of headroom are added each time.  */
385 if (len + TOKEN_LEN (t) > avail)
387 avail = len + TOKEN_LEN (t) + 40;
388 buf = xrealloc (buf, avail);
391 if (t->flags & PREV_WHITE)
/* The pointer returned by spell_token is used as the new end of the
   text, which gives the updated length.  */
394 p = spell_token (pfile, t, buf + len);
395 len = (size_t) (p - buf); /* p known >= buf */
398 if (t->type == CPP_EOF)
399 cpp_error (pfile, "missing terminating > character");
/* Trim the buffer to the bytes actually used.  NOTE(review): no NUL
   terminator is stored by the visible code, and LEN may be zero at
   this point; confirm that consumers rely on val.str.len only and that
   xrealloc copes with a zero size.  */
401 buf = xrealloc (buf, len);
403 hdr = get_temp_token (pfile);
404 hdr->type = CPP_HEADER_NAME;
406 hdr->val.str.text = buf;
407 hdr->val.str.len = len;
411 /* Token-buffer helper functions. */
413 /* Expand a token list's string space. It is *vital* that
414 list->tokens_used is correct, to get pointer fix-up right. */
/* LEN is the number of bytes added to the current capacity, not the
   new total.  */
416 _cpp_expand_name_space (list, len)
420 const U_CHAR *old_namebuf;
422 old_namebuf = list->namebuf;
423 list->name_cap += len;
424 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
426 /* Fix up token text pointers. */
/* Only needed when xrealloc moved the block.  Each SPELL_STRING token
   among the first tokens_used entries is assumed to point into namebuf
   and is shifted by the distance the buffer moved.
   NOTE(review): old_namebuf is compared and subtracted after xrealloc
   may have released it, which strict ISO C does not guarantee to work;
   an offset-based fix-up would avoid that.  */
427 if (list->namebuf != old_namebuf)
431 for (i = 0; i < list->tokens_used; i++)
432 if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)
433 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
437 /* If there is not enough room for LEN more characters, expand the
438 list by just enough to have room for LEN characters. */
440 _cpp_reserve_name_space (list, len)
/* ROOM is the unused tail of the name buffer.  */
444 unsigned int room = list->name_cap - list->name_used;
/* NOTE(review): the test guarding this call is not visible in this
   excerpt.  ROOM is unsigned, so LEN - ROOM is only a sensible growth
   amount when LEN exceeds ROOM.  */
447 _cpp_expand_name_space (list, len - room);
450 /* Expand the number of tokens in a list. */
/* COUNT is added to the existing token capacity.  */
452 _cpp_expand_token_space (list, count)
458 list->tokens_cap += count;
459 n = list->tokens_cap;
/* Lists flagged LIST_OFFSET keep a dummy token in front of tokens[0]
   (see _cpp_init_toklist and _cpp_free_toklist), so the pointer has to
   be adjusted around the xrealloc call.  NOTE(review): the statement
   under this first test is not visible in this excerpt.  */
460 if (list->flags & LIST_OFFSET)
462 list->tokens = (cpp_token *)
463 xrealloc (list->tokens, n * sizeof (cpp_token));
464 if (list->flags & LIST_OFFSET)
465 list->tokens++; /* Skip the dummy. */
468 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
469 an extra token in front of the token list, as this allows the lexer
470 to always peek at the previous token without worrying about
471 underflowing the list, and some initial space. Otherwise, no
472 token- or name-space is allocated, and there is no dummy token. */
474 _cpp_init_toklist (list, flags)
478 if (flags == NO_DUMMY_TOKEN)
480 list->tokens_cap = 0;
488 /* Initialize token space. Put a dummy token before the start
489 that will fail matches. */
490 list->tokens_cap = 256; /* 4K's worth. */
/* One slot beyond tokens_cap is allocated to hold the dummy, which is
   given type CPP_EOF.  */
491 list->tokens = (cpp_token *)
492 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
493 list->tokens[0].type = CPP_EOF;
496 /* Initialize name space. */
497 list->name_cap = 1024;
498 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
/* LIST_OFFSET records that a dummy token precedes the list; the
   expand and free routines test this flag.  */
499 list->flags = LIST_OFFSET;
/* Finish by resetting the usage counters.  */
502 _cpp_clear_toklist (list);
505 /* Clear a token list. */
/* The visible statements reset usage counters and drop every flag
   except LIST_OFFSET; they do not release any storage.  */
507 _cpp_clear_toklist (list)
510 list->tokens_used = 0;
514 list->params_len = 0;
515 list->flags &= LIST_OFFSET; /* clear all but that one */
518 /* Free a token list. Does not free the list itself, which may be
519 embedded in a larger structure. */
521 _cpp_free_toklist (list)
522 const cpp_toklist *list;
/* With LIST_OFFSET set the allocation starts one token before
   list->tokens, so that earlier address is the one passed to free.  */
524 if (list->flags & LIST_OFFSET)
525 free (list->tokens - 1); /* Backup over dummy token. */
528 free (list->namebuf);
531 /* Compare two tokens. */
/* The value comparison is attempted only when both type and flags
   match; how the values are compared depends on how the token type is
   spelled.  */
533 _cpp_equiv_tokens (a, b)
534 const cpp_token *a, *b;
536 if (a->type == b->type && a->flags == b->flags)
537 switch (TOKEN_SPELL (a))
539 default: /* Keep compiler happy. */
544 return a->val.aux == b->val.aux; /* arg_no or character. */
/* Identifier nodes are compared by pointer.  */
546 return a->val.node == b->val.node;
/* String-spelled tokens compare by length first, then by bytes.  */
548 return (a->val.str.len == b->val.str.len
549 && !memcmp (a->val.str.text, b->val.str.text,
556 /* Compare two token lists. */
558 _cpp_equiv_toklists (a, b)
559 const cpp_toklist *a, *b;
/* Whole-list checks first: token count, flags and parameter count.  */
563 if (a->tokens_used != b->tokens_used
564 || a->flags != b->flags
565 || a->paramc != b->paramc)
/* Then compare the lists token by token.  */
568 for (i = 0; i < a->tokens_used; i++)
569 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
576 Compares the token TOKEN to the NUL-terminated string STRING.
577 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
/* The type of TOKEN is tested first; the name comparison itself is
   done with ustrcmp on the name stored in the identifier node.  */
580 cpp_ideq (token, string)
581 const cpp_token *token;
584 if (token->type != CPP_NAME)
587 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
590 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
591 U":>", U"<%", U"%>"};
593 /* Call when meeting a newline. Returns the character after the newline
594 (or carriage-return newline combination), or EOF. */
596 handle_newline (buffer, newline_char)
598 cppchar_t newline_char;
600 cppchar_t next = EOF;
/* A new line starts here: drop any accumulated tab adjustment and
   record the current position as the start of the line.  */
602 buffer->col_adjust = 0;
604 buffer->line_base = buffer->cur;
606 /* Handle CR-LF and LF-CR combinations, get the next character. */
607 if (buffer->cur < buffer->rlimit)
609 next = *buffer->cur++;
/* The sum matches when NEXT and NEWLINE_CHAR are one CR and one LF in
   either order.  NOTE(review): this relies on NEWLINE_CHAR itself
   being CR or LF; other pairs of characters can produce the same sum.  */
610 if (next + newline_char == '\r' + '\n')
/* The second character of the pair belongs to the line ending, so the
   line start moves past it as well.  */
612 buffer->line_base = buffer->cur;
613 if (buffer->cur < buffer->rlimit)
614 next = *buffer->cur++;
/* The character found is also left in read_ahead.  */
620 buffer->read_ahead = next;
624 /* Subroutine of skip_escaped_newlines; called when a trigraph is
625 encountered. It warns if necessary, and returns true if the
626 trigraph should be honoured. FROM_CHAR is the third character of a
627 trigraph, and presumed to be the previous character for position
630 trigraph_ok (pfile, from_char)
634 int accept = CPP_OPTION (pfile, trigraphs);
636 if (CPP_OPTION (pfile, warn_trigraphs))
638 cpp_buffer *buffer = pfile->buffer;
640 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
641 "trigraph ??%c converted to %c",
643 (int) _cpp_trigraph_map[from_char]);
645 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
646 "trigraph ??%c ignored", (int) from_char);
652 /* Assumes local variables buffer and result. */
653 #define ACCEPT_CHAR(t) \
654 do { result->type = t; buffer->read_ahead = EOF; } while (0)
656 /* When we move to multibyte character sets, add to these something
657 that saves and restores the state of the multibyte conversion
658 library. This probably involves saving and restoring a "cookie".
659 In the case of glibc it is an 8-byte structure, so is not a high
660 overhead operation. In any case, it's out of the fast path. */
661 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
662 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
664 /* Skips any escaped newlines introduced by NEXT, which is either a
665 '?' or a '\\'. Returns the next character, which will also have
666 been placed in buffer->read_ahead. */
668 skip_escaped_newlines (buffer, next)
/* SAVED_CUR is the variable the SAVE_STATE and RESTORE_STATE macros
   operate on.  NOTE(review): their uses in this function are not
   visible in this excerpt.  */
673 const unsigned char *saved_cur;
678 if (buffer->cur == buffer->rlimit)
/* A trigraph needs a second question mark followed by a third
   character.  */
684 next1 = *buffer->cur++;
685 if (next1 != '?' || buffer->cur == buffer->rlimit)
/* The third character must have an entry in the trigraph map and be
   accepted by trigraph_ok.  */
691 next1 = *buffer->cur++;
692 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
698 /* We have a full trigraph here. */
699 next = _cpp_trigraph_map[next1];
700 if (next != '\\' || buffer->cur == buffer->rlimit)
705 /* We have a backslash, and room for at least one more character. */
/* Step over horizontal space between the backslash and the newline.  */
709 next1 = *buffer->cur++;
710 if (!is_nvspace (next1))
714 while (buffer->cur < buffer->rlimit);
716 if (!is_vspace (next1))
723 cpp_warning (buffer->pfile,
724 "backslash and newline separated by space");
726 next = handle_newline (buffer, next1);
728 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
/* The character after the newline may itself begin another escaped
   newline or trigraph, so go round again in that case.  */
730 while (next == '\\' || next == '?');
732 buffer->read_ahead = next;
736 /* Obtain the next character, after trigraph conversion and skipping
737 an arbitrary string of escaped newlines. The common case of no
738 trigraphs or escaped newlines falls through quickly. */
740 get_effective_char (buffer)
743 cppchar_t next = EOF;
745 if (buffer->cur < buffer->rlimit)
747 next = *buffer->cur++;
749 /* '?' can introduce trigraphs (and therefore backslash); '\\'
750 can introduce escaped newlines, which we want to skip, or
751 UCNs, which, depending upon lexer state, we will handle in
753 if (next == '?' || next == '\\')
754 next = skip_escaped_newlines (buffer, next);
757 buffer->read_ahead = next;
761 /* Skip a C-style block comment. We find the end of the comment by
762 seeing if an asterisk is before every '/' we encounter. Returns
763 non-zero if comment terminated by EOF, zero otherwise. */
/* Before returning, buffer->read_ahead is reset to EOF.  The value
   returned is zero only when the last two characters read were an
   asterisk followed by a slash.  */
765 skip_block_comment (pfile)
768 cpp_buffer *buffer = pfile->buffer;
/* C is the character just read and PREVC the one read before it.  */
769 cppchar_t c = EOF, prevc;
771 while (buffer->cur != buffer->rlimit)
773 prevc = c, c = *buffer->cur++;
776 /* FIXME: For speed, create a new character class of characters
777 of no interest inside block comments. */
778 if (c == '?' || c == '\\')
779 c = skip_escaped_newlines (buffer, c);
781 /* People like decorating comments with '*', so check for '/'
782 instead for efficiency. */
788 /* Warn about potential nested comments, but not if the '/'
789 comes immediately before the true comment delimiter.
790 Don't bother to get it right across escaped newlines. */
791 if (CPP_OPTION (pfile, warn_comments)
792 && buffer->cur != buffer->rlimit)
794 prevc = c, c = *buffer->cur++;
795 if (c == '*' && buffer->cur != buffer->rlimit)
797 prevc = c, c = *buffer->cur++;
799 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
800 CPP_BUF_COL (buffer),
801 "\"/*\" within comment");
806 else if (is_vspace (c))
808 prevc = c, c = handle_newline (buffer, c);
812 adjust_column (pfile);
815 buffer->read_ahead = EOF;
816 return c != '/' || prevc != '*';
819 /* Skip a C++ line comment. Handles escaped newlines. Returns
820 non-zero if a multiline comment. The following new line, if any,
821 is left in buffer->read_ahead. */
823 skip_line_comment (buffer)
/* The starting line number is kept so it can be compared with the
   line number at the end.  */
826 unsigned int orig_lineno = buffer->lineno;
832 if (buffer->cur == buffer->rlimit)
836 if (c == '?' || c == '\\')
837 c = skip_escaped_newlines (buffer, c);
/* Stop at the first vertical-space character.  */
839 while (!is_vspace (c));
841 buffer->read_ahead = c; /* Leave any newline for caller. */
/* A changed line number is taken to mean that the comment ran across
   an escaped newline.  NOTE(review): the code that advances
   buffer->lineno is not visible in this excerpt.  */
842 return orig_lineno != buffer->lineno;
845 /* pfile->buffer->cur is one beyond the \t character. Update
846 col_adjust so we track the column correctly. */
848 adjust_column (pfile)
851 cpp_buffer *buffer = pfile->buffer;
852 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
854 /* Round it up to multiple of the tabstop, but subtract 1 since the
855 tab itself occupies a character position. */
/* NOTE(review): the modulo below requires the tabstop option to be
   non-zero; confirm it is validated where the option is set.  */
856 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
857 - col % CPP_OPTION (pfile, tabstop)) - 1;
860 /* Skips whitespace, saving the next non-whitespace character.
861 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
862 tokens might be assigned an incorrect column. */
864 skip_whitespace (pfile, c)
868 cpp_buffer *buffer = pfile->buffer;
/* WARNED presumably limits the null-character warning to one per
   call; the test that uses it is not visible in this excerpt.  */
869 unsigned int warned = 0;
873 /* Horizontal space always OK. */
/* Tabs go through adjust_column so later columns stay correct.  */
877 adjust_column (pfile);
878 /* Just \f \v or \0 left. */
883 cpp_warning (pfile, "null character(s) ignored");
/* Form feed and vertical tab draw a pedantic warning, but only inside
   a directive.  */
887 else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
888 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
889 CPP_BUF_COL (buffer),
890 "%s in preprocessing directive",
891 c == '\f' ? "form feed" : "vertical tab");
894 if (buffer->cur == buffer->rlimit)
898 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
899 while (is_nvspace (c));
901 /* Remember the next character. */
902 buffer->read_ahead = c;
905 /* Parse an identifier, skipping embedded backslash-newlines.
906 Calculate the hash value of the token while parsing, for improved
907 performance. The hashing algorithm *must* match cpp_lookup(). */
909 static cpp_hashnode *
910 parse_identifier (pfile, c)
914 cpp_buffer *buffer = pfile->buffer;
/* R is the hash value handed to _cpp_lookup_with_hash at the end.
   ORIG_USED marks where the spelling of this identifier starts in the
   name buffer of the token list.  */
915 unsigned int r = 0, saw_dollar = 0;
916 unsigned int orig_used = pfile->token_list.name_used;
/* Grow the name buffer when it is full.  _cpp_expand_name_space adds
   its argument to the capacity, so this enlarges the buffer by
   name_used + 256 bytes.  */
922 if (pfile->token_list.name_used == pfile->token_list.name_cap)
923 _cpp_expand_name_space (&pfile->token_list,
924 pfile->token_list.name_used + 256);
925 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
932 if (buffer->cur == buffer->rlimit)
937 while (is_idchar (c));
939 /* Potential escaped newline? */
940 if (c != '?' && c != '\\')
942 c = skip_escaped_newlines (buffer, c);
944 while (is_idchar (c));
946 /* $ is not an identifier character in the standard, but is commonly
947 accepted as an extension. Don't warn about it in skipped
948 conditional blocks. */
949 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
950 cpp_pedwarn (pfile, "'$' character(s) in identifier");
952 /* Remember the next character. */
953 buffer->read_ahead = c;
/* The spelling is addressed through ORIG_USED only now, because the
   name buffer may have been reallocated while it was being filled.  */
954 return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
955 pfile->token_list.name_used - orig_used, r);
958 /* Parse a number, skipping embedded backslash-newlines. */
/* The spelling is appended to the name buffer of the token list and
   NUMBER is set to describe it; C is the first character.  */
960 parse_number (pfile, number, c)
966 cpp_buffer *buffer = pfile->buffer;
/* ORIG_USED marks where the spelling of this number starts.  */
967 unsigned int orig_used = pfile->token_list.name_used;
/* Grow the name buffer when it is full.  _cpp_expand_name_space adds
   its argument to the capacity, so this enlarges the buffer by
   name_used + 256 bytes.  */
973 if (pfile->token_list.name_used == pfile->token_list.name_cap)
974 _cpp_expand_name_space (&pfile->token_list,
975 pfile->token_list.name_used + 256);
976 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
980 if (buffer->cur == buffer->rlimit)
/* A number continues over number characters, dots, and signs that
   VALID_SIGN accepts after an exponent letter.  */
985 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
987 /* Potential escaped newline? */
988 if (c != '?' && c != '\\')
990 c = skip_escaped_newlines (buffer, c);
992 while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
994 /* Remember the next character. */
995 buffer->read_ahead = c;
/* The text pointer is computed only now, because the name buffer may
   have been reallocated while it was being filled.  */
997 number->text = &pfile->token_list.namebuf[orig_used];
998 number->len = pfile->token_list.name_used - orig_used;
1001 /* Subroutine of parse_string. Emits error for unterminated strings. */
/* LINE is compared with the recorded multi-line string start below and
   TERM is the terminator character that was expected.  */
1003 unterminated (pfile, line, term)
1008 cpp_error (pfile, "missing terminating %c character", term);
/* For a double-quoted string, additionally point at the start of a
   multi-line string recorded in mls_line when that is a different
   line, then clear the record.  */
1010 if (term == '\"' && pfile->mls_line && pfile->mls_line != line)
1012 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
1013 "possible start of unterminated string literal");
1014 pfile->mls_line = 0;
1018 /* Parses a string, character constant, or angle-bracketed header file
1019 name. Handles embedded trigraphs and escaped newlines.
1021 Multi-line strings are allowed, but they are deprecated within
1024 parse_string (pfile, token, terminator)
1027 cppchar_t terminator;
1029 cpp_buffer *buffer = pfile->buffer;
1030 unsigned int orig_used = pfile->token_list.name_used;
1032 unsigned int nulls = 0;
1036 if (buffer->cur == buffer->rlimit)
1039 unterminated (pfile, token->line, terminator);
1045 /* Handle trigraphs, escaped newlines etc. */
1046 if (c == '?' || c == '\\')
1047 c = skip_escaped_newlines (buffer, c);
1049 if (c == terminator)
1051 unsigned int u = pfile->token_list.name_used;
1053 /* An odd number of consecutive backslashes represents an
1054 escaped terminator. */
1055 while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
1058 if ((pfile->token_list.name_used - u) % 2 == 0)
1064 else if (is_vspace (c))
1066 /* In assembly language, silently terminate string and
1067 character literals at end of line. This is a kludge
1068 around not knowing where comments are. */
1069 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
1072 /* Character constants and header names may not extend over
1073 multiple lines. In Standard C, neither may strings.
1074 Unfortunately, we accept multiline strings as an
1075 extension. (Deprecatedly even in directives - otherwise,
1076 glibc's longlong.h breaks.) */
1077 if (terminator != '"')
1079 unterminated (pfile, token->line, terminator);
1083 if (pfile->mls_line == 0)
1085 pfile->mls_line = token->line;
1086 pfile->mls_column = token->col;
1087 if (CPP_PEDANTIC (pfile))
1088 cpp_pedwarn (pfile, "multi-line string constant");
1091 handle_newline (buffer, c); /* Stores to read_ahead. */
1097 cpp_warning (pfile, "null character(s) preserved in literal");
1100 if (pfile->token_list.name_used == pfile->token_list.name_cap)
1101 _cpp_expand_name_space (&pfile->token_list,
1102 pfile->token_list.name_used + 256);
1104 pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
1105 /* If we had a new line, the next character is in read_ahead. */
1108 c = buffer->read_ahead;
1113 buffer->read_ahead = c;
1115 token->val.str.text = &pfile->token_list.namebuf[orig_used];
1116 token->val.str.len = pfile->token_list.name_used - orig_used;
1119 /* For output routine simplicity, the stored comment includes the
1120 comment start and any terminator. */
/* TOKEN becomes a CPP_COMMENT whose text lives in the name buffer of
   the token list.  FROM presumably points just past the two-character
   comment opener; the bytes copied run from FROM up to
   pfile->buffer->cur.  */
1122 save_comment (pfile, token, from)
1125 const unsigned char *from;
1127 unsigned char *buffer;
1129 cpp_toklist *list = &pfile->token_list;
1131 #define COMMENT_START_LEN 2
/* Total stored length: the comment body plus the opener.  */
1132 len = pfile->buffer->cur - from + COMMENT_START_LEN;
/* Reserve first and take the pointer afterwards, because reserving
   may reallocate namebuf.  */
1133 _cpp_reserve_name_space (list, len);
1134 buffer = list->namebuf + list->name_used;
1135 list->name_used += len;
1137 token->type = CPP_COMMENT;
1138 token->val.str.len = len;
1139 token->val.str.text = buffer;
1141 /* from[-1] is '/' or '*' depending on the comment type. */
1143 *buffer++ = from[-1];
1144 memcpy (buffer, from, len - COMMENT_START_LEN);
1147 /* A helper routine for lex_token. With some long tokens, we need
1148 to read ahead to see if that is the token we have, but back-track
1151 check_long_token (buffer, result, wanted, type)
1155 enum cpp_ttype type;
1157 const unsigned char *saved_cur;
1158 cppchar_t c = buffer->read_ahead;
1161 if (get_effective_char (buffer) == wanted)
1165 /* Restore state. */
1167 buffer->read_ahead = c;
1172 lex_token (pfile, result)
1177 cpp_buffer *buffer = pfile->buffer;
1178 const unsigned char *comment_start;
1182 result->line = CPP_BUF_LINE (buffer);
1184 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1186 c = buffer->read_ahead;
1187 if (c == EOF && buffer->cur < buffer->rlimit)
1194 buffer->read_ahead = EOF;
1198 /* Non-empty files should end in a newline. Testing
1199 skip_newlines ensures we only emit the warning once. */
1200 if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1201 && pfile->state.skip_newlines)
1202 cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1203 "no newline at end of file");
1204 result->type = CPP_EOF;
1207 case ' ': case '\t': case '\f': case '\v': case '\0':
1208 skip_whitespace (pfile, c);
1209 result->flags |= PREV_WHITE;
1212 case '\n': case '\r':
1213 result->type = CPP_EOF;
1214 handle_newline (buffer, c);
1215 /* Handling here will change significantly when moving to
1217 if (pfile->state.skip_newlines)
1219 result->flags &= ~PREV_WHITE; /* Clear any whitespace flag. */
1226 /* These could start an escaped newline, or '?' a trigraph. Let
1227 skip_escaped_newlines do all the work. */
1229 unsigned int lineno = buffer->lineno;
1231 c = skip_escaped_newlines (buffer, c);
1232 if (lineno != buffer->lineno)
1233 /* We had at least one escaped newline of some sort, and the
1234 next character is in buffer->read_ahead. Update the
1235 token's line and column. */
1238 /* We are either the original '?' or '\\', or a trigraph. */
1239 result->type = CPP_QUERY;
1240 buffer->read_ahead = EOF;
1242 result->type = CPP_BACKSLASH;
1249 case '0': case '1': case '2': case '3': case '4':
1250 case '5': case '6': case '7': case '8': case '9':
1251 result->type = CPP_NUMBER;
1252 parse_number (pfile, &result->val.str, c);
1256 if (!CPP_OPTION (pfile, dollars_in_ident))
1258 /* Fall through... */
1261 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1262 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1263 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1264 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1266 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1267 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1268 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1269 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1271 result->type = CPP_NAME;
1272 result->val.node = parse_identifier (pfile, c);
1274 /* 'L' may introduce wide characters or strings. */
1275 if (result->val.node == pfile->spec_nodes->n_L)
1277 c = buffer->read_ahead; /* For make_string. */
1278 if (c == '\'' || c == '"')
1280 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1284 /* Convert named operators to their proper types. */
1285 else if (result->val.node->type == T_OPERATOR)
1287 result->flags |= NAMED_OP;
1288 result->type = result->val.node->value.code;
1294 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1296 parse_string (pfile, result, c);
1300 result->type = CPP_DIV;
1301 c = get_effective_char (buffer);
1303 ACCEPT_CHAR (CPP_DIV_EQ);
1306 comment_start = buffer->cur;
1308 /* Skip_block_comment updates buffer->read_ahead. */
1309 if (skip_block_comment (pfile))
1310 cpp_error_with_line (pfile, result->line, result->col,
1311 "unterminated comment");
1312 if (!pfile->state.save_comments)
1314 result->flags |= PREV_WHITE;
1318 /* Save the comment as a token in its own right. */
1319 save_comment (pfile, result, comment_start);
1323 /* We silently allow C++ comments in system headers,
1324 irrespective of conformance mode, because lots of
1325 broken systems do that and trying to clean it up in
1326 fixincludes is a nightmare. */
1327 if (CPP_IN_SYSTEM_HEADER (pfile))
1328 goto do_line_comment;
1329 if (CPP_OPTION (pfile, cplusplus_comments))
1331 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1332 && ! buffer->warned_cplusplus_comments)
1335 "C++ style comments are not allowed in ISO C89");
1337 "(this will be reported only once per input file)");
1338 buffer->warned_cplusplus_comments = 1;
1342 comment_start = buffer->cur;
1344 /* Skip_line_comment updates buffer->read_ahead. */
1345 if (skip_line_comment (buffer))
1346 cpp_warning_with_line (pfile, result->line, result->col,
1347 "multi-line comment");
1349 if (!pfile->state.save_comments)
1351 result->flags |= PREV_WHITE;
1355 /* Save the comment as a token in its own right. */
1356 save_comment (pfile, result, comment_start);
1362 if (pfile->state.angled_headers)
1364 result->type = CPP_HEADER_NAME;
1365 c = '>'; /* terminator. */
1369 result->type = CPP_LESS;
1370 c = get_effective_char (buffer);
1372 ACCEPT_CHAR (CPP_LESS_EQ);
1375 ACCEPT_CHAR (CPP_LSHIFT);
1376 if (get_effective_char (buffer) == '=')
1377 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1379 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1381 ACCEPT_CHAR (CPP_MIN);
1382 if (get_effective_char (buffer) == '=')
1383 ACCEPT_CHAR (CPP_MIN_EQ);
1385 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1387 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1388 result->flags |= DIGRAPH;
1390 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1392 ACCEPT_CHAR (CPP_OPEN_BRACE);
1393 result->flags |= DIGRAPH;
1398 result->type = CPP_GREATER;
1399 c = get_effective_char (buffer);
1401 ACCEPT_CHAR (CPP_GREATER_EQ);
1404 ACCEPT_CHAR (CPP_RSHIFT);
1405 if (get_effective_char (buffer) == '=')
1406 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1408 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1410 ACCEPT_CHAR (CPP_MAX);
1411 if (get_effective_char (buffer) == '=')
1412 ACCEPT_CHAR (CPP_MAX_EQ);
1418 const unsigned char *saved_cur;
1421 /* Save state to avoid needing to pass 2 chars to parse_number. */
1423 c1 = get_effective_char (buffer);
1424 /* All known character sets have 0...9 contiguous. */
1425 if (c1 >= '0' && c1 <= '9')
1431 result->type = CPP_DOT;
1434 if (get_effective_char (buffer) == '.')
1435 ACCEPT_CHAR (CPP_ELLIPSIS);
1438 buffer->read_ahead = EOF;
1442 else if (c1 == '*' && CPP_OPTION (pfile, cplusplus))
1443 ACCEPT_CHAR (CPP_DOT_STAR);
1448 result->type = CPP_MOD;
1449 c = get_effective_char (buffer);
1451 ACCEPT_CHAR (CPP_MOD_EQ);
1452 else if (CPP_OPTION (pfile, digraphs))
1456 result->flags |= DIGRAPH;
1457 ACCEPT_CHAR (CPP_HASH);
1458 if (get_effective_char (buffer) == '%')
1459 check_long_token (buffer, result, ':', CPP_PASTE);
1463 result->flags |= DIGRAPH;
1464 ACCEPT_CHAR (CPP_CLOSE_BRACE);
1470 result->type = CPP_PLUS;
1471 c = get_effective_char (buffer);
1473 ACCEPT_CHAR (CPP_PLUS_EQ);
1475 ACCEPT_CHAR (CPP_PLUS_PLUS);
1479 result->type = CPP_MINUS;
1480 c = get_effective_char (buffer);
1483 ACCEPT_CHAR (CPP_DEREF);
1484 if (CPP_OPTION (pfile, cplusplus)
1485 && get_effective_char (buffer) == '*')
1486 ACCEPT_CHAR (CPP_DEREF_STAR);
1489 ACCEPT_CHAR (CPP_MINUS_EQ);
1491 ACCEPT_CHAR (CPP_MINUS_MINUS);
1495 result->type = CPP_MULT;
1496 if (get_effective_char (buffer) == '=')
1497 ACCEPT_CHAR (CPP_MULT_EQ);
1501 result->type = CPP_EQ;
1502 if (get_effective_char (buffer) == '=')
1503 ACCEPT_CHAR (CPP_EQ_EQ);
1507 result->type = CPP_NOT;
1508 if (get_effective_char (buffer) == '=')
1509 ACCEPT_CHAR (CPP_NOT_EQ);
1513 result->type = CPP_AND;
1514 c = get_effective_char (buffer);
1516 ACCEPT_CHAR (CPP_AND_EQ);
1518 ACCEPT_CHAR (CPP_AND_AND);
1522 result->type = CPP_HASH;
1523 if (get_effective_char (buffer) == '#')
1524 ACCEPT_CHAR (CPP_PASTE);
1528 result->type = CPP_OR;
1529 c = get_effective_char (buffer);
1531 ACCEPT_CHAR (CPP_OR_EQ);
1533 ACCEPT_CHAR (CPP_OR_OR);
1537 result->type = CPP_XOR;
1538 if (get_effective_char (buffer) == '=')
1539 ACCEPT_CHAR (CPP_XOR_EQ);
1543 result->type = CPP_COLON;
1544 c = get_effective_char (buffer);
1545 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1546 ACCEPT_CHAR (CPP_SCOPE);
1547 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1549 result->flags |= DIGRAPH;
1550 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1554 case '~': result->type = CPP_COMPL; break;
1555 case ',': result->type = CPP_COMMA; break;
1556 case '(': result->type = CPP_OPEN_PAREN; break;
1557 case ')': result->type = CPP_CLOSE_PAREN; break;
1558 case '[': result->type = CPP_OPEN_SQUARE; break;
1559 case ']': result->type = CPP_CLOSE_SQUARE; break;
1560 case '{': result->type = CPP_OPEN_BRACE; break;
1561 case '}': result->type = CPP_CLOSE_BRACE; break;
1562 case ';': result->type = CPP_SEMICOLON; break;
1565 if (CPP_OPTION (pfile, objc))
1567 /* In Objective C, '@' may begin keywords or strings, like
1568 @keyword or @"string". It would be nice to call
1569 get_effective_char here and test the result. However, we
1570 would then need to pass 2 characters to parse_identifier,
1571 making it ugly and slowing down its main loop. Instead,
1572 we assume we have an identifier, and recover if not. */
1573 result->type = CPP_NAME;
1574 result->val.node = parse_identifier (pfile, c);
1575 if (result->val.node->length != 1)
1578 /* OK, so it wasn't an identifier. Maybe a string? */
1579 if (buffer->read_ahead == '"')
1582 ACCEPT_CHAR (CPP_OSTRING);
1590 result->type = CPP_OTHER;
1591 result->val.aux = c;
1597 * The tokenizer's main loop. Returns a token list, representing a
1598 * logical line in the input file. On EOF after some tokens have
1599 * been processed, we return immediately. Then in next call, or if
1600 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1601 * token is placed in the list.
1605 lex_line (pfile, list)
1609 unsigned int first_token;
1610 cpp_token *cur_token, *first;
1611 cpp_buffer *buffer = pfile->buffer;
1613 if (!(list->flags & LIST_OFFSET))
1616 pfile->state.in_lex_line = 1;
1617 if (pfile->buffer->cur == pfile->buffer->buf)
1618 list->flags |= BEG_OF_FILE;
1621 pfile->state.in_directive = 0;
1622 pfile->state.angled_headers = 0;
1623 pfile->state.skip_newlines = 1;
1624 pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1625 first_token = list->tokens_used;
1626 list->file = buffer->nominal_fname;
1630 if (list->tokens_used >= list->tokens_cap)
1631 _cpp_expand_token_space (list, 256);
1633 cur_token = list->tokens + list->tokens_used;
1634 lex_token (pfile, cur_token);
1636 if (pfile->state.skip_newlines)
1638 pfile->state.skip_newlines = 0;
1639 list->line = buffer->lineno;
1640 if (cur_token->type == CPP_HASH)
1642 pfile->state.in_directive = 1;
1643 pfile->state.save_comments = 0;
1644 pfile->state.indented = cur_token->flags & PREV_WHITE;
1646 /* 6.10.3.10: Within the sequence of preprocessing tokens
1647 making up the invocation of a function-like macro, new
1648 line is considered a normal white-space character. */
1649 else if (first_token != 0)
1650 cur_token->flags |= PREV_WHITE;
1652 else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1654 if (cur_token->type == CPP_NUMBER)
1655 list->directive = _cpp_check_linemarker (pfile, cur_token);
1657 list->directive = _cpp_check_directive (pfile, cur_token);
1660 /* _cpp_get_line assumes list->tokens_used refers to the current
1661 token being lexed. So do this after _cpp_check_directive to
1662 get the warnings therein correct. */
1663 list->tokens_used++;
1665 while (cur_token->type != CPP_EOF);
1667 /* All tokens are allocated, so the memory location is fixed. */
1668 first = &list->tokens[first_token];
1669 first->flags |= BOL;
1670 pfile->first_directive_token = first;
1672 /* Don't complain about the null directive, nor directives in
1673 assembly source: we don't know where the comments are, and # may
1674 introduce assembler pseudo-ops. Don't complain about invalid
1675 directives in skipped conditional groups (6.10 p4). */
1676 if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1677 && !CPP_OPTION (pfile, lang_asm))
1679 if (cur_token > first + 1)
1681 if (first[1].type == CPP_NAME)
1682 cpp_error_with_line (pfile, first->line, first->col,
1683 "invalid preprocessing directive #%s",
1684 first[1].val.node->name);
1686 cpp_error_with_line (pfile, first->line, first->col,
1687 "invalid preprocessing directive");
1690 /* Discard this line to prevent further errors from cc1. */
1691 _cpp_clear_toklist (list);
1695 /* Drop the EOF unless really at EOF or in a directive. */
1696 if (cur_token != first && !KNOWN_DIRECTIVE (list)
1697 && pfile->done_initializing)
1698 list->tokens_used--;
1700 pfile->state.in_lex_line = 0;
1703 /* Write the spelling of a token TOKEN, with any appropriate
1704 whitespace before it, to FP. PREV is the previous token, which
1705 is used to determine if we need to shove in an extra space in order
1706 to avoid accidental token paste. If WHITE is 0, do not insert any
1707 leading whitespace. */
1709 output_token (pfile, fp, token, prev, white)
1712 const cpp_token *token, *prev;
1719 if (token->col && (token->flags & BOL))
1721 /* Supply enough whitespace to put this token in its original
1722 column. Don't bother trying to reconstruct tabs; we can't
1723 get it right in general, and nothing ought to care. (Yes,
1724 some things do care; the fault lies with them.) */
1725 unsigned int spaces = token->col - 1;
1730 else if (token->flags & PREV_WHITE)
1733 /* Check for and prevent accidental token pasting.
1734 In addition to the cases handled by can_paste, consider
1736 a + ++b - if there is not a space between the + and ++, it
1737 will be misparsed as a++ + b. But + ## ++ doesn't produce
1740 && (can_paste (pfile, prev, token, &dummy) != CPP_EOF
1741 || (prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1742 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS)))
1746 switch (TOKEN_SPELL (token))
1748 case SPELL_OPERATOR:
1750 const unsigned char *spelling;
1752 if (token->flags & DIGRAPH)
1753 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1754 else if (token->flags & NAMED_OP)
1757 spelling = TOKEN_NAME (token);
1759 ufputs (spelling, fp);
1765 ufputs (token->val.node->name, fp);
1770 int left, right, tag;
1771 switch (token->type)
1773 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1774 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1775 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1776 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1777 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1778 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1779 default: left = '\0'; right = '\0'; tag = '\0'; break;
1781 if (tag) putc (tag, fp);
1782 if (left) putc (left, fp);
1783 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1784 if (right) putc (right, fp);
1789 putc (token->val.aux, fp);
1793 /* Placemarker or EOF - no output. (Macro args are handled
1799 /* Dump the original user's spelling of argument index ARG_NO to the
1800 macro whose expansion is LIST. */
1802 dump_param_spelling (fp, list, arg_no)
1804 const cpp_toklist *list;
1805 unsigned int arg_no;
1807 const U_CHAR *param = list->namebuf;
1810 param += ustrlen (param) + 1;
1814 /* Output all the tokens of LIST, starting at TOKEN, to FP. */
1816 cpp_output_list (pfile, fp, list, token)
1819 const cpp_toklist *list;
1820 const cpp_token *token;
1822 const cpp_token *limit = list->tokens + list->tokens_used;
1823 const cpp_token *prev = 0;
1826 while (token < limit)
1828 /* XXX Find some way we can write macro args from inside
1829 output_token/spell_token. */
1830 if (token->type == CPP_MACRO_ARG)
1832 if (white && token->flags & PREV_WHITE)
1834 if (token->flags & STRINGIFY_ARG)
1836 dump_param_spelling (fp, list, token->val.aux);
1839 output_token (pfile, fp, token, prev, white);
1840 if (token->flags & PASTE_LEFT)
1849 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1850 already contain enough space to hold the token's spelling.
1851 Returns a pointer to the character after the last character
1854 static unsigned char *
1855 spell_token (pfile, token, buffer)
1856 cpp_reader *pfile; /* Would be nice to be rid of this... */
1857 const cpp_token *token;
1858 unsigned char *buffer;
1860 switch (TOKEN_SPELL (token))
1862 case SPELL_OPERATOR:
1864 const unsigned char *spelling;
1867 if (token->flags & DIGRAPH)
1868 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1869 else if (token->flags & NAMED_OP)
1872 spelling = TOKEN_NAME (token);
1874 while ((c = *spelling++) != '\0')
1881 memcpy (buffer, token->val.node->name, token->val.node->length);
1882 buffer += token->val.node->length;
1887 int left, right, tag;
1888 switch (token->type)
1890 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1891 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1892 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1893 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1894 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1895 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1896 default: left = '\0'; right = '\0'; tag = '\0'; break;
1898 if (tag) *buffer++ = tag;
1899 if (left) *buffer++ = left;
1900 memcpy (buffer, token->val.str.text, token->val.str.len);
1901 buffer += token->val.str.len;
1902 if (right) *buffer++ = right;
1907 *buffer++ = token->val.aux;
1911 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1918 /* Macro expansion algorithm.
1920 Macro expansion is implemented by a single-pass algorithm; there are
1921 no rescan passes involved. cpp_get_token expands just enough to be
1922 able to return a token to the caller; a consequence is that when it
1923 returns the preprocessor can be in a state of mid-expansion. The
1924 algorithm does not work by fully expanding a macro invocation into
1925 some kind of token list, and then returning them one by one.
1927 Our expansion state is recorded in a context stack. We start out with
1928 a single context on the stack, let's call it base context. This
1929 consists of the token list returned by lex_line that forms the next
1930 logical line in the source file.
1932 The current level in the context stack is stored in the cur_context
1933 member of the cpp_reader structure. The context it references keeps,
1934 amongst other things, a count of how many tokens form that context and
1935 our position within those tokens.
1937 Fundamentally, calling cpp_get_token will return the next token from
1938 the current context. If we're at the end of the current context, that
1939 context is popped from the stack first, unless it is the base context,
1940 in which case the next logical line is lexed from the source file.
1942 However, before returning the token, if it is a CPP_NAME token
1943 _cpp_get_token checks to see if it is a macro and if it is enabled.
1944 Each time it encounters a macro name, it calls push_macro_context.
1945 This function checks that the macro should be expanded (with
1946 is_macro_disabled), and if so pushes a new macro context on the stack
1947 which becomes the current context. It then loops back to read the
1948 first token of the macro context.
1950 A macro context basically consists of the token list representing the
1951 macro's replacement list, which was saved in the hash table by
1952 save_macro_expansion when its #define statement was parsed. If the
1953 macro is function-like, it also contains the tokens that form the
1954 arguments to the macro. I say more about macro arguments below, but
1955 for now just saying that each argument is a set of pointers to tokens
1958 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1959 token. This represents an argument passed to the macro, with the
1960 argument number stored in the token's AUX field. The argument should
1961 be substituted; this is achieved by pushing an "argument context". An
1962 argument context just refers to the tokens forming the argument,
1963 which are obtained directly from the macro context. The STRINGIFY
1964 flag on a CPP_MACRO_ARG token indicates that the argument should be
1967 Here are a few simple rules the context stack obeys:-
1969 1) The lex_line token list is always context zero.
1971 2) Context 1, if it exists, must be a macro context.
1973 3) An argument context can only appear above a macro context.
1975 4) A macro context can appear above the base context, another macro
1976 context, or an argument context.
1978 5) These imply that the minimal level of an argument context is 2.
1980 The only tricky thing left is ensuring that macros are enabled and
1981 disabled correctly. The algorithm controls macro expansion by the
1982 level of the context a token is taken from in the context stack. If a
1983 token is taken from a level equal to no_expand_level (a member of
1984 struct cpp_reader), no expansion is performed.
1986 When popping a context off the stack, if no_expand_level equals the
1987 level of the popped context, it is reduced by one to match the new
1988 context level, so that expansion is still disabled. It does not
1989 increase if a context is pushed, though. It starts out life as
1990 UINT_MAX, which has the effect that initially macro expansion is
1991 enabled. I explain how this mechanism works below.
1993 The standard requires:-
1995 1) Arguments to be fully expanded before substitution.
1997 2) Stringified arguments to not be expanded, nor the tokens
1998 immediately surrounding a ## operator.
2000 3) Continual rescanning until there are no more macros left to
2003 4) Once a macro has been expanded in stage 1) or 3), it cannot be
2004 expanded again during later rescans. This prevents infinite
2007 The first thing to observe is that stage 3) is mostly redundant.
2008 Since a macro is disabled once it has been expanded, how can a rescan
2009 find an unexpanded macro name? There are only two cases where this is
2012 a) If the macro name results from a token paste operation.
2014 b) If the macro in question is a function-like macro that hasn't
2015 already been expanded because previously there was not the required
2016 '(' token immediately following it. This is only possible when an
2017 argument is substituted, and after substitution the last token of
2018 the argument can bind with a parenthesis appearing in the tokens
2019 following the substitution. Note that if the '(' appears within the
2020 argument, the ')' must too, as expanding macro arguments cannot
2021 "suck in" tokens outside the argument.
2023 So we tackle this as follows. When parsing the macro invocation for
2024 arguments, we record the tokens forming each argument as a list of
2025 pointers to those tokens. We do not expand any tokens that are "raw",
2026 i.e. directly from the macro invocation, but other tokens that come
2027 from (nested) argument substitution are fully expanded.
2029 This is achieved by setting the no_expand_level to that of the macro
2030 invocation. A CPP_MACRO_ARG token never appears in the list of tokens
2031 forming an argument, because parse_args (indirectly) calls
2032 get_raw_token which automatically pushes argument contexts and traces
2033 into them. Since these contexts are at a higher level than the
2034 no_expand_level, they get fully macro expanded.
2036 "Raw" and non-raw tokens are separated in arguments by null pointers,
2037 with the policy that the initial state of an argument is raw. If the
2038 first token is not raw, it should be preceded by a null pointer. When
2039 tracing through the tokens of an argument context, each time
2040 get_raw_token encounters a null pointer, it toggles the flag
2043 This flag, when set, indicates to is_macro_disabled that we are
2044 reading raw tokens which should be macro-expanded. Similarly, if
2045 clear, is_macro_disabled suppresses re-expansion.
2047 It's probably time for an example.
2051 #define xstr(y) str(y hash)
2053 xstr(hash) // "# hash"
2055 In the invocation of str, parse_args turns off macro expansion and so
2056 parses the argument as <hash>. This is the only token (pointer)
2057 passed as the argument to str. Since <hash> is raw there is no need
2058 for an initial null pointer. stringify_arg is called from
2059 get_raw_token when tracing through the expansion of str, since the
2060 argument has the STRINGIFY flag set. stringify_arg turns off
2061 macro expansion by setting the no_expand_level to that of the argument
2062 context. Thus it gets the token <hash> and stringifies it to "hash"
2065 Similarly, xstr is passed <hash>. However, when parse_args is parsing
2066 the invocation of str() in xstr's expansion, get_raw_token encounters
2067 a CPP_MACRO_ARG token for y. Transparently to parse_args, it pushes
2068 an argument context, and enters the tokens of the argument,
2069 i.e. <hash>. This is at a higher context level than parse_args
2070 disabled, and so is_macro_disabled permits expansion of it and a macro
2071 context is pushed on top of the argument context. This contains the
2072 <#> token, and the end result is that <hash> is macro expanded.
2073 However, after popping off the argument context, the <hash> of xstr's
2074 expansion does not get macro expanded because we're back at the
2075 no_expand_level. The end result is that the argument passed to str is
2076 <NULL> <#> <NULL> <hash>. Note the nulls - policy is we start off
2077 raw, <#> is not raw, but then <hash> is.
2082 /* Free the storage allocated for macro arguments. */
2084 free_macro_args (args)
2088 free ((PTR) args->tokens);
2093 /* Determines if a macro has been already used (and is therefore
2096 is_macro_disabled (pfile, expansion, token)
2098 const cpp_toklist *expansion;
2099 const cpp_token *token;
2101 cpp_context *context = CURRENT_CONTEXT (pfile);
2103 /* Arguments on either side of ## are inserted in place without
2104 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2105 occurs during a later rescan pass. The effect is that we expand
2106 iff we would as part of the macro's expansion list, so we should
2107 drop to the macro's context. */
2108 if (IS_ARG_CONTEXT (context))
2110 if (token->flags & PASTED)
2112 else if (!(context->flags & CONTEXT_RAW))
2114 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2118 /* Have we already used this macro? */
2119 while (context->level > 0)
2121 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2123 /* Raw argument tokens are judged based on the token list they
2125 if (context->flags & CONTEXT_RAW)
2126 context = pfile->contexts + context->level;
2131 /* Function-like macros may be disabled if the '(' is not in the
2132 current context. We check this without disrupting the context
2134 if (expansion->paramc >= 0)
2136 const cpp_token *next;
2137 unsigned int prev_nme;
2139 context = CURRENT_CONTEXT (pfile);
2140 /* Drop down any contexts we're at the end of: the '(' may
2141 appear in lower macro expansions, or in the rest of the file. */
2142 while (context->posn == context->count && context > pfile->contexts)
2145 /* If we matched, we are disabled, as we appear in the
2146 expansion of each macro we meet. */
2147 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2151 prev_nme = pfile->no_expand_level;
2152 pfile->no_expand_level = context - pfile->contexts;
2153 next = _cpp_get_token (pfile);
2154 restore_macro_expansion (pfile, prev_nme);
2155 if (next->type != CPP_OPEN_PAREN)
2157 _cpp_push_token (pfile, next);
2158 if (CPP_WTRADITIONAL (pfile))
2160 "function macro %s must be used with arguments in traditional C",
2161 token->val.node->name);
2169 /* Add a token to the set of tokens forming the arguments to the macro
2170 being parsed in parse_args.  ARGS is the argument store being filled;
TOKEN is the pointer appended to it.  TOKEN may be a null pointer:
parse_arg passes 0 here, and null pointers are what separate raw from
non-raw tokens within an argument.  The pointer array is enlarged with
xrealloc whenever it has no free slot left. */
2172 save_token (args, token)
2174 const cpp_token *token;
2176 if (args->used == args->capacity) /* No free slot left: grow first. */
2178 args->capacity += args->capacity + 100; /* New capacity = 2 * old + 100. */
2179 args->tokens = (const cpp_token **)
2180 xrealloc ((PTR) args->tokens,
2181 args->capacity * sizeof (const cpp_token *));
2183 args->tokens[args->used++] = token; /* Append and bump the used count. */
2186 /* Take and save raw tokens until we finish one argument. Empty
2187 arguments are saved as a single CPP_PLACEMARKER token. */
2188 static const cpp_token *
2189 parse_arg (pfile, var_args, paren_context, args, pcount)
2192 unsigned int paren_context;
2194 unsigned int *pcount;
2196 const cpp_token *token;
2197 unsigned int paren = 0, count = 0;
2198 int raw, was_raw = 1;
2200 for (count = 0;; count++)
2202 token = _cpp_get_token (pfile);
2204 switch (token->type)
2209 case CPP_OPEN_PAREN:
2213 case CPP_CLOSE_PAREN:
2219 /* Commas are not terminators within parentheses or var_args. */
2220 if (paren || var_args)
2224 case CPP_EOF: /* Error reported by caller. */
2228 raw = pfile->cur_context <= paren_context;
2232 save_token (args, 0);
2235 save_token (args, token);
2241 /* Duplicate the placemarker. Then we can set its flags and
2242 position and safely be using more than one. */
2243 save_token (args, duplicate_token (pfile, &placemarker_token));
2251 /* This macro returns true if the argument starting at offset O of arglist
2252 A is empty - that is, it's either a single PLACEMARKER token, or a null
2253 pointer followed by a PLACEMARKER. */
2255 #define empty_argument(A, O) \
2256 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2257 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2259 /* Parse the arguments making up a macro invocation. Nested arguments
2260 are automatically macro expanded, but immediate macros are not
2261 expanded; this enables e.g. operator # to work correctly. Returns
2262 non-zero on error. */
2264 parse_args (pfile, hp, args)
2269 const cpp_token *token;
2270 const cpp_toklist *macro;
2271 unsigned int total = 0;
2272 unsigned int paren_context = pfile->cur_context;
2275 macro = hp->value.expansion;
2280 token = parse_arg (pfile, (argc + 1 == macro->paramc
2281 && (macro->flags & VAR_ARGS)),
2282 paren_context, args, &count);
2283 if (argc < macro->paramc)
2286 args->ends[argc] = total;
2290 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2292 if (token->type == CPP_EOF)
2294 cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
2297 else if (argc < macro->paramc)
2299 /* A rest argument is allowed to not appear in the invocation at all.
2300 e.g. #define debug(format, args...) ...
2302 This is exactly the same as if the rest argument had received no
2303 tokens - debug("string",); This extension is deprecated. */
2305 if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2307 /* Duplicate the placemarker. Then we can set its flags and
2308 position and safely be using more than one. */
2309 cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2310 pm->flags = VOID_REST;
2311 save_token (args, pm);
2312 args->ends[argc] = total + 1;
2314 if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2315 cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2321 cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2325 /* An empty argument to an empty function-like macro is fine. */
2326 else if (argc > macro->paramc
2327 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2329 cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2336 /* Adds backslashes before all backslashes and double quotes appearing
2337 in strings. Non-printable characters are converted to octal. */
2339 quote_string (dest, src, len)
2348 if (c == '\\' || c == '"')
2359 sprintf ((char *) dest, "\\%03o", c);
2368 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2369 CPP_STRING token containing TEXT in quoted form.  TEXT is LEN bytes
long.  The buffer is sized at LEN * 4 bytes, which covers the output
characters of the worst case: a four-character octal escape (the
"\\%03o" format used by quote_string) for every input byte.
NOTE(review): quote_string writes those escapes with sprintf, which
also stores a terminating NUL after the escape.  If the last byte of
TEXT needs an octal escape and the output has already reached
LEN * 4 - 4 bytes, that NUL appears to land one byte past the end of
the buffer -- confirm against quote_string and consider allocating
LEN * 4 + 1 bytes. */
2371 make_string_token (token, text, len)
2378 buf = (U_CHAR *) xmalloc (len * 4); /* Worst-case quoted size; see note above. */
2379 token->type = CPP_STRING;
2381 token->val.str.text = buf;
2382 token->val.str.len = quote_string (buf, text, len) - buf; /* Bytes actually written. */
2386 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2387 evaluating to NUMBER. */
2389 alloc_number_token (pfile, number)
2396 result = get_temp_token (pfile);
2398 sprintf (buf, "%d", number);
2400 result->type = CPP_NUMBER;
2402 result->val.str.text = (U_CHAR *) buf;
2403 result->val.str.len = strlen (buf);
2407 /* Returns a temporary token from the temporary token store of PFILE.
Three counters in PFILE drive the store: temp_used is the number of
tokens currently handed out, temp_alloced is the number of token
objects that have been xmalloc'ed so far, and temp_cap is the number
of slots in the temp_tokens pointer array.  A token object that was
allocated earlier and since released is handed out again; a new one
is allocated only when all allocated objects are in use. */
2409 get_temp_token (pfile)
2412 if (pfile->temp_used == pfile->temp_alloced) /* Every allocated token is in use. */
2414 if (pfile->temp_used == pfile->temp_cap) /* Pointer array has no spare slot. */
2416 pfile->temp_cap += pfile->temp_cap + 20; /* New capacity = 2 * old + 20. */
2417 pfile->temp_tokens = (cpp_token **) xrealloc
2418 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2420 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2421 (sizeof (cpp_token)); /* One fresh token object in the next slot. */
2424 return pfile->temp_tokens[pfile->temp_used++]; /* Hand out the next token. */
2427 /* Release (not free) for re-use the temporary tokens of PFILE.
The token objects themselves stay allocated; only the count of tokens
in use is wound back to zero.  Text owned by string-spelled tokens is
freed here, and the pointer cleared, as each token is released. */
2429 release_temp_tokens (pfile)
2432 while (pfile->temp_used) /* Walk back over every token handed out. */
2434 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2436 if (TOKEN_SPELL (token) == SPELL_STRING) /* Token carries its own text. */
2438 free ((char *) token->val.str.text);
2439 token->val.str.text = 0; /* Leave no dangling pointer in the reusable token. */
2444 /* Free all of PFILE's dynamically-allocated temporary tokens. */
2446 _cpp_free_temp_tokens (pfile)
2449 if (pfile->temp_tokens)
2451 /* It is possible, though unlikely (looking for '(' of a funlike
2452 macro into EOF), that we haven't released the tokens yet. */
2453 release_temp_tokens (pfile);
2454 while (pfile->temp_alloced)
2455 free (pfile->temp_tokens[--pfile->temp_alloced]);
2456 free (pfile->temp_tokens);
2461 free ((char *) pfile->date->val.str.text);
2463 free ((char *) pfile->time->val.str.text);
2468 /* Copy TOKEN into a temporary token from PFILE's store.
For a string-spelled token the text is copied into a freshly
xmalloc'ed buffer of val.str.len bytes, so the duplicate owns its
text: release_temp_tokens frees the text of string-spelled temporary
tokens, and that must not free the original's text. */
2470 duplicate_token (pfile, token)
2472 const cpp_token *token;
2474 cpp_token *result = get_temp_token (pfile); /* Destination comes from the temp store. */
2477 if (TOKEN_SPELL (token) == SPELL_STRING)
2479 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2480 memcpy (buff, token->val.str.text, token->val.str.len);
2481 result->val.str.text = buff; /* Point the duplicate at its private copy. */
2486 /* Determine whether two tokens can be pasted together, and if so,
2487 what the resulting token is. Returns CPP_EOF if the tokens cannot
2488 be pasted, or the appropriate type for the merged token if they
2490 static enum cpp_ttype
2491 can_paste (pfile, token1, token2, digraph)
2493 const cpp_token *token1, *token2;
2496 enum cpp_ttype a = token1->type, b = token2->type;
2497 int cxx = CPP_OPTION (pfile, cplusplus);
2499 /* Treat named operators as if they were ordinary NAMEs. */
2500 if (token1->flags & NAMED_OP)
2502 if (token2->flags & NAMED_OP)
2505 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2506 return a + (CPP_EQ_EQ - CPP_EQ);
2511 if (b == a) return CPP_RSHIFT;
2512 if (b == CPP_QUERY && cxx) return CPP_MAX;
2513 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2516 if (b == a) return CPP_LSHIFT;
2517 if (b == CPP_QUERY && cxx) return CPP_MIN;
2518 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2519 if (CPP_OPTION (pfile, digraphs))
2522 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2524 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2528 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2529 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2530 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2533 if (b == a) return CPP_MINUS_MINUS;
2534 if (b == CPP_GREATER) return CPP_DEREF;
2537 if (b == a && cxx) return CPP_SCOPE;
2538 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2539 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2543 if (CPP_OPTION (pfile, digraphs))
2545 if (b == CPP_GREATER)
2546 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2548 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2552 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2555 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2556 if (b == CPP_NUMBER) return CPP_NUMBER;
2560 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2562 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2566 if (b == CPP_NAME) return CPP_NAME;
2568 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2570 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2572 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2576 if (b == CPP_NUMBER) return CPP_NUMBER;
2577 if (b == CPP_NAME) return CPP_NUMBER;
2578 if (b == CPP_DOT) return CPP_NUMBER;
2579 /* Numbers cannot have length zero, so this is safe. */
2580 if ((b == CPP_PLUS || b == CPP_MINUS)
2581 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2586 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2588 if (b == CPP_NAME) return CPP_NAME;
2589 if (b == CPP_STRING) return CPP_OSTRING;
2599 /* Check if TOKEN is to be ##-pasted with the token after it. */
/* The loop below runs while TOKEN carries PASTE_LEFT, or while the
   current context is flagged CONTEXT_PASTEL and has been read to its
   end (posn == count).  Each pass fetches the following token with
   pfile->paste_level set, then builds PASTED: a copy of one operand
   when the other is a placemarker, otherwise a token of the type that
   can_paste reports.  NOTE(review): several lines of this function,
   including its return statement, are not visible in this excerpt.  */
2600 static const cpp_token *
2601 maybe_paste_with_next (pfile, token)
2603 const cpp_token *token;
2606 const cpp_token *second;
2607 cpp_context *context = CURRENT_CONTEXT (pfile);
2609 /* Is this token on the LHS of ## ? */
2611 while ((token->flags & PASTE_LEFT)
2612 || ((context->flags & CONTEXT_PASTEL)
2613 && context->posn == context->count))
2615 /* Suppress macro expansion for next token, but don't conflict
2616 with the other method of suppression. If it is an argument,
2617 macro expansion within the argument will still occur. */
2618 pfile->paste_level = pfile->cur_context;
2619 second = _cpp_get_token (pfile);
2620 pfile->paste_level = 0;
2622 /* Ignore placemarker argument tokens (cannot be from an empty
2623 macro since macros are not expanded). */
2624 if (token->type == CPP_PLACEMARKER)
2625 pasted = duplicate_token (pfile, second);
2626 else if (second->type == CPP_PLACEMARKER)
2628 /* GCC has special extended semantics for , ## b where b is
2629 a varargs parameter: the comma disappears if b was given
2630 no actual arguments (not merely if b is an empty
2632 if (token->type == CPP_COMMA && second->flags & VOID_REST)
2633 pasted = duplicate_token (pfile, second);
2635 pasted = duplicate_token (pfile, token);
2640 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2642 if (type == CPP_EOF)
2644 if (CPP_OPTION (pfile, warn_paste))
2646 /* Do not complain about , ## <whatever> if
2647 <whatever> came from a variable argument, because
2648 the author probably intended the ## to trigger
2649 the special extended semantics (see above). */
2650 if (token->type == CPP_COMMA
2651 && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2652 && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2656 "pasting would not give a valid preprocessing token");
/* Hand SECOND back so that the next fetch returns it unpasted. */
2658 _cpp_push_token (pfile, second);
2659 /* A short term hack to safely clear the PASTE_LEFT flag. */
2660 pasted = duplicate_token (pfile, token);
2661 pasted->flags &= ~PASTE_LEFT;
2665 if (type == CPP_NAME || type == CPP_NUMBER)
2667 /* Join spellings. */
2670 pasted = get_temp_token (pfile);
/* Stack scratch space sized by the TOKEN_LEN of both operands.  */
2671 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2672 end = spell_token (pfile, token, buf);
2673 end = spell_token (pfile, second, end);
2676 if (type == CPP_NAME)
2677 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2680 pasted->val.str.text = uxstrdup (buf);
2681 pasted->val.str.len = end - buf;
2684 else if (type == CPP_WCHAR || type == CPP_WSTRING
2685 || type == CPP_OSTRING)
2686 pasted = duplicate_token (pfile, second);
2689 pasted = get_temp_token (pfile);
2690 pasted->val.integer = 0;
/* The type comes from can_paste; flags start as DIGRAPH or nothing.  */
2693 pasted->type = type;
2694 pasted->flags = digraph ? DIGRAPH : 0;
/* A pasted name whose hash node is T_OPERATOR is retyped to the
   operator code stored in the node and tagged NAMED_OP.  */
2696 if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2698 pasted->type = pasted->val.node->value.code;
2699 pasted->flags |= NAMED_OP;
2703 /* The pasted token gets the whitespace flags and position of the
2704 first token, the PASTE_LEFT flag of the second token, plus the
2705 PASTED flag to indicate it is the result of a paste. However, we
2706 want to preserve the DIGRAPH flag. */
2707 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2708 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2709 | (second->flags & PASTE_LEFT) | PASTED);
2710 pasted->col = token->col;
2711 pasted->line = token->line;
2713 /* See if there is another token to be pasted onto the one we just
2716 context = CURRENT_CONTEXT (pfile);
2722 /* Convert a token sequence to a single string token according to the
2723 rules of the ISO C #-operator. */
/* Pushes the argument context named by TOKEN, prevents macro expansion,
   and reads tokens until CPP_EOF, spelling each one into a heap buffer
   that starts at INIT_SIZE bytes and is grown with xrealloc.  Tokens of
   string or character type (ESCAPE) go through quote_string.  The
   buffer becomes the text of a CPP_STRING temporary token, and the
   previous expansion level is restored at the end.  NOTE(review): some
   lines, including the return, are not visible in this excerpt.  */
2724 #define INIT_SIZE 200
2726 stringify_arg (pfile, token)
2728 const cpp_token *token;
2731 unsigned char *main_buf;
2732 unsigned int prev_value, backslash_count = 0;
2733 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2735 push_arg_context (pfile, token);
2736 prev_value = prevent_macro_expansion (pfile);
2737 main_buf = (unsigned char *) xmalloc (buf_cap);
/* The result takes flags and position from the macro-argument TOKEN,
   which is then reused as the loop variable.  */
2739 result = get_temp_token (pfile);
2740 ASSIGN_FLAGS_AND_POS (result, token);
2742 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2746 unsigned int len = TOKEN_LEN (token);
2748 if (token->type == CPP_PLACEMARKER)
2751 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2752 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
/* Grow with INIT_SIZE bytes of slack beyond the immediate need.  */
2756 if (buf_used + len > buf_cap)
2758 buf_cap = buf_used + len + INIT_SIZE;
2759 main_buf = xrealloc (main_buf, buf_cap);
/* A single space is emitted only when WHITESPACE is set and the token
   had preceding white space.  */
2762 if (whitespace && (token->flags & PREV_WHITE))
2763 main_buf[buf_used++] = ' ';
/* BUF is either a separate allocation or the tail of MAIN_BUF; the
   conditions selecting between them are not visible here (presumably
   ESCAPE -- TODO confirm).  */
2766 buf = (unsigned char *) xmalloc (len);
2768 buf = main_buf + buf_used;
/* spell_token returns a pointer past what it wrote, so subtracting BUF
   yields the spelled length.  */
2770 len = spell_token (pfile, token, buf) - buf;
2773 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2780 if (token->type == CPP_BACKSLASH)
2783 backslash_count = 0;
2786 /* Ignore the final \ of invalid string literals. */
2787 if (backslash_count & 1)
2789 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2793 result->type = CPP_STRING;
2794 result->val.str.text = main_buf;
2795 result->val.str.len = buf_used;
2796 restore_macro_expansion (pfile, prev_value);
2800 /* Allocate more room on the context stack of PFILE. */
/* The new capacity is twice the old one plus 20 entries.  The array is
   resized with xrealloc, and both callers in this file re-read
   CURRENT_CONTEXT after calling this rather than keeping an older
   context pointer.  */
2802 expand_context_stack (pfile)
2805 pfile->context_cap += pfile->context_cap + 20;
2806 pfile->contexts = (cpp_context *)
2807 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2810 /* Push the context of macro NODE onto the context stack. TOKEN is
2811 the CPP_NAME token invoking the macro. */
/* NODE is taken from TOKEN->val.node.  When the expansion has
   paramc >= 0, a macro_args block is allocated and parse_args runs with
   macro expansion prevented.  Then cur_context is incremented (growing
   the stack when it reaches context_cap) and the new context is pointed
   at the expansion list of the macro.  _cpp_get_token tests the value
   returned from here; the return statements themselves are not visible
   in this excerpt.  */
2813 push_macro_context (pfile, token)
2815 const cpp_token *token;
2817 unsigned char orig_flags;
2819 cpp_context *context;
2820 cpp_hashnode *node = token->val.node;
2822 /* Token's flags may change when parsing args containing a nested
2823 invocation of this macro. */
2824 orig_flags = token->flags & (PREV_WHITE | BOL);
2826 if (node->value.expansion->paramc >= 0)
2828 unsigned int error, prev_nme;
2830 /* Allocate room for the argument contexts, and parse them. */
2831 args = (macro_args *) xmalloc (sizeof (macro_args));
/* One unsigned int per macro parameter.  */
2832 args->ends = (unsigned int *)
2833 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2838 prev_nme = prevent_macro_expansion (pfile);
2840 error = parse_args (pfile, node, args);
2842 restore_macro_expansion (pfile, prev_nme);
/* NOTE(review): the condition guarding this free is not visible;
   presumably it is the parse_args failure path -- confirm.  */
2845 free_macro_args (args);
2848 /* Set the level after the call to parse_args. */
2849 args->level = pfile->cur_context;
2852 /* Now push its context. */
2853 pfile->cur_context++;
2854 if (pfile->cur_context == pfile->context_cap)
2855 expand_context_stack (pfile);
2857 context = CURRENT_CONTEXT (pfile);
2858 context->u.list = node->value.expansion;
2859 context->args = args;
2861 context->count = context->u.list->tokens_used;
2862 context->level = pfile->cur_context;
2864 context->pushed_token = 0;
2866 /* Set the flags of the first token. We know there must
2867 be one, empty macros are a single placemarker token. */
2868 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2873 /* Push an argument to the current macro onto the context stack.
2874 TOKEN is the MACRO_ARG token representing the argument expansion. */
/* args->ends[] is used as end offsets into args->tokens: the argument
   numbered TOKEN->val.aux starts at ends[aux - 1] (or at 0 for the
   first argument) and stops at ends[aux].  */
2876 push_arg_context (pfile, token)
2878 const cpp_token *token;
2880 cpp_context *context;
2883 pfile->cur_context++;
2884 if (pfile->cur_context == pfile->context_cap)
2885 expand_context_stack (pfile);
/* The argument data is read from the context just below the new one.  */
2887 context = CURRENT_CONTEXT (pfile);
2888 args = context[-1].args;
/* count briefly holds the start offset, then becomes the length.  */
2890 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2891 context->u.arg = args->tokens + context->count;
2892 context->count = args->ends[token->val.aux] - context->count;
/* The level is copied from args->level rather than taken from the new
   stack index.  */
2895 context->level = args->level;
2896 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2897 context->pushed_token = 0;
2899 /* Set the flags of the first token. There is one. */
2901 const cpp_token *first = context->u.arg[0];
2903 first = context->u.arg[1];
2905 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2906 token->flags & (PREV_WHITE | BOL));
/* PASTE_LEFT on TOKEN marks this argument as the left operand of ##.
   A non-zero paste_level (set by maybe_paste_with_next around its
   fetch) marks it as the right operand.  */
2909 if (token->flags & PASTE_LEFT)
2910 context->flags |= CONTEXT_PASTEL;
2911 if (pfile->paste_level)
2912 context->flags |= CONTEXT_PASTER;
2915 /* "Unget" a token. It is effectively inserted in the token queue and
2916 will be returned by the next call to get_raw_token. */
/* Each context holds at most one pending token in pushed_token; a
   second push before the first is consumed is reported with cpp_ice.  */
2918 _cpp_push_token (pfile, token)
2920 const cpp_token *token;
2922 cpp_context *context = CURRENT_CONTEXT (pfile);
/* PREV is the token most recently read from this context.  What is
   done with it is not visible in this excerpt.  */
2924 if (context->posn > 0)
2926 const cpp_token *prev;
2927 if (IS_ARG_CONTEXT (context))
2928 prev = context->u.arg[context->posn - 1];
2930 prev = &context->u.list->tokens[context->posn - 1];
2939 if (context->pushed_token)
2940 cpp_ice (pfile, "two tokens pushed in a row");
2941 if (token->type != CPP_EOF)
2942 context->pushed_token = token;
2943 /* Don't push back a directive's CPP_EOF, step back instead. */
2944 else if (pfile->cur_context == 0)
2945 pfile->contexts[0].posn--;
2948 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2949 introducing the directive. */
/* The directive descriptor is read from pfile->token_list.directive.
   Unless it has the EXPAND flag, macro expansion is prevented for the
   duration of the handler call and restored afterwards.  The rest of
   the line is skipped once the handler returns.  */
2951 process_directive (pfile, token)
2953 const cpp_token *token;
2955 const struct directive *d = pfile->token_list.directive;
2958 /* Skip over the directive name. */
/* token[1] is the token that follows the CPP_HASH in the same array.
   NOTE(review): the ICE message below names TOKEN (the CPP_HASH), not
   token[1], which is the one that was tested -- confirm intent.  */
2959 if (token[1].type == CPP_NAME)
2960 _cpp_get_raw_token (pfile);
2961 else if (token[1].type != CPP_NUMBER)
2962 cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
2964 if (! (d->flags & EXPAND))
2965 prev_nme = prevent_macro_expansion (pfile);
2966 (void) (*d->handler) (pfile);
2967 if (! (d->flags & EXPAND))
2968 restore_macro_expansion (pfile, prev_nme);
2969 _cpp_skip_rest_of_line (pfile);
2972 /* The external interface to return the next token. All macro
2973 expansion and directive processing is handled internally, the
2974 caller only ever sees the output after preprocessing. */
/* Tokens come from _cpp_get_token.  Placemarkers are tested for, and a
   CPP_HASH with BOL set while token_list.directive is non-null is
   passed to process_directive.  The loop construct and the return are
   not visible in this excerpt.  */
2976 cpp_get_token (pfile)
2979 const cpp_token *token;
2980 /* Loop till we hit a non-directive, non-placemarker token. */
2983 token = _cpp_get_token (pfile);
2985 if (token->type == CPP_PLACEMARKER)
2988 if (token->type == CPP_HASH && token->flags & BOL
2989 && pfile->token_list.directive)
2991 process_directive (pfile, token);
2999 /* The internal interface to return the next token. There are two
3000 differences between the internal and external interfaces: the
3001 internal interface may return a PLACEMARKER token, and it does not
3002 process directives. */
/* Visible flow: fetch a raw token, test for CPP_EOF, handle the
   skipping and preprocessed-input cases, attempt a ## paste unless one
   is already in progress (paste_level non-zero), then decide whether a
   CPP_NAME token is expanded as a macro or handed to special_symbol.
   The actions taken on several of these tests are on lines that are
   not visible in this excerpt.  */
3004 _cpp_get_token (pfile)
3007 const cpp_token *token, *old_token;
3010 /* Loop until we hit a non-macro token. */
3013 token = get_raw_token (pfile);
3015 /* Short circuit EOF. */
3016 if (token->type == CPP_EOF)
3019 /* If we are skipping... */
3020 if (pfile->skipping)
3022 /* we still have to process directives, */
3023 if (pfile->token_list.directive)
3026 /* but everything else is ignored. */
3027 _cpp_skip_rest_of_line (pfile);
3031 /* If there's a potential control macro and we get here, then that
3032 #ifndef didn't cover the entire file and its argument shouldn't
3033 be taken as a control macro. */
3034 pfile->potential_control_macro = 0;
3036 /* If we are rescanning preprocessed input, no macro expansion or
3037 token pasting may occur. */
3038 if (CPP_OPTION (pfile, preprocessed))
3043 /* See if there's a token to paste with this one. */
3044 if (!pfile->paste_level)
3045 token = maybe_paste_with_next (pfile, token);
3047 /* If it isn't a macro, return it now. */
3048 if (token->type != CPP_NAME || token->val.node->type == T_VOID)
3051 /* Is macro expansion disabled in general, or are we in the
3052 middle of a token paste, or was this token just pasted?
3053 (Note we don't check token->flags & PASTED, because that
3054 counts tokens that were pasted at some point in the past,
3055 we're only interested in tokens that were pasted by this call
3056 to maybe_paste_with_next.) */
/* NOTE(review): the assignment to old_token is not visible in this
   excerpt; presumably it holds the token as it was before the paste
   attempt above -- confirm.  */
3057 if (pfile->no_expand_level == pfile->cur_context
3058 || pfile->paste_level
3059 || (token != old_token
3060 && pfile->no_expand_level + 1 == pfile->cur_context))
/* Names whose node is neither T_VOID nor T_MACRO go to
   special_symbol.  */
3063 node = token->val.node;
3064 if (node->type != T_MACRO)
3065 return special_symbol (pfile, node, token);
3067 if (is_macro_disabled (pfile, node->value.expansion, token))
3070 if (push_macro_context (pfile, token))
3076 /* Returns the next raw token, i.e. without performing macro
3077 expansion. Argument contexts are automatically entered. */
/* Sources are tried in this order: a token pushed back on the current
   context, then the end-of-context case (pop_context), then the next
   token of an argument context, and finally the next token of a macro
   or base token list.  A CPP_MACRO_ARG read from a list is either
   stringified or entered as a new argument context.  */
3078 static const cpp_token *
3079 get_raw_token (pfile)
3082 const cpp_token *result;
3083 cpp_context *context;
3087 context = CURRENT_CONTEXT (pfile);
/* A pushed-back token takes priority and is consumed here.  */
3088 if (context->pushed_token)
3090 result = context->pushed_token;
3091 context->pushed_token = 0;
3092 return result; /* Cannot be a CPP_MACRO_ARG */
3094 else if (context->posn == context->count)
3096 if (pop_context (pfile))
3100 else if (IS_ARG_CONTEXT (context))
3102 result = context->u.arg[context->posn++];
/* NOTE(review): the condition guarding the CONTEXT_RAW toggle and the
   second read is not visible in this excerpt.  */
3105 context->flags ^= CONTEXT_RAW;
3106 result = context->u.arg[context->posn++];
3108 return result; /* Cannot be a CPP_MACRO_ARG */
3111 result = &context->u.list->tokens[context->posn++];
3113 if (result->type != CPP_MACRO_ARG)
3116 if (result->flags & STRINGIFY_ARG)
3117 return stringify_arg (pfile, result);
3119 push_arg_context (pfile, result);
3123 /* Internal interface to get the token without macro expanding. */
/* Brackets one _cpp_get_token call between prevent_macro_expansion and
   restore_macro_expansion.  NOTE(review): prev_nme is an int here,
   while the saved level is an unsigned int elsewhere in this file and
   can be UINT_MAX -- confirm the type is intended.  */
3125 _cpp_get_raw_token (pfile)
3128 int prev_nme = prevent_macro_expansion (pfile);
3129 const cpp_token *result = _cpp_get_token (pfile);
3130 restore_macro_expansion (pfile, prev_nme);
3134 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3135 list should be overwritten, or zero if we need to append
3136 (typically, if we are within the arguments to a macro, or looking
3137 for the '(' to start a function-like macro invocation). */
/* After lex_line the base context count is refreshed from the list.
   When appending while pfile->args is set, saved argument pointers are
   rebased if the token array moved, and a directive found on the new
   line is reported as an error.  pop_context returns the value of this
   function; the return itself is not visible in this excerpt.  */
3139 lex_next (pfile, clear)
3143 cpp_toklist *list = &pfile->token_list;
/* Old array address and length, kept so that stale pointers can be
   recognised after lex_line.  */
3144 const cpp_token *old_list = list->tokens;
3145 unsigned int old_used = list->tokens_used;
3149 /* Release all temporary tokens. */
3150 _cpp_clear_toklist (list);
3151 pfile->contexts[0].posn = 0;
3152 if (pfile->temp_used)
3153 release_temp_tokens (pfile);
3155 lex_line (pfile, list);
3156 pfile->contexts[0].count = list->tokens_used;
3158 if (!clear && pfile->args)
3160 /* Fix up argument token pointers. */
3161 if (old_list != list->tokens)
3165 for (i = 0; i < pfile->args->used; i++)
/* Only pointers inside the old array are moved, by the byte distance
   between the old and the new array.  */
3167 const cpp_token *token = pfile->args->tokens[i];
3168 if (token >= old_list && token < old_list + old_used)
3169 pfile->args->tokens[i] = (const cpp_token *)
3170 ((char *) token + ((char *) list->tokens - (char *) old_list));
3174 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3175 tokens within the list of arguments that would otherwise act as
3176 preprocessing directives, the behavior is undefined.
3178 This implementation will report a hard error and treat the
3179 'sequence of preprocessing tokens' as part of the macro argument,
3182 Note if pfile->args == 0, we're OK since we're only inside a
3183 macro argument after a '('. */
/* Index old_used is the first token appended by this call, so the
   error is reported at its line and column.  */
3184 if (list->directive)
3186 cpp_error_with_line (pfile, list->tokens[old_used].line,
3187 list->tokens[old_used].col,
3188 "#%s may not be used inside a macro argument",
3189 list->directive->name);
3197 /* Pops a context off the context stack. If we're at the bottom, lexes
3198 the next logical line. Returns EOF if we're at the end of the
3199 argument list to the # operator, or we should not "overflow"
3200 into the rest of the file (e.g. 6.10.3.1.1). */
/* NOTE(review): the function header and its return statements are not
   visible in this excerpt.  get_raw_token calls this as pop_context and
   tests the result.  */
3205 cpp_context *context;
3207 if (pfile->cur_context == 0)
3209 /* If we are currently processing a directive, do not advance. 6.10
3210 paragraph 2: A new-line character ends the directive even if it
3211 occurs within what would otherwise be an invocation of a
3212 function-like macro. */
3213 if (pfile->token_list.directive)
/* The token list is overwritten only while no_expand_level is UINT_MAX,
   the value assigned in _cpp_init_input_buffer.  Otherwise the new line
   is appended.  */
3216 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3219 /* Argument contexts, when parsing args or handling # operator
3220 return CPP_EOF at the end. */
3221 context = CURRENT_CONTEXT (pfile);
3222 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3225 /* Free resources when leaving macro contexts. */
3227 free_macro_args (context->args);
/* If expansion was prevented at the level being popped, the prevention
   level moves down together with cur_context.  */
3229 if (pfile->cur_context == pfile->no_expand_level)
3230 pfile->no_expand_level--;
3231 pfile->cur_context--;
3236 /* Turn off macro expansion at the current context level. */
/* The old no_expand_level is saved in PREV_VALUE before being replaced
   by cur_context.  Callers in this file keep the value this function
   yields and later pass it to restore_macro_expansion.  */
3238 prevent_macro_expansion (pfile)
3241 unsigned int prev_value = pfile->no_expand_level;
3242 pfile->no_expand_level = pfile->cur_context;
3246 /* Restore macro expansion to its previous state. */
/* PREV_VALUE is stored back into no_expand_level without any check.
   Callers in this file pass the value obtained from the matching
   prevent_macro_expansion call.  */
3248 restore_macro_expansion (pfile, prev_value)
3250 unsigned int prev_value;
3252 pfile->no_expand_level = prev_value;
/* Picks an index into pfile->token_list.tokens: tokens_used while
   state.in_lex_line is set, otherwise starting from the read position
   of the base context.  The line of that token is returned and its
   column is stored through PCOL.  Lines between the index choice and
   the lookup are not visible in this excerpt.  NOTE(review):
   special_symbol calls this with a NULL PCOL -- confirm that the store
   through PCOL is guarded.  */
3255 /* Used by cpperror.c to obtain the correct line and column to report
3258 _cpp_get_line (pfile, pcol)
3263 const cpp_token *cur_token;
3265 if (pfile->state.in_lex_line)
3266 index = pfile->token_list.tokens_used;
3269 index = pfile->contexts[0].posn;
3280 cur_token = &pfile->token_list.tokens[index];
3282 *pcol = cur_token->col;
3283 return cur_token->line;
3286 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3287 static const char * const monthnames[] =
3289 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3290 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3293 /* Handle builtin macros like __FILE__. */
/* DSC above expands to two arguments, the literal cast to
   const U_CHAR * and its length without the terminating NUL.
   monthnames is indexed by tm_mon in the date case below.  The visible
   cases build a string token from a buffer file name, number tokens for
   the include level, line and stdc value, cached date and time tokens,
   and an error for a poisoned identifier.  The switch and case labels
   are mostly not visible in this excerpt.  */
3294 static const cpp_token *
3295 special_symbol (pfile, node, token)
3298 const cpp_token *token;
3310 ip = CPP_BUFFER (pfile);
/* For T_BASE_FILE, walk back through CPP_PREV_BUFFER to the buffer
   that has no predecessor.  */
3315 if (node->type == T_BASE_FILE)
3316 while (CPP_PREV_BUFFER (ip) != NULL)
3317 ip = CPP_PREV_BUFFER (ip);
3319 file = ip->nominal_fname;
3321 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3326 case T_INCLUDE_LEVEL:
3327 /* pfile->include_depth counts the primary source as level 1,
3328 but historically __INCLUDE_DEPTH__ has called the primary
3330 result = alloc_number_token (pfile, pfile->include_depth - 1);
3334 /* If __LINE__ is embedded in a macro, it must expand to the
3335 line of the macro's invocation, not its definition.
3336 Otherwise things like assert() will not work properly. */
3337 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3344 #ifdef STDC_0_IN_SYSTEM_HEADERS
3345 if (CPP_IN_SYSTEM_HEADER (pfile)
3346 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3349 result = alloc_number_token (pfile, stdc);
/* A zero pfile->date means neither string has been built yet.  */
3355 if (pfile->date == 0)
3357 /* Allocate __DATE__ and __TIME__ from permanent storage,
3358 and save them in pfile so we don't have to do this again.
3359 We don't generate these strings at init time because
3360 time() and localtime() are very slow on some systems. */
3361 time_t tt = time (NULL);
3362 struct tm *tb = localtime (&tt);
/* The literals are placeholders with the same length as the formatted
   output (for four-digit years); sprintf overwrites them in place.
   NOTE(review): this assumes make_string_token copies the text into
   writable storage -- confirm.  */
3364 pfile->date = make_string_token
3365 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3366 pfile->time = make_string_token
3367 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3369 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3370 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3371 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3372 tb->tm_hour, tb->tm_min, tb->tm_sec);
/* Both strings are built together; the node type selects which cached
   token is used.  */
3374 result = node->type == T_DATE ? pfile->date: pfile->time;
3378 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3382 cpp_ice (pfile, "invalid special hash type");
3386 ASSIGN_FLAGS_AND_POS (result, token);
3391 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3392 if it hasn't happened already. */
/* The visible part sets up the token list and the context stack:
   no_expand_level starts at UINT_MAX, room for 20 contexts is
   allocated, and context 0 is bound to pfile->token_list.
   NOTE(review): the input_buffer and trigraph-map work named in the
   comment above is not in the visible lines.  */
3395 _cpp_init_input_buffer (pfile)
3400 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3401 pfile->no_expand_level = UINT_MAX;
3402 pfile->context_cap = 20;
3403 pfile->cur_context = 0;
3405 pfile->contexts = (cpp_context *)
3406 xmalloc (pfile->context_cap * sizeof (cpp_context));
3408 /* Clear the base context. */
3409 base = &pfile->contexts[0];
3410 base->u.list = &pfile->token_list;
3416 base->pushed_token = 0;
/* Frees the macro arguments of every context above the base, drops
   back to context 0, empties the base context, and clears
   token_list.directive.  A no_expand_level at or below the old
   cur_context is reset to 0.  */
3419 /* Moves to the end of the directive line, popping contexts as
3422 _cpp_skip_rest_of_line (pfile)
3425 /* Discard all stacked contexts. */
3427 for (i = pfile->cur_context; i > 0; i--)
3428 if (pfile->contexts[i].args)
3429 free_macro_args (pfile->contexts[i].args);
/* The comparison uses cur_context before it is zeroed just below.  */
3431 if (pfile->no_expand_level <= pfile->cur_context)
3432 pfile->no_expand_level = 0;
3433 pfile->cur_context = 0;
3435 /* Clear the base context, and clear the directive pointer so that
3436 get_raw_token will advance to the next line. */
3437 pfile->contexts[0].count = 0;
3438 pfile->contexts[0].posn = 0;
3439 pfile->token_list.directive = 0;
/* Pushes BUF (COUNT bytes) as a new buffer and, if that succeeds, names
   it, lexes one line from it, and calls DIR->handler.  Macro expansion
   is prevented around the handler unless DIR has the EXPAND flag.  The
   rest of the line is then skipped and the buffer popped.  Nothing in
   the visible code runs when cpp_push_buffer returns NULL.  */
3442 /* Directive handler wrapper used by the command line option
3445 _cpp_run_directive (pfile, dir, buf, count, name)
3447 const struct directive *dir;
3452 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3454 unsigned int prev_lvl = 0;
3457 CPP_BUFFER (pfile)->nominal_fname = name;
3459 CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3460 CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3462 /* Scan the line now, else prevent_macro_expansion won't work. */
3463 lex_next (pfile, 1);
/* prev_lvl is only assigned, and only restored, for directives without
   the EXPAND flag.  */
3464 if (! (dir->flags & EXPAND))
3465 prev_lvl = prevent_macro_expansion (pfile);
3467 (void) (*dir->handler) (pfile);
3469 if (! (dir->flags & EXPAND))
3470 restore_macro_expansion (pfile, prev_lvl);
3472 _cpp_skip_rest_of_line (pfile);
3473 cpp_pop_buffer (pfile);