1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
27 o -dM and with _cpp_dump_list: too many \n output.
28 o Put a printer object in cpp_reader?
29 o Check line numbers assigned to all errors.
30 o Replace strncmp with memcmp almost everywhere.
31 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
32 o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
33 give it a run when we've got some code.
34 o Distinguish integers, floats, and 'other' pp-numbers.
35 o Store ints and char constants as binary values.
36 o New command-line assertion syntax.
37 o Work towards functions in cpperror.c taking a message level parameter.
38 If we do this, merge the common code of do_warning and do_error.
39 o Comment all functions, and describe macro expansion algorithm.
40 o Move as much out of header files as possible.
41 o Remove single quote pairs `', and some '', from diagnostics.
42 o Correct pastability test for CPP_NAME and CPP_NUMBER.
/* Shared, read-only tokens: one of type CPP_PLACEMARKER and one of type
   CPP_EOF.  Every other explicit initializer is 0.  */
53 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
54 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
56 /* Flags for cpp_context.  Each flag is a distinct single bit, so
   several may be combined in one flags word and tested with '&'.  */
57 #define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
58 #define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
59 #define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
60 #define CONTEXT_ARG (1 << 3) /* If an argument context. */
62 typedef struct cpp_context cpp_context;
67 const cpp_toklist *list; /* Used for macro contexts only. */
68 const cpp_token **arg; /* Used for arg contexts only. */
71 /* Pushed token to be returned by next call to get_raw_token. */
72 const cpp_token *pushed_token;
74 struct macro_args *args; /* 0 for arguments and object-like macros. */
75 unsigned short posn; /* Current posn, index into u. */
76 unsigned short count; /* No. of tokens in u. */
81 typedef struct macro_args macro_args;
85 const cpp_token **tokens;
86 unsigned int capacity;
91 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
92 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
93 macro_args *, unsigned int *));
94 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
95 static void save_token PARAMS ((macro_args *, const cpp_token *));
96 static int pop_context PARAMS ((cpp_reader *));
97 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
98 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
99 static void free_macro_args PARAMS ((macro_args *));
/* Grow LIST's name buffer by half of its current capacity plus one
   byte, by calling _cpp_expand_name_space.  LIST is evaluated twice,
   so it must not have side effects.  */
101 #define auto_expand_name_space(list) \
102 _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
103 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
105 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
107 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
110 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
111 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
113 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
114 const unsigned char *));
115 static int skip_block_comment PARAMS ((cpp_reader *));
116 static int skip_line_comment PARAMS ((cpp_reader *));
117 static void adjust_column PARAMS ((cpp_reader *, const U_CHAR *));
118 static void skip_whitespace PARAMS ((cpp_reader *, int));
119 static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
120 const U_CHAR *, const U_CHAR *));
121 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
122 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
124 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
125 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
126 const unsigned char *,
127 unsigned int, unsigned int));
128 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
129 static int lex_next PARAMS ((cpp_reader *, int));
130 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
133 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
134 static void expand_context_stack PARAMS ((cpp_reader *));
135 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
137 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
139 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
141 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
143 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
144 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
146 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
147 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
149 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
150 const cpp_token *, int *));
151 static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
152 static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
153 static cpp_token *get_temp_token PARAMS ((cpp_reader *));
154 static void release_temp_tokens PARAMS ((cpp_reader *));
155 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
156 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
158 #define INIT_TOKEN_STR(list, token) \
159 do {(token)->val.str.len = 0; \
160 (token)->val.str.text = (list)->namebuf + (list)->name_used; \
/* Non-zero if C is a '+' or '-' that directly follows an exponent
   marker PREVC: 'e' or 'E' always, and 'p' or 'P' unless the c89
   option is set.  Expands to a use of a variable named `pfile', which
   must be in scope at the point of use.  */
163 #define VALID_SIGN(c, prevc) \
164 (((c) == '+' || (c) == '-') && \
165 ((prevc) == 'e' || (prevc) == 'E' \
166 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
168 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
169 character, if any, is in buffer. */
171 #define handle_newline(cur, limit, c) \
173 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
175 pfile->buffer->lineno++; \
176 pfile->buffer->line_base = (cur); \
177 pfile->col_adjust = 0; \
/* Helpers for manipulating the token being built.  All of them expand
   to uses of a variable named `cur_token', which must be in scope at
   the point of use.

   IMMED_TOKEN      non-zero if *cur_token does not have PREV_WHITE set.
   PREV_TOKEN_TYPE  the type of the token just before cur_token.
   PUSH_TOKEN       set the type of *cur_token, then advance cur_token.
   REVISE_TOKEN     overwrite the type of the token before cur_token.
   BACKUP_TOKEN     step cur_token back one token and set its type.
   BACKUP_DIGRAPH   as BACKUP_TOKEN, and also set the DIGRAPH flag.  */
180 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
181 #define PREV_TOKEN_TYPE (cur_token[-1].type)
183 #define PUSH_TOKEN(ttype) cur_token++->type = (ttype)
184 #define REVISE_TOKEN(ttype) cur_token[-1].type = (ttype)
185 #define BACKUP_TOKEN(ttype) (--cur_token)->type = (ttype)
186 #define BACKUP_DIGRAPH(ttype) do { \
187 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
189 /* An upper bound on the number of bytes needed to spell a token,
190 including preceding whitespace. */
191 #define TOKEN_LEN(token) (5 + (TOKEN_SPELL(token) == SPELL_STRING \
192 ? (token)->val.str.len \
193 : (TOKEN_SPELL(token) == SPELL_IDENT \
194 ? (token)->val.node->length \
/* IS_ARG_CONTEXT is non-zero if context C has the CONTEXT_ARG flag set.
   CURRENT_CONTEXT yields a pointer to the entry of PFILE's contexts
   array indexed by PFILE's cur_context.  */
197 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
198 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
200 #define ASSIGN_FLAGS_AND_POS(d, s) \
201 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
202 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
205 /* f is flags, just consisting of PREV_WHITE | BOL. */
206 #define MODIFY_FLAGS_AND_POS(d, s, f) \
207 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
208 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
/* Table of token spellings.  The one-letter macros below turn an entry
   (E, S) into an initializer pairing a SPELL_* class with the spelling
   S; the enumerator E is not used here.  The table has N_TTYPES + 1
   slots and ends with a {0, 0} entry.
   NOTE(review): TTYPE_TABLE is defined elsewhere; presumably it is a
   list of T/I/S/C/N entries, one per token type - confirm.  */
211 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
212 #define I(e, s) {SPELL_IDENT, s},
213 #define S(e, s) {SPELL_STRING, s},
214 #define C(e, s) {SPELL_CHAR, s},
215 #define N(e, s) {SPELL_NONE, s},
217 const struct token_spelling
218 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
226 /* For debugging: the internal names of the tokens.  Every entry kind
   expands the same way: the enumerator E is passed through STRINGX and
   prefixed with U, and the spelling argument S is ignored.  */
227 #define T(e, s) U STRINGX(e),
228 #define I(e, s) U STRINGX(e),
229 #define S(e, s) U STRINGX(e),
230 #define C(e, s) U STRINGX(e),
231 #define N(e, s) U STRINGX(e),
233 const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
241 /* The following table is used by trigraph_ok/trigraph_replace. If we
242 have designated initializers, it can be constant data; otherwise,
243 it is set up at runtime by _cpp_init_input_buffer. */
245 #if (GCC_VERSION >= 2007)
246 #define init_trigraph_map() /* nothing */
247 #define TRIGRAPH_MAP \
248 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
250 #define s(p, v) [p] = v,
252 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
253 static void init_trigraph_map PARAMS ((void)) { \
254 unsigned char *x = trigraph_map;
256 #define s(p, v) x[p] = v;
260 s('=', '#') s(')', ']') s('!', '|')
261 s('(', '[') s('\'', '^') s('>', '}')
262 s('/', '\\') s('<', '{') s('-', '~')
269 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
272 _cpp_grow_token_buffer (pfile, n)
276 long old_written = CPP_WRITTEN (pfile);
277 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
278 pfile->token_buffer = (U_CHAR *)
279 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
280 CPP_SET_WRITTEN (pfile, old_written);
283 /* Deal with the annoying semantics of fwrite. */
285 safe_fwrite (pfile, buf, len, fp)
295 count = fwrite (buf, 1, len, fp);
304 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
307 /* Notify the compiler proper that the current line number has jumped,
308 or the current file name has changed. */
311 output_line_command (pfile, print, line)
316 cpp_buffer *ip = CPP_BUFFER (pfile);
317 enum { same = 0, enter, leave, rname } change;
318 static const char * const codes[] = { "", " 1", " 2", "" };
323 /* End the previous line of text. */
324 if (pfile->need_newline)
325 putc ('\n', print->outf);
326 pfile->need_newline = 0;
328 if (CPP_OPTION (pfile, no_line_commands))
331 /* If ip is null, we've been called from cpp_finish, and they just
332 needed the final flush and trailing newline. */
336 if (pfile->include_depth == print->last_id)
338 /* Determine whether the current filename has changed, and if so,
339 how. 'nominal_fname' values are unique, so they can be compared
340 by comparing pointers. */
341 if (ip->nominal_fname == print->last_fname)
348 if (pfile->include_depth > print->last_id)
352 print->last_id = pfile->include_depth;
354 print->last_fname = ip->nominal_fname;
356 /* If the current file has not changed, we can output a few newlines
357 instead if we want to increase the line number by a small amount.
358 We cannot do this if print->lineno is zero, because that means we
359 haven't output any line commands yet. (The very first line
360 command output is a `same_file' command.) */
361 if (change == same && print->lineno > 0
362 && line >= print->lineno && line < print->lineno + 8)
364 while (line > print->lineno)
366 putc ('\n', print->outf);
372 #ifndef NO_IMPLICIT_EXTERN_C
373 if (CPP_OPTION (pfile, cplusplus))
374 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
376 ip->inc->sysp ? " 3" : "",
377 (ip->inc->sysp == 2) ? " 4" : "");
380 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
382 ip->inc->sysp ? " 3" : "");
383 print->lineno = line;
386 /* Write the contents of the token_buffer to the output stream, and
387 clear the token_buffer. Also handles generating line commands and
388 keeping track of file transitions. */
391 cpp_output_tokens (pfile, print, line)
396 if (CPP_WRITTEN (pfile) - print->written)
398 safe_fwrite (pfile, pfile->token_buffer,
399 CPP_WRITTEN (pfile) - print->written, print->outf);
400 pfile->need_newline = 1;
404 CPP_SET_WRITTEN (pfile, print->written);
406 output_line_command (pfile, print, line);
409 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
412 cpp_scan_buffer_nooutput (pfile)
415 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
416 const cpp_token *token;
418 /* In no-output mode, we can ignore everything but directives. */
421 token = _cpp_get_token (pfile);
423 if (token->type == CPP_EOF)
425 cpp_pop_buffer (pfile);
426 if (CPP_BUFFER (pfile) == stop)
430 if (token->type == CPP_HASH && token->flags & BOL
431 && pfile->token_list.directive)
433 process_directive (pfile, token);
437 _cpp_skip_rest_of_line (pfile);
441 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
443 cpp_scan_buffer (pfile, print)
447 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
448 const cpp_token *token, *prev = 0;
452 token = _cpp_get_token (pfile);
453 if (token->type == CPP_EOF)
455 cpp_pop_buffer (pfile);
456 if (CPP_BUFFER (pfile) == stop)
459 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
464 if (token->flags & BOL)
466 if (token->type == CPP_HASH && pfile->token_list.directive)
468 process_directive (pfile, token);
472 cpp_output_tokens (pfile, print, pfile->token_list.line);
476 if (token->type != CPP_PLACEMARKER)
477 output_token (pfile, token, prev);
483 /* Scan a single line of the input into the token_buffer. */
485 cpp_scan_line (pfile)
488 const cpp_token *token, *prev = 0;
490 if (pfile->buffer == NULL)
495 token = cpp_get_token (pfile);
496 if (token->type == CPP_EOF)
498 cpp_pop_buffer (pfile);
502 /* If the last token on a line results from a macro expansion,
503 the check below will fail to stop us from proceeding to the
504 next line - so make sure we stick in a newline, at least. */
505 if (token->flags & BOL)
506 CPP_PUTC (pfile, '\n');
508 output_token (pfile, token, prev);
511 while (pfile->cur_context > 0
512 || pfile->contexts[0].posn < pfile->contexts[0].count);
516 /* Helper routine used by parse_include, which can't see spell_token.
517 Reinterpret the current line as an h-char-sequence (< ... >); we are
518 looking at the first token after the <. */
520 _cpp_glue_header_name (pfile)
523 unsigned int written = CPP_WRITTEN (pfile);
531 t = _cpp_get_token (pfile);
532 if (t->type == CPP_GREATER || t->type == CPP_EOF)
535 CPP_RESERVE (pfile, TOKEN_LEN (t));
536 if (t->flags & PREV_WHITE)
537 CPP_PUTC_Q (pfile, ' ');
538 pfile->limit = spell_token (pfile, t, pfile->limit);
541 if (t->type == CPP_EOF)
542 cpp_error (pfile, "missing terminating > character");
544 len = CPP_WRITTEN (pfile) - written;
546 memcpy (buf, pfile->token_buffer + written, len);
547 CPP_SET_WRITTEN (pfile, written);
549 hdr = get_temp_token (pfile);
550 hdr->type = CPP_HEADER_NAME;
552 hdr->val.str.text = buf;
553 hdr->val.str.len = len;
557 /* Token-buffer helper functions. */
559 /* Expand a token list's string space. It is *vital* that
560 list->tokens_used is correct, to get pointer fix-up right. */
562 _cpp_expand_name_space (list, len)
566 const U_CHAR *old_namebuf;
568 old_namebuf = list->namebuf;
569 list->name_cap += len;
570 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
572 /* Fix up token text pointers. */
573 if (list->namebuf != old_namebuf)
577 for (i = 0; i < list->tokens_used; i++)
578 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
579 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
583 /* If there is not enough room for LEN more characters, expand the
584 list by just enough to have room for LEN characters. */
586 _cpp_reserve_name_space (list, len)
590 unsigned int room = list->name_cap - list->name_used;
593 _cpp_expand_name_space (list, len - room);
596 /* Expand the number of tokens in a list. */
598 _cpp_expand_token_space (list, count)
604 list->tokens_cap += count;
605 n = list->tokens_cap;
606 if (list->flags & LIST_OFFSET)
608 list->tokens = (cpp_token *)
609 xrealloc (list->tokens, n * sizeof (cpp_token));
610 if (list->flags & LIST_OFFSET)
611 list->tokens++; /* Skip the dummy. */
614 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
615 an extra token in front of the token list, as this allows the lexer
616 to always peek at the previous token without worrying about
617 underflowing the list, and some initial space. Otherwise, no
618 token- or name-space is allocated, and there is no dummy token. */
620 _cpp_init_toklist (list, flags)
624 if (flags == NO_DUMMY_TOKEN)
626 list->tokens_cap = 0;
634 /* Initialize token space. Put a dummy token before the start
635 that will fail matches. */
636 list->tokens_cap = 256; /* 4K's worth. */
637 list->tokens = (cpp_token *)
638 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
639 list->tokens[0].type = CPP_EOF;
642 /* Initialize name space. */
643 list->name_cap = 1024;
644 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
645 list->flags = LIST_OFFSET;
648 _cpp_clear_toklist (list);
651 /* Clear a token list. */
653 _cpp_clear_toklist (list)
656 list->tokens_used = 0;
660 list->params_len = 0;
661 list->flags &= LIST_OFFSET; /* clear all but that one */
664 /* Free a token list. Does not free the list itself, which may be
665 embedded in a larger structure. */
667 _cpp_free_toklist (list)
668 const cpp_toklist *list;
670 if (list->flags & LIST_OFFSET)
671 free (list->tokens - 1); /* Backup over dummy token. */
674 free (list->namebuf);
677 /* Compare two tokens. */
679 _cpp_equiv_tokens (a, b)
680 const cpp_token *a, *b;
682 if (a->type == b->type && a->flags == b->flags)
683 switch (token_spellings[a->type].type)
685 default: /* Keep compiler happy. */
690 return a->val.aux == b->val.aux; /* arg_no or character. */
692 return a->val.node == b->val.node;
694 return (a->val.str.len == b->val.str.len
695 && !memcmp (a->val.str.text, b->val.str.text,
702 /* Compare two token lists. */
704 _cpp_equiv_toklists (a, b)
705 const cpp_toklist *a, *b;
709 if (a->tokens_used != b->tokens_used
710 || a->flags != b->flags
711 || a->paramc != b->paramc)
714 for (i = 0; i < a->tokens_used; i++)
715 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
722 Compares the token TOKEN to the NUL-terminated string STRING.
723 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
726 cpp_ideq (token, string)
727 const cpp_token *token;
730 if (token->type != CPP_NAME)
733 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
738 The original lexer in cpplib was made up of two passes: a first pass
739 that replaced trigraphs and deleted escaped newlines, and a second
740 pass that tokenized the result of the first pass. Tokenisation was
741 performed by peeking at the next character in the input stream. For
742 example, if the input stream contained "!=", the handler for the !
743 character would peek at the next character, and if it were a '='
744 would skip over it, and return a "!=" token, otherwise it would
745 return just the "!" token.
747 To implement a single-pass lexer, this peeking ahead is unworkable.
748 An arbitrary number of escaped newlines, and trigraphs (in particular
749 ??/ which translates to the escape \), could separate the '!' and '='
750 in the input stream, yet the next token is still a "!=".
752 Suppose instead that we lex by one logical line at a time, producing
753 a token list or stack for each logical line, and when seeing the '!'
754 push a CPP_NOT token on the list. Then if the '!' is part of a
755 longer token ("!=") we know we must see the remainder of the token by
756 the time we reach the end of the logical line. Thus we can have the
757 '=' handler look at the previous token (at the end of the list / top
758 of the stack) and see if it is a "!" token, and if so, instead of
759 pushing a "=" token revise the existing token to be a "!=" token.
761 This works in the presence of escaped newlines, because the '\' would
762 have been pushed on the top of the stack as a CPP_BACKSLASH. The
763 newline ('\n' or '\r') handler looks at the token at the top of the
764 stack to see if it is a CPP_BACKSLASH, and if so discards both.
765 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
766 the '=' handler would never see any intervening escaped newlines.
768 To make trigraphs work in this context, as in precedence trigraphs
769 are highest and converted before anything else, the '?' handler does
770 lookahead to see if it is a trigraph, and if so skips the trigraph
771 and pushes the token it represents onto the top of the stack. This
772 also works in the particular case of a CPP_BACKSLASH trigraph.
774 To the preprocessor, whitespace is only significant to the point of
775 knowing whether whitespace precedes a particular token. For example,
776 the '=' handler needs to know whether there was whitespace between it
777 and a "!" token on the top of the stack, to make the token conversion
778 decision correctly. So each token has a PREV_WHITE flag to
779 indicate this - the standard permits consecutive whitespace to be
780 regarded as a single space. The compiler front ends are not
781 interested in whitespace at all; they just require a token stream.
782 Another place where whitespace is significant to the preprocessor is
783 a #define statement - if there is whitespace between the macro name
784 and an initial "(" token the macro is "object-like", otherwise it is
785 a function-like macro that takes arguments.
787 However, all is not rosy. Parsing of identifiers, numbers, comments
788 and strings becomes trickier because of the possibility of raw
789 trigraphs and escaped newlines in the input stream.
791 The trigraphs are three consecutive characters beginning with two
792 question marks. A question mark is not valid as part of a number or
793 identifier, so parsing of a number or identifier terminates normally
794 upon reaching it, returning to the mainloop which handles the
795 trigraph just like it would in any other position. Similarly for the
796 backslash of a backslash-newline combination. So we just need the
797 escaped-newline dropper in the mainloop to check if the token on the
798 top of the stack after dropping the escaped newline is a number or
799 identifier, and if so to continue processing it as if nothing had
802 For strings, we replace trigraphs whenever we reach a quote or
803 newline, because there might be a backslash trigraph escaping them.
804 We need to be careful that we start trigraph replacing from where we
805 left off previously, because it is possible for a first scan to leave
806 "fake" trigraphs that a second scan would pick up as real (e.g. the
807 sequence "????/\n=" would find a fake ??= trigraph after removing the
810 For line comments, on reaching a newline we scan the previous
811 character(s) to see if it is escaped, and continue if it is. Block
812 comments ignore everything and just focus on finding the comment
813 termination mark. The only difficult thing, and it is surprisingly
814 tricky, is checking if an asterisk precedes the final slash since
815 they could be separated by escaped newlines. If the preprocessor is
816 invoked with the output comments option, we don't bother removing
817 escaped newlines and replacing trigraphs for output.
819 Finally, numbers can begin with a period, which is pushed initially
820 as a CPP_DOT token in its own right. The digit handler checks if the
821 previous token was a CPP_DOT not separated by whitespace, and if so
822 pops it off the stack and pushes a period into the number's buffer
823 before calling the number parser.
/* Spellings of the six digraph sequences.
   NOTE(review): the code that indexes this table is not visible here,
   so the mapping from index to token type must be confirmed against
   its users before reordering or extending the entries.  */
827 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
828 U":>", U"<%", U"%>"};
830 /* Call when a trigraph is encountered. It warns if necessary, and
831 returns true if the trigraph should be honoured. END is the third
832 character of a trigraph in the input stream. */
834 trigraph_ok (pfile, end)
836 const unsigned char *end;
838 int accept = CPP_OPTION (pfile, trigraphs);
840 if (CPP_OPTION (pfile, warn_trigraphs))
842 unsigned int col = end - 1 - pfile->buffer->line_base;
844 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
845 "trigraph ??%c converted to %c",
846 (int) *end, (int) trigraph_map[*end]);
848 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
849 "trigraph ??%c ignored", (int) *end);
854 /* Scan a string for trigraphs, warning or replacing them inline as
855 appropriate. When parsing a string, we must call this routine
856 before processing a newline character (if trigraphs are enabled),
857 since the newline might be escaped by a preceding backslash
858 trigraph sequence. Returns a pointer to the end of the name after
861 static unsigned char *
862 trigraph_replace (pfile, src, limit)
865 unsigned char *limit;
869 /* Starting with src[1], find two consecutive '?'. The case of no
870 trigraphs is streamlined. */
872 for (src++; src + 1 < limit; src += 2)
877 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
880 else if (src + 2 == limit || src[1] != '?')
883 /* Check if it really is a trigraph. */
884 if (trigraph_map[src[2]] == 0)
892 /* Now we have a trigraph, we need to scan the remaining buffer, and
893 copy-shifting its contents left if replacement is enabled. */
894 for (; src + 2 < limit; dest++, src++)
895 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
899 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
900 *dest = trigraph_map[*src];
903 /* Copy remaining (at most 2) characters. */
909 /* If CUR is a backslash or the end of a trigraphed backslash, return
910 a pointer to its beginning, otherwise NULL. We don't read beyond
911 the buffer start, because there is the start of the comment in the
913 static const unsigned char *
914 backslash_start (pfile, cur)
916 const unsigned char *cur;
920 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
921 && trigraph_ok (pfile, cur))
926 /* Skip a C-style block comment. This is probably the trickiest
927 handler. We find the end of the comment by seeing if an asterisk
928 is before every '/' we encounter. The nasty complication is that a
929 previous asterisk may be separated by one or more escaped newlines.
930 Returns non-zero if comment terminated by EOF, zero otherwise. */
932 skip_block_comment (pfile)
935 cpp_buffer *buffer = pfile->buffer;
936 const unsigned char *char_after_star = 0;
937 const unsigned char *cur = buffer->cur;
939 for (; cur < buffer->rlimit; )
941 unsigned char c = *cur++;
943 /* People like decorating comments with '*', so check for
944 '/' instead for efficiency. */
947 /* Don't view / then * then / as finishing the comment. */
948 if ((cur[-2] == '*' && cur - 1 > buffer->cur)
949 || cur - 1 == char_after_star)
955 /* Warn about potential nested comments, but not when
956 the final character inside the comment is a '/'.
957 Don't bother to get it right across escaped newlines. */
958 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
959 && cur[0] == '*' && cur[1] != '/')
962 cpp_warning (pfile, "'/*' within comment");
965 else if (is_vspace (c))
967 const unsigned char* bslash = backslash_start (pfile, cur - 2);
969 handle_newline (cur, buffer->rlimit, c);
970 /* Work correctly if there is an asterisk before an
971 arbitrarily long sequence of escaped newlines. */
972 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
973 char_after_star = cur;
978 adjust_column (pfile, cur - 1);
985 /* Skip a C++ line comment. Handles escaped newlines. Returns
986 non-zero if a multiline comment. */
988 skip_line_comment (pfile)
991 cpp_buffer *buffer = pfile->buffer;
992 register const unsigned char *cur = buffer->cur;
995 for (; cur < buffer->rlimit; )
997 unsigned char c = *cur++;
1001 /* Check for a (trigraph?) backslash escaping the newline. */
1002 if (!backslash_start (pfile, cur - 2))
1005 handle_newline (cur, buffer->rlimit, c);
1011 buffer->cur = cur - 1; /* Leave newline for caller. */
1015 /* TAB points to a \t character. Update col_adjust so we track the
1016 column correctly. */
1018 adjust_column (pfile, tab)
1022 /* Zero-based column. */
1023 unsigned int col = CPP_BUF_COLUMN (pfile->buffer, tab);
1025 /* Round it up to multiple of the tabstop, but subtract 1 since the
1026 tab itself occupies a character position. */
1027 pfile->col_adjust += (CPP_OPTION (pfile, tabstop)
1028 - col % CPP_OPTION (pfile, tabstop)) - 1;
1031 /* Skips whitespace, stopping at next non-whitespace character.
1032 Adjusts pfile->col_adjust to account for tabs. This enables tokens
1033 to be assigned the correct column. */
1035 skip_whitespace (pfile, in_directive)
1039 cpp_buffer *buffer = pfile->buffer;
1040 unsigned short warned = 0;
1042 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
1043 while (buffer->cur < buffer->rlimit)
1045 unsigned char c = *buffer->cur;
1047 if (!is_nvspace (c))
1051 /* Horizontal space always OK. */
1055 adjust_column (pfile, buffer->cur - 1);
1056 /* Must be \f \v or \0. */
1060 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
1061 CPP_BUF_COL (buffer),
1062 "embedded null character ignored");
1065 else if (in_directive && CPP_PEDANTIC (pfile))
1066 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1067 CPP_BUF_COL (buffer),
1068 "%s in preprocessing directive",
1069 c == '\f' ? "form feed" : "vertical tab");
1073 /* Parse (append) an identifier. */
1074 static const U_CHAR *
1075 parse_name (pfile, tok, cur, rlimit)
1078 const U_CHAR *cur, *rlimit;
1080 const U_CHAR *name = cur;
1083 while (cur < rlimit)
1085 if (! is_idchar (*cur))
1087 /* $ is not a legal identifier character in the standard, but is
1088 commonly accepted as an extension. Don't warn about it in
1089 skipped conditional blocks. */
1090 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1092 CPP_BUFFER (pfile)->cur = cur;
1093 cpp_pedwarn (pfile, "'$' character in identifier");
1101 unsigned int oldlen = tok->val.node->length;
1102 U_CHAR *newname = alloca (oldlen + len);
1103 memcpy (newname, tok->val.node->name, oldlen);
1104 memcpy (newname + oldlen, name, len);
1109 tok->val.node = cpp_lookup (pfile, name, len);
1113 /* Parse (append) a number. */
1115 parse_number (pfile, list, name)
1120 const unsigned char *name_limit;
1121 unsigned char *namebuf;
1122 cpp_buffer *buffer = pfile->buffer;
1123 register const unsigned char *cur = buffer->cur;
1126 name_limit = list->namebuf + list->name_cap;
1127 namebuf = list->namebuf + list->name_used;
1129 for (; cur < buffer->rlimit && namebuf < name_limit; )
1131 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1133 /* Perhaps we should accept '$' here if we accept it for
1134 identifiers. We know namebuf[-1] is safe, because for c to
1135 be a sign we must have pushed at least one character. */
1136 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1143 /* Run out of name space? */
1144 if (cur < buffer->rlimit)
1146 list->name_used = namebuf - list->namebuf;
1147 auto_expand_name_space (list);
1153 name->len = namebuf - name->text;
1154 list->name_used = namebuf - list->namebuf;
1157 /* Places a string terminated by an unescaped TERMINATOR into a
1158 cpp_string, which should be expandable and thus at the top of the
1159 list's stack. Handles embedded trigraphs, if necessary, and
1162 Can be used for character constants (terminator = '\''), string
1163 constants ('"') and angled headers ('>'). Multi-line strings are
1164 allowed, except for within directives.

   The text is accumulated in the name buffer of LIST, which is
   enlarged with auto_expand_name_space when it fills while input
   remains, and TOKEN->val.str is given its final length at the end.
   A backslash directly before a newline is dropped together with the
   newline.  The variable null_count selects between the singular and
   plural form of the warning about preserved null characters, and
   input that ends before the terminator is reported with the
   missing-terminator error.

   NOTE(review): part of the sentence above and a number of statements
   in the body are absent from this listing; confirm the details
   against the complete file.  */
1167 parse_string (pfile, list, token, terminator)
1171 unsigned int terminator;
1173 cpp_buffer *buffer = pfile->buffer;
1174 cpp_string *name = &token->val.str;
1175 register const unsigned char *cur = buffer->cur;
1176 const unsigned char *name_limit;
1177 unsigned char *namebuf;
1178 unsigned int null_count = 0;
1179 unsigned int trigraphed = list->name_used;
1182 name_limit = list->namebuf + list->name_cap;
1183 namebuf = list->namebuf + list->name_used;
1185 for (; cur < buffer->rlimit && namebuf < name_limit; )
1187 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1191 else if (c == terminator || is_vspace (c))
1193 /* Needed for trigraph_replace and multiline string warning. */
1196 /* Scan for trigraphs before checking if backslash-escaped. */
1197 if ((CPP_OPTION (pfile, trigraphs)
1198 || CPP_OPTION (pfile, warn_trigraphs))
1199 && namebuf - (list->namebuf + trigraphed) >= 3)
1201 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1203 /* The test above guarantees trigraphed will be positive. */
1204 trigraphed = namebuf - list->namebuf - 2;
1207 namebuf--; /* Drop the newline / terminator from the name. */
1210 /* Drop a backslash newline, and continue. */
1211 if (namebuf[-1] == '\\')
1213 handle_newline (cur, buffer->rlimit, c);
1220 /* In assembly language, silently terminate strings of
1221 either variety at end of line. This is a kludge
1222 around not knowing where comments are. */
1223 if (CPP_OPTION (pfile, lang_asm))
1226 /* Character constants and header names may not extend
1227 over multiple lines. In Standard C, neither may
1228 strings. We accept multiline strings as an
1229 extension. (Even in directives - otherwise, glibc's
1230 longlong.h breaks.) */
1231 if (terminator != '"')
1234 cur++; /* Move forwards again. */
1236 if (pfile->multiline_string_line == 0)
1238 pfile->multiline_string_line = token->line;
1239 pfile->multiline_string_column = token->col;
1240 if (CPP_PEDANTIC (pfile))
1241 cpp_pedwarn (pfile, "multi-line string constant");
1245 handle_newline (cur, buffer->rlimit, c);
1249 unsigned char *temp;
1251 /* An odd number of consecutive backslashes represents
1252 an escaped terminator. */
1254 while (temp >= name->text && *temp == '\\')
1257 if ((namebuf - temp) & 1)
1264 /* Run out of name space? */
1265 if (cur < buffer->rlimit)
1267 list->name_used = namebuf - list->namebuf;
1268 auto_expand_name_space (list);
1272 /* We may not have trigraph-replaced the input for this code path,
1273 but as the input is in error by being unterminated we don't
1274 bother. Prevent warnings about no newlines at EOF. */
1275 if (is_vspace (cur[-1]))
1279 cpp_error (pfile, "missing terminating %c character", (int) terminator);
1281 if (terminator == '\"' && pfile->multiline_string_line != list->line
1282 && pfile->multiline_string_line != 0)
1284 cpp_error_with_line (pfile, pfile->multiline_string_line,
1285 pfile->multiline_string_column,
1286 "possible start of unterminated string literal");
1287 pfile->multiline_string_line = 0;
1292 name->len = namebuf - name->text;
1293 list->name_used = namebuf - list->namebuf;
1296 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1297 : "null character preserved"));
1300 /* The character TYPE helps us distinguish comment types: '*' = C
1301 style, '/' = C++ style. For code simplicity, the stored comment
1302 includes the comment start and any terminator.

   LEN bytes starting at FROM are copied into the name buffer of LIST,
   which is first enlarged with _cpp_expand_name_space if LEN plus
   COMMENT_START_LEN bytes would not fit.  TOKEN becomes a CPP_COMMENT
   whose string length is LEN plus COMMENT_START_LEN, and that many
   bytes of the name buffer are marked as used.  NOTE(review): the
   statements that store the comment start itself are absent from
   this listing.  */
1304 #define COMMENT_START_LEN 2
1306 save_comment (list, token, from, len, type)
1309 const unsigned char *from;
1313 unsigned char *buffer;
1315 len += COMMENT_START_LEN;
1317 if (list->name_used + len > list->name_cap)
1318 _cpp_expand_name_space (list, len);
1320 INIT_TOKEN_STR (list, token);
1321 token->type = CPP_COMMENT;
1322 token->val.str.len = len;
1324 buffer = list->namebuf + list->name_used;
1325 list->name_used += len;
1327 /* Copy the comment. */
1338 memcpy (buffer, from, len - COMMENT_START_LEN);
/* lex_line (PFILE, LIST): tokenize input from the current buffer of
   PFILE, appending tokens to LIST after the LIST->tokens_used entries
   already present.  The prose that follows is the original
   description of the routine.  In outline, the visible code:

     - records the file name and line of the list near the top
       (NOTE(review): exactly which setup statements the LIST_OFFSET
       test guards cannot be told from this listing);
     - loops while input and token slots remain, dispatching on the
       current character, and calls _cpp_expand_token_space when the
       token array is full;
     - when no token was produced by this call and initialization is
       done, stores a CPP_EOF token, with a pedantic warning if the
       character before the end of input is not vertical space;
     - reports an invalid directive when the first token is CPP_HASH,
       at least one token follows it, no directive was recognised, the
       reader is not skipping and the language is not assembly;
     - terminates known directives with CPP_EOF, and marks the first
       token BOL when it starts the list or is a CPP_HASH
       (NOTE(review): the condition under which PREV_WHITE is set
       instead is absent from this listing).

   KNOWN_DIRECTIVE and MIGHT_BE_DIRECTIVE are local helper macros: the
   first tests LIST->directive, the second tests whether the token
   being built directly follows a CPP_HASH that was the first token
   lexed by this call.  */
1342 * The tokenizer's main loop. Returns a token list, representing a
1343 * logical line in the input file. On EOF after some tokens have
1344 * been processed, we return immediately. Then in next call, or if
1345 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1346 * token is placed in the list.
1348 * Implementation relies almost entirely on lookback, rather than
1349 * looking forwards. This means that tokenization requires just
1350 * a single pass of the file, even in the presence of trigraphs and
1351 * escaped newlines, providing significant performance benefits.
1352 * Trigraph overhead is negligible if they are disabled, and low
1353 * even when enabled.
1356 #define KNOWN_DIRECTIVE() (list->directive != 0)
1357 #define MIGHT_BE_DIRECTIVE() \
1358 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1361 lex_line (pfile, list)
1365 cpp_token *cur_token, *token_limit, *first;
1366 cpp_buffer *buffer = pfile->buffer;
1367 const unsigned char *cur = buffer->cur;
1368 unsigned char flags = 0;
1369 unsigned int first_token = list->tokens_used;
1371 if (!(list->flags & LIST_OFFSET))
1374 list->file = buffer->nominal_fname;
1375 list->line = CPP_BUF_LINE (buffer);
1376 pfile->col_adjust = 0;
1377 pfile->in_lex_line = 1;
1378 if (cur == buffer->buf)
1379 list->flags |= BEG_OF_FILE;
1382 token_limit = list->tokens + list->tokens_cap;
1383 cur_token = list->tokens + list->tokens_used;
1385 for (; cur < buffer->rlimit && cur_token < token_limit;)
1389 /* Optimize non-vertical whitespace skipping; most tokens are
1390 probably separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
1395 skip_whitespace (pfile, (list->tokens[first_token].type == CPP_HASH
1396 && cur_token > &list->tokens[first_token]));
1400 if (cur == buffer->rlimit)
1406 /* Initialize current token. CPP_EOF will not be fixed up by
1407 expand_name_space. */
1408 list->tokens_used = cur_token - list->tokens + 1;
1409 cur_token->type = CPP_EOF;
1410 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1411 cur_token->line = CPP_BUF_LINE (buffer);
1412 cur_token->flags = flags;
1417 case '0': case '1': case '2': case '3': case '4':
1418 case '5': case '6': case '7': case '8': case '9':
1422 cur--; /* Backup character. */
1423 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1426 INIT_TOKEN_STR (list, cur_token);
1427 /* Prepend an immediately previous CPP_DOT token. */
1430 if (list->name_cap == list->name_used)
1431 auto_expand_name_space (list);
1433 cur_token->val.str.len = 1;
1434 list->namebuf[list->name_used++] = '.';
1438 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1440 parse_number (pfile, list, &cur_token->val.str);
1443 /* Check for # 123 form of #line. */
1444 if (MIGHT_BE_DIRECTIVE ())
1445 list->directive = _cpp_check_linemarker (pfile, cur_token,
1446 !(cur_token[-1].flags
1453 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1454 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1455 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1456 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1458 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1459 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1460 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1461 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1463 cur--; /* Backup character. */
1464 cur_token->val.node = 0;
1465 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1468 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
1470 if (MIGHT_BE_DIRECTIVE ())
1471 list->directive = _cpp_check_directive (pfile, cur_token,
1472 !(list->tokens[0].flags
1479 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1480 /* Do we have a wide string? */
1481 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1482 && cur_token[-1].val.node == pfile->spec_nodes->n_L)
1483 BACKUP_TOKEN (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1486 /* Here c is one of ' " or >. */
1487 INIT_TOKEN_STR (list, cur_token);
1489 parse_string (pfile, list, cur_token, c);
1495 cur_token->type = CPP_DIV;
1498 if (PREV_TOKEN_TYPE == CPP_DIV)
1500 /* We silently allow C++ comments in system headers,
1501 irrespective of conformance mode, because lots of
1502 broken systems do that and trying to clean it up
1503 in fixincludes is a nightmare. */
1504 if (CPP_IN_SYSTEM_HEADER (pfile))
1505 goto do_line_comment;
1506 else if (CPP_OPTION (pfile, cplusplus_comments))
1508 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1509 && ! buffer->warned_cplusplus_comments)
1513 "C++ style comments are not allowed in ISO C89");
1515 "(this will be reported only once per input file)");
1516 buffer->warned_cplusplus_comments = 1;
1520 #if 0 /* Leave until new lexer in place. */
1523 "comment start split across lines");
1525 if (skip_line_comment (pfile))
1526 cpp_warning (pfile, "multi-line comment");
1528 /* Back-up to first '-' or '/'. */
1530 if (!CPP_OPTION (pfile, discard_comments)
1531 && (!KNOWN_DIRECTIVE()
1532 || (list->directive->flags & COMMENTS)))
1533 save_comment (list, cur_token++, cur,
1534 buffer->cur - cur, c);
1547 cur_token->type = CPP_MULT;
1550 if (PREV_TOKEN_TYPE == CPP_DIV)
1553 #if 0 /* Leave until new lexer in place. */
1556 "comment start '/*' split across lines");
1558 if (skip_block_comment (pfile))
1559 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1560 "unterminated comment");
1561 #if 0 /* Leave until new lexer in place. */
1562 else if (buffer->cur[-2] != '*')
1564 "comment end '*/' split across lines");
1566 /* Back up to opening '/'. */
1568 if (!CPP_OPTION (pfile, discard_comments)
1569 && (!KNOWN_DIRECTIVE()
1570 || (list->directive->flags & COMMENTS)))
1571 save_comment (list, cur_token++, cur,
1572 buffer->cur - cur, c);
1579 else if (CPP_OPTION (pfile, cplusplus))
1581 /* In C++, there are .* and ->* operators. */
1582 if (PREV_TOKEN_TYPE == CPP_DEREF)
1583 BACKUP_TOKEN (CPP_DEREF_STAR);
1584 else if (PREV_TOKEN_TYPE == CPP_DOT)
1585 BACKUP_TOKEN (CPP_DOT_STAR);
1593 handle_newline (cur, buffer->rlimit, c);
1594 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1598 /* Remove the escaped newline. Then continue to process
1599 any interrupted name or number. */
1601 /* Backslash-newline may not be immediately followed by
1602 EOF (C99 5.1.1.2). */
1603 if (cur >= buffer->rlimit)
1605 cpp_pedwarn (pfile, "backslash-newline at end of file");
1611 if (cur_token->type == CPP_NAME)
1613 else if (cur_token->type == CPP_NUMBER)
1614 goto continue_number;
1617 /* Remember whitespace setting. */
1618 flags = cur_token->flags;
1625 "backslash and newline separated by space");
1628 else if (MIGHT_BE_DIRECTIVE ())
1630 /* "Null directive." C99 6.10.7: A preprocessing
1631 directive of the form # <new-line> has no effect.
1633 But it is still a directive, and therefore disappears
1636 if (cur_token->flags & PREV_WHITE
1637 && CPP_WTRADITIONAL (pfile))
1638 cpp_warning (pfile, "K+R C ignores #\\n with the # indented");
1641 /* Skip vertical space until we have at least one token to
1643 if (cur_token != &list->tokens[first_token])
1645 list->line = CPP_BUF_LINE (buffer);
1649 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1650 REVISE_TOKEN (CPP_MINUS_MINUS);
1652 PUSH_TOKEN (CPP_MINUS);
1657 /* The digraph flag checking ensures that ## and %:%:
1658 are interpreted as CPP_PASTE, but #%: and %:# are not. */
1659 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1660 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1661 REVISE_TOKEN (CPP_PASTE);
1663 PUSH_TOKEN (CPP_HASH);
1667 cur_token->type = CPP_COLON;
1670 if (PREV_TOKEN_TYPE == CPP_COLON
1671 && CPP_OPTION (pfile, cplusplus))
1672 BACKUP_TOKEN (CPP_SCOPE);
1673 else if (CPP_OPTION (pfile, digraphs))
1675 /* Digraph: "<:" is a '[' */
1676 if (PREV_TOKEN_TYPE == CPP_LESS)
1677 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1678 /* Digraph: "%:" is a '#' */
1679 else if (PREV_TOKEN_TYPE == CPP_MOD)
1681 (--cur_token)->flags |= DIGRAPH;
1690 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1691 REVISE_TOKEN (CPP_AND_AND);
1693 PUSH_TOKEN (CPP_AND);
1698 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1699 REVISE_TOKEN (CPP_OR_OR);
1701 PUSH_TOKEN (CPP_OR);
1705 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1706 REVISE_TOKEN (CPP_PLUS_PLUS);
1708 PUSH_TOKEN (CPP_PLUS);
1712 /* This relies on equidistance of "?=" and "?" tokens. */
1713 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1714 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1716 PUSH_TOKEN (CPP_EQ);
1720 cur_token->type = CPP_GREATER;
1723 if (PREV_TOKEN_TYPE == CPP_GREATER)
1724 BACKUP_TOKEN (CPP_RSHIFT);
1725 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1726 BACKUP_TOKEN (CPP_DEREF);
1727 else if (CPP_OPTION (pfile, digraphs))
1729 /* Digraph: ":>" is a ']' */
1730 if (PREV_TOKEN_TYPE == CPP_COLON)
1731 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1732 /* Digraph: "%>" is a '}' */
1733 else if (PREV_TOKEN_TYPE == CPP_MOD)
1734 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1741 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1743 REVISE_TOKEN (CPP_LSHIFT);
1746 /* Is this the beginning of a header name? */
1747 if (KNOWN_DIRECTIVE () && (list->directive->flags & INCL))
1749 c = '>'; /* Terminator. */
1750 cur_token->type = CPP_HEADER_NAME;
1751 goto do_parse_string;
1753 PUSH_TOKEN (CPP_LESS);
1757 /* Digraph: "<%" is a '{' */
1758 cur_token->type = CPP_MOD;
1759 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS
1760 && CPP_OPTION (pfile, digraphs))
1761 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1766 if (cur + 1 < buffer->rlimit && *cur == '?'
1767 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1769 /* Handle trigraph. */
1773 case '(': goto make_open_square;
1774 case ')': goto make_close_square;
1775 case '<': goto make_open_brace;
1776 case '>': goto make_close_brace;
1777 case '=': goto make_hash;
1778 case '!': goto make_or;
1779 case '-': goto make_complement;
1780 case '/': goto make_backslash;
1781 case '\'': goto make_xor;
1784 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1786 /* GNU C++ defines <? and >? operators. */
1787 if (PREV_TOKEN_TYPE == CPP_LESS)
1789 REVISE_TOKEN (CPP_MIN);
1792 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1794 REVISE_TOKEN (CPP_MAX);
1798 PUSH_TOKEN (CPP_QUERY);
1802 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1804 && !(cur_token[-1].flags & PREV_WHITE))
1807 PUSH_TOKEN (CPP_ELLIPSIS);
1810 PUSH_TOKEN (CPP_DOT);
1814 case '~': PUSH_TOKEN (CPP_COMPL); break;
1816 case '^': PUSH_TOKEN (CPP_XOR); break;
1818 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1820 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1822 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1824 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1826 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1827 case '!': PUSH_TOKEN (CPP_NOT); break;
1828 case ',': PUSH_TOKEN (CPP_COMMA); break;
1829 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1830 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1831 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1834 if (CPP_OPTION (pfile, dollars_in_ident))
1838 cur_token->val.aux = c;
1839 PUSH_TOKEN (CPP_OTHER);
1844 /* Run out of token space? */
1845 if (cur_token == token_limit)
1847 list->tokens_used = cur_token - list->tokens;
1848 _cpp_expand_token_space (list, 256);
1852 cur_token->flags = flags;
1853 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1855 if (cur > buffer->buf && !is_vspace (cur[-1]))
1856 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1857 CPP_BUF_COLUMN (buffer, cur),
1858 "no newline at end of file");
1859 cur_token++->type = CPP_EOF;
1863 /* All tokens are allocated, so the memory location is fixed. */
1864 first = &list->tokens[first_token];
1866 /* Don't complain about the null directive, nor directives in
1867 assembly source: we don't know where the comments are, and # may
1868 introduce assembler pseudo-ops. Don't complain about invalid
1869 directives in skipped conditional groups (6.10 p4). */
1870 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1871 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1873 if (first[1].type == CPP_NAME)
1874 cpp_error (pfile, "invalid preprocessing directive #%.*s",
1875 (int) first[1].val.node->length, first[1].val.node->name);
1877 cpp_error (pfile, "invalid preprocessing directive");
1880 /* Put EOF at end of known directives. This covers "directives do
1881 not extend beyond the end of the line (description 6.10 part 2)". */
1882 if (KNOWN_DIRECTIVE () || !pfile->done_initializing)
1884 pfile->first_directive_token = first;
1885 cur_token++->type = CPP_EOF;
1888 /* Directives, known or not, always start a new line. */
1889 if (first_token == 0 || list->tokens[first_token].type == CPP_HASH)
1890 first->flags |= BOL;
1892 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1893 up the invocation of a function-like macro, new line is
1894 considered a normal white-space character. */
1895 first->flags |= PREV_WHITE;
1898 list->tokens_used = cur_token - list->tokens;
1899 pfile->in_lex_line = 0;
1902 /* Write the spelling of a token TOKEN, with any appropriate
1903 whitespace before it, to the token_buffer. PREV is the previous
1904 token, which is used to determine if we need to shove in an extra
1905 space in order to avoid accidental token paste.

   A token flagged BOL with a non-zero column has room reserved for
   that many characters so it can be placed in its original column; a
   token flagged PREV_WHITE gets a single space.  The paste check adds
   a space when can_paste says PREV and TOKEN would merge, and also
   for plus followed by plus-plus and minus followed by minus-minus.
   Finally TOKEN_LEN bytes are reserved and spell_token writes the
   spelling at pfile->limit.  NOTE(review): some lines of the body are
   absent from this listing, so the nesting of these cases should be
   confirmed against the full file.  */
1907 output_token (pfile, token, prev)
1909 const cpp_token *token, *prev;
1913 if (token->col && (token->flags & BOL))
1915 /* Supply enough whitespace to put this token in its original
1916 column. Don't bother trying to reconstruct tabs; we can't
1917 get it right in general, and nothing ought to care. (Yes,
1918 some things do care; the fault lies with them.) */
1919 unsigned char *buffer;
1920 unsigned int spaces = token->col - 1;
1922 CPP_RESERVE (pfile, token->col);
1923 buffer = pfile->limit;
1927 pfile->limit = buffer;
1929 else if (token->flags & PREV_WHITE)
1930 CPP_PUTC (pfile, ' ');
1933 /* Check for and prevent accidental token pasting. */
1934 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1935 CPP_PUTC (pfile, ' ');
1936 /* can_paste doesn't catch all the accidental pastes.
1937 Consider a + ++b - if there is not a space between the + and ++, it
1938 will be misparsed as a++ + b. */
1939 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1940 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1941 CPP_PUTC (pfile, ' ');
1944 CPP_RESERVE (pfile, TOKEN_LEN (token));
1945 pfile->limit = spell_token (pfile, token, pfile->limit);
/* spell_token (PFILE, TOKEN, BUFFER): the spelling category is looked
   up in token_spellings by token type.  Operators are copied from
   digraph_spellings when the DIGRAPH flag is set and from
   token_spellings otherwise; the other visible paths copy the name of
   the hash node, copy the val.str text (with tests for the wide,
   string and character types on either side of that copy), or store
   val.aux.  An unspellable type is reported with cpp_ice, which is the
   only visible use of PFILE.

   NOTE(review): most case labels and the return statement are absent
   from this listing; confirm which category each path belongs to.  */
1948 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1949 already contain enough space to hold the token's spelling.
1950 Returns a pointer to the character after the last character
1953 static unsigned char *
1954 spell_token (pfile, token, buffer)
1955 cpp_reader *pfile; /* Would be nice to be rid of this... */
1956 const cpp_token *token;
1957 unsigned char *buffer;
1959 switch (token_spellings[token->type].type)
1961 case SPELL_OPERATOR:
1963 const unsigned char *spelling;
1966 if (token->flags & DIGRAPH)
1967 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1969 spelling = token_spellings[token->type].spelling;
1971 while ((c = *spelling++) != '\0')
1977 memcpy (buffer, token->val.node->name, token->val.node->length);
1978 buffer += token->val.node->length;
1983 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1986 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1988 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1991 memcpy (buffer, token->val.str.text, token->val.str.len);
1992 buffer += token->val.str.len;
1994 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1996 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
2002 *buffer++ = token->val.aux;
2006 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
2013 /* Return the spelling of a token known to be an operator.
2014 Does not distinguish digraphs from their counterparts.

   If the spelling category of TYPE in token_spellings is
   SPELL_OPERATOR the spelling stored there is returned; the other
   visible return yields the entry of token_names for TYPE.  */
2015 const unsigned char *
2016 _cpp_spell_operator (type)
2017 enum cpp_ttype type;
2019 if (token_spellings[type].type == SPELL_OPERATOR)
2020 return token_spellings[type].spelling;
2022 return token_names[type];
2026 /* Macro expansion algorithm. TODO. */
2029 /* Free the storage allocated for macro arguments.  The token pointer
   array ARGS->tokens is passed to free; NOTE(review): any other frees
   in this function are absent from this listing.  */
2031 free_macro_args (args)
2035 free ((PTR) args->tokens);
/* is_macro_disabled (PFILE, EXPANSION, TOKEN): decide whether the
   macro whose replacement list is EXPANSION must be left unexpanded
   where TOKEN appears.  The checks visible in this listing are, in
   order: the preprocessed option; the PASTED flag of TOKEN and the
   CONTEXT_RAW, CONTEXT_PASTEL and CONTEXT_PASTER flags of an argument
   context; a walk over the contexts looking for a non-argument
   context whose list is EXPANSION; and, for function-like macros
   (paramc >= 0), a look at the next token, fetched while
   pfile->no_expand_level is temporarily changed, which is pushed back
   if it is not an open parenthesis.  With CPP_WTRADITIONAL set that
   last case also has a diagnostic about a function macro used without
   arguments.

   NOTE(review): the return statements are absent from this listing,
   so which value means disabled must be confirmed in the full file.  */
2040 /* Determines if a macro has been already used (and is therefore
2043 is_macro_disabled (pfile, expansion, token)
2045 const cpp_toklist *expansion;
2046 const cpp_token *token;
2048 cpp_context *context = CURRENT_CONTEXT (pfile);
2050 /* Don't expand anything if this file has already been preprocessed. */
2051 if (CPP_OPTION (pfile, preprocessed))
2054 /* Arguments on either side of ## are inserted in place without
2055 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2056 occurs during a later rescan pass. The effect is that we expand
2057 iff we would as part of the macro's expansion list, so we should
2058 drop to the macro's context. */
2059 if (IS_ARG_CONTEXT (context))
2061 if (token->flags & PASTED)
2063 else if (!(context->flags & CONTEXT_RAW))
2065 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2069 /* Have we already used this macro? */
2070 while (context->level > 0)
2072 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2074 /* Raw argument tokens are judged based on the token list they
2076 if (context->flags & CONTEXT_RAW)
2077 context = pfile->contexts + context->level;
2082 /* Function-like macros may be disabled if the '(' is not in the
2083 current context. We check this without disrupting the context
2085 if (expansion->paramc >= 0)
2087 const cpp_token *next;
2088 unsigned int prev_nme;
2090 context = CURRENT_CONTEXT (pfile);
2091 /* Drop down any contexts we're at the end of: the '(' may
2092 appear in lower macro expansions, or in the rest of the file. */
2093 while (context->posn == context->count && context > pfile->contexts)
2096 /* If we matched, we are disabled, as we appear in the
2097 expansion of each macro we meet. */
2098 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2102 prev_nme = pfile->no_expand_level;
2103 pfile->no_expand_level = context - pfile->contexts;
2104 next = _cpp_get_token (pfile);
2105 restore_macro_expansion (pfile, prev_nme);
2106 if (next->type != CPP_OPEN_PAREN)
2108 _cpp_push_token (pfile, next);
2109 if (CPP_WTRADITIONAL (pfile))
2111 "function macro %.*s must be used with arguments in traditional C",
2112 (int) token->val.node->length, token->val.node->name);
2120 /* Add a token to the set of tokens forming the arguments to the macro
2121 being parsed in parse_args.

   When ARGS is full (used equals capacity) the pointer array is
   grown with xrealloc to twice its capacity plus 100 entries before
   TOKEN is stored at index used, which is then incremented.  */
2123 save_token (args, token)
2125 const cpp_token *token;
2127 if (args->used == args->capacity)
2129 args->capacity += args->capacity + 100;
2130 args->tokens = (const cpp_token **)
2131 xrealloc ((PTR) args->tokens,
2132 args->capacity * sizeof (const cpp_token *));
2134 args->tokens[args->used++] = token;
2137 /* Take and save raw tokens until we finish one argument. Empty
2138 arguments are saved as a single CPP_PLACEMARKER token.

   Tokens are fetched with _cpp_get_token and stored in ARGS through
   save_token.  Commas nested in parentheses, and all commas when
   VAR_ARGS is non-zero, do not end the argument.  PAREN_CONTEXT is
   compared with pfile->cur_context to decide whether a token counts
   as raw.  NOTE(review): the statements that store through PCOUNT and
   return are absent from this listing; parse_args tests the type of
   the returned token for CPP_CLOSE_PAREN and CPP_EOF, so presumably
   it is the token that ended the argument -- confirm.  */
2139 static const cpp_token *
2140 parse_arg (pfile, var_args, paren_context, args, pcount)
2143 unsigned int paren_context;
2145 unsigned int *pcount;
2147 const cpp_token *token;
2148 unsigned int paren = 0, count = 0;
2149 int raw, was_raw = 1;
2151 for (count = 0;; count++)
2153 token = _cpp_get_token (pfile);
2155 switch (token->type)
2160 case CPP_OPEN_PAREN:
2164 case CPP_CLOSE_PAREN:
2170 /* Commas are not terminators within parentheses or var_args. */
2171 if (paren || var_args)
2175 case CPP_EOF: /* Error reported by caller. */
2179 raw = pfile->cur_context <= paren_context;
2183 save_token (args, 0);
2186 save_token (args, token);
2192 /* Duplicate the placemarker. Then we can set its flags and
2193 position and safely be using more than one. */
2194 save_token (args, duplicate_token (pfile, &placemarker_token));
2202 /* This macro returns true if the argument starting at offset O of arglist
2203 A is empty - that is, it's either a single PLACEMARKER token, or a null
2204 pointer followed by a PLACEMARKER.  Both A and O are expanded more
   than once, so callers should pass expressions without side
   effects.  */
2206 #define empty_argument(A, O) \
2207 ((A)->tokens[O] ? (A)->tokens[O]->type == CPP_PLACEMARKER \
2208 : (A)->tokens[(O)+1]->type == CPP_PLACEMARKER)
2210 /* Parse the arguments making up a macro invocation. Nested arguments
2211 are automatically macro expanded, but immediate macros are not
2212 expanded; this enables e.g. operator # to work correctly. Returns
2213 non-zero on error.

   HP is the hash node of the macro; its expansion supplies the
   parameter count and flags.  Arguments are read one at a time with
   parse_arg until a close parenthesis or CPP_EOF is returned, and
   ARGS->ends records a running token total for each argument within
   the parameter count.  Three errors are diagnosed: an unterminated
   invocation (CPP_EOF reached), too few arguments (unless exactly one
   is missing and the macro has GNU_REST_ARGS, in which case a
   placemarker is stored for it), and too many arguments (unless the
   macro takes no parameters and the single argument is empty).
   NOTE(review): several statements, including the returns, are absent
   from this listing.  */
2215 parse_args (pfile, hp, args)
2220 const cpp_token *token;
2221 const cpp_toklist *macro;
2222 unsigned int total = 0;
2223 unsigned int paren_context = pfile->cur_context;
2226 macro = hp->value.expansion;
2231 token = parse_arg (pfile, (argc + 1 == macro->paramc
2232 && (macro->flags & VAR_ARGS)),
2233 paren_context, args, &count);
2234 if (argc < macro->paramc)
2237 args->ends[argc] = total;
2241 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2243 if (token->type == CPP_EOF)
2245 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2246 hp->length, hp->name);
2249 else if (argc < macro->paramc)
2251 /* A rest argument is allowed to not appear in the invocation at all.
2252 e.g. #define debug(format, args...) ...
2254 This is exactly the same as if the rest argument had received no
2255 tokens - debug("string",); This extension is deprecated. */
2257 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
2259 /* Duplicate the placemarker. Then we can set its flags and
2260 position and safely be using more than one. */
2261 save_token (args, duplicate_token (pfile, &placemarker_token));
2262 args->ends[argc] = total + 1;
2268 "insufficient arguments in invocation of macro \"%.*s\"",
2269 hp->length, hp->name);
2273 /* An empty argument to an empty function-like macro is fine. */
2274 else if (argc > macro->paramc
2275 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2278 "too many arguments in invocation of macro \"%.*s\"",
2279 hp->length, hp->name);
2286 /* Adds backslashes before all backslashes and double quotes appearing
2287 in strings. Non-printable characters are converted to octal.

   The octal form is written with sprintf as a backslash followed by
   three zero-padded octal digits.  NOTE(review): only a few lines of
   this function appear in this listing; make_string_token subtracts
   its buffer start from the return value, so the result is presumably
   a pointer just past the last byte written to DEST -- confirm.  */
2289 quote_string (dest, src, len)
2298 if (c == '\\' || c == '"')
2309 sprintf ((char *) dest, "\\%03o", c);
2318 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2319 CPP_STRING token containing TEXT in quoted form.

   The buffer is obtained from xmalloc with four bytes for each of the
   LEN bytes of TEXT, and the string length of TOKEN is the distance
   from the start of the buffer to the pointer returned by
   quote_string.

   NOTE(review): quote_string writes octal escapes with sprintf, which
   also stores a terminating null byte.  If every byte of TEXT needs a
   four-character escape, that null byte would fall one past the end
   of this allocation -- confirm against the complete quote_string.  */
2321 make_string_token (token, text, len)
2328 buf = (U_CHAR *) xmalloc (len * 4);
2329 token->type = CPP_STRING;
2331 token->val.str.text = buf;
2332 token->val.str.len = quote_string (buf, text, len) - buf;
2336 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2337 evaluating to NUMBER.

   The token comes from get_temp_token, and NUMBER is formatted in
   decimal by sprintf into buf, which becomes the string text of the
   token; the string length is taken with strlen.  NOTE(review): the
   allocation of buf is absent from this listing.  release_temp_tokens
   passes the text of SPELL_STRING tokens to free, so if CPP_NUMBER is
   in that category buf must be heap memory -- confirm, along with its
   size.  */
2339 alloc_number_token (pfile, number)
2346 result = get_temp_token (pfile);
2348 sprintf (buf, "%d", number);
2350 result->type = CPP_NUMBER;
2352 result->val.str.text = (U_CHAR *) buf;
2353 result->val.str.len = strlen (buf);
2357 /* Returns a temporary token from the temporary token store of PFILE.
   When every allocated token is in use, another is obtained from
   xmalloc; if the pointer array is also full it is first grown with
   xrealloc to twice its capacity plus 20 entries.  The token returned
   is the one at index temp_used, which is then incremented.  */
2359 get_temp_token (pfile)
2362 if (pfile->temp_used == pfile->temp_alloced)
2364 if (pfile->temp_used == pfile->temp_cap)
2366 pfile->temp_cap += pfile->temp_cap + 20;
2367 pfile->temp_tokens = (cpp_token **) xrealloc
2368 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2370 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2371 (sizeof (cpp_token));
2374 return pfile->temp_tokens[pfile->temp_used++];
2377 /* Release (not free) for re-use the temporary tokens of PFILE.  The
   token structures stay allocated; for tokens whose spelling category
   is SPELL_STRING the text is freed and the pointer cleared.  On
   return temp_used is zero.  */
2379 release_temp_tokens (pfile)
2382 while (pfile->temp_used)
2384 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2386 if (token_spellings[token->type].type == SPELL_STRING)
2388 free ((char *) token->val.str.text);
2389 token->val.str.text = 0;
2394 /* Free all of PFILE's dynamically-allocated temporary tokens.
   Tokens still in use are released first, then each allocated token
   and the pointer array are freed.  The text of pfile->date and
   pfile->time is freed as well; NOTE(review): the conditions guarding
   those two frees are absent from this listing.  */
2396 _cpp_free_temp_tokens (pfile)
2399 if (pfile->temp_tokens)
2401 /* It is possible, though unlikely (looking for '(' of a funlike
2402 macro into EOF), that we haven't released the tokens yet. */
2403 release_temp_tokens (pfile);
2404 while (pfile->temp_alloced)
2405 free (pfile->temp_tokens[--pfile->temp_alloced]);
2406 free (pfile->temp_tokens);
2411 free ((char *) pfile->date->val.str.text);
2413 free ((char *) pfile->time->val.str.text);
2418 /* Copy TOKEN into a temporary token from PFILE's store.  For a token
   whose spelling category is SPELL_STRING the text is copied into a
   new xmalloc buffer of exactly val.str.len bytes, and the result
   points at that copy.  NOTE(review): the statements that copy the
   remaining fields and return the result are absent from this
   listing.  */
2420 duplicate_token (pfile, token)
2422 const cpp_token *token;
2424 cpp_token *result = get_temp_token (pfile);
2427 if (token_spellings[token->type].type == SPELL_STRING)
2429 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2430 memcpy (buff, token->val.str.text, token->val.str.len);
2431 result->val.str.text = buff;
/* can_paste (PFILE, TOKEN1, TOKEN2, DIGRAPH): the decision is made
   mostly on the two token types.  The exceptions visible here are
   that a name is compared with the L node before a wide character or
   wide string result, that the first character of the second token or
   the last character of the first is examined in two of the number
   cases, and that the CPP_PASTE result requires both tokens to agree
   on the DIGRAPH flag.  Results that exist only in C++ are returned
   only when the cplusplus option is set, and digraph results only
   when the digraphs option is set.  *DIGRAPH is written on the paths
   that return a digraph spelling and on the CPP_PASTE path;
   NOTE(review): whether it is also initialised on other paths cannot
   be seen in this listing, where most case labels are absent.  */
2436 /* Determine whether two tokens can be pasted together, and if so,
2437 what the resulting token is. Returns CPP_EOF if the tokens cannot
2438 be pasted, or the appropriate type for the merged token if they
2440 static enum cpp_ttype
2441 can_paste (pfile, token1, token2, digraph)
2443 const cpp_token *token1, *token2;
2446 enum cpp_ttype a = token1->type, b = token2->type;
2447 int cxx = CPP_OPTION (pfile, cplusplus);
2449 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2450 return a + (CPP_EQ_EQ - CPP_EQ);
2455 if (b == a) return CPP_RSHIFT;
2456 if (b == CPP_QUERY && cxx) return CPP_MAX;
2457 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2460 if (b == a) return CPP_LSHIFT;
2461 if (b == CPP_QUERY && cxx) return CPP_MIN;
2462 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2463 if (CPP_OPTION (pfile, digraphs))
2466 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2468 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2472 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2473 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2474 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2477 if (b == a) return CPP_MINUS_MINUS;
2478 if (b == CPP_GREATER) return CPP_DEREF;
2481 if (b == a && cxx) return CPP_SCOPE;
2482 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2483 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2487 if (CPP_OPTION (pfile, digraphs))
2489 if (b == CPP_GREATER)
2490 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2492 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2496 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2499 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2500 if (b == CPP_NUMBER) return CPP_NUMBER;
2504 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2506 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2510 if (b == CPP_NAME) return CPP_NAME;
2512 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
2514 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2516 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2520 if (b == CPP_NUMBER) return CPP_NUMBER;
2521 if (b == CPP_NAME) return CPP_NUMBER;
2522 if (b == CPP_DOT) return CPP_NUMBER;
2523 /* Numbers cannot have length zero, so this is safe. */
2524 if ((b == CPP_PLUS || b == CPP_MINUS)
2525 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2536 /* Check if TOKEN is to be ##-pasted with the token after it.
   While TOKEN carries PASTE_LEFT, or the current context is flagged
   CONTEXT_PASTEL and has been read to its end, the following token
   is fetched with paste_level set and combined with TOKEN. */
2537 static const cpp_token *
2538 maybe_paste_with_next (pfile, token)
2540 const cpp_token *token;
2543 const cpp_token *second;
2544 cpp_context *context = CURRENT_CONTEXT (pfile);
2546 /* Is this token on the LHS of ## ? */
2548 while ((token->flags & PASTE_LEFT)
2549 || ((context->flags & CONTEXT_PASTEL)
2550 && context->posn == context->count))
2552 /* Suppress macro expansion for next token, but don't conflict
2553 with the other method of suppression. If it is an argument,
2554 macro expansion within the argument will still occur. */
2555 pfile->paste_level = pfile->cur_context;
2556 second = _cpp_get_token (pfile);
2557 pfile->paste_level = 0;
2559 /* Ignore placemarker argument tokens (cannot be from an empty
2560 macro since macros are not expanded). */
2561 if (token->type == CPP_PLACEMARKER)
2562 pasted = duplicate_token (pfile, second);
2563 else if (second->type == CPP_PLACEMARKER)
2565 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
2566 /* GCC has special extended semantics for a ## b where b is
2567 a varargs parameter: a disappears if b consists of no
2568 tokens. This extension is deprecated. */
2569 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2570 && (mac_context->u.list->tokens[mac_context->posn-1].val.aux + 1
2571 == (unsigned) mac_context->u.list->paramc))
2573 cpp_warning (pfile, "deprecated GNU ## extension used");
2574 pasted = duplicate_token (pfile, second);
2577 pasted = duplicate_token (pfile, token);
2582 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
/* A CPP_EOF result means the two tokens do not form a valid token:
   warn if warn_paste is enabled and push SECOND back so that it is
   returned by the next read. */
2584 if (type == CPP_EOF)
2586 if (CPP_OPTION (pfile, warn_paste))
2588 "pasting would not give a valid preprocessing token");
2589 _cpp_push_token (pfile, second);
2593 if (type == CPP_NAME || type == CPP_NUMBER)
2595 /* Join spellings. */
2598 pasted = get_temp_token (pfile);
/* NOTE(review): buf is sized from the two TOKEN_LEN values and is
   later handed to uxstrdup; confirm that it is NUL-terminated and
   that the terminator fits in the allocation. */
2599 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2600 end = spell_token (pfile, token, buf);
2601 end = spell_token (pfile, second, end);
2604 if (type == CPP_NAME)
2605 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2608 pasted->val.str.text = uxstrdup (buf);
2609 pasted->val.str.len = end - buf;
/* A wide character or wide string result starts from a copy of the
   second token. */
2612 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2613 pasted = duplicate_token (pfile, second);
2616 pasted = get_temp_token (pfile);
2617 pasted->val.integer = 0;
2620 pasted->type = type;
2621 pasted->flags = digraph ? DIGRAPH : 0;
2624 /* The pasted token gets the whitespace flags and position of the
2625 first token, the PASTE_LEFT flag of the second token, plus the
2626 PASTED flag to indicate it is the result of a paste. However, we
2627 want to preserve the DIGRAPH flag. */
2628 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2629 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2630 | (second->flags & PASTE_LEFT) | PASTED);
2631 pasted->col = token->col;
2632 pasted->line = token->line;
2634 /* See if there is another token to be pasted onto the one we just
2637 context = CURRENT_CONTEXT (pfile);
2643 /* Convert a token sequence to a single string token according to the
2644 rules of the ISO C #-operator. TOKEN identifies the macro argument;
   its tokens are read from their own argument context and spelled
   into a buffer obtained from xmalloc, which becomes the text of the
   CPP_STRING result. */
2645 #define INIT_SIZE 200
2647 stringify_arg (pfile, token)
2649 const cpp_token *token;
2652 unsigned char *main_buf;
2653 unsigned int prev_value, backslash_count = 0;
2654 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
/* Enter the argument's context and keep macro expansion disabled
   until the whole argument has been read. */
2656 push_arg_context (pfile, token);
2657 prev_value = prevent_macro_expansion (pfile);
2658 main_buf = (unsigned char *) xmalloc (buf_cap);
2660 result = get_temp_token (pfile);
2661 ASSIGN_FLAGS_AND_POS (result, token);
2663 for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2667 unsigned int len = TOKEN_LEN (token);
/* String and character constants, narrow or wide, are the tokens
   that need escaping (presumably via quote_string below -- confirm). */
2669 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2670 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
/* Grow the output buffer, leaving INIT_SIZE bytes of slack. */
2674 if (buf_used + len > buf_cap)
2676 buf_cap = buf_used + len + INIT_SIZE;
2677 main_buf = xrealloc (main_buf, buf_cap);
2680 if (whitespace && (token->flags & PREV_WHITE))
2681 main_buf[buf_used++] = ' ';
/* NOTE(review): this looks like a scratch buffer for tokens that
   need escaping; verify that it is released after quote_string. */
2684 buf = (unsigned char *) xmalloc (len);
2686 buf = main_buf + buf_used;
2688 len = spell_token (pfile, token, buf) - buf;
2691 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2698 if (token->type == CPP_BACKSLASH)
2701 backslash_count = 0;
2704 /* Ignore the final \ of invalid string literals. */
2705 if (backslash_count & 1)
2707 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
/* The result takes main_buf as its text, with buf_used as length. */
2711 result->type = CPP_STRING;
2712 result->val.str.text = main_buf;
2713 result->val.str.len = buf_used;
2714 restore_macro_expansion (pfile, prev_value);
2718 /* Allocate more room on the context stack of PFILE. The capacity
   grows from N to 2N + 20 entries and the array is resized with
   xrealloc. */
2720 expand_context_stack (pfile)
2723 pfile->context_cap += pfile->context_cap + 20;
2724 pfile->contexts = (cpp_context *)
2725 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2728 /* Push the context of macro NODE onto the context stack. TOKEN is
2729 the CPP_NAME token invoking the macro. The caller in this file
   tests the return value; its exact meaning should be confirmed
   against the full function. */
2731 push_macro_context (pfile, token)
2733 const cpp_token *token;
2735 unsigned char orig_flags;
2737 cpp_context *context;
2738 cpp_hashnode *node = token->val.node;
2740 /* Token's flags may change when parsing args containing a nested
2741 invocation of this macro. */
2742 orig_flags = token->flags & (PREV_WHITE | BOL);
/* A non-negative paramc presumably marks a macro that takes an
   argument list (TODO confirm); the arguments are collected before
   the macro's own context is pushed. */
2744 if (node->value.expansion->paramc >= 0)
2746 unsigned int error, prev_nme;
2748 /* Allocate room for the argument contexts, and parse them. */
2749 args = (macro_args *) xmalloc (sizeof (macro_args));
2750 args->ends = (unsigned int *)
2751 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2755 args->level = pfile->cur_context;
/* The arguments are parsed with macro expansion turned off. */
2757 prev_nme = prevent_macro_expansion (pfile);
2759 error = parse_args (pfile, node, args);
2761 restore_macro_expansion (pfile, prev_nme);
/* Release the argument storage (presumably only when parse_args
   reported an error -- confirm). */
2764 free_macro_args (args);
2769 /* Now push its context. */
2770 pfile->cur_context++;
2771 if (pfile->cur_context == pfile->context_cap)
2772 expand_context_stack (pfile);
/* The new context reads the macro's expansion list from its start. */
2774 context = CURRENT_CONTEXT (pfile);
2775 context->u.list = node->value.expansion;
2776 context->args = args;
2778 context->count = context->u.list->tokens_used;
2779 context->level = pfile->cur_context;
2781 context->pushed_token = 0;
2783 /* Set the flags of the first token. We know there must
2784 be one, empty macros are a single placemarker token. */
2785 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2790 /* Push an argument to the current macro onto the context stack.
2791 TOKEN is the MACRO_ARG token representing the argument expansion.
   TOKEN's val.aux is used as the index of the argument. */
2793 push_arg_context (pfile, token)
2795 const cpp_token *token;
2797 cpp_context *context;
2800 pfile->cur_context++;
2801 if (pfile->cur_context == pfile->context_cap)
2802 expand_context_stack (pfile);
2804 context = CURRENT_CONTEXT (pfile);
/* The argument storage belongs to the context just below the new
   one. */
2805 args = context[-1].args;
/* args->ends[i] is the index one past the last token of argument i,
   so argument aux occupies [ends[aux - 1], ends[aux]), with 0 as the
   start of the first argument. context->count briefly holds the
   start index before it is turned into the token count. */
2807 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2808 context->u.arg = args->tokens + context->count;
2809 context->count = args->ends[token->val.aux] - context->count;
2812 context->level = args->level;
2813 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2814 context->pushed_token = 0;
2816 /* Set the flags of the first token. There is one. */
2818 const cpp_token *first = context->u.arg[0];
/* NOTE(review): the second slot is presumably chosen only under a
   condition that should be checked against the full source. */
2820 first = context->u.arg[1];
2822 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2823 token->flags & (PREV_WHITE | BOL));
/* Record on the context whether the argument is the left operand of
   ## (CONTEXT_PASTEL) and whether a paste is in progress while it is
   pushed (CONTEXT_PASTER). */
2826 if (token->flags & PASTE_LEFT)
2827 context->flags |= CONTEXT_PASTEL;
2828 if (pfile->paste_level)
2829 context->flags |= CONTEXT_PASTER;
2832 /* "Unget" a token. It is effectively inserted in the token queue and
2833 will be returned by the next call to get_raw_token. Each context
   has room for a single pushed token; pushing while one is already
   pending is reported through cpp_ice. */
2835 _cpp_push_token (pfile, token)
2837 const cpp_token *token;
2839 cpp_context *context = CURRENT_CONTEXT (pfile);
2840 if (context->pushed_token)
2841 cpp_ice (pfile, "two tokens pushed in a row");
2842 if (token->type != CPP_EOF)
2843 context->pushed_token = token;
2844 /* Don't push back a directive's CPP_EOF, step back instead. This
   only happens in the base context; a CPP_EOF pushed in any other
   context is dropped. */
2845 else if (pfile->cur_context == 0)
2846 pfile->contexts[0].posn--;
2849 /* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2850 introducing the directive. The directive run is the one recorded
   in pfile->token_list.directive. Its handler is called with macro
   expansion disabled unless the directive has the EXPAND flag, and
   the rest of the line is skipped afterwards. */
2852 process_directive (pfile, token)
2854 const cpp_token *token;
2856 const struct directive *d = pfile->token_list.directive;
2859 /* Skip over the directive name. A CPP_NUMBER is left unread; any
   other token type here is reported as an internal error. */
2860 if (token[1].type == CPP_NAME)
2861 _cpp_get_raw_token (pfile);
2862 else if (token[1].type != CPP_NUMBER)
2863 cpp_ice (pfile, "directive begins with %s?!",
2864 token_names[token[1].type]);
2866 /* Flush pending tokens at this point, in case the directive produces
2867 output. XXX Directive output won't be visible to a direct caller of
2869 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2870 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2872 if (! (d->flags & EXPAND))
2873 prev_nme = prevent_macro_expansion (pfile);
2874 (void) (*d->handler) (pfile);
2875 if (! (d->flags & EXPAND))
2876 restore_macro_expansion (pfile, prev_nme);
2877 _cpp_skip_rest_of_line (pfile);
2880 /* The external interface to return the next token. All macro
2881 expansion and directive processing is handled internally, the
2882 caller only ever sees the output after preprocessing. Tokens are
   obtained from _cpp_get_token. */
2884 cpp_get_token (pfile)
2887 const cpp_token *token;
2888 /* Loop till we hit a non-directive, non-placemarker token. */
2891 token = _cpp_get_token (pfile);
2893 if (token->type == CPP_PLACEMARKER)
/* A '#' with the BOL flag is treated as a directive only when
   pfile->token_list.directive is set. */
2896 if (token->type == CPP_HASH && token->flags & BOL
2897 && pfile->token_list.directive)
2899 process_directive (pfile, token);
2907 /* The internal interface to return the next token. There are two
2908 differences between the internal and external interfaces: the
2909 internal interface may return a PLACEMARKER token, and it does not
2910 process directives. Raw tokens come from get_raw_token; ## pasting,
   builtin symbols and macro context pushes are handled here. */
2912 _cpp_get_token (pfile)
2915 const cpp_token *token;
2918 /* Loop until we hit a non-macro token. */
2921 token = get_raw_token (pfile);
2923 /* Short circuit EOF. */
2924 if (token->type == CPP_EOF)
2927 /* If we are skipping... */
2928 if (pfile->skipping)
2930 /* we still have to process directives, */
2931 if (pfile->token_list.directive)
2934 /* but everything else is ignored. */
2935 _cpp_skip_rest_of_line (pfile);
2939 /* If there's a potential control macro and we get here, then that
2940 #ifndef didn't cover the entire file and its argument shouldn't
2941 be taken as a control macro. */
2942 pfile->potential_control_macro = 0;
2944 /* See if there's a token to paste with this one. This is skipped
   while paste_level is non-zero. */
2945 if (!pfile->paste_level)
2946 token = maybe_paste_with_next (pfile, token);
2948 /* If it isn't a macro, return it now. */
2949 if (token->type != CPP_NAME
2950 || token->val.node->type == T_VOID)
2953 /* Is macro expansion disabled in general? */
2954 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
/* Identifiers whose node is not T_MACRO are handed to
   special_symbol. */
2957 node = token->val.node;
2958 if (node->type != T_MACRO)
2959 return special_symbol (pfile, node, token);
2961 if (is_macro_disabled (pfile, node->value.expansion, token))
/* Report an error once the context stack is deeper than
   CPP_STACK_MAX. */
2964 if (pfile->cur_context > CPP_STACK_MAX)
2966 cpp_error (pfile, "macros nested too deep invoking '%s'", node->name);
2970 if (push_macro_context (pfile, token))
2976 /* Returns the next raw token, i.e. without performing macro
2977 expansion. Argument contexts are automatically entered. */
2978 static const cpp_token *
2979 get_raw_token (pfile)
2982 const cpp_token *result;
2983 cpp_context *context;
2987 context = CURRENT_CONTEXT (pfile);
/* A token pushed back with _cpp_push_token is returned first, and
   the slot is cleared so that it is delivered only once. */
2988 if (context->pushed_token)
2990 result = context->pushed_token;
2991 context->pushed_token = 0;
/* The current context has been read to its end: pop it. */
2993 else if (context->posn == context->count)
2995 if (pop_context (pfile))
/* Argument contexts hold an array of token pointers (u.arg); other
   contexts index directly into a token list (u.list). */
3001 if (IS_ARG_CONTEXT (context))
3003 result = context->u.arg[context->posn++];
3006 context->flags ^= CONTEXT_RAW;
3007 result = context->u.arg[context->posn++];
3009 return result; /* Cannot be a CPP_MACRO_ARG */
3011 result = &context->u.list->tokens[context->posn++];
/* A CPP_MACRO_ARG token stands for a macro argument: it is
   stringified when STRINGIFY_ARG is set, otherwise the argument's
   context is pushed. */
3014 if (result->type != CPP_MACRO_ARG)
3017 if (result->flags & STRINGIFY_ARG)
3018 return stringify_arg (pfile, result);
3020 push_arg_context (pfile, result);
3024 /* Internal interface to get the token without macro expanding.
   Macro expansion is disabled around a single _cpp_get_token call
   and the previous setting is restored afterwards. */
3026 _cpp_get_raw_token (pfile)
3029 int prev_nme = prevent_macro_expansion (pfile);
3030 const cpp_token *result = _cpp_get_token (pfile);
3031 restore_macro_expansion (pfile, prev_nme);
3035 /* A thin wrapper to lex_line. CLEAR is non-zero if the current token
3036 list should be overwritten, or zero if we need to append
3037 (typically, if we are within the arguments to a macro, or looking
3038 for the '(' to start a function-like macro invocation). */
3040 lex_next (pfile, clear)
3044 cpp_toklist *list = &pfile->token_list;
/* Remember where the token array was and how much of it was in use,
   so that pointers into it can be rebased if lex_line moves it. */
3045 const cpp_token *old_list = list->tokens;
3046 unsigned int old_used = list->tokens_used;
3050 /* Release all temporary tokens. */
3051 _cpp_clear_toklist (list);
3052 pfile->contexts[0].posn = 0;
3053 if (pfile->temp_used)
3054 release_temp_tokens (pfile);
3057 lex_line (pfile, list);
/* The base context covers every token now in the list. */
3058 pfile->contexts[0].count = list->tokens_used;
3060 if (!clear && pfile->args)
3062 /* Fix up argument token pointers. Only pointers that fall inside
   the old array are shifted, by the distance the array moved. */
3063 if (old_list != list->tokens)
3067 for (i = 0; i < pfile->args->used; i++)
3069 const cpp_token *token = pfile->args->tokens[i];
3070 if (token >= old_list && token < old_list + old_used)
3071 pfile->args->tokens[i] = (const cpp_token *)
3072 ((char *) token + ((char *) list->tokens - (char *) old_list));
3076 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3077 tokens within the list of arguments that would otherwise act as
3078 preprocessing directives, the behavior is undefined.
3080 This implementation will report a hard error and treat the
3081 'sequence of preprocessing tokens' as part of the macro argument,
3084 Note if pfile->args == 0, we're OK since we're only inside a
3085 macro argument after a '('. */
3086 if (list->directive)
3088 cpp_error_with_line (pfile, list->tokens[old_used].line,
3089 list->tokens[old_used].col,
3090 "#%s may not be used inside a macro argument",
3091 list->directive->name);
3099 /* Pops a context off the context stack. If we're at the bottom, lexes
3100 the next logical line. Returns EOF if we're at the end of the
3101 argument list to the # operator, or if it is illegal to "overflow"
3102 into the rest of the file (e.g. 6.10.3.1.1). */
3107 cpp_context *context;
3109 if (pfile->cur_context == 0)
3111 /* If we are currently processing a directive, do not advance. 6.10
3112 paragraph 2: A new-line character ends the directive even if it
3113 occurs within what would otherwise be an invocation of a
3114 function-like macro. */
3115 if (pfile->token_list.directive)
/* The token list is overwritten only while no_expand_level still
   has the value UINT_MAX; otherwise the new line is appended. */
3118 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3121 /* Argument contexts, when parsing args or handling # operator
3122 return CPP_EOF at the end. */
3123 context = CURRENT_CONTEXT (pfile);
3124 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3127 /* Free resources when leaving macro contexts. */
3129 free_macro_args (context->args);
/* Keep no_expand_level in step with the context being removed. */
3131 if (pfile->cur_context == pfile->no_expand_level)
3132 pfile->no_expand_level--;
3133 pfile->cur_context--;
3138 /* Turn off macro expansion at the current context level. The old
   no_expand_level is saved in prev_value; callers in this file keep
   the value they get back and later pass it to
   restore_macro_expansion. */
3140 prevent_macro_expansion (pfile)
3143 unsigned int prev_value = pfile->no_expand_level;
3144 pfile->no_expand_level = pfile->cur_context;
3148 /* Restore macro expansion to its previous state. PREV_VALUE is
   stored back into pfile->no_expand_level. */
3150 restore_macro_expansion (pfile, prev_value)
3152 unsigned int prev_value;
3154 pfile->no_expand_level = prev_value;
/* Line and column are taken from the token at position index - 1 of
   pfile->token_list: index is tokens_used while in_lex_line is set,
   and the base context's read position otherwise.
   NOTE(review): special_symbol calls this with a null PCOL; confirm
   that the store through PCOL is guarded. */
3157 /* Used by cpperror.c to obtain the correct line and column to report
3160 _cpp_get_line (pfile, pcol)
3165 const cpp_token *cur_token;
3167 if (pfile->in_lex_line)
3168 index = pfile->token_list.tokens_used;
3170 index = pfile->contexts[0].posn;
3172 cur_token = &pfile->token_list.tokens[index - 1];
3174 *pcol = cur_token->col;
3175 return cur_token->line;
/* DSC expands a string literal into two arguments: the literal cast
   to const U_CHAR * and its length without the terminating NUL.
   monthnames holds three-letter month abbreviations and is indexed
   with tm_mon when the date string is built. */
3178 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3179 static const char * const monthnames[] =
3181 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3182 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3185 /* Handle builtin macros like __FILE__. NODE is the hash node of the
   identifier and TOKEN the token that named it; the result takes its
   flags and position from TOKEN. */
3186 static const cpp_token *
3187 special_symbol (pfile, node, token)
3190 const cpp_token *token;
3202 ip = CPP_BUFFER (pfile);
/* For T_BASE_FILE, walk back to the buffer that has no previous
   buffer. */
3207 if (node->type == T_BASE_FILE)
3208 while (CPP_PREV_BUFFER (ip) != NULL)
3209 ip = CPP_PREV_BUFFER (ip);
3211 file = ip->nominal_fname;
3213 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3218 case T_INCLUDE_LEVEL:
3219 /* pfile->include_depth counts the primary source as level 1,
3220 but historically __INCLUDE_DEPTH__ has called the primary
3222 result = alloc_number_token (pfile, pfile->include_depth - 1);
3226 /* If __LINE__ is embedded in a macro, it must expand to the
3227 line of the macro's invocation, not its definition.
3228 Otherwise things like assert() will not work properly. */
3229 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3236 #ifdef STDC_0_IN_SYSTEM_HEADERS
3237 if (CPP_IN_SYSTEM_HEADER (pfile)
3238 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3241 result = alloc_number_token (pfile, stdc);
3247 if (pfile->date == 0)
3249 /* Allocate __DATE__ and __TIME__ from permanent storage,
3250 and save them in pfile so we don't have to do this again.
3251 We don't generate these strings at init time because
3252 time() and localtime() are very slow on some systems. */
3253 time_t tt = time (NULL);
3254 struct tm *tb = localtime (&tt);
/* The literals given to DSC act as templates of the right length;
   the sprintf calls below write the real values over the token text.
   NOTE(review): this presumes make_string_token stores a writable
   copy of the text, and that localtime did not return NULL --
   confirm both. */
3256 pfile->date = make_string_token
3257 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3258 pfile->time = make_string_token
3259 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3261 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3262 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3263 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3264 tb->tm_hour, tb->tm_min, tb->tm_sec);
3266 result = node->type == T_DATE ? pfile->date: pfile->time;
3270 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3274 cpp_ice (pfile, "invalid special hash type");
3278 ASSIGN_FLAGS_AND_POS (result, token);
3283 /* Dump the original user's spelling of argument index ARG_NO to the
3284 macro whose expansion is LIST. */
3286 dump_param_spelling (pfile, list, arg_no)
3288 const cpp_toklist *list;
3289 unsigned int arg_no;
/* list->namebuf is walked as a sequence of NUL-terminated names:
   adding ustrlen + 1 steps from one name to the next (presumably
   repeated ARG_NO times -- confirm). The name reached is then
   written out without its terminator. */
3291 const U_CHAR *param = list->namebuf;
3294 param += ustrlen (param) + 1;
3295 CPP_PUTS (pfile, param, ustrlen (param));
3298 /* Dump a token list to the output. Tokens from TOKEN up to the end
   of LIST are written. A CPP_MACRO_ARG token is printed as the
   parameter's spelling, preceded by a space if it has PREV_WHITE and
   by '#' if it has STRINGIFY_ARG. A token flagged PASTE_LEFT is
   followed by " ##". If FLUSH is non-zero and PFILE has a printer,
   the output is passed to cpp_output_tokens at the end. */
3300 _cpp_dump_list (pfile, list, token, flush)
3302 const cpp_toklist *list;
3303 const cpp_token *token;
3306 const cpp_token *limit = list->tokens + list->tokens_used;
3307 const cpp_token *prev = 0;
3309 /* Avoid the CPP_EOF. */
3310 if (list->directive)
3313 while (token < limit)
3315 if (token->type == CPP_MACRO_ARG)
3317 if (token->flags & PREV_WHITE)
3318 CPP_PUTC (pfile, ' ');
3319 if (token->flags & STRINGIFY_ARG)
3320 CPP_PUTC (pfile, '#');
3321 dump_param_spelling (pfile, list, token->val.aux);
3324 output_token (pfile, token, prev);
3325 if (token->flags & PASTE_LEFT)
3326 CPP_PUTS (pfile, " ##", 3);
3331 if (flush && pfile->printer)
3332 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3335 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3336 if it hasn't happened already. The statements below also set up
   the token list and a context stack of 20 entries whose base
   context reads from pfile->token_list.
   NOTE(review): no use of input_buffer appears among them; the first
   sentence may be out of date -- confirm. */
3339 _cpp_init_input_buffer (pfile)
3344 init_trigraph_map ();
3345 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
/* no_expand_level starts at UINT_MAX and the stack starts at the
   base context. */
3346 pfile->no_expand_level = UINT_MAX;
3347 pfile->context_cap = 20;
3348 pfile->cur_context = 0;
3350 pfile->contexts = (cpp_context *)
3351 xmalloc (pfile->context_cap * sizeof (cpp_context));
3353 /* Clear the base context. */
3354 base = &pfile->contexts[0];
3355 base->u.list = &pfile->token_list;
3361 base->pushed_token = 0;
3364 /* Moves to the end of the directive line, popping contexts as
3367 _cpp_skip_rest_of_line (pfile)
3370 /* Discard all stacked contexts. */
/* Every context above the base that owns argument storage has it
   freed. */
3372 for (i = pfile->cur_context; i > 0; i--)
3373 if (pfile->contexts[i].args)
3374 free_macro_args (pfile->contexts[i].args);
/* A no_expand_level that is not above the discarded contexts is
   reset to 0 before the stack returns to the base context. */
3376 if (pfile->no_expand_level <= pfile->cur_context)
3377 pfile->no_expand_level = 0;
3378 pfile->cur_context = 0;
3380 /* Clear the base context, and clear the directive pointer so that
3381 get_raw_token will advance to the next line. */
3382 pfile->contexts[0].count = 0;
3383 pfile->contexts[0].posn = 0;
3384 pfile->token_list.directive = 0;
/* BUF (COUNT bytes) is pushed as a new buffer, one line is lexed
   from it and DIR's handler is run on that line. Nothing is done
   when cpp_push_buffer returns NULL. */
3387 /* Directive handler wrapper used by the command line option
3390 _cpp_run_directive (pfile, dir, buf, count)
3392 const struct directive *dir;
3396 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3398 unsigned int prev_lvl = 0;
3400 /* Scan the line now, else prevent_macro_expansion won't work. */
3401 lex_next (pfile, 1);
/* Directives without the EXPAND flag run with macro expansion
   disabled. */
3402 if (! (dir->flags & EXPAND))
3403 prev_lvl = prevent_macro_expansion (pfile);
3405 (void) (*dir->handler) (pfile);
3407 if (! (dir->flags & EXPAND))
3408 restore_macro_expansion (pfile, prev_lvl);
/* Drop whatever the handler left unread, then pop the buffer that
   was pushed above. */
3410 _cpp_skip_rest_of_line (pfile);
3411 cpp_pop_buffer (pfile);