1 /* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks, each representing the text
26 between subsequent parameters in that text.
28 Each block comprises the length of text contained therein, the
29 one-based index of the argument that immediately follows that text,
30 and the text itself. The final block in the macro expansion is
31 easily recognizable as it has an argument index of zero. */
35 unsigned int text_len;
36 unsigned short arg_index;
40 #define BLOCK_HEADER_LEN offsetof (struct block, text)
41 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + TEXT_LEN)
43 /* Structure holding information about a function-like macro
47 /* Memory buffer holding the args array. */
50 /* An array of size the number of macro parameters + 1, containing
51 the offsets of the start of each macro argument in the output
52 buffer. The argument continues until the character before the
53 start of the next one. */
56 /* The hashnode of the macro. */
59 /* The offset of the macro name in the output buffer. */
62 /* Zero-based index of argument being currently lexed. */
66 /* Lexing TODO: Maybe handle -CC and space in escaped newlines. Stop
67 cpplex.c from recognizing comments and directives during its lexing
68 pass. Get rid of line_base usage - seems pointless? */
70 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
71 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
73 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
74 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
75 static const uchar *copy_comment PARAMS ((cpp_reader *, const uchar *));
76 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
77 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
78 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
79 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
80 static bool recursive_macro PARAMS ((cpp_reader *, cpp_hashnode *));
81 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
83 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
84 const uchar *, struct fun_macro *));
85 static void save_argument PARAMS ((struct fun_macro *, size_t));
86 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
87 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
90 /* Ensures we have N bytes' space in the output buffer, and
91 reallocates it if not. */
93 check_output_buffer (pfile, n)
97 /* We might need two bytes to terminate an unterminated comment, and
98 one more to terminate with a NUL. */
101 if (n > (size_t) (pfile->out.limit - pfile->out.cur))
103 size_t size = pfile->out.cur - pfile->out.base;
104 size_t new_size = (size + n) * 3 / 2;
107 = (uchar *) xrealloc (pfile->out.base, new_size);
108 pfile->out.limit = pfile->out.base + new_size;
109 pfile->out.cur = pfile->out.base + size;
113 /* To be called whenever a newline character is encountered in the
114 input file, at CUR. Handles DOS, MAC and Unix ends of line, and
115 returns a pointer to the character after the newline sequence. */
117 handle_newline (pfile, cur)
122 if (cur[0] + cur[1] == '\r' + '\n')
124 pfile->buffer->line_base = cur + 1;
128 /* CUR points to any character in the buffer, not necessarily a
129 backslash. Advances CUR until all escaped newlines are skipped,
130 and returns the new position. */
132 skip_escaped_newlines (pfile, cur)
136 if (*cur == '\\' && is_vspace (cur[1]))
139 cur = handle_newline (pfile, cur + 1);
140 while (*cur == '\\' && is_vspace (cur[1]));
142 if (cur == RLIMIT (pfile->context))
143 cpp_error (pfile, DL_PEDWARN,
144 "backslash-newline at end of file");
150 /* CUR points to the character after the asterisk introducing a
151 comment in the input buffer. The remaining comment is copied to
152 the buffer pointed to by pfile->out.cur, which must be of
153 sufficient size, and pfile->out.cur is updated. Unterminated
154 comments are diagnosed, and correctly terminated in the output.
156 Returns a pointer to the first character after the comment in the
159 copy_comment (pfile, cur)
163 unsigned int from_line = pfile->line;
164 const uchar *limit = RLIMIT (pfile->context);
165 uchar *out = pfile->out.cur;
169 unsigned int c = *cur++;
174 /* An immediate slash does not terminate the comment. */
175 if (out[-2] == '*' && out > pfile->out.cur + 1)
178 if (*cur == '*' && cur[1] != '/'
179 && CPP_OPTION (pfile, warn_comments))
180 cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
181 "\"/*\" within comment");
183 else if (is_vspace (c))
185 cur = handle_newline (pfile, cur - 1);
186 /* Canonicalize newline sequences and skip escaped ones. */
194 cpp_error_with_line (pfile, DL_ERROR, from_line, 0, "unterminated comment");
199 pfile->out.cur = out;
203 /* Skip any horizontal whitespace and comments beginning at CUR,
204 returning a pointer to the following character. */
206 skip_whitespace (pfile, cur)
214 while (is_nvspace (*cur) && *cur != 0)
217 if (*cur == '\0' && cur != RLIMIT (pfile->context))
223 cur = skip_escaped_newlines (pfile, cur);
230 tmp = skip_escaped_newlines (pfile, cur + 1);
233 cur = copy_comment (pfile, tmp + 1);
244 /* Lexes and outputs an identifier starting at CUR, which is assumed
245 to point to a valid first character of an identifier. Returns
246 the hashnode, and updates out.cur. */
247 static cpp_hashnode *
248 lex_identifier (pfile, cur)
253 uchar *out = pfile->out.cur;
254 cpp_hashnode *result;
260 while (is_numchar (*cur));
261 cur = skip_escaped_newlines (pfile, cur);
263 while (is_numchar (*cur));
265 CUR (pfile->context) = cur;
266 len = out - pfile->out.cur;
267 result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->out.cur,
269 pfile->out.cur = out;
273 /* Overlays the true file buffer temporarily with text of length LEN
274 starting at START. The true buffer is restored upon calling
277 _cpp_overlay_buffer (pfile, start, len)
282 cpp_buffer *buffer = pfile->buffer;
284 buffer->saved_cur = buffer->cur;
285 buffer->saved_rlimit = buffer->rlimit;
286 buffer->saved_line_base = buffer->line_base;
289 buffer->line_base = start;
290 buffer->rlimit = start + len;
292 pfile->saved_line = pfile->line;
295 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
297 _cpp_remove_overlay (pfile)
300 cpp_buffer *buffer = pfile->buffer;
302 buffer->cur = buffer->saved_cur;
303 buffer->rlimit = buffer->saved_rlimit;
304 buffer->line_base = buffer->saved_line_base;
306 pfile->line = pfile->saved_line;
309 /* Reads a logical line into the output buffer. Returns TRUE if there
310 is more text left in the buffer. */
312 _cpp_read_logical_line_trad (pfile)
315 cpp_buffer *buffer = pfile->buffer;
319 if (buffer->cur == buffer->rlimit)
323 /* Don't pop the last buffer. */
326 stop = buffer->return_at_eof;
327 _cpp_pop_buffer (pfile);
334 CUR (pfile->context) = buffer->cur;
335 RLIMIT (pfile->context) = buffer->rlimit;
336 scan_out_logical_line (pfile, NULL);
337 buffer->cur = CUR (pfile->context);
339 while (pfile->state.skipping);
344 /* Set up state for finding the opening '(' of a function-like
347 maybe_start_funlike (pfile, node, start, macro)
351 struct fun_macro *macro;
353 unsigned int n = node->value.macro->paramc + 1;
356 _cpp_release_buff (pfile, macro->buff);
357 macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
358 macro->args = (size_t *) BUFF_FRONT (macro->buff);
360 macro->offset = start - pfile->out.base;
363 pfile->state.parsing_args = 1;
366 /* Save the OFFSET of the start of the next argument to MACRO. */
368 save_argument (macro, offset)
369 struct fun_macro *macro;
373 if (macro->argc <= macro->node->value.macro->paramc)
374 macro->args[macro->argc] = offset;
377 /* Copies the next logical line in the current buffer to the output
378 buffer. The output is guaranteed to terminate with a NUL
381 If MACRO is non-NULL, then we are scanning the replacement list of
382 MACRO, and we call save_replacement_text() every time we meet an
385 scan_out_logical_line (pfile, macro)
389 cpp_context *context;
391 unsigned int c, paren_depth = 0, quote = 0;
393 struct fun_macro fmacro;
398 pfile->out.cur = pfile->out.base;
399 pfile->out.first_line = pfile->line;
401 context = pfile->context;
403 check_output_buffer (pfile, RLIMIT (context) - cur);
404 out = pfile->out.cur;
411 /* There are only a few entities we need to catch: comments,
412 identifiers, newlines, escaped newlines, # and '\0'. */
416 if (cur - 1 != RLIMIT (context))
419 /* If this is a macro's expansion, pop it. */
422 pfile->out.cur = out - 1;
423 _cpp_pop_context (pfile);
427 /* Premature end of file. Fake a new line. */
429 if (!pfile->buffer->from_stage3)
430 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
431 if (pfile->state.parsing_args == 2)
432 cpp_error (pfile, DL_ERROR,
433 "unterminated argument list invoking macro \"%s\"",
434 NODE_NAME (fmacro.node));
438 case '\r': case '\n':
439 cur = handle_newline (pfile, cur - 1);
440 if (pfile->state.parsing_args == 2)
442 /* Newlines in arguments become a space. */
457 if (is_vspace (*cur))
458 out--, cur = skip_escaped_newlines (pfile, cur - 1);
461 /* Skip escaped quotes here, it's easier than above, but
462 take care to first skip escaped newlines. */
463 cur = skip_escaped_newlines (pfile, cur);
464 if (*cur == '\\' || *cur == '"' || *cur == '\'')
470 /* Traditional CPP does not recognize comments within
474 cur = skip_escaped_newlines (pfile, cur);
478 pfile->out.cur = out + 1;
479 cur = copy_comment (pfile, cur + 1);
481 /* Comments in directives become spaces so that
482 tokens are properly separated when the ISO
483 preprocessor re-lexes the line. The exception
485 if (pfile->state.in_directive && !macro)
487 else if (CPP_OPTION (pfile, discard_comments))
490 out = pfile->out.cur;
496 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
497 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
498 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
499 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
501 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
502 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
503 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
504 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
506 if (quote == 0 || macro)
510 pfile->out.cur = --out;
511 node = lex_identifier (pfile, cur - 1);
513 if (node->type == NT_MACRO
514 && !pfile->state.skipping
515 && pfile->state.parsing_args != 2
516 && !pfile->state.prevent_expansion
517 && !recursive_macro (pfile, node))
519 if (node->value.macro->fun_like)
520 maybe_start_funlike (pfile, node, out, &fmacro);
523 /* Remove the object-like macro's name from the
524 output, and push its replacement text. */
525 pfile->out.cur = out;
526 push_replacement_text (pfile, node);
530 else if (macro && node->arg_index)
532 /* Found a parameter in the replacement text of a
533 #define. Remove its name from the output. */
534 pfile->out.cur = out;
535 save_replacement_text (pfile, macro, node->arg_index);
538 out = pfile->out.cur;
547 if (pfile->state.parsing_args == 1)
549 const uchar *p = pfile->out.base + fmacro.offset;
551 /* Invoke a prior function-like macro if there is only
552 white space in-between. */
553 while (is_numchar (*p))
555 while (is_space (*p))
560 pfile->state.parsing_args = 2;
562 out = pfile->out.base + fmacro.offset;
563 fmacro.args[0] = fmacro.offset;
566 pfile->state.parsing_args = 0;
572 if (quote == 0 && pfile->state.parsing_args == 2 && paren_depth == 1)
573 save_argument (&fmacro, out - pfile->out.base);
580 if (pfile->state.parsing_args == 2 && paren_depth == 0)
582 cpp_macro *m = fmacro.node->value.macro;
584 pfile->state.parsing_args = 0;
585 save_argument (&fmacro, out - pfile->out.base);
587 /* A single zero-length argument is no argument. */
590 && out == pfile->out.base + 1)
593 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
595 /* Remove the macro's invocation from the
596 output, and push its replacement text. */
597 pfile->out.cur = (pfile->out.base
600 replace_args_and_push (pfile, &fmacro);
608 /* At start of a line it's a directive. */
609 if (out - 1 == pfile->out.base && !pfile->state.in_directive)
611 /* This is a kludge. We want to have the ISO
612 preprocessor lex the next token. */
613 pfile->buffer->cur = cur;
614 if (_cpp_handle_directive (pfile, false /* indented */))
615 goto start_logical_line;
627 pfile->out.cur = out - 1;
629 _cpp_release_buff (pfile, fmacro.buff);
632 /* Push a context holding the replacement text of the macro NODE on
633 the context stack. NODE is either object-like, or a function-like
634 macro with no arguments. */
636 push_replacement_text (pfile, node)
640 cpp_macro *macro = node->value.macro;
642 _cpp_push_text_context (pfile, node, macro->exp.text, macro->count);
645 /* Returns TRUE if traditional macro recursion is detected. */
647 recursive_macro (pfile, node)
651 bool recursing = node->flags & NODE_DISABLED;
653 /* Object-like macros that are already expanding are necessarily
656 However, it is possible to have traditional function-like macros
657 that are not infinitely recursive but recurse to any given depth.
658 Further, it is easy to construct examples that get ever longer
659 until the point they stop recursing. So there is no easy way to
660 detect true recursion; instead we assume any expansion more than
661 20 deep since the first invocation of this macro must be
663 if (recursing && node->value.macro->fun_like)
666 cpp_context *context = pfile->context;
671 if (context->macro == node && depth > 20)
673 context = context->prev;
676 recursing = context != NULL;
680 cpp_error (pfile, DL_ERROR,
681 "detected recursion whilst expanding macro \"%s\"",
687 /* Push a context holding the replacement text of the macro FMACRO->node
688 on the context stack. If the macro has parameters, each one is replaced
689 by the argument text found in the output buffer via FMACRO->args. */
691 replace_args_and_push (pfile, fmacro)
693 struct fun_macro *fmacro;
695 cpp_macro *macro = fmacro->node->value.macro;
697 if (macro->paramc == 0)
698 push_replacement_text (pfile, fmacro->node);
706 /* Calculate the length of the argument-replaced text. */
707 for (exp = macro->exp.text;;)
709 struct block *b = (struct block *) exp;
712 if (b->arg_index == 0)
714 len += (fmacro->args[b->arg_index]
715 - fmacro->args[b->arg_index - 1] - 1);
716 exp += BLOCK_LEN (b->text_len);
719 /* Allocate room for the expansion plus NUL. */
720 buff = _cpp_get_buff (pfile, len + 1);
722 /* Copy the expansion and replace arguments. */
723 p = BUFF_FRONT (buff);
724 for (exp = macro->exp.text;;)
726 struct block *b = (struct block *) exp;
729 memcpy (p, b->text, b->text_len);
731 if (b->arg_index == 0)
733 arglen = (fmacro->args[b->arg_index]
734 - fmacro->args[b->arg_index - 1] - 1);
735 memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
738 exp += BLOCK_LEN (b->text_len);
743 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
745 /* So we free buffer allocation when macro is left. */
746 pfile->context->buff = buff;
750 /* Read and record the parameters, if any, of a function-like macro
751 definition. Destroys pfile->out.cur.
753 Returns true on success, false on failure (syntax error or a
754 duplicate parameter). On success, CUR (pfile->context) is just
755 past the closing parenthesis. */
757 scan_parameters (pfile, macro)
761 const uchar *cur = CUR (pfile->context) + 1;
766 cur = skip_whitespace (pfile, cur);
768 if (is_idstart (*cur))
771 if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
773 cur = skip_whitespace (pfile, CUR (pfile->context));
783 ok = (*cur == ')' && macro->paramc == 0);
787 CUR (pfile->context) = cur + (*cur == ')');
792 /* Save the text from pfile->out.base to pfile->out.cur as
793 the replacement text for the current macro, followed by argument
794 ARG_INDEX, with zero indicating the end of the replacement
797 save_replacement_text (pfile, macro, arg_index)
800 unsigned int arg_index;
802 size_t len = pfile->out.cur - pfile->out.base;
805 if (macro->paramc == 0)
807 /* Object-like and function-like macros without parameters
808 simply store their NUL-terminated replacement text. */
809 exp = _cpp_unaligned_alloc (pfile, len + 1);
810 memcpy (exp, pfile->out.base, len);
812 macro->exp.text = exp;
817 /* Store the text's length (unsigned int), the argument index
818 (unsigned short, base 1) and then the text. */
819 size_t blen = BLOCK_LEN (len);
822 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
823 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
825 exp = BUFF_FRONT (pfile->a_buff);
826 block = (struct block *) (exp + macro->count);
827 macro->exp.text = exp;
829 /* Write out the block information. */
830 block->text_len = len;
831 block->arg_index = arg_index;
832 memcpy (block->text, pfile->out.base, len);
834 /* Lex the rest into the start of the output buffer. */
835 pfile->out.cur = pfile->out.base;
837 macro->count += blen;
839 /* If we've finished, commit the memory. */
841 BUFF_FRONT (pfile->a_buff) += macro->count;
845 /* Analyze and save the replacement text of a macro. Returns true on
848 _cpp_create_trad_definition (pfile, macro)
855 CUR (pfile->context) = pfile->buffer->cur;
857 /* Is this a function-like macro? */
858 if (* CUR (pfile->context) == '(')
860 /* Setting macro to NULL indicates an error occurred, and
861 prevents unnecessary work in scan_out_logical_line. */
862 if (!scan_parameters (pfile, macro))
866 /* Success. Commit the parameter array. */
867 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
868 BUFF_FRONT (pfile->a_buff) = (uchar *) ¯o->params[macro->paramc];
873 /* Skip leading whitespace in the replacement text. */
874 CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
876 pfile->state.prevent_expansion++;
877 scan_out_logical_line (pfile, macro);
878 pfile->state.prevent_expansion--;
883 /* Skip trailing white space. */
884 cur = pfile->out.base;
885 limit = pfile->out.cur;
886 while (limit > cur && is_space (limit[-1]))
888 pfile->out.cur = limit;
889 save_replacement_text (pfile, macro, 0);
894 /* Copy SRC of length LEN to DEST, but convert all contiguous
895 whitespace to a single space, provided it is not in quotes. The
896 quote currently in effect is pointed to by PQUOTE, and is updated
897 by the function. Returns the number of bytes copied. */
899 canonicalize_text (dest, src, len, pquote)
905 uchar *orig_dest = dest;
906 uchar quote = *pquote;
910 if (is_space (*src) && !quote)
914 while (len && is_space (*src));
919 if (*src == '\'' || *src == '"')
923 else if (quote == *src)
926 *dest++ = *src++, len--;
931 return dest - orig_dest;
934 /* Returns true if the expansions of MACRO1 and MACRO2 differ in any
935 way other than the form of their whitespace. */
937 _cpp_expansions_different_trad (macro1, macro2)
938 const cpp_macro *macro1, *macro2;
940 uchar *p1 = xmalloc (macro1->count + macro2->count);
941 uchar *p2 = p1 + macro1->count;
942 uchar quote1 = 0, quote2;
946 if (macro1->paramc > 0)
948 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
953 struct block *b1 = (struct block *) exp1;
954 struct block *b2 = (struct block *) exp2;
956 if (b1->arg_index != b2->arg_index)
959 len1 = canonicalize_text (p1, b1->text, b1->text_len, "e1);
960 len2 = canonicalize_text (p2, b2->text, b2->text_len, "e2);
961 if (len1 != len2 || memcmp (p1, p2, len1))
963 if (b1->arg_index == 0)
968 exp1 += BLOCK_LEN (b1->text_len);
969 exp2 += BLOCK_LEN (b2->text_len);
974 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, "e1);
975 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, "e2);
976 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
983 /* Prepare to be able to scan the current buffer. */
985 _cpp_set_trad_context (pfile)
988 cpp_buffer *buffer = pfile->buffer;
989 cpp_context *context = pfile->context;
991 if (pfile->context->prev)
994 pfile->out.cur = pfile->out.base;
995 CUR (context) = buffer->cur;
996 RLIMIT (context) = buffer->rlimit;
997 check_output_buffer (pfile, RLIMIT (context) - CUR (context));