1 /* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks. Each block represents the
26 portion of text from the start of the previous block (or the start
27 of the macro replacement text in the case of the first block) to
28 the next parameter, or the end of the replacement list if there are no more parameters.
31 Each block consists of an unsigned int, which is the length of text
32 contained in the third part, an unsigned short, which is the
33 one-based index of the argument that immediately follows that text,
34 and the text itself. The final block in the macro expansion is
35 recognizable as it has an argument index of zero. */
/* Length of the text stored in this block.  */
39 unsigned int text_len;
/* One-based index of the argument that immediately follows the text;
   zero identifies the final block of the expansion.  */
40 unsigned short arg_index;
/* Offset of the text member within struct block, i.e. the size of the
   fields that precede the text.  */
44 #define BLOCK_HEADER_LEN offsetof (struct block, text)
/* Size of a block holding TEXT_LEN bytes of text: header plus text,
   passed through CPP_ALIGN.  The argument is parenthesized so that an
   arbitrary expression can be passed without precedence surprises.  */
45 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
47 /* Structure holding information about a function-like macro invocation. */
51 /* Memory buffer holding the trad_arg array. */
54 /* An array of size the number of macro parameters + 1, containing
55 the offsets of the start of each macro argument in the output
56 buffer. The argument continues until the character before the
57 start of the next one. */
60 /* The hashnode of the macro. */
63 /* The offset of the macro name in the output buffer. */
66 /* Zero-based index of argument being currently lexed. */
70 /* Lexing TODO: Handle -C, maybe -CC, and space in escaped newlines.
71 Stop cpplex.c from recognizing comments and directives during its
72 lexing pass. Get rid of line_base usage - seems pointless? Do we
73 get escaped newline at EOF correct? */
/* Forward declarations of the file-local (static) helpers, written
   with the PARAMS macro.  */
75 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
76 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
78 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
79 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
80 static const uchar *skip_comment PARAMS ((cpp_reader *, const uchar *));
81 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
82 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
83 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
84 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
85 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
87 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
88 const uchar *, struct fun_macro *));
89 static void save_argument PARAMS ((struct fun_macro *, size_t));
90 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
91 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
94 /* Ensures we have N bytes' space in the output buffer, and
95 reallocates it if not. */
97 check_output_buffer (pfile, n)
/* Grow only when fewer than N bytes remain between out.cur and
   out.limit.  */
101 if (n > (size_t) (pfile->out.limit - pfile->out.cur))
/* Bytes already written; used below to re-derive out.cur from the
   reallocated base.  */
103 size_t size = pfile->out.cur - pfile->out.base;
/* Ask for one and a half times the space currently required.  */
104 size_t new_size = (size + n) * 3 / 2;
107 = (uchar *) xrealloc (pfile->out.base, new_size);
/* limit and cur are recomputed relative to the new base.  */
108 pfile->out.limit = pfile->out.base + new_size;
109 pfile->out.cur = pfile->out.base + size;
113 /* To be called whenever a newline character is encountered in the
114 input file, at CUR. Handles DOS, MAC and Unix ends of line, and
115 returns the character after the newline sequence. */
117 handle_newline (pfile, cur)
/* With cur[0] already a newline character, the two sums are equal
   only when cur[1] is the other member of a CR/LF pair.  */
122 if (cur[0] + cur[1] == '\r' + '\n')
/* Set the buffer's line_base to the position after CUR.  */
124 pfile->buffer->line_base = cur + 1;
128 /* CUR points to any character in the buffer, not necessarily a
129 backslash. Advances CUR until all escaped newlines are skipped,
130 and returns the new position. */
132 skip_escaped_newlines (pfile, cur)
/* An escaped newline is a backslash immediately followed by a
   character accepted by is_vspace.  */
136 while (*cur == '\\' && is_vspace (cur[1]))
/* Step over the backslash and let handle_newline consume the line
   ending that follows it.  */
137 cur = handle_newline (pfile, cur + 1);
142 /* CUR points to the character after the asterisk introducing a
143 comment. Returns the position after the comment. */
145 skip_comment (pfile, cur)
/* Line on which the comment began; reported if it turns out to be
   unterminated.  */
149 unsigned int from_line = pfile->line;
150 unsigned int c = 0, prevc = 0;
/* The scan is bounded by the end of the current context.  */
151 const uchar *limit = RLIMIT (pfile->context);
/* When warn_comments is enabled, diagnose what looks like a comment
   opener inside the comment.  */
162 if (*cur == '*' && cur[1] != '/'
163 && CPP_OPTION (pfile, warn_comments))
164 cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
165 "\"/*\" within comment");
/* Line endings inside the comment still go through handle_newline.  */
167 else if (is_vspace (c))
168 cur = handle_newline (pfile, cur - 1);
/* Unless the last two characters read were a star followed by a
   slash, the comment never ended.  */
171 if (c != '/' || prevc != '*')
172 cpp_error_with_line (pfile, DL_ERROR, from_line, 0,
173 "unterminated comment");
178 /* Skip any horizontal whitespace and comments beginning at CUR,
179 returning the following character. */
181 skip_whitespace (pfile, cur)
/* Consume characters accepted by is_nvspace, stopping at a NUL.  */
189 while (is_nvspace (*cur) && *cur != 0)
/* Tests for a NUL that is not located at the context's RLIMIT.  */
192 if (*cur == '\0' && cur != RLIMIT (pfile->context))
198 cur = skip_escaped_newlines (pfile, cur);
/* Look past any escaped newlines that follow the next character.  */
205 tmp = skip_escaped_newlines (pfile, cur + 1);
/* skip_comment expects the position after the asterisk.
   NOTE(review): this assumes tmp points at the asterisk here; confirm
   against the guarding test.  */
208 cur = skip_comment (pfile, tmp + 1);
219 /* Lexes and outputs an identifier starting at CUR, which is assumed
220 to point to a valid first character of an identifier. Returns
221 the hashnode, and updates out.cur. */
222 static cpp_hashnode *
223 lex_identifier (pfile, cur)
/* Write cursor; the identifier's spelling starts at pfile->out.cur.  */
228 uchar *out = pfile->out.cur;
229 cpp_hashnode *result;
235 while (is_numchar (*cur));
/* The identifier may continue after one or more escaped newlines.  */
236 cur = skip_escaped_newlines (pfile, cur);
238 while (is_numchar (*cur));
/* Publish the read position, look the spelling up in the hash table,
   and only then move out.cur past the identifier.  */
240 CUR (pfile->context) = cur;
241 len = out - pfile->out.cur;
242 result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->out.cur,
244 pfile->out.cur = out;
248 /* Reads an identifier, returning its hashnode. If the next token is
249 not an identifier, returns NULL. */
251 _cpp_lex_identifier_trad (pfile)
/* Start from the context's current position, past anything that
   skip_whitespace skips.  */
254 const uchar *cur = skip_whitespace (pfile, CUR (pfile->context));
256 if (!is_idstart (*cur))
/* Not an identifier start: store the scanned position back in the
   context.  */
258 CUR (pfile->context) = cur;
/* lex_identifier copies the name to the output buffer and returns its
   hashnode.  */
262 return lex_identifier (pfile, cur);
/* _cpp_overlay_buffer: stores the buffer's cur, rlimit and line_base
   and pfile->line in their saved_ counterparts, then points line_base
   at START and rlimit at START + LEN.  */
265 /* Overlays the true file buffer temporarily with text of length LEN
266 starting at START. The true buffer is restored upon calling
269 _cpp_overlay_buffer (pfile, start, len)
274 cpp_buffer *buffer = pfile->buffer;
276 buffer->saved_cur = buffer->cur;
277 buffer->saved_rlimit = buffer->rlimit;
278 buffer->saved_line_base = buffer->line_base;
281 buffer->line_base = start;
282 buffer->rlimit = start + len;
284 pfile->saved_line = pfile->line;
287 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
289 _cpp_remove_overlay (pfile)
292 cpp_buffer *buffer = pfile->buffer;
/* Put back the three buffer fields from their saved_ copies.  */
294 buffer->cur = buffer->saved_cur;
295 buffer->rlimit = buffer->saved_rlimit;
296 buffer->line_base = buffer->saved_line_base;
/* The reader's line number is restored as well.  */
298 pfile->line = pfile->saved_line;
301 /* Reads a logical line into the output buffer. Returns TRUE if there
302 is more text left in the buffer. */
304 _cpp_read_logical_line_trad (pfile)
309 buffer = pfile->buffer;
/* The current buffer has been read up to its limit.  */
310 if (buffer->cur == buffer->rlimit)
314 /* Don't pop the last buffer. */
/* Read return_at_eof before the buffer is popped.  */
317 stop = buffer->return_at_eof;
318 _cpp_pop_buffer (pfile);
/* Scan one logical line from the buffer's range into the output
   buffer, which is reset to its base first.  */
325 CUR (pfile->context) = buffer->cur;
326 RLIMIT (pfile->context) = buffer->rlimit;
327 pfile->out.cur = pfile->out.base;
328 pfile->out.first_line = pfile->line;
329 scan_out_logical_line (pfile, NULL);
/* Record in the buffer how far the scan advanced.  */
330 buffer->cur = CUR (pfile->context);
/* maybe_start_funlike: obtains a buffer for paramc + 1 argument
   offsets in MACRO, records the offset of START within the output
   buffer, and sets parsing_args to 1.  */
335 /* Set up state for finding the opening '(' of a function-like
338 maybe_start_funlike (pfile, node, start, macro)
342 struct fun_macro *macro;
344 unsigned int n = node->value.macro->paramc + 1;
347 _cpp_release_buff (pfile, macro->buff);
348 macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
349 macro->args = (size_t *) BUFF_FRONT (macro->buff);
351 macro->offset = start - pfile->out.base;
354 pfile->state.parsing_args = 1;
357 /* Save the OFFSET of the start of the next argument to MACRO. */
359 save_argument (macro, offset)
360 struct fun_macro *macro;
/* Offsets are stored only while argc does not exceed the macro's
   paramc; offsets of surplus arguments are not recorded.  */
364 if (macro->argc <= macro->node->value.macro->paramc)
365 macro->args[macro->argc] = offset;
/* scan_out_logical_line is the main scanning routine of this file.
   The parts visible here deal with end of buffer, newlines, quotes,
   comments, identifiers (pushing replacement text for macros unless
   expansion is prevented), parentheses and argument collection for
   function-like macros, and directives at the start of a line.  */
368 /* Copies the next logical line in the current buffer to the output
369 buffer. The output is guaranteed to terminate with a NUL
372 If MACRO is non-NULL, then we are scanning the replacement list of
373 MACRO, and we call save_replacement_text() every time we meet an
376 scan_out_logical_line (pfile, macro)
380 cpp_context *context;
382 unsigned int c, paren_depth, quote = 0;
384 struct fun_macro fmacro;
388 context = pfile->context;
390 check_output_buffer (pfile, RLIMIT (context) - cur);
391 out = pfile->out.cur;
398 /* There are only a few entities we need to catch: comments,
399 identifiers, newlines, escaped newlines, # and '\0'. */
/* Compares the position just before CUR with the end of the
   context.  */
403 if (cur - 1 != RLIMIT (context))
406 /* If this is a macro's expansion, pop it. */
409 pfile->out.cur = out - 1;
410 _cpp_pop_context (pfile);
414 /* Premature end of file. Fake a new line. */
416 if (!pfile->buffer->from_stage3)
417 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
/* parsing_args == 2 means the argument list of a function-like macro
   is still open.  */
418 if (pfile->state.parsing_args == 2)
419 cpp_error (pfile, DL_ERROR,
420 "unterminated argument list invoking macro \"%s\"",
421 NODE_NAME (fmacro.node));
425 case '\r': case '\n':
426 cur = handle_newline (pfile, cur - 1);
427 if (pfile->state.parsing_args == 2)
429 /* Newlines in arguments become a space. */
444 if (is_vspace (*cur))
445 out--, cur = skip_escaped_newlines (pfile, cur - 1);
448 /* Skip escaped quotes here, it's easier than above, but
449 take care to first skip escaped newlines. */
450 cur = skip_escaped_newlines (pfile, cur);
451 if (*cur == '\\' || *cur == '"' || *cur == '\'')
457 /* Traditional CPP does not recognize comments within
461 cur = skip_escaped_newlines (pfile, cur);
463 out--, cur = skip_comment (pfile, cur + 1);
468 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
469 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
470 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
471 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
473 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
474 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
475 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
476 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
478 if (quote == 0 || macro)
482 pfile->out.cur = --out;
483 node = lex_identifier (pfile, cur - 1);
485 if (node->type == NT_MACRO
486 && pfile->state.parsing_args != 2
487 && !pfile->state.prevent_expansion)
489 if (node->value.macro->fun_like)
490 maybe_start_funlike (pfile, node, out, &fmacro);
493 /* Remove the object-like macro's name from the
494 output, and push its replacement text. */
495 pfile->out.cur = out;
496 push_replacement_text (pfile, node);
500 else if (macro && node->arg_index)
502 /* Found a parameter in the replacement text of a
503 #define. Remove its name from the output. */
504 pfile->out.cur = out;
505 save_replacement_text (pfile, macro, node->arg_index);
/* Resynchronize the local write cursor with out.cur, which the calls
   above update.  */
508 out = pfile->out.cur;
/* parsing_args == 1 is the state set by maybe_start_funlike: a
   function-like macro name has been seen and its opening parenthesis
   is being looked for.  */
517 if (pfile->state.parsing_args == 1)
519 const uchar *p = pfile->out.base + fmacro.offset;
521 /* Invoke a prior function-like macro if there is only
522 white space in-between. */
523 while (is_numchar (*p))
525 while (is_space (*p))
530 pfile->state.parsing_args = 2;
/* Output resumes at the offset of the macro name, which is also
   recorded as the start of the first argument.  */
532 out = pfile->out.base + fmacro.offset;
533 fmacro.args[0] = fmacro.offset;
536 pfile->state.parsing_args = 0;
/* A new argument starts only outside quotes, while arguments are
   being collected, at parenthesis depth 1.  */
542 if (quote == 0 && pfile->state.parsing_args == 2 && paren_depth == 1)
543 save_argument (&fmacro, out - pfile->out.base);
/* Depth back at zero while collecting arguments: the invocation is
   complete.  */
550 if (pfile->state.parsing_args == 2 && paren_depth == 0)
552 cpp_macro *m = fmacro.node->value.macro;
554 pfile->state.parsing_args = 0;
555 save_argument (&fmacro, out - pfile->out.base);
557 /* A single zero-length argument is no argument. */
/* NOTE(review): this tests against out.base + 1, whereas argument
   text starts at out.base + fmacro.offset (see the assignment to out
   above); confirm the test holds when the macro name is not at the
   start of the output buffer.  */
560 && out == pfile->out.base + 1)
563 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
565 /* Remove the macro's invocation from the
566 output, and push its replacement text. */
567 pfile->out.cur = (pfile->out.base
570 replace_args_and_push (pfile, &fmacro);
578 /* At start of a line it's a directive. */
579 if (out - 1 == pfile->out.base && !pfile->state.in_directive)
581 /* This is a kludge. We want to have the ISO
582 preprocessor lex the next token. */
583 pfile->buffer->cur = cur;
584 if (_cpp_handle_directive (pfile, false /* indented */))
600 pfile->out.cur = out - 1;
/* Release the buffer that maybe_start_funlike obtained for the
   argument offsets.  */
602 _cpp_release_buff (pfile, fmacro.buff);
605 /* Push a context holding the replacement text of the macro NODE on
606 the context stack. NODE is either object-like, or a function-like
607 macro with no arguments. */
609 push_replacement_text (pfile, node)
613 cpp_macro *macro = node->value.macro;
/* The text and its length are taken directly from the macro: exp.text
   and count.  */
615 _cpp_push_text_context (pfile, node, macro->exp.text, macro->count);
618 /* Push a context holding the expansion of the function-like macro
619 FMACRO->node on the context stack. A macro without parameters
620 is pushed unchanged; otherwise each parameter is replaced by the
   text of the corresponding argument held in the output buffer. */
622 replace_args_and_push (pfile, fmacro)
624 struct fun_macro *fmacro;
626 cpp_macro *macro = fmacro->node->value.macro;
628 if (macro->paramc == 0)
629 push_replacement_text (pfile, fmacro->node);
637 /* Calculate the length of the argument-replaced text. */
638 for (exp = macro->exp.text;;)
640 struct block *b = (struct block *) exp;
643 if (b->arg_index == 0)
/* Length of the argument: distance between consecutive start offsets,
   less one character.  */
645 len += (fmacro->args[b->arg_index]
646 - fmacro->args[b->arg_index - 1] - 1);
/* Blocks are laid out back to back, each BLOCK_LEN bytes long.  */
647 exp += BLOCK_LEN (b->text_len);
650 /* Allocate room for the expansion plus NUL. */
651 buff = _cpp_get_buff (pfile, len + 1);
653 /* Copy the expansion and replace arguments. */
654 p = BUFF_FRONT (buff);
655 for (exp = macro->exp.text;;)
657 struct block *b = (struct block *) exp;
660 memcpy (p, b->text, b->text_len);
662 if (b->arg_index == 0)
664 arglen = (fmacro->args[b->arg_index]
665 - fmacro->args[b->arg_index - 1] - 1);
/* The argument text is read back from the output buffer at its
   recorded start offset.  */
666 memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
669 exp += BLOCK_LEN (b->text_len);
674 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
676 /* So we free buffer allocation when macro is left. */
677 pfile->context->buff = buff;
681 /* Read and record the parameters, if any, of a function-like macro
682 definition. Destroys pfile->out.cur.
684 Returns true on success, false on failure (syntax error or a
685 duplicate parameter). On success, CUR (pfile->context) is just
686 past the closing parenthesis. */
688 scan_parameters (pfile, macro)
/* Start one character past the context's current position.  */
692 const uchar *cur = CUR (pfile->context) + 1;
697 cur = skip_whitespace (pfile, cur);
699 if (is_idstart (*cur))
/* The name is lexed into the output buffer and handed to
   _cpp_save_parameter.  */
702 if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
/* lex_identifier stored the new read position in the context.  */
704 cur = skip_whitespace (pfile, CUR (pfile->context));
/* Here success requires a closing parenthesis and an empty parameter
   list.  */
714 ok = (*cur == ')' && macro->paramc == 0);
/* Step past the closing parenthesis if that is where the scan
   stopped.  */
718 CUR (pfile->context) = cur + (*cur == ')');
723 /* Save the text from pfile->out.base to pfile->out.cur as
724 the replacement text for the current macro, followed by argument
725 ARG_INDEX, with zero indicating the end of the replacement
728 save_replacement_text (pfile, macro, arg_index)
731 unsigned int arg_index;
733 size_t len = pfile->out.cur - pfile->out.base;
736 if (macro->paramc == 0)
738 /* Object-like and function-like macros without parameters
739 simply store their NUL-terminated replacement text. */
/* One byte more than LEN is allocated, leaving room for the
   terminator.  */
740 exp = _cpp_unaligned_alloc (pfile, len + 1);
741 memcpy (exp, pfile->out.base, len);
743 macro->exp.text = exp;
748 /* Store the text's length (unsigned int), the argument index
749 (unsigned short, base 1) and then the text. */
750 size_t blen = BLOCK_LEN (len);
/* Blocks accumulate at offset macro->count from the front of a_buff;
   extend the buffer if this block would not fit.  */
753 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
754 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
/* exp.text is reassigned on every call from the current front of
   a_buff.  */
756 exp = BUFF_FRONT (pfile->a_buff);
757 block = (struct block *) (exp + macro->count);
758 macro->exp.text = exp;
760 /* Write out the block information. */
761 block->text_len = len;
762 block->arg_index = arg_index;
763 memcpy (block->text, pfile->out.base, len);
765 /* Lex the rest into the start of the output buffer. */
766 pfile->out.cur = pfile->out.base;
/* count is the running total, in bytes, of the blocks written.  */
768 macro->count += blen;
770 /* If we've finished, commit the memory. */
772 BUFF_FRONT (pfile->a_buff) += macro->count;
776 /* Analyze and save the replacement text of a macro. Returns true on
   success. */
779 _cpp_create_trad_definition (pfile, macro)
/* Scanning starts at the buffer's current position. */
786 CUR (pfile->context) = pfile->buffer->cur;
788 /* Is this a function-like macro? */
789 if (* CUR (pfile->context) == '(')
791 /* Setting macro to NULL indicates an error occurred, and
792 prevents unnecessary work in scan_out_logical_line. */
793 if (!scan_parameters (pfile, macro))
797 /* Success. Commit the parameter array. */
/* The array starts at the front of a_buff; the front is moved to the
   address just past its paramc entries. */
798 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
799 BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
804 /* Skip leading whitespace in the replacement text. */
805 CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
/* The replacement text is scanned into the start of the output
   buffer with macro expansion prevented for the duration. */
807 pfile->out.cur = pfile->out.base;
808 pfile->state.prevent_expansion++;
809 scan_out_logical_line (pfile, macro);
810 pfile->state.prevent_expansion--;
815 /* Skip trailing white space. */
816 cur = pfile->out.base;
817 limit = pfile->out.cur;
818 while (limit > cur && is_space (limit[-1]))
820 pfile->out.cur = limit;
/* An argument index of zero marks the end of the replacement text. */
821 save_replacement_text (pfile, macro, 0);
826 /* Copy SRC of length LEN to DEST, but convert all contiguous
827 whitespace to a single space, provided it is not in quotes. The
828 quote currently in effect is pointed to by PQUOTE, and is updated
829 by the function. Returns the number of bytes copied. */
831 canonicalize_text (dest, src, len, pquote)
/* Start of the destination, kept to compute the byte count.  */
837 uchar *orig_dest = dest;
/* Work on a local copy of the caller's quote state.  */
838 uchar quote = *pquote;
/* Outside quotes, a whole run of whitespace is consumed at once.  */
842 if (is_space (*src) && !quote)
846 while (len && is_space (*src));
/* Quote characters drive the quote state; a character equal to the
   quote in effect is tested for separately.  */
851 if (*src == '\'' || *src == '"')
855 else if (quote == *src)
858 *dest++ = *src++, len--;
/* Number of bytes written to DEST.  */
863 return dest - orig_dest;
866 /* Returns true if MACRO1 and MACRO2 have expansions different other
867 than in the form of their whitespace. */
869 _cpp_expansions_different_trad (macro1, macro2)
870 const cpp_macro *macro1, *macro2;
/* One allocation holds both canonicalized texts: P1 takes the first
   macro1->count bytes and P2 the rest. */
872 uchar *p1 = xmalloc (macro1->count + macro2->count);
873 uchar *p2 = p1 + macro1->count;
/* Both quote states start as "not in quotes". canonicalize_text reads
   the state through its pointer argument, so each must be
   initialized. */
874 uchar quote1 = 0, quote2 = 0;
878 if (macro1->paramc > 0)
880 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
/* Walk the two block sequences in step. */
885 struct block *b1 = (struct block *) exp1;
886 struct block *b2 = (struct block *) exp2;
/* Corresponding blocks must be followed by the same argument. */
888 if (b1->arg_index != b2->arg_index)
/* Compare the whitespace-canonicalized text of the two blocks; the
   quote state carries over from one block to the next. */
891 len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
892 len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
893 if (len1 != len2 || memcmp (p1, p2, len1))
/* An argument index of zero identifies the final block. */
895 if (b1->arg_index == 0)
900 exp1 += BLOCK_LEN (b1->text_len);
901 exp2 += BLOCK_LEN (b2->text_len);
/* Without parameters the expansion is plain text of length count. */
906 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
907 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
908 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
915 /* Prepare to be able to scan the current buffer. */
917 _cpp_set_trad_context (pfile)
920 cpp_buffer *buffer = pfile->buffer;
921 cpp_context *context = pfile->context;
/* Tests whether the current context has a previous context.  */
923 if (pfile->context->prev)
/* Reset the output buffer and take the scan range from the current
   buffer.  */
926 pfile->out.cur = pfile->out.base;
927 CUR (context) = buffer->cur;
928 RLIMIT (context) = buffer->rlimit;
/* Make sure the output buffer has room for as many bytes as remain in
   that range.  */
929 check_output_buffer (pfile, RLIMIT (context) - CUR (context));