1 /* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks. Each block represents the
26 portion of text from the start of the previous block (or the start
27 of the macro replacement text in the case of the first block) to
28 the next parameter, or the end of the replacement list if there are no further parameters.
31 Each block consists of an unsigned int, which is the length of text
32 contained in the third part, an unsigned short, which is the
33 one-based index of the argument that immediately follows that text,
34 and the text itself. The final block in the macro expansion is
35 recognizable as it has an argument index of zero. */
39 unsigned int text_len;
40 unsigned short arg_index;
44 #define BLOCK_HEADER_LEN offsetof (struct block, text)
45 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + TEXT_LEN)
47 /* Structure holding information about a function-like macro invocation. */
51 /* Memory buffer holding the trad_arg array. */
54 /* An array of size the number of macro parameters + 1, containing
55 the offsets of the start of each macro argument in the output
56 buffer. The argument continues until the character before the
57 start of the next one. */
60 /* The hashnode of the macro. */
63 /* The offset of the macro name in the output buffer. */
66 /* Zero-based index of argument being currently lexed. */
70 /* Lexing TODO: Handle -C, maybe -CC, and space in escaped newlines.
71 Stop cpplex.c from recognizing comments and directives during its
72 lexing pass. Get rid of line_base usage - seems pointless? Do we
73 get escaped newline at EOF correct? */
75 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
76 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
78 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
79 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
80 static const uchar *skip_comment PARAMS ((cpp_reader *, const uchar *));
81 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
82 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
83 static void restore_buff PARAMS ((cpp_reader *));
84 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
85 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
86 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
88 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
89 const uchar *, struct fun_macro *));
90 static void save_argument PARAMS ((struct fun_macro *, size_t));
91 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
92 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
95 /* Ensures we have N bytes' space in the output buffer, and
96 reallocates it if not. */
98 check_output_buffer (pfile, n)
/* Grow only when fewer than N bytes remain between the write cursor
   and the end of the output buffer.  */
102 if (n > (size_t) (pfile->trad_out_limit - pfile->trad_out_cur))
/* SIZE is the number of bytes already written.  It is kept as an
   offset so the cursor can be re-derived from the new base pointer
   after the reallocation below.  */
104 size_t size = pfile->trad_out_cur - pfile->trad_out_base;
/* Ask for one and a half times the space that is actually required.  */
105 size_t new_size = (size + n) * 3 / 2;
108 = (uchar *) xrealloc (pfile->trad_out_base, new_size);
109 pfile->trad_out_limit = pfile->trad_out_base + new_size;
110 pfile->trad_out_cur = pfile->trad_out_base + size;
114 /* To be called whenever a newline character is encountered in the
115 input file, at CUR. Handles DOS, MAC and Unix ends of line, and
116 returns the character after the newline sequence. */
118 handle_newline (pfile, cur)
/* By the contract above CUR[0] is itself a newline character
   (presumably CR or LF -- confirm against the callers).  Given that,
   the two character codes can only add up to CR + LF when CUR[1] is
   the other member of the pair, so this one comparison detects both
   the CR LF and the LF CR two-character line endings.  */
123 if (cur[0] + cur[1] == '\r' + '\n')
/* The line base is moved to the byte following CUR.  */
125 pfile->buffer->line_base = cur + 1;
129 /* CUR points to any character in the buffer, not necessarily a
130 backslash. Advances CUR until all escaped newlines are skipped,
131 and returns the new position. */
133 skip_escaped_newlines (pfile, cur)
137 while (*cur == '\\' && is_vspace (cur[1]))
138 cur = handle_newline (pfile, cur + 1);
143 /* CUR points to the character after the asterisk introducing a
144 comment. Returns the position after the comment. */
146 skip_comment (pfile, cur)
150 unsigned int from_line = pfile->line;
151 unsigned int c = 0, prevc = 0;
152 const uchar *limit = RLIMIT (pfile->context);
163 if (*cur == '*' && cur[1] != '/'
164 && CPP_OPTION (pfile, warn_comments))
165 cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
166 "\"/*\" within comment");
168 else if (is_vspace (c))
169 cur = handle_newline (pfile, cur - 1);
172 if (c != '/' || prevc != '*')
173 cpp_error_with_line (pfile, DL_ERROR, from_line, 0,
174 "unterminated comment");
179 /* Skip any horizontal whitespace and comments beginning at CUR,
180 returning the following character. */
182 skip_whitespace (pfile, cur)
190 while (is_nvspace (*cur) && *cur != 0)
193 if (*cur == '\0' && cur != RLIMIT (pfile->context))
199 cur = skip_escaped_newlines (pfile, cur);
206 tmp = skip_escaped_newlines (pfile, cur + 1);
209 cur = skip_comment (pfile, tmp + 1);
220 /* Lexes and outputs an identifier starting at CUR, which is assumed
221 to point to a valid first character of an identifier. Returns
222 the hashnode, and updates trad_out_cur. */
223 static cpp_hashnode *
224 lex_identifier (pfile, cur)
229 uchar *out = pfile->trad_out_cur;
230 cpp_hashnode *result;
236 while (is_numchar (*cur));
237 cur = skip_escaped_newlines (pfile, cur);
239 while (is_numchar (*cur));
241 CUR (pfile->context) = cur;
242 len = out - pfile->trad_out_cur;
243 result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->trad_out_cur,
245 pfile->trad_out_cur = out;
249 /* Reads an identifier, returning its hashnode. If the next token is
250 not an identifier, returns NULL. */
252 _cpp_lex_identifier_trad (pfile)
255 const uchar *cur = skip_whitespace (pfile, CUR (pfile->context));
257 if (!is_idstart (*cur))
259 CUR (pfile->context) = cur;
263 return lex_identifier (pfile, cur);
266 /* Overlays the true file buffer temporarily with text of length LEN
267 starting at START. The true buffer is restored upon calling restore_buff(). */
270 _cpp_overlay_buffer (pfile, start, len)
275 cpp_buffer *buffer = pfile->buffer;
277 buffer->saved_cur = buffer->cur;
278 buffer->saved_rlimit = buffer->rlimit;
279 buffer->saved_line_base = buffer->line_base;
282 buffer->line_base = start;
283 buffer->rlimit = start + len;
286 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
291 cpp_buffer *buffer = pfile->buffer;
293 buffer->cur = buffer->saved_cur;
294 buffer->rlimit = buffer->saved_rlimit;
295 buffer->line_base = buffer->saved_line_base;
298 /* Reads a logical line into the output buffer. Returns TRUE if there
299 is more text left in the buffer. */
301 _cpp_read_logical_line_trad (pfile)
305 unsigned int first_line;
307 restore_buff (pfile);
309 first_line = pfile->line = pfile->trad_line;
311 buffer = pfile->buffer;
312 if (buffer->cur == buffer->rlimit)
316 /* Don't pop the last buffer. */
319 stop = buffer->return_at_eof;
320 _cpp_pop_buffer (pfile);
327 CUR (pfile->context) = buffer->cur;
328 RLIMIT (pfile->context) = buffer->rlimit;
329 pfile->trad_out_cur = pfile->trad_out_base;
330 scan_out_logical_line (pfile, NULL);
331 buffer->cur = CUR (pfile->context);
333 pfile->trad_line = pfile->line;
334 pfile->line = first_line;
335 _cpp_overlay_buffer (pfile, pfile->trad_out_base,
336 pfile->trad_out_cur - pfile->trad_out_base);
340 /* Set up state for finding the opening '(' of a function-like macro. */
343 maybe_start_funlike (pfile, node, start, macro)
347 struct fun_macro *macro;
349 unsigned int n = node->value.macro->paramc + 1;
352 _cpp_release_buff (pfile, macro->buff);
353 macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
354 macro->args = (size_t *) BUFF_FRONT (macro->buff);
356 macro->offset = start - pfile->trad_out_base;
359 pfile->state.parsing_args = 1;
362 /* Save the OFFSET of the start of the next argument to MACRO. */
364 save_argument (macro, offset)
365 struct fun_macro *macro;
/* Only slots 0 .. paramc are ever written.  That matches the
   paramc + 1 entries that maybe_start_funlike() allocates for ARGS,
   so the offsets of surplus arguments are not stored and the array
   is never overrun.  */
369 if (macro->argc <= macro->node->value.macro->paramc)
370 macro->args[macro->argc] = offset;
373 /* Copies the next logical line in the current buffer to the output
374 buffer. The output is guaranteed to terminate with a NUL character.
377 If MACRO is non-NULL, then we are scanning the replacement list of
378 MACRO, and we call save_replacement_text() every time we meet one of its parameters. */
381 scan_out_logical_line (pfile, macro)
385 cpp_context *context;
387 unsigned int c, paren_depth, quote = 0;
389 struct fun_macro fmacro;
393 context = pfile->context;
395 check_output_buffer (pfile, RLIMIT (context) - cur);
396 out = pfile->trad_out_cur;
403 /* There are only a few entities we need to catch: comments,
404 identifiers, newlines, escaped newlines, # and '\0'. */
408 if (cur - 1 != RLIMIT (context))
411 /* If this is a macro's expansion, pop it. */
414 pfile->trad_out_cur = out - 1;
415 _cpp_pop_context (pfile);
419 /* Premature end of file. Fake a new line. */
421 if (!pfile->buffer->from_stage3)
422 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
423 if (pfile->state.parsing_args == 2)
424 cpp_error (pfile, DL_ERROR,
425 "unterminated argument list invoking macro \"%s\"",
426 NODE_NAME (fmacro.node));
430 case '\r': case '\n':
431 cur = handle_newline (pfile, cur - 1);
432 if (pfile->state.parsing_args == 2)
434 /* Newlines in arguments become a space. */
449 if (is_vspace (*cur))
450 out--, cur = skip_escaped_newlines (pfile, cur - 1);
453 /* Skip escaped quotes here, it's easier than above, but
454 take care to first skip escaped newlines. */
455 cur = skip_escaped_newlines (pfile, cur);
456 if (*cur == '\\' || *cur == '"' || *cur == '\'')
462 /* Traditional CPP does not recognize comments within
466 cur = skip_escaped_newlines (pfile, cur);
468 out--, cur = skip_comment (pfile, cur + 1);
473 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
474 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
475 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
476 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
478 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
479 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
480 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
481 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
483 if (quote == 0 || macro)
487 pfile->trad_out_cur = --out;
488 node = lex_identifier (pfile, cur - 1);
490 if (node->type == NT_MACRO
491 && pfile->state.parsing_args != 2
492 && !pfile->state.prevent_expansion)
494 if (node->value.macro->fun_like)
495 maybe_start_funlike (pfile, node, out, &fmacro);
498 /* Remove the object-like macro's name from the
499 output, and push its replacement text. */
500 pfile->trad_out_cur = out;
501 push_replacement_text (pfile, node);
505 else if (macro && node->arg_index)
507 /* Found a parameter in the replacement text of a
508 #define. Remove its name from the output. */
509 pfile->trad_out_cur = out;
510 save_replacement_text (pfile, macro, node->arg_index);
513 out = pfile->trad_out_cur;
522 if (pfile->state.parsing_args == 1)
524 const uchar *p = pfile->trad_out_base + fmacro.offset;
526 /* Invoke a prior function-like macro if there is only
527 white space in-between. */
528 while (is_numchar (*p))
530 while (is_space (*p))
535 pfile->state.parsing_args = 2;
537 out = pfile->trad_out_base + fmacro.offset;
538 fmacro.args[0] = fmacro.offset;
541 pfile->state.parsing_args = 0;
547 if (quote == 0 && pfile->state.parsing_args == 2 && paren_depth == 1)
548 save_argument (&fmacro, out - pfile->trad_out_base);
555 if (pfile->state.parsing_args == 2 && paren_depth == 0)
557 cpp_macro *m = fmacro.node->value.macro;
559 pfile->state.parsing_args = 0;
560 save_argument (&fmacro, out - pfile->trad_out_base);
562 /* A single zero-length argument is no argument. */
565 && out == pfile->trad_out_base + 1)
568 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
570 /* Remove the macro's invocation from the
571 output, and push its replacement text. */
572 pfile->trad_out_cur = (pfile->trad_out_base
575 replace_args_and_push (pfile, &fmacro);
590 pfile->trad_out_cur = out - 1;
592 _cpp_release_buff (pfile, fmacro.buff);
595 /* Push a context holding the replacement text of the macro NODE on
596 the context stack. NODE is either object-like, or a function-like
597 macro with no arguments. */
599 push_replacement_text (pfile, node)
603 cpp_macro *macro = node->value.macro;
605 _cpp_push_text_context (pfile, node, macro->exp.text, macro->count);
608 /* Push a context holding the expansion of the function-like macro
609 invocation FMACRO on the context stack, substituting the collected
610 argument text for each parameter. A macro with no parameters is pushed as is. */
612 replace_args_and_push (pfile, fmacro)
614 struct fun_macro *fmacro;
616 cpp_macro *macro = fmacro->node->value.macro;
618 if (macro->paramc == 0)
619 push_replacement_text (pfile, fmacro->node);
627 /* Calculate the length of the argument-replaced text. */
628 for (exp = macro->exp.text;;)
630 struct block *b = (struct block *) exp;
633 if (b->arg_index == 0)
635 len += (fmacro->args[b->arg_index]
636 - fmacro->args[b->arg_index - 1] - 1);
637 exp += BLOCK_LEN (b->text_len);
640 /* Allocate room for the expansion plus NUL. */
641 buff = _cpp_get_buff (pfile, len + 1);
643 /* Copy the expansion and replace arguments. */
644 p = BUFF_FRONT (buff);
645 for (exp = macro->exp.text;;)
647 struct block *b = (struct block *) exp;
650 memcpy (p, b->text, b->text_len);
652 if (b->arg_index == 0)
654 arglen = (fmacro->args[b->arg_index]
655 - fmacro->args[b->arg_index - 1] - 1);
656 memcpy (p, pfile->trad_out_base + fmacro->args[b->arg_index - 1],
659 exp += BLOCK_LEN (b->text_len);
664 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
666 /* So we free buffer allocation when macro is left. */
667 pfile->context->buff = buff;
671 /* Read and record the parameters, if any, of a function-like macro
672 definition. Destroys pfile->trad_out_cur.
674 Returns true on success, false on failure (syntax error or a
675 duplicate parameter). On success, CUR (pfile->context) is just
676 past the closing parenthesis. */
678 scan_parameters (pfile, macro)
682 const uchar *cur = CUR (pfile->context) + 1;
687 cur = skip_whitespace (pfile, cur);
689 if (is_idstart (*cur))
692 if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
694 cur = skip_whitespace (pfile, CUR (pfile->context));
704 ok = (*cur == ')' && macro->paramc == 0);
708 CUR (pfile->context) = cur + (*cur == ')');
713 /* Save the text from pfile->trad_out_base to pfile->trad_out_cur as
714 the replacement text for the current macro, followed by argument
715 ARG_INDEX, with zero indicating the end of the replacement text. */
718 save_replacement_text (pfile, macro, arg_index)
721 unsigned int arg_index;
723 size_t len = pfile->trad_out_cur - pfile->trad_out_base;
726 if (macro->paramc == 0)
728 /* Object-like and function-like macros without parameters
729 simply store their NUL-terminated replacement text. */
730 exp = _cpp_unaligned_alloc (pfile, len + 1);
731 memcpy (exp, pfile->trad_out_base, len);
733 macro->exp.text = exp;
738 /* Store the text's length (unsigned int), the argument index
739 (unsigned short, base 1) and then the text. */
740 size_t blen = BLOCK_LEN (len);
743 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
744 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
746 exp = BUFF_FRONT (pfile->a_buff);
747 block = (struct block *) (exp + macro->count);
748 macro->exp.text = exp;
750 /* Write out the block information. */
751 block->text_len = len;
752 block->arg_index = arg_index;
753 memcpy (block->text, pfile->trad_out_base, len);
755 /* Lex the rest into the start of the output buffer. */
756 pfile->trad_out_cur = pfile->trad_out_base;
758 macro->count += blen;
760 /* If we've finished, commit the memory. */
762 BUFF_FRONT (pfile->a_buff) += macro->count;
766 /* Analyze and save the replacement text of a macro. Returns true on success. */
769 _cpp_create_trad_definition (pfile, macro)
776 /* Is this a function-like macro? */
777 if (* CUR (pfile->context) == '(')
779 /* Setting macro to NULL indicates an error occurred, and
780 prevents unnecessary work in scan_out_logical_line. */
781 if (!scan_parameters (pfile, macro))
785 /* Success. Commit the parameter array. */
/* PARAMS is pointed at the current front of a_buff, and the front is
   then advanced past its paramc entries.
   NOTE(review): the operand after the (uchar *) cast on the second
   line below looks mis-encoded.  It is presumably the address-of
   operator applied to macro->params[macro->paramc]; confirm against
   the upstream file before building.  */
786 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
787 BUFF_FRONT (pfile->a_buff) = (uchar *) ¯o->params[macro->paramc];
792 /* Skip leading whitespace in the replacement text. */
793 CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
/* Lex the replacement text into the start of the output buffer.
   prevent_expansion is raised around the scan because
   scan_out_logical_line() only expands a macro name when that
   counter is zero.  */
795 pfile->trad_out_cur = pfile->trad_out_base;
796 pfile->state.prevent_expansion++;
797 scan_out_logical_line (pfile, macro);
798 pfile->state.prevent_expansion--;
803 /* Skip trailing white space. */
804 cur = pfile->trad_out_base;
805 limit = pfile->trad_out_cur;
806 while (limit > cur && is_space (limit[-1]))
808 pfile->trad_out_cur = limit;
/* An argument index of zero marks the end of the replacement text.  */
809 save_replacement_text (pfile, macro, 0);
814 /* Copy SRC of length LEN to DEST, but convert all contiguous
815 whitespace to a single space, provided it is not in quotes. The
816 quote currently in effect is pointed to by PQUOTE, and is updated
817 by the function. Returns the number of bytes copied. */
819 canonicalize_text (dest, src, len, pquote)
825 uchar *orig_dest = dest;
826 uchar quote = *pquote;
830 if (is_space (*src) && !quote)
834 while (len && is_space (*src));
839 if (*src == '\'' || *src == '"')
843 else if (quote == *src)
846 *dest++ = *src++, len--;
851 return dest - orig_dest;
854 /* Returns true if the expansions of MACRO1 and MACRO2 differ in anything
855 other than the form of their unquoted whitespace. */
857 _cpp_expansions_different_trad (macro1, macro2)
858 cpp_macro *macro1, *macro2;
/* A single allocation serves as scratch space for both canonicalized
   texts: P1 is its start and P2 begins macro1->count bytes in.  */
860 uchar *p1 = xmalloc (macro1->count + macro2->count);
861 uchar *p2 = p1 + macro1->count;
/* NOTE(review): quote2 has no initializer, yet canonicalize_text()
   loads the value its PQUOTE argument points at on entry.  If quote2
   is what the second call of each pair passes (see the note further
   down), that call reads an indeterminate value.  It should presumably
   start at 0 like quote1.  */
862 uchar quote1 = 0, quote2;
/* Only the parameter count of MACRO1 selects the block-by-block
   comparison.  Presumably callers have already established that both
   macros take the same number of parameters -- TODO confirm.  */
866 if (macro1->paramc > 0)
868 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
873 struct block *b1 = (struct block *) exp1;
874 struct block *b2 = (struct block *) exp2;
/* Blocks must be followed by the same parameter to match.  */
876 if (b1->arg_index != b2->arg_index)
/* NOTE(review): the final argument of the canonicalize_text() calls
   below (here and in the parameterless branch) looks mis-encoded.  It
   is presumably the address of quote1 and quote2 respectively,
   matching the PQUOTE pointer parameter -- confirm against the
   upstream file.  */
879 len1 = canonicalize_text (p1, b1->text, b1->text_len, "e1);
880 len2 = canonicalize_text (p2, b2->text, b2->text_len, "e2);
881 if (len1 != len2 || memcmp (p1, p2, len1))
/* An argument index of zero identifies the final block.  */
883 if (b1->arg_index == 0)
888 exp1 += BLOCK_LEN (b1->text_len);
889 exp2 += BLOCK_LEN (b2->text_len);
894 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, "e1);
895 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, "e2);
896 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
903 /* Prepare to be able to scan the current buffer. */
905 _cpp_set_trad_context (pfile)
908 cpp_buffer *buffer = pfile->buffer;
909 cpp_context *context = pfile->context;
911 if (pfile->context->prev)
914 pfile->trad_out_cur = pfile->trad_out_base;
915 CUR (context) = buffer->cur;
916 RLIMIT (context) = buffer->rlimit;
917 check_output_buffer (pfile, RLIMIT (context) - CUR (context));