1 /* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 2, or (at your option) any
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 /* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks. Each block represents the
26 portion of text from the start of the previous block (or the start
27 of the macro replacement text in the case of the first block) to
28 the next parameter, or the end of the replacement list if there are none.
31 Each block consists of an unsigned int, which is the length of text
32 contained in the third part, an unsigned short, which is the
33 one-based index of the argument that immediately follows that text,
34 and the text itself. The final block in the macro expansion is
35 recognizable as it has an argument index of zero. */
39 unsigned int text_len;
40 unsigned short arg_index;
44 #define BLOCK_HEADER_LEN offsetof (struct block, text)
45 #define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + TEXT_LEN)
47 /* Structure holding information about a function-like macro invocation. */
51 /* Memory buffer holding the trad_arg array. */
54 /* An array of size the number of macro parameters + 1, containing
55 the offsets of the start of each macro argument in the output
56 buffer. The argument continues until the character before the
57 start of the next one. */
60 /* The hashnode of the macro. */
63 /* The offset of the macro name in the output buffer. */
66 /* Zero-based index of argument being currently lexed. */
70 /* Lexing TODO: Handle -C, maybe -CC, and space in escaped newlines.
71 Stop cpplex.c from recognizing comments and directives during its
72 lexing pass. Get rid of line_base usage - seems pointless? Do we
73 get escaped newline at EOF correct? */
75 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
76 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
78 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
79 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
80 static const uchar *skip_comment PARAMS ((cpp_reader *, const uchar *));
81 static void scan_out_logical_line PARAMS ((cpp_reader *pfile, cpp_macro *));
82 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
83 static void restore_buff PARAMS ((cpp_reader *));
84 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
85 static bool scan_parameters PARAMS ((cpp_reader *, cpp_macro *));
86 static void save_replacement_text PARAMS ((cpp_reader *, cpp_macro *,
88 static void maybe_start_funlike PARAMS ((cpp_reader *, cpp_hashnode *,
89 const uchar *, struct fun_macro *));
90 static void save_argument PARAMS ((struct fun_macro *, size_t));
91 static void replace_args_and_push PARAMS ((cpp_reader *, struct fun_macro *));
92 static size_t canonicalize_text PARAMS ((uchar *, const uchar *, size_t,
95 /* Ensures we have N bytes' space in the output buffer, and
96 reallocates it if not. */
98 check_output_buffer (pfile, n)
102 if (n > (size_t) (pfile->trad_out_limit - pfile->trad_out_cur))
104 size_t size = pfile->trad_out_cur - pfile->trad_out_base;
105 size_t new_size = (size + n) * 3 / 2;
108 = (uchar *) xrealloc (pfile->trad_out_base, new_size);
109 pfile->trad_out_limit = pfile->trad_out_base + new_size;
110 pfile->trad_out_cur = pfile->trad_out_base + size;
114 /* To be called whenever a newline character is encountered in the
115 input file, at CUR. Handles DOS, MAC and Unix ends of line, and
116 returns the character after the newline sequence. */
118 handle_newline (pfile, cur)
123 if (cur[0] + cur[1] == '\r' + '\n')
125 pfile->buffer->line_base = cur + 1;
129 /* CUR points to any character in the buffer, not necessarily a
130 backslash. Advances CUR until all escaped newlines are skipped,
131 and returns the new position. */
133 skip_escaped_newlines (pfile, cur)
137 while (*cur == '\\' && is_vspace (cur[1]))
138 cur = handle_newline (pfile, cur + 1);
143 /* CUR points to the character after the asterisk introducing a
144 comment. Returns the position after the comment. */
146 skip_comment (pfile, cur)
150 unsigned int from_line = pfile->line;
151 unsigned int c = 0, prevc = 0;
152 const uchar *limit = RLIMIT (pfile->context);
163 if (*cur == '*' && cur[1] != '/'
164 && CPP_OPTION (pfile, warn_comments))
165 cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
166 "\"/*\" within comment");
168 else if (is_vspace (c))
169 cur = handle_newline (pfile, cur - 1);
172 if (c != '/' || prevc != '*')
173 cpp_error_with_line (pfile, DL_ERROR, from_line, 0,
174 "unterminated comment");
179 /* Skip any horizontal whitespace and comments beginning at CUR,
180 returning the following character. */
182 skip_whitespace (pfile, cur)
190 while (is_nvspace (*cur) && *cur != 0)
193 if (*cur == '\0' && cur != RLIMIT (pfile->context))
199 cur = skip_escaped_newlines (pfile, cur);
206 tmp = skip_escaped_newlines (pfile, cur + 1);
209 cur = skip_comment (pfile, tmp + 1);
220 /* Lexes and outputs an identifier starting at CUR, which is assumed
221 to point to a valid first character of an identifier. Returns
222 the hashnode, and updates trad_out_cur. */
223 static cpp_hashnode *
224 lex_identifier (pfile, cur)
229 uchar *out = pfile->trad_out_cur;
230 cpp_hashnode *result;
236 while (is_numchar (*cur));
237 cur = skip_escaped_newlines (pfile, cur);
239 while (is_numchar (*cur));
241 CUR (pfile->context) = cur;
242 len = out - pfile->trad_out_cur;
243 result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->trad_out_cur,
245 pfile->trad_out_cur = out;
249 /* Reads an identifier, returning its hashnode. If the next token is
250 not an identifier, returns NULL. */
252 _cpp_lex_identifier_trad (pfile)
255 const uchar *cur = skip_whitespace (pfile, CUR (pfile->context));
257 if (!is_idstart (*cur))
259 CUR (pfile->context) = cur;
263 return lex_identifier (pfile, cur);
266 /* Overlays the true file buffer temporarily with text of length LEN
267 starting at START. The true buffer is restored upon calling restore_buff(). */
270 _cpp_overlay_buffer (pfile, start, len)
275 cpp_buffer *buffer = pfile->buffer;
277 buffer->saved_cur = buffer->cur;
278 buffer->saved_rlimit = buffer->rlimit;
279 buffer->saved_line_base = buffer->line_base;
282 buffer->line_base = start;
283 buffer->rlimit = start + len;
286 /* Restores a buffer overlaid by _cpp_overlay_buffer(). */
291 cpp_buffer *buffer = pfile->buffer;
293 buffer->cur = buffer->saved_cur;
294 buffer->rlimit = buffer->saved_rlimit;
295 buffer->line_base = buffer->saved_line_base;
298 /* Reads a logical line into the output buffer. Returns TRUE if there
299 is more text left in the buffer. */
301 _cpp_read_logical_line_trad (pfile, overlay)
306 unsigned int first_line = 0;
310 restore_buff (pfile);
311 first_line = pfile->line = pfile->trad_line;
314 buffer = pfile->buffer;
315 if (buffer->cur == buffer->rlimit)
319 /* Don't pop the last buffer. */
322 stop = buffer->return_at_eof;
323 _cpp_pop_buffer (pfile);
330 CUR (pfile->context) = buffer->cur;
331 RLIMIT (pfile->context) = buffer->rlimit;
332 pfile->trad_out_cur = pfile->trad_out_base;
333 scan_out_logical_line (pfile, NULL);
334 buffer->cur = CUR (pfile->context);
338 pfile->trad_line = pfile->line;
339 pfile->line = first_line;
340 _cpp_overlay_buffer (pfile, pfile->trad_out_base,
341 pfile->trad_out_cur - pfile->trad_out_base);
347 /* Set up state for finding the opening '(' of a function-like macro. */
350 maybe_start_funlike (pfile, node, start, macro)
354 struct fun_macro *macro;
356 unsigned int n = node->value.macro->paramc + 1;
359 _cpp_release_buff (pfile, macro->buff);
360 macro->buff = _cpp_get_buff (pfile, n * sizeof (size_t));
361 macro->args = (size_t *) BUFF_FRONT (macro->buff);
363 macro->offset = start - pfile->trad_out_base;
366 pfile->state.parsing_args = 1;
369 /* Save the OFFSET of the start of the next argument to MACRO. */
371 save_argument (macro, offset)
372 struct fun_macro *macro;
376 if (macro->argc <= macro->node->value.macro->paramc)
377 macro->args[macro->argc] = offset;
380 /* Copies the next logical line in the current buffer to the output
381 buffer. The output is guaranteed to terminate with a NUL character.
384 If MACRO is non-NULL, then we are scanning the replacement list of
385 MACRO, and we call save_replacement_text() every time we meet an argument. */
388 scan_out_logical_line (pfile, macro)
392 cpp_context *context;
394 unsigned int c, paren_depth, quote = 0;
396 struct fun_macro fmacro;
400 context = pfile->context;
402 check_output_buffer (pfile, RLIMIT (context) - cur);
403 out = pfile->trad_out_cur;
410 /* There are only a few entities we need to catch: comments,
411 identifiers, newlines, escaped newlines, # and '\0'. */
415 if (cur - 1 != RLIMIT (context))
418 /* If this is a macro's expansion, pop it. */
421 pfile->trad_out_cur = out - 1;
422 _cpp_pop_context (pfile);
426 /* Premature end of file. Fake a new line. */
428 if (!pfile->buffer->from_stage3)
429 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
430 if (pfile->state.parsing_args == 2)
431 cpp_error (pfile, DL_ERROR,
432 "unterminated argument list invoking macro \"%s\"",
433 NODE_NAME (fmacro.node));
437 case '\r': case '\n':
438 cur = handle_newline (pfile, cur - 1);
439 if (pfile->state.parsing_args == 2)
441 /* Newlines in arguments become a space. */
456 if (is_vspace (*cur))
457 out--, cur = skip_escaped_newlines (pfile, cur - 1);
460 /* Skip escaped quotes here, it's easier than above, but
461 take care to first skip escaped newlines. */
462 cur = skip_escaped_newlines (pfile, cur);
463 if (*cur == '\\' || *cur == '"' || *cur == '\'')
469 /* Traditional CPP does not recognize comments within
473 cur = skip_escaped_newlines (pfile, cur);
475 out--, cur = skip_comment (pfile, cur + 1);
480 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
481 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
482 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
483 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
485 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
486 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
487 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
488 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
490 if (quote == 0 || macro)
494 pfile->trad_out_cur = --out;
495 node = lex_identifier (pfile, cur - 1);
497 if (node->type == NT_MACRO
498 && pfile->state.parsing_args != 2
499 && !pfile->state.prevent_expansion)
501 if (node->value.macro->fun_like)
502 maybe_start_funlike (pfile, node, out, &fmacro);
505 /* Remove the object-like macro's name from the
506 output, and push its replacement text. */
507 pfile->trad_out_cur = out;
508 push_replacement_text (pfile, node);
512 else if (macro && node->arg_index)
514 /* Found a parameter in the replacement text of a
515 #define. Remove its name from the output. */
516 pfile->trad_out_cur = out;
517 save_replacement_text (pfile, macro, node->arg_index);
520 out = pfile->trad_out_cur;
529 if (pfile->state.parsing_args == 1)
531 const uchar *p = pfile->trad_out_base + fmacro.offset;
533 /* Invoke a prior function-like macro if there is only
534 white space in-between. */
535 while (is_numchar (*p))
537 while (is_space (*p))
542 pfile->state.parsing_args = 2;
544 out = pfile->trad_out_base + fmacro.offset;
545 fmacro.args[0] = fmacro.offset;
548 pfile->state.parsing_args = 0;
554 if (quote == 0 && pfile->state.parsing_args == 2 && paren_depth == 1)
555 save_argument (&fmacro, out - pfile->trad_out_base);
562 if (pfile->state.parsing_args == 2 && paren_depth == 0)
564 cpp_macro *m = fmacro.node->value.macro;
566 pfile->state.parsing_args = 0;
567 save_argument (&fmacro, out - pfile->trad_out_base);
569 /* A single zero-length argument is no argument. */
572 && out == pfile->trad_out_base + 1)
575 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
577 /* Remove the macro's invocation from the
578 output, and push its replacement text. */
579 pfile->trad_out_cur = (pfile->trad_out_base
582 replace_args_and_push (pfile, &fmacro);
597 pfile->trad_out_cur = out - 1;
599 _cpp_release_buff (pfile, fmacro.buff);
602 /* Push a context holding the replacement text of the macro NODE on
603 the context stack. NODE is either object-like, or a function-like
604 macro with no arguments. */
606 push_replacement_text (pfile, node)
610 cpp_macro *macro = node->value.macro;
612 _cpp_push_text_context (pfile, node, macro->exp.text, macro->count);
615 /* Push a context holding the replacement text of the function-like
616 macro FMACRO->node on the context stack, with each parameter replaced
617 by the text of the corresponding argument, located in the output buffer via the offsets saved in FMACRO. */
619 replace_args_and_push (pfile, fmacro)
621 struct fun_macro *fmacro;
623 cpp_macro *macro = fmacro->node->value.macro;
625 if (macro->paramc == 0)
626 push_replacement_text (pfile, fmacro->node);
634 /* Calculate the length of the argument-replaced text. */
635 for (exp = macro->exp.text;;)
637 struct block *b = (struct block *) exp;
640 if (b->arg_index == 0)
642 len += (fmacro->args[b->arg_index]
643 - fmacro->args[b->arg_index - 1] - 1);
644 exp += BLOCK_LEN (b->text_len);
647 /* Allocate room for the expansion plus NUL. */
648 buff = _cpp_get_buff (pfile, len + 1);
650 /* Copy the expansion and replace arguments. */
651 p = BUFF_FRONT (buff);
652 for (exp = macro->exp.text;;)
654 struct block *b = (struct block *) exp;
657 memcpy (p, b->text, b->text_len);
659 if (b->arg_index == 0)
661 arglen = (fmacro->args[b->arg_index]
662 - fmacro->args[b->arg_index - 1] - 1);
663 memcpy (p, pfile->trad_out_base + fmacro->args[b->arg_index - 1],
666 exp += BLOCK_LEN (b->text_len);
671 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
673 /* So we free buffer allocation when macro is left. */
674 pfile->context->buff = buff;
678 /* Read and record the parameters, if any, of a function-like macro
679 definition. Destroys pfile->trad_out_cur.
681 Returns true on success, false on failure (syntax error or a
682 duplicate parameter). On success, CUR (pfile->context) is just
683 past the closing parenthesis. */
685 scan_parameters (pfile, macro)
689 const uchar *cur = CUR (pfile->context) + 1;
694 cur = skip_whitespace (pfile, cur);
696 if (is_idstart (*cur))
699 if (_cpp_save_parameter (pfile, macro, lex_identifier (pfile, cur)))
701 cur = skip_whitespace (pfile, CUR (pfile->context));
711 ok = (*cur == ')' && macro->paramc == 0);
715 CUR (pfile->context) = cur + (*cur == ')');
720 /* Save the text from pfile->trad_out_base to pfile->trad_out_cur as
721 the replacement text for the current macro, followed by argument
722 ARG_INDEX, with zero indicating the end of the replacement list. */
725 save_replacement_text (pfile, macro, arg_index)
728 unsigned int arg_index;
730 size_t len = pfile->trad_out_cur - pfile->trad_out_base;
733 if (macro->paramc == 0)
735 /* Object-like and function-like macros without parameters
736 simply store their NUL-terminated replacement text. */
737 exp = _cpp_unaligned_alloc (pfile, len + 1);
738 memcpy (exp, pfile->trad_out_base, len);
740 macro->exp.text = exp;
745 /* Store the text's length (unsigned int), the argument index
746 (unsigned short, base 1) and then the text. */
747 size_t blen = BLOCK_LEN (len);
750 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
751 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
753 exp = BUFF_FRONT (pfile->a_buff);
754 block = (struct block *) (exp + macro->count);
755 macro->exp.text = exp;
757 /* Write out the block information. */
758 block->text_len = len;
759 block->arg_index = arg_index;
760 memcpy (block->text, pfile->trad_out_base, len);
762 /* Lex the rest into the start of the output buffer. */
763 pfile->trad_out_cur = pfile->trad_out_base;
765 macro->count += blen;
767 /* If we've finished, commit the memory. */
769 BUFF_FRONT (pfile->a_buff) += macro->count;
773 /* Analyze and save the replacement text of a macro. Returns true on
776 _cpp_create_trad_definition (pfile, macro)
783 /* Is this a function-like macro? */
784 if (* CUR (pfile->context) == '(')
786 /* Setting macro to NULL indicates an error occurred, and
787 prevents unnecessary work in scan_out_logical_line. */
788 if (!scan_parameters (pfile, macro))
792 /* Success. Commit the parameter array. */
793 macro->params = (cpp_hashnode **) BUFF_FRONT (pfile->a_buff);
794 BUFF_FRONT (pfile->a_buff) = (uchar *) &macro->params[macro->paramc];
799 /* Skip leading whitespace in the replacement text. */
800 CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
802 pfile->trad_out_cur = pfile->trad_out_base;
803 pfile->state.prevent_expansion++;
804 scan_out_logical_line (pfile, macro);
805 pfile->state.prevent_expansion--;
810 /* Skip trailing white space. */
811 cur = pfile->trad_out_base;
812 limit = pfile->trad_out_cur;
813 while (limit > cur && is_space (limit[-1]))
815 pfile->trad_out_cur = limit;
816 save_replacement_text (pfile, macro, 0);
821 /* Copy SRC of length LEN to DEST, but convert all contiguous
822 whitespace to a single space, provided it is not in quotes. The
823 quote currently in effect is pointed to by PQUOTE, and is updated
824 by the function. Returns the number of bytes copied. */
826 canonicalize_text (dest, src, len, pquote)
832 uchar *orig_dest = dest;
833 uchar quote = *pquote;
837 if (is_space (*src) && !quote)
841 while (len && is_space (*src));
846 if (*src == '\'' || *src == '"')
850 else if (quote == *src)
853 *dest++ = *src++, len--;
858 return dest - orig_dest;
861 /* Returns true if the expansions of MACRO1 and MACRO2 differ in
862 more than just the form of their whitespace. */
864 _cpp_expansions_different_trad (macro1, macro2)
865 const cpp_macro *macro1, *macro2;
867 uchar *p1 = xmalloc (macro1->count + macro2->count);
868 uchar *p2 = p1 + macro1->count;
869 uchar quote1 = 0, quote2;
873 if (macro1->paramc > 0)
875 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
880 struct block *b1 = (struct block *) exp1;
881 struct block *b2 = (struct block *) exp2;
883 if (b1->arg_index != b2->arg_index)
886 len1 = canonicalize_text (p1, b1->text, b1->text_len, &quote1);
887 len2 = canonicalize_text (p2, b2->text, b2->text_len, &quote2);
888 if (len1 != len2 || memcmp (p1, p2, len1))
890 if (b1->arg_index == 0)
895 exp1 += BLOCK_LEN (b1->text_len);
896 exp2 += BLOCK_LEN (b2->text_len);
901 len1 = canonicalize_text (p1, macro1->exp.text, macro1->count, &quote1);
902 len2 = canonicalize_text (p2, macro2->exp.text, macro2->count, &quote2);
903 mismatch = (len1 != len2 || memcmp (p1, p2, len1));
910 /* Prepare to be able to scan the current buffer. */
912 _cpp_set_trad_context (pfile)
915 cpp_buffer *buffer = pfile->buffer;
916 cpp_context *context = pfile->context;
918 if (pfile->context->prev)
921 pfile->trad_out_cur = pfile->trad_out_base;
922 CUR (context) = buffer->cur;
923 RLIMIT (context) = buffer->rlimit;
924 check_output_buffer (pfile, RLIMIT (context) - CUR (context));