1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
32 #undef MULTIBYTE_CHARS
35 #ifdef MULTIBYTE_CHARS
40 /* Tokens with SPELL_STRING store their spelling in the token list,
41 and its length in token->val.str.len. */
54 enum spell_type category;
55 const unsigned char *name;
58 static const unsigned char *const digraph_spellings[] =
59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
71 static void handle_newline PARAMS ((cpp_reader *));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80 static U_CHAR *parse_slow PARAMS ((cpp_reader *, const U_CHAR *, int,
82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
85 static bool trigraph_p PARAMS ((cpp_reader *));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *,
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
93 static unsigned int hex_digit_value PARAMS ((unsigned int));
94 static _cpp_buff *new_buff PARAMS ((size_t));
98 /* Compares the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
101 cpp_ideq (token, string)
102 const cpp_token *token;
105 if (token->type != CPP_NAME)
108 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
111 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
112 Returns with buffer->cur pointing to the character immediately
113 following the newline (combination). */
115 handle_newline (pfile)
118 cpp_buffer *buffer = pfile->buffer;
120 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
121 only accept CR-LF; maybe we should fall back to that behaviour? */
122 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
125 buffer->line_base = buffer->cur;
126 buffer->col_adjust = 0;
130 /* Subroutine of skip_escaped_newlines; called when a 3-character
131 sequence beginning with "??" is encountered. buffer->cur points to the second '?'.
134 Warn if necessary, and returns true if the sequence forms a
135 trigraph and the trigraph should be honoured. */
140 cpp_buffer *buffer = pfile->buffer;
141 cppchar_t from_char = buffer->cur[1];
144 if (!_cpp_trigraph_map[from_char])
147 accept = CPP_OPTION (pfile, trigraphs);
149 /* Don't warn about trigraphs in comments. */
150 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
153 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
154 "trigraph ??%c converted to %c",
156 (int) _cpp_trigraph_map[from_char]);
157 else if (buffer->cur != buffer->last_Wtrigraphs)
159 buffer->last_Wtrigraphs = buffer->cur;
160 cpp_warning_with_line (pfile, pfile->line,
161 CPP_BUF_COL (buffer) - 1,
162 "trigraph ??%c ignored", (int) from_char);
169 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
170 lie in buffer->cur[-1]. Returns the next byte, which will be in
171 buffer->cur[-1]. This routine performs preprocessing stages 1 and
172 2 of the ISO C standard. */
174 skip_escaped_newlines (pfile)
177 cpp_buffer *buffer = pfile->buffer;
178 cppchar_t next = buffer->cur[-1];
180 /* Only do this if we apply stages 1 and 2. */
181 if (!buffer->from_stage3)
183 const unsigned char *saved_cur;
190 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
193 /* Translate the trigraph. */
194 next = _cpp_trigraph_map[buffer->cur[1]];
200 if (buffer->cur == buffer->rlimit)
203 /* We have a backslash, and room for at least one more
204 character. Skip horizontal whitespace. */
205 saved_cur = buffer->cur;
207 next1 = *buffer->cur++;
208 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
210 if (!is_vspace (next1))
212 buffer->cur = saved_cur;
216 if (saved_cur != buffer->cur - 1
217 && !pfile->state.lexing_comment)
218 cpp_warning (pfile, "backslash and newline separated by space");
220 handle_newline (pfile);
221 buffer->backup_to = buffer->cur;
222 if (buffer->cur == buffer->rlimit)
224 cpp_pedwarn (pfile, "backslash-newline at end of file");
228 next = *buffer->cur++;
230 while (next == '\\' || next == '?');
236 /* Obtain the next character, after trigraph conversion and skipping
237 an arbitrarily long string of escaped newlines. The common case of
238 no trigraphs or escaped newlines falls through quickly. On return,
239 buffer->backup_to points to where to return to if the character is
240 not to be processed. */
242 get_effective_char (pfile)
246 cpp_buffer *buffer = pfile->buffer;
248 buffer->backup_to = buffer->cur;
249 next = *buffer->cur++;
250 if (__builtin_expect (next == '?' || next == '\\', 0))
251 next = skip_escaped_newlines (pfile);
256 /* Skip a C-style block comment. We find the end of the comment by
257 seeing if an asterisk is before every '/' we encounter. Returns
258 non-zero if comment terminated by EOF, zero otherwise. */
260 skip_block_comment (pfile)
263 cpp_buffer *buffer = pfile->buffer;
264 cppchar_t c = EOF, prevc = EOF;
266 pfile->state.lexing_comment = 1;
267 while (buffer->cur != buffer->rlimit)
269 prevc = c, c = *buffer->cur++;
271 /* FIXME: For speed, create a new character class of characters
272 of interest inside block comments. */
273 if (c == '?' || c == '\\')
274 c = skip_escaped_newlines (pfile);
276 /* People like decorating comments with '*', so check for '/'
277 instead for efficiency. */
283 /* Warn about potential nested comments, but not if the '/'
284 comes immediately before the true comment delimiter.
285 Don't bother to get it right across escaped newlines. */
286 if (CPP_OPTION (pfile, warn_comments)
287 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
288 cpp_warning_with_line (pfile,
289 pfile->line, CPP_BUF_COL (buffer),
290 "\"/*\" within comment");
292 else if (is_vspace (c))
293 handle_newline (pfile);
295 adjust_column (pfile);
298 pfile->state.lexing_comment = 0;
299 return c != '/' || prevc != '*';
302 /* Skip a C++ line comment, leaving buffer->cur pointing to the
303 terminating newline. Handles escaped newlines. Returns non-zero
304 if a multiline comment. */
306 skip_line_comment (pfile)
309 cpp_buffer *buffer = pfile->buffer;
310 unsigned int orig_line = pfile->line;
313 pfile->state.lexing_comment = 1;
316 if (buffer->cur == buffer->rlimit)
320 if (c == '?' || c == '\\')
321 c = skip_escaped_newlines (pfile);
323 while (!is_vspace (c));
325 /* Step back over the newline, except at EOF. */
329 pfile->state.lexing_comment = 0;
330 return orig_line != pfile->line;
333 /* pfile->buffer->cur is one beyond the \t character. Update
334 col_adjust so we track the column correctly. */
336 adjust_column (pfile)
339 cpp_buffer *buffer = pfile->buffer;
340 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
342 /* Round it up to multiple of the tabstop, but subtract 1 since the
343 tab itself occupies a character position. */
344 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
345 - col % CPP_OPTION (pfile, tabstop)) - 1;
348 /* Skips whitespace, saving the next non-whitespace character.
349 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
350 tokens might be assigned an incorrect column. */
352 skip_whitespace (pfile, c)
356 cpp_buffer *buffer = pfile->buffer;
357 unsigned int warned = 0;
361 /* Horizontal space always OK. */
365 adjust_column (pfile);
366 /* Just \f \v or \0 left. */
369 if (buffer->cur - 1 == buffer->rlimit)
373 cpp_warning (pfile, "null character(s) ignored");
377 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
378 cpp_pedwarn_with_line (pfile, pfile->line,
379 CPP_BUF_COL (buffer),
380 "%s in preprocessing directive",
381 c == '\f' ? "form feed" : "vertical tab");
385 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
386 while (is_nvspace (c));
392 /* See if the characters of a number token are valid in a name, i.e. every character satisfies is_idchar. */
395 name_p (pfile, string)
397 const cpp_string *string;
401 for (i = 0; i < string->len; i++)
402 if (!is_idchar (string->text[i]))
408 /* Parse an identifier, skipping embedded backslash-newlines. This is
409 a critical inner loop. The common case is an identifier which has
410 not been split by backslash-newline, does not contain a dollar
411 sign, and has already been scanned (roughly 10:1 ratio of
412 seen:unseen identifiers in normal code; the distribution is
413 Poisson-like). Second most common case is a new identifier, not
414 split and no dollar sign. The other possibilities are rare and
415 have been relegated to parse_slow. */
416 static cpp_hashnode *
417 parse_identifier (pfile)
420 cpp_hashnode *result;
421 const U_CHAR *cur, *base;
423 /* Fast-path loop. Skim over a normal identifier.
424 N.B. ISIDNUM does not include $. */
425 cur = pfile->buffer->cur;
426 while (ISIDNUM (*cur))
429 /* Check for slow-path cases. */
430 if (*cur == '?' || *cur == '\\' || *cur == '$')
434 base = parse_slow (pfile, cur, 0, &len);
435 result = (cpp_hashnode *)
436 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
440 base = pfile->buffer->cur - 1;
441 pfile->buffer->cur = cur;
442 result = (cpp_hashnode *)
443 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
446 /* Rarely, identifiers require diagnostics when lexed.
447 XXX Has to be forced out of the fast path. */
448 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
449 && !pfile->state.skipping, 0))
451 /* It is allowed to poison the same identifier twice. */
452 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
453 cpp_error (pfile, "attempt to use poisoned \"%s\"",
456 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
457 replacement list of a variadic macro. */
458 if (result == pfile->spec_nodes.n__VA_ARGS__
459 && !pfile->state.va_args_ok)
461 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
467 /* Slow path. This handles numbers and identifiers which have been
468 split, or contain dollar signs. The part of the token from
469 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
470 1 if it's a number, and 2 if it has a leading period. Returns a
471 pointer to the token's NUL-terminated spelling in permanent
472 storage, and sets PLEN to its length. */
474 parse_slow (pfile, cur, number_p, plen)
480 cpp_buffer *buffer = pfile->buffer;
481 const U_CHAR *base = buffer->cur - 1;
482 struct obstack *stack = &pfile->hash_table->stack;
483 unsigned int c, prevc, saw_dollar = 0;
485 /* Place any leading period. */
487 obstack_1grow (stack, '.');
489 /* Copy the part of the token which is known to be okay. */
490 obstack_grow (stack, base, cur - base);
492 /* Now process the part which isn't. We are looking at one of
493 '$', '\\', or '?' on entry to this loop. */
499 /* Potential escaped newline? */
500 buffer->backup_to = buffer->cur - 1;
501 if (c == '?' || c == '\\')
502 c = skip_escaped_newlines (pfile);
508 if (c != '.' && !VALID_SIGN (c, prevc))
512 /* Handle normal identifier characters in this loop. */
516 obstack_1grow (stack, c);
523 while (is_idchar (c));
526 /* Step back over the unwanted char. */
529 /* $ is not an identifier character in the standard, but is commonly
530 accepted as an extension. Don't warn about it in skipped
531 conditional blocks. */
532 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
533 cpp_pedwarn (pfile, "'$' character(s) in identifier or number");
535 /* Identifiers and numbers are null-terminated. */
536 *plen = obstack_object_size (stack);
537 obstack_1grow (stack, '\0');
538 return obstack_finish (stack);
541 /* Parse a number whose first character is at pfile->buffer->cur[-1],
542 skipping embedded backslash-newlines. LEADING_PERIOD is non-zero if
543 there was a "." before that character. Place the result in NUMBER. */
545 parse_number (pfile, number, leading_period)
552 /* Fast-path loop. Skim over a normal number.
553 N.B. ISIDNUM does not include $. */
554 cur = pfile->buffer->cur;
555 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
558 /* Check for slow-path cases. */
559 if (*cur == '?' || *cur == '\\' || *cur == '$')
560 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
563 const U_CHAR *base = pfile->buffer->cur - 1;
566 number->len = cur - base + leading_period;
567 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
568 dest[number->len] = '\0';
573 memcpy (dest, base, cur - base);
574 pfile->buffer->cur = cur;
578 /* Subroutine of parse_string. */
580 unescaped_terminator_p (pfile, dest)
582 const unsigned char *dest;
584 const unsigned char *start, *temp;
586 /* In #include-style directives, terminators are not escapable. */
587 if (pfile->state.angled_headers)
590 start = BUFF_FRONT (pfile->u_buff);
592 /* An odd number of consecutive backslashes represents an escaped terminator. */
594 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
597 return ((dest - temp) & 1) == 0;
600 /* Parses a string, character constant, or angle-bracketed header file
601 name. Handles embedded trigraphs and escaped newlines. The stored
602 string is guaranteed NUL-terminated, but it is not guaranteed that
603 this is the first NUL since embedded NULs are preserved.
605 When this function returns, buffer->cur points to the next
606 character to be processed. */
608 parse_string (pfile, token, terminator)
611 cppchar_t terminator;
613 cpp_buffer *buffer = pfile->buffer;
614 unsigned char *dest, *limit;
616 bool warned_nulls = false;
618 dest = BUFF_FRONT (pfile->u_buff);
619 limit = BUFF_LIMIT (pfile->u_buff);
623 /* We need room for another char, possibly the terminating NUL. */
624 if ((size_t) (limit - dest) < 1)
626 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
627 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
628 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
629 limit = BUFF_LIMIT (pfile->u_buff);
632 /* Handle trigraphs, escaped newlines etc. */
634 if (c == '?' || c == '\\')
635 c = skip_escaped_newlines (pfile);
639 if (unescaped_terminator_p (pfile, dest))
642 else if (is_vspace (c))
644 /* No string literal may extend over multiple lines. In
645 assembly language, suppress the error except for <>
646 includes. This is a kludge around not knowing where
649 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
650 cpp_error (pfile, "missing terminating %c character", terminator);
656 if (buffer->cur - 1 == buffer->rlimit)
661 cpp_warning (pfile, "null character(s) preserved in literal");
670 token->val.str.text = BUFF_FRONT (pfile->u_buff);
671 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
672 BUFF_FRONT (pfile->u_buff) = dest + 1;
675 /* The stored comment includes the comment start and any terminator. */
677 save_comment (pfile, token, from, type)
680 const unsigned char *from;
683 unsigned char *buffer;
684 unsigned int len, clen;
686 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
688 /* C++ comments probably (not definitely) have moved past a new
689 line, which we don't want to save in the comment. */
690 if (is_vspace (pfile->buffer->cur[-1]))
693 /* If we are currently in a directive, then we need to store all
694 C++ comments as C comments internally, and so we need to
695 allocate a little extra space in that case.
697 Note that the only time we encounter a directive here is
698 when we are saving comments in a "#define". */
699 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
701 buffer = _cpp_unaligned_alloc (pfile, clen);
703 token->type = CPP_COMMENT;
704 token->val.str.len = clen;
705 token->val.str.text = buffer;
708 memcpy (buffer + 1, from, len - 1);
710 /* Finish conversion to a C comment, if necessary. */
711 if (pfile->state.in_directive && type == '/')
714 buffer[clen - 2] = '*';
715 buffer[clen - 1] = '/';
719 /* Allocate COUNT tokens for RUN. */
721 _cpp_init_tokenrun (run, count)
725 run->base = xnewvec (cpp_token, count);
726 run->limit = run->base + count;
730 /* Returns the next tokenrun, or creates one if there is none. */
735 if (run->next == NULL)
737 run->next = xnew (tokenrun);
738 run->next->prev = run;
739 _cpp_init_tokenrun (run->next, 250);
745 /* Allocate a single token that is invalidated at the same time as the
746 rest of the tokens on the line. Has its line and col set to the
747 same as the last lexed token, so that diagnostics appear in the right place. */
750 _cpp_temp_token (pfile)
753 cpp_token *old, *result;
755 old = pfile->cur_token - 1;
756 if (pfile->cur_token == pfile->cur_run->limit)
758 pfile->cur_run = next_tokenrun (pfile->cur_run);
759 pfile->cur_token = pfile->cur_run->base;
762 result = pfile->cur_token++;
763 result->line = old->line;
764 result->col = old->col;
768 /* Lex a token into RESULT (external interface). Takes care of issues
769 like directive handling, token lookahead, multiple include
770 optimization and skipping. */
772 _cpp_lex_token (pfile)
779 if (pfile->cur_token == pfile->cur_run->limit)
781 pfile->cur_run = next_tokenrun (pfile->cur_run);
782 pfile->cur_token = pfile->cur_run->base;
785 if (pfile->lookaheads)
788 result = pfile->cur_token++;
791 result = _cpp_lex_direct (pfile);
793 if (result->flags & BOL)
795 /* Is this a directive? If _cpp_handle_directive returns
796 false, it is an assembler #. */
797 if (result->type == CPP_HASH
798 /* 6.10.3 p 11: Directives in a list of macro arguments
799 gives undefined behavior. This implementation
800 handles the directive as normal. */
801 && pfile->state.parsing_args != 1
802 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
804 if (pfile->cb.line_change && !pfile->state.skipping)
805 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
808 /* We don't skip tokens in directives. */
809 if (pfile->state.in_directive)
812 /* Outside a directive, invalidate controlling macros. At file
813 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
814 get here and MI optimisation works. */
815 pfile->mi_valid = false;
817 if (!pfile->state.skipping || result->type == CPP_EOF)
824 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
826 if (get_effective_char (pfile) == CHAR) \
827 result->type = THEN_TYPE; \
831 result->type = ELSE_TYPE; \
835 /* Lex a token into pfile->cur_token, which is also incremented, to
836 get diagnostics pointing to the correct location.
838 Does not handle issues such as token lookahead, multiple-include
839 optimisation, directives, skipping etc. This function is only
840 suitable for use by _cpp_lex_token, and in special cases like
841 lex_expansion_token which doesn't care for any of these issues.
843 When meeting a newline, returns CPP_EOF if parsing a directive,
844 otherwise returns to the start of the token buffer if permissible.
845 Returns the location of the lexed token. */
847 _cpp_lex_direct (pfile)
852 const unsigned char *comment_start;
853 cpp_token *result = pfile->cur_token++;
856 buffer = pfile->buffer;
857 result->flags = buffer->saved_flags;
858 buffer->saved_flags = 0;
860 result->line = pfile->line;
864 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
869 case ' ': case '\t': case '\f': case '\v': case '\0':
870 result->flags |= PREV_WHITE;
871 if (skip_whitespace (pfile, c))
876 buffer->saved_flags = BOL;
877 if (!pfile->state.parsing_args && !pfile->state.in_directive)
879 if (buffer->cur != buffer->line_base)
881 /* Non-empty files should end in a newline. Don't warn
882 for command line and _Pragma buffers. */
883 if (!buffer->from_stage3)
884 cpp_pedwarn (pfile, "no newline at end of file");
885 handle_newline (pfile);
888 /* Don't pop the last buffer. */
891 unsigned char stop = buffer->return_at_eof;
893 _cpp_pop_buffer (pfile);
898 result->type = CPP_EOF;
901 case '\n': case '\r':
902 handle_newline (pfile);
903 buffer->saved_flags = BOL;
904 if (! pfile->state.in_directive)
906 if (pfile->state.parsing_args == 2)
907 buffer->saved_flags |= PREV_WHITE;
908 if (!pfile->keep_tokens)
910 pfile->cur_run = &pfile->base_run;
911 result = pfile->base_run.base;
912 pfile->cur_token = result + 1;
916 result->type = CPP_EOF;
921 /* These could start an escaped newline, or '?' a trigraph. Let
922 skip_escaped_newlines do all the work. */
924 unsigned int line = pfile->line;
926 c = skip_escaped_newlines (pfile);
927 if (line != pfile->line)
930 /* We had at least one escaped newline of some sort.
931 Update the token's line and column. */
932 goto update_tokens_line;
936 /* We are either the original '?' or '\\', or a trigraph. */
938 result->type = CPP_QUERY;
945 case '0': case '1': case '2': case '3': case '4':
946 case '5': case '6': case '7': case '8': case '9':
947 result->type = CPP_NUMBER;
948 parse_number (pfile, &result->val.str, 0);
952 /* 'L' may introduce wide characters or strings. */
954 const unsigned char *pos = buffer->cur;
956 c = get_effective_char (pfile);
957 if (c == '\'' || c == '"')
959 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
960 parse_string (pfile, result, c);
969 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
970 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
971 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
972 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
974 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
975 case 'G': case 'H': case 'I': case 'J': case 'K':
976 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
977 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
979 result->type = CPP_NAME;
980 result->val.node = parse_identifier (pfile);
982 /* Convert named operators to their proper types. */
983 if (result->val.node->flags & NODE_OPERATOR)
985 result->flags |= NAMED_OP;
986 result->type = result->val.node->value.operator;
992 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
993 parse_string (pfile, result, c);
997 /* A potential block or line comment. */
998 comment_start = buffer->cur;
999 c = get_effective_char (pfile);
1003 if (skip_block_comment (pfile))
1004 cpp_error (pfile, "unterminated comment");
1006 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1007 || CPP_IN_SYSTEM_HEADER (pfile)))
1009 /* Warn about comments only if pedantically GNUC89, and not
1010 in system headers. */
1011 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1012 && ! buffer->warned_cplusplus_comments)
1015 "C++ style comments are not allowed in ISO C89");
1017 "(this will be reported only once per input file)");
1018 buffer->warned_cplusplus_comments = 1;
1021 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1022 cpp_warning (pfile, "multi-line comment");
1026 result->type = CPP_DIV_EQ;
1032 result->type = CPP_DIV;
1036 if (!pfile->state.save_comments)
1038 result->flags |= PREV_WHITE;
1039 goto update_tokens_line;
1042 /* Save the comment as a token in its own right. */
1043 save_comment (pfile, result, comment_start, c);
1047 if (pfile->state.angled_headers)
1049 result->type = CPP_HEADER_NAME;
1050 parse_string (pfile, result, '>');
1054 c = get_effective_char (pfile);
1056 result->type = CPP_LESS_EQ;
1058 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1059 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1060 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1061 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1063 result->type = CPP_OPEN_SQUARE;
1064 result->flags |= DIGRAPH;
1066 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1068 result->type = CPP_OPEN_BRACE;
1069 result->flags |= DIGRAPH;
1074 result->type = CPP_LESS;
1079 c = get_effective_char (pfile);
1081 result->type = CPP_GREATER_EQ;
1083 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1084 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1085 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1089 result->type = CPP_GREATER;
1094 c = get_effective_char (pfile);
1096 result->type = CPP_MOD_EQ;
1097 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1099 result->flags |= DIGRAPH;
1100 result->type = CPP_HASH;
1101 if (get_effective_char (pfile) == '%')
1103 const unsigned char *pos = buffer->cur;
1105 if (get_effective_char (pfile) == ':')
1106 result->type = CPP_PASTE;
1108 buffer->cur = pos - 1;
1113 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1115 result->flags |= DIGRAPH;
1116 result->type = CPP_CLOSE_BRACE;
1121 result->type = CPP_MOD;
1126 result->type = CPP_DOT;
1127 c = get_effective_char (pfile);
1130 const unsigned char *pos = buffer->cur;
1132 if (get_effective_char (pfile) == '.')
1133 result->type = CPP_ELLIPSIS;
1135 buffer->cur = pos - 1;
1137 /* All known character sets have 0...9 contiguous. */
1138 else if (ISDIGIT (c))
1140 result->type = CPP_NUMBER;
1141 parse_number (pfile, &result->val.str, 1);
1143 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1144 result->type = CPP_DOT_STAR;
1150 c = get_effective_char (pfile);
1152 result->type = CPP_PLUS_PLUS;
1154 result->type = CPP_PLUS_EQ;
1158 result->type = CPP_PLUS;
1163 c = get_effective_char (pfile);
1166 result->type = CPP_DEREF;
1167 if (CPP_OPTION (pfile, cplusplus))
1169 if (get_effective_char (pfile) == '*')
1170 result->type = CPP_DEREF_STAR;
1176 result->type = CPP_MINUS_MINUS;
1178 result->type = CPP_MINUS_EQ;
1182 result->type = CPP_MINUS;
1187 c = get_effective_char (pfile);
1189 result->type = CPP_AND_AND;
1191 result->type = CPP_AND_EQ;
1195 result->type = CPP_AND;
1200 c = get_effective_char (pfile);
1202 result->type = CPP_OR_OR;
1204 result->type = CPP_OR_EQ;
1208 result->type = CPP_OR;
1213 c = get_effective_char (pfile);
1214 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1215 result->type = CPP_SCOPE;
1216 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1218 result->flags |= DIGRAPH;
1219 result->type = CPP_CLOSE_SQUARE;
1224 result->type = CPP_COLON;
1228 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1229 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1230 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1231 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1232 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1234 case '~': result->type = CPP_COMPL; break;
1235 case ',': result->type = CPP_COMMA; break;
1236 case '(': result->type = CPP_OPEN_PAREN; break;
1237 case ')': result->type = CPP_CLOSE_PAREN; break;
1238 case '[': result->type = CPP_OPEN_SQUARE; break;
1239 case ']': result->type = CPP_CLOSE_SQUARE; break;
1240 case '{': result->type = CPP_OPEN_BRACE; break;
1241 case '}': result->type = CPP_CLOSE_BRACE; break;
1242 case ';': result->type = CPP_SEMICOLON; break;
1244 /* @ is a punctuator in Objective C. */
1245 case '@': result->type = CPP_ATSIGN; break;
1248 if (CPP_OPTION (pfile, dollars_in_ident))
1250 /* Fall through... */
1254 result->type = CPP_OTHER;
1262 /* An upper bound on the number of bytes needed to spell TOKEN,
1263 including preceding whitespace. */
1265 cpp_token_len (token)
1266 const cpp_token *token;
1270 switch (TOKEN_SPELL (token))
1272 default: len = 0; break;
1274 case SPELL_STRING: len = token->val.str.len; break;
1275 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1277 /* 1 for whitespace, 4 for comment delimiters. */
1281 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1282 already contain enough space to hold the token's spelling.
1283 Returns a pointer to the character after the last character written. */
1286 cpp_spell_token (pfile, token, buffer)
1287 cpp_reader *pfile; /* Would be nice to be rid of this... */
1288 const cpp_token *token;
1289 unsigned char *buffer;
1291 switch (TOKEN_SPELL (token))
1293 case SPELL_OPERATOR:
1295 const unsigned char *spelling;
1298 if (token->flags & DIGRAPH)
1300 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1301 else if (token->flags & NAMED_OP)
1304 spelling = TOKEN_NAME (token);
1306 while ((c = *spelling++) != '\0')
1312 *buffer++ = token->val.c;
1317 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1318 buffer += NODE_LEN (token->val.node);
1322 memcpy (buffer, token->val.str.text, token->val.str.len);
1323 buffer += token->val.str.len;
1328 int left, right, tag;
1329 switch (token->type)
1331 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1332 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1333 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1334 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1335 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1337 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1340 if (tag) *buffer++ = tag;
1342 memcpy (buffer, token->val.str.text, token->val.str.len);
1343 buffer += token->val.str.len;
1349 cpp_ice (pfile, "unspellable token %s", TOKEN_NAME (token));
1356 /* Returns TOKEN spelt as a null-terminated string. The string is
1357 freed when the reader is destroyed. Useful for diagnostics. */
1359 cpp_token_as_text (pfile, token)
1361 const cpp_token *token;
1363 unsigned int len = cpp_token_len (token);
1364 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1366 end = cpp_spell_token (pfile, token, start);
1372 /* Used by C front ends, which really should move to using
1373 cpp_token_as_text. */
1375 cpp_type2name (type)
1376 enum cpp_ttype type;
1378 return (const char *) token_spellings[type].name;
1381 /* Writes the spelling of token to FP, without any preceding space.
1382 Separated from cpp_spell_token for efficiency - to avoid stdio
1383 double-buffering. */
1385 cpp_output_token (token, fp)
1386 const cpp_token *token;
1389 switch (TOKEN_SPELL (token))
1391 case SPELL_OPERATOR:
1393 const unsigned char *spelling;
1396 if (token->flags & DIGRAPH)
1398 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1399 else if (token->flags & NAMED_OP)
1402 spelling = TOKEN_NAME (token);
1407 while ((c = *++spelling) != '\0');
1412 putc (token->val.c, fp);
1417 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1421 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1426 int left, right, tag;
1427 switch (token->type)
1429 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1430 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1431 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1432 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1433 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1435 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1438 if (tag) putc (tag, fp);
1440 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1446 /* An error, most probably. */
1451 /* Compare two tokens. */
1453 _cpp_equiv_tokens (a, b)
1454 const cpp_token *a, *b;
1456 if (a->type == b->type && a->flags == b->flags)
1457 switch (TOKEN_SPELL (a))
1459 default: /* Keep compiler happy. */
1460 case SPELL_OPERATOR:
1463 return a->val.c == b->val.c; /* Character. */
1465 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1467 return a->val.node == b->val.node;
1470 return (a->val.str.len == b->val.str.len
1471 && !memcmp (a->val.str.text, b->val.str.text,
1478 /* Returns nonzero if a space should be inserted to avoid an
1479 accidental token paste for output. For simplicity, it is
1480 conservative, and occasionally advises a space where one is not
1481 needed, e.g. "." and ".2". */
1483 cpp_avoid_paste (pfile, token1, token2)
1485 const cpp_token *token1, *token2;
1487 enum cpp_ttype a = token1->type, b = token2->type;
1490 if (token1->flags & NAMED_OP)
1492 if (token2->flags & NAMED_OP)
1496 if (token2->flags & DIGRAPH)
1497 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1498 else if (token_spellings[b].category == SPELL_OPERATOR)
1499 c = token_spellings[b].name[0];
1501 /* Quickly get everything that can paste with an '='. */
1502 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1507 case CPP_GREATER: return c == '>' || c == '?';
1508 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1509 case CPP_PLUS: return c == '+';
1510 case CPP_MINUS: return c == '-' || c == '>';
1511 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1512 case CPP_MOD: return c == ':' || c == '>';
1513 case CPP_AND: return c == '&';
1514 case CPP_OR: return c == '|';
1515 case CPP_COLON: return c == ':' || c == '>';
1516 case CPP_DEREF: return c == '*';
1517 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1518 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1519 case CPP_NAME: return ((b == CPP_NUMBER
1520 && name_p (pfile, &token2->val.str))
1522 || b == CPP_CHAR || b == CPP_STRING); /* L */
1523 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1524 || c == '.' || c == '+' || c == '-');
1525 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1526 && token1->val.c == '@'
1527 && (b == CPP_NAME || b == CPP_STRING));
1534 /* Output all the remaining tokens on the current line, and a newline
1535 character, to FP. Leading whitespace is removed. If there are
1536 macros, special token padding is not performed. */
1538 cpp_output_line (pfile, fp)
1542 const cpp_token *token;
1544 token = cpp_get_token (pfile);
1545 while (token->type != CPP_EOF)
1547 cpp_output_token (token, fp);
1548 token = cpp_get_token (pfile);
1549 if (token->flags & PREV_WHITE)
1556 /* Returns the value of a hexadecimal digit. */
1562 return hex_value (c);
1567 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1568 failure if cpplib is not parsing C++ or C99. Such failure is
1569 silent, and no variables are updated. Otherwise returns 0, and
1570 warns if -Wtraditional.
1572 [lex.charset]: The character designated by the universal character
1573 name \UNNNNNNNN is that character whose character short name in
1574 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1575 universal character name \uNNNN is that character whose character
1576 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1577 for a universal character name is less than 0x20 or in the range
1578 0x7F-0x9F (inclusive), or if the universal character name
1579 designates a character in the basic source character set, then the
1580 program is ill-formed.
1582 We assume that wchar_t is Unicode, so we don't need to do any
1583 mapping. Is this ever wrong?
1585 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1586 LIMIT is the end of the string or charconst. PSTR is updated to
1587 point after the UCS on return, and the UCS is written into PC. */
1590 maybe_read_ucs (pfile, pstr, limit, pc)
1592 const unsigned char **pstr;
1593 const unsigned char *limit;
1596 const unsigned char *p = *pstr;
1597 unsigned int code = 0;
1598 unsigned int c = *pc, length;
1600 /* Only attempt to interpret a UCS for C++ and C99. */
1601 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1604 if (CPP_WTRADITIONAL (pfile))
1605 cpp_warning (pfile, "the meaning of '\\%c' is different in traditional C", c);
1607 length = (c == 'u' ? 4: 8);
1609 if ((size_t) (limit - p) < length)
1611 cpp_error (pfile, "incomplete universal-character-name");
1612 /* Skip to the end to avoid more diagnostics. */
1617 for (; length; length--, p++)
1621 code = (code << 4) + hex_digit_value (c);
1625 "non-hex digit '%c' in universal-character-name", c);
1626 /* We shouldn't skip in case there are multibyte chars. */
1632 #ifdef TARGET_EBCDIC
1633 cpp_error (pfile, "universal-character-name on EBCDIC target");
1634 code = 0x3f; /* EBCDIC invalid character */
1636 /* True extended characters are OK. */
1638 && !(code & 0x80000000)
1639 && !(code >= 0xD800 && code <= 0xDFFF))
1641 /* The standard permits $, @ and ` to be specified as UCNs. We use
1642 hex escapes so that this also works with EBCDIC hosts. */
1643 else if (code == 0x24 || code == 0x40 || code == 0x60)
1645 /* Don't give another error if one occurred above. */
1646 else if (length == 0)
1647 cpp_error (pfile, "universal-character-name out of range");
1655 /* Interpret an escape sequence, and return its value. PSTR points to
1656 the input pointer, which is just after the backslash. LIMIT points
1657 just past the end of the text we have. MASK is a bitmask for the precision for the
1658 destination type (char or wchar_t).
1660 Handles all relevant diagnostics. */
1662 cpp_parse_escape (pfile, pstr, limit, mask)
1664 const unsigned char **pstr;
1665 const unsigned char *limit;
1666 unsigned HOST_WIDE_INT mask;
1669 const unsigned char *str = *pstr;
1670 unsigned int c = *str++;
1674 case '\\': case '\'': case '"': case '?': break;
1675 case 'b': c = TARGET_BS; break;
1676 case 'f': c = TARGET_FF; break;
1677 case 'n': c = TARGET_NEWLINE; break;
1678 case 'r': c = TARGET_CR; break;
1679 case 't': c = TARGET_TAB; break;
1680 case 'v': c = TARGET_VT; break;
1682 case '(': case '{': case '[': case '%':
1683 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1684 '\%' is used to prevent SCCS from getting confused. */
1685 unknown = CPP_PEDANTIC (pfile);
1689 if (CPP_WTRADITIONAL (pfile))
1690 cpp_warning (pfile, "the meaning of '\\a' is different in traditional C");
1695 if (CPP_PEDANTIC (pfile))
1696 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1701 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1705 if (CPP_WTRADITIONAL (pfile))
1706 cpp_warning (pfile, "the meaning of '\\x' is different in traditional C");
1709 unsigned int i = 0, overflow = 0;
1710 int digits_found = 0;
1718 overflow |= i ^ (i << 4 >> 4);
1719 i = (i << 4) + hex_digit_value (c);
1724 cpp_error (pfile, "\\x used with no following hex digits");
1726 if (overflow | (i != (i & mask)))
1728 cpp_pedwarn (pfile, "hex escape sequence out of range");
1735 case '0': case '1': case '2': case '3':
1736 case '4': case '5': case '6': case '7':
1738 unsigned int i = c - '0';
1741 while (str < limit && ++count < 3)
1744 if (c < '0' || c > '7')
1747 i = (i << 3) + c - '0';
1750 if (i != (i & mask))
1752 cpp_pedwarn (pfile, "octal escape sequence out of range");
1767 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1769 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1773 cpp_pedwarn (pfile, "escape sequence out of range for character");
1779 #ifndef MAX_CHAR_TYPE_SIZE
1780 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1783 #ifndef MAX_WCHAR_TYPE_SIZE
1784 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1787 /* Interpret a (possibly wide) character constant in TOKEN.
1788 If WARN_MULTI is nonzero, warn about multi-character charconsts. PCHARS_SEEN points
1789 to a variable that is filled in with the number of characters seen. */
1791 cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
1793 const cpp_token *token;
1795 unsigned int *pchars_seen;
1797 const unsigned char *str = token->val.str.text;
1798 const unsigned char *limit = str + token->val.str.len;
1799 unsigned int chars_seen = 0;
1800 unsigned int width, max_chars, c;
1801 unsigned HOST_WIDE_INT mask;
1802 HOST_WIDE_INT result = 0;
1805 #ifdef MULTIBYTE_CHARS
1806 (void) local_mbtowc (NULL, NULL, 0);
1809 /* Width in bits. */
1810 if (token->type == CPP_CHAR)
1812 width = MAX_CHAR_TYPE_SIZE;
1813 unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1817 width = MAX_WCHAR_TYPE_SIZE;
1818 unsigned_p = WCHAR_UNSIGNED;
1821 if (width < HOST_BITS_PER_WIDE_INT)
1822 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1825 max_chars = HOST_BITS_PER_WIDE_INT / width;
1829 #ifdef MULTIBYTE_CHARS
1833 char_len = local_mbtowc (&wc, str, limit - str);
1836 cpp_warning (pfile, "ignoring invalid multibyte character");
1849 c = cpp_parse_escape (pfile, &str, limit, mask);
1851 #ifdef MAP_CHARACTER
1853 c = MAP_CHARACTER (c);
1856 /* Merge character into result; ignore excess chars. */
1857 if (++chars_seen <= max_chars)
1859 if (width < HOST_BITS_PER_WIDE_INT)
1860 result = (result << width) | (c & mask);
1866 if (chars_seen == 0)
1867 cpp_error (pfile, "empty character constant");
1868 else if (chars_seen > max_chars)
1870 chars_seen = max_chars;
1871 cpp_warning (pfile, "character constant too long");
1873 else if (chars_seen > 1 && warn_multi)
1874 cpp_warning (pfile, "multi-character character constant");
1876 /* If relevant type is signed, sign-extend the constant. */
1879 unsigned int nbits = chars_seen * width;
1881 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
1882 if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
1888 *pchars_seen = chars_seen;
1892 /* Memory buffers. Changing these three constants can have a dramatic
1893 effect on performance. The values here are reasonable defaults,
1894 but might be tuned. If you adjust them, be sure to test across a
1895 range of uses of cpplib, including heavy nested function-like macro
1896 expansion. Also check the change in peak memory usage (NJAMD is a
1897 good tool for this). */
/* Minimum size, in bytes, of any newly created allocation buffer.  */
#define MIN_BUFF_SIZE 8000

/* Largest free buffer that is still handed out for a request of
   MIN_SIZE bytes; anything bigger is kept for larger requests.  */
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)

/* Size to request when extending BUFF: MIN_EXTRA bytes plus twice the
   number of bytes between BUFF's cur and limit pointers.  MIN_EXTRA is
   parenthesized so that an argument containing an operator of lower
   precedence than '+' (e.g. a conditional expression) expands
   correctly.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
1903 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1904 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1917 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1918 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1920 /* Create a new allocation buffer. Place the control block at the end
1921 of the buffer, so that buffer overflows will cause immediate chaos. */
1927 unsigned char *base;
1929 if (len < MIN_BUFF_SIZE)
1930 len = MIN_BUFF_SIZE;
1931 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
1933 base = xmalloc (len + sizeof (_cpp_buff));
1934 result = (_cpp_buff *) (base + len);
1935 result->base = base;
1937 result->limit = base + len;
1938 result->next = NULL;
1942 /* Place a chain of unwanted allocation buffers on the free list. */
1944 _cpp_release_buff (pfile, buff)
1948 _cpp_buff *end = buff;
1952 end->next = pfile->free_buffs;
1953 pfile->free_buffs = buff;
1956 /* Return a free buffer of size at least MIN_SIZE. */
1958 _cpp_get_buff (pfile, min_size)
1962 _cpp_buff *result, **p;
1964 for (p = &pfile->free_buffs;; p = &(*p)->next)
1969 return new_buff (min_size);
1971 size = result->limit - result->base;
1972 /* Return a buffer that's big enough, but don't waste one that's way too big. */
1974 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1979 result->next = NULL;
1980 result->cur = result->base;
1984 /* Creates a new buffer with enough space to hold the uncommitted
1985 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1986 the excess bytes to the new buffer. Chains the new buffer after
1987 BUFF, and returns the new buffer. */
1989 _cpp_append_extend_buff (pfile, buff, min_extra)
1994 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1995 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1997 buff->next = new_buff;
1998 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2002 /* Creates a new buffer with enough space to hold the uncommitted
2003 remaining bytes of the buffer pointed to by BUFF, and at least
2004 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2005 Chains the new buffer before the buffer pointed to by BUFF, and
2006 updates the pointer to point to the new buffer. */
2008 _cpp_extend_buff (pfile, pbuff, min_extra)
2013 _cpp_buff *new_buff, *old_buff = *pbuff;
2014 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2016 new_buff = _cpp_get_buff (pfile, size);
2017 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2018 new_buff->next = old_buff;
2022 /* Free a chain of buffers starting at BUFF. */
2024 _cpp_free_buff (buff)
2029 for (; buff; buff = next)
2036 /* Allocate permanent, unaligned storage of length LEN. */
2038 _cpp_unaligned_alloc (pfile, len)
2042 _cpp_buff *buff = pfile->u_buff;
2043 unsigned char *result = buff->cur;
2045 if (len > (size_t) (buff->limit - result))
2047 buff = _cpp_get_buff (pfile, len);
2048 buff->next = pfile->u_buff;
2049 pfile->u_buff = buff;
2053 buff->cur = result + len;
2057 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2058 That buffer is used for growing allocations when saving macro
2059 replacement lists in a #define, and when parsing an answer to an
2060 assertion in #assert, #unassert or #if (and therefore possibly
2061 whilst expanding macros). It therefore must not be used by any
2062 code that they might call: specifically the lexer and the guts of the macro expander.
2065 All existing other uses clearly fit this restriction: storing
2066 registered pragmas during initialization. */
2068 _cpp_aligned_alloc (pfile, len)
2072 _cpp_buff *buff = pfile->a_buff;
2073 unsigned char *result = buff->cur;
2075 if (len > (size_t) (buff->limit - result))
2077 buff = _cpp_get_buff (pfile, len);
2078 buff->next = pfile->a_buff;
2079 pfile->a_buff = buff;
2083 buff->cur = result + len;