1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
32 #undef MULTIBYTE_CHARS
35 #ifdef MULTIBYTE_CHARS
40 /* Tokens with SPELL_STRING store their spelling in the token list,
41 and its length in token->val.str.len. */
54 enum spell_type category;
55 const unsigned char *name;
/* Spellings of the digraph tokens.  The users in this file index the
   table with the token type minus CPP_FIRST_DIGRAPH. */
58 static const unsigned char *const digraph_spellings[] =
59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
/* OP and TK each produce one token_spellings initializer: OP gives a
   SPELL_OPERATOR entry spelled S, TK gives an entry whose category is S
   and whose name is derived from the enumerator E through STRINGX
   (presumably a stringifying macro; TODO confirm).  TTYPE_TABLE is
   presumably written in terms of these two macros. */
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
/* Spelling category and name of TOKEN, looked up by its type. */
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the recorded backup position.  Requires a
   variable named buffer to be in scope where the macro is used. */
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
/* Prototypes of the file-local (static) helpers, written with the
   PARAMS macro. */
71 static void handle_newline PARAMS ((cpp_reader *));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
85 static bool trigraph_p PARAMS ((cpp_reader *));
86 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
93 static unsigned int hex_digit_value PARAMS ((unsigned int));
94 static _cpp_buff *new_buff PARAMS ((size_t));
98 Compares the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
101 cpp_ideq (token, string)
102 const cpp_token *token;
/* Tokens of any type other than CPP_NAME are handled separately from
   the name comparison below. */
105 if (token->type != CPP_NAME)
/* The name stored in the token's hash node is compared with STRING.
   The result of ustrcmp is negated, so a zero (presumably "equal", as
   with strcmp) comparison result is returned as 1. */
108 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
111 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
112 Returns with buffer->cur pointing to the character immediately
113 following the newline (combination). */
115 handle_newline (pfile)
118 cpp_buffer *buffer = pfile->buffer;
120 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
121 only accept CR-LF; maybe we should fall back to that behaviour? */
/* Comparing the sum of the two characters accepts the pair in either
   order with a single test. */
122 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
/* Record where the new line starts and clear the column adjustment
   accumulated on the previous line. */
125 buffer->line_base = buffer->cur;
126 buffer->col_adjust = 0;
130 /* Subroutine of skip_escaped_newlines; called when a 3-character
131 sequence beginning with "??" is encountered. buffer->cur points to
134 Warn if necessary, and returns true if the sequence forms a
135 trigraph and the trigraph should be honoured. */
140 cpp_buffer *buffer = pfile->buffer;
/* The character at cur[1] selects the trigraph; its entry in
   _cpp_trigraph_map is zero when it does not complete one. */
141 cppchar_t from_char = buffer->cur[1];
144 if (!_cpp_trigraph_map[from_char])
/* Whether a valid trigraph is honoured is decided by the trigraphs
   option alone. */
147 accept = CPP_OPTION (pfile, trigraphs);
149 /* Don't warn about trigraphs in comments. */
150 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
153 cpp_error_with_line (pfile, DL_WARNING,
154 pfile->line, CPP_BUF_COL (buffer) - 1,
155 "trigraph ??%c converted to %c",
157 (int) _cpp_trigraph_map[from_char]);
158 else if (buffer->cur != buffer->last_Wtrigraphs)
/* Remember the position just warned about, so that the same ignored
   trigraph is not reported again if it is examined a second time. */
160 buffer->last_Wtrigraphs = buffer->cur;
161 cpp_error_with_line (pfile, DL_WARNING,
162 pfile->line, CPP_BUF_COL (buffer) - 1,
163 "trigraph ??%c ignored", (int) from_char);
170 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
171 lie in buffer->cur[-1]. Returns the next byte, which will be in
172 buffer->cur[-1]. This routine performs preprocessing stages 1 and
173 2 of the ISO C standard. */
175 skip_escaped_newlines (pfile)
178 cpp_buffer *buffer = pfile->buffer;
/* The character that brought us here has already been consumed. */
179 cppchar_t next = buffer->cur[-1];
181 /* Only do this if we apply stages 1 and 2. */
182 if (!buffer->from_stage3)
184 const unsigned char *saved_cur;
/* A trigraph needs a second question mark at cur[0] and the approval
   of trigraph_p. */
191 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
194 /* Translate the trigraph. */
195 next = _cpp_trigraph_map[buffer->cur[1]];
201 if (buffer->cur == buffer->rlimit)
204 /* We have a backslash, and room for at least one more
205 character. Skip horizontal whitespace. */
206 saved_cur = buffer->cur;
208 next1 = *buffer->cur++;
209 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
211 if (!is_vspace (next1))
/* No newline followed the backslash (and any blanks after it): put
   the read pointer back where it was. */
213 buffer->cur = saved_cur;
/* More than one character was consumed, so blanks separated the
   backslash from the newline. */
217 if (saved_cur != buffer->cur - 1
218 && !pfile->state.lexing_comment)
219 cpp_error (pfile, DL_WARNING,
220 "backslash and newline separated by space");
222 handle_newline (pfile);
/* Move the backup point past the spliced newline. */
223 buffer->backup_to = buffer->cur;
224 if (buffer->cur == buffer->rlimit)
226 cpp_error (pfile, DL_PEDWARN,
227 "backslash-newline at end of file");
231 next = *buffer->cur++;
/* Go round again while the character just obtained could itself begin
   a trigraph or an escaped newline. */
233 while (next == '\\' || next == '?');
239 /* Obtain the next character, after trigraph conversion and skipping
240 an arbitrarily long string of escaped newlines. The common case of
241 no trigraphs or escaped newlines falls through quickly. On return,
242 buffer->backup_to points to where to return to if the character is
243 not to be processed. */
245 get_effective_char (pfile)
249 cpp_buffer *buffer = pfile->buffer;
/* Record the position the BACKUP macro rewinds to if the caller
   decides not to use the character. */
251 buffer->backup_to = buffer->cur;
252 next = *buffer->cur++;
/* Only '?' and '\\' need the slow path; the __builtin_expect hint
   marks that branch as the unlikely one. */
253 if (__builtin_expect (next == '?' || next == '\\', 0))
254 next = skip_escaped_newlines (pfile);
259 /* Skip a C-style block comment. We find the end of the comment by
260 seeing if an asterisk is before every '/' we encounter. Returns
261 non-zero if comment terminated by EOF, zero otherwise. */
263 skip_block_comment (pfile)
266 cpp_buffer *buffer = pfile->buffer;
/* c is the current character and prevc the one before it.  Both start
   as EOF.  The value returned at the end of the function is zero only
   when the last two characters seen were an asterisk followed by a
   slash. */
267 cppchar_t c = EOF, prevc = EOF;
/* lexing_comment stays set for the duration of the scan; the trigraph
   and escaped-newline helpers test it before issuing warnings. */
269 pfile->state.lexing_comment = 1;
270 while (buffer->cur != buffer->rlimit)
272 prevc = c, c = *buffer->cur++;
274 /* FIXME: For speed, create a new character class of characters
275 of interest inside block comments. */
276 if (c == '?' || c == '\\')
277 c = skip_escaped_newlines (pfile);
279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
286 /* Warn about potential nested comments, but not if the '/'
287 comes immediately before the true comment delimiter.
288 Don't bother to get it right across escaped newlines. */
289 if (CPP_OPTION (pfile, warn_comments)
290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
295 else if (is_vspace (c))
296 handle_newline (pfile);
298 adjust_column (pfile);
301 pfile->state.lexing_comment = 0;
302 return c != '/' || prevc != '*';
305 /* Skip a C++ line comment, leaving buffer->cur pointing to the
306 terminating newline. Handles escaped newlines. Returns non-zero
307 if a multiline comment. */
309 skip_line_comment (pfile)
312 cpp_buffer *buffer = pfile->buffer;
/* Remember the starting line: the return value reports whether
   pfile->line has changed by the time the comment ends. */
313 unsigned int orig_line = pfile->line;
/* lexing_comment is set while scanning and cleared again before
   returning. */
316 pfile->state.lexing_comment = 1;
/* Test for the end of the buffer. */
319 if (buffer->cur == buffer->rlimit)
323 if (c == '?' || c == '\\')
324 c = skip_escaped_newlines (pfile);
/* Keep reading until a vertical-space (newline) character is met. */
326 while (!is_vspace (c));
328 /* Step back over the newline, except at EOF. */
332 pfile->state.lexing_comment = 0;
333 return orig_line != pfile->line;
336 /* pfile->buffer->cur is one beyond the \t character. Update
337 col_adjust so we track the column correctly. */
339 adjust_column (pfile)
342 cpp_buffer *buffer = pfile->buffer;
343 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
345 /* Round it up to multiple of the tabstop, but subtract 1 since the
346 tab itself occupies a character position. */
/* For example, with a tabstop of 8 and col equal to 3 the adjustment
   grows by 8 - 3 - 1 = 4. */
347 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
348 - col % CPP_OPTION (pfile, tabstop)) - 1;
351 /* Skips whitespace, saving the next non-whitespace character.
352 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
353 tokens might be assigned an incorrect column. */
355 skip_whitespace (pfile, c)
359 cpp_buffer *buffer = pfile->buffer;
/* NOTE(review): warned presumably limits the null-character warning to
   one per call; the test that uses it is not visible here. */
360 unsigned int warned = 0;
364 /* Horizontal space always OK. */
368 adjust_column (pfile);
369 /* Just \f \v or \0 left. */
/* True when the character just read was the one located at rlimit,
   presumably the end-of-buffer sentinel rather than a null in the
   input; TODO confirm. */
372 if (buffer->cur - 1 == buffer->rlimit)
376 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
/* Form feed and vertical tab draw a pedantic warning when they occur
   inside a directive. */
380 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
381 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
382 CPP_BUF_COL (buffer),
383 "%s in preprocessing directive",
384 c == '\f' ? "form feed" : "vertical tab");
388 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
389 while (is_nvspace (c));
395 /* See if the characters of a number token are valid in a name (no
398 name_p (pfile, string)
400 const cpp_string *string;
404 for (i = 0; i < string->len; i++)
405 if (!is_idchar (string->text[i]))
411 /* Parse an identifier, skipping embedded backslash-newlines. This is
412 a critical inner loop. The common case is an identifier which has
413 not been split by backslash-newline, does not contain a dollar
414 sign, and has already been scanned (roughly 10:1 ratio of
415 seen:unseen identifiers in normal code; the distribution is
416 Poisson-like). Second most common case is a new identifier, not
417 split and no dollar sign. The other possibilities are rare and
418 have been relegated to parse_slow. */
419 static cpp_hashnode *
420 parse_identifier (pfile)
423 cpp_hashnode *result;
424 const uchar *cur, *base;
426 /* Fast-path loop. Skim over a normal identifier.
427 N.B. ISIDNUM does not include $. */
428 cur = pfile->buffer->cur;
429 while (ISIDNUM (*cur))
432 /* Check for slow-path cases. */
433 if (*cur == '?' || *cur == '\\' || *cur == '$')
/* parse_slow builds the spelling on the hash table's obstack and
   returns it; the 0 says the token is not a number.  HT_ALLOCED
   presumably tells ht_lookup that the string is already allocated. */
437 base = parse_slow (pfile, cur, 0, &len);
438 result = (cpp_hashnode *)
439 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
/* Fast path: the identifier starts one character before the buffer's
   read pointer and ends at cur, so it is looked up straight from the
   input buffer with HT_ALLOC. */
443 base = pfile->buffer->cur - 1;
444 pfile->buffer->cur = cur;
445 result = (cpp_hashnode *)
446 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
449 /* Rarely, identifiers require diagnostics when lexed.
450 XXX Has to be forced out of the fast path. */
451 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
452 && !pfile->state.skipping, 0))
454 /* It is allowed to poison the same identifier twice. */
455 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
456 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
459 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
460 replacement list of a variadic macro. */
461 if (result == pfile->spec_nodes.n__VA_ARGS__
462 && !pfile->state.va_args_ok)
463 cpp_error (pfile, DL_PEDWARN,
464 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
470 /* Slow path. This handles numbers and identifiers which have been
471 split, or contain dollar signs. The part of the token from
472 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
473 1 if it's a number, and 2 if it has a leading period. Returns a
474 pointer to the token's NUL-terminated spelling in permanent
475 storage, and sets PLEN to its length. */
477 parse_slow (pfile, cur, number_p, plen)
483 cpp_buffer *buffer = pfile->buffer;
484 const uchar *base = buffer->cur - 1;
/* The spelling is accumulated on the hash table's obstack. */
485 struct obstack *stack = &pfile->hash_table->stack;
486 unsigned int c, prevc, saw_dollar = 0;
488 /* Place any leading period. */
490 obstack_1grow (stack, '.');
492 /* Copy the part of the token which is known to be okay. */
493 obstack_grow (stack, base, cur - base);
495 /* Now process the part which isn't. We are looking at one of
496 '$', '\\', or '?' on entry to this loop. */
502 /* Potential escaped newline? */
503 buffer->backup_to = buffer->cur - 1;
504 if (c == '?' || c == '\\')
505 c = skip_escaped_newlines (pfile);
511 if (c != '.' && !VALID_SIGN (c, prevc))
515 /* Handle normal identifier characters in this loop. */
519 obstack_1grow (stack, c);
526 while (is_idchar (c));
529 /* Step back over the unwanted char. */
532 /* $ is not an identifier character in the standard, but is commonly
533 accepted as an extension. Don't warn about it in skipped
534 conditional blocks. */
535 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
536 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
538 /* Identifiers and numbers are null-terminated. */
/* The length is taken before the terminator is appended, so the value
   stored through PLEN excludes the NUL. */
539 *plen = obstack_object_size (stack);
540 obstack_1grow (stack, '\0');
541 return obstack_finish (stack);
544 /* Parse a number whose first character lies at buffer->cur[-1],
545 skipping embedded backslash-newlines. LEADING_PERIOD is non-zero if
546 there was a "." before it. Place the result in NUMBER. */
548 parse_number (pfile, number, leading_period)
555 /* Fast-path loop. Skim over a normal number.
556 N.B. ISIDNUM does not include $. */
557 cur = pfile->buffer->cur;
558 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
561 /* Check for slow-path cases. */
562 if (*cur == '?' || *cur == '\\' || *cur == '$')
/* The third argument of parse_slow is 1 for a number and 2 for a
   number with a leading period. */
563 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
/* Fast path: the digits run from one before the read pointer to cur. */
566 const uchar *base = pfile->buffer->cur - 1;
/* The stored length also counts the leading period, which is not part
   of the range [base, cur); one extra byte is allocated for the NUL. */
569 number->len = cur - base + leading_period;
570 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
571 dest[number->len] = '\0';
576 memcpy (dest, base, cur - base);
577 pfile->buffer->cur = cur;
581 /* Subroutine of parse_string. */
583 unescaped_terminator_p (pfile, dest)
585 const unsigned char *dest;
587 const unsigned char *start, *temp;
589 /* In #include-style directives, terminators are not escapable. */
590 if (pfile->state.angled_headers)
/* The backward scan over backslashes that follows stops at the front
   of u_buff, where the string under construction begins.  The final
   result is nonzero when the number of backslashes immediately before
   DEST is even (zero included). */
593 start = BUFF_FRONT (pfile->u_buff);
595 /* An odd number of consecutive backslashes represents an escaped
597 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
600 return ((dest - temp) & 1) == 0;
603 /* Parses a string, character constant, or angle-bracketed header file
604 name. Handles embedded trigraphs and escaped newlines. The stored
605 string is guaranteed NUL-terminated, but it is not guaranteed that
606 this is the first NUL since embedded NULs are preserved.
608 When this function returns, buffer->cur points to the next
609 character to be processed. */
611 parse_string (pfile, token, terminator)
614 cppchar_t terminator;
616 cpp_buffer *buffer = pfile->buffer;
617 unsigned char *dest, *limit;
619 bool warned_nulls = false;
/* The string is assembled in place at the front of u_buff.  At the end
   of the function the token is pointed at that text and the front of
   u_buff is advanced to dest + 1, i.e. past the text and one more
   byte. */
621 dest = BUFF_FRONT (pfile->u_buff);
622 limit = BUFF_LIMIT (pfile->u_buff);
626 /* We need room for another char, possibly the terminating NUL. */
627 if ((size_t) (limit - dest) < 1)
/* Both pointers are recomputed from the offset after extending,
   presumably because _cpp_extend_buff may relocate the buffer. */
629 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
630 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
631 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
632 limit = BUFF_LIMIT (pfile->u_buff);
635 /* Handle trigraphs, escaped newlines etc. */
637 if (c == '?' || c == '\\')
638 c = skip_escaped_newlines (pfile);
642 if (unescaped_terminator_p (pfile, dest))
645 else if (is_vspace (c))
647 /* No string literal may extend over multiple lines. In
648 assembly language, suppress the error except for <>
649 includes. This is a kludge around not knowing where
652 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
653 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
660 if (buffer->cur - 1 == buffer->rlimit)
665 cpp_error (pfile, DL_WARNING,
666 "null character(s) preserved in literal");
675 token->val.str.text = BUFF_FRONT (pfile->u_buff);
676 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
677 BUFF_FRONT (pfile->u_buff) = dest + 1;
680 /* The stored comment includes the comment start and any terminator. */
682 save_comment (pfile, token, from, type)
685 const unsigned char *from;
688 unsigned char *buffer;
689 unsigned int len, clen;
691 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
693 /* C++ comments probably (not definitely) have moved past a new
694 line, which we don't want to save in the comment. */
695 if (is_vspace (pfile->buffer->cur[-1]))
698 /* If we are currently in a directive, then we need to store all
699 C++ comments as C comments internally, and so we need to
700 allocate a little extra space in that case.
702 Note that the only time we encounter a directive here is
703 when we are saving comments in a "#define". */
/* TYPE equal to '/' marks a line comment.  The two extra bytes hold
   the closing delimiter that is written at the end of the function. */
704 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
706 buffer = _cpp_unaligned_alloc (pfile, clen);
708 token->type = CPP_COMMENT;
709 token->val.str.len = clen;
710 token->val.str.text = buffer;
/* Everything after the first byte is copied verbatim from FROM; the
   first byte is presumably set on a line not visible here. */
713 memcpy (buffer + 1, from, len - 1);
715 /* Finish conversion to a C comment, if necessary. */
716 if (pfile->state.in_directive && type == '/')
719 buffer[clen - 2] = '*';
720 buffer[clen - 1] = '/';
724 /* Allocate COUNT tokens for RUN. */
726 _cpp_init_tokenrun (run, count)
/* base points at a newly allocated array of COUNT tokens and limit at
   one past its last element. */
730 run->base = xnewvec (cpp_token, count);
731 run->limit = run->base + count;
735 /* Returns the next tokenrun, or creates one if there is none. */
740 if (run->next == NULL)
/* Create the successor on demand, link it back to RUN and give it room
   for 250 tokens. */
742 run->next = xnew (tokenrun);
743 run->next->prev = run;
744 _cpp_init_tokenrun (run->next, 250);
750 /* Allocate a single token that is invalidated at the same time as the
751 rest of the tokens on the line. Has its line and col set to the
752 same as the last lexed token, so that diagnostics appear in the
755 _cpp_temp_token (pfile)
758 cpp_token *old, *result;
760 old = pfile->cur_token - 1;
761 if (pfile->cur_token == pfile->cur_run->limit)
763 pfile->cur_run = next_tokenrun (pfile->cur_run);
764 pfile->cur_token = pfile->cur_run->base;
767 result = pfile->cur_token++;
768 result->line = old->line;
769 result->col = old->col;
773 /* Lex a token into RESULT (external interface). Takes care of issues
774 like directive handling, token lookahead, multiple include
775 optimization and skipping. */
777 _cpp_lex_token (pfile)
/* Move on to the next token run when the current one is full. */
784 if (pfile->cur_token == pfile->cur_run->limit)
786 pfile->cur_run = next_tokenrun (pfile->cur_run);
787 pfile->cur_token = pfile->cur_run->base;
/* With lookaheads pending the next token is taken from the run as it
   stands; otherwise (presumably the else branch, not visible here) a
   new token is lexed with _cpp_lex_direct. */
790 if (pfile->lookaheads)
793 result = pfile->cur_token++;
796 result = _cpp_lex_direct (pfile);
798 if (result->flags & BOL)
800 /* Is this a directive? If _cpp_handle_directive returns
801 false, it is an assembler #. */
802 if (result->type == CPP_HASH
803 /* 6.10.3 p 11: Directives in a list of macro arguments
804 gives undefined behavior. This implementation
805 handles the directive as normal. */
806 && pfile->state.parsing_args != 1
807 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
/* The line_change callback, when installed, is invoked for tokens at
   the beginning of a line unless we are skipping. */
809 if (pfile->cb.line_change && !pfile->state.skipping)
810 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
813 /* We don't skip tokens in directives. */
814 if (pfile->state.in_directive)
817 /* Outside a directive, invalidate controlling macros. At file
818 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
819 get here and MI optimisation works. */
820 pfile->mi_valid = false;
/* While skipping, CPP_EOF is the only token type that satisfies this
   test. */
822 if (!pfile->state.skipping || result->type == CPP_EOF)
/* Give result the type THEN_TYPE when the next effective character
   equals CHAR, and ELSE_TYPE otherwise.  Uses the variables pfile and
   result of the enclosing function. */
829 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
831 if (get_effective_char (pfile) == CHAR) \
832 result->type = THEN_TYPE; \
836 result->type = ELSE_TYPE; \
840 /* Lex a token into pfile->cur_token, which is also incremented, to
841 get diagnostics pointing to the correct location.
843 Does not handle issues such as token lookahead, multiple-include
844 optimisation, directives, skipping etc. This function is only
845 suitable for use by _cpp_lex_token, and in special cases like
846 lex_expansion_token which doesn't care for any of these issues.
848 When meeting a newline, returns CPP_EOF if parsing a directive,
849 otherwise returns to the start of the token buffer if permissible.
850 Returns the location of the lexed token. */
852 _cpp_lex_direct (pfile)
857 const unsigned char *comment_start;
/* The token being lexed occupies the current slot of the token run;
   cur_token is advanced past it straight away. */
858 cpp_token *result = pfile->cur_token++;
861 buffer = pfile->buffer;
/* Flags left behind by the previous call (for instance BOL after a
   newline) become the initial flags of this token. */
862 result->flags = buffer->saved_flags;
863 buffer->saved_flags = 0;
865 result->line = pfile->line;
869 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
874 case ' ': case '\t': case '\f': case '\v': case '\0':
875 result->flags |= PREV_WHITE;
876 if (skip_whitespace (pfile, c))
881 buffer->saved_flags = BOL;
882 if (!pfile->state.parsing_args && !pfile->state.in_directive)
884 if (buffer->cur != buffer->line_base)
886 /* Non-empty files should end in a newline. Don't warn
887 for command line and _Pragma buffers. */
888 if (!buffer->from_stage3)
889 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
890 handle_newline (pfile);
893 /* Don't pop the last buffer. */
/* return_at_eof is copied out before the buffer is popped, presumably
   because the buffer must not be read afterwards. */
896 unsigned char stop = buffer->return_at_eof;
898 _cpp_pop_buffer (pfile);
903 result->type = CPP_EOF;
906 case '\n': case '\r':
907 handle_newline (pfile);
908 buffer->saved_flags = BOL;
909 if (! pfile->state.in_directive)
911 if (pfile->state.parsing_args == 2)
912 buffer->saved_flags |= PREV_WHITE;
913 if (!pfile->keep_tokens)
/* Tokens are not being kept, so lexing restarts at the first slot of
   the base run. */
915 pfile->cur_run = &pfile->base_run;
916 result = pfile->base_run.base;
917 pfile->cur_token = result + 1;
921 result->type = CPP_EOF;
926 /* These could start an escaped newline, or '?' a trigraph. Let
927 skip_escaped_newlines do all the work. */
929 unsigned int line = pfile->line;
931 c = skip_escaped_newlines (pfile);
932 if (line != pfile->line)
935 /* We had at least one escaped newline of some sort.
936 Update the token's line and column. */
937 goto update_tokens_line;
941 /* We are either the original '?' or '\\', or a trigraph. */
943 result->type = CPP_QUERY;
950 case '0': case '1': case '2': case '3': case '4':
951 case '5': case '6': case '7': case '8': case '9':
952 result->type = CPP_NUMBER;
953 parse_number (pfile, &result->val.str, 0);
957 /* 'L' may introduce wide characters or strings. */
/* The position is saved before looking ahead, presumably so that it
   can be restored when no quote follows (that code is not visible
   here). */
959 const unsigned char *pos = buffer->cur;
961 c = get_effective_char (pfile);
962 if (c == '\'' || c == '"')
964 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
965 parse_string (pfile, result, c);
974 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
975 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
976 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
977 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
979 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
980 case 'G': case 'H': case 'I': case 'J': case 'K':
981 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
982 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
984 result->type = CPP_NAME;
985 result->val.node = parse_identifier (pfile);
987 /* Convert named operators to their proper types. */
988 if (result->val.node->flags & NODE_OPERATOR)
990 result->flags |= NAMED_OP;
991 result->type = result->val.node->value.operator;
997 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
998 parse_string (pfile, result, c);
1002 /* A potential block or line comment. */
1003 comment_start = buffer->cur;
1004 c = get_effective_char (pfile);
1008 if (skip_block_comment (pfile))
1009 cpp_error (pfile, DL_ERROR, "unterminated comment");
1011 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1012 || CPP_IN_SYSTEM_HEADER (pfile)))
1014 /* Warn about comments only if pedantically GNUC89, and not
1015 in system headers. */
1016 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1017 && ! buffer->warned_cplusplus_comments)
1019 cpp_error (pfile, DL_PEDWARN,
1020 "C++ style comments are not allowed in ISO C89");
1021 cpp_error (pfile, DL_PEDWARN,
1022 "(this will be reported only once per input file)");
1023 buffer->warned_cplusplus_comments = 1;
1026 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1027 cpp_error (pfile, DL_WARNING, "multi-line comment");
1031 result->type = CPP_DIV_EQ;
1037 result->type = CPP_DIV;
1041 if (!pfile->state.save_comments)
1043 result->flags |= PREV_WHITE;
1044 goto update_tokens_line;
1047 /* Save the comment as a token in its own right. */
1048 save_comment (pfile, result, comment_start, c);
1052 if (pfile->state.angled_headers)
1054 result->type = CPP_HEADER_NAME;
1055 parse_string (pfile, result, '>');
1059 c = get_effective_char (pfile);
1061 result->type = CPP_LESS_EQ;
1063 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1064 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1065 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1066 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1068 result->type = CPP_OPEN_SQUARE;
1069 result->flags |= DIGRAPH;
1071 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1073 result->type = CPP_OPEN_BRACE;
1074 result->flags |= DIGRAPH;
1079 result->type = CPP_LESS;
1084 c = get_effective_char (pfile);
1086 result->type = CPP_GREATER_EQ;
1088 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1089 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1090 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1094 result->type = CPP_GREATER;
1099 c = get_effective_char (pfile);
1101 result->type = CPP_MOD_EQ;
1102 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1104 result->flags |= DIGRAPH;
1105 result->type = CPP_HASH;
1106 if (get_effective_char (pfile) == '%')
/* pos is the position just after the second percent sign.  If no colon
   completes the four-character digraph, the read pointer is put back
   on that percent sign (pos - 1). */
1108 const unsigned char *pos = buffer->cur;
1110 if (get_effective_char (pfile) == ':')
1111 result->type = CPP_PASTE;
1113 buffer->cur = pos - 1;
1118 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1120 result->flags |= DIGRAPH;
1121 result->type = CPP_CLOSE_BRACE;
1126 result->type = CPP_MOD;
1131 result->type = CPP_DOT;
1132 c = get_effective_char (pfile);
/* Same save-and-restore scheme for an ellipsis: without a third period
   the read pointer returns to pos - 1. */
1135 const unsigned char *pos = buffer->cur;
1137 if (get_effective_char (pfile) == '.')
1138 result->type = CPP_ELLIPSIS;
1140 buffer->cur = pos - 1;
1142 /* All known character sets have 0...9 contiguous. */
1143 else if (ISDIGIT (c))
1145 result->type = CPP_NUMBER;
1146 parse_number (pfile, &result->val.str, 1);
1148 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1149 result->type = CPP_DOT_STAR;
1155 c = get_effective_char (pfile);
1157 result->type = CPP_PLUS_PLUS;
1159 result->type = CPP_PLUS_EQ;
1163 result->type = CPP_PLUS;
1168 c = get_effective_char (pfile);
1171 result->type = CPP_DEREF;
1172 if (CPP_OPTION (pfile, cplusplus))
1174 if (get_effective_char (pfile) == '*')
1175 result->type = CPP_DEREF_STAR;
1181 result->type = CPP_MINUS_MINUS;
1183 result->type = CPP_MINUS_EQ;
1187 result->type = CPP_MINUS;
1192 c = get_effective_char (pfile);
1194 result->type = CPP_AND_AND;
1196 result->type = CPP_AND_EQ;
1200 result->type = CPP_AND;
1205 c = get_effective_char (pfile);
1207 result->type = CPP_OR_OR;
1209 result->type = CPP_OR_EQ;
1213 result->type = CPP_OR;
1218 c = get_effective_char (pfile);
1219 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1220 result->type = CPP_SCOPE;
1221 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1223 result->flags |= DIGRAPH;
1224 result->type = CPP_CLOSE_SQUARE;
1229 result->type = CPP_COLON;
1233 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1234 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1235 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1236 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1237 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1239 case '~': result->type = CPP_COMPL; break;
1240 case ',': result->type = CPP_COMMA; break;
1241 case '(': result->type = CPP_OPEN_PAREN; break;
1242 case ')': result->type = CPP_CLOSE_PAREN; break;
1243 case '[': result->type = CPP_OPEN_SQUARE; break;
1244 case ']': result->type = CPP_CLOSE_SQUARE; break;
1245 case '{': result->type = CPP_OPEN_BRACE; break;
1246 case '}': result->type = CPP_CLOSE_BRACE; break;
1247 case ';': result->type = CPP_SEMICOLON; break;
1249 /* @ is a punctuator in Objective C. */
1250 case '@': result->type = CPP_ATSIGN; break;
/* NOTE(review): presumably the dollar-sign case (its label is not
   visible here); the character is treated specially only when the
   dollars_in_ident option is set. */
1253 if (CPP_OPTION (pfile, dollars_in_ident))
1255 /* Fall through... */
1259 result->type = CPP_OTHER;
1267 /* An upper bound on the number of bytes needed to spell TOKEN,
1268 including preceding whitespace. */
1270 cpp_token_len (token)
1271 const cpp_token *token;
1275 switch (TOKEN_SPELL (token))
/* Only string- and identifier-spelled tokens contribute a variable
   length; every other spelling category starts from zero. */
1277 default: len = 0; break;
1279 case SPELL_STRING: len = token->val.str.len; break;
1280 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1282 /* 1 for whitespace, 4 for comment delimiters. */
1286 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1287 already contain enough space to hold the token's spelling.
1288 Returns a pointer to the character after the last character
1291 cpp_spell_token (pfile, token, buffer)
1292 cpp_reader *pfile; /* Would be nice to be rid of this... */
1293 const cpp_token *token;
1294 unsigned char *buffer;
/* The way a token is written out depends on its spelling category. */
1296 switch (TOKEN_SPELL (token))
1298 case SPELL_OPERATOR:
1300 const unsigned char *spelling;
/* Digraph tokens take their spelling from digraph_spellings, indexed
   relative to CPP_FIRST_DIGRAPH; plain operators use the name stored
   in the spelling table. */
1303 if (token->flags & DIGRAPH)
1305 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1306 else if (token->flags & NAMED_OP)
1309 spelling = TOKEN_NAME (token);
1311 while ((c = *spelling++) != '\0')
1317 *buffer++ = token->val.c;
1322 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1323 buffer += NODE_LEN (token->val.node);
1327 memcpy (buffer, token->val.str.text, token->val.str.len);
1328 buffer += token->val.str.len;
1333 int left, right, tag;
/* Choose the delimiters for the literal and, for the wide variants,
   the L prefix. */
1334 switch (token->type)
1336 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1337 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1338 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1339 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1340 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1342 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1343 TOKEN_NAME (token));
1346 if (tag) *buffer++ = tag;
1348 memcpy (buffer, token->val.str.text, token->val.str.len);
1349 buffer += token->val.str.len;
1355 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1362 /* Returns TOKEN spelt as a null-terminated string. The string is
1363 freed when the reader is destroyed. Useful for diagnostics. */
1365 cpp_token_as_text (pfile, token)
1367 const cpp_token *token;
/* cpp_token_len supplies the size of the allocation that
   cpp_spell_token then writes the spelling into. */
1369 unsigned int len = cpp_token_len (token);
1370 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
/* end receives the pointer returned by cpp_spell_token. */
1372 end = cpp_spell_token (pfile, token, start);
1378 /* Used by C front ends, which really should move to using
1379 cpp_token_as_text. */
1381 cpp_type2name (type)
1382 enum cpp_ttype type;
/* Plain table lookup; TYPE is used as the index without a range
   check. */
1384 return (const char *) token_spellings[type].name;
1387 /* Writes the spelling of token to FP, without any preceding space.
1388 Separated from cpp_spell_token for efficiency - to avoid stdio
1389 double-buffering. */
1391 cpp_output_token (token, fp)
1392 const cpp_token *token;
/* Mirrors the category dispatch of cpp_spell_token, but writes to FP
   with putc and fwrite instead of filling a memory buffer. */
1395 switch (TOKEN_SPELL (token))
1397 case SPELL_OPERATOR:
1399 const unsigned char *spelling;
1402 if (token->flags & DIGRAPH)
1404 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1405 else if (token->flags & NAMED_OP)
1408 spelling = TOKEN_NAME (token);
/* Tail of a loop whose body is not visible here: the pointer is
   advanced before the next character is fetched and tested. */
1413 while ((c = *++spelling) != '\0');
1418 putc (token->val.c, fp);
1423 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1427 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1432 int left, right, tag;
1433 switch (token->type)
1435 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1436 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1437 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1438 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1439 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* This function receives no cpp_reader, so the complaint is printed
   directly on stderr rather than through cpp_error. */
1441 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1444 if (tag) putc (tag, fp);
1446 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1452 /* An error, most probably. */
1457 /* Compare two tokens.  A and B are examined further only when both
   their types and their flags are equal; the rest of the comparison
   depends on the spelling category of A.  */
1459 _cpp_equiv_tokens (a, b)
1460 const cpp_token *a, *b;
1462 if (a->type == b->type && a->flags == b->flags)
1463 switch (TOKEN_SPELL (a))
1465 default: /* Keep compiler happy. */
1466 case SPELL_OPERATOR:
1469 return a->val.c == b->val.c; /* Character. */
/* Tokens of type CPP_MACRO_ARG must also agree on the argument
   number; any other type passes this test unconditionally.  */
1471 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Nodes are compared by pointer identity.  */
1473 return a->val.node == b->val.node;
/* Stored text must match in length and, via memcmp, in content.  */
1476 return (a->val.str.len == b->val.str.len
1477 && !memcmp (a->val.str.text, b->val.str.text,
1484 /* Returns nonzero if a space should be inserted to avoid an
1485 accidental token paste for output. For simplicity, it is
1486 conservative, and occasionally advises a space where one is not
1487 needed, e.g. "." and ".2".

   TOKEN1 is the token already output and TOKEN2 the one about to
   follow it.  PFILE is consulted for name_p and for the objc
   option.  */
1489 cpp_avoid_paste (pfile, token1, token2)
1491 const cpp_token *token1, *token2;
/* A and B are the types of the left and right token respectively.  */
1493 enum cpp_ttype a = token1->type, b = token2->type;
/* The NAMED_OP flag of each token is checked before the spelling of
   TOKEN2 is looked at.  */
1496 if (token1->flags & NAMED_OP)
1498 if (token2->flags & NAMED_OP)
/* C becomes the first character of TOKEN2's spelling when TOKEN2 is
   a digraph or an operator.  */
1502 if (token2->flags & DIGRAPH)
1503 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1504 else if (token_spellings[b].category == SPELL_OPERATOR)
1505 c = token_spellings[b].name[0];
1507 /* Quickly get everything that can paste with an '='. */
1508 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per-type table: for each left-hand type A, the result is nonzero
   when C (or, in some rows, the right-hand type B) is one of the
   listed values.  */
1513 case CPP_GREATER: return c == '>' || c == '?';
1514 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1515 case CPP_PLUS: return c == '+';
1516 case CPP_MINUS: return c == '-' || c == '>';
1517 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1518 case CPP_MOD: return c == ':' || c == '>';
1519 case CPP_AND: return c == '&';
1520 case CPP_OR: return c == '|';
1521 case CPP_COLON: return c == ':' || c == '>';
1522 case CPP_DEREF: return c == '*';
1523 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1524 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1525 case CPP_NAME: return ((b == CPP_NUMBER
1526 && name_p (pfile, &token2->val.str))
1528 || b == CPP_CHAR || b == CPP_STRING); /* L */
1529 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1530 || c == '.' || c == '+' || c == '-');
/* A CPP_OTHER token matters only when the objc option is set and the
   token is '@' followed by a name or a string.  */
1531 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1532 && token1->val.c == '@'
1533 && (b == CPP_NAME || b == CPP_STRING));
1540 /* Output all the remaining tokens on the current line, and a newline
1541 character, to FP. Leading whitespace is removed. If there are
1542 macros, special token padding is not performed.

   Tokens are obtained from PFILE with cpp_get_token and written with
   cpp_output_token.  */
1544 cpp_output_line (pfile, fp)
1548 const cpp_token *token;
/* Fetch the first token, then loop until CPP_EOF is returned.  */
1550 token = cpp_get_token (pfile);
1551 while (token->type != CPP_EOF)
1553 cpp_output_token (token, fp);
/* The next token is fetched before its PREV_WHITE flag is tested, so
   the test concerns whitespace in front of the token still to be
   written.  */
1554 token = cpp_get_token (pfile);
1555 if (token->flags & PREV_WHITE)
1562 /* Returns the value of a hexadecimal digit.  The conversion itself
   is delegated to hex_value.  */
1568 return hex_value (c);
1573 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1574 failure if cpplib is not parsing C++ or C99. Such failure is
1575 silent, and no variables are updated. Otherwise returns 0, and
1576 warns if -Wtraditional.
1578 [lex.charset]: The character designated by the universal character
1579 name \UNNNNNNNN is that character whose character short name in
1580 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1581 universal character name \uNNNN is that character whose character
1582 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1583 for a universal character name is less than 0x20 or in the range
1584 0x7F-0x9F (inclusive), or if the universal character name
1585 designates a character in the basic source character set, then the
1586 program is ill-formed.
1588 We assume that wchar_t is Unicode, so we don't need to do any
1589 mapping. Is this ever wrong?
1591 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1592 LIMIT is the end of the string or charconst. PSTR is updated to
1593 point after the UCS on return, and the UCS is written into PC. */
1596 maybe_read_ucs (pfile, pstr, limit, pc)
1598 const unsigned char **pstr;
1599 const unsigned char *limit;
/* P scans the digits, CODE accumulates the value, and C starts out
   as the introducing character read through PC.  */
1602 const unsigned char *p = *pstr;
1603 unsigned int code = 0;
1604 unsigned int c = *pc, length;
1606 /* Only attempt to interpret a UCS for C++ and C99. */
1607 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1610 if (CPP_WTRADITIONAL (pfile))
1611 cpp_error (pfile, DL_WARNING,
1612 "the meaning of '\\%c' is different in traditional C", c);
/* A 'u' introduces four hex digits; otherwise eight are expected.  */
1614 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1616 if ((size_t) (limit - p) < length)
1618 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1619 /* Skip to the end to avoid more diagnostics. */
/* Consume the digits, folding each into CODE four bits at a time.
   LENGTH counts down, so it is zero only if the loop runs to
   completion.  */
1624 for (; length; length--, p++)
1628 code = (code << 4) + hex_digit_value (c);
1631 cpp_error (pfile, DL_ERROR,
1632 "non-hex digit '%c' in universal-character-name", c);
1633 /* We shouldn't skip in case there are multibyte chars. */
1639 #ifdef TARGET_EBCDIC
1640 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1641 code = 0x3f; /* EBCDIC invalid character */
1643 /* True extended characters are OK. */
1645 && !(code & 0x80000000)
1646 && !(code >= 0xD800 && code <= 0xDFFF))
1648 /* The standard permits $, @ and ` to be specified as UCNs. We use
1649 hex escapes so that this also works with EBCDIC hosts. */
1650 else if (code == 0x24 || code == 0x40 || code == 0x60)
1652 /* Don't give another error if one occurred above. */
1653 else if (length == 0)
1654 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1662 /* Interpret an escape sequence, and return its value. PSTR points to
1663 the input pointer, which is just after the backslash. LIMIT is how
1664 much text we have. MASK is a bitmask for the precision for the
1665 destination type (char or wchar_t).
1667 Handles all relevant diagnostics. */
1669 cpp_parse_escape (pfile, pstr, limit, mask)
1671 const unsigned char **pstr;
1672 const unsigned char *limit;
1673 unsigned HOST_WIDE_INT mask;
/* C is the character that follows the backslash; STR is left
   pointing just past it.  */
1676 const unsigned char *str = *pstr;
1677 unsigned int c = *str++;
/* These four escapes stand for the character itself.  */
1681 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the target's control characters.  */
1682 case 'b': c = TARGET_BS; break;
1683 case 'f': c = TARGET_FF; break;
1684 case 'n': c = TARGET_NEWLINE; break;
1685 case 'r': c = TARGET_CR; break;
1686 case 't': c = TARGET_TAB; break;
1687 case 'v': c = TARGET_VT; break;
1689 case '(': case '{': case '[': case '%':
1690 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1691 '\%' is used to prevent SCCS from getting confused. */
1692 unknown = CPP_PEDANTIC (pfile);
1696 if (CPP_WTRADITIONAL (pfile))
1697 cpp_error (pfile, DL_WARNING,
1698 "the meaning of '\\a' is different in traditional C");
1703 if (CPP_PEDANTIC (pfile))
1704 cpp_error (pfile, DL_PEDWARN,
1705 "non-ISO-standard escape sequence, '\\%c'", c);
/* maybe_read_ucs may advance STR and rewrite C; its result is kept
   in UNKNOWN.  */
1710 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1714 if (CPP_WTRADITIONAL (pfile))
1715 cpp_error (pfile, DL_WARNING,
1716 "the meaning of '\\x' is different in traditional C");
/* Hex escape state: I is the value so far, OVERFLOW records lost
   high bits.  */
1719 unsigned int i = 0, overflow = 0;
1720 int digits_found = 0;
/* Before each shift, note whether the top four bits of I are set,
   since the shift below would discard them.  */
1728 overflow |= i ^ (i << 4 >> 4);
1729 i = (i << 4) + hex_digit_value (c);
1734 cpp_error (pfile, DL_ERROR,
1735 "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit MASK.  */
1737 if (overflow | (i != (i & mask)))
1739 cpp_error (pfile, DL_PEDWARN,
1740 "hex escape sequence out of range");
1747 case '0': case '1': case '2': case '3':
1748 case '4': case '5': case '6': case '7':
/* The first octal digit has already been read into C.  */
1750 unsigned int i = c - '0';
/* Further octal digits are folded in three bits at a time while
   input remains and the COUNT limit allows.  */
1753 while (str < limit && ++count < 3)
1756 if (c < '0' || c > '7')
1759 i = (i << 3) + c - '0';
1762 if (i != (i & mask))
1764 cpp_error (pfile, DL_PEDWARN,
1765 "octal escape sequence out of range");
/* Two forms of the unknown-escape diagnostic: the first prints C as
   a character, the second as a three-digit octal number.  */
1780 cpp_error (pfile, DL_PEDWARN, "unknown escape sequence '\\%c'", c);
1782 cpp_error (pfile, DL_PEDWARN, "unknown escape sequence: '\\%03o'", c);
1786 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type");
/* Default the MAX_ width macros to the plain type sizes when they
   have not been defined already.  */
1792 #ifndef MAX_CHAR_TYPE_SIZE
1793 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1796 #ifndef MAX_WCHAR_TYPE_SIZE
1797 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1800 /* Interpret a (possibly wide) character constant in TOKEN.
1801 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points
1802 to a variable that is filled in with the number of characters seen. */
1804 cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen)
1806 const cpp_token *token;
1808 unsigned int *pchars_seen;
/* STR walks the constant's text, which ends at LIMIT.  */
1810 const unsigned char *str = token->val.str.text;
1811 const unsigned char *limit = str + token->val.str.len;
1812 unsigned int chars_seen = 0;
1813 unsigned int width, max_chars, c;
1814 unsigned HOST_WIDE_INT mask;
1815 HOST_WIDE_INT result = 0;
1818 #ifdef MULTIBYTE_CHARS
1819 (void) local_mbtowc (NULL, NULL, 0);
1822 /* Width in bits. */
/* CPP_CHAR tokens use the char width, with signedness taken from the
   signed_char option; the other branch uses the wchar_t width and
   WCHAR_UNSIGNED.  */
1823 if (token->type == CPP_CHAR)
1825 width = MAX_CHAR_TYPE_SIZE;
1826 unsigned_p = CPP_OPTION (pfile, signed_char) == 0;
1830 width = MAX_WCHAR_TYPE_SIZE;
1831 unsigned_p = WCHAR_UNSIGNED;
/* When WIDTH is narrower than HOST_WIDE_INT, MASK selects the low
   WIDTH bits.  */
1834 if (width < HOST_BITS_PER_WIDE_INT)
1835 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of characters of this width that fit in a HOST_WIDE_INT.  */
1838 max_chars = HOST_BITS_PER_WIDE_INT / width;
1842 #ifdef MULTIBYTE_CHARS
1846 char_len = local_mbtowc (&wc, str, limit - str);
1849 cpp_error (pfile, DL_WARNING,
1850 "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which advances
   STR.  */
1863 c = cpp_parse_escape (pfile, &str, limit, mask);
1865 #ifdef MAP_CHARACTER
1867 c = MAP_CHARACTER (c);
1870 /* Merge character into result; ignore excess chars. */
1871 if (++chars_seen <= max_chars)
1873 if (width < HOST_BITS_PER_WIDE_INT)
1874 result = (result << width) | (c & mask);
/* Diagnostics that depend on how many characters were seen.  */
1880 if (chars_seen == 0)
1881 cpp_error (pfile, DL_ERROR, "empty character constant");
1882 else if (chars_seen > max_chars)
1884 chars_seen = max_chars;
1885 cpp_error (pfile, DL_WARNING, "character constant too long");
1887 else if (chars_seen > 1 && warn_multi)
1888 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1890 /* If relevant type is signed, sign-extend the constant. */
/* NBITS is the number of significant bits in RESULT.
   NOTE(review): the shift count below equals HOST_BITS_PER_WIDE_INT
   when NBITS is zero, so this code relies on CHARS_SEEN being
   nonzero; no such guard is visible here -- confirm.  */
1893 unsigned int nbits = chars_seen * width;
1895 mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits);
/* Test the unsigned_p flag and the top significant bit of RESULT.  */
1896 if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0)
1902 *pchars_seen = chars_seen;
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is a size in bytes.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is MIN_BUFF_SIZE plus one and a
   half times MIN_SIZE.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is MIN_EXTRA plus twice the
   number of bytes between BUFF->cur and BUFF->limit.

   Every macro parameter is parenthesized in the expansion so that an
   expression argument (for example a conditional expression) keeps
   its intended grouping.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check relating the two size macros.  */
1917 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1918 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
/* DEFAULT_ALIGNMENT is the offset of member U within struct dummy.
   CPP_ALIGN rounds SIZE up to a multiple of ALIGN; the mask
   arithmetic gives that result only when ALIGN is a power of two.  */
1931 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1932 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1934 /* Create a new allocation buffer. Place the control block at the end
1935 of the buffer, so that buffer overflows will cause immediate chaos. */
1941 unsigned char *base;
/* Enforce the minimum size, then round LEN up to DEFAULT_ALIGNMENT.  */
1943 if (len < MIN_BUFF_SIZE)
1944 len = MIN_BUFF_SIZE;
1945 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* A single xmalloc request covers LEN data bytes followed by the
   _cpp_buff control block, which therefore sits at BASE + LEN.  */
1947 base = xmalloc (len + sizeof (_cpp_buff));
1948 result = (_cpp_buff *) (base + len);
1949 result->base = base;
/* The usable region ends where the control block begins.  */
1951 result->limit = base + len;
1952 result->next = NULL;
1956 /* Place a chain of unwanted allocation buffers on the free list.
   BUFF is the head of the chain; the free list belongs to PFILE.  */
1958 _cpp_release_buff (pfile, buff)
1962 _cpp_buff *end = buff;
/* The existing free list is hung off END, and BUFF becomes the new
   head of PFILE's free list.  */
1966 end->next = pfile->free_buffs;
1967 pfile->free_buffs = buff;
1970 /* Return a free buffer of size at least MIN_SIZE.

   PFILE's free list is searched through P, a pointer to each link in
   turn; new_buff is called with MIN_SIZE to make a fresh buffer.  A
   listed buffer's size is LIMIT minus BASE, and it is tested against
   both MIN_SIZE and BUFF_SIZE_UPPER_BOUND (MIN_SIZE).  RESULT has its
   NEXT link cleared and CUR reset to BASE.  */
1972 _cpp_get_buff (pfile, min_size)
1976 _cpp_buff *result, **p;
1978 for (p = &pfile->free_buffs;; p = &(*p)->next)
1983 return new_buff (min_size);
1985 size = result->limit - result->base;
1986 /* Return a buffer that's big enough, but don't waste one that's
1988 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1993 result->next = NULL;
1994 result->cur = result->base;
1998 /* Creates a new buffer with enough space to hold the uncommitted
1999 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2000 the excess bytes to the new buffer. Chains the new buffer after
2001 BUFF, and returns the new buffer. */
2003 _cpp_append_extend_buff (pfile, buff, min_extra)
/* The requested size comes from EXTENDED_BUFF_SIZE; the buffer itself
   is obtained through _cpp_get_buff.  */
2008 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2009 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes
   starting at BUFF->cur to the base of the new buffer.  */
2011 buff->next = new_buff;
2012 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2016 /* Creates a new buffer with enough space to hold the uncommitted
2017 remaining bytes of the buffer pointed to by BUFF, and at least
2018 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2019 Chains the new buffer before the buffer pointed to by BUFF, and
2020 updates the pointer to point to the new buffer. */
2022 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer currently referenced through PBUFF.  */
2027 _cpp_buff *new_buff, *old_buff = *pbuff;
2028 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Obtain the replacement, copy BUFF_ROOM (old_buff) bytes starting at
   OLD_BUFF->cur to its base, and chain the old buffer behind it.  */
2030 new_buff = _cpp_get_buff (pfile, size);
2031 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2032 new_buff->next = old_buff;
2036 /* Free a chain of buffers starting at BUFF. */
2038 _cpp_free_buff (buff)
/* Iterate while BUFF is non-null; NEXT supplies the buffer for the
   following iteration.  */
2043 for (; buff; buff = next)
2050 /* Allocate permanent, unaligned storage of length LEN.  The bytes
   come from PFILE's u_buff chain.  */
2052 _cpp_unaligned_alloc (pfile, len)
/* RESULT starts as the current position of the head u_buff.  */
2056 _cpp_buff *buff = pfile->u_buff;
2057 unsigned char *result = buff->cur;
/* Not enough room between RESULT and the buffer limit: obtain a
   buffer of at least LEN bytes and make it the new head of the
   u_buff chain.  */
2059 if (len > (size_t) (buff->limit - result))
2061 buff = _cpp_get_buff (pfile, len);
2062 buff->next = pfile->u_buff;
2063 pfile->u_buff = buff;
/* Advance the buffer's current position past the LEN bytes.  */
2067 buff->cur = result + len;
2071 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2072 That buffer is used for growing allocations when saving macro
2073 replacement lists in a #define, and when parsing an answer to an
2074 assertion in #assert, #unassert or #if (and therefore possibly
2075 whilst expanding macros). It therefore must not be used by any
2076 code that they might call: specifically the lexer and the guts of
2079 All existing other uses clearly fit this restriction: storing
2080 registered pragmas during initialization.

   NOTE(review): the first sentence says "unaligned" although the
   function is named _cpp_aligned_alloc; no rounding of LEN is
   visible in the code below -- confirm the intended wording.  */
2082 _cpp_aligned_alloc (pfile, len)
/* RESULT starts as the current position of the head a_buff.  */
2086 _cpp_buff *buff = pfile->a_buff;
2087 unsigned char *result = buff->cur;
/* Not enough room between RESULT and the buffer limit: obtain a
   buffer of at least LEN bytes and make it the new head of the
   a_buff chain.  */
2089 if (len > (size_t) (buff->limit - result))
2091 buff = _cpp_get_buff (pfile, len);
2092 buff->next = pfile->a_buff;
2093 pfile->a_buff = buff;
/* Advance the buffer's current position past the LEN bytes.  */
2097 buff->cur = result + len;