1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
32 #undef MULTIBYTE_CHARS
35 #ifdef MULTIBYTE_CHARS
40 /* Tokens with SPELL_STRING store their spelling in the token list,
41 and its length in token->val.str.len. */
54 enum spell_type category;
55 const unsigned char *name;
/* Spellings of the digraph forms of punctuators.  cpp_spell_token and
   cpp_output_token index this table with
   (token->type - CPP_FIRST_DIGRAPH) when a token has the DIGRAPH flag
   set.  NOTE(review): the entry order presumably has to match the
   order of the digraph-capable token types -- confirm against
   TTYPE_TABLE.  */
58 static const unsigned char *const digraph_spellings[] =
59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
/* Build the spelling table by expanding TTYPE_TABLE.  An OP (e, s)
   entry becomes { SPELL_OPERATOR, spelling S }; a TK (e, s) entry
   becomes { category S, stringified enumerator E }.  */
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
/* Indexed by token type; read through TOKEN_SPELL and TOKEN_NAME.  */
63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
/* Spelling category recorded in token_spellings for TOKEN's type.  */
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
/* Name string recorded in token_spellings for TOKEN's type.  */
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Rewind the read pointer to the position saved in backup_to.  Expects
   a local variable named `buffer' to be in scope where it is used.  */
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
71 static void handle_newline PARAMS ((cpp_reader *));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
85 static void unterminated PARAMS ((cpp_reader *, int));
86 static bool trigraph_p PARAMS ((cpp_reader *));
87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
93 static unsigned int hex_digit_value PARAMS ((unsigned int));
94 static _cpp_buff *new_buff PARAMS ((size_t));
98 Compares the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
102 cpp_ideq (token, string)
103 const cpp_token *token;
106 if (token->type != CPP_NAME)
109 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
113 Returns with buffer->cur pointing to the character immediately
114 following the newline (combination). */
116 handle_newline (pfile)
119 cpp_buffer *buffer = pfile->buffer;
121 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
122 only accept CR-LF; maybe we should fall back to that behaviour?
124 NOTE: the EOF case in _cpp_lex_direct currently requires the
125 buffer->cur != buffer->rlimit test here for 0-length files. */
126 if (buffer->cur != buffer->rlimit
127 && buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
130 buffer->line_base = buffer->cur;
131 buffer->col_adjust = 0;
135 /* Subroutine of skip_escaped_newlines; called when a 3-character
136 sequence beginning with "??" is encountered. buffer->cur points to
139 Warns if necessary, and returns true if the sequence forms a
140 trigraph and the trigraph should be honoured. */
145 cpp_buffer *buffer = pfile->buffer;
146 cppchar_t from_char = buffer->cur[1];
149 if (!_cpp_trigraph_map[from_char])
152 accept = CPP_OPTION (pfile, trigraphs);
154 /* Don't warn about trigraphs in comments. */
155 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
158 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
159 "trigraph ??%c converted to %c",
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, pfile->line,
166 CPP_BUF_COL (buffer) - 1,
167 "trigraph ??%c ignored", (int) from_char);
174 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
175 lie in buffer->cur[-1]. Returns the next byte, which will be in
176 buffer->cur[-1]. This routine performs preprocessing stages 1 and
177 2 of the ISO C standard. */
179 skip_escaped_newlines (pfile)
182 cpp_buffer *buffer = pfile->buffer;
183 cppchar_t next = buffer->cur[-1];
185 /* Only do this if we apply stages 1 and 2. */
186 if (!buffer->from_stage3)
188 const unsigned char *saved_cur;
193 if (buffer->cur == buffer->rlimit)
198 if (buffer->cur[0] != '?' || buffer->cur + 1 == buffer->rlimit)
201 if (!trigraph_p (pfile))
204 /* Translate the trigraph. */
205 next = _cpp_trigraph_map[buffer->cur[1]];
207 if (next != '\\' || buffer->cur == buffer->rlimit)
211 /* We have a backslash, and room for at least one more
212 character. Skip horizontal whitespace. */
213 saved_cur = buffer->cur;
215 next1 = *buffer->cur++;
216 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
218 if (!is_vspace (next1))
220 buffer->cur = saved_cur;
224 if (saved_cur != buffer->cur - 1
225 && !pfile->state.lexing_comment)
226 cpp_warning (pfile, "backslash and newline separated by space");
228 handle_newline (pfile);
229 buffer->backup_to = buffer->cur;
230 if (buffer->cur == buffer->rlimit)
232 cpp_pedwarn (pfile, "backslash-newline at end of file");
236 next = *buffer->cur++;
238 while (next == '\\' || next == '?');
244 /* Obtain the next character, after trigraph conversion and skipping
245 an arbitrarily long string of escaped newlines. The common case of
246 no trigraphs or escaped newlines falls through quickly. On return,
247 buffer->backup_to points to where to return to if the character is
248 not to be processed. */
250 get_effective_char (pfile)
253 cppchar_t next = EOF;
254 cpp_buffer *buffer = pfile->buffer;
256 buffer->backup_to = buffer->cur;
257 if (buffer->cur < buffer->rlimit)
259 next = *buffer->cur++;
260 if (__builtin_expect (next == '?' || next == '\\', 0))
261 next = skip_escaped_newlines (pfile);
267 /* Skip a C-style block comment. We find the end of the comment by
268 seeing if an asterisk is before every '/' we encounter. Returns
269 non-zero if comment terminated by EOF, zero otherwise. */
271 skip_block_comment (pfile)
274 cpp_buffer *buffer = pfile->buffer;
275 cppchar_t c = EOF, prevc = EOF;
277 pfile->state.lexing_comment = 1;
278 while (buffer->cur != buffer->rlimit)
280 prevc = c, c = *buffer->cur++;
282 /* FIXME: For speed, create a new character class of characters
283 of interest inside block comments. */
284 if (c == '?' || c == '\\')
285 c = skip_escaped_newlines (pfile);
287 /* People like decorating comments with '*', so check for '/'
288 instead for efficiency. */
294 /* Warn about potential nested comments, but not if the '/'
295 comes immediately before the true comment delimiter.
296 Don't bother to get it right across escaped newlines. */
297 if (CPP_OPTION (pfile, warn_comments)
298 && buffer->cur + 1 < buffer->rlimit
299 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
300 cpp_warning_with_line (pfile,
301 pfile->line, CPP_BUF_COL (buffer),
302 "\"/*\" within comment");
304 else if (is_vspace (c))
305 handle_newline (pfile);
307 adjust_column (pfile);
310 pfile->state.lexing_comment = 0;
311 return c != '/' || prevc != '*';
314 /* Skip a C++ line comment, leaving buffer->cur pointing to the
315 terminating newline. Handles escaped newlines. Returns non-zero
316 if a multiline comment. */
318 skip_line_comment (pfile)
321 cpp_buffer *buffer = pfile->buffer;
322 unsigned int orig_line = pfile->line;
325 pfile->state.lexing_comment = 1;
328 if (buffer->cur == buffer->rlimit)
332 if (c == '?' || c == '\\')
333 c = skip_escaped_newlines (pfile);
335 while (!is_vspace (c));
337 /* Step back over the newline, except at EOF. */
341 pfile->state.lexing_comment = 0;
342 return orig_line != pfile->line;
345 /* pfile->buffer->cur is one beyond the \t character. Update
346 col_adjust so we track the column correctly. */
348 adjust_column (pfile)
351 cpp_buffer *buffer = pfile->buffer;
352 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
354 /* Round it up to multiple of the tabstop, but subtract 1 since the
355 tab itself occupies a character position. */
356 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
357 - col % CPP_OPTION (pfile, tabstop)) - 1;
360 /* Skips whitespace, saving the next non-whitespace character.
361 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
362 tokens might be assigned an incorrect column. */
364 skip_whitespace (pfile, c)
368 cpp_buffer *buffer = pfile->buffer;
369 unsigned int warned = 0;
373 /* Horizontal space always OK. */
377 adjust_column (pfile);
378 /* Just \f \v or \0 left. */
383 cpp_warning (pfile, "null character(s) ignored");
387 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
388 cpp_pedwarn_with_line (pfile, pfile->line,
389 CPP_BUF_COL (buffer),
390 "%s in preprocessing directive",
391 c == '\f' ? "form feed" : "vertical tab");
393 if (buffer->cur == buffer->rlimit)
397 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
398 while (is_nvspace (c));
403 /* See if the characters of a number token are valid in a name (no
406 name_p (pfile, string)
408 const cpp_string *string;
412 for (i = 0; i < string->len; i++)
413 if (!is_idchar (string->text[i]))
419 /* Parse an identifier, skipping embedded backslash-newlines. This is
420 a critical inner loop. The common case is an identifier which has
421 not been split by backslash-newline, does not contain a dollar
422 sign, and has already been scanned (roughly 10:1 ratio of
423 seen:unseen identifiers in normal code; the distribution is
424 Poisson-like). Second most common case is a new identifier, not
425 split and no dollar sign. The other possibilities are rare and
426 have been relegated to parse_identifier_slow. */
428 static cpp_hashnode *
429 parse_identifier (pfile)
432 cpp_hashnode *result;
433 const U_CHAR *cur, *rlimit;
435 /* Fast-path loop. Skim over a normal identifier.
436 N.B. ISIDNUM does not include $. */
437 cur = pfile->buffer->cur - 1;
438 rlimit = pfile->buffer->rlimit;
441 while (cur < rlimit && ISIDNUM (*cur));
443 /* Check for slow-path cases. */
444 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
445 result = parse_identifier_slow (pfile, cur);
448 const U_CHAR *base = pfile->buffer->cur - 1;
449 result = (cpp_hashnode *)
450 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
451 pfile->buffer->cur = cur;
454 /* Rarely, identifiers require diagnostics when lexed.
455 XXX Has to be forced out of the fast path. */
456 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
457 && !pfile->state.skipping, 0))
459 /* It is allowed to poison the same identifier twice. */
460 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
461 cpp_error (pfile, "attempt to use poisoned \"%s\"",
464 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
465 replacement list of a variadic macro. */
466 if (result == pfile->spec_nodes.n__VA_ARGS__
467 && !pfile->state.va_args_ok)
469 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
475 /* Slow path. This handles identifiers which have been split, and
476 identifiers which contain dollar signs. The part of the identifier
477 from PFILE->buffer->cur-1 to CUR has already been scanned. */
478 static cpp_hashnode *
479 parse_identifier_slow (pfile, cur)
483 cpp_buffer *buffer = pfile->buffer;
484 const U_CHAR *base = buffer->cur - 1;
485 struct obstack *stack = &pfile->hash_table->stack;
486 unsigned int c, saw_dollar = 0, len;
488 /* Copy the part of the token which is known to be okay. */
489 obstack_grow (stack, base, cur - base);
491 /* Now process the part which isn't. We are looking at one of
492 '$', '\\', or '?' on entry to this loop. */
497 while (is_idchar (c))
499 obstack_1grow (stack, c);
504 if (buffer->cur == buffer->rlimit)
510 /* Potential escaped newline? */
511 buffer->backup_to = buffer->cur - 1;
512 if (c != '?' && c != '\\')
514 c = skip_escaped_newlines (pfile);
516 while (is_idchar (c));
518 /* Step back over the unwanted char, except at EOF. */
522 /* $ is not an identifier character in the standard, but is commonly
523 accepted as an extension. Don't warn about it in skipped
524 conditional blocks. */
525 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
526 cpp_pedwarn (pfile, "'$' character(s) in identifier");
528 /* Identifiers are null-terminated. */
529 len = obstack_object_size (stack);
530 obstack_1grow (stack, '\0');
532 return (cpp_hashnode *)
533 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
536 /* Parse a number, skipping embedded backslash-newlines. */
538 parse_number (pfile, number, c, leading_period)
544 cpp_buffer *buffer = pfile->buffer;
545 unsigned char *dest, *limit;
547 dest = BUFF_FRONT (pfile->u_buff);
548 limit = BUFF_LIMIT (pfile->u_buff);
550 /* Place a leading period. */
555 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
556 dest = BUFF_FRONT (pfile->u_buff);
557 limit = BUFF_LIMIT (pfile->u_buff);
566 /* Need room for terminating null. */
567 if ((size_t) (limit - dest) < 2)
569 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
570 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
571 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
572 limit = BUFF_LIMIT (pfile->u_buff);
576 if (buffer->cur == buffer->rlimit)
581 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
583 /* Potential escaped newline? */
584 buffer->backup_to = buffer->cur - 1;
585 if (c != '?' && c != '\\')
587 c = skip_escaped_newlines (pfile);
589 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
591 /* Step back over the unwanted char, except at EOF. */
595 /* Null-terminate the number. */
598 number->text = BUFF_FRONT (pfile->u_buff);
599 number->len = dest - number->text;
600 BUFF_FRONT (pfile->u_buff) = dest + 1;
603 /* Subroutine of parse_string. Emits error for unterminated strings. */
605 unterminated (pfile, term)
609 cpp_error (pfile, "missing terminating %c character", term);
611 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
613 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
614 "possible start of unterminated string literal");
619 /* Subroutine of parse_string. */
621 unescaped_terminator_p (pfile, dest)
623 const unsigned char *dest;
625 const unsigned char *start, *temp;
627 /* In #include-style directives, terminators are not escapable. */
628 if (pfile->state.angled_headers)
631 start = BUFF_FRONT (pfile->u_buff);
633 /* An odd number of consecutive backslashes represents an escaped
635 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
638 return ((dest - temp) & 1) == 0;
641 /* Parses a string, character constant, or angle-bracketed header file
642 name. Handles embedded trigraphs and escaped newlines. The stored
643 string is guaranteed NUL-terminated, but it is not guaranteed that
644 this is the first NUL since embedded NULs are preserved.
645 Multi-line strings are allowed, but they are deprecated.
647 When this function returns, buffer->cur points to the next
648 character to be processed. */
650 parse_string (pfile, token, terminator)
653 cppchar_t terminator;
655 cpp_buffer *buffer = pfile->buffer;
656 unsigned char *dest, *limit;
658 bool warned_nulls = false, warned_multi = false;
660 dest = BUFF_FRONT (pfile->u_buff);
661 limit = BUFF_LIMIT (pfile->u_buff);
665 /* We need room for another char, possibly the terminating NUL. */
666 if ((size_t) (limit - dest) < 1)
668 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
669 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
670 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
671 limit = BUFF_LIMIT (pfile->u_buff);
674 if (buffer->cur == buffer->rlimit)
676 unterminated (pfile, terminator);
680 /* Handle trigraphs, escaped newlines etc. */
682 if (c == '?' || c == '\\')
683 c = skip_escaped_newlines (pfile);
687 if (unescaped_terminator_p (pfile, dest))
690 else if (is_vspace (c))
692 /* In assembly language, silently terminate string and
693 character literals at end of line. This is a kludge
694 around not knowing where comments are. */
695 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
701 /* Character constants and header names may not extend over
702 multiple lines. In Standard C, neither may strings.
703 Unfortunately, we accept multiline strings as an
704 extension, except in #include family directives. */
705 if (terminator != '"' || pfile->state.angled_headers)
707 unterminated (pfile, terminator);
715 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
718 if (pfile->mls_line == 0)
720 pfile->mls_line = token->line;
721 pfile->mls_col = token->col;
724 handle_newline (pfile);
727 else if (c == '\0' && !warned_nulls)
730 cpp_warning (pfile, "null character(s) preserved in literal");
738 token->val.str.text = BUFF_FRONT (pfile->u_buff);
739 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
740 BUFF_FRONT (pfile->u_buff) = dest + 1;
743 /* The stored comment includes the comment start and any terminator. */
745 save_comment (pfile, token, from)
748 const unsigned char *from;
750 unsigned char *buffer;
753 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
755 /* C++ comments probably (not definitely) have moved past a new
756 line, which we don't want to save in the comment. */
757 if (is_vspace (pfile->buffer->cur[-1]))
759 buffer = _cpp_unaligned_alloc (pfile, len);
761 token->type = CPP_COMMENT;
762 token->val.str.len = len;
763 token->val.str.text = buffer;
766 memcpy (buffer + 1, from, len - 1);
769 /* Allocate COUNT tokens for RUN. */
771 _cpp_init_tokenrun (run, count)
775 run->base = xnewvec (cpp_token, count);
776 run->limit = run->base + count;
780 /* Returns the next tokenrun, or creates one if there is none. */
785 if (run->next == NULL)
787 run->next = xnew (tokenrun);
788 run->next->prev = run;
789 _cpp_init_tokenrun (run->next, 250);
795 /* Allocate a single token that is invalidated at the same time as the
796 rest of the tokens on the line. Has its line and col set to the
797 same as the last lexed token, so that diagnostics appear in the
800 _cpp_temp_token (pfile)
803 cpp_token *old, *result;
805 old = pfile->cur_token - 1;
806 if (pfile->cur_token == pfile->cur_run->limit)
808 pfile->cur_run = next_tokenrun (pfile->cur_run);
809 pfile->cur_token = pfile->cur_run->base;
812 result = pfile->cur_token++;
813 result->line = old->line;
814 result->col = old->col;
818 /* Lex a token into RESULT (external interface). Takes care of issues
819 like directive handling, token lookahead, multiple include
820 optimisation and skipping. */
822 _cpp_lex_token (pfile)
829 if (pfile->cur_token == pfile->cur_run->limit)
831 pfile->cur_run = next_tokenrun (pfile->cur_run);
832 pfile->cur_token = pfile->cur_run->base;
835 if (pfile->lookaheads)
838 result = pfile->cur_token++;
841 result = _cpp_lex_direct (pfile);
843 if (result->flags & BOL)
845 /* Is this a directive? If _cpp_handle_directive returns
846 false, it is an assembler #. */
847 if (result->type == CPP_HASH
848 && !pfile->state.parsing_args
849 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
851 if (pfile->cb.line_change && !pfile->state.skipping)
852 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
855 /* We don't skip tokens in directives. */
856 if (pfile->state.in_directive)
859 /* Outside a directive, invalidate controlling macros. At file
860 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
861 get here and MI optimisation works. */
862 pfile->mi_valid = false;
864 if (!pfile->state.skipping || result->type == CPP_EOF)
871 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
873 if (get_effective_char (pfile) == CHAR) \
874 result->type = THEN_TYPE; \
878 result->type = ELSE_TYPE; \
882 /* Lex a token into pfile->cur_token, which is also incremented, to
883 get diagnostics pointing to the correct location.
885 Does not handle issues such as token lookahead, multiple-include
886 optimisation, directives, skipping etc. This function is only
887 suitable for use by _cpp_lex_token, and in special cases like
888 lex_expansion_token which doesn't care for any of these issues.
890 When meeting a newline, returns CPP_EOF if parsing a directive,
891 otherwise returns to the start of the token buffer if permissible.
892 Returns the location of the lexed token. */
894 _cpp_lex_direct (pfile)
899 const unsigned char *comment_start;
900 cpp_token *result = pfile->cur_token++;
903 buffer = pfile->buffer;
904 result->flags = buffer->saved_flags;
905 buffer->saved_flags = 0;
907 result->line = pfile->line;
910 if (buffer->cur == buffer->rlimit)
913 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
919 buffer->saved_flags = BOL;
920 if (!pfile->state.parsing_args && !pfile->state.in_directive)
922 if (buffer->cur != buffer->line_base)
924 /* Non-empty files should end in a newline. Don't warn
925 for command line and _Pragma buffers. */
926 if (!buffer->from_stage3)
927 cpp_pedwarn (pfile, "no newline at end of file");
928 handle_newline (pfile);
931 /* Don't pop the last buffer. */
934 unsigned char stop = buffer->return_at_eof;
936 _cpp_pop_buffer (pfile);
941 result->type = CPP_EOF;
944 case ' ': case '\t': case '\f': case '\v': case '\0':
945 skip_whitespace (pfile, c);
946 result->flags |= PREV_WHITE;
949 case '\n': case '\r':
950 handle_newline (pfile);
951 buffer->saved_flags = BOL;
952 if (! pfile->state.in_directive)
954 if (pfile->state.parsing_args == 2)
955 buffer->saved_flags |= PREV_WHITE;
956 if (!pfile->keep_tokens)
958 pfile->cur_run = &pfile->base_run;
959 result = pfile->base_run.base;
960 pfile->cur_token = result + 1;
964 result->type = CPP_EOF;
969 /* These could start an escaped newline, or '?' a trigraph. Let
970 skip_escaped_newlines do all the work. */
972 unsigned int line = pfile->line;
974 c = skip_escaped_newlines (pfile);
975 if (line != pfile->line)
978 /* We had at least one escaped newline of some sort.
979 Update the token's line and column. */
980 goto update_tokens_line;
984 /* We are either the original '?' or '\\', or a trigraph. */
986 result->type = CPP_QUERY;
993 case '0': case '1': case '2': case '3': case '4':
994 case '5': case '6': case '7': case '8': case '9':
995 result->type = CPP_NUMBER;
996 parse_number (pfile, &result->val.str, c, 0);
1000 if (!CPP_OPTION (pfile, dollars_in_ident))
1002 /* Fall through... */
1005 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1006 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1007 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1008 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1010 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1011 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1012 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1013 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1015 result->type = CPP_NAME;
1016 result->val.node = parse_identifier (pfile);
1018 /* 'L' may introduce wide characters or strings. */
1019 if (result->val.node == pfile->spec_nodes.n_L
1020 && buffer->cur < buffer->rlimit)
1023 if (c == '\'' || c == '"')
1026 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1027 parse_string (pfile, result, c);
1030 /* Convert named operators to their proper types. */
1031 else if (result->val.node->flags & NODE_OPERATOR)
1033 result->flags |= NAMED_OP;
1034 result->type = result->val.node->value.operator;
1040 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1041 parse_string (pfile, result, c);
1045 /* A potential block or line comment. */
1046 comment_start = buffer->cur;
1047 c = get_effective_char (pfile);
1051 if (skip_block_comment (pfile))
1052 cpp_error (pfile, "unterminated comment");
1054 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1055 || CPP_IN_SYSTEM_HEADER (pfile)))
1057 /* Warn about comments only if pedantically GNUC89, and not
1058 in system headers. */
1059 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1060 && ! buffer->warned_cplusplus_comments)
1063 "C++ style comments are not allowed in ISO C89");
1065 "(this will be reported only once per input file)");
1066 buffer->warned_cplusplus_comments = 1;
1069 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1070 cpp_warning (pfile, "multi-line comment");
1074 result->type = CPP_DIV_EQ;
1080 result->type = CPP_DIV;
1084 if (!pfile->state.save_comments)
1086 result->flags |= PREV_WHITE;
1087 goto update_tokens_line;
1090 /* Save the comment as a token in its own right. */
1091 save_comment (pfile, result, comment_start);
1095 if (pfile->state.angled_headers)
1097 result->type = CPP_HEADER_NAME;
1098 parse_string (pfile, result, '>');
1102 c = get_effective_char (pfile);
1104 result->type = CPP_LESS_EQ;
1106 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1107 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1108 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1109 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1111 result->type = CPP_OPEN_SQUARE;
1112 result->flags |= DIGRAPH;
1114 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1116 result->type = CPP_OPEN_BRACE;
1117 result->flags |= DIGRAPH;
1122 result->type = CPP_LESS;
1127 c = get_effective_char (pfile);
1129 result->type = CPP_GREATER_EQ;
1131 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1132 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1137 result->type = CPP_GREATER;
1142 c = get_effective_char (pfile);
1144 result->type = CPP_MOD_EQ;
1145 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1147 result->flags |= DIGRAPH;
1148 result->type = CPP_HASH;
1149 if (get_effective_char (pfile) == '%')
1151 const unsigned char *pos = buffer->cur;
1153 if (get_effective_char (pfile) == ':')
1154 result->type = CPP_PASTE;
1156 buffer->cur = pos - 1;
1161 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1163 result->flags |= DIGRAPH;
1164 result->type = CPP_CLOSE_BRACE;
1169 result->type = CPP_MOD;
1174 result->type = CPP_DOT;
1175 c = get_effective_char (pfile);
1178 const unsigned char *pos = buffer->cur;
1180 if (get_effective_char (pfile) == '.')
1181 result->type = CPP_ELLIPSIS;
1183 buffer->cur = pos - 1;
1185 /* All known character sets have 0...9 contiguous. */
1186 else if (ISDIGIT (c))
1188 result->type = CPP_NUMBER;
1189 parse_number (pfile, &result->val.str, c, 1);
1191 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1192 result->type = CPP_DOT_STAR;
1198 c = get_effective_char (pfile);
1200 result->type = CPP_PLUS_PLUS;
1202 result->type = CPP_PLUS_EQ;
1206 result->type = CPP_PLUS;
1211 c = get_effective_char (pfile);
1214 result->type = CPP_DEREF;
1215 if (CPP_OPTION (pfile, cplusplus))
1217 if (get_effective_char (pfile) == '*')
1218 result->type = CPP_DEREF_STAR;
1224 result->type = CPP_MINUS_MINUS;
1226 result->type = CPP_MINUS_EQ;
1230 result->type = CPP_MINUS;
1235 c = get_effective_char (pfile);
1237 result->type = CPP_AND_AND;
1239 result->type = CPP_AND_EQ;
1243 result->type = CPP_AND;
1248 c = get_effective_char (pfile);
1250 result->type = CPP_OR_OR;
1252 result->type = CPP_OR_EQ;
1256 result->type = CPP_OR;
1261 c = get_effective_char (pfile);
1262 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1263 result->type = CPP_SCOPE;
1264 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1266 result->flags |= DIGRAPH;
1267 result->type = CPP_CLOSE_SQUARE;
1272 result->type = CPP_COLON;
1276 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1277 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1278 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1279 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1280 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1282 case '~': result->type = CPP_COMPL; break;
1283 case ',': result->type = CPP_COMMA; break;
1284 case '(': result->type = CPP_OPEN_PAREN; break;
1285 case ')': result->type = CPP_CLOSE_PAREN; break;
1286 case '[': result->type = CPP_OPEN_SQUARE; break;
1287 case ']': result->type = CPP_CLOSE_SQUARE; break;
1288 case '{': result->type = CPP_OPEN_BRACE; break;
1289 case '}': result->type = CPP_CLOSE_BRACE; break;
1290 case ';': result->type = CPP_SEMICOLON; break;
1292 /* @ is a punctuator in Objective C. */
1293 case '@': result->type = CPP_ATSIGN; break;
1297 result->type = CPP_OTHER;
1305 /* An upper bound on the number of bytes needed to spell a token,
1306 including preceding whitespace. */
1308 cpp_token_len (token)
1309 const cpp_token *token;
1313 switch (TOKEN_SPELL (token))
1315 default: len = 0; break;
1317 case SPELL_STRING: len = token->val.str.len; break;
1318 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1320 /* 1 for whitespace, 4 for comment delimiters. */
1324 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1325 already contain enough space to hold the token's spelling.
1326 Returns a pointer to the character after the last character
1329 cpp_spell_token (pfile, token, buffer)
1330 cpp_reader *pfile; /* Would be nice to be rid of this... */
1331 const cpp_token *token;
1332 unsigned char *buffer;
1334 switch (TOKEN_SPELL (token))
1336 case SPELL_OPERATOR:
1338 const unsigned char *spelling;
1341 if (token->flags & DIGRAPH)
1343 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1344 else if (token->flags & NAMED_OP)
1347 spelling = TOKEN_NAME (token);
1349 while ((c = *spelling++) != '\0')
1355 *buffer++ = token->val.c;
1360 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1361 buffer += NODE_LEN (token->val.node);
1365 memcpy (buffer, token->val.str.text, token->val.str.len);
1366 buffer += token->val.str.len;
1371 int left, right, tag;
1372 switch (token->type)
1374 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1375 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1376 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1377 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1378 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1380 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1383 if (tag) *buffer++ = tag;
1385 memcpy (buffer, token->val.str.text, token->val.str.len);
1386 buffer += token->val.str.len;
1392 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1399 /* Returns a token as a null-terminated string. The string is
1400 temporary, and automatically freed later. Useful for diagnostics. */
1402 cpp_token_as_text (pfile, token)
1404 const cpp_token *token;
1406 unsigned int len = cpp_token_len (token);
1407 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1409 end = cpp_spell_token (pfile, token, start);
1415 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name column of token_spellings[] for TYPE, cast to
   const char *.  TYPE indexes the table directly; no range check is
   made in the lines shown.  */
1417 cpp_type2name (type)
1418 enum cpp_ttype type;
1420 return (const char *) token_spellings[type].name;
1423 /* Writes the spelling of token to FP, without any preceding space.
1424 Separated from cpp_spell_token for efficiency - to avoid stdio
1425 double-buffering. */
/* TOKEN is the token to write and FP the destination stream.  The
   case analysis mirrors the spelling categories used by
   cpp_spell_token, but writes with putc and fwrite instead of
   filling a buffer.  */
1427 cpp_output_token (token, fp)
1428 const cpp_token *token;
1431 switch (TOKEN_SPELL (token))
1433 case SPELL_OPERATOR:
1435 const unsigned char *spelling;
/* Digraph tokens are spelled from digraph_spellings[], indexed by
   the distance of the token type from CPP_FIRST_DIGRAPH.  */
1438 if (token->flags & DIGRAPH)
1440 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1441 else if (token->flags & NAMED_OP)
/* Spelling taken from the name column of token_spellings[].  */
1444 spelling = TOKEN_NAME (token);
/* Advance SPELLING first, then stop at the terminating NUL.
   NOTE(review): presumably the tail of a do-while; the loop body is
   not visible in this listing.  */
1449 while ((c = *++spelling) != '\0');
/* Single-character token: write the character kept in val.c.  */
1454 putc (token->val.c, fp);
/* Identifier node: write NODE_LEN bytes of the node name.  */
1459 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* Write the stored text verbatim.  */
1463 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* Delimited tokens: choose the opening and closing delimiter, plus
   an L tag for the two wide variants.  */
1468 int left, right, tag;
1469 switch (token->type)
1471 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1472 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1473 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1474 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1475 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* No cpp_reader is available in this function, so the complaint is
   printed straight to stderr.  */
1477 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
/* Write the tag when one was chosen, then the stored text.
   NOTE(review): the writes of LEFT and RIGHT are not visible in this
   listing.  */
1480 if (tag) putc (tag, fp);
1482 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1488 /* An error, most probably. */
1493 /* Compare two tokens. */
/* A and B are the tokens to compare.  The visible return statements
   each yield the result of a comparison, so nonzero means the tokens
   are considered equivalent.  */
1495 _cpp_equiv_tokens (a, b)
1496 const cpp_token *a, *b;
/* Tokens can only match when both type and flags agree.  What else
   has to match depends on the spelling category of A.  */
1498 if (a->type == b->type && a->flags == b->flags)
1499 switch (TOKEN_SPELL (a))
1501 default: /* Keep compiler happy. */
1502 case SPELL_OPERATOR:
1505 return a->val.c == b->val.c; /* Character. */
/* The payload only matters for macro arguments, which must carry
   the same argument number.  */
1507 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Nodes are compared directly with ==, not by their name text.  */
1509 return a->val.node == b->val.node;
/* Stored text must agree in length and, via memcmp, in content.  */
1512 return (a->val.str.len == b->val.str.len
1513 && !memcmp (a->val.str.text, b->val.str.text,
1520 /* Returns nonzero if a space should be inserted to avoid an
1521 accidental token paste for output. For simplicity, it is
1522 conservative, and occasionally advises a space where one is not
1523 needed, e.g. "." and ".2". */
/* TOKEN1 is the token already written and TOKEN2 the one about to
   follow it.  PFILE is consulted for the objc option and passed to
   name_p.  */
1526 cpp_avoid_paste (pfile, token1, token2)
1528 const cpp_token *token1, *token2;
/* A and B are the two token types.  */
1530 enum cpp_ttype a = token1->type, b = token2->type;
/* Named operators get special handling on either side.
   NOTE(review): the statements guarded by these two tests are not
   visible in this listing.  */
1533 if (token1->flags & NAMED_OP)
1535 if (token2->flags & NAMED_OP)
/* C becomes the first character of the spelling of TOKEN2 when it
   is a digraph or an operator.  */
1539 if (token2->flags & DIGRAPH)
1540 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1541 else if (token_spellings[b].category == SPELL_OPERATOR)
1542 c = token_spellings[b].name[0];
1544 /* Quickly get everything that can paste with an '='. */
1545 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per-type table: each entry lists the leading characters, or the
   token types, of TOKEN2 that could fuse with TOKEN1.  */
1550 case CPP_GREATER: return c == '>' || c == '?';
1551 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1552 case CPP_PLUS: return c == '+';
1553 case CPP_MINUS: return c == '-' || c == '>';
1554 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1555 case CPP_MOD: return c == ':' || c == '>';
1556 case CPP_AND: return c == '&';
1557 case CPP_OR: return c == '|';
1558 case CPP_COLON: return c == ':' || c == '>';
1559 case CPP_DEREF: return c == '*';
1560 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1561 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1562 case CPP_NAME: return ((b == CPP_NUMBER
1563 && name_p (pfile, &token2->val.str))
1565 || b == CPP_CHAR || b == CPP_STRING); /* L */
1566 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1567 || c == '.' || c == '+' || c == '-');
/* CPP_OTHER only matters for an at-sign followed by a name or a
   string, and only when the objc option is set.  */
1568 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1569 && token1->val.c == '@'
1570 && (b == CPP_NAME || b == CPP_STRING));
1577 /* Output all the remaining tokens on the current line, and a newline
1578 character, to FP. Leading whitespace is removed. If there are
1579 macros, special token padding is not performed. */
/* PFILE is the reader that tokens are fetched from with
   cpp_get_token; FP is the stream they are written to with
   cpp_output_token.  */
1581 cpp_output_line (pfile, fp)
1585 const cpp_token *token;
1587 token = cpp_get_token (pfile);
/* Write tokens until CPP_EOF comes back.  The next token is fetched
   before the PREV_WHITE test, so the flag examined belongs to the
   token about to be written rather than the one just written; the
   first token on the line is therefore never tested.  */
1588 while (token->type != CPP_EOF)
1590 cpp_output_token (token, fp);
1591 token = cpp_get_token (pfile);
1592 if (token->flags & PREV_WHITE)
1599 /* Returns the value of a hexadecimal digit. */
/* The letters a-f and A-F both map to 10 through 15.  Each range
   test relies on the characters in the range having consecutive
   character codes.  NOTE(review): the signature, the result for a
   decimal digit and the handling of a character outside all three
   ranges are not visible in this listing.  */
1604 if (c >= 'a' && c <= 'f')
1605 return c - 'a' + 10;
1606 if (c >= 'A' && c <= 'F')
1607 return c - 'A' + 10;
1608 if (c >= '0' && c <= '9')
1613 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1614 failure if cpplib is not parsing C++ or C99. Such failure is
1615 silent, and no variables are updated. Otherwise returns 0, and
1616 warns if -Wtraditional.
1618 [lex.charset]: The character designated by the universal character
1619 name \UNNNNNNNN is that character whose character short name in
1620 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1621 universal character name \uNNNN is that character whose character
1622 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1623 for a universal character name is less than 0x20 or in the range
1624 0x7F-0x9F (inclusive), or if the universal character name
1625 designates a character in the basic source character set, then the
1626 program is ill-formed.
1628 We assume that wchar_t is Unicode, so we don't need to do any
1629 mapping. Is this ever wrong?
1631 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1632 LIMIT is the end of the string or charconst. PSTR is updated to
1633 point after the UCS on return, and the UCS is written into PC. */
1636 maybe_read_ucs (pfile, pstr, limit, pc)
1638 const unsigned char **pstr;
1639 const unsigned char *limit;
/* P is the working read position, CODE accumulates the value, and C
   starts out as the character that PC points to.  */
1642 const unsigned char *p = *pstr;
1643 unsigned int code = 0;
1644 unsigned int c = *pc, length;
1646 /* Only attempt to interpret a UCS for C++ and C99. */
1647 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1650 if (CPP_WTRADITIONAL (pfile))
1651 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* A lower-case u introduces four hex digits; otherwise eight are
   expected.  */
1653 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1655 if ((size_t) (limit - p) < length)
1657 cpp_error (pfile, "incomplete universal-character-name");
1658 /* Skip to the end to avoid more diagnostics. */
/* Consume one digit per iteration.  LENGTH counts down, and the
   test against zero further below relies on it still being nonzero
   after an early exit (the exit itself is not visible in this
   listing).  */
1663 for (; length; length--, p++)
/* Shift the next four bits into CODE.  */
1667 code = (code << 4) + hex_digit_value (c);
1671 "non-hex digit '%c' in universal-character-name", c);
1672 /* We shouldn't skip in case there are multibyte chars. */
1678 #ifdef TARGET_EBCDIC
1679 cpp_error (pfile, "universal-character-name on EBCDIC target");
1680 code = 0x3f; /* EBCDIC invalid character */
1682 /* True extended characters are OK. */
/* The two visible conditions exclude values with bit 31 set and the
   range D800 through DFFF.  */
1684 && !(code & 0x80000000)
1685 && !(code >= 0xD800 && code <= 0xDFFF))
1687 /* The standard permits $, @ and ` to be specified as UCNs. We use
1688 hex escapes so that this also works with EBCDIC hosts. */
1689 else if (code == 0x24 || code == 0x40 || code == 0x60)
1691 /* Don't give another error if one occurred above. */
1692 else if (length == 0)
1693 cpp_error (pfile, "universal-character-name out of range");
1701 /* Interpret an escape sequence, and return its value. PSTR points to
1702 the input pointer, which is just after the backslash. LIMIT is how
1703 much text we have. MASK is a bitmask for the precision for the
1704 destination type (char or wchar_t). TRADITIONAL, if true, does not
1705 interpret escapes that did not exist in traditional C.
1707 Handles all relevant diagnostics. */
/* Overview of the cases visible below.  The simple letter escapes
   map to the TARGET_* character codes.  A few punctuation escapes
   are flagged as unknown only when CPP_PEDANTIC is set.  Universal
   character names are delegated to maybe_read_ucs, whose result is
   stored in UNKNOWN.  The hex case shifts in four bits per digit and
   records overflow by checking that I survives a shift left and back
   right by four bits, i.e. that its top four bits are still clear
   before the next digit goes in.  The octal case starts from the
   first digit and pre-increments COUNT in the loop test, stopping
   before COUNT reaches 3; assuming COUNT starts at zero (its
   declaration is not visible in this listing) at most three octal
   digits are consumed in total.  In both numeric cases a value that
   does not fit MASK draws a pedwarn.  */
1710 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1712 const unsigned char **pstr;
1713 const unsigned char *limit;
1714 unsigned HOST_WIDE_INT mask;
1718 const unsigned char *str = *pstr;
1719 unsigned int c = *str++;
1723 case '\\': case '\'': case '"': case '?': break;
1724 case 'b': c = TARGET_BS; break;
1725 case 'f': c = TARGET_FF; break;
1726 case 'n': c = TARGET_NEWLINE; break;
1727 case 'r': c = TARGET_CR; break;
1728 case 't': c = TARGET_TAB; break;
1729 case 'v': c = TARGET_VT; break;
1731 case '(': case '{': case '[': case '%':
1732 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1733 '\%' is used to prevent SCCS from getting confused. */
1734 unknown = CPP_PEDANTIC (pfile);
1738 if (CPP_WTRADITIONAL (pfile))
1739 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1745 if (CPP_PEDANTIC (pfile))
1746 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1751 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1755 if (CPP_WTRADITIONAL (pfile))
1756 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1760 unsigned int i = 0, overflow = 0;
1761 int digits_found = 0;
1769 overflow |= i ^ (i << 4 >> 4);
1770 i = (i << 4) + hex_digit_value (c);
1775 cpp_error (pfile, "\\x used with no following hex digits");
1777 if (overflow | (i != (i & mask)))
1779 cpp_pedwarn (pfile, "hex escape sequence out of range");
1786 case '0': case '1': case '2': case '3':
1787 case '4': case '5': case '6': case '7':
1789 unsigned int i = c - '0';
1792 while (str < limit && ++count < 3)
1795 if (c < '0' || c > '7')
1798 i = (i << 3) + c - '0';
1801 if (i != (i & mask))
1803 cpp_pedwarn (pfile, "octal escape sequence out of range");
1818 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1820 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1824 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Fall back to the plain CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE when the
   MAX_ variants have not been defined elsewhere.  They serve as the
   character widths, in bits, in cpp_interpret_charconst.  */
1830 #ifndef MAX_CHAR_TYPE_SIZE
1831 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1834 #ifndef MAX_WCHAR_TYPE_SIZE
1835 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1838 /* Interpret a (possibly wide) character constant in TOKEN.
1839 WARN_MULTI warns about multi-character charconsts, if not
1840 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1841 that did not exist in traditional C. PCHARS_SEEN points to a
1842 variable that is filled in with the number of characters seen. */
1844 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1846 const cpp_token *token;
1849 unsigned int *pchars_seen;
/* STR walks the stored text of the token up to LIMIT.  */
1851 const unsigned char *str = token->val.str.text;
1852 const unsigned char *limit = str + token->val.str.len;
1853 unsigned int chars_seen = 0;
1854 unsigned int width, max_chars, c;
1855 unsigned HOST_WIDE_INT mask;
1856 HOST_WIDE_INT result = 0;
1858 #ifdef MULTIBYTE_CHARS
1859 (void) local_mbtowc (NULL, NULL, 0);
1862 /* Width in bits. */
1863 if (token->type == CPP_CHAR)
1864 width = MAX_CHAR_TYPE_SIZE;
1866 width = MAX_WCHAR_TYPE_SIZE;
/* MASK selects the low WIDTH bits.  MAX_CHARS is how many characters
   of WIDTH bits fit in a HOST_WIDE_INT.  */
1868 if (width < HOST_BITS_PER_WIDE_INT)
1869 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1872 max_chars = HOST_BITS_PER_WIDE_INT / width;
1876 #ifdef MULTIBYTE_CHARS
1880 char_len = local_mbtowc (&wc, str, limit - str);
1883 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which is given
   the address of STR.  */
1896 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1898 #ifdef MAP_CHARACTER
1900 c = MAP_CHARACTER (c);
1903 /* Merge character into result; ignore excess chars. */
1904 if (++chars_seen <= max_chars)
/* Earlier characters move up by WIDTH bits and the new character
   fills the low bits.  */
1906 if (width < HOST_BITS_PER_WIDE_INT)
1907 result = (result << width) | (c & mask);
1913 if (chars_seen == 0)
1914 cpp_error (pfile, "empty character constant");
1915 else if (chars_seen > max_chars)
/* Clamp the count to the number of characters actually merged.  */
1917 chars_seen = max_chars;
1918 cpp_warning (pfile, "character constant too long");
1920 else if (chars_seen > 1 && !traditional && warn_multi)
1921 cpp_warning (pfile, "multi-character character constant");
1923 /* If char type is signed, sign-extend the constant. The
1924 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1925 if (token->type == CPP_CHAR && chars_seen)
/* NBITS is the number of significant bits in RESULT.  The MASK
   declared here is an unsigned int that shadows the wider MASK
   declared at the top of the function.
   NOTE(review): NBITS can be as large as MAX_CHARS * WIDTH, which is
   derived from HOST_BITS_PER_WIDE_INT; if that can exceed
   HOST_BITS_PER_INT the shift count below is out of range --
   confirm.  */
1927 unsigned int nbits = chars_seen * width;
1928 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* True when __CHAR_UNSIGNED__ is defined as a macro, or when the
   top significant bit of RESULT is clear.  */
1930 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1931 || ((result >> (nbits - 1)) & 1) == 0)
/* Report the character count back through PCHARS_SEEN.  */
1937 *pchars_seen = chars_seen;
1941 /* Memory buffers. Changing these three constants can have a dramatic
1942 effect on performance. The values here are reasonable defaults,
1943 but might be tuned. If you adjust them, be sure to test across a
1944 range of uses of cpplib, including heavy nested function-like macro
1945 expansion. Also check the change in peak memory usage (NJAMD is a
1946 good tool for this). */
/* Smallest data area new_buff will allocate, in bytes.  */
1947 #define MIN_BUFF_SIZE 8000
/* Largest capacity _cpp_get_buff will accept from the free list for
   a request of MIN_SIZE bytes: MIN_BUFF_SIZE plus one and a half
   times the request.  */
1948 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size to request when extending BUFF: MIN_EXTRA plus twice the
   number of bytes between cur and limit of BUFF.
   NOTE(review): MIN_EXTRA is not parenthesised in the expansion,
   unlike the other macro arguments here, so it should be a simple
   expression.  */
1949 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1950 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Compile-time sanity check on the two tuning macros above.  */
1952 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1953 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
/* Offset of member u within struct dummy; that struct is declared
   in lines not visible in this listing.  */
1966 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic assumes
   ALIGN is a power of two.  */
1967 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1969 /* Create a new allocation buffer. Place the control block at the end
1970 of the buffer, so that buffer overflows will cause immediate chaos. */
/* NOTE(review): the signature and the declaration of RESULT are not
   visible in this listing; LEN is the requested data size.  */
1976 unsigned char *base;
/* Enforce the minimum size, then round LEN up to DEFAULT_ALIGNMENT;
   the control block is placed at base + len.  */
1978 if (len < MIN_BUFF_SIZE)
1979 len = MIN_BUFF_SIZE;
1980 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* A single allocation holds LEN data bytes followed by the _cpp_buff
   control block.  */
1982 base = xmalloc (len + sizeof (_cpp_buff));
1983 result = (_cpp_buff *) (base + len);
1984 result->base = base;
/* The end of the data area is also the address of the control
   block.  */
1986 result->limit = base + len;
1987 result->next = NULL;
1991 /* Place a chain of unwanted allocation buffers on the free list. */
/* BUFF is the head of the chain being released and PFILE owns the
   free list.  */
1993 _cpp_release_buff (pfile, buff)
/* END starts at the head of the chain.  NOTE(review): for the splice
   below to keep the whole chain, END has to be its last buffer by
   then; the walk to the tail is not visible in this listing.  */
1997 _cpp_buff *end = buff;
/* Attach the old free list after END and make BUFF the new head.  */
2001 end->next = pfile->free_buffs;
2002 pfile->free_buffs = buff;
2005 /* Return a free buffer of size at least MIN_SIZE. */
/* PFILE owns the free list that is searched.  A buffer is accepted
   when its capacity lies between MIN_SIZE and
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) inclusive.  The buffer handed
   back has its next link cleared and its cur pointer reset to
   base.  */
2007 _cpp_get_buff (pfile, min_size)
2011 _cpp_buff *result, **p;
/* Walk the free list through P, which points at the link that
   refers to the current candidate.  */
2013 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* Fall back to allocating a fresh buffer.  */
2018 return new_buff (min_size);
/* Capacity of the candidate in bytes.  */
2020 size = result->limit - result->base;
/* NOTE(review): the comment opened on the next line has no closing
   delimiter in this listing; its final line appears to be missing.  */
2021 /* Return a buffer that's big enough, but don't waste one that's
2023 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2028 result->next = NULL;
2029 result->cur = result->base;
2033 /* Creates a new buffer with enough space to hold the uncommitted
2034 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2035 the excess bytes to the new buffer. Chains the new buffer after
2036 BUFF, and returns the new buffer. */
2038 _cpp_append_extend_buff (pfile, buff, min_extra)
/* SIZE is MIN_EXTRA plus twice the bytes between cur and limit of
   BUFF.  The local NEW_BUFF has the same name as the allocation
   routine that _cpp_get_buff calls.  */
2043 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2044 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes
   starting at cur of BUFF to the base of the new buffer.  */
2046 buff->next = new_buff;
2047 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2051 /* Creates a new buffer with enough space to hold the uncommitted
2052 remaining bytes of the buffer pointed to by BUFF, and at least
2053 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2054 Chains the new buffer before the buffer pointed to by BUFF, and
2055 updates the pointer to point to the new buffer. */
/* The parameter that the comment above calls BUFF is named PBUFF in
   the definition.  */
2057 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer currently referred to through PBUFF.  SIZE
   is MIN_EXTRA plus twice the bytes between its cur and limit.  */
2062 _cpp_buff *new_buff, *old_buff = *pbuff;
2063 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2065 new_buff = _cpp_get_buff (pfile, size);
/* Copy BUFF_ROOM (old_buff) bytes starting at cur of the old buffer
   to the base of the new one, then chain the old buffer behind it.
   NOTE(review): the store back through PBUFF is not visible in this
   listing.  */
2066 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2067 new_buff->next = old_buff;
2071 /* Free a chain of buffers starting at BUFF. */
2073 _cpp_free_buff (buff)
/* The step expression reads NEXT, so the loop body has to save the
   next link before the current buffer is released.
   NOTE(review): the body is not visible in this listing.  Since
   new_buff stores the control block inside the xmalloc block whose
   start is kept in base, presumably base is what gets freed --
   verify.  */
2078 for (; buff; buff = next)
2085 /* Allocate permanent, unaligned storage of length LEN. */
/* Storage is carved out of the u_buff chain that belongs to
   PFILE.  */
2087 _cpp_unaligned_alloc (pfile, len)
/* Start from the current head buffer; RESULT is the first unused
   byte in it.  */
2091 _cpp_buff *buff = pfile->u_buff;
2092 unsigned char *result = buff->cur;
/* Not enough room left: get a buffer of at least LEN bytes and push
   it on the front of the u_buff chain.  */
2094 if (len > (size_t) (buff->limit - result))
2096 buff = _cpp_get_buff (pfile, len);
2097 buff->next = pfile->u_buff;
2098 pfile->u_buff = buff;
/* Advance cur past the bytes just handed out.  */
2102 buff->cur = result + len;
2106 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2107 That buffer is used for growing allocations when saving macro
2108 replacement lists in a #define, and when parsing an answer to an
2109 assertion in #assert, #unassert or #if (and therefore possibly
2110 whilst expanding macros). It therefore must not be used by any
2111 code that they might call: specifically the lexer and the guts of
the macro expander.
2114 All existing other uses clearly fit this restriction: storing
2115 registered pragmas during initialization. */
2117 _cpp_aligned_alloc (pfile, len)
2121 _cpp_buff *buff = pfile->a_buff;
2122 unsigned char *result = buff->cur;
2124 if (len > (size_t) (buff->limit - result))
2126 buff = _cpp_get_buff (pfile, len);
2127 buff->next = pfile->a_buff;
2128 pfile->a_buff = buff;
2132 buff->cur = result + len;