1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
28 /* MULTIBYTE_CHARS support only works for native compilers.
29 ??? Ideally what we want is to model widechar support after
30 the current floating point support. */
32 #undef MULTIBYTE_CHARS
35 #ifdef MULTIBYTE_CHARS
40 /* Tokens with SPELL_STRING store their spelling in the token list,
41 and its length in token->val.str.len. */
54 enum spell_type category;
55 const unsigned char *name;
58 static const unsigned char *const digraph_spellings[] =
59 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
61 #define OP(e, s) { SPELL_OPERATOR, U s },
62 #define TK(e, s) { s, U STRINGX (e) },
63 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
67 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
68 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
69 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
71 static void handle_newline PARAMS ((cpp_reader *));
72 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
73 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
75 static int skip_block_comment PARAMS ((cpp_reader *));
76 static int skip_line_comment PARAMS ((cpp_reader *));
77 static void adjust_column PARAMS ((cpp_reader *));
78 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
79 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
80 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
82 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
83 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
84 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
85 static void unterminated PARAMS ((cpp_reader *, int));
86 static bool trigraph_p PARAMS ((cpp_reader *));
87 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
88 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
89 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
90 const unsigned char *, unsigned int *));
91 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
93 static unsigned int hex_digit_value PARAMS ((unsigned int));
94 static _cpp_buff *new_buff PARAMS ((size_t));
98 /* Compares the token TOKEN to the NUL-terminated string STRING.
99 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
102 cpp_ideq (token, string)
103 const cpp_token *token;
106 if (token->type != CPP_NAME)
109 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
112 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
113 Returns with buffer->cur pointing to the character immediately
114 following the newline (combination). */
116 handle_newline (pfile)
119 cpp_buffer *buffer = pfile->buffer;
121 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
122 only accept CR-LF; maybe we should fall back to that behaviour?
124 NOTE: the EOF case in _cpp_lex_direct currently requires the
125 buffer->cur != buffer->rlimit test here for 0-length files. */
126 if (buffer->cur != buffer->rlimit
127 && buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
130 buffer->line_base = buffer->cur;
131 buffer->col_adjust = 0;
135 /* Subroutine of skip_escaped_newlines; called when a 3-character
136 sequence beginning with "??" is encountered. buffer->cur points to
137 the second '?'.
139 Warn if necessary, and returns true if the sequence forms a
140 trigraph and the trigraph should be honoured. */
145 cpp_buffer *buffer = pfile->buffer;
146 cppchar_t from_char = buffer->cur[1];
149 if (!_cpp_trigraph_map[from_char])
152 accept = CPP_OPTION (pfile, trigraphs);
154 /* Don't warn about trigraphs in comments. */
155 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
158 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
159 "trigraph ??%c converted to %c",
161 (int) _cpp_trigraph_map[from_char]);
162 else if (buffer->cur != buffer->last_Wtrigraphs)
164 buffer->last_Wtrigraphs = buffer->cur;
165 cpp_warning_with_line (pfile, pfile->line,
166 CPP_BUF_COL (buffer) - 1,
167 "trigraph ??%c ignored", (int) from_char);
174 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
175 lie in buffer->cur[-1]. Returns the next byte, which will be in
176 buffer->cur[-1]. This routine performs preprocessing stages 1 and
177 2 of the ISO C standard. */
179 skip_escaped_newlines (pfile)
182 cpp_buffer *buffer = pfile->buffer;
183 cppchar_t next = buffer->cur[-1];
185 /* Only do this if we apply stages 1 and 2. */
186 if (!buffer->from_stage3)
188 const unsigned char *saved_cur;
193 if (buffer->cur == buffer->rlimit)
198 if (buffer->cur[0] != '?' || buffer->cur + 1 == buffer->rlimit)
201 if (!trigraph_p (pfile))
204 /* Translate the trigraph. */
205 next = _cpp_trigraph_map[buffer->cur[1]];
207 if (next != '\\' || buffer->cur == buffer->rlimit)
211 /* We have a backslash, and room for at least one more
212 character. Skip horizontal whitespace. */
213 saved_cur = buffer->cur;
215 next1 = *buffer->cur++;
216 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
218 if (!is_vspace (next1))
220 buffer->cur = saved_cur;
224 if (saved_cur != buffer->cur - 1
225 && !pfile->state.lexing_comment)
226 cpp_warning (pfile, "backslash and newline separated by space");
228 handle_newline (pfile);
229 buffer->backup_to = buffer->cur;
230 if (buffer->cur == buffer->rlimit)
232 cpp_pedwarn (pfile, "backslash-newline at end of file");
236 next = *buffer->cur++;
238 while (next == '\\' || next == '?');
244 /* Obtain the next character, after trigraph conversion and skipping
245 an arbitrarily long string of escaped newlines. The common case of
246 no trigraphs or escaped newlines falls through quickly. On return,
247 buffer->backup_to points to where to return to if the character is
248 not to be processed. */
250 get_effective_char (pfile)
253 cppchar_t next = EOF;
254 cpp_buffer *buffer = pfile->buffer;
256 buffer->backup_to = buffer->cur;
257 if (buffer->cur < buffer->rlimit)
259 next = *buffer->cur++;
260 if (__builtin_expect (next == '?' || next == '\\', 0))
261 next = skip_escaped_newlines (pfile);
267 /* Skip a C-style block comment. We find the end of the comment by
268 seeing if an asterisk is before every '/' we encounter. Returns
269 non-zero if comment terminated by EOF, zero otherwise. */
271 skip_block_comment (pfile)
274 cpp_buffer *buffer = pfile->buffer;
275 cppchar_t c = EOF, prevc = EOF;
277 pfile->state.lexing_comment = 1;
278 while (buffer->cur != buffer->rlimit)
280 prevc = c, c = *buffer->cur++;
282 /* FIXME: For speed, create a new character class of characters
283 of interest inside block comments. */
284 if (c == '?' || c == '\\')
285 c = skip_escaped_newlines (pfile);
287 /* People like decorating comments with '*', so check for '/'
288 instead for efficiency. */
294 /* Warn about potential nested comments, but not if the '/'
295 comes immediately before the true comment delimiter.
296 Don't bother to get it right across escaped newlines. */
297 if (CPP_OPTION (pfile, warn_comments)
298 && buffer->cur + 1 < buffer->rlimit
299 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
300 cpp_warning_with_line (pfile,
301 pfile->line, CPP_BUF_COL (buffer),
302 "\"/*\" within comment");
304 else if (is_vspace (c))
305 handle_newline (pfile);
307 adjust_column (pfile);
310 pfile->state.lexing_comment = 0;
311 return c != '/' || prevc != '*';
314 /* Skip a C++ line comment, leaving buffer->cur pointing to the
315 terminating newline. Handles escaped newlines. Returns non-zero
316 if a multiline comment. */
318 skip_line_comment (pfile)
321 cpp_buffer *buffer = pfile->buffer;
322 unsigned int orig_line = pfile->line;
325 pfile->state.lexing_comment = 1;
328 if (buffer->cur == buffer->rlimit)
332 if (c == '?' || c == '\\')
333 c = skip_escaped_newlines (pfile);
335 while (!is_vspace (c));
337 /* Step back over the newline, except at EOF. */
341 pfile->state.lexing_comment = 0;
342 return orig_line != pfile->line;
345 /* pfile->buffer->cur is one beyond the \t character. Update
346 col_adjust so we track the column correctly. */
348 adjust_column (pfile)
351 cpp_buffer *buffer = pfile->buffer;
352 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
354 /* Round it up to multiple of the tabstop, but subtract 1 since the
355 tab itself occupies a character position. */
356 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
357 - col % CPP_OPTION (pfile, tabstop)) - 1;
360 /* Skips whitespace, saving the next non-whitespace character.
361 Adjusts buffer->col_adjust to account for tabs. Without this,
362 tokens might be assigned an incorrect column. */
364 skip_whitespace (pfile, c)
368 cpp_buffer *buffer = pfile->buffer;
369 unsigned int warned = 0;
373 /* Horizontal space always OK. */
377 adjust_column (pfile);
378 /* Just \f \v or \0 left. */
383 cpp_warning (pfile, "null character(s) ignored");
387 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
388 cpp_pedwarn_with_line (pfile, pfile->line,
389 CPP_BUF_COL (buffer),
390 "%s in preprocessing directive",
391 c == '\f' ? "form feed" : "vertical tab");
393 if (buffer->cur == buffer->rlimit)
397 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
398 while (is_nvspace (c));
403 /* See if the characters of a number token are valid in a name (no
404 '.', '+' or '-'). */
406 name_p (pfile, string)
408 const cpp_string *string;
412 for (i = 0; i < string->len; i++)
413 if (!is_idchar (string->text[i]))
419 /* Parse an identifier, skipping embedded backslash-newlines. This is
420 a critical inner loop. The common case is an identifier which has
421 not been split by backslash-newline, does not contain a dollar
422 sign, and has already been scanned (roughly 10:1 ratio of
423 seen:unseen identifiers in normal code; the distribution is
424 Poisson-like). Second most common case is a new identifier, not
425 split and no dollar sign. The other possibilities are rare and
426 have been relegated to parse_identifier_slow. */
428 static cpp_hashnode *
429 parse_identifier (pfile)
432 cpp_hashnode *result;
433 const U_CHAR *cur, *rlimit;
435 /* Fast-path loop. Skim over a normal identifier.
436 N.B. ISIDNUM does not include $. */
437 cur = pfile->buffer->cur - 1;
438 rlimit = pfile->buffer->rlimit;
441 while (cur < rlimit && ISIDNUM (*cur));
443 /* Check for slow-path cases. */
444 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
445 result = parse_identifier_slow (pfile, cur);
448 const U_CHAR *base = pfile->buffer->cur - 1;
449 result = (cpp_hashnode *)
450 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
451 pfile->buffer->cur = cur;
454 /* Rarely, identifiers require diagnostics when lexed.
455 XXX Has to be forced out of the fast path. */
456 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
457 && !pfile->state.skipping, 0))
459 /* It is allowed to poison the same identifier twice. */
460 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
461 cpp_error (pfile, "attempt to use poisoned \"%s\"",
464 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
465 replacement list of a variadic macro. */
466 if (result == pfile->spec_nodes.n__VA_ARGS__
467 && !pfile->state.va_args_ok)
469 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
475 /* Slow path. This handles identifiers which have been split, and
476 identifiers which contain dollar signs. The part of the identifier
477 from PFILE->buffer->cur-1 to CUR has already been scanned. */
478 static cpp_hashnode *
479 parse_identifier_slow (pfile, cur)
483 cpp_buffer *buffer = pfile->buffer;
484 const U_CHAR *base = buffer->cur - 1;
485 struct obstack *stack = &pfile->hash_table->stack;
486 unsigned int c, saw_dollar = 0, len;
488 /* Copy the part of the token which is known to be okay. */
489 obstack_grow (stack, base, cur - base);
491 /* Now process the part which isn't. We are looking at one of
492 '$', '\\', or '?' on entry to this loop. */
497 while (is_idchar (c))
499 obstack_1grow (stack, c);
504 if (buffer->cur == buffer->rlimit)
510 /* Potential escaped newline? */
511 buffer->backup_to = buffer->cur - 1;
512 if (c != '?' && c != '\\')
514 c = skip_escaped_newlines (pfile);
516 while (is_idchar (c));
518 /* Step back over the unwanted char, except at EOF. */
522 /* $ is not an identifier character in the standard, but is commonly
523 accepted as an extension. Don't warn about it in skipped
524 conditional blocks. */
525 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
526 cpp_pedwarn (pfile, "'$' character(s) in identifier");
528 /* Identifiers are null-terminated. */
529 len = obstack_object_size (stack);
530 obstack_1grow (stack, '\0');
532 return (cpp_hashnode *)
533 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
536 /* Parse a number, skipping embedded backslash-newlines. */
538 parse_number (pfile, number, c, leading_period)
544 cpp_buffer *buffer = pfile->buffer;
545 unsigned char *dest, *limit;
547 dest = BUFF_FRONT (pfile->u_buff);
548 limit = BUFF_LIMIT (pfile->u_buff);
550 /* Place a leading period. */
555 _cpp_extend_buff (pfile, &pfile->u_buff, 1);
556 dest = BUFF_FRONT (pfile->u_buff);
557 limit = BUFF_LIMIT (pfile->u_buff);
566 /* Need room for terminating null. */
567 if ((size_t) (limit - dest) < 2)
569 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
570 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
571 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
572 limit = BUFF_LIMIT (pfile->u_buff);
576 if (buffer->cur == buffer->rlimit)
581 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
583 /* Potential escaped newline? */
584 buffer->backup_to = buffer->cur - 1;
585 if (c != '?' && c != '\\')
587 c = skip_escaped_newlines (pfile);
589 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
591 /* Step back over the unwanted char, except at EOF. */
595 /* Null-terminate the number. */
598 number->text = BUFF_FRONT (pfile->u_buff);
599 number->len = dest - number->text;
600 BUFF_FRONT (pfile->u_buff) = dest + 1;
603 /* Subroutine of parse_string. Emits error for unterminated strings. */
605 unterminated (pfile, term)
609 cpp_error (pfile, "missing terminating %c character", term);
611 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
613 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
614 "possible start of unterminated string literal");
619 /* Subroutine of parse_string. */
621 unescaped_terminator_p (pfile, dest)
623 const unsigned char *dest;
625 const unsigned char *start, *temp;
627 /* In #include-style directives, terminators are not escapable. */
628 if (pfile->state.angled_headers)
631 start = BUFF_FRONT (pfile->u_buff);
633 /* An odd number of consecutive backslashes represents an escaped
634 terminator. */
635 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
638 return ((dest - temp) & 1) == 0;
641 /* Parses a string, character constant, or angle-bracketed header file
642 name. Handles embedded trigraphs and escaped newlines. The stored
643 string is guaranteed NUL-terminated, but it is not guaranteed that
644 this is the first NUL since embedded NULs are preserved.
645 Multi-line strings are allowed, but they are deprecated.
647 When this function returns, buffer->cur points to the next
648 character to be processed. */
650 parse_string (pfile, token, terminator)
653 cppchar_t terminator;
655 cpp_buffer *buffer = pfile->buffer;
656 unsigned char *dest, *limit;
658 bool warned_nulls = false, warned_multi = false;
660 dest = BUFF_FRONT (pfile->u_buff);
661 limit = BUFF_LIMIT (pfile->u_buff);
665 /* We need room for another char, possibly the terminating NUL. */
666 if ((size_t) (limit - dest) < 1)
668 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
669 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
670 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
671 limit = BUFF_LIMIT (pfile->u_buff);
674 if (buffer->cur == buffer->rlimit)
676 unterminated (pfile, terminator);
680 /* Handle trigraphs, escaped newlines etc. */
682 if (c == '?' || c == '\\')
683 c = skip_escaped_newlines (pfile);
687 if (unescaped_terminator_p (pfile, dest))
690 else if (is_vspace (c))
692 /* In assembly language, silently terminate string and
693 character literals at end of line. This is a kludge
694 around not knowing where comments are. */
695 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
701 /* Character constants and header names may not extend over
702 multiple lines. In Standard C, neither may strings.
703 Unfortunately, we accept multiline strings as an
704 extension, except in #include family directives. */
705 if (terminator != '"' || pfile->state.angled_headers)
707 unterminated (pfile, terminator);
715 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
718 if (pfile->mls_line == 0)
720 pfile->mls_line = token->line;
721 pfile->mls_col = token->col;
724 handle_newline (pfile);
727 else if (c == '\0' && !warned_nulls)
730 cpp_warning (pfile, "null character(s) preserved in literal");
738 token->val.str.text = BUFF_FRONT (pfile->u_buff);
739 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
740 BUFF_FRONT (pfile->u_buff) = dest + 1;
743 /* The stored comment includes the comment start and any terminator. */
745 save_comment (pfile, token, from)
748 const unsigned char *from;
750 unsigned char *buffer;
753 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
755 /* C++ comments probably (not definitely) have moved past a new
756 line, which we don't want to save in the comment. */
757 if (is_vspace (pfile->buffer->cur[-1]))
759 buffer = _cpp_unaligned_alloc (pfile, len);
761 token->type = CPP_COMMENT;
762 token->val.str.len = len;
763 token->val.str.text = buffer;
766 memcpy (buffer + 1, from, len - 1);
769 /* Allocate COUNT tokens for RUN. */
771 _cpp_init_tokenrun (run, count)
775 run->base = xnewvec (cpp_token, count);
776 run->limit = run->base + count;
780 /* Returns the next tokenrun, or creates one if there is none. */
785 if (run->next == NULL)
787 run->next = xnew (tokenrun);
788 run->next->prev = run;
789 _cpp_init_tokenrun (run->next, 250);
795 /* Allocate a single token that is invalidated at the same time as the
796 rest of the tokens on the line. Has its line and col set to the
797 same as the last lexed token, so that diagnostics appear in the
798 right place. */
800 _cpp_temp_token (pfile)
803 cpp_token *old, *result;
805 old = pfile->cur_token - 1;
806 if (pfile->cur_token == pfile->cur_run->limit)
808 pfile->cur_run = next_tokenrun (pfile->cur_run);
809 pfile->cur_token = pfile->cur_run->base;
812 result = pfile->cur_token++;
813 result->line = old->line;
814 result->col = old->col;
818 /* Lex a token into RESULT (external interface). Takes care of issues
819 like directive handling, token lookahead, multiple include
820 optimisation and skipping. */
822 _cpp_lex_token (pfile)
829 if (pfile->cur_token == pfile->cur_run->limit)
831 pfile->cur_run = next_tokenrun (pfile->cur_run);
832 pfile->cur_token = pfile->cur_run->base;
835 if (pfile->lookaheads)
838 result = pfile->cur_token++;
841 result = _cpp_lex_direct (pfile);
843 if (result->flags & BOL)
845 /* Is this a directive? If _cpp_handle_directive returns
846 false, it is an assembler #. */
847 if (result->type == CPP_HASH
848 && !pfile->state.parsing_args
849 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
851 if (pfile->cb.line_change && !pfile->state.skipping)
852 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
855 /* We don't skip tokens in directives. */
856 if (pfile->state.in_directive)
859 /* Outside a directive, invalidate controlling macros. At file
860 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
861 get here and MI optimisation works. */
862 pfile->mi_valid = false;
864 if (!pfile->state.skipping || result->type == CPP_EOF)
871 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
873 if (get_effective_char (pfile) == CHAR) \
874 result->type = THEN_TYPE; \
878 result->type = ELSE_TYPE; \
882 /* Lex a token into pfile->cur_token, which is also incremented, to
883 get diagnostics pointing to the correct location.
885 Does not handle issues such as token lookahead, multiple-include
886 optimisation, directives, skipping etc. This function is only
887 suitable for use by _cpp_lex_token, and in special cases like
888 lex_expansion_token which doesn't care for any of these issues.
890 When meeting a newline, returns CPP_EOF if parsing a directive,
891 otherwise returns to the start of the token buffer if permissible.
892 Returns the location of the lexed token. */
894 _cpp_lex_direct (pfile)
899 const unsigned char *comment_start;
900 cpp_token *result = pfile->cur_token++;
903 buffer = pfile->buffer;
904 result->flags = buffer->saved_flags;
905 buffer->saved_flags = 0;
907 result->line = pfile->line;
910 if (buffer->cur == buffer->rlimit)
913 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
919 buffer->saved_flags = BOL;
920 if (!pfile->state.parsing_args && !pfile->state.in_directive)
922 if (buffer->cur != buffer->line_base)
924 /* Non-empty files should end in a newline. Don't warn
925 for command line and _Pragma buffers. */
926 if (!buffer->from_stage3)
927 cpp_pedwarn (pfile, "no newline at end of file");
928 handle_newline (pfile);
931 /* Don't pop the last buffer. */
934 unsigned char stop = buffer->return_at_eof;
936 _cpp_pop_buffer (pfile);
941 result->type = CPP_EOF;
944 case ' ': case '\t': case '\f': case '\v': case '\0':
945 skip_whitespace (pfile, c);
946 result->flags |= PREV_WHITE;
949 case '\n': case '\r':
950 handle_newline (pfile);
951 buffer->saved_flags = BOL;
952 if (! pfile->state.in_directive)
954 if (pfile->state.parsing_args == 2)
955 buffer->saved_flags |= PREV_WHITE;
956 if (!pfile->keep_tokens)
958 pfile->cur_run = &pfile->base_run;
959 result = pfile->base_run.base;
960 pfile->cur_token = result + 1;
964 result->type = CPP_EOF;
969 /* These could start an escaped newline, or '?' a trigraph. Let
970 skip_escaped_newlines do all the work. */
972 unsigned int line = pfile->line;
974 c = skip_escaped_newlines (pfile);
975 if (line != pfile->line)
978 /* We had at least one escaped newline of some sort.
979 Update the token's line and column. */
980 goto update_tokens_line;
984 /* We are either the original '?' or '\\', or a trigraph. */
986 result->type = CPP_QUERY;
993 case '0': case '1': case '2': case '3': case '4':
994 case '5': case '6': case '7': case '8': case '9':
995 result->type = CPP_NUMBER;
996 parse_number (pfile, &result->val.str, c, 0);
1000 if (!CPP_OPTION (pfile, dollars_in_ident))
1002 /* Fall through... */
1005 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1006 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1007 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1008 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1010 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1011 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1012 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1013 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1015 result->type = CPP_NAME;
1016 result->val.node = parse_identifier (pfile);
1018 /* 'L' may introduce wide characters or strings. */
1019 if (result->val.node == pfile->spec_nodes.n_L
1020 && buffer->cur < buffer->rlimit)
1023 if (c == '\'' || c == '"')
1026 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1027 parse_string (pfile, result, c);
1030 /* Convert named operators to their proper types. */
1031 else if (result->val.node->flags & NODE_OPERATOR)
1033 result->flags |= NAMED_OP;
1034 result->type = result->val.node->value.operator;
1040 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1041 parse_string (pfile, result, c);
1045 /* A potential block or line comment. */
1046 comment_start = buffer->cur;
1047 c = get_effective_char (pfile);
1051 if (skip_block_comment (pfile))
1052 cpp_error (pfile, "unterminated comment");
1054 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1055 || CPP_IN_SYSTEM_HEADER (pfile)))
1057 /* Warn about comments only if pedantically GNUC89, and not
1058 in system headers. */
1059 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1060 && ! buffer->warned_cplusplus_comments)
1063 "C++ style comments are not allowed in ISO C89");
1065 "(this will be reported only once per input file)");
1066 buffer->warned_cplusplus_comments = 1;
1069 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1070 cpp_warning (pfile, "multi-line comment");
1074 result->type = CPP_DIV_EQ;
1080 result->type = CPP_DIV;
1084 if (!pfile->state.save_comments)
1086 result->flags |= PREV_WHITE;
1087 goto update_tokens_line;
1090 /* Save the comment as a token in its own right. */
1091 save_comment (pfile, result, comment_start);
1095 if (pfile->state.angled_headers)
1097 result->type = CPP_HEADER_NAME;
1098 parse_string (pfile, result, '>');
1102 c = get_effective_char (pfile);
1104 result->type = CPP_LESS_EQ;
1106 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1107 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1108 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1109 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1111 result->type = CPP_OPEN_SQUARE;
1112 result->flags |= DIGRAPH;
1114 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1116 result->type = CPP_OPEN_BRACE;
1117 result->flags |= DIGRAPH;
1122 result->type = CPP_LESS;
1127 c = get_effective_char (pfile);
1129 result->type = CPP_GREATER_EQ;
1131 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1132 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1133 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1137 result->type = CPP_GREATER;
1142 c = get_effective_char (pfile);
1144 result->type = CPP_MOD_EQ;
1145 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1147 result->flags |= DIGRAPH;
1148 result->type = CPP_HASH;
1149 if (get_effective_char (pfile) == '%')
1151 const unsigned char *pos = buffer->cur;
1153 if (get_effective_char (pfile) == ':')
1154 result->type = CPP_PASTE;
1156 buffer->cur = pos - 1;
1161 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1163 result->flags |= DIGRAPH;
1164 result->type = CPP_CLOSE_BRACE;
1169 result->type = CPP_MOD;
1174 result->type = CPP_DOT;
1175 c = get_effective_char (pfile);
1178 const unsigned char *pos = buffer->cur;
1180 if (get_effective_char (pfile) == '.')
1181 result->type = CPP_ELLIPSIS;
1183 buffer->cur = pos - 1;
1185 /* All known character sets have 0...9 contiguous. */
1186 else if (ISDIGIT (c))
1188 result->type = CPP_NUMBER;
1189 parse_number (pfile, &result->val.str, c, 1);
1191 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1192 result->type = CPP_DOT_STAR;
1198 c = get_effective_char (pfile);
1200 result->type = CPP_PLUS_PLUS;
1202 result->type = CPP_PLUS_EQ;
1206 result->type = CPP_PLUS;
1211 c = get_effective_char (pfile);
1214 result->type = CPP_DEREF;
1215 if (CPP_OPTION (pfile, cplusplus))
1217 if (get_effective_char (pfile) == '*')
1218 result->type = CPP_DEREF_STAR;
1224 result->type = CPP_MINUS_MINUS;
1226 result->type = CPP_MINUS_EQ;
1230 result->type = CPP_MINUS;
1235 c = get_effective_char (pfile);
1237 result->type = CPP_AND_AND;
1239 result->type = CPP_AND_EQ;
1243 result->type = CPP_AND;
1248 c = get_effective_char (pfile);
1250 result->type = CPP_OR_OR;
1252 result->type = CPP_OR_EQ;
1256 result->type = CPP_OR;
1261 c = get_effective_char (pfile);
1262 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1263 result->type = CPP_SCOPE;
1264 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1266 result->flags |= DIGRAPH;
1267 result->type = CPP_CLOSE_SQUARE;
1272 result->type = CPP_COLON;
1276 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1277 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1278 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1279 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1280 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1282 case '~': result->type = CPP_COMPL; break;
1283 case ',': result->type = CPP_COMMA; break;
1284 case '(': result->type = CPP_OPEN_PAREN; break;
1285 case ')': result->type = CPP_CLOSE_PAREN; break;
1286 case '[': result->type = CPP_OPEN_SQUARE; break;
1287 case ']': result->type = CPP_CLOSE_SQUARE; break;
1288 case '{': result->type = CPP_OPEN_BRACE; break;
1289 case '}': result->type = CPP_CLOSE_BRACE; break;
1290 case ';': result->type = CPP_SEMICOLON; break;
1292 /* @ is a punctuator in Objective C. */
1293 case '@': result->type = CPP_ATSIGN; break;
1297 result->type = CPP_OTHER;
1305 /* An upper bound on the number of bytes needed to spell a token,
1306 including preceding whitespace. */
1308 cpp_token_len (token)
1309 const cpp_token *token;
1313 switch (TOKEN_SPELL (token))
1315 default: len = 0; break;
1317 case SPELL_STRING: len = token->val.str.len; break;
1318 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1320 /* 1 for whitespace, 4 for comment delimiters. */
1324 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1325 already contain enough space to hold the token's spelling.
1326 Returns a pointer to the character after the last character
1327 written. */
1329 cpp_spell_token (pfile, token, buffer)
1330 cpp_reader *pfile; /* Would be nice to be rid of this... */
1331 const cpp_token *token;
1332 unsigned char *buffer;
1334 switch (TOKEN_SPELL (token))
1336 case SPELL_OPERATOR:
1338 const unsigned char *spelling;
1341 if (token->flags & DIGRAPH)
1343 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1344 else if (token->flags & NAMED_OP)
1347 spelling = TOKEN_NAME (token);
1349 while ((c = *spelling++) != '\0')
1355 *buffer++ = token->val.c;
1360 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1361 buffer += NODE_LEN (token->val.node);
1365 memcpy (buffer, token->val.str.text, token->val.str.len);
1366 buffer += token->val.str.len;
1371 int left, right, tag;
1372 switch (token->type)
1374 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1375 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1376 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1377 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1378 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1380 cpp_ice (pfile, "unknown string token %s\n", TOKEN_NAME (token));
1383 if (tag) *buffer++ = tag;
1385 memcpy (buffer, token->val.str.text, token->val.str.len);
1386 buffer += token->val.str.len;
1392 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1399 /* Returns a token as a null-terminated string. The string is
1400 temporary, and automatically freed later. Useful for diagnostics. */
1402 cpp_token_as_text (pfile, token)
1404 const cpp_token *token;
1406 unsigned int len = cpp_token_len (token);
1407 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1409 end = cpp_spell_token (pfile, token, start);
1415 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1417 cpp_type2name (type)
1418 enum cpp_ttype type;
/* Plain table lookup: TYPE indexes token_spellings directly and is not
   range-checked here.  Only the type is consulted, so token flags such
   as DIGRAPH cannot influence the result. */
1420 return (const char *) token_spellings[type].name;
1423 /* Writes the spelling of token to FP, without any preceding space.
1424 Separated from cpp_spell_token for efficiency - to avoid stdio
1425 double-buffering. */
1427 cpp_output_token (token, fp)
1428 const cpp_token *token;
/* The spelling category of the token's type decides how it is written. */
1431 switch (TOKEN_SPELL (token))
1433 case SPELL_OPERATOR:
1435 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their text from digraph_spellings, indexed
   by the type's offset from CPP_FIRST_DIGRAPH; the plain case uses the
   name recorded in token_spellings (via TOKEN_NAME). */
1438 if (token->flags & DIGRAPH)
1440 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1441 else if (token->flags & NAMED_OP)
1444 spelling = TOKEN_NAME (token);
/* Loop tail: advance to the next spelling character and stop once the
   terminating NUL is reached. */
1449 while ((c = *++spelling) != '\0');
/* The spelling is the single character stored in val.c. */
1454 putc (token->val.c, fp);
/* The spelling is the name of the token's node, NODE_LEN bytes long. */
1459 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
/* The spelling is the stored text, written verbatim. */
1463 fwrite (token->val.str.text, 1, token->val.str.len, fp);
/* Delimited tokens: pick the opening delimiter, the closing delimiter
   and the optional 'L' prefix from the token type. */
1468 int left, right, tag;
1469 switch (token->type)
1471 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1472 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1473 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1474 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1475 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
/* This function receives no cpp_reader, so the diagnostic is printed
   straight to stderr instead of going through cpp_ice. */
1477 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
/* Wide strings and characters get their 'L' prefix. */
1480 if (tag) putc (tag, fp);
/* The stored text is written as-is. */
1482 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1488 /* An error, most probably. */
1493 /* Compare two tokens. */
1495 _cpp_equiv_tokens (a, b)
1496 const cpp_token *a, *b;
/* Tokens can only be equivalent when both their types and their flags
   match; what else must agree depends on the spelling category of A. */
1498 if (a->type == b->type && a->flags == b->flags)
1499 switch (TOKEN_SPELL (a))
1501 default: /* Keep compiler happy. */
1502 case SPELL_OPERATOR:
1505 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must also agree on the argument number; for any other
   type reaching this return, matching type and flags is enough. */
1507 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compared by node pointer, not by the characters of the name. */
1509 return a->val.node == b->val.node;
/* Stored text must have the same length and the same bytes. */
1512 return (a->val.str.len == b->val.str.len
1513 && !memcmp (a->val.str.text, b->val.str.text,
1520 /* Returns nonzero if a space should be inserted to avoid an
1521 accidental token paste for output. For simplicity, it is
1522 conservative, and occasionally advises a space where one is not
1523 needed, e.g. "." and ".2". */
1526 cpp_avoid_paste (pfile, token1, token2)
1528 const cpp_token *token1, *token2;
/* A and B are the types of the left-hand and right-hand token. */
1530 enum cpp_ttype a = token1->type, b = token2->type;
1533 if (token1->flags & NAMED_OP)
1535 if (token2->flags & NAMED_OP)
/* C is set to the first character of TOKEN2's spelling when TOKEN2 is a
   digraph or belongs to the operator spelling category. */
1539 if (token2->flags & DIGRAPH)
1540 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1541 else if (token_spellings[b].category == SPELL_OPERATOR)
1542 c = token_spellings[b].name[0];
1544 /* Quickly get everything that can paste with an '='. */
1545 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below answers: would a right-hand token starting with C (or
   of type B) run together with a left-hand token of type A?  Several
   entries cover the digraphs listed in digraph_spellings, e.g. '<'
   followed by ':' or '%', '%' followed by ':' or '>', and ':' followed
   by '>'. */
1550 case CPP_GREATER: return c == '>' || c == '?';
1551 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1552 case CPP_PLUS: return c == '+';
1553 case CPP_MINUS: return c == '-' || c == '>';
1554 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1555 case CPP_MOD: return c == ':' || c == '>';
1556 case CPP_AND: return c == '&';
1557 case CPP_OR: return c == '|';
1558 case CPP_COLON: return c == ':' || c == '>';
1559 case CPP_DEREF: return c == '*';
1560 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1561 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* A number only counts here when name_p accepts its text. */
1562 case CPP_NAME: return ((b == CPP_NUMBER
1563 && name_p (pfile, &token2->val.str))
1565 || b == CPP_CHAR || b == CPP_STRING); /* L */
1566 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1567 || c == '.' || c == '+' || c == '-');
/* Only relevant with the objc option: '@' followed by a name or string. */
1568 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1569 && token1->val.c == '@'
1570 && (b == CPP_NAME || b == CPP_STRING));
1577 /* Output all the remaining tokens on the current line, and a newline
1578 character, to FP. Leading whitespace is removed. If there are
1579 macros, special token padding is not performed. */
1581 cpp_output_line (pfile, fp)
1585 const cpp_token *token;
/* The first token is fetched and written without looking at its
   PREV_WHITE flag; the flag is only tested on the tokens that follow. */
1587 token = cpp_get_token (pfile);
/* Keep writing tokens until cpp_get_token hands back CPP_EOF. */
1588 while (token->type != CPP_EOF)
1590 cpp_output_token (token, fp);
1591 token = cpp_get_token (pfile);
/* Tested on the token just fetched, i.e. before it is written on the
   next iteration. */
1592 if (token->flags & PREV_WHITE)
1599 /* Returns the value of a hexadecimal digit. */
/* The conversion itself is delegated to hex_value. */
1605 return hex_value (c);
1610 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1611 failure if cpplib is not parsing C++ or C99. Such failure is
1612 silent, and no variables are updated. Otherwise returns 0, and
1613 warns if -Wtraditional.
1615 [lex.charset]: The character designated by the universal character
1616 name \UNNNNNNNN is that character whose character short name in
1617 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1618 universal character name \uNNNN is that character whose character
1619 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1620 for a universal character name is less than 0x20 or in the range
1621 0x7F-0x9F (inclusive), or if the universal character name
1622 designates a character in the basic source character set, then the
1623 program is ill-formed.
1625 We assume that wchar_t is Unicode, so we don't need to do any
1626 mapping. Is this ever wrong?
1628 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1629 LIMIT is the end of the string or charconst. PSTR is updated to
1630 point after the UCS on return, and the UCS is written into PC. */
1633 maybe_read_ucs (pfile, pstr, limit, pc)
1635 const unsigned char **pstr;
1636 const unsigned char *limit;
/* P is a working copy of *PSTR, CODE accumulates the value, and C starts
   out as the escape letter read through PC. */
1639 const unsigned char *p = *pstr;
1640 unsigned int code = 0;
1641 unsigned int c = *pc, length;
1643 /* Only attempt to interpret a UCS for C++ and C99. */
1644 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1647 if (CPP_WTRADITIONAL (pfile))
1648 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* 'u' is followed by four hex digits; any other letter reaching this
   point is handled as 'U' and takes eight. */
1650 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH characters remain before LIMIT. */
1652 if ((size_t) (limit - p) < length)
1654 cpp_error (pfile, "incomplete universal-character-name");
1655 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down as digits are consumed, so it reaches zero only if
   the loop runs to completion. */
1660 for (; length; length--, p++)
1664 code = (code << 4) + hex_digit_value (c);
1668 "non-hex digit '%c' in universal-character-name", c);
1669 /* We shouldn't skip in case there are multibyte chars. */
1675 #ifdef TARGET_EBCDIC
1676 cpp_error (pfile, "universal-character-name on EBCDIC target");
1677 code = 0x3f; /* EBCDIC invalid character */
1679 /* True extended characters are OK. */
1681 && !(code & 0x80000000)
1682 && !(code >= 0xD800 && code <= 0xDFFF))
1684 /* The standard permits $, @ and ` to be specified as UCNs. We use
1685 hex escapes so that this also works with EBCDIC hosts. */
1686 else if (code == 0x24 || code == 0x40 || code == 0x60)
1688 /* Don't give another error if one occurred above. */
1689 else if (length == 0)
1690 cpp_error (pfile, "universal-character-name out of range");
1698 /* Interpret an escape sequence, and return its value. PSTR points to
1699 the input pointer, which is just after the backslash. LIMIT is how
1700 much text we have. MASK is a bitmask for the precision for the
1701 destination type (char or wchar_t). TRADITIONAL, if true, does not
1702 interpret escapes that did not exist in traditional C.
1704 Handles all relevant diagnostics. */
1707 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1709 const unsigned char **pstr;
1710 const unsigned char *limit;
1711 unsigned HOST_WIDE_INT mask;
/* STR is a working copy of *PSTR.  C is the character that follows the
   backslash, and STR already points past it. */
1715 const unsigned char *str = *pstr;
1716 unsigned int c = *str++;
/* These characters stand for themselves, so C is left unchanged. */
1720 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the corresponding TARGET_* character values. */
1721 case 'b': c = TARGET_BS; break;
1722 case 'f': c = TARGET_FF; break;
1723 case 'n': c = TARGET_NEWLINE; break;
1724 case 'r': c = TARGET_CR; break;
1725 case 't': c = TARGET_TAB; break;
1726 case 'v': c = TARGET_VT; break;
1728 case '(': case '{': case '[': case '%':
1729 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1730 '\%' is used to prevent SCCS from getting confused. */
1731 unknown = CPP_PEDANTIC (pfile);
1735 if (CPP_WTRADITIONAL (pfile))
1736 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1742 if (CPP_PEDANTIC (pfile))
1743 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* maybe_read_ucs returns 1 when universal character names are not
   recognized in the current mode; that result becomes UNKNOWN.  On
   success it advances STR and stores the value in C. */
1748 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1752 if (CPP_WTRADITIONAL (pfile))
1753 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* I accumulates the hex value; OVERFLOW turns nonzero once significant
   bits have been shifted out of I. */
1757 unsigned int i = 0, overflow = 0;
1758 int digits_found = 0;
/* i << 4 >> 4 differs from i exactly when one of the top four bits of I
   is set, which is when the shift on the following line loses data. */
1766 overflow |= i ^ (i << 4 >> 4);
1767 i = (i << 4) + hex_digit_value (c);
1772 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit in MASK. */
1774 if (overflow | (i != (i & mask)))
1776 cpp_pedwarn (pfile, "hex escape sequence out of range");
1783 case '0': case '1': case '2': case '3':
1784 case '4': case '5': case '6': case '7':
/* The first octal digit is already in C. */
1786 unsigned int i = c - '0';
/* Further digits are read only while text remains before LIMIT and
   the COUNT bound holds. */
1789 while (str < limit && ++count < 3)
1792 if (c < '0' || c > '7')
1795 i = (i << 3) + c - '0';
1798 if (i != (i & mask))
1800 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two forms of the unknown-escape warning: one prints the character
   itself, the other prints its octal code. */
1815 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1817 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1821 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* When no MAX_* width has been defined already, fall back to the plain
   CHAR_TYPE_SIZE / WCHAR_TYPE_SIZE value. */
1827 #ifndef MAX_CHAR_TYPE_SIZE
1828 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1831 #ifndef MAX_WCHAR_TYPE_SIZE
1832 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1835 /* Interpret a (possibly wide) character constant in TOKEN.
1836 WARN_MULTI warns about multi-character charconsts, if not
1837 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1838 that did not exist in traditional C. PCHARS_SEEN points to a
1839 variable that is filled in with the number of characters seen. */
1841 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1843 const cpp_token *token;
1846 unsigned int *pchars_seen;
/* STR and LIMIT bracket the constant's text as stored in the token. */
1848 const unsigned char *str = token->val.str.text;
1849 const unsigned char *limit = str + token->val.str.len;
1850 unsigned int chars_seen = 0;
1851 unsigned int width, max_chars, c;
1852 unsigned HOST_WIDE_INT mask;
1853 HOST_WIDE_INT result = 0;
1855 #ifdef MULTIBYTE_CHARS
1856 (void) local_mbtowc (NULL, NULL, 0);
1859 /* Width in bits. */
1860 if (token->type == CPP_CHAR)
1861 width = MAX_CHAR_TYPE_SIZE;
1863 width = MAX_WCHAR_TYPE_SIZE;
/* MASK keeps the low WIDTH bits of a character value. */
1865 if (width < HOST_BITS_PER_WIDE_INT)
1866 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of WIDTH-bit characters that fit in one HOST_WIDE_INT. */
1869 max_chars = HOST_BITS_PER_WIDE_INT / width;
1873 #ifdef MULTIBYTE_CHARS
1877 char_len = local_mbtowc (&wc, str, limit - str);
1880 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which advances STR. */
1893 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1895 #ifdef MAP_CHARACTER
1897 c = MAP_CHARACTER (c);
1900 /* Merge character into result; ignore excess chars. */
1901 if (++chars_seen <= max_chars)
1903 if (width < HOST_BITS_PER_WIDE_INT)
/* Earlier characters move up by WIDTH bits; the new one fills the low
   WIDTH bits. */
1904 result = (result << width) | (c & mask);
1910 if (chars_seen == 0)
1911 cpp_error (pfile, "empty character constant");
1912 else if (chars_seen > max_chars)
/* Clamp the count so that the sign handling below and the value stored
   through PCHARS_SEEN never exceed MAX_CHARS. */
1914 chars_seen = max_chars;
1915 cpp_warning (pfile, "character constant too long");
1917 else if (chars_seen > 1 && !traditional && warn_multi)
1918 cpp_warning (pfile, "multi-character character constant");
1920 /* If char type is signed, sign-extend the constant. The
1921 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1922 if (token->type == CPP_CHAR && chars_seen)
/* NOTE(review): this MASK is a new unsigned int that shadows the
   function-level MASK declared above.
   NOTE(review): NBITS can be as large as HOST_BITS_PER_WIDE_INT, since
   CHARS_SEEN is only capped at MAX_CHARS.  If that is larger than
   HOST_BITS_PER_INT, the shift count below is negative; confirm the
   two widths agree on every supported host. */
1924 unsigned int nbits = chars_seen * width;
1925 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* Taken when __CHAR_UNSIGNED__ is defined as a macro, or when the top
   bit of the NBITS-wide value is clear. */
1927 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1928 || ((result >> (nbits - 1)) & 1) == 0)
1934 *pchars_seen = chars_seen;
1938 /* Memory buffers. Changing these three constants can have a dramatic
1939 effect on performance. The values here are reasonable defaults,
1940 but might be tuned. If you adjust them, be sure to test across a
1941 range of uses of cpplib, including heavy nested function-like macro
1942 expansion. Also check the change in peak memory usage (NJAMD is a
1943 good tool for this). */
/* Smallest data area that new_buff will allocate. */
1944 #define MIN_BUFF_SIZE 8000
/* Largest free buffer that _cpp_get_buff will reuse for a request of
   MIN_SIZE bytes. */
1945 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size to request when a buffer has to grow: MIN_EXTRA bytes plus twice
   the room still unused in BUFF (limit - cur).  Both arguments are
   parenthesized in the expansion so that any expression can be passed
   as MIN_EXTRA.  BUFF is evaluated twice and must therefore be free of
   side effects.  */
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Build-time check: even for a request of zero bytes the upper bound
   must not fall below MIN_BUFF_SIZE. */
1949 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1950 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
/* Alignment value taken from the offset of member U in struct dummy. */
1963 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Round SIZE up to a multiple of ALIGN.  The mask arithmetic is only
   correct when ALIGN is a power of two. */
1964 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
1966 /* Create a new allocation buffer. Place the control block at the end
1967 of the buffer, so that buffer overflows will cause immediate chaos. */
1973 unsigned char *base;
/* Never allocate less than MIN_BUFF_SIZE, and round the size up to a
   multiple of DEFAULT_ALIGNMENT (presumably so that the control block
   placed after the data is suitably aligned). */
1975 if (len < MIN_BUFF_SIZE)
1976 len = MIN_BUFF_SIZE;
1977 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* A single allocation holds LEN bytes of data followed by the _cpp_buff
   control block, which therefore lives at BASE + LEN. */
1979 base = xmalloc (len + sizeof (_cpp_buff));
1980 result = (_cpp_buff *) (base + len);
1981 result->base = base;
/* LIMIT marks the end of the data area, which is also the address of
   the control block itself. */
1983 result->limit = base + len;
1984 result->next = NULL;
1988 /* Place a chain of unwanted allocation buffers on the free list. */
1990 _cpp_release_buff (pfile, buff)
/* END starts at the head of the chain being released. */
1994 _cpp_buff *end = buff;
/* Link the existing free list behind END and make BUFF the new head of
   PFILE's free list. */
1998 end->next = pfile->free_buffs;
1999 pfile->free_buffs = buff;
2002 /* Return a free buffer of size at least MIN_SIZE. */
2004 _cpp_get_buff (pfile, min_size)
2008 _cpp_buff *result, **p;
/* P addresses the link under examination: first the head of PFILE's
   free list, then the next field of each buffer in turn. */
2010 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* Fall back to a freshly allocated buffer of at least MIN_SIZE bytes. */
2015 return new_buff (min_size);
/* SIZE is the capacity of the candidate's data area. */
2017 size = result->limit - result->base;
2018 /* Return a buffer that's big enough, but don't waste one that's
2020 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2025 result->next = NULL;
2026 result->cur = result->base;
2030 /* Creates a new buffer with enough space to hold the uncommitted
2031 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2032 the excess bytes to the new buffer. Chains the new buffer after
2033 BUFF, and returns the new buffer. */
2035 _cpp_append_extend_buff (pfile, buff, min_extra)
/* SIZE is MIN_EXTRA plus twice the space between BUFF's cur and limit
   (see EXTENDED_BUFF_SIZE). */
2040 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2041 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Chain the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes,
   starting at BUFF's cur pointer, to the base of the new buffer. */
2043 buff->next = new_buff;
2044 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2048 /* Creates a new buffer with enough space to hold the uncommitted
2049 remaining bytes of the buffer pointed to by BUFF, and at least
2050 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2051 Chains the new buffer before the buffer pointed to by BUFF, and
2052 updates the pointer to point to the new buffer. */
2054 _cpp_extend_buff (pfile, pbuff, min_extra)
/* OLD_BUFF is the buffer the caller currently holds through PBUFF. */
2059 _cpp_buff *new_buff, *old_buff = *pbuff;
/* SIZE is MIN_EXTRA plus twice the space between OLD_BUFF's cur and
   limit (see EXTENDED_BUFF_SIZE). */
2060 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2062 new_buff = _cpp_get_buff (pfile, size);
/* Copy BUFF_ROOM (old_buff) bytes, starting at OLD_BUFF's cur pointer,
   to the base of the new buffer, then chain the old buffer behind the
   new one. */
2063 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2064 new_buff->next = old_buff;
2068 /* Free a chain of buffers starting at BUFF. */
2070 _cpp_free_buff (buff)
/* Walk the chain until BUFF becomes null; NEXT supplies the buffer to
   visit on the following iteration. */
2075 for (; buff; buff = next)
2082 /* Allocate permanent, unaligned storage of length LEN. */
2084 _cpp_unaligned_alloc (pfile, len)
/* The allocation is carved from the unused space of PFILE's current
   u_buff, starting at its cur pointer. */
2088 _cpp_buff *buff = pfile->u_buff;
2089 unsigned char *result = buff->cur;
/* Not enough room between cur and limit: obtain a buffer of at least
   LEN bytes and push it on the front of the u_buff chain, keeping the
   previous buffer linked behind it. */
2091 if (len > (size_t) (buff->limit - result))
2093 buff = _cpp_get_buff (pfile, len);
2094 buff->next = pfile->u_buff;
2095 pfile->u_buff = buff;
/* Claim LEN bytes by advancing the buffer's cur pointer. */
2099 buff->cur = result + len;
2103 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2104 That buffer is used for growing allocations when saving macro
2105 replacement lists in a #define, and when parsing an answer to an
2106 assertion in #assert, #unassert or #if (and therefore possibly
2107 whilst expanding macros). It therefore must not be used by any
2108 code that they might call: specifically the lexer and the guts of
2111 All existing other uses clearly fit this restriction: storing
2112 registered pragmas during initialization. */
/* NOTE(review): the summary above says "unaligned" although this
   function is named _cpp_aligned_alloc, and nothing in the statements
   below adjusts alignment.  Confirm which wording is intended. */
2114 _cpp_aligned_alloc (pfile, len)
/* The allocation is carved from the unused space of PFILE's current
   a_buff, starting at its cur pointer. */
2118 _cpp_buff *buff = pfile->a_buff;
2119 unsigned char *result = buff->cur;
/* Not enough room between cur and limit: obtain a buffer of at least
   LEN bytes and push it on the front of the a_buff chain, keeping the
   previous buffer linked behind it. */
2121 if (len > (size_t) (buff->limit - result))
2123 buff = _cpp_get_buff (pfile, len);
2124 buff->next = pfile->a_buff;
2125 pfile->a_buff = buff;
/* Claim LEN bytes by advancing the buffer's cur pointer. */
2129 buff->cur = result + len;