1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
/* Table of token spellings, indexed by token type (see TOKEN_SPELL and
   TOKEN_NAME below).  OP builds an entry whose category is
   SPELL_OPERATOR and whose name is the string S; TK builds an entry
   whose category is S and whose name is U STRINGX (e) -- presumably the
   enumerator's own name as a string.  TTYPE_TABLE is defined outside
   this file; presumably it expands to one OP or TK use per token type.  */
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
/* Look up TOKEN's type in token_spellings: TOKEN_SPELL gives the
   spelling category (an enum spell_type), TOKEN_NAME the recorded
   spelling string.  TOKEN is a pointer and is evaluated once.  */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
108 static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
109 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 static _cpp_buff *new_buff PARAMS ((unsigned int));
114 Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
118 cpp_ideq (token, string)
119 const cpp_token *token;
122 if (token->type != CPP_NAME)
125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
128 /* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
131 handle_newline (pfile, newline_char)
133 cppchar_t newline_char;
136 cppchar_t next = EOF;
139 buffer = pfile->buffer;
140 buffer->col_adjust = 0;
141 buffer->line_base = buffer->cur;
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer->cur < buffer->rlimit)
146 next = *buffer->cur++;
147 if (next + newline_char == '\r' + '\n')
149 buffer->line_base = buffer->cur;
150 if (buffer->cur < buffer->rlimit)
151 next = *buffer->cur++;
157 buffer->read_ahead = next;
161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
167 trigraph_ok (pfile, from_char)
171 int accept = CPP_OPTION (pfile, trigraphs);
173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
176 cpp_buffer *buffer = pfile->buffer;
179 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
180 "trigraph ??%c converted to %c",
182 (int) _cpp_trigraph_map[from_char]);
183 else if (buffer->cur != buffer->last_Wtrigraphs)
185 buffer->last_Wtrigraphs = buffer->cur;
186 cpp_warning_with_line (pfile, pfile->line,
187 CPP_BUF_COL (buffer) - 2,
188 "trigraph ??%c ignored", (int) from_char);
195 /* Set result->type to T and reset buffer->read_ahead to EOF, so that
    the character just looked at is not handed out again as look-ahead.
    Assumes local variables buffer and result.  */
196 #define ACCEPT_CHAR(t) \
197 do { result->type = t; buffer->read_ahead = EOF; } while (0)
199 /* When we move to multibyte character sets, add to these something
200 that saves and restores the state of the multibyte conversion
201 library. This probably involves saving and restoring a "cookie".
202 In the case of glibc it is an 8-byte structure, so is not a high
203 overhead operation. In any case, it's out of the fast path. */
/* SAVE_STATE records the current read position (buffer->cur) in
   saved_cur; RESTORE_STATE rewinds buffer->cur to it.  Both assume
   local variables buffer and saved_cur.  */
204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
207 /* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard. */
212 skip_escaped_newlines (pfile, next)
216 cpp_buffer *buffer = pfile->buffer;
218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer->from_stage3)
222 const unsigned char *saved_cur;
227 if (buffer->cur == buffer->rlimit)
233 next1 = *buffer->cur++;
234 if (next1 != '?' || buffer->cur == buffer->rlimit)
240 next1 = *buffer->cur++;
241 if (!_cpp_trigraph_map[next1]
242 || !trigraph_ok (pfile, next1))
248 /* We have a full trigraph here. */
249 next = _cpp_trigraph_map[next1];
250 if (next != '\\' || buffer->cur == buffer->rlimit)
255 /* We have a backslash, and room for at least one more character. */
259 next1 = *buffer->cur++;
260 if (!is_nvspace (next1))
264 while (buffer->cur < buffer->rlimit);
266 if (!is_vspace (next1))
272 if (space && !pfile->state.lexing_comment)
273 cpp_warning (pfile, "backslash and newline separated by space");
275 next = handle_newline (pfile, next1);
277 cpp_pedwarn (pfile, "backslash-newline at end of file");
279 while (next == '\\' || next == '?');
282 buffer->read_ahead = next;
286 /* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
290 get_effective_char (pfile)
293 cpp_buffer *buffer = pfile->buffer;
294 cppchar_t next = EOF;
296 if (buffer->cur < buffer->rlimit)
298 next = *buffer->cur++;
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
304 if (next == '?' || next == '\\')
305 next = skip_escaped_newlines (pfile, next);
308 buffer->read_ahead = next;
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
316 skip_block_comment (pfile)
319 cpp_buffer *buffer = pfile->buffer;
320 cppchar_t c = EOF, prevc = EOF;
322 pfile->state.lexing_comment = 1;
323 while (buffer->cur != buffer->rlimit)
325 prevc = c, c = *buffer->cur++;
328 /* FIXME: For speed, create a new character class of characters
329 of interest inside block comments. */
330 if (c == '?' || c == '\\')
331 c = skip_escaped_newlines (pfile, c);
333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimiter.
342 Don't bother to get it right across escaped newlines. */
343 if (CPP_OPTION (pfile, warn_comments)
344 && buffer->cur != buffer->rlimit)
346 prevc = c, c = *buffer->cur++;
347 if (c == '*' && buffer->cur != buffer->rlimit)
349 prevc = c, c = *buffer->cur++;
351 cpp_warning_with_line (pfile, pfile->line,
352 CPP_BUF_COL (buffer) - 2,
353 "\"/*\" within comment");
358 else if (is_vspace (c))
360 prevc = c, c = handle_newline (pfile, c);
364 adjust_column (pfile);
367 pfile->state.lexing_comment = 0;
368 buffer->read_ahead = EOF;
369 return c != '/' || prevc != '*';
372 /* Skip a C++ line comment. Handles escaped newlines. Returns
373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead. */
376 skip_line_comment (pfile)
379 cpp_buffer *buffer = pfile->buffer;
380 unsigned int orig_line = pfile->line;
383 pfile->state.lexing_comment = 1;
387 if (buffer->cur == buffer->rlimit)
391 if (c == '?' || c == '\\')
392 c = skip_escaped_newlines (pfile, c);
394 while (!is_vspace (c));
396 pfile->state.lexing_comment = 0;
397 buffer->read_ahead = c; /* Leave any newline for caller. */
398 return orig_line != pfile->line;
401 /* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly. */
404 adjust_column (pfile)
407 cpp_buffer *buffer = pfile->buffer;
408 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
412 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
413 - col % CPP_OPTION (pfile, tabstop)) - 1;
416 /* Skips whitespace, saving the next non-whitespace character.
417 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
418 tokens might be assigned an incorrect column. */
420 skip_whitespace (pfile, c)
424 cpp_buffer *buffer = pfile->buffer;
425 unsigned int warned = 0;
429 /* Horizontal space always OK. */
433 adjust_column (pfile);
434 /* Just \f \v or \0 left. */
439 cpp_warning (pfile, "null character(s) ignored");
443 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
444 cpp_pedwarn_with_line (pfile, pfile->line,
445 CPP_BUF_COL (buffer),
446 "%s in preprocessing directive",
447 c == '\f' ? "form feed" : "vertical tab");
450 if (buffer->cur == buffer->rlimit)
454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
455 while (is_nvspace (c));
457 /* Remember the next character. */
458 buffer->read_ahead = c;
461 /* See if the characters of a number token are valid in a name (no
464 name_p (pfile, string)
466 const cpp_string *string;
470 for (i = 0; i < string->len; i++)
471 if (!is_idchar (string->text[i]))
477 /* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow. */
486 static cpp_hashnode *
487 parse_identifier (pfile)
490 cpp_hashnode *result;
491 const U_CHAR *cur, *rlimit;
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur = pfile->buffer->cur - 1;
496 rlimit = pfile->buffer->rlimit;
499 while (cur < rlimit && ISIDNUM (*cur));
501 /* Check for slow-path cases. */
502 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
503 result = parse_identifier_slow (pfile, cur);
506 const U_CHAR *base = pfile->buffer->cur - 1;
507 result = (cpp_hashnode *)
508 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
509 pfile->buffer->cur = cur;
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
514 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
515 && !pfile->state.skipping, 0))
517 /* It is allowed to poison the same identifier twice. */
518 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
519 cpp_error (pfile, "attempt to use poisoned \"%s\"",
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
533 /* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned. */
536 static cpp_hashnode *
537 parse_identifier_slow (pfile, cur)
541 cpp_buffer *buffer = pfile->buffer;
542 const U_CHAR *base = buffer->cur - 1;
543 struct obstack *stack = &pfile->hash_table->stack;
544 unsigned int c, saw_dollar = 0, len;
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack, base, cur - base);
549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
555 while (is_idchar (c))
557 obstack_1grow (stack, c);
563 if (buffer->cur == buffer->rlimit)
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
572 c = skip_escaped_newlines (pfile, c);
574 while (is_idchar (c));
576 /* Remember the next character. */
577 buffer->read_ahead = c;
579 /* $ is not an identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
582 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
583 cpp_pedwarn (pfile, "'$' character(s) in identifier");
585 /* Identifiers are null-terminated. */
586 len = obstack_object_size (stack);
587 obstack_1grow (stack, '\0');
589 return (cpp_hashnode *)
590 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
593 /* Parse a number, skipping embedded backslash-newlines. */
595 parse_number (pfile, number, c, leading_period)
601 cpp_buffer *buffer = pfile->buffer;
602 cpp_pool *pool = &pfile->ident_pool;
603 unsigned char *dest, *limit;
605 dest = POOL_FRONT (pool);
606 limit = POOL_LIMIT (pool);
608 /* Place a leading period. */
612 limit = _cpp_next_chunk (pool, 0, &dest);
620 /* Need room for terminating null. */
621 if (dest + 1 >= limit)
622 limit = _cpp_next_chunk (pool, 0, &dest);
626 if (buffer->cur == buffer->rlimit)
631 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
633 /* Potential escaped newline? */
634 if (c != '?' && c != '\\')
636 c = skip_escaped_newlines (pfile, c);
638 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
640 /* Remember the next character. */
641 buffer->read_ahead = c;
643 /* Null-terminate the number. */
646 number->text = POOL_FRONT (pool);
647 number->len = dest - number->text;
648 POOL_COMMIT (pool, number->len + 1);
651 /* Subroutine of parse_string. Emits error for unterminated strings. */
653 unterminated (pfile, term)
657 cpp_error (pfile, "missing terminating %c character", term);
659 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
661 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
662 "possible start of unterminated string literal");
667 /* Subroutine of parse_string. */
669 unescaped_terminator_p (pfile, dest)
671 const unsigned char *dest;
673 const unsigned char *start, *temp;
675 /* In #include-style directives, terminators are not escapable. */
676 if (pfile->state.angled_headers)
679 start = POOL_FRONT (&pfile->ident_pool);
681 /* An odd number of consecutive backslashes represents an escaped
683 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
686 return ((dest - temp) & 1) == 0;
689 /* Parses a string, character constant, or angle-bracketed header file
690 name. Handles embedded trigraphs and escaped newlines. The stored
691 string is guaranteed NUL-terminated, but it is not guaranteed that
692 this is the first NUL since embedded NULs are preserved.
694 Multi-line strings are allowed, but they are deprecated. */
696 parse_string (pfile, token, terminator)
699 cppchar_t terminator;
701 cpp_buffer *buffer = pfile->buffer;
702 cpp_pool *pool = &pfile->ident_pool;
703 unsigned char *dest, *limit;
705 bool warned_nulls = false, warned_multi = false;
707 dest = POOL_FRONT (pool);
708 limit = POOL_LIMIT (pool);
712 if (buffer->cur == buffer->rlimit)
718 /* We need space for the terminating NUL. */
720 limit = _cpp_next_chunk (pool, 0, &dest);
724 unterminated (pfile, terminator);
728 /* Handle trigraphs, escaped newlines etc. */
729 if (c == '?' || c == '\\')
730 c = skip_escaped_newlines (pfile, c);
732 if (c == terminator && unescaped_terminator_p (pfile, dest))
737 else if (is_vspace (c))
739 /* In assembly language, silently terminate string and
740 character literals at end of line. This is a kludge
741 around not knowing where comments are. */
742 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
745 /* Character constants and header names may not extend over
746 multiple lines. In Standard C, neither may strings.
747 Unfortunately, we accept multiline strings as an
748 extension, except in #include family directives. */
749 if (terminator != '"' || pfile->state.angled_headers)
751 unterminated (pfile, terminator);
758 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
761 if (pfile->mls_line == 0)
763 pfile->mls_line = token->line;
764 pfile->mls_col = token->col;
767 c = handle_newline (pfile, c);
771 else if (c == '\0' && !warned_nulls)
774 cpp_warning (pfile, "null character(s) preserved in literal");
780 /* Remember the next character. */
781 buffer->read_ahead = c;
784 token->val.str.text = POOL_FRONT (pool);
785 token->val.str.len = dest - token->val.str.text;
786 POOL_COMMIT (pool, token->val.str.len + 1);
789 /* The stored comment includes the comment start and any terminator. */
791 save_comment (pfile, token, from)
794 const unsigned char *from;
796 unsigned char *buffer;
799 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
800 /* C++ comments probably (not definitely) have moved past a new
801 line, which we don't want to save in the comment. */
802 if (pfile->buffer->read_ahead != EOF)
804 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
806 token->type = CPP_COMMENT;
807 token->val.str.len = len;
808 token->val.str.text = buffer;
811 memcpy (buffer + 1, from, len - 1);
814 /* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
815 want to avoid stepping back when lexing %:%X. */
817 lex_percent (pfile, result)
821 cpp_buffer *buffer= pfile->buffer;
824 result->type = CPP_MOD;
825 /* Parsing %:%X could leave an extra character. */
826 if (buffer->extra_char == EOF)
827 c = get_effective_char (pfile);
830 c = buffer->read_ahead = buffer->extra_char;
831 buffer->extra_char = EOF;
835 ACCEPT_CHAR (CPP_MOD_EQ);
836 else if (CPP_OPTION (pfile, digraphs))
840 result->flags |= DIGRAPH;
841 ACCEPT_CHAR (CPP_HASH);
842 if (get_effective_char (pfile) == '%')
844 buffer->extra_char = get_effective_char (pfile);
845 if (buffer->extra_char == ':')
847 buffer->extra_char = EOF;
848 ACCEPT_CHAR (CPP_PASTE);
851 /* We'll catch the extra_char when we're called back. */
852 buffer->read_ahead = '%';
857 result->flags |= DIGRAPH;
858 ACCEPT_CHAR (CPP_CLOSE_BRACE);
863 /* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
864 want to avoid stepping back when lexing '...' or '.123'. In the
865 latter case we should also set a flag for parse_number. */
867 lex_dot (pfile, result)
871 cpp_buffer *buffer = pfile->buffer;
874 /* Parsing ..X could leave an extra character. */
875 if (buffer->extra_char == EOF)
876 c = get_effective_char (pfile);
879 c = buffer->read_ahead = buffer->extra_char;
880 buffer->extra_char = EOF;
883 /* All known character sets have 0...9 contiguous. */
884 if (c >= '0' && c <= '9')
886 result->type = CPP_NUMBER;
887 parse_number (pfile, &result->val.str, c, 1);
891 result->type = CPP_DOT;
894 buffer->extra_char = get_effective_char (pfile);
895 if (buffer->extra_char == '.')
897 buffer->extra_char = EOF;
898 ACCEPT_CHAR (CPP_ELLIPSIS);
901 /* We'll catch the extra_char when we're called back. */
902 buffer->read_ahead = '.';
904 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
905 ACCEPT_CHAR (CPP_DOT_STAR);
909 /* Allocate COUNT tokens for RUN. */
911 _cpp_init_tokenrun (run, count)
915 run->base = xnewvec (cpp_token, count);
916 run->limit = run->base + count;
920 /* Returns the next tokenrun, or creates one if there is none. */
925 if (run->next == NULL)
927 run->next = xnew (tokenrun);
928 run->next->prev = run;
929 _cpp_init_tokenrun (run->next, 250);
935 /* Allocate a single token that is invalidated at the same time as the
936 rest of the tokens on the line. Has its line and col set to the
937 same as the last lexed token, so that diagnostics appear in the
940 _cpp_temp_token (pfile)
943 cpp_token *old, *result;
945 old = pfile->cur_token - 1;
946 if (pfile->cur_token == pfile->cur_run->limit)
948 pfile->cur_run = next_tokenrun (pfile->cur_run);
949 pfile->cur_token = pfile->cur_run->base;
952 result = pfile->cur_token++;
953 result->line = old->line;
954 result->col = old->col;
958 /* Lex a token into RESULT (external interface). Takes care of issues
959 like directive handling, token lookahead, multiple include
960 optimisation and skipping. */
962 _cpp_lex_token (pfile)
969 if (pfile->cur_token == pfile->cur_run->limit)
971 pfile->cur_run = next_tokenrun (pfile->cur_run);
972 pfile->cur_token = pfile->cur_run->base;
975 if (pfile->lookaheads)
978 result = pfile->cur_token++;
981 result = _cpp_lex_direct (pfile);
983 if (result->flags & BOL)
985 /* Is this a directive? If _cpp_handle_directive returns
986 false, it is an assembler #. */
987 if (result->type == CPP_HASH
988 && !pfile->state.parsing_args
989 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
991 if (pfile->cb.line_change && !pfile->state.skipping)
992 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
995 /* We don't skip tokens in directives. */
996 if (pfile->state.in_directive)
999 /* Outside a directive, invalidate controlling macros. At file
1000 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1001 get here and MI optimisation works. */
1002 pfile->mi_valid = false;
1004 if (!pfile->state.skipping || result->type == CPP_EOF)
1011 /* Lex a token into pfile->cur_token, which is also incremented, to
1012 get diagnostics pointing to the correct location.
1014 Does not handle issues such as token lookahead, multiple-include
1015 optimisation, directives, skipping etc. This function is only
1016 suitable for use by _cpp_lex_token, and in special cases like
1017 lex_expansion_token which doesn't care for any of these issues.
1019 When meeting a newline, returns CPP_EOF if parsing a directive,
1020 otherwise returns to the start of the token buffer if permissible.
1021 Returns the location of the lexed token. */
1023 _cpp_lex_direct (pfile)
1028 const unsigned char *comment_start;
1029 cpp_token *result = pfile->cur_token++;
1032 buffer = pfile->buffer;
1033 result->flags = buffer->saved_flags;
1034 buffer->saved_flags = 0;
1036 result->line = pfile->line;
1039 c = buffer->read_ahead;
1040 if (c == EOF && buffer->cur < buffer->rlimit)
1042 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1043 buffer->read_ahead = EOF;
1049 buffer->saved_flags = BOL;
1050 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1052 if (buffer->cur != buffer->line_base)
1054 /* Non-empty files should end in a newline. Don't warn
1055 for command line and _Pragma buffers. */
1056 if (!buffer->from_stage3)
1057 cpp_pedwarn (pfile, "no newline at end of file");
1058 handle_newline (pfile, '\n');
1061 /* Don't pop the last buffer. */
1064 unsigned char stop = buffer->return_at_eof;
1066 _cpp_pop_buffer (pfile);
1071 result->type = CPP_EOF;
1074 case ' ': case '\t': case '\f': case '\v': case '\0':
1075 skip_whitespace (pfile, c);
1076 result->flags |= PREV_WHITE;
1079 case '\n': case '\r':
1080 handle_newline (pfile, c);
1081 buffer->saved_flags = BOL;
1082 if (! pfile->state.in_directive)
1084 if (pfile->state.parsing_args == 2)
1085 buffer->saved_flags |= PREV_WHITE;
1086 if (!pfile->keep_tokens)
1088 pfile->cur_run = &pfile->base_run;
1089 result = pfile->base_run.base;
1090 pfile->cur_token = result + 1;
1094 result->type = CPP_EOF;
1099 /* These could start an escaped newline, or '?' a trigraph. Let
1100 skip_escaped_newlines do all the work. */
1102 unsigned int line = pfile->line;
1104 c = skip_escaped_newlines (pfile, c);
1105 if (line != pfile->line)
1106 /* We had at least one escaped newline of some sort, and the
1107 next character is in buffer->read_ahead. Update the
1108 token's line and column. */
1109 goto update_tokens_line;
1111 /* We are either the original '?' or '\\', or a trigraph. */
1112 result->type = CPP_QUERY;
1113 buffer->read_ahead = EOF;
1121 case '0': case '1': case '2': case '3': case '4':
1122 case '5': case '6': case '7': case '8': case '9':
1123 result->type = CPP_NUMBER;
1124 parse_number (pfile, &result->val.str, c, 0);
1128 if (!CPP_OPTION (pfile, dollars_in_ident))
1130 /* Fall through... */
1133 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1134 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1135 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1136 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1138 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1139 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1140 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1141 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1143 result->type = CPP_NAME;
1144 result->val.node = parse_identifier (pfile);
1146 /* 'L' may introduce wide characters or strings. */
1147 if (result->val.node == pfile->spec_nodes.n_L)
1149 c = buffer->read_ahead;
1150 if (c == EOF && buffer->cur < buffer->rlimit)
1152 if (c == '\'' || c == '"')
1155 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1159 /* Convert named operators to their proper types. */
1160 else if (result->val.node->flags & NODE_OPERATOR)
1162 result->flags |= NAMED_OP;
1163 result->type = result->val.node->value.operator;
1169 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1171 parse_string (pfile, result, c);
1175 /* A potential block or line comment. */
1176 comment_start = buffer->cur;
1177 result->type = CPP_DIV;
1178 c = get_effective_char (pfile);
1180 ACCEPT_CHAR (CPP_DIV_EQ);
1181 if (c != '/' && c != '*')
1186 if (skip_block_comment (pfile))
1187 cpp_error (pfile, "unterminated comment");
1191 if (!CPP_OPTION (pfile, cplusplus_comments)
1192 && !CPP_IN_SYSTEM_HEADER (pfile))
1195 /* Warn about comments only if pedantically GNUC89, and not
1196 in system headers. */
1197 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1198 && ! buffer->warned_cplusplus_comments)
1201 "C++ style comments are not allowed in ISO C89");
1203 "(this will be reported only once per input file)");
1204 buffer->warned_cplusplus_comments = 1;
1207 /* Skip_line_comment updates buffer->read_ahead. */
1208 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1209 cpp_warning (pfile, "multi-line comment");
1212 /* Skipping the comment has updated buffer->read_ahead. */
1213 if (!pfile->state.save_comments)
1215 result->flags |= PREV_WHITE;
1216 goto update_tokens_line;
1219 /* Save the comment as a token in its own right. */
1220 save_comment (pfile, result, comment_start);
1224 if (pfile->state.angled_headers)
1226 result->type = CPP_HEADER_NAME;
1227 c = '>'; /* terminator. */
1231 result->type = CPP_LESS;
1232 c = get_effective_char (pfile);
1234 ACCEPT_CHAR (CPP_LESS_EQ);
1237 ACCEPT_CHAR (CPP_LSHIFT);
1238 if (get_effective_char (pfile) == '=')
1239 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1241 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1243 ACCEPT_CHAR (CPP_MIN);
1244 if (get_effective_char (pfile) == '=')
1245 ACCEPT_CHAR (CPP_MIN_EQ);
1247 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1249 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1250 result->flags |= DIGRAPH;
1252 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1254 ACCEPT_CHAR (CPP_OPEN_BRACE);
1255 result->flags |= DIGRAPH;
1260 result->type = CPP_GREATER;
1261 c = get_effective_char (pfile);
1263 ACCEPT_CHAR (CPP_GREATER_EQ);
1266 ACCEPT_CHAR (CPP_RSHIFT);
1267 if (get_effective_char (pfile) == '=')
1268 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1270 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1272 ACCEPT_CHAR (CPP_MAX);
1273 if (get_effective_char (pfile) == '=')
1274 ACCEPT_CHAR (CPP_MAX_EQ);
1279 lex_percent (pfile, result);
1283 lex_dot (pfile, result);
1287 result->type = CPP_PLUS;
1288 c = get_effective_char (pfile);
1290 ACCEPT_CHAR (CPP_PLUS_EQ);
1292 ACCEPT_CHAR (CPP_PLUS_PLUS);
1296 result->type = CPP_MINUS;
1297 c = get_effective_char (pfile);
1300 ACCEPT_CHAR (CPP_DEREF);
1301 if (CPP_OPTION (pfile, cplusplus)
1302 && get_effective_char (pfile) == '*')
1303 ACCEPT_CHAR (CPP_DEREF_STAR);
1306 ACCEPT_CHAR (CPP_MINUS_EQ);
1308 ACCEPT_CHAR (CPP_MINUS_MINUS);
1312 result->type = CPP_MULT;
1313 if (get_effective_char (pfile) == '=')
1314 ACCEPT_CHAR (CPP_MULT_EQ);
1318 result->type = CPP_EQ;
1319 if (get_effective_char (pfile) == '=')
1320 ACCEPT_CHAR (CPP_EQ_EQ);
1324 result->type = CPP_NOT;
1325 if (get_effective_char (pfile) == '=')
1326 ACCEPT_CHAR (CPP_NOT_EQ);
1330 result->type = CPP_AND;
1331 c = get_effective_char (pfile);
1333 ACCEPT_CHAR (CPP_AND_EQ);
1335 ACCEPT_CHAR (CPP_AND_AND);
1339 result->type = CPP_HASH;
1340 if (get_effective_char (pfile) == '#')
1341 ACCEPT_CHAR (CPP_PASTE);
1345 result->type = CPP_OR;
1346 c = get_effective_char (pfile);
1348 ACCEPT_CHAR (CPP_OR_EQ);
1350 ACCEPT_CHAR (CPP_OR_OR);
1354 result->type = CPP_XOR;
1355 if (get_effective_char (pfile) == '=')
1356 ACCEPT_CHAR (CPP_XOR_EQ);
1360 result->type = CPP_COLON;
1361 c = get_effective_char (pfile);
1362 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1363 ACCEPT_CHAR (CPP_SCOPE);
1364 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1366 result->flags |= DIGRAPH;
1367 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1371 case '~': result->type = CPP_COMPL; break;
1372 case ',': result->type = CPP_COMMA; break;
1373 case '(': result->type = CPP_OPEN_PAREN; break;
1374 case ')': result->type = CPP_CLOSE_PAREN; break;
1375 case '[': result->type = CPP_OPEN_SQUARE; break;
1376 case ']': result->type = CPP_CLOSE_SQUARE; break;
1377 case '{': result->type = CPP_OPEN_BRACE; break;
1378 case '}': result->type = CPP_CLOSE_BRACE; break;
1379 case ';': result->type = CPP_SEMICOLON; break;
1381 /* @ is a punctuator in Objective C. */
1382 case '@': result->type = CPP_ATSIGN; break;
1386 result->type = CPP_OTHER;
1394 /* An upper bound on the number of bytes needed to spell a token,
1395 including preceding whitespace. */
/* TOKEN is only read: its spelling category (TOKEN_SPELL) selects
   which payload length, if any, is counted. */
1397 cpp_token_len (token)
1398 const cpp_token *token;
1402 switch (TOKEN_SPELL (token))
/* Only string-spelled tokens and identifiers contribute a
   variable-length payload; every other category counts as 0 here. */
1404 default: len = 0; break;
1405 case SPELL_STRING: len = token->val.str.len; break;
1406 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1408 /* 1 for whitespace, 4 for comment delimiters. */
1412 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1413 already contain enough space to hold the token's spelling.
1414 Returns a pointer to the character after the last character
1417 cpp_spell_token (pfile, token, buffer)
1418 cpp_reader *pfile; /* Would be nice to be rid of this... */
1419 const cpp_token *token;
1420 unsigned char *buffer;
/* The spelling category of TOKEN decides where the text comes from. */
1422 switch (TOKEN_SPELL (token))
1424 case SPELL_OPERATOR:
1426 const unsigned char *spelling;
/* Operators written as digraphs take their text from
   digraph_spellings, indexed relative to CPP_FIRST_DIGRAPH. */
1429 if (token->flags & DIGRAPH)
1431 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1432 else if (token->flags & NAMED_OP)
1435 spelling = TOKEN_NAME (token);
/* Walk the NUL-terminated spelling one character at a time. */
1437 while ((c = *spelling++) != '\0')
/* Identifiers: copy the name of the node and advance past it. */
1444 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1445 buffer += NODE_LEN (token->val.node);
/* String-like tokens: choose the delimiters and the optional L
   prefix, then emit prefix, opening delimiter, stored text and
   closing delimiter in that order.  A zero value means "emit
   nothing" for that position. */
1450 int left, right, tag;
1451 switch (token->type)
1453 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1454 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1455 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1456 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1457 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1458 default: left = '\0'; right = '\0'; tag = '\0'; break;
1460 if (tag) *buffer++ = tag;
1461 if (left) *buffer++ = left;
1462 memcpy (buffer, token->val.str.text, token->val.str.len);
1463 buffer += token->val.str.len;
1464 if (right) *buffer++ = right;
/* Single-character tokens are emitted verbatim. */
1469 *buffer++ = token->val.c;
/* No spelling is known for this token: reported through cpp_ice. */
1473 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1480 /* Returns a token as a null-terminated string. The string is
1481 temporary, and automatically freed later. Useful for diagnostics. */
1483 cpp_token_as_text (pfile, token)
1485 const cpp_token *token;
/* The allocation is sized with cpp_token_len, an upper bound on the
   spelling, and is taken from PFILE->ident_pool. */
1487 unsigned int len = cpp_token_len (token);
1488 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* END receives the position just past the spelled text. */
1490 end = cpp_spell_token (pfile, token, start);
1496 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* TYPE indexes token_spellings directly; no range check on TYPE is
   made here.  The returned pointer refers to the table entry, not to
   a copy. */
1498 cpp_type2name (type)
1499 enum cpp_ttype type;
1501 return (const char *) token_spellings[type].name;
1504 /* Writes the spelling of token to FP, without any preceding space.
1505 Separated from cpp_spell_token for efficiency - to avoid stdio
1506 double-buffering. */
1508 cpp_output_token (token, fp)
1509 const cpp_token *token;
/* Same case analysis as cpp_spell_token, but writing to a stdio
   stream instead of a memory buffer. */
1512 switch (TOKEN_SPELL (token))
1514 case SPELL_OPERATOR:
1516 const unsigned char *spelling;
/* Digraph spellings come from digraph_spellings, indexed relative
   to CPP_FIRST_DIGRAPH. */
1518 if (token->flags & DIGRAPH)
1520 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1521 else if (token->flags & NAMED_OP)
1524 spelling = TOKEN_NAME (token);
1526 ufputs (spelling, fp);
/* Identifiers are written from the name stored in the node. */
1532 ufputs (NODE_NAME (token->val.node), fp);
/* String-like tokens: optional L prefix, opening delimiter, stored
   text, closing delimiter.  A zero value suppresses that piece. */
1537 int left, right, tag;
1538 switch (token->type)
1540 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1541 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1542 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1543 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1544 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1545 default: left = '\0'; right = '\0'; tag = '\0'; break;
1547 if (tag) putc (tag, fp);
1548 if (left) putc (left, fp);
/* The text is written by length, so it need not be NUL-terminated. */
1549 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1550 if (right) putc (right, fp);
/* Single-character tokens are written verbatim. */
1555 putc (token->val.c, fp);
1559 /* An error, most probably. */
1564 /* Compare two tokens. */
1566 _cpp_equiv_tokens (a, b)
1567 const cpp_token *a, *b;
/* Tokens can only be equivalent when both type and flags agree; the
   payload comparison then depends on the spelling category of A. */
1569 if (a->type == b->type && a->flags == b->flags)
1570 switch (TOKEN_SPELL (a))
1572 default: /* Keep compiler happy. */
1573 case SPELL_OPERATOR:
1576 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must additionally agree on the argument number. */
1578 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compared by node pointer, not by text. */
1580 return a->val.node == b->val.node;
/* String payloads: same length and the same bytes. */
1582 return (a->val.str.len == b->val.str.len
1583 && !memcmp (a->val.str.text, b->val.str.text,
1590 /* Returns nonzero if a space should be inserted to avoid an
1591 accidental token paste for output. For simplicity, it is
1592 conservative, and occasionally advises a space where one is not
1593 needed, e.g. "." and ".2". */
1596 cpp_avoid_paste (pfile, token1, token2)
1598 const cpp_token *token1, *token2;
/* A and B are the token types of the left and right token. */
1600 enum cpp_ttype a = token1->type, b = token2->type;
1603 if (token1->flags & NAMED_OP)
1605 if (token2->flags & NAMED_OP)
/* C is taken from the first character of the spelling of TOKEN2
   when TOKEN2 is a digraph or an operator. */
1609 if (token2->flags & DIGRAPH)
1610 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1611 else if (token_spellings[b].category == SPELL_OPERATOR)
1612 c = token_spellings[b].name[0];
1614 /* Quickly get everything that can paste with an '='. */
1615 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below lists the leading characters (or token types) of
   TOKEN2 that would fuse with TOKEN1 into a different token. */
1620 case CPP_GREATER: return c == '>' || c == '?';
1621 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1622 case CPP_PLUS: return c == '+';
1623 case CPP_MINUS: return c == '-' || c == '>';
1624 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1625 case CPP_MOD: return c == ':' || c == '>';
1626 case CPP_AND: return c == '&';
1627 case CPP_OR: return c == '|';
1628 case CPP_COLON: return c == ':' || c == '>';
1629 case CPP_DEREF: return c == '*';
1630 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1631 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1632 case CPP_NAME: return ((b == CPP_NUMBER
1633 && name_p (pfile, &token2->val.str))
1635 || b == CPP_CHAR || b == CPP_STRING); /* L */
1636 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1637 || c == '.' || c == '+' || c == '-');
/* An at-sign lexed as CPP_OTHER only matters when the objc option
   is set. */
1638 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1639 && token1->val.c == '@'
1640 && (b == CPP_NAME || b == CPP_STRING));
1647 /* Output all the remaining tokens on the current line, and a newline
1648 character, to FP. Leading whitespace is removed. If there are
1649 macros, special token padding is not performed. */
1651 cpp_output_line (pfile, fp)
1655 const cpp_token *token;
/* Tokens are pulled one at a time with cpp_get_token until CPP_EOF. */
1657 token = cpp_get_token (pfile);
1658 while (token->type != CPP_EOF)
1660 cpp_output_token (token, fp);
/* The following token is fetched before spacing is decided, because
   PREV_WHITE is recorded on the token that comes after the space. */
1661 token = cpp_get_token (pfile);
1662 if (token->flags & PREV_WHITE)
1669 /* Returns the value of a hexadecimal digit. */
/* Letters are accepted in either case and map to 10 through 15; the
   decimal digits are tested last. */
1674 if (c >= 'a' && c <= 'f')
1675 return c - 'a' + 10;
1676 if (c >= 'A' && c <= 'F')
1677 return c - 'A' + 10;
1678 if (c >= '0' && c <= '9')
1683 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1684 failure if cpplib is not parsing C++ or C99. Such failure is
1685 silent, and no variables are updated. Otherwise returns 0, and
1686 warns if -Wtraditional.
1688 [lex.charset]: The character designated by the universal character
1689 name \UNNNNNNNN is that character whose character short name in
1690 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1691 universal character name \uNNNN is that character whose character
1692 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1693 for a universal character name is less than 0x20 or in the range
1694 0x7F-0x9F (inclusive), or if the universal character name
1695 designates a character in the basic source character set, then the
1696 program is ill-formed.
1698 We assume that wchar_t is Unicode, so we don't need to do any
1699 mapping. Is this ever wrong?
1701 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1702 LIMIT is the end of the string or charconst. PSTR is updated to
1703 point after the UCS on return, and the UCS is written into PC. */
1706 maybe_read_ucs (pfile, pstr, limit, pc)
1708 const unsigned char **pstr;
1709 const unsigned char *limit;
/* P is a local cursor, CODE accumulates the value, and C starts out
   as the introducing letter read through PC. */
1712 const unsigned char *p = *pstr;
1713 unsigned int code = 0;
1714 unsigned int c = *pc, length;
1716 /* Only attempt to interpret a UCS for C++ and C99. */
1717 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1720 if (CPP_WTRADITIONAL (pfile))
1721 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* The lower-case form takes four hex digits, the other form eight. */
1723 length = (c == 'u' ? 4: 8);
/* Fewer bytes remain before LIMIT than the form requires. */
1725 if ((size_t) (limit - p) < length)
1727 cpp_error (pfile, "incomplete universal-character-name");
1728 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down, so it reaches zero only when every expected
   digit was consumed. */
1733 for (; length; length--, p++)
/* Each hex digit contributes four more bits to CODE. */
1737 code = (code << 4) + hex_digit_value (c);
1741 "non-hex digit '%c' in universal-character-name", c);
1742 /* We shouldn't skip in case there are multibyte chars. */
1748 #ifdef TARGET_EBCDIC
1749 cpp_error (pfile, "universal-character-name on EBCDIC target");
1750 code = 0x3f; /* EBCDIC invalid character */
1752 /* True extended characters are OK. */
/* Values with the top bit set, and the range 0xD800-0xDFFF, are
   excluded from the accepted set. */
1754 && !(code & 0x80000000)
1755 && !(code >= 0xD800 && code <= 0xDFFF))
1757 /* The standard permits $, @ and ` to be specified as UCNs. We use
1758 hex escapes so that this also works with EBCDIC hosts. */
1759 else if (code == 0x24 || code == 0x40 || code == 0x60)
1761 /* Don't give another error if one occurred above. */
1762 else if (length == 0)
1763 cpp_error (pfile, "universal-character-name out of range");
1771 /* Interpret an escape sequence, and return its value. PSTR points to
1772 the input pointer, which is just after the backslash. LIMIT is how
1773 much text we have. MASK is a bitmask for the precision for the
1774 destination type (char or wchar_t). TRADITIONAL, if true, does not
1775 interpret escapes that did not exist in traditional C.
1777 Handles all relevant diagnostics. */
1780 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1782 const unsigned char **pstr;
1783 const unsigned char *limit;
1784 unsigned HOST_WIDE_INT mask;
/* STR is a local cursor; C is the character that selects the escape. */
1788 const unsigned char *str = *pstr;
1789 unsigned int c = *str++;
/* These escapes stand for themselves, so C is left unchanged. */
1793 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the target values of the control characters. */
1794 case 'b': c = TARGET_BS; break;
1795 case 'f': c = TARGET_FF; break;
1796 case 'n': c = TARGET_NEWLINE; break;
1797 case 'r': c = TARGET_CR; break;
1798 case 't': c = TARGET_TAB; break;
1799 case 'v': c = TARGET_VT; break;
1801 case '(': case '{': case '[': case '%':
1802 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1803 '\%' is used to prevent SCCS from getting confused. */
1804 unknown = CPP_PEDANTIC (pfile);
1808 if (CPP_WTRADITIONAL (pfile))
1809 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1815 if (CPP_PEDANTIC (pfile))
1816 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are delegated; a nonzero result marks
   the escape as unknown. */
1821 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1825 if (CPP_WTRADITIONAL (pfile))
1826 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1830 unsigned int i = 0, overflow = 0;
1831 int digits_found = 0;
/* Nonzero when shifting I left by four bits would drop high bits. */
1839 overflow |= i ^ (i << 4 >> 4);
1840 i = (i << 4) + hex_digit_value (c);
1845 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost or the value does not fit MASK. */
1847 if (overflow | (i != (i & mask)))
1849 cpp_pedwarn (pfile, "hex escape sequence out of range");
1856 case '0': case '1': case '2': case '3':
1857 case '4': case '5': case '6': case '7':
/* The first octal digit has already been read into C. */
1859 unsigned int i = c - '0';
1862 while (str < limit && ++count < 3)
1865 if (c < '0' || c > '7')
/* Each further octal digit contributes three more bits. */
1868 i = (i << 3) + c - '0';
1871 if (i != (i & mask))
1873 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two message forms exist for unknown escapes: one shows the
   character itself, the other its value as three octal digits. */
1888 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1890 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1894 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* When the MAX_ variants are not already defined, they default to
   the plain CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE. */
1900 #ifndef MAX_CHAR_TYPE_SIZE
1901 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1904 #ifndef MAX_WCHAR_TYPE_SIZE
1905 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1908 /* Interpret a (possibly wide) character constant in TOKEN.
1909 WARN_MULTI warns about multi-character charconsts, if not
1910 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1911 that did not exist in traditional C. PCHARS_SEEN points to a
1912 variable that is filled in with the number of characters seen. */
1914 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1916 const cpp_token *token;
1919 unsigned int *pchars_seen;
/* STR and LIMIT delimit the stored text of the constant. */
1921 const unsigned char *str = token->val.str.text;
1922 const unsigned char *limit = str + token->val.str.len;
1923 unsigned int chars_seen = 0;
1924 unsigned int width, max_chars, c;
1925 unsigned HOST_WIDE_INT mask;
1926 HOST_WIDE_INT result = 0;
1928 #ifdef MULTIBYTE_CHARS
1929 (void) local_mbtowc (NULL, NULL, 0);
1932 /* Width in bits. */
1933 if (token->type == CPP_CHAR)
1934 width = MAX_CHAR_TYPE_SIZE;
1936 width = MAX_WCHAR_TYPE_SIZE;
/* MASK selects the low WIDTH bits.  The shift is only performed
   when WIDTH is smaller than the host wide integer. */
1938 if (width < HOST_BITS_PER_WIDE_INT)
1939 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of WIDTH-bit characters that fit in one HOST_WIDE_INT. */
1942 max_chars = HOST_BITS_PER_WIDE_INT / width;
1946 #ifdef MULTIBYTE_CHARS
1950 char_len = local_mbtowc (&wc, str, limit - str);
1953 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escape sequences are decoded by cpp_parse_escape, which advances
   STR itself. */
1966 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1968 #ifdef MAP_CHARACTER
1970 c = MAP_CHARACTER (c);
1973 /* Merge character into result; ignore excess chars. */
1974 if (++chars_seen <= max_chars)
/* Earlier characters move up by WIDTH bits to make room. */
1976 if (width < HOST_BITS_PER_WIDE_INT)
1977 result = (result << width) | (c & mask);
1983 if (chars_seen == 0)
1984 cpp_error (pfile, "empty character constant");
1985 else if (chars_seen > max_chars)
/* The reported count is clamped to what was actually merged. */
1987 chars_seen = max_chars;
1988 cpp_warning (pfile, "character constant too long");
1990 else if (chars_seen > 1 && !traditional && warn_multi)
1991 cpp_warning (pfile, "multi-character character constant");
1993 /* If char type is signed, sign-extend the constant. The
1994 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1995 if (token->type == CPP_CHAR && chars_seen)
1997 unsigned int nbits = chars_seen * width;
/* NOTE(review): this MASK shadows the wider MASK declared at the
   top of the function and has type unsigned int. */
1998 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* Tests whether char is unsigned or the top bit of the constant is
   clear. */
2000 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2001 || ((result >> (nbits - 1)) & 1) == 0)
2007 *pchars_seen = chars_seen;
2011 /* Memory buffers. Changing these three constants can have a dramatic
2012 effect on performance. The values here are reasonable defaults,
2013 but might be tuned. If you adjust them, be sure to test across a
2014 range of uses of cpplib, including heavy nested function-like macro
2015 expansion. Also check the change in peak memory usage (NJAMD is a
2016 good tool for this). */
/* Smallest buffer that is ever allocated. */
2017 #define MIN_BUFF_SIZE 8000
/* Largest free buffer still considered a reasonable match for a
   request of MIN_SIZE bytes. */
2018 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the bytes
   between cur and limit.  NOTE(review): MIN_EXTRA is not
   parenthesised in the expansion, unlike BUFF, so an argument
   containing a low-precedence operator would expand unexpectedly. */
2019 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2020 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* The default alignment is the offset of member u in struct dummy. */
2032 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Rounds SIZE up to a multiple of ALIGN; the mask arithmetic is only
   correct when ALIGN is a power of two. */
2033 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1))
2035 /* Create a new allocation buffer. Place the control block at the end
2036 of the buffer, so that buffer overflows will cause immediate chaos. */
/* Enforce the minimum size, then round LEN up to the default
   alignment so that the control block placed at BASE + LEN is itself
   aligned. */
2044 if (len < MIN_BUFF_SIZE)
2045 len = MIN_BUFF_SIZE;
2046 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT);
/* One allocation holds the data area followed by the _cpp_buff. */
2048 base = xmalloc (len + sizeof (_cpp_buff));
2049 result = (_cpp_buff *) (base + len);
2050 result->base = base;
/* LIMIT coincides with the address of the control block. */
2052 result->limit = base + len;
2053 result->next = NULL;
2057 /* Place a chain of unwanted allocation buffers on the free list. */
2059 _cpp_release_buff (pfile, buff)
/* END starts at the head of the chain; NOTE(review): presumably it
   is advanced to the last buffer before the splice below - confirm. */
2063 _cpp_buff *end = buff;
/* Splice in front of the existing free list: END is pointed at the
   old head and BUFF becomes the new head. */
2067 end->next = pfile->free_buffs;
2068 pfile->free_buffs = buff;
2071 /* Return a free buffer of size at least MIN_SIZE. */
2073 _cpp_get_buff (pfile, min_size)
2075 unsigned int min_size;
/* P always addresses the link that leads to the buffer under
   consideration, starting with the head of the free list. */
2077 _cpp_buff *result, **p;
2079 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* Fallback: a fresh buffer from new_buff.  NOTE(review): presumably
   taken when the free list is exhausted - confirm.  The capacity of
   a candidate is the distance from base to limit, and it is accepted
   only when it is at least MIN_SIZE and below BUFF_SIZE_UPPER_BOUND.
   The chosen buffer is handed back detached (next cleared) and empty
   (cur reset to base). */
2084 return new_buff (min_size);
2086 size = result->limit - result->base;
2087 /* Return a buffer that's big enough, but don't waste one that's
2089 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2094 result->next = NULL;
2095 result->cur = result->base;
/* The new buffer is requested from _cpp_get_buff with a size computed
   by EXTENDED_BUFF_SIZE, linked through BUFF->next, and the bytes
   between cur and limit of BUFF are copied to its base. */
2099 /* Return a buffer chained on the end of BUFF. Copy to it the
2100 uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2103 _cpp_extend_buff (pfile, buff, min_extra)
2106 unsigned int min_extra;
2108 unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
2110 buff->next = _cpp_get_buff (pfile, size);
2111 memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2115 /* Free a chain of buffers starting at BUFF. */
2117 _cpp_free_buff (buff)
/* The loop steps through NEXT rather than BUFF->next, so the
   successor can be read before the current buffer goes away. */
2122 for (; buff; buff = next)
/* Nonzero when CHUNK is non-null and the span from its base to its
   limit is at least twice SIZE. */
2130 chunk_suitable (chunk, size)
2134 /* Being at least twice SIZE means we can use memcpy in
2135 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2137 return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2140 /* Returns the end of the new pool. PTR points to a char in the old
2141 pool, and is updated to point to the same char in the new pool. */
2143 _cpp_next_chunk (pool, len, ptr)
2146 unsigned char **ptr;
/* The first candidate is the chunk that follows the current one. */
2148 cpp_chunk *chunk = pool->cur->next;
2150 /* LEN is the minimum size we want in the new pool. */
2151 len += POOL_ROOM (pool);
/* If the candidate is not suitable, allocate a larger chunk and link
   it in directly after the current chunk. */
2152 if (! chunk_suitable (chunk, len))
2154 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2156 chunk->next = pool->cur->next;
2157 pool->cur->next = chunk;
2160 /* Update the pointer before changing chunk's front. */
/* Rebase the caller pointer by the distance between the new chunk
   base and the old pool front. */
2162 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy POOL_ROOM (pool) bytes from the old front to the base of the
   new chunk, then start the new chunk at its base. */
2164 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2165 chunk->front = chunk->base;
2168 return POOL_LIMIT (pool);
2175 unsigned char *base;
/* SIZE is rounded up with POOL_ALIGN, and a single xmalloc covers the
   data area plus the cpp_chunk descriptor. */
2178 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2179 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2180 /* Put the chunk descriptor at the end. Then chunk overruns will
2181 cause obvious chaos. */
2182 result = (cpp_chunk *) (base + size);
/* A new chunk starts empty: front equals base, and limit coincides
   with the address of the descriptor. */
2183 result->base = base;
2184 result->front = base;
2185 result->limit = base + size;
/* Set up POOL with one initial chunk of SIZE bytes and the given
   alignment. */
2192 _cpp_init_pool (pool, size, align, temp)
2194 unsigned int size, align, temp;
/* NOTE(review): presumably the default is applied only when ALIGN
   is passed as zero - confirm. */
2197 align = DEFAULT_ALIGNMENT;
/* A nonzero result here means ALIGN is not a power of two. */
2198 if (align & (align - 1))
2200 pool->align = align;
2201 pool->first = new_chunk (size);
2202 pool->cur = pool->first;
/* Links the chunk to itself.  NOTE(review): presumably done only for
   pools flagged TEMP - confirm. */
2204 pool->cur->next = pool->cur;
/* Walk the chunks of POOL starting from its first chunk. */
2208 _cpp_free_pool (pool)
2211 cpp_chunk *chunk = pool->first, *next;
/* Stop at the end of a linear chain (null) or on arriving back at
   the first chunk of a circular one. */
2219 while (chunk && chunk != pool->first);
2222 /* Reserve LEN bytes from a memory pool. */
2224 _cpp_pool_reserve (pool, len)
/* The request is rounded up to the alignment of the pool first. */
2228 len = POOL_ALIGN (len, pool->align);
/* Move to another chunk when the aligned request does not fit in the
   remaining room; 0 is passed as PTR because there is no caller
   pointer to relocate. */
2229 if (len > (unsigned int) POOL_ROOM (pool))
2230 _cpp_next_chunk (pool, len, 0);
/* The front of the pool is returned as is; nothing is committed in
   the lines shown here. */
2232 return POOL_FRONT (pool);
2235 /* Allocate LEN bytes from a memory pool. */
2237 _cpp_pool_alloc (pool, len)
2241 unsigned char *result = _cpp_pool_reserve (pool, len);
2243 POOL_COMMIT (pool, len);