1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
108 static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int));
109 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 static _cpp_buff *new_buff PARAMS ((unsigned int));
114 Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
118 cpp_ideq (token, string)
119 const cpp_token *token;
122 if (token->type != CPP_NAME)
125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
128 /* Call when meeting a newline. Returns the character after the newline
129 (or carriage-return newline combination), or EOF. */
131 handle_newline (pfile, newline_char)
133 cppchar_t newline_char;
136 cppchar_t next = EOF;
139 buffer = pfile->buffer;
140 buffer->col_adjust = 0;
141 buffer->line_base = buffer->cur;
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer->cur < buffer->rlimit)
146 next = *buffer->cur++;
147 if (next + newline_char == '\r' + '\n')
149 buffer->line_base = buffer->cur;
150 if (buffer->cur < buffer->rlimit)
151 next = *buffer->cur++;
157 buffer->read_ahead = next;
161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
167 trigraph_ok (pfile, from_char)
171 int accept = CPP_OPTION (pfile, trigraphs);
173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
176 cpp_buffer *buffer = pfile->buffer;
179 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
180 "trigraph ??%c converted to %c",
182 (int) _cpp_trigraph_map[from_char]);
183 else if (buffer->cur != buffer->last_Wtrigraphs)
185 buffer->last_Wtrigraphs = buffer->cur;
186 cpp_warning_with_line (pfile, pfile->line,
187 CPP_BUF_COL (buffer) - 2,
188 "trigraph ??%c ignored", (int) from_char);
195 /* Assumes local variables buffer and result. */
196 #define ACCEPT_CHAR(t) \
197 do { result->type = t; buffer->read_ahead = EOF; } while (0)
199 /* When we move to multibyte character sets, add to these something
200 that saves and restores the state of the multibyte conversion
201 library. This probably involves saving and restoring a "cookie".
202 In the case of glibc it is an 8-byte structure, so is not a high
203 overhead operation. In any case, it's out of the fast path. */
204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
207 /* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard. */
212 skip_escaped_newlines (pfile, next)
216 cpp_buffer *buffer = pfile->buffer;
218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer->from_stage3)
222 const unsigned char *saved_cur;
227 if (buffer->cur == buffer->rlimit)
233 next1 = *buffer->cur++;
234 if (next1 != '?' || buffer->cur == buffer->rlimit)
240 next1 = *buffer->cur++;
241 if (!_cpp_trigraph_map[next1]
242 || !trigraph_ok (pfile, next1))
248 /* We have a full trigraph here. */
249 next = _cpp_trigraph_map[next1];
250 if (next != '\\' || buffer->cur == buffer->rlimit)
255 /* We have a backslash, and room for at least one more character. */
259 next1 = *buffer->cur++;
260 if (!is_nvspace (next1))
264 while (buffer->cur < buffer->rlimit);
266 if (!is_vspace (next1))
272 if (space && !pfile->state.lexing_comment)
273 cpp_warning (pfile, "backslash and newline separated by space");
275 next = handle_newline (pfile, next1);
277 cpp_pedwarn (pfile, "backslash-newline at end of file");
279 while (next == '\\' || next == '?');
282 buffer->read_ahead = next;
286 /* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
290 get_effective_char (pfile)
293 cpp_buffer *buffer = pfile->buffer;
294 cppchar_t next = EOF;
296 if (buffer->cur < buffer->rlimit)
298 next = *buffer->cur++;
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
304 if (next == '?' || next == '\\')
305 next = skip_escaped_newlines (pfile, next);
308 buffer->read_ahead = next;
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise. */
316 skip_block_comment (pfile)
319 cpp_buffer *buffer = pfile->buffer;
320 cppchar_t c = EOF, prevc = EOF;
322 pfile->state.lexing_comment = 1;
323 while (buffer->cur != buffer->rlimit)
325 prevc = c, c = *buffer->cur++;
328 /* FIXME: For speed, create a new character class of characters
329 of interest inside block comments. */
330 if (c == '?' || c == '\\')
331 c = skip_escaped_newlines (pfile, c);
333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimiter.
342 Don't bother to get it right across escaped newlines. */
343 if (CPP_OPTION (pfile, warn_comments)
344 && buffer->cur != buffer->rlimit)
346 prevc = c, c = *buffer->cur++;
347 if (c == '*' && buffer->cur != buffer->rlimit)
349 prevc = c, c = *buffer->cur++;
351 cpp_warning_with_line (pfile, pfile->line,
352 CPP_BUF_COL (buffer) - 2,
353 "\"/*\" within comment");
358 else if (is_vspace (c))
360 prevc = c, c = handle_newline (pfile, c);
364 adjust_column (pfile);
367 pfile->state.lexing_comment = 0;
368 buffer->read_ahead = EOF;
369 return c != '/' || prevc != '*';
372 /* Skip a C++ line comment. Handles escaped newlines. Returns
373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead. */
376 skip_line_comment (pfile)
379 cpp_buffer *buffer = pfile->buffer;
380 unsigned int orig_line = pfile->line;
383 pfile->state.lexing_comment = 1;
387 if (buffer->cur == buffer->rlimit)
391 if (c == '?' || c == '\\')
392 c = skip_escaped_newlines (pfile, c);
394 while (!is_vspace (c));
396 pfile->state.lexing_comment = 0;
397 buffer->read_ahead = c; /* Leave any newline for caller. */
398 return orig_line != pfile->line;
401 /* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly. */
404 adjust_column (pfile)
407 cpp_buffer *buffer = pfile->buffer;
408 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
412 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
413 - col % CPP_OPTION (pfile, tabstop)) - 1;
416 /* Skips whitespace, saving the next non-whitespace character.
417 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
418 tokens might be assigned an incorrect column. */
420 skip_whitespace (pfile, c)
424 cpp_buffer *buffer = pfile->buffer;
425 unsigned int warned = 0;
429 /* Horizontal space always OK. */
433 adjust_column (pfile);
434 /* Just \f \v or \0 left. */
439 cpp_warning (pfile, "null character(s) ignored");
443 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
444 cpp_pedwarn_with_line (pfile, pfile->line,
445 CPP_BUF_COL (buffer),
446 "%s in preprocessing directive",
447 c == '\f' ? "form feed" : "vertical tab");
450 if (buffer->cur == buffer->rlimit)
454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
455 while (is_nvspace (c));
457 /* Remember the next character. */
458 buffer->read_ahead = c;
461 /* See if the characters of a number token are valid in a name (no
464 name_p (pfile, string)
466 const cpp_string *string;
470 for (i = 0; i < string->len; i++)
471 if (!is_idchar (string->text[i]))
477 /* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow. */
486 static cpp_hashnode *
487 parse_identifier (pfile)
490 cpp_hashnode *result;
491 const U_CHAR *cur, *rlimit;
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur = pfile->buffer->cur - 1;
496 rlimit = pfile->buffer->rlimit;
499 while (cur < rlimit && ISIDNUM (*cur));
501 /* Check for slow-path cases. */
502 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
503 result = parse_identifier_slow (pfile, cur);
506 const U_CHAR *base = pfile->buffer->cur - 1;
507 result = (cpp_hashnode *)
508 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
509 pfile->buffer->cur = cur;
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
514 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
515 && !pfile->state.skipping, 0))
517 /* It is allowed to poison the same identifier twice. */
518 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
519 cpp_error (pfile, "attempt to use poisoned \"%s\"",
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
533 /* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned. */
536 static cpp_hashnode *
537 parse_identifier_slow (pfile, cur)
541 cpp_buffer *buffer = pfile->buffer;
542 const U_CHAR *base = buffer->cur - 1;
543 struct obstack *stack = &pfile->hash_table->stack;
544 unsigned int c, saw_dollar = 0, len;
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack, base, cur - base);
549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
555 while (is_idchar (c))
557 obstack_1grow (stack, c);
563 if (buffer->cur == buffer->rlimit)
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
572 c = skip_escaped_newlines (pfile, c);
574 while (is_idchar (c));
576 /* Remember the next character. */
577 buffer->read_ahead = c;
579 /* $ is not an identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
582 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
583 cpp_pedwarn (pfile, "'$' character(s) in identifier");
585 /* Identifiers are null-terminated. */
586 len = obstack_object_size (stack);
587 obstack_1grow (stack, '\0');
589 return (cpp_hashnode *)
590 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
593 /* Parse a number, skipping embedded backslash-newlines. */
595 parse_number (pfile, number, c, leading_period)
601 cpp_buffer *buffer = pfile->buffer;
602 unsigned char *dest, *limit;
604 dest = BUFF_FRONT (pfile->u_buff);
605 limit = BUFF_LIMIT (pfile->u_buff);
607 /* Place a leading period. */
612 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 1);
613 dest = BUFF_FRONT (pfile->u_buff);
614 limit = BUFF_LIMIT (pfile->u_buff);
623 /* Need room for terminating null. */
624 if ((size_t) (limit - dest) < 2)
626 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
627 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
628 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
629 limit = BUFF_LIMIT (pfile->u_buff);
634 if (buffer->cur == buffer->rlimit)
639 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
641 /* Potential escaped newline? */
642 if (c != '?' && c != '\\')
644 c = skip_escaped_newlines (pfile, c);
646 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
648 /* Remember the next character. */
649 buffer->read_ahead = c;
651 /* Null-terminate the number. */
654 number->text = BUFF_FRONT (pfile->u_buff);
655 number->len = dest - number->text;
656 BUFF_FRONT (pfile->u_buff) = dest + 1;
659 /* Subroutine of parse_string. Emits error for unterminated strings. */
661 unterminated (pfile, term)
665 cpp_error (pfile, "missing terminating %c character", term);
667 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
669 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
670 "possible start of unterminated string literal");
675 /* Subroutine of parse_string. */
677 unescaped_terminator_p (pfile, dest)
679 const unsigned char *dest;
681 const unsigned char *start, *temp;
683 /* In #include-style directives, terminators are not escapable. */
684 if (pfile->state.angled_headers)
687 start = BUFF_FRONT (pfile->u_buff);
689 /* An odd number of consecutive backslashes represents an escaped
691 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
694 return ((dest - temp) & 1) == 0;
697 /* Parses a string, character constant, or angle-bracketed header file
698 name. Handles embedded trigraphs and escaped newlines. The stored
699 string is guaranteed NUL-terminated, but it is not guaranteed that
700 this is the first NUL since embedded NULs are preserved.
702 Multi-line strings are allowed, but they are deprecated. */
704 parse_string (pfile, token, terminator)
707 cppchar_t terminator;
709 cpp_buffer *buffer = pfile->buffer;
710 unsigned char *dest, *limit;
712 bool warned_nulls = false, warned_multi = false;
714 dest = BUFF_FRONT (pfile->u_buff);
715 limit = BUFF_LIMIT (pfile->u_buff);
719 if (buffer->cur == buffer->rlimit)
725 /* We need space for the terminating NUL. */
726 if ((size_t) (limit - dest) < 1)
728 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
729 pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2);
730 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
731 limit = BUFF_LIMIT (pfile->u_buff);
736 unterminated (pfile, terminator);
740 /* Handle trigraphs, escaped newlines etc. */
741 if (c == '?' || c == '\\')
742 c = skip_escaped_newlines (pfile, c);
744 if (c == terminator && unescaped_terminator_p (pfile, dest))
749 else if (is_vspace (c))
751 /* In assembly language, silently terminate string and
752 character literals at end of line. This is a kludge
753 around not knowing where comments are. */
754 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
757 /* Character constants and header names may not extend over
758 multiple lines. In Standard C, neither may strings.
759 Unfortunately, we accept multiline strings as an
760 extension, except in #include family directives. */
761 if (terminator != '"' || pfile->state.angled_headers)
763 unterminated (pfile, terminator);
770 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
773 if (pfile->mls_line == 0)
775 pfile->mls_line = token->line;
776 pfile->mls_col = token->col;
779 c = handle_newline (pfile, c);
783 else if (c == '\0' && !warned_nulls)
786 cpp_warning (pfile, "null character(s) preserved in literal");
792 /* Remember the next character. */
793 buffer->read_ahead = c;
796 token->val.str.text = BUFF_FRONT (pfile->u_buff);
797 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
798 BUFF_FRONT (pfile->u_buff) = dest + 1;
801 /* The stored comment includes the comment start and any terminator. */
803 save_comment (pfile, token, from)
806 const unsigned char *from;
808 unsigned char *buffer;
811 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
812 /* C++ comments probably (not definitely) have moved past a new
813 line, which we don't want to save in the comment. */
814 if (pfile->buffer->read_ahead != EOF)
816 buffer = _cpp_unaligned_alloc (pfile, len);
818 token->type = CPP_COMMENT;
819 token->val.str.len = len;
820 token->val.str.text = buffer;
823 memcpy (buffer + 1, from, len - 1);
826 /* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
827 want to avoid stepping back when lexing %:%X. */
829 lex_percent (pfile, result)
833 cpp_buffer *buffer= pfile->buffer;
836 result->type = CPP_MOD;
837 /* Parsing %:%X could leave an extra character. */
838 if (buffer->extra_char == EOF)
839 c = get_effective_char (pfile);
842 c = buffer->read_ahead = buffer->extra_char;
843 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_MOD_EQ);
848 else if (CPP_OPTION (pfile, digraphs))
852 result->flags |= DIGRAPH;
853 ACCEPT_CHAR (CPP_HASH);
854 if (get_effective_char (pfile) == '%')
856 buffer->extra_char = get_effective_char (pfile);
857 if (buffer->extra_char == ':')
859 buffer->extra_char = EOF;
860 ACCEPT_CHAR (CPP_PASTE);
863 /* We'll catch the extra_char when we're called back. */
864 buffer->read_ahead = '%';
869 result->flags |= DIGRAPH;
870 ACCEPT_CHAR (CPP_CLOSE_BRACE);
875 /* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
876 want to avoid stepping back when lexing '...' or '.123'. In the
877 latter case we should also set a flag for parse_number. */
879 lex_dot (pfile, result)
883 cpp_buffer *buffer = pfile->buffer;
886 /* Parsing ..X could leave an extra character. */
887 if (buffer->extra_char == EOF)
888 c = get_effective_char (pfile);
891 c = buffer->read_ahead = buffer->extra_char;
892 buffer->extra_char = EOF;
895 /* All known character sets have 0...9 contiguous. */
896 if (c >= '0' && c <= '9')
898 result->type = CPP_NUMBER;
899 parse_number (pfile, &result->val.str, c, 1);
903 result->type = CPP_DOT;
906 buffer->extra_char = get_effective_char (pfile);
907 if (buffer->extra_char == '.')
909 buffer->extra_char = EOF;
910 ACCEPT_CHAR (CPP_ELLIPSIS);
913 /* We'll catch the extra_char when we're called back. */
914 buffer->read_ahead = '.';
916 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
917 ACCEPT_CHAR (CPP_DOT_STAR);
921 /* Allocate COUNT tokens for RUN. */
923 _cpp_init_tokenrun (run, count)
927 run->base = xnewvec (cpp_token, count);
928 run->limit = run->base + count;
932 /* Returns the next tokenrun, or creates one if there is none. */
937 if (run->next == NULL)
939 run->next = xnew (tokenrun);
940 run->next->prev = run;
941 _cpp_init_tokenrun (run->next, 250);
947 /* Allocate a single token that is invalidated at the same time as the
948 rest of the tokens on the line. Has its line and col set to the
949 same as the last lexed token, so that diagnostics appear in the
952 _cpp_temp_token (pfile)
955 cpp_token *old, *result;
957 old = pfile->cur_token - 1;
958 if (pfile->cur_token == pfile->cur_run->limit)
960 pfile->cur_run = next_tokenrun (pfile->cur_run);
961 pfile->cur_token = pfile->cur_run->base;
964 result = pfile->cur_token++;
965 result->line = old->line;
966 result->col = old->col;
970 /* Lex a token into RESULT (external interface). Takes care of issues
971 like directive handling, token lookahead, multiple include
972 optimisation and skipping. */
974 _cpp_lex_token (pfile)
981 if (pfile->cur_token == pfile->cur_run->limit)
983 pfile->cur_run = next_tokenrun (pfile->cur_run);
984 pfile->cur_token = pfile->cur_run->base;
987 if (pfile->lookaheads)
990 result = pfile->cur_token++;
993 result = _cpp_lex_direct (pfile);
995 if (result->flags & BOL)
997 /* Is this a directive?  If _cpp_handle_directive returns
998 false, it is an assembler #. */
999 if (result->type == CPP_HASH
1000 && !pfile->state.parsing_args
1001 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1003 if (pfile->cb.line_change && !pfile->state.skipping)
1004 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
1007 /* We don't skip tokens in directives. */
1008 if (pfile->state.in_directive)
1011 /* Outside a directive, invalidate controlling macros. At file
1012 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1013 get here and MI optimisation works. */
1014 pfile->mi_valid = false;
1016 if (!pfile->state.skipping || result->type == CPP_EOF)
1023 /* Lex a token into pfile->cur_token, which is also incremented, to
1024 get diagnostics pointing to the correct location.
1026 Does not handle issues such as token lookahead, multiple-include
1027 optimisation, directives, skipping etc. This function is only
1028 suitable for use by _cpp_lex_token, and in special cases like
1029 lex_expansion_token which doesn't care for any of these issues.
1031 When meeting a newline, returns CPP_EOF if parsing a directive,
1032 otherwise returns to the start of the token buffer if permissible.
1033 Returns the location of the lexed token. */
1035 _cpp_lex_direct (pfile)
1040 const unsigned char *comment_start;
1041 cpp_token *result = pfile->cur_token++;
1044 buffer = pfile->buffer;
1045 result->flags = buffer->saved_flags;
1046 buffer->saved_flags = 0;
1048 result->line = pfile->line;
1051 c = buffer->read_ahead;
1052 if (c == EOF && buffer->cur < buffer->rlimit)
1054 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1055 buffer->read_ahead = EOF;
1061 buffer->saved_flags = BOL;
1062 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1064 if (buffer->cur != buffer->line_base)
1066 /* Non-empty files should end in a newline. Don't warn
1067 for command line and _Pragma buffers. */
1068 if (!buffer->from_stage3)
1069 cpp_pedwarn (pfile, "no newline at end of file");
1070 handle_newline (pfile, '\n');
1073 /* Don't pop the last buffer. */
1076 unsigned char stop = buffer->return_at_eof;
1078 _cpp_pop_buffer (pfile);
1083 result->type = CPP_EOF;
1086 case ' ': case '\t': case '\f': case '\v': case '\0':
1087 skip_whitespace (pfile, c);
1088 result->flags |= PREV_WHITE;
1091 case '\n': case '\r':
1092 handle_newline (pfile, c);
1093 buffer->saved_flags = BOL;
1094 if (! pfile->state.in_directive)
1096 if (pfile->state.parsing_args == 2)
1097 buffer->saved_flags |= PREV_WHITE;
1098 if (!pfile->keep_tokens)
1100 pfile->cur_run = &pfile->base_run;
1101 result = pfile->base_run.base;
1102 pfile->cur_token = result + 1;
1106 result->type = CPP_EOF;
1111 /* These could start an escaped newline, or '?' a trigraph. Let
1112 skip_escaped_newlines do all the work. */
1114 unsigned int line = pfile->line;
1116 c = skip_escaped_newlines (pfile, c);
1117 if (line != pfile->line)
1118 /* We had at least one escaped newline of some sort, and the
1119 next character is in buffer->read_ahead. Update the
1120 token's line and column. */
1121 goto update_tokens_line;
1123 /* We are either the original '?' or '\\', or a trigraph. */
1124 result->type = CPP_QUERY;
1125 buffer->read_ahead = EOF;
1133 case '0': case '1': case '2': case '3': case '4':
1134 case '5': case '6': case '7': case '8': case '9':
1135 result->type = CPP_NUMBER;
1136 parse_number (pfile, &result->val.str, c, 0);
1140 if (!CPP_OPTION (pfile, dollars_in_ident))
1142 /* Fall through... */
1145 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1146 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1147 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1148 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1150 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1151 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1152 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1153 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1155 result->type = CPP_NAME;
1156 result->val.node = parse_identifier (pfile);
1158 /* 'L' may introduce wide characters or strings. */
1159 if (result->val.node == pfile->spec_nodes.n_L)
1161 c = buffer->read_ahead;
1162 if (c == EOF && buffer->cur < buffer->rlimit)
1164 if (c == '\'' || c == '"')
1167 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1171 /* Convert named operators to their proper types. */
1172 else if (result->val.node->flags & NODE_OPERATOR)
1174 result->flags |= NAMED_OP;
1175 result->type = result->val.node->value.operator;
1181 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1183 parse_string (pfile, result, c);
1187 /* A potential block or line comment. */
1188 comment_start = buffer->cur;
1189 result->type = CPP_DIV;
1190 c = get_effective_char (pfile);
1192 ACCEPT_CHAR (CPP_DIV_EQ);
1193 if (c != '/' && c != '*')
1198 if (skip_block_comment (pfile))
1199 cpp_error (pfile, "unterminated comment");
1203 if (!CPP_OPTION (pfile, cplusplus_comments)
1204 && !CPP_IN_SYSTEM_HEADER (pfile))
1207 /* Warn about comments only if pedantically GNUC89, and not
1208 in system headers. */
1209 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1210 && ! buffer->warned_cplusplus_comments)
1213 "C++ style comments are not allowed in ISO C89");
1215 "(this will be reported only once per input file)");
1216 buffer->warned_cplusplus_comments = 1;
1219 /* Skip_line_comment updates buffer->read_ahead. */
1220 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1221 cpp_warning (pfile, "multi-line comment");
1224 /* Skipping the comment has updated buffer->read_ahead. */
1225 if (!pfile->state.save_comments)
1227 result->flags |= PREV_WHITE;
1228 goto update_tokens_line;
1231 /* Save the comment as a token in its own right. */
1232 save_comment (pfile, result, comment_start);
1236 if (pfile->state.angled_headers)
1238 result->type = CPP_HEADER_NAME;
1239 c = '>'; /* terminator. */
1243 result->type = CPP_LESS;
1244 c = get_effective_char (pfile);
1246 ACCEPT_CHAR (CPP_LESS_EQ);
1249 ACCEPT_CHAR (CPP_LSHIFT);
1250 if (get_effective_char (pfile) == '=')
1251 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1253 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1255 ACCEPT_CHAR (CPP_MIN);
1256 if (get_effective_char (pfile) == '=')
1257 ACCEPT_CHAR (CPP_MIN_EQ);
1259 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1261 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1262 result->flags |= DIGRAPH;
1264 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1266 ACCEPT_CHAR (CPP_OPEN_BRACE);
1267 result->flags |= DIGRAPH;
1272 result->type = CPP_GREATER;
1273 c = get_effective_char (pfile);
1275 ACCEPT_CHAR (CPP_GREATER_EQ);
1278 ACCEPT_CHAR (CPP_RSHIFT);
1279 if (get_effective_char (pfile) == '=')
1280 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1282 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1284 ACCEPT_CHAR (CPP_MAX);
1285 if (get_effective_char (pfile) == '=')
1286 ACCEPT_CHAR (CPP_MAX_EQ);
1291 lex_percent (pfile, result);
1295 lex_dot (pfile, result);
1299 result->type = CPP_PLUS;
1300 c = get_effective_char (pfile);
1302 ACCEPT_CHAR (CPP_PLUS_EQ);
1304 ACCEPT_CHAR (CPP_PLUS_PLUS);
1308 result->type = CPP_MINUS;
1309 c = get_effective_char (pfile);
1312 ACCEPT_CHAR (CPP_DEREF);
1313 if (CPP_OPTION (pfile, cplusplus)
1314 && get_effective_char (pfile) == '*')
1315 ACCEPT_CHAR (CPP_DEREF_STAR);
1318 ACCEPT_CHAR (CPP_MINUS_EQ);
1320 ACCEPT_CHAR (CPP_MINUS_MINUS);
1324 result->type = CPP_MULT;
1325 if (get_effective_char (pfile) == '=')
1326 ACCEPT_CHAR (CPP_MULT_EQ);
1330 result->type = CPP_EQ;
1331 if (get_effective_char (pfile) == '=')
1332 ACCEPT_CHAR (CPP_EQ_EQ);
1336 result->type = CPP_NOT;
1337 if (get_effective_char (pfile) == '=')
1338 ACCEPT_CHAR (CPP_NOT_EQ);
1342 result->type = CPP_AND;
1343 c = get_effective_char (pfile);
1345 ACCEPT_CHAR (CPP_AND_EQ);
1347 ACCEPT_CHAR (CPP_AND_AND);
1351 result->type = CPP_HASH;
1352 if (get_effective_char (pfile) == '#')
1353 ACCEPT_CHAR (CPP_PASTE);
1357 result->type = CPP_OR;
1358 c = get_effective_char (pfile);
1360 ACCEPT_CHAR (CPP_OR_EQ);
1362 ACCEPT_CHAR (CPP_OR_OR);
1366 result->type = CPP_XOR;
1367 if (get_effective_char (pfile) == '=')
1368 ACCEPT_CHAR (CPP_XOR_EQ);
1372 result->type = CPP_COLON;
1373 c = get_effective_char (pfile);
1374 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1375 ACCEPT_CHAR (CPP_SCOPE);
1376 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1378 result->flags |= DIGRAPH;
1379 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1383 case '~': result->type = CPP_COMPL; break;
1384 case ',': result->type = CPP_COMMA; break;
1385 case '(': result->type = CPP_OPEN_PAREN; break;
1386 case ')': result->type = CPP_CLOSE_PAREN; break;
1387 case '[': result->type = CPP_OPEN_SQUARE; break;
1388 case ']': result->type = CPP_CLOSE_SQUARE; break;
1389 case '{': result->type = CPP_OPEN_BRACE; break;
1390 case '}': result->type = CPP_CLOSE_BRACE; break;
1391 case ';': result->type = CPP_SEMICOLON; break;
1393 /* @ is a punctuator in Objective C. */
1394 case '@': result->type = CPP_ATSIGN; break;
1398 result->type = CPP_OTHER;
1406 /* An upper bound on the number of bytes needed to spell a token,
1407 including preceding whitespace. TOKEN is only read. */
1409 cpp_token_len (token)
1410 const cpp_token *token;
1414 switch (TOKEN_SPELL (token)) /* The variable part depends on the spelling category. */
1416 default: len = 0; break; /* Other categories contribute no payload length. */
1417 case SPELL_STRING: len = token->val.str.len; break; /* Length of the stored text. */
1418 case SPELL_IDENT: len = NODE_LEN (token->val.node); break; /* Length of the identifier name. */
1420 /* 1 for whitespace, 4 for comment delimiters. */
1424 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1425 already contain enough space to hold the token's spelling.
1426 Returns a pointer to the character after the last character
1429 cpp_spell_token (pfile, token, buffer)
1430 cpp_reader *pfile; /* Would be nice to be rid of this... */
1431 const cpp_token *token;
1432 unsigned char *buffer;
1434 switch (TOKEN_SPELL (token)) /* One output strategy per spelling category. */
1436 case SPELL_OPERATOR:
1438 const unsigned char *spelling;
1441 if (token->flags & DIGRAPH)
1443 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; /* Table is indexed by offset from the first digraph type. */
1444 else if (token->flags & NAMED_OP)
1447 spelling = TOKEN_NAME (token);
1449 while ((c = *spelling++) != '\0') /* Walk the spelling up to its terminating NUL. */
1456 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); /* Identifier: copy the node name bytes. */
1457 buffer += NODE_LEN (token->val.node);
1462 int left, right, tag; /* Opening and closing delimiters and an optional prefix; 0 means none. */
1463 switch (token->type)
1465 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1466 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1467 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1468 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1469 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1470 default: left = '\0'; right = '\0'; tag = '\0'; break;
1472 if (tag) *buffer++ = tag; /* Prefix is written first, then the opening delimiter. */
1473 if (left) *buffer++ = left;
1474 memcpy (buffer, token->val.str.text, token->val.str.len); /* Stored text goes between the delimiters. */
1475 buffer += token->val.str.len;
1476 if (right) *buffer++ = right;
1481 *buffer++ = token->val.c; /* Single character held in the token itself. */
1485 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1492 /* Returns a token as a null-terminated string. The string is
1493 temporary, and automatically freed later. Useful for diagnostics. */
1495 cpp_token_as_text (pfile, token)
1497 const cpp_token *token;
1499 unsigned int len = cpp_token_len (token); /* Upper bound on the spelled length. */
1500 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; /* NOTE(review): _cpp_unaligned_alloc is described in this file as permanent storage, which sits oddly with the word temporary above - confirm the lifetime. */
1502 end = cpp_spell_token (pfile, token, start); /* END is the pointer returned by cpp_spell_token, i.e. just past the spelling. */
1508 /* Used by C front ends. Should really move to using cpp_token_as_text.
1508 Returns the name recorded for TYPE in token_spellings. */
1510 cpp_type2name (type)
1511 enum cpp_ttype type;
1513 return (const char *) token_spellings[type].name; /* TYPE indexes the table directly; no range check is made here. */
1516 /* Writes the spelling of token to FP, without any preceding space.
1517 Separated from cpp_spell_token for efficiency - to avoid stdio
1518 double-buffering. */
1520 cpp_output_token (token, fp)
1521 const cpp_token *token;
1524 switch (TOKEN_SPELL (token)) /* Same category dispatch as cpp_spell_token, but writing to a stream. */
1526 case SPELL_OPERATOR:
1528 const unsigned char *spelling;
1531 if (token->flags & DIGRAPH)
1533 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH]; /* Table is indexed by offset from the first digraph type. */
1534 else if (token->flags & NAMED_OP)
1537 spelling = TOKEN_NAME (token);
1542 while ((c = *++spelling) != '\0'); /* Pre-increment: step to the next spelling byte, stop at the terminating NUL. */
1548 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); /* Identifier: write the node name bytes. */
1553 int left, right, tag; /* Opening and closing delimiters and an optional prefix; 0 means none. */
1554 switch (token->type)
1556 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1557 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1558 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1559 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1560 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1561 default: left = '\0'; right = '\0'; tag = '\0'; break;
1563 if (tag) putc (tag, fp); /* Prefix is written first, then the opening delimiter. */
1564 if (left) putc (left, fp);
1565 fwrite (token->val.str.text, 1, token->val.str.len, fp); /* Stored text goes between the delimiters; the fwrite result is not checked. */
1566 if (right) putc (right, fp);
1571 putc (token->val.c, fp); /* Single character held in the token itself. */
1575 /* An error, most probably. */
1580 /* Compare two tokens. Type and flags must both match before the spelling-specific payload is compared. */
1582 _cpp_equiv_tokens (a, b)
1583 const cpp_token *a, *b;
1585 if (a->type == b->type && a->flags == b->flags)
1586 switch (TOKEN_SPELL (a)) /* A and B share a type here, so the category of A is used for both. */
1588 default: /* Keep compiler happy. */
1589 case SPELL_OPERATOR:
1592 return a->val.c == b->val.c; /* Character. */
1594 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); /* Macro arguments must also agree on the argument number. */
1596 return a->val.node == b->val.node; /* Compared by node pointer, not by text. */
1598 return (a->val.str.len == b->val.str.len /* Lengths are compared before the text bytes. */
1599 && !memcmp (a->val.str.text, b->val.str.text,
1606 /* Returns nonzero if a space should be inserted to avoid an
1607 accidental token paste for output. For simplicity, it is
1608 conservative, and occasionally advises a space where one is not
1609 needed, e.g. "." and ".2". */
1612 cpp_avoid_paste (pfile, token1, token2)
1614 const cpp_token *token1, *token2;
1616 enum cpp_ttype a = token1->type, b = token2->type; /* A is the left token type, B the right one. */
1619 if (token1->flags & NAMED_OP)
1621 if (token2->flags & NAMED_OP)
1625 if (token2->flags & DIGRAPH)
1626 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0]; /* C is the first spelled character of the right token. */
1627 else if (token_spellings[b].category == SPELL_OPERATOR)
1628 c = token_spellings[b].name[0];
1630 /* Quickly get everything that can paste with an '='. */
1631 if ((int) a <= (int) CPP_LAST_EQ && c == '=') /* Relies on the enum order: every type up to CPP_LAST_EQ is treated as able to take a following equals sign. */
1636 case CPP_GREATER: return c == '>' || c == '?';
1637 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1638 case CPP_PLUS: return c == '+';
1639 case CPP_MINUS: return c == '-' || c == '>';
1640 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1641 case CPP_MOD: return c == ':' || c == '>';
1642 case CPP_AND: return c == '&';
1643 case CPP_OR: return c == '|';
1644 case CPP_COLON: return c == ':' || c == '>';
1645 case CPP_DEREF: return c == '*';
1646 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1647 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1648 case CPP_NAME: return ((b == CPP_NUMBER
1649 && name_p (pfile, &token2->val.str))
1651 || b == CPP_CHAR || b == CPP_STRING); /* L */
1652 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1653 || c == '.' || c == '+' || c == '-');
1654 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1655 && token1->val.c == '@'
1656 && (b == CPP_NAME || b == CPP_STRING));
1663 /* Output all the remaining tokens on the current line, and a newline
1664 character, to FP. Leading whitespace is removed. If there are
1665 macros, special token padding is not performed. */
1667 cpp_output_line (pfile, fp)
1671 const cpp_token *token;
1673 token = cpp_get_token (pfile);
1674 while (token->type != CPP_EOF) /* CPP_EOF ends the loop. */
1676 cpp_output_token (token, fp);
1677 token = cpp_get_token (pfile); /* Fetch the following token before deciding on separation. */
1678 if (token->flags & PREV_WHITE) /* Tested on the token about to be printed next, so whitespace before the very first token is never considered. */
1685 /* Returns the value of a hexadecimal digit. Both letter cases are accepted; the range tests assume a-f, A-F and 0-9 are each contiguous in the host character set. */
1690 if (c >= 'a' && c <= 'f')
1691 return c - 'a' + 10; /* Lower-case letters map to 10..15. */
1692 if (c >= 'A' && c <= 'F')
1693 return c - 'A' + 10; /* Upper-case letters map to 10..15. */
1694 if (c >= '0' && c <= '9')
1699 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1700 failure if cpplib is not parsing C++ or C99. Such failure is
1701 silent, and no variables are updated. Otherwise returns 0, and
1702 warns if -Wtraditional.
1704 [lex.charset]: The character designated by the universal character
1705 name \UNNNNNNNN is that character whose character short name in
1706 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707 universal character name \uNNNN is that character whose character
1708 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1709 for a universal character name is less than 0x20 or in the range
1710 0x7F-0x9F (inclusive), or if the universal character name
1711 designates a character in the basic source character set, then the
1712 program is ill-formed.
1714 We assume that wchar_t is Unicode, so we don't need to do any
1715 mapping. Is this ever wrong?
1717 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1718 LIMIT is the end of the string or charconst. PSTR is updated to
1719 point after the UCS on return, and the UCS is written into PC. */
1722 maybe_read_ucs (pfile, pstr, limit, pc)
1724 const unsigned char **pstr;
1725 const unsigned char *limit;
1728 const unsigned char *p = *pstr; /* Local cursor over the digits. */
1729 unsigned int code = 0; /* Accumulated code point. */
1730 unsigned int c = *pc, length;
1732 /* Only attempt to interpret a UCS for C++ and C99. */
1733 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1736 if (CPP_WTRADITIONAL (pfile))
1737 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1739 length = (c == 'u' ? 4: 8); /* Number of hex digits expected: 4 after u, otherwise 8. */
1741 if ((size_t) (limit - p) < length) /* Fewer bytes remain before LIMIT than digits required. */
1743 cpp_error (pfile, "incomplete universal-character-name");
1744 /* Skip to the end to avoid more diagnostics. */
1749 for (; length; length--, p++) /* LENGTH counts down the digits still to be read. */
1753 code = (code << 4) + hex_digit_value (c); /* Shift in one hex digit, most significant first. */
1757 "non-hex digit '%c' in universal-character-name", c);
1758 /* We shouldn't skip in case there are multibyte chars. */
1764 #ifdef TARGET_EBCDIC
1765 cpp_error (pfile, "universal-character-name on EBCDIC target");
1766 code = 0x3f; /* EBCDIC invalid character */
1768 /* True extended characters are OK. */
1770 && !(code & 0x80000000) /* Top bit must be clear. */
1771 && !(code >= 0xD800 && code <= 0xDFFF)) /* Values D800 through DFFF are excluded. */
1773 /* The standard permits $, @ and ` to be specified as UCNs. We use
1774 hex escapes so that this also works with EBCDIC hosts. */
1775 else if (code == 0x24 || code == 0x40 || code == 0x60)
1777 /* Don't give another error if one occurred above. */
1778 else if (length == 0) /* LENGTH is zero only when the digit loop ran to completion. */
1779 cpp_error (pfile, "universal-character-name out of range");
1787 /* Interpret an escape sequence, and return its value. PSTR points to
1788 the input pointer, which is just after the backslash. LIMIT is how
1789 much text we have. MASK is a bitmask for the precision for the
1790 destination type (char or wchar_t). TRADITIONAL, if true, does not
1791 interpret escapes that did not exist in traditional C.
1793 Handles all relevant diagnostics. */
1796 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1798 const unsigned char **pstr;
1799 const unsigned char *limit;
1800 unsigned HOST_WIDE_INT mask;
1804 const unsigned char *str = *pstr;
1805 unsigned int c = *str++; /* C is the character after the backslash; STR now points past it. */
1809 case '\\': case '\'': case '"': case '?': break; /* These escapes stand for themselves; C is left unchanged. */
1810 case 'b': c = TARGET_BS; break;
1811 case 'f': c = TARGET_FF; break;
1812 case 'n': c = TARGET_NEWLINE; break;
1813 case 'r': c = TARGET_CR; break;
1814 case 't': c = TARGET_TAB; break;
1815 case 'v': c = TARGET_VT; break;
1817 case '(': case '{': case '[': case '%':
1818 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819 '\%' is used to prevent SCCS from getting confused. */
1820 unknown = CPP_PEDANTIC (pfile); /* Flagged as unknown only when pedantic. */
1824 if (CPP_WTRADITIONAL (pfile))
1825 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1831 if (CPP_PEDANTIC (pfile))
1832 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1837 unknown = maybe_read_ucs (pfile, &str, limit, &c); /* maybe_read_ucs returns nonzero when it declines to interpret the sequence. */
1841 if (CPP_WTRADITIONAL (pfile))
1842 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1846 unsigned int i = 0, overflow = 0;
1847 int digits_found = 0;
1855 overflow |= i ^ (i << 4 >> 4); /* Nonzero when the top four bits of I would be lost by the shift below. */
1856 i = (i << 4) + hex_digit_value (c);
1861 cpp_error (pfile, "\\x used with no following hex digits");
1863 if (overflow | (i != (i & mask))) /* Either bits were shifted out or the value does not fit in MASK. */
1865 cpp_pedwarn (pfile, "hex escape sequence out of range");
1872 case '0': case '1': case '2': case '3':
1873 case '4': case '5': case '6': case '7':
1875 unsigned int i = c - '0'; /* First octal digit was already consumed as C. */
1878 while (str < limit && ++count < 3) /* COUNT is initialized on a line not shown; presumably this caps the escape at three octal digits - confirm. */
1881 if (c < '0' || c > '7')
1884 i = (i << 3) + c - '0';
1887 if (i != (i & mask)) /* Value does not fit in MASK. */
1889 cpp_pedwarn (pfile, "octal escape sequence out of range");
1904 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1906 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1910 cpp_pedwarn (pfile, "escape sequence out of range for character");
1916 #ifndef MAX_CHAR_TYPE_SIZE /* Fall back to CHAR_TYPE_SIZE when no maximum is defined. */
1917 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1920 #ifndef MAX_WCHAR_TYPE_SIZE /* Fall back to WCHAR_TYPE_SIZE when no maximum is defined. */
1921 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1924 /* Interpret a (possibly wide) character constant in TOKEN.
1925 WARN_MULTI warns about multi-character charconsts, if not
1926 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1927 that did not exist in traditional C. PCHARS_SEEN points to a
1928 variable that is filled in with the number of characters seen. */
1930 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1932 const cpp_token *token;
1935 unsigned int *pchars_seen;
1937 const unsigned char *str = token->val.str.text;
1938 const unsigned char *limit = str + token->val.str.len; /* One past the last byte of the constant text. */
1939 unsigned int chars_seen = 0;
1940 unsigned int width, max_chars, c;
1941 unsigned HOST_WIDE_INT mask;
1942 HOST_WIDE_INT result = 0;
1944 #ifdef MULTIBYTE_CHARS
1945 (void) local_mbtowc (NULL, NULL, 0);
1948 /* Width in bits. */
1949 if (token->type == CPP_CHAR)
1950 width = MAX_CHAR_TYPE_SIZE;
1952 width = MAX_WCHAR_TYPE_SIZE;
1954 if (width < HOST_BITS_PER_WIDE_INT) /* Guard: shifting by the full width of the type would be undefined. */
1955 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
1958 max_chars = HOST_BITS_PER_WIDE_INT / width; /* How many characters of WIDTH bits fit in the result. */
1962 #ifdef MULTIBYTE_CHARS
1966 char_len = local_mbtowc (&wc, str, limit - str);
1969 cpp_warning (pfile, "ignoring invalid multibyte character");
1982 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1984 #ifdef MAP_CHARACTER
1986 c = MAP_CHARACTER (c);
1989 /* Merge character into result; ignore excess chars. */
1990 if (++chars_seen <= max_chars) /* Every character is counted, but only the first MAX_CHARS are merged. */
1992 if (width < HOST_BITS_PER_WIDE_INT)
1993 result = (result << width) | (c & mask); /* Earlier characters end up in the higher-order bits. */
1999 if (chars_seen == 0)
2000 cpp_error (pfile, "empty character constant");
2001 else if (chars_seen > max_chars)
2003 chars_seen = max_chars; /* Clamp so the count matches what was actually merged. */
2004 cpp_warning (pfile, "character constant too long");
2006 else if (chars_seen > 1 && !traditional && warn_multi)
2007 cpp_warning (pfile, "multi-character character constant");
2009 /* If char type is signed, sign-extend the constant. The
2010 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2011 if (token->type == CPP_CHAR && chars_seen)
2013 unsigned int nbits = chars_seen * width;
2014 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits); /* NOTE(review): NBITS can reach MAX_CHARS * WIDTH; if that exceeds HOST_BITS_PER_INT the shift count is negative - confirm HOST_WIDE_INT is never wider than int here. This MASK also appears to shadow the wider MASK declared above. */
2016 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017 || ((result >> (nbits - 1)) & 1) == 0) /* Tests the top bit of the NBITS-wide value. */
2023 *pchars_seen = chars_seen;
2027 /* Memory buffers. Changing these three constants can have a dramatic
2028 effect on performance. The values here are reasonable defaults,
2029 but might be tuned. If you adjust them, be sure to test across a
2030 range of uses of cpplib, including heavy nested function-like macro
2031 expansion. Also check the change in peak memory usage (NJAMD is a
2032 good tool for this). */
2033 #define MIN_BUFF_SIZE 8000 /* Smallest capacity new_buff will allocate. */
2034 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2) /* A free buffer is reused by _cpp_get_buff only if its capacity is below this. */
2035 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2036 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) /* MIN_EXTRA plus twice the bytes between cur and limit. NOTE(review): MIN_EXTRA is not parenthesized. */
2048 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u)) /* struct dummy is declared on lines not shown here. */
2049 #define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1)) /* Rounds size up to a multiple of align; the mask arithmetic needs align to be a power of two. */
2051 /* Create a new allocation buffer. Place the control block at the end
2052 of the buffer, so that buffer overflows will cause immediate chaos. */
2058 unsigned char *base;
2060 if (len < MIN_BUFF_SIZE) /* Requests are raised to at least MIN_BUFF_SIZE. */
2061 len = MIN_BUFF_SIZE;
2062 len = CPP_ALIGN (len, DEFAULT_ALIGNMENT); /* Round up; the control block is then placed at base + len. */
2064 base = xmalloc (len + sizeof (_cpp_buff)); /* One allocation holds the data area followed by the control block. */
2065 result = (_cpp_buff *) (base + len);
2066 result->base = base;
2068 result->limit = base + len; /* LIMIT coincides with the address of the control block itself. */
2069 result->next = NULL;
2073 /* Place a chain of unwanted allocation buffers on the free list. */
2075 _cpp_release_buff (pfile, buff)
2079 _cpp_buff *end = buff; /* Presumably advanced to the last buffer of the chain by lines not shown - confirm. */
2083 end->next = pfile->free_buffs; /* The existing free list is appended after END. */
2084 pfile->free_buffs = buff; /* BUFF becomes the new head of the free list. */
2087 /* Return a free buffer of size at least MIN_SIZE. Free-list buffers are accepted when their capacity (limit - base) is at least MIN_SIZE and below BUFF_SIZE_UPPER_BOUND (MIN_SIZE); new_buff (MIN_SIZE) supplies a fresh one otherwise. RESULT has its next link cleared and cur reset to base. */
2089 _cpp_get_buff (pfile, min_size)
2091 unsigned int min_size;
2093 _cpp_buff *result, **p;
2095 for (p = &pfile->free_buffs;; p = &(*p)->next) /* P walks the link fields of the free list; the for header has no exit condition. */
2100 return new_buff (min_size);
2102 size = result->limit - result->base;
2103 /* Return a buffer that's big enough, but don't waste one that's
2105 if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size))
2110 result->next = NULL;
2111 result->cur = result->base;
2115 /* Return a buffer chained on the end of BUFF. Copy to it the
2116 uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more
2119 _cpp_extend_buff (pfile, buff, min_extra)
2122 unsigned int min_extra;
2124 unsigned int size = EXTENDED_BUFF_SIZE (buff, min_extra);
2126 buff->next = _cpp_get_buff (pfile, size);
2127 memcpy (buff->next->base, buff->cur, buff->limit - buff->cur);
2131 /* Free a chain of buffers starting at BUFF. */
2133 _cpp_free_buff (buff)
2138 for (; buff; buff = next) /* Stops at the null link; NEXT is presumably saved before each buffer is released, on lines not shown - confirm. */
2145 /* Allocate permanent, unaligned storage of length LEN. Storage is carved from the buffer chain headed by pfile->u_buff. */
2147 _cpp_unaligned_alloc (pfile, len)
2151 _cpp_buff *buff = pfile->u_buff;
2152 unsigned char *result = buff->cur; /* Candidate start of the allocation in the current buffer. */
2154 if (len > (size_t) (buff->limit - result)) /* Not enough room left in the current buffer. */
2156 buff = _cpp_get_buff (pfile, len);
2157 buff->next = pfile->u_buff; /* The new buffer is pushed on the front of the chain. */
2158 pfile->u_buff = buff;
2162 buff->cur = result + len; /* Commit LEN bytes. RESULT is presumably re-read from the new buffer on a line not shown - confirm. */
2167 chunk_suitable (chunk, size) /* Nonzero when CHUNK is non-null and spans (limit - base) at least twice SIZE bytes. */
2171 /* Being at least twice SIZE means we can use memcpy in
2172 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2174 return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2177 /* Returns the end of the new pool. PTR points to a char in the old
2178 pool, and is updated to point to the same char in the new pool. */
2180 _cpp_next_chunk (pool, len, ptr)
2183 unsigned char **ptr;
2185 cpp_chunk *chunk = pool->cur->next; /* First candidate is the chunk already linked after the current one. */
2187 /* LEN is the minimum size we want in the new pool. */
2188 len += POOL_ROOM (pool);
2189 if (! chunk_suitable (chunk, len))
2191 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2193 chunk->next = pool->cur->next; /* Insert the new chunk directly after the current one. */
2194 pool->cur->next = chunk;
2197 /* Update the pointer before changing chunk's front. */
2199 *ptr += chunk->base - POOL_FRONT (pool); /* NOTE(review): _cpp_pool_reserve passes 0 for PTR; confirm this dereference is guarded by a null check on a line not shown. */
2201 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool)); /* Copy POOL_ROOM (pool) bytes starting at POOL_FRONT (pool) to the start of the chosen chunk. */
2202 chunk->front = chunk->base;
2205 return POOL_LIMIT (pool);
2212 unsigned char *base;
2215 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT); /* Adjust SIZE with POOL_ALIGN before allocating. */
2216 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk)); /* One allocation holds the data area followed by the descriptor. */
2217 /* Put the chunk descriptor at the end. Then chunk overruns will
2218 cause obvious chaos. */
2219 result = (cpp_chunk *) (base + size);
2220 result->base = base;
2221 result->front = base; /* FRONT starts at BASE in a fresh chunk. */
2222 result->limit = base + size; /* LIMIT coincides with the address of the descriptor itself. */
2229 _cpp_init_pool (pool, size, align, temp) /* Sets up POOL with a single first chunk obtained from new_chunk (SIZE). */
2231 unsigned int size, align, temp;
2234 align = DEFAULT_ALIGNMENT; /* The condition selecting this default is on a line not shown. */
2235 if (align & (align - 1)) /* Nonzero when ALIGN is not a power of two; the action taken is not shown. */
2237 pool->align = align;
2238 pool->first = new_chunk (size);
2239 pool->cur = pool->first;
2241 pool->cur->next = pool->cur; /* Links the chunk to itself, making the list circular; the guarding condition is not shown (presumably TEMP - confirm). */
2245 _cpp_free_pool (pool) /* Walks the chunks of POOL starting at pool->first; the loop body is on lines not shown. */
2248 cpp_chunk *chunk = pool->first, *next;
2256 while (chunk && chunk != pool->first); /* Stops on a null link or on arriving back at the first chunk, so both linear and circular lists terminate. */
2259 /* Reserve LEN bytes from a memory pool. Returns POOL_FRONT (pool); nothing is committed here. */
2261 _cpp_pool_reserve (pool, len)
2265 len = POOL_ALIGN (len, pool->align); /* LEN is adjusted to the pool alignment first. */
2266 if (len > (unsigned int) POOL_ROOM (pool)) /* Not enough room in the current chunk. */
2267 _cpp_next_chunk (pool, len, 0); /* No caller pointer needs relocating, so PTR is 0. */
2269 return POOL_FRONT (pool);
2272 /* Allocate LEN bytes from a memory pool: reserve the space, then commit it. */
2274 _cpp_pool_alloc (pool, len)
2278 unsigned char *result = _cpp_pool_reserve (pool, len); /* Start of the reserved space. */
2280 POOL_COMMIT (pool, len); /* Commit LEN bytes of the reservation. */