1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straightforward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in the token->val.str.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
107 static cpp_chunk *new_chunk PARAMS ((unsigned int));
108 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
109 static unsigned int hex_digit_value PARAMS ((unsigned int));
113 Compares the token TOKEN to the NUL-terminated string STRING.
114 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
117 cpp_ideq (token, string)
118 const cpp_token *token;
121 if (token->type != CPP_NAME)
124 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
127 /* Call when meeting a newline. Returns the character after the newline
128 (or carriage-return newline combination), or EOF. */
130 handle_newline (pfile, newline_char)
132 cppchar_t newline_char;
135 cppchar_t next = EOF;
138 buffer = pfile->buffer;
139 buffer->col_adjust = 0;
140 buffer->line_base = buffer->cur;
142 /* Handle CR-LF and LF-CR combinations, get the next character. */
143 if (buffer->cur < buffer->rlimit)
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
156 buffer->read_ahead = next;
160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
166 trigraph_ok (pfile, from_char)
170 int accept = CPP_OPTION (pfile, trigraphs);
172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
175 cpp_buffer *buffer = pfile->buffer;
178 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
179 "trigraph ??%c converted to %c",
181 (int) _cpp_trigraph_map[from_char]);
182 else if (buffer->cur != buffer->last_Wtrigraphs)
184 buffer->last_Wtrigraphs = buffer->cur;
185 cpp_warning_with_line (pfile, pfile->line,
186 CPP_BUF_COL (buffer) - 2,
187 "trigraph ??%c ignored", (int) from_char);
194 /* Assumes local variables buffer and result. */
195 #define ACCEPT_CHAR(t) \
196 do { result->type = t; buffer->read_ahead = EOF; } while (0)
198 /* When we move to multibyte character sets, add to these something
199 that saves and restores the state of the multibyte conversion
200 library. This probably involves saving and restoring a "cookie".
201 In the case of glibc it is an 8-byte structure, so is not a high
202 overhead operation. In any case, it's out of the fast path. */
203 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
204 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
206 /* Skips any escaped newlines introduced by NEXT, which is either a
207 '?' or a '\\'. Returns the next character, which will also have
208 been placed in buffer->read_ahead. This routine performs
209 preprocessing stages 1 and 2 of the ISO C standard. */
211 skip_escaped_newlines (pfile, next)
215 cpp_buffer *buffer = pfile->buffer;
217 /* Only do this if we apply stages 1 and 2. */
218 if (!buffer->from_stage3)
221 const unsigned char *saved_cur;
226 if (buffer->cur == buffer->rlimit)
232 next1 = *buffer->cur++;
233 if (next1 != '?' || buffer->cur == buffer->rlimit)
239 next1 = *buffer->cur++;
240 if (!_cpp_trigraph_map[next1]
241 || !trigraph_ok (pfile, next1))
247 /* We have a full trigraph here. */
248 next = _cpp_trigraph_map[next1];
249 if (next != '\\' || buffer->cur == buffer->rlimit)
254 /* We have a backslash, and room for at least one more character. */
258 next1 = *buffer->cur++;
259 if (!is_nvspace (next1))
263 while (buffer->cur < buffer->rlimit);
265 if (!is_vspace (next1))
271 if (space && !pfile->state.lexing_comment)
272 cpp_warning (pfile, "backslash and newline separated by space");
274 next = handle_newline (pfile, next1);
276 cpp_pedwarn (pfile, "backslash-newline at end of file");
278 while (next == '\\' || next == '?');
281 buffer->read_ahead = next;
285 /* Obtain the next character, after trigraph conversion and skipping
286 an arbitrary string of escaped newlines. The common case of no
287 trigraphs or escaped newlines falls through quickly. */
289 get_effective_char (pfile)
292 cpp_buffer *buffer = pfile->buffer;
293 cppchar_t next = EOF;
295 if (buffer->cur < buffer->rlimit)
297 next = *buffer->cur++;
299 /* '?' can introduce trigraphs (and therefore backslash); '\\'
300 can introduce escaped newlines, which we want to skip, or
301 UCNs, which, depending upon lexer state, we will handle in
303 if (next == '?' || next == '\\')
304 next = skip_escaped_newlines (pfile, next);
307 buffer->read_ahead = next;
311 /* Skip a C-style block comment. We find the end of the comment by
312 seeing if an asterisk is before every '/' we encounter. Returns
313 non-zero if comment terminated by EOF, zero otherwise. */
315 skip_block_comment (pfile)
318 cpp_buffer *buffer = pfile->buffer;
319 cppchar_t c = EOF, prevc = EOF;
321 pfile->state.lexing_comment = 1;
322 while (buffer->cur != buffer->rlimit)
324 prevc = c, c = *buffer->cur++;
327 /* FIXME: For speed, create a new character class of characters
328 of interest inside block comments. */
329 if (c == '?' || c == '\\')
330 c = skip_escaped_newlines (pfile, c);
332 /* People like decorating comments with '*', so check for '/'
333 instead for efficiency. */
339 /* Warn about potential nested comments, but not if the '/'
340 comes immediately before the true comment delimiter.
341 Don't bother to get it right across escaped newlines. */
342 if (CPP_OPTION (pfile, warn_comments)
343 && buffer->cur != buffer->rlimit)
345 prevc = c, c = *buffer->cur++;
346 if (c == '*' && buffer->cur != buffer->rlimit)
348 prevc = c, c = *buffer->cur++;
350 cpp_warning_with_line (pfile, pfile->line,
351 CPP_BUF_COL (buffer) - 2,
352 "\"/*\" within comment");
357 else if (is_vspace (c))
359 prevc = c, c = handle_newline (pfile, c);
363 adjust_column (pfile);
366 pfile->state.lexing_comment = 0;
367 buffer->read_ahead = EOF;
368 return c != '/' || prevc != '*';
371 /* Skip a C++ line comment. Handles escaped newlines. Returns
372 non-zero if a multiline comment. The following new line, if any,
373 is left in buffer->read_ahead. */
375 skip_line_comment (pfile)
378 cpp_buffer *buffer = pfile->buffer;
379 unsigned int orig_line = pfile->line;
382 pfile->state.lexing_comment = 1;
386 if (buffer->cur == buffer->rlimit)
390 if (c == '?' || c == '\\')
391 c = skip_escaped_newlines (pfile, c);
393 while (!is_vspace (c));
395 pfile->state.lexing_comment = 0;
396 buffer->read_ahead = c; /* Leave any newline for caller. */
397 return orig_line != pfile->line;
400 /* pfile->buffer->cur is one beyond the \t character. Update
401 col_adjust so we track the column correctly. */
403 adjust_column (pfile)
406 cpp_buffer *buffer = pfile->buffer;
407 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
409 /* Round it up to a multiple of the tabstop, but subtract 1 since the
410 tab itself occupies a character position. */
411 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
412 - col % CPP_OPTION (pfile, tabstop)) - 1;
415 /* Skips whitespace, saving the next non-whitespace character.
416 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
417 tokens might be assigned an incorrect column. */
419 skip_whitespace (pfile, c)
423 cpp_buffer *buffer = pfile->buffer;
424 unsigned int warned = 0;
428 /* Horizontal space always OK. */
432 adjust_column (pfile);
433 /* Just \f \v or \0 left. */
438 cpp_warning (pfile, "null character(s) ignored");
442 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
443 cpp_pedwarn_with_line (pfile, pfile->line,
444 CPP_BUF_COL (buffer),
445 "%s in preprocessing directive",
446 c == '\f' ? "form feed" : "vertical tab");
449 if (buffer->cur == buffer->rlimit)
453 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
454 while (is_nvspace (c));
456 /* Remember the next character. */
457 buffer->read_ahead = c;
460 /* See if the characters of a number token are valid in a name (no
463 name_p (pfile, string)
465 const cpp_string *string;
469 for (i = 0; i < string->len; i++)
470 if (!is_idchar (string->text[i]))
476 /* Parse an identifier, skipping embedded backslash-newlines. This is
477 a critical inner loop. The common case is an identifier which has
478 not been split by backslash-newline, does not contain a dollar
479 sign, and has already been scanned (roughly 10:1 ratio of
480 seen:unseen identifiers in normal code; the distribution is
481 Poisson-like). Second most common case is a new identifier, not
482 split and no dollar sign. The other possibilities are rare and
483 have been relegated to parse_identifier_slow. */
485 static cpp_hashnode *
486 parse_identifier (pfile)
489 cpp_hashnode *result;
490 const U_CHAR *cur, *rlimit;
492 /* Fast-path loop. Skim over a normal identifier.
493 N.B. ISIDNUM does not include $. */
494 cur = pfile->buffer->cur - 1;
495 rlimit = pfile->buffer->rlimit;
498 while (cur < rlimit && ISIDNUM (*cur));
500 /* Check for slow-path cases. */
501 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
502 result = parse_identifier_slow (pfile, cur);
505 const U_CHAR *base = pfile->buffer->cur - 1;
506 result = (cpp_hashnode *)
507 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
508 pfile->buffer->cur = cur;
511 /* Rarely, identifiers require diagnostics when lexed.
512 XXX Has to be forced out of the fast path. */
513 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
514 && !pfile->state.skipping, 0))
516 /* It is allowed to poison the same identifier twice. */
517 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
518 cpp_error (pfile, "attempt to use poisoned \"%s\"",
521 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
522 replacement list of a variadic macro. */
523 if (result == pfile->spec_nodes.n__VA_ARGS__
524 && !pfile->state.va_args_ok)
526 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
532 /* Slow path. This handles identifiers which have been split, and
533 identifiers which contain dollar signs. The part of the identifier
534 from PFILE->buffer->cur-1 to CUR has already been scanned. */
535 static cpp_hashnode *
536 parse_identifier_slow (pfile, cur)
540 cpp_buffer *buffer = pfile->buffer;
541 const U_CHAR *base = buffer->cur - 1;
542 struct obstack *stack = &pfile->hash_table->stack;
543 unsigned int c, saw_dollar = 0, len;
545 /* Copy the part of the token which is known to be okay. */
546 obstack_grow (stack, base, cur - base);
548 /* Now process the part which isn't. We are looking at one of
549 '$', '\\', or '?' on entry to this loop. */
554 while (is_idchar (c))
556 obstack_1grow (stack, c);
562 if (buffer->cur == buffer->rlimit)
568 /* Potential escaped newline? */
569 if (c != '?' && c != '\\')
571 c = skip_escaped_newlines (pfile, c);
573 while (is_idchar (c));
575 /* Remember the next character. */
576 buffer->read_ahead = c;
578 /* $ is not an identifier character in the standard, but is commonly
579 accepted as an extension. Don't warn about it in skipped
580 conditional blocks. */
581 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
582 cpp_pedwarn (pfile, "'$' character(s) in identifier");
584 /* Identifiers are null-terminated. */
585 len = obstack_object_size (stack);
586 obstack_1grow (stack, '\0');
588 return (cpp_hashnode *)
589 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
592 /* Parse a number, skipping embedded backslash-newlines. */
594 parse_number (pfile, number, c, leading_period)
600 cpp_buffer *buffer = pfile->buffer;
601 cpp_pool *pool = &pfile->ident_pool;
602 unsigned char *dest, *limit;
604 dest = POOL_FRONT (pool);
605 limit = POOL_LIMIT (pool);
607 /* Place a leading period. */
611 limit = _cpp_next_chunk (pool, 0, &dest);
619 /* Need room for terminating null. */
620 if (dest + 1 >= limit)
621 limit = _cpp_next_chunk (pool, 0, &dest);
625 if (buffer->cur == buffer->rlimit)
630 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
632 /* Potential escaped newline? */
633 if (c != '?' && c != '\\')
635 c = skip_escaped_newlines (pfile, c);
637 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
639 /* Remember the next character. */
640 buffer->read_ahead = c;
642 /* Null-terminate the number. */
645 number->text = POOL_FRONT (pool);
646 number->len = dest - number->text;
647 POOL_COMMIT (pool, number->len + 1);
650 /* Subroutine of parse_string. Emits error for unterminated strings. */
652 unterminated (pfile, term)
656 cpp_error (pfile, "missing terminating %c character", term);
658 if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line)
660 cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col,
661 "possible start of unterminated string literal");
666 /* Subroutine of parse_string. */
668 unescaped_terminator_p (pfile, dest)
670 const unsigned char *dest;
672 const unsigned char *start, *temp;
674 /* In #include-style directives, terminators are not escapable. */
675 if (pfile->state.angled_headers)
678 start = POOL_FRONT (&pfile->ident_pool);
680 /* An odd number of consecutive backslashes represents an escaped
682 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
685 return ((dest - temp) & 1) == 0;
688 /* Parses a string, character constant, or angle-bracketed header file
689 name. Handles embedded trigraphs and escaped newlines. The stored
690 string is guaranteed NUL-terminated, but it is not guaranteed that
691 this is the first NUL since embedded NULs are preserved.
693 Multi-line strings are allowed, but they are deprecated. */
695 parse_string (pfile, token, terminator)
698 cppchar_t terminator;
700 cpp_buffer *buffer = pfile->buffer;
701 cpp_pool *pool = &pfile->ident_pool;
702 unsigned char *dest, *limit;
704 bool warned_nulls = false, warned_multi = false;
706 dest = POOL_FRONT (pool);
707 limit = POOL_LIMIT (pool);
711 if (buffer->cur == buffer->rlimit)
717 /* We need space for the terminating NUL. */
719 limit = _cpp_next_chunk (pool, 0, &dest);
723 unterminated (pfile, terminator);
727 /* Handle trigraphs, escaped newlines etc. */
728 if (c == '?' || c == '\\')
729 c = skip_escaped_newlines (pfile, c);
731 if (c == terminator && unescaped_terminator_p (pfile, dest))
736 else if (is_vspace (c))
738 /* In assembly language, silently terminate string and
739 character literals at end of line. This is a kludge
740 around not knowing where comments are. */
741 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
744 /* Character constants and header names may not extend over
745 multiple lines. In Standard C, neither may strings.
746 Unfortunately, we accept multiline strings as an
747 extension, except in #include family directives. */
748 if (terminator != '"' || pfile->state.angled_headers)
750 unterminated (pfile, terminator);
757 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
760 if (pfile->mls_line == 0)
762 pfile->mls_line = token->line;
763 pfile->mls_col = token->col;
766 c = handle_newline (pfile, c);
770 else if (c == '\0' && !warned_nulls)
773 cpp_warning (pfile, "null character(s) preserved in literal");
779 /* Remember the next character. */
780 buffer->read_ahead = c;
783 token->val.str.text = POOL_FRONT (pool);
784 token->val.str.len = dest - token->val.str.text;
785 POOL_COMMIT (pool, token->val.str.len + 1);
788 /* The stored comment includes the comment start and any terminator. */
790 save_comment (pfile, token, from)
793 const unsigned char *from;
795 unsigned char *buffer;
798 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
799 /* C++ comments probably (not definitely) have moved past a new
800 line, which we don't want to save in the comment. */
801 if (pfile->buffer->read_ahead != EOF)
803 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
805 token->type = CPP_COMMENT;
806 token->val.str.len = len;
807 token->val.str.text = buffer;
810 memcpy (buffer + 1, from, len - 1);
813 /* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we
814 want to avoid stepping back when lexing %:%X. */
816 lex_percent (pfile, result)
820 cpp_buffer *buffer= pfile->buffer;
823 result->type = CPP_MOD;
824 /* Parsing %:%X could leave an extra character. */
825 if (buffer->extra_char == EOF)
826 c = get_effective_char (pfile);
829 c = buffer->read_ahead = buffer->extra_char;
830 buffer->extra_char = EOF;
834 ACCEPT_CHAR (CPP_MOD_EQ);
835 else if (CPP_OPTION (pfile, digraphs))
839 result->flags |= DIGRAPH;
840 ACCEPT_CHAR (CPP_HASH);
841 if (get_effective_char (pfile) == '%')
843 buffer->extra_char = get_effective_char (pfile);
844 if (buffer->extra_char == ':')
846 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_PASTE);
850 /* We'll catch the extra_char when we're called back. */
851 buffer->read_ahead = '%';
856 result->flags |= DIGRAPH;
857 ACCEPT_CHAR (CPP_CLOSE_BRACE);
862 /* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we
863 want to avoid stepping back when lexing '...' or '.123'. In the
864 latter case we should also set a flag for parse_number. */
866 lex_dot (pfile, result)
870 cpp_buffer *buffer = pfile->buffer;
873 /* Parsing ..X could leave an extra character. */
874 if (buffer->extra_char == EOF)
875 c = get_effective_char (pfile);
878 c = buffer->read_ahead = buffer->extra_char;
879 buffer->extra_char = EOF;
882 /* All known character sets have 0...9 contiguous. */
883 if (c >= '0' && c <= '9')
885 result->type = CPP_NUMBER;
886 parse_number (pfile, &result->val.str, c, 1);
890 result->type = CPP_DOT;
893 buffer->extra_char = get_effective_char (pfile);
894 if (buffer->extra_char == '.')
896 buffer->extra_char = EOF;
897 ACCEPT_CHAR (CPP_ELLIPSIS);
900 /* We'll catch the extra_char when we're called back. */
901 buffer->read_ahead = '.';
903 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
904 ACCEPT_CHAR (CPP_DOT_STAR);
908 /* Allocate COUNT tokens for RUN. */
910 _cpp_init_tokenrun (run, count)
914 run->base = xnewvec (cpp_token, count);
915 run->limit = run->base + count;
919 /* Returns the next tokenrun, or creates one if there is none. */
924 if (run->next == NULL)
926 run->next = xnew (tokenrun);
927 run->next->prev = run;
928 _cpp_init_tokenrun (run->next, 250);
934 /* Lex a token into RESULT (external interface). Takes care of issues
935 like directive handling, token lookahead, multiple include
936 optimisation and skipping. */
938 _cpp_lex_token (pfile)
945 if (pfile->cur_token == pfile->cur_run->limit)
947 pfile->cur_run = next_tokenrun (pfile->cur_run);
948 pfile->cur_token = pfile->cur_run->base;
951 if (pfile->lookaheads)
954 result = pfile->cur_token++;
957 result = _cpp_lex_direct (pfile);
959 if (result->flags & BOL)
961 /* Is this a directive? If _cpp_handle_directive returns
962 false, it is an assembler #. */
963 if (result->type == CPP_HASH
964 && !pfile->state.parsing_args
965 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
967 if (pfile->cb.line_change && !pfile->state.skipping)
968 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
971 /* We don't skip tokens in directives. */
972 if (pfile->state.in_directive)
975 /* Outside a directive, invalidate controlling macros. At file
976 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
977 get here and MI optimisation works. */
978 pfile->mi_valid = false;
980 if (!pfile->state.skipping || result->type == CPP_EOF)
987 /* Lex a token into pfile->cur_token, which is also incremented, to
988 get diagnostics pointing to the correct location.
990 Does not handle issues such as token lookahead, multiple-include
991 optimisation, directives, skipping etc. This function is only
992 suitable for use by _cpp_lex_token, and in special cases like
993 lex_expansion_token which doesn't care for any of these issues.
995 When meeting a newline, returns CPP_EOF if parsing a directive,
996 otherwise returns to the start of the token buffer if permissible.
997 Returns the location of the lexed token. */
999 _cpp_lex_direct (pfile)
1004 const unsigned char *comment_start;
1005 cpp_token *result = pfile->cur_token++;
1008 buffer = pfile->buffer;
1009 result->flags = buffer->saved_flags;
1010 buffer->saved_flags = 0;
1012 result->line = pfile->line;
1015 c = buffer->read_ahead;
1016 if (c == EOF && buffer->cur < buffer->rlimit)
1018 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1019 buffer->read_ahead = EOF;
1025 buffer->saved_flags = BOL;
1026 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1028 if (buffer->cur != buffer->line_base)
1030 /* Non-empty files should end in a newline. Don't warn
1031 for command line and _Pragma buffers. */
1032 if (!buffer->from_stage3)
1033 cpp_pedwarn (pfile, "no newline at end of file");
1034 handle_newline (pfile, '\n');
1037 /* Don't pop the last buffer. */
1040 unsigned char stop = buffer->return_at_eof;
1042 _cpp_pop_buffer (pfile);
1047 result->type = CPP_EOF;
1050 case ' ': case '\t': case '\f': case '\v': case '\0':
1051 skip_whitespace (pfile, c);
1052 result->flags |= PREV_WHITE;
1055 case '\n': case '\r':
1056 handle_newline (pfile, c);
1057 buffer->saved_flags = BOL;
1058 if (! pfile->state.in_directive)
1060 if (!pfile->keep_tokens)
1062 pfile->cur_run = &pfile->base_run;
1063 result = pfile->base_run.base;
1064 pfile->cur_token = result + 1;
1068 result->type = CPP_EOF;
1073 /* These could start an escaped newline, or '?' a trigraph. Let
1074 skip_escaped_newlines do all the work. */
1076 unsigned int line = pfile->line;
1078 c = skip_escaped_newlines (pfile, c);
1079 if (line != pfile->line)
1080 /* We had at least one escaped newline of some sort, and the
1081 next character is in buffer->read_ahead. Update the
1082 token's line and column. */
1083 goto update_tokens_line;
1085 /* We are either the original '?' or '\\', or a trigraph. */
1086 result->type = CPP_QUERY;
1087 buffer->read_ahead = EOF;
1095 case '0': case '1': case '2': case '3': case '4':
1096 case '5': case '6': case '7': case '8': case '9':
1097 result->type = CPP_NUMBER;
1098 parse_number (pfile, &result->val.str, c, 0);
1102 if (!CPP_OPTION (pfile, dollars_in_ident))
1104 /* Fall through... */
1107 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1108 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1109 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1110 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1112 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1113 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1114 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1115 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1117 result->type = CPP_NAME;
1118 result->val.node = parse_identifier (pfile);
1120 /* 'L' may introduce wide characters or strings. */
1121 if (result->val.node == pfile->spec_nodes.n_L)
1123 c = buffer->read_ahead;
1124 if (c == EOF && buffer->cur < buffer->rlimit)
1126 if (c == '\'' || c == '"')
1129 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1133 /* Convert named operators to their proper types. */
1134 else if (result->val.node->flags & NODE_OPERATOR)
1136 result->flags |= NAMED_OP;
1137 result->type = result->val.node->value.operator;
1143 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1145 parse_string (pfile, result, c);
1149 /* A potential block or line comment. */
1150 comment_start = buffer->cur;
1151 result->type = CPP_DIV;
1152 c = get_effective_char (pfile);
1154 ACCEPT_CHAR (CPP_DIV_EQ);
1155 if (c != '/' && c != '*')
1160 if (skip_block_comment (pfile))
1161 cpp_error (pfile, "unterminated comment");
1165 if (!CPP_OPTION (pfile, cplusplus_comments)
1166 && !CPP_IN_SYSTEM_HEADER (pfile))
1169 /* Warn about comments only if pedantically GNUC89, and not
1170 in system headers. */
1171 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1172 && ! buffer->warned_cplusplus_comments)
1175 "C++ style comments are not allowed in ISO C89");
1177 "(this will be reported only once per input file)");
1178 buffer->warned_cplusplus_comments = 1;
1181 /* Skip_line_comment updates buffer->read_ahead. */
1182 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1183 cpp_warning (pfile, "multi-line comment");
1186 /* Skipping the comment has updated buffer->read_ahead. */
1187 if (!pfile->state.save_comments)
1189 result->flags |= PREV_WHITE;
1190 goto update_tokens_line;
1193 /* Save the comment as a token in its own right. */
1194 save_comment (pfile, result, comment_start);
1195 /* Don't do MI optimisation. */
1199 if (pfile->state.angled_headers)
1201 result->type = CPP_HEADER_NAME;
1202 c = '>'; /* terminator. */
1206 result->type = CPP_LESS;
1207 c = get_effective_char (pfile);
1209 ACCEPT_CHAR (CPP_LESS_EQ);
1212 ACCEPT_CHAR (CPP_LSHIFT);
1213 if (get_effective_char (pfile) == '=')
1214 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1216 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1218 ACCEPT_CHAR (CPP_MIN);
1219 if (get_effective_char (pfile) == '=')
1220 ACCEPT_CHAR (CPP_MIN_EQ);
1222 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1224 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1225 result->flags |= DIGRAPH;
1227 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1229 ACCEPT_CHAR (CPP_OPEN_BRACE);
1230 result->flags |= DIGRAPH;
1235 result->type = CPP_GREATER;
1236 c = get_effective_char (pfile);
1238 ACCEPT_CHAR (CPP_GREATER_EQ);
1241 ACCEPT_CHAR (CPP_RSHIFT);
1242 if (get_effective_char (pfile) == '=')
1243 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1245 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1247 ACCEPT_CHAR (CPP_MAX);
1248 if (get_effective_char (pfile) == '=')
1249 ACCEPT_CHAR (CPP_MAX_EQ);
1254 lex_percent (pfile, result);
1258 lex_dot (pfile, result);
1262 result->type = CPP_PLUS;
1263 c = get_effective_char (pfile);
1265 ACCEPT_CHAR (CPP_PLUS_EQ);
1267 ACCEPT_CHAR (CPP_PLUS_PLUS);
1271 result->type = CPP_MINUS;
1272 c = get_effective_char (pfile);
1275 ACCEPT_CHAR (CPP_DEREF);
1276 if (CPP_OPTION (pfile, cplusplus)
1277 && get_effective_char (pfile) == '*')
1278 ACCEPT_CHAR (CPP_DEREF_STAR);
1281 ACCEPT_CHAR (CPP_MINUS_EQ);
1283 ACCEPT_CHAR (CPP_MINUS_MINUS);
1287 result->type = CPP_MULT;
1288 if (get_effective_char (pfile) == '=')
1289 ACCEPT_CHAR (CPP_MULT_EQ);
1293 result->type = CPP_EQ;
1294 if (get_effective_char (pfile) == '=')
1295 ACCEPT_CHAR (CPP_EQ_EQ);
1299 result->type = CPP_NOT;
1300 if (get_effective_char (pfile) == '=')
1301 ACCEPT_CHAR (CPP_NOT_EQ);
1305 result->type = CPP_AND;
1306 c = get_effective_char (pfile);
1308 ACCEPT_CHAR (CPP_AND_EQ);
1310 ACCEPT_CHAR (CPP_AND_AND);
1314 result->type = CPP_HASH;
1315 if (get_effective_char (pfile) == '#')
1316 ACCEPT_CHAR (CPP_PASTE);
1320 result->type = CPP_OR;
1321 c = get_effective_char (pfile);
1323 ACCEPT_CHAR (CPP_OR_EQ);
1325 ACCEPT_CHAR (CPP_OR_OR);
1329 result->type = CPP_XOR;
1330 if (get_effective_char (pfile) == '=')
1331 ACCEPT_CHAR (CPP_XOR_EQ);
1335 result->type = CPP_COLON;
1336 c = get_effective_char (pfile);
1337 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1338 ACCEPT_CHAR (CPP_SCOPE);
1339 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1341 result->flags |= DIGRAPH;
1342 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1346 case '~': result->type = CPP_COMPL; break;
1347 case ',': result->type = CPP_COMMA; break;
1348 case '(': result->type = CPP_OPEN_PAREN; break;
1349 case ')': result->type = CPP_CLOSE_PAREN; break;
1350 case '[': result->type = CPP_OPEN_SQUARE; break;
1351 case ']': result->type = CPP_CLOSE_SQUARE; break;
1352 case '{': result->type = CPP_OPEN_BRACE; break;
1353 case '}': result->type = CPP_CLOSE_BRACE; break;
1354 case ';': result->type = CPP_SEMICOLON; break;
1356 /* @ is a punctuator in Objective C. */
1357 case '@': result->type = CPP_ATSIGN; break;
1361 result->type = CPP_OTHER;
1369 /* An upper bound on the number of bytes needed to spell a token,
1370 including preceding whitespace. */
1372 cpp_token_len (token)
1373 const cpp_token *token;
/* The spelling category of TOKEN decides where the variable-length
   part of its spelling comes from. */
1377 switch (TOKEN_SPELL (token))
/* Every category not listed below contributes no variable-length part. */
1379 default: len = 0; break;
/* String-spelt tokens record their length next to their text. */
1380 case SPELL_STRING: len = token->val.str.len; break;
/* Identifier-spelt tokens use the length of their identifier node. */
1381 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1383 /* 1 for whitespace, 4 for comment delimiters. */
1387 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1388 already contain enough space to hold the token's spelling.
1389 Returns a pointer to the character after the last character
1392 cpp_spell_token (pfile, token, buffer)
1393 cpp_reader *pfile; /* Would be nice to be rid of this... */
1394 const cpp_token *token;
1395 unsigned char *buffer;
/* Dispatch on how this kind of token stores its spelling. */
1397 switch (TOKEN_SPELL (token))
1399 case SPELL_OPERATOR:
1401 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their text from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH. */
1404 if (token->flags & DIGRAPH)
1406 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1407 else if (token->flags & NAMED_OP)
/* The spelling assigned here is whatever TOKEN_NAME yields. */
1410 spelling = TOKEN_NAME (token);
/* Walk the spelling one byte at a time until its terminating NUL. */
1412 while ((c = *spelling++) != '\0')
/* Identifier spelling: copy the node name and step BUFFER past it. */
1419 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1420 buffer += NODE_LEN (token->val.node);
/* LEFT and RIGHT are the delimiters and TAG an optional prefix; a NUL
   value in any of them means nothing is written for it. */
1425 int left, right, tag;
1426 switch (token->type)
1428 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1429 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1430 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1431 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1432 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1433 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Write prefix, opening delimiter, the stored text, then the closing
   delimiter, advancing BUFFER as each piece is written. */
1435 if (tag) *buffer++ = tag;
1436 if (left) *buffer++ = left;
1437 memcpy (buffer, token->val.str.text, token->val.str.len);
1438 buffer += token->val.str.len;
1439 if (right) *buffer++ = right;
/* Tokens spelt as a single character keep it in val.c. */
1444 *buffer++ = token->val.c;
/* Diagnose through cpp_ice, naming the token that could not be spelt. */
1448 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1455 /* Returns a token as a null-terminated string. The string is
1456 temporary, and automatically freed later. Useful for diagnostics. */
1458 cpp_token_as_text (pfile, token)
1460 const cpp_token *token;
/* Size the buffer with the upper bound computed by cpp_token_len. */
1462 unsigned int len = cpp_token_len (token);
/* The buffer is taken from the ident_pool of the reader. */
1463 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* END receives the pointer returned by cpp_spell_token for the text
   written at START. */
1465 end = cpp_spell_token (pfile, token, start);
1471 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1473 cpp_type2name (type)
1474 enum cpp_ttype type;
/* TYPE indexes token_spellings directly and the name field is returned
   cast to const char *.  No range check on TYPE is visible here. */
1476 return (const char *) token_spellings[type].name;
1479 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1480 for efficiency - to avoid double-buffering. Also, outputs a space
1481 if PREV_WHITE is flagged. */
1483 cpp_output_token (token, fp)
1484 const cpp_token *token;
/* PREV_WHITE marks a token that had whitespace in front of it. */
1487 if (token->flags & PREV_WHITE)
/* Dispatch on how this kind of token stores its spelling. */
1490 switch (TOKEN_SPELL (token))
1492 case SPELL_OPERATOR:
1494 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their text from digraph_spellings,
   indexed by the distance of the token type from CPP_FIRST_DIGRAPH. */
1496 if (token->flags & DIGRAPH)
1498 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1499 else if (token->flags & NAMED_OP)
/* The spelling assigned here is whatever TOKEN_NAME yields. */
1502 spelling = TOKEN_NAME (token);
1504 ufputs (spelling, fp);
/* Identifier spelling: the node name is written as a string. */
1510 ufputs (NODE_NAME (token->val.node), fp);
/* LEFT and RIGHT are the delimiters and TAG an optional prefix; a NUL
   value in any of them means nothing is written for it. */
1515 int left, right, tag;
1516 switch (token->type)
1518 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1519 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1520 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1521 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1522 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1523 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* The stored text is written with fwrite and an explicit length, so it
   is not required to be NUL-terminated here. */
1525 if (tag) putc (tag, fp);
1526 if (left) putc (left, fp);
1527 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1528 if (right) putc (right, fp);
/* Tokens spelt as a single character keep it in val.c. */
1533 putc (token->val.c, fp);
1537 /* An error, most probably. */
1542 /* Compare two tokens. */
1544 _cpp_equiv_tokens (a, b)
1545 const cpp_token *a, *b;
/* Tokens can only match when both type and flags are equal; the payload
   comparison that follows depends on the spelling category of A. */
1547 if (a->type == b->type && a->flags == b->flags)
1548 switch (TOKEN_SPELL (a))
1550 default: /* Keep compiler happy. */
1551 case SPELL_OPERATOR:
1554 return a->val.c == b->val.c; /* Character. */
/* Macro arguments must also refer to the same argument number; any
   other type reaching this return is already known to match. */
1556 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Identifiers are compared by node pointer, not by text. */
1558 return a->val.node == b->val.node;
/* Strings are compared by length first and then by content. */
1560 return (a->val.str.len == b->val.str.len
1561 && !memcmp (a->val.str.text, b->val.str.text,
1568 /* Determine whether two tokens can be pasted together, and if so,
1569 what the resulting token is. Returns CPP_EOF if the tokens cannot
1570 be pasted, or the appropriate type for the merged token if they
1573 cpp_can_paste (pfile, token1, token2, digraph)
1575 const cpp_token *token1, *token2;
1578 enum cpp_ttype a = token1->type, b = token2->type;
1579 int cxx = CPP_OPTION (pfile, cplusplus);
1581 /* Treat named operators as if they were ordinary NAMEs. */
1582 if (token1->flags & NAMED_OP)
1584 if (token2->flags & NAMED_OP)
/* Any operator up to CPP_LAST_EQ followed by = becomes its compound
   form, found by adding the constant distance CPP_EQ_EQ - CPP_EQ.
   NOTE(review): this relies on the cpp_ttype enum keeping that distance
   the same for every such operator - confirm against its definition. */
1587 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1588 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
/* The case labels for several of the groups below are not visible in
   this listing.  Results that depend on CXX are only returned when the
   cplusplus option is set. */
1593 if (b == a) return CPP_RSHIFT;
1594 if (b == CPP_QUERY && cxx) return CPP_MAX;
1595 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1598 if (b == a) return CPP_LSHIFT;
1599 if (b == CPP_QUERY && cxx) return CPP_MIN;
1600 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
/* Digraph results are only produced when the digraphs option is on, and
   they set *DIGRAPH so the caller can tell the merged token is one. */
1601 if (CPP_OPTION (pfile, digraphs))
1604 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1606 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Doubling one of these operators gives the matching two-character
   operator. */
1610 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1611 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1612 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1615 if (b == a) return CPP_MINUS_MINUS;
1616 if (b == CPP_GREATER) return CPP_DEREF;
1619 if (b == a && cxx) return CPP_SCOPE;
1620 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1621 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1625 if (CPP_OPTION (pfile, digraphs))
1627 if (b == CPP_GREATER)
1628 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1630 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1634 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1637 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1638 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to CPP_PASTE only when their
   DIGRAPH flags agree; the flag of TOKEN1 is passed on via *DIGRAPH. */
1642 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1644 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1648 if (b == CPP_NAME) return CPP_NAME;
/* The first half of each of the next three conditions is not visible.
   The visible halves require the text of TOKEN2 to pass name_p, or
   TOKEN1 to be the n_L special node for the wide results. */
1650 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1652 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1654 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1658 if (b == CPP_NUMBER) return CPP_NUMBER;
1659 if (b == CPP_NAME) return CPP_NUMBER;
1660 if (b == CPP_DOT) return CPP_NUMBER;
1661 /* Numbers cannot have length zero, so this is safe. */
/* A sign is only considered after inspecting the last character of the
   text of TOKEN1 with VALID_SIGN. */
1662 if ((b == CPP_PLUS || b == CPP_MINUS)
1663 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1674 /* Returns nonzero if a space should be inserted to avoid an
1675 accidental token paste for output. For simplicity, it is
1676 conservative, and occasionally advises a space where one is not
1677 needed, e.g. "." and ".2". */
1680 cpp_avoid_paste (pfile, token1, token2)
1682 const cpp_token *token1, *token2;
1684 enum cpp_ttype a = token1->type, b = token2->type;
1687 if (token1->flags & NAMED_OP)
1689 if (token2->flags & NAMED_OP)
/* C becomes the first character of the spelling of TOKEN2 when it is a
   digraph or an operator; most tests below look only at that character
   rather than at the whole second token. */
1693 if (token2->flags & DIGRAPH)
1694 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1695 else if (token_spellings[b].category == SPELL_OPERATOR)
1696 c = token_spellings[b].name[0];
1698 /* Quickly get everything that can paste with an '='. */
1699 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per first-token type: the leading characters of a second token that
   would combine with it if no space were written between them. */
1704 case CPP_GREATER: return c == '>' || c == '?';
1705 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1706 case CPP_PLUS: return c == '+';
1707 case CPP_MINUS: return c == '-' || c == '>';
1708 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1709 case CPP_MOD: return c == ':' || c == '>';
1710 case CPP_AND: return c == '&';
1711 case CPP_OR: return c == '|';
1712 case CPP_COLON: return c == ':' || c == '>';
1713 case CPP_DEREF: return c == '*';
1714 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1715 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
/* For names and numbers the type of the second token is tested as well
   as, or instead of, its first character. */
1716 case CPP_NAME: return ((b == CPP_NUMBER
1717 && name_p (pfile, &token2->val.str))
1719 || b == CPP_CHAR || b == CPP_STRING); /* L */
1720 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1721 || c == '.' || c == '+' || c == '-');
/* Only relevant with the objc option: an at-sign held in a CPP_OTHER
   token followed by a name or a string. */
1722 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1723 && token1->val.c == '@'
1724 && (b == CPP_NAME || b == CPP_STRING));
1731 /* Output all the remaining tokens on the current line, and a newline
1732 character, to FP. Leading whitespace is removed. */
1734 cpp_output_line (pfile, fp)
1740 cpp_get_token (pfile, &token);
/* Clearing PREV_WHITE on the first token is what drops the leading
   whitespace, since cpp_output_token writes a space for that flag. */
1741 token.flags &= ~PREV_WHITE;
/* Write tokens until cpp_get_token hands back CPP_EOF. */
1742 while (token.type != CPP_EOF)
1744 cpp_output_token (&token, fp);
1745 cpp_get_token (pfile, &token);
1751 /* Returns the value of a hexadecimal digit. */
/* Lower-case and upper-case letters a to f both map to 10 through 15. */
1756 if (c >= 'a' && c <= 'f')
1757 return c - 'a' + 10;
1758 if (c >= 'A' && c <= 'F')
1759 return c - 'A' + 10;
/* Decimal digits are tested last; what happens for any other value of C
   is on lines not visible here. */
1760 if (c >= '0' && c <= '9')
1765 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1766 failure if cpplib is not parsing C++ or C99. Such failure is
1767 silent, and no variables are updated. Otherwise returns 0, and
1768 warns if -Wtraditional.
1770 [lex.charset]: The character designated by the universal character
1771 name \UNNNNNNNN is that character whose character short name in
1772 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1773 universal character name \uNNNN is that character whose character
1774 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1775 for a universal character name is less than 0x20 or in the range
1776 0x7F-0x9F (inclusive), or if the universal character name
1777 designates a character in the basic source character set, then the
1778 program is ill-formed.
1780 We assume that wchar_t is Unicode, so we don't need to do any
1781 mapping. Is this ever wrong?
1783 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1784 LIMIT is the end of the string or charconst. PSTR is updated to
1785 point after the UCS on return, and the UCS is written into PC. */
1788 maybe_read_ucs (pfile, pstr, limit, pc)
1790 const unsigned char **pstr;
1791 const unsigned char *limit;
/* P is a local cursor over the digits, CODE accumulates the value and
   C starts out as the introducing letter read through PC. */
1794 const unsigned char *p = *pstr;
1795 unsigned int code = 0;
1796 unsigned int c = *pc, length;
1798 /* Only attempt to interpret a UCS for C++ and C99. */
1799 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1802 if (CPP_WTRADITIONAL (pfile))
1803 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* The lower-case form takes four hex digits, the other form eight. */
1805 length = (c == 'u' ? 4: 8);
/* Fewer bytes left before LIMIT than the digits this form requires. */
1807 if ((size_t) (limit - p) < length)
1809 cpp_error (pfile, "incomplete universal-character-name");
1810 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts down as digits are consumed, so it is zero afterwards
   only if the loop was not left early. */
1815 for (; length; length--, p++)
/* Shift in one nibble per digit, most significant digit first. */
1819 code = (code << 4) + hex_digit_value (c);
1823 "non-hex digit '%c' in universal-character-name", c);
1824 /* We shouldn't skip in case there are multibyte chars. */
1830 #ifdef TARGET_EBCDIC
1831 cpp_error (pfile, "universal-character-name on EBCDIC target");
1832 code = 0x3f; /* EBCDIC invalid character */
1834 /* True extended characters are OK. */
/* The visible part of this test excludes values with the top bit set
   and the range 0xD800 to 0xDFFF. */
1836 && !(code & 0x80000000)
1837 && !(code >= 0xD800 && code <= 0xDFFF))
1839 /* The standard permits $, @ and ` to be specified as UCNs. We use
1840 hex escapes so that this also works with EBCDIC hosts. */
1841 else if (code == 0x24 || code == 0x40 || code == 0x60)
1843 /* Don't give another error if one occurred above. */
1844 else if (length == 0)
1845 cpp_error (pfile, "universal-character-name out of range");
1853 /* Interpret an escape sequence, and return its value. PSTR points to
1854 the input pointer, which is just after the backslash. LIMIT is how
1855 much text we have. MASK is a bitmask for the precision for the
1856 destination type (char or wchar_t). TRADITIONAL, if true, does not
1857 interpret escapes that did not exist in traditional C.
1859 Handles all relevant diagnostics. */
1862 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1864 const unsigned char **pstr;
1865 const unsigned char *limit;
1866 unsigned HOST_WIDE_INT mask;
1870 const unsigned char *str = *pstr;
/* C is the character that selects the escape.  NOTE(review): no check
   of STR against LIMIT is visible before this read - confirm that
   callers always leave at least one byte after the backslash. */
1871 unsigned int c = *str++;
/* These escapes stand for the character itself, so C is left as read. */
1875 case '\\': case '\'': case '"': case '?': break;
/* Simple control escapes map to the target values of the characters. */
1876 case 'b': c = TARGET_BS; break;
1877 case 'f': c = TARGET_FF; break;
1878 case 'n': c = TARGET_NEWLINE; break;
1879 case 'r': c = TARGET_CR; break;
1880 case 't': c = TARGET_TAB; break;
1881 case 'v': c = TARGET_VT; break;
1883 case '(': case '{': case '[': case '%':
1884 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1885 '\%' is used to prevent SCCS from getting confused. */
/* These are only treated as unknown when being pedantic. */
1886 unknown = CPP_PEDANTIC (pfile);
1890 if (CPP_WTRADITIONAL (pfile))
1891 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1897 if (CPP_PEDANTIC (pfile))
1898 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are delegated; a nonzero return marks the
   escape as unknown. */
1903 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1907 if (CPP_WTRADITIONAL (pfile))
1908 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
/* I accumulates the hex value; OVERFLOW collects bits lost on the way. */
1912 unsigned int i = 0, overflow = 0;
1913 int digits_found = 0;
/* Nonzero when shifting I left by four would push set bits off the top,
   detected by shifting back and comparing with the original. */
1921 overflow |= i ^ (i << 4 >> 4);
1922 i = (i << 4) + hex_digit_value (c);
1927 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost above or the value has bits outside
   MASK. */
1929 if (overflow | (i != (i & mask)))
1931 cpp_pedwarn (pfile, "hex escape sequence out of range");
1938 case '0': case '1': case '2': case '3':
1939 case '4': case '5': case '6': case '7':
/* The selecting character is itself the first octal digit. */
1941 unsigned int i = c - '0';
/* Further digits are taken while input remains and COUNT stays below
   its limit; a character outside 0 to 7 is tested for just below. */
1944 while (str < limit && ++count < 3)
1947 if (c < '0' || c > '7')
1950 i = (i << 3) + c - '0';
1953 if (i != (i & mask))
1955 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two message forms for an unknown escape: one shows the character
   itself, the other shows its value as three octal digits. */
1970 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1972 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1976 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* When no MAX_ variant has been defined, fall back to the plain
   CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE values. */
1982 #ifndef MAX_CHAR_TYPE_SIZE
1983 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1986 #ifndef MAX_WCHAR_TYPE_SIZE
1987 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1990 /* Interpret a (possibly wide) character constant in TOKEN.
1991 WARN_MULTI warns about multi-character charconsts, if not
1992 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1993 that did not exist in traditional C. PCHARS_SEEN points to a
1994 variable that is filled in with the number of characters seen. */
1996 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1998 const cpp_token *token;
2001 unsigned int *pchars_seen;
/* STR walks the stored text of the constant up to LIMIT. */
2003 const unsigned char *str = token->val.str.text;
2004 const unsigned char *limit = str + token->val.str.len;
2005 unsigned int chars_seen = 0;
2006 unsigned int width, max_chars, c;
2007 unsigned HOST_WIDE_INT mask;
2008 HOST_WIDE_INT result = 0;
2010 #ifdef MULTIBYTE_CHARS
2011 (void) local_mbtowc (NULL, NULL, 0);
2014 /* Width in bits. */
2015 if (token->type == CPP_CHAR)
2016 width = MAX_CHAR_TYPE_SIZE;
2018 width = MAX_WCHAR_TYPE_SIZE;
/* MASK keeps the low WIDTH bits.  The shift is only done when WIDTH is
   smaller than the host wide int, where it is well defined. */
2020 if (width < HOST_BITS_PER_WIDE_INT)
2021 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* How many characters of this width fit in one host wide int. */
2024 max_chars = HOST_BITS_PER_WIDE_INT / width;
2028 #ifdef MULTIBYTE_CHARS
2032 char_len = local_mbtowc (&wc, str, limit - str);
2035 cpp_warning (pfile, "ignoring invalid multibyte character");
/* Escapes are decoded by cpp_parse_escape, which advances STR. */
2048 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2050 #ifdef MAP_CHARACTER
2052 c = MAP_CHARACTER (c);
2055 /* Merge character into result; ignore excess chars. */
2056 if (++chars_seen <= max_chars)
/* Earlier characters move up by WIDTH bits and the new one fills the
   low bits. */
2058 if (width < HOST_BITS_PER_WIDE_INT)
2059 result = (result << width) | (c & mask);
2065 if (chars_seen == 0)
2066 cpp_error (pfile, "empty character constant");
2067 else if (chars_seen > max_chars)
/* Clamp the count to what was actually merged into RESULT. */
2069 chars_seen = max_chars;
2070 cpp_warning (pfile, "character constant too long");
2072 else if (chars_seen > 1 && !traditional && warn_multi)
2073 cpp_warning (pfile, "multi-character character constant");
2075 /* If char type is signed, sign-extend the constant. The
2076 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2077 if (token->type == CPP_CHAR && chars_seen)
2079 unsigned int nbits = chars_seen * width;
/* NOTE(review): this MASK shadows the outer one and is only as wide as
   unsigned int.  CHARS_SEEN can be as large as MAX_CHARS, so NBITS can
   reach HOST_BITS_PER_WIDE_INT; if that exceeds HOST_BITS_PER_INT the
   shift count below is negative.  Confirm whether a mask of host wide
   int width was intended. */
2080 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* Tests whether char is unsigned, or the top bit of the NBITS-wide
   value is clear. */
2082 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2083 || ((result >> (nbits - 1)) & 1) == 0)
2089 *pchars_seen = chars_seen;
/* The offset of member u within struct dummy.  Used below to round chunk
   sizes and assigned to ALIGN in _cpp_init_pool.  NOTE(review): struct
   dummy is declared on lines not visible here - confirm its layout. */
2105 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Nonzero when CHUNK is non-null, is not the chunk recorded in
   POOL->locked, and spans at least twice SIZE bytes from its base to
   its limit. */
2108 chunk_suitable (pool, chunk, size)
2113 /* Being at least twice SIZE means we can use memcpy in
2114 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2116 return (chunk && pool->locked != chunk
2117 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2120 /* Returns the end of the new pool. PTR points to a char in the old
2121 pool, and is updated to point to the same char in the new pool. */
2123 _cpp_next_chunk (pool, len, ptr)
2126 unsigned char **ptr;
/* The chunk following the current one is the first candidate. */
2128 cpp_chunk *chunk = pool->cur->next;
2130 /* LEN is the minimum size we want in the new pool. */
2131 len += POOL_ROOM (pool);
2132 if (! chunk_suitable (pool, chunk, len))
/* No reusable chunk: make a new one sized from the pool size and LEN,
   and link it in directly after the current chunk. */
2134 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2136 chunk->next = pool->cur->next;
2137 pool->cur->next = chunk;
2140 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes 0 for PTR; the guard for that
   case is presumably on a line not visible here - confirm. */
2142 *ptr += chunk->base - POOL_FRONT (pool);
/* Carry POOL_ROOM bytes starting at POOL_FRONT over to the base of the
   chosen chunk, then start its front at its base. */
2144 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2145 chunk->front = chunk->base;
2148 return POOL_LIMIT (pool);
/* Body of the chunk allocator: a single xmalloc block holds SIZE data
   bytes followed by the cpp_chunk descriptor. */
2155 unsigned char *base;
/* SIZE is rounded with POOL_ALIGN before the descriptor is placed at
   base + size. */
2158 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2159 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2160 /* Put the chunk descriptor at the end. Then chunk overruns will
2161 cause obvious chaos. */
2162 result = (cpp_chunk *) (base + size);
/* The data area starts out spanning base to base + size, with front at
   base. */
2163 result->base = base;
2164 result->front = base;
2165 result->limit = base + size;
/* Set up POOL with a first chunk of SIZE bytes and alignment ALIGN. */
2172 _cpp_init_pool (pool, size, align, temp)
2174 unsigned int size, align, temp;
/* NOTE(review): the condition guarding this assignment is not visible;
   presumably it applies only when the caller passed no alignment. */
2177 align = DEFAULT_ALIGNMENT;
/* True when ALIGN has more than one bit set, i.e. is not a power of
   two. */
2178 if (align & (align - 1))
2180 pool->align = align;
/* The pool starts with one chunk, which is both first and current. */
2181 pool->first = new_chunk (size);
2182 pool->cur = pool->first;
/* Links the sole chunk to itself.  NOTE(review): the guard for this
   statement is not visible; presumably it depends on TEMP - confirm. */
2186 pool->cur->next = pool->cur;
/* Take one lock on POOL.  Only the outermost lock, the one that finds
   the count at zero, records the current chunk in POOL->locked; nested
   locks just raise the count. */
2190 _cpp_lock_pool (pool)
2193 if (pool->locks++ == 0)
2194 pool->locked = pool->cur;
/* Release one lock on POOL.  The decrement that brings the count back
   to zero is the outermost unlock.  NOTE(review): the statement run in
   that case is not visible here; presumably it clears POOL->locked. */
2198 _cpp_unlock_pool (pool)
2201 if (--pool->locks == 0)
/* Walk the chunks of POOL starting from the first one; the per-chunk
   work of the loop is on lines not visible here. */
2206 _cpp_free_pool (pool)
2209 cpp_chunk *chunk = pool->first, *next;
/* Stop at the end of a null-terminated chain, or on arriving back at
   the first chunk of a circular one. */
2217 while (chunk && chunk != pool->first);
2220 /* Reserve LEN bytes from a memory pool. */
2222 _cpp_pool_reserve (pool, len)
/* Round the request with POOL_ALIGN using the alignment of this pool. */
2226 len = POOL_ALIGN (len, pool->align);
/* Move to another chunk when the request exceeds POOL_ROOM; no caller
   pointer needs relocating, so 0 is passed for PTR. */
2227 if (len > (unsigned int) POOL_ROOM (pool))
2228 _cpp_next_chunk (pool, len, 0);
/* The result is POOL_FRONT of the pool after any chunk switch above. */
2230 return POOL_FRONT (pool);
2233 /* Allocate LEN bytes from a memory pool. */
2235 _cpp_pool_alloc (pool, len)
/* Reserve first, then commit the same LEN through POOL_COMMIT. */
2239 unsigned char *result = _cpp_pool_reserve (pool, len);
2241 POOL_COMMIT (pool, len);