1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in the token->val.name.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
92 static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
94 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
95 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
96 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
97 static void unterminated PARAMS ((cpp_reader *, int));
98 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
99 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
100 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
101 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
102 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
103 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
104 const unsigned char *, unsigned int *));
105 static cpp_token *lex_token PARAMS ((cpp_reader *, cpp_token *));
106 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
108 static cpp_chunk *new_chunk PARAMS ((unsigned int));
109 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
110 static unsigned int hex_digit_value PARAMS ((unsigned int));
114 /* Compares the token TOKEN to the NUL-terminated string STRING.
115 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
118 cpp_ideq (token, string)
119 const cpp_token *token;
122 if (token->type != CPP_NAME)
125 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
128 /* Call when meeting a newline. NEWLINE_CHAR is the newline character
129 that has just been read. Returns the character after the newline
(or carriage-return newline combination), or EOF.

Side effects visible here: the buffer's col_adjust is reset to zero,
line_base is moved to the start of the new line, and the character
that follows is also stored in buffer->read_ahead. */
131 handle_newline (pfile, newline_char)
133 cppchar_t newline_char;
136 cppchar_t next = EOF;
139 buffer = pfile->buffer;
140 buffer->col_adjust = 0; /* Tab adjustments apply per line. */
141 buffer->line_base = buffer->cur;
143 /* Handle CR-LF and LF-CR combinations, get the next character. */
144 if (buffer->cur < buffer->rlimit)
146 next = *buffer->cur++;
/* Given that NEWLINE_CHAR is itself '\r' or '\n', the sums are equal
exactly when NEXT is the other one of the pair, so both CR-LF and
LF-CR are treated as a single line ending. */
147 if (next + newline_char == '\r' + '\n')
/* The second character of the pair belongs to the line ending, so
the new line starts after it. */
149 buffer->line_base = buffer->cur;
150 if (buffer->cur < buffer->rlimit)
151 next = *buffer->cur++;
157 buffer->read_ahead = next;
161 /* Subroutine of skip_escaped_newlines; called when a trigraph is
162 encountered. It warns if necessary, and returns true if the
163 trigraph should be honoured. FROM_CHAR is the third character of a
164 trigraph, and presumed to be the previous character for position
reporting. */
167 trigraph_ok (pfile, from_char)
171 int accept = CPP_OPTION (pfile, trigraphs);
173 /* Don't warn about trigraphs in comments. */
174 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
176 cpp_buffer *buffer = pfile->buffer;
179 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
180 "trigraph ??%c converted to %c",
182 (int) _cpp_trigraph_map[from_char]);
183 else if (buffer->cur != buffer->last_Wtrigraphs)
185 buffer->last_Wtrigraphs = buffer->cur;
186 cpp_warning_with_line (pfile, pfile->line,
187 CPP_BUF_COL (buffer) - 2,
188 "trigraph ??%c ignored", (int) from_char);
195 /* Sets the type of the token being lexed to T and resets
buffer->read_ahead to EOF. Assumes local variables buffer and
result. */
196 #define ACCEPT_CHAR(t) \
197 do { result->type = t; buffer->read_ahead = EOF; } while (0)
199 /* When we move to multibyte character sets, add to these something
200 that saves and restores the state of the multibyte conversion
201 library. This probably involves saving and restoring a "cookie".
202 In the case of glibc it is an 8-byte structure, so is not a high
203 overhead operation. In any case, it's out of the fast path.

SAVE_STATE records the buffer's current read position and
RESTORE_STATE rewinds the buffer to it; both assume local variables
buffer and saved_cur. */
204 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
205 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
207 /* Skips any escaped newlines introduced by NEXT, which is either a
208 '?' or a '\\'. Returns the next character, which will also have
209 been placed in buffer->read_ahead. This routine performs
210 preprocessing stages 1 and 2 of the ISO C standard.

Trigraph replacements are looked up in _cpp_trigraph_map and are
only used when trigraph_ok accepts them. Diagnostics issued here: a
warning when whitespace separates the backslash from the newline
(not given while lexing a comment), and a pedwarn for a
backslash-newline at end of file.

NOTE(review): several statements of this routine are not visible in
this listing; the inline notes below describe only the tests that
are. */
212 skip_escaped_newlines (pfile, next)
216 cpp_buffer *buffer = pfile->buffer;
218 /* Only do this if we apply stages 1 and 2. */
219 if (!buffer->from_stage3)
222 const unsigned char *saved_cur; /* Used by SAVE_STATE / RESTORE_STATE. */
/* Nothing follows NEXT in the buffer. */
227 if (buffer->cur == buffer->rlimit)
/* A trigraph needs a second '?' followed by a third character. */
233 next1 = *buffer->cur++;
234 if (next1 != '?' || buffer->cur == buffer->rlimit)
/* The third character must have a replacement in the trigraph map,
and trigraph_ok must agree to the conversion. */
240 next1 = *buffer->cur++;
241 if (!_cpp_trigraph_map[next1]
242 || !trigraph_ok (pfile, next1))
248 /* We have a full trigraph here. */
249 next = _cpp_trigraph_map[next1];
250 if (next != '\\' || buffer->cur == buffer->rlimit)
255 /* We have a backslash, and room for at least one more character. */
/* Read on past any non-vertical whitespace after the backslash. */
259 next1 = *buffer->cur++;
260 if (!is_nvspace (next1))
264 while (buffer->cur < buffer->rlimit);
/* Only a newline character at this point makes an escaped newline. */
266 if (!is_vspace (next1))
272 if (space && !pfile->state.lexing_comment)
273 cpp_warning (pfile, "backslash and newline separated by space");
275 next = handle_newline (pfile, next1);
277 cpp_pedwarn (pfile, "backslash-newline at end of file");
/* The character after an escaped newline may itself start another
escaped newline or a trigraph, so go round again. */
279 while (next == '\\' || next == '?');
282 buffer->read_ahead = next;
286 /* Obtain the next character, after trigraph conversion and skipping
287 an arbitrary string of escaped newlines. The common case of no
288 trigraphs or escaped newlines falls through quickly. */
290 get_effective_char (pfile)
293 cpp_buffer *buffer = pfile->buffer;
294 cppchar_t next = EOF;
296 if (buffer->cur < buffer->rlimit)
298 next = *buffer->cur++;
300 /* '?' can introduce trigraphs (and therefore backslash); '\\'
301 can introduce escaped newlines, which we want to skip, or
302 UCNs, which, depending upon lexer state, we will handle in
the future. */
304 if (next == '?' || next == '\\')
305 next = skip_escaped_newlines (pfile, next);
308 buffer->read_ahead = next;
312 /* Skip a C-style block comment. We find the end of the comment by
313 seeing if an asterisk is before every '/' we encounter. Returns
314 non-zero if comment terminated by EOF, zero otherwise.

The return value is computed from the last two characters examined:
zero only when they were an asterisk followed by a slash.
pfile->state.lexing_comment is set for the duration of the scan and
cleared before returning, and buffer->read_ahead is left as EOF.
Newlines met inside the comment are passed to handle_newline. When
the warn_comments option is set, a slash-star pair found inside the
comment is reported with a warning. */
316 skip_block_comment (pfile)
319 cpp_buffer *buffer = pfile->buffer;
/* C is the character just read and PREVC the one before it; both are
needed to recognise the closing pair. */
320 cppchar_t c = EOF, prevc = EOF;
322 pfile->state.lexing_comment = 1;
323 while (buffer->cur != buffer->rlimit)
325 prevc = c, c = *buffer->cur++;
328 /* FIXME: For speed, create a new character class of characters
329 of interest inside block comments. */
330 if (c == '?' || c == '\\')
331 c = skip_escaped_newlines (pfile, c);
333 /* People like decorating comments with '*', so check for '/'
334 instead for efficiency. */
340 /* Warn about potential nested comments, but not if the '/'
341 comes immediately before the true comment delimiter.
342 Don't bother to get it right across escaped newlines. */
343 if (CPP_OPTION (pfile, warn_comments)
344 && buffer->cur != buffer->rlimit)
346 prevc = c, c = *buffer->cur++;
347 if (c == '*' && buffer->cur != buffer->rlimit)
349 prevc = c, c = *buffer->cur++;
351 cpp_warning_with_line (pfile, pfile->line,
352 CPP_BUF_COL (buffer) - 2,
353 "\"/*\" within comment");
358 else if (is_vspace (c))
360 prevc = c, c = handle_newline (pfile, c);
364 adjust_column (pfile);
367 pfile->state.lexing_comment = 0;
368 buffer->read_ahead = EOF;
369 return c != '/' || prevc != '*';
372 /* Skip a C++ line comment. Handles escaped newlines. Returns
373 non-zero if a multiline comment. The following new line, if any,
374 is left in buffer->read_ahead.

"Multiline" is detected by comparing pfile->line on exit with the
value it had on entry. */
376 skip_line_comment (pfile)
379 cpp_buffer *buffer = pfile->buffer;
380 unsigned int orig_line = pfile->line; /* Line on which the comment starts. */
/* While this flag is set, skip_escaped_newlines and trigraph_ok
suppress some of their warnings. */
383 pfile->state.lexing_comment = 1;
387 if (buffer->cur == buffer->rlimit)
391 if (c == '?' || c == '\\')
392 c = skip_escaped_newlines (pfile, c);
/* The comment runs up to the first newline character that is not
part of an escaped newline. */
394 while (!is_vspace (c));
396 pfile->state.lexing_comment = 0;
397 buffer->read_ahead = c; /* Leave any newline for caller. */
398 return orig_line != pfile->line;
401 /* pfile->buffer->cur is one beyond the \t character. Update
402 col_adjust so we track the column correctly.

The amount added is the distance from the tab's zero-based column to
the next multiple of the tabstop option, less one. For example,
with a tabstop of 8 and the tab at zero-based column 0, col_adjust
grows by 7. */
404 adjust_column (pfile)
407 cpp_buffer *buffer = pfile->buffer;
408 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
410 /* Round it up to multiple of the tabstop, but subtract 1 since the
411 tab itself occupies a character position. */
412 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
413 - col % CPP_OPTION (pfile, tabstop)) - 1;
416 /* Skips whitespace, saving the next non-whitespace character in
417 pfile->buffer->read_ahead. C is the whitespace character that has
already been read. Adjusts the buffer's col_adjust (through
adjust_column) to account for tabs. Without this,
418 tokens might be assigned an incorrect column.

Diagnostics issued here: a warning for ignored null characters and,
when pedantic inside a directive, a pedwarn for a form feed or
vertical tab. */
420 skip_whitespace (pfile, c)
424 cpp_buffer *buffer = pfile->buffer;
/* NOTE(review): presumably records that the null-character warning
has been given, so it is issued once per run of whitespace; the
test that uses it is not visible in this listing. */
425 unsigned int warned = 0;
429 /* Horizontal space always OK. */
433 adjust_column (pfile);
434 /* Just \f \v or \0 left. */
439 cpp_warning (pfile, "null character(s) ignored");
443 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
444 cpp_pedwarn_with_line (pfile, pfile->line,
445 CPP_BUF_COL (buffer),
446 "%s in preprocessing directive",
447 c == '\f' ? "form feed" : "vertical tab");
/* Stop when the buffer is exhausted. */
450 if (buffer->cur == buffer->rlimit)
454 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
455 while (is_nvspace (c));
457 /* Remember the next character. */
458 buffer->read_ahead = c;
461 /* See if the characters of a number token are valid in a name (no
'.', '+' or '-'). */
464 name_p (pfile, string)
466 const cpp_string *string;
470 for (i = 0; i < string->len; i++)
471 if (!is_idchar (string->text[i]))
477 /* Parse an identifier, skipping embedded backslash-newlines. This is
478 a critical inner loop. The common case is an identifier which has
479 not been split by backslash-newline, does not contain a dollar
480 sign, and has already been scanned (roughly 10:1 ratio of
481 seen:unseen identifiers in normal code; the distribution is
482 Poisson-like). Second most common case is a new identifier, not
483 split and no dollar sign. The other possibilities are rare and
484 have been relegated to parse_identifier_slow.

On entry the first character of the identifier has already been
consumed, so scanning starts at pfile->buffer->cur - 1. RESULT is
the hash node found for the spelling; poisoned identifiers and a
misplaced __VA_ARGS__ are diagnosed here unless the lexer is
skipping. */
486 static cpp_hashnode *
487 parse_identifier (pfile)
490 cpp_hashnode *result;
491 const U_CHAR *cur, *rlimit;
493 /* Fast-path loop. Skim over a normal identifier.
494 N.B. ISIDNUM does not include $. */
495 cur = pfile->buffer->cur - 1;
496 rlimit = pfile->buffer->rlimit;
499 while (cur < rlimit && ISIDNUM (*cur));
501 /* Check for slow-path cases. */
/* '?' and '\\' may begin a trigraph or escaped newline that splits
the identifier; '$' is not matched by ISIDNUM. */
502 if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
503 result = parse_identifier_slow (pfile, cur);
/* Fast path: the spelling is the contiguous buffer range from BASE up
to, but not including, CUR.
NOTE(review): HT_ALLOC presumably asks ht_lookup to make its own
copy of the spelling, in contrast to HT_ALLOCED on the slow path;
confirm against the hash table interface. */
506 const U_CHAR *base = pfile->buffer->cur - 1;
507 result = (cpp_hashnode *)
508 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
509 pfile->buffer->cur = cur;
512 /* Rarely, identifiers require diagnostics when lexed.
513 XXX Has to be forced out of the fast path. */
/* __builtin_expect with 0 tells the compiler this branch is unlikely. */
514 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
515 && !pfile->state.skipping, 0))
517 /* It is allowed to poison the same identifier twice. */
518 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
519 cpp_error (pfile, "attempt to use poisoned \"%s\"",
522 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
523 replacement list of a variadic macro. */
524 if (result == pfile->spec_nodes.n__VA_ARGS__
525 && !pfile->state.va_args_ok)
527 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
533 /* Slow path. This handles identifiers which have been split, and
534 identifiers which contain dollar signs. The part of the identifier
535 from PFILE->buffer->cur-1 to CUR has already been scanned.

The spelling is assembled on the hash table's obstack, because a
split identifier is not contiguous in the input buffer. The
character that ends the identifier is left in buffer->read_ahead.
Returns the result of ht_lookup on the assembled, NUL-terminated
spelling. */
536 static cpp_hashnode *
537 parse_identifier_slow (pfile, cur)
541 cpp_buffer *buffer = pfile->buffer;
542 const U_CHAR *base = buffer->cur - 1;
543 struct obstack *stack = &pfile->hash_table->stack;
544 unsigned int c, saw_dollar = 0, len;
546 /* Copy the part of the token which is known to be okay. */
547 obstack_grow (stack, base, cur - base);
549 /* Now process the part which isn't. We are looking at one of
550 '$', '\\', or '?' on entry to this loop. */
/* Inner loop: append identifier characters one at a time. */
555 while (is_idchar (c))
557 obstack_1grow (stack, c);
563 if (buffer->cur == buffer->rlimit)
569 /* Potential escaped newline? */
570 if (c != '?' && c != '\\')
572 c = skip_escaped_newlines (pfile, c);
/* Outer loop: carry on if the identifier resumes after the escaped
newline. */
574 while (is_idchar (c));
576 /* Remember the next character. */
577 buffer->read_ahead = c;
579 /* $ is not an identifier character in the standard, but is commonly
580 accepted as an extension. Don't warn about it in skipped
581 conditional blocks. */
582 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
583 cpp_pedwarn (pfile, "'$' character(s) in identifier");
585 /* Identifiers are null-terminated. */
/* LEN is taken before the NUL is appended, so it excludes the
terminator. */
586 len = obstack_object_size (stack);
587 obstack_1grow (stack, '\0');
589 return (cpp_hashnode *)
590 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
593 /* Parse a number, skipping embedded backslash-newlines.

NUMBER receives the spelling: its text pointer and its length, which
excludes the terminating NUL. C is the first character of the
number. LEADING_PERIOD is passed as 1 by lex_dot, for a number that
began with '.', and as 0 by lex_token. The spelling is built at the
front of pfile->ident_pool and then committed, including one byte
for the NUL. The character that ends the number is left in
buffer->read_ahead. */
595 parse_number (pfile, number, c, leading_period)
601 cpp_buffer *buffer = pfile->buffer;
602 cpp_pool *pool = &pfile->ident_pool;
/* DEST is the next free byte of the pool; LIMIT bounds the space
currently available. */
603 unsigned char *dest, *limit;
605 dest = POOL_FRONT (pool);
606 limit = POOL_LIMIT (pool);
608 /* Place a leading period. */
612 limit = _cpp_next_chunk (pool, 0, &dest);
620 /* Need room for terminating null. */
621 if (dest + 1 >= limit)
622 limit = _cpp_next_chunk (pool, 0, &dest);
626 if (buffer->cur == buffer->rlimit)
/* NOTE(review): VALID_SIGN is given the character stored just before;
presumably it accepts a sign only directly after an exponent
letter. The macro is not visible in this listing. */
631 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
633 /* Potential escaped newline? */
634 if (c != '?' && c != '\\')
636 c = skip_escaped_newlines (pfile, c);
/* Carry on if the number resumes after the escaped newline. */
638 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
640 /* Remember the next character. */
641 buffer->read_ahead = c;
643 /* Null-terminate the number. */
646 number->text = POOL_FRONT (pool);
647 number->len = dest - number->text;
648 POOL_COMMIT (pool, number->len + 1); /* + 1 for the NUL. */
651 /* Subroutine of parse_string. Emits error for unterminated strings.

TERM is the terminating character that was expected. For a
double-quoted string, if pfile->mlstring_pos holds a line other
than the current lexer line, a second error points at that position
as the possible start of the unterminated literal, and the recorded
line is then reset to zero. */
653 unterminated (pfile, term)
657 cpp_error (pfile, "missing terminating %c character", term);
659 if (term == '\"' && pfile->mlstring_pos.line
660 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
662 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
663 pfile->mlstring_pos.col,
664 "possible start of unterminated string literal");
665 pfile->mlstring_pos.line = 0;
669 /* Subroutine of parse_string. */
671 unescaped_terminator_p (pfile, dest)
673 const unsigned char *dest;
675 const unsigned char *start, *temp;
677 /* In #include-style directives, terminators are not escapable. */
678 if (pfile->state.angled_headers)
681 start = POOL_FRONT (&pfile->ident_pool);
683 /* An odd number of consecutive backslashes represents an escaped
terminator. */
685 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
688 return ((dest - temp) & 1) == 0;
691 /* Parses a string, character constant, or angle-bracketed header file
692 name. Handles embedded trigraphs and escaped newlines. The stored
693 string is guaranteed NUL-terminated, but it is not guaranteed that
694 this is the first NUL since embedded NULs are preserved.
696 Multi-line strings are allowed, but they are deprecated.

TOKEN receives the text and its length (excluding the final NUL) in
val.str. TERMINATOR is the closing character to look for. The text
is built at the front of pfile->ident_pool and committed with one
extra byte for the NUL. The last character read is left in
buffer->read_ahead. */
698 parse_string (pfile, token, terminator)
701 cppchar_t terminator;
703 cpp_buffer *buffer = pfile->buffer;
704 cpp_pool *pool = &pfile->ident_pool;
705 unsigned char *dest, *limit;
/* Each kind of diagnostic is meant to be given at most once per
literal; these flags record that it has been given. */
707 bool warned_nulls = false, warned_multi = false;
709 dest = POOL_FRONT (pool);
710 limit = POOL_LIMIT (pool);
/* Running out of input before the terminator is an error. */
714 if (buffer->cur == buffer->rlimit)
720 /* We need space for the terminating NUL. */
722 limit = _cpp_next_chunk (pool, 0, &dest);
726 unterminated (pfile, terminator);
730 /* Handle trigraphs, escaped newlines etc. */
731 if (c == '?' || c == '\\')
732 c = skip_escaped_newlines (pfile, c);
/* A terminator character only ends the literal if it is not escaped. */
734 if (c == terminator && unescaped_terminator_p (pfile, dest))
739 else if (is_vspace (c))
741 /* In assembly language, silently terminate string and
742 character literals at end of line. This is a kludge
743 around not knowing where comments are. */
744 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
747 /* Character constants and header names may not extend over
748 multiple lines. In Standard C, neither may strings.
749 Unfortunately, we accept multiline strings as an
750 extension, except in #include family directives. */
751 if (terminator != '"' || pfile->state.angled_headers)
753 unterminated (pfile, terminator);
760 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
/* Record where the first multi-line string began; unterminated
reports this position if the literal is never closed. */
763 if (pfile->mlstring_pos.line == 0)
764 pfile->mlstring_pos = pfile->lexer_pos;
766 c = handle_newline (pfile, c);
770 else if (c == '\0' && !warned_nulls)
773 cpp_warning (pfile, "null character(s) preserved in literal");
779 /* Remember the next character. */
780 buffer->read_ahead = c;
783 token->val.str.text = POOL_FRONT (pool);
784 token->val.str.len = dest - token->val.str.text;
785 POOL_COMMIT (pool, token->val.str.len + 1); /* + 1 for the NUL. */
788 /* The stored comment includes the comment start and any terminator.

Turns the comment that has just been skipped into a CPP_COMMENT
token in TOKEN. FROM is the buffer position recorded by the caller
just after the comment's initial '/'. The text is copied into
memory allocated from pfile->ident_pool. */
790 save_comment (pfile, token, from)
793 const unsigned char *from;
795 unsigned char *buffer;
798 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
799 /* C++ comments probably (not definitely) have moved past a new
800 line, which we don't want to save in the comment. */
801 if (pfile->buffer->read_ahead != EOF)
803 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
805 token->type = CPP_COMMENT;
806 token->val.str.len = len;
807 token->val.str.text = buffer;
/* Byte 0 is left for the initial '/'; the store that fills it is not
visible in this listing. */
810 memcpy (buffer + 1, from, len - 1);
813 /* Subroutine of lex_token to handle '%'. A little tricky, since we
814 want to avoid stepping back when lexing %:%X.

RESULT starts out as CPP_MOD and is upgraded to CPP_MOD_EQ or, when
the digraphs option is on, to one of the digraph tokens (CPP_HASH,
CPP_PASTE, CPP_CLOSE_BRACE) with the DIGRAPH flag set.

To recognise the four-character paste digraph without backing up,
the character after the second '%' is read into buffer->extra_char.
If it is not ':', the token stays CPP_HASH, the '%' is put back in
buffer->read_ahead, and extra_char is picked up by the next call.

NOTE(review): the tests on C that select between these cases are not
visible in this listing. */
816 lex_percent (pfile, result)
820 cpp_buffer *buffer= pfile->buffer;
823 result->type = CPP_MOD;
824 /* Parsing %:%X could leave an extra character. */
825 if (buffer->extra_char == EOF)
826 c = get_effective_char (pfile);
/* Otherwise use the character parked by the previous call. */
829 c = buffer->read_ahead = buffer->extra_char;
830 buffer->extra_char = EOF;
834 ACCEPT_CHAR (CPP_MOD_EQ);
835 else if (CPP_OPTION (pfile, digraphs))
839 result->flags |= DIGRAPH;
840 ACCEPT_CHAR (CPP_HASH);
841 if (get_effective_char (pfile) == '%')
843 buffer->extra_char = get_effective_char (pfile);
844 if (buffer->extra_char == ':')
846 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_PASTE);
850 /* We'll catch the extra_char when we're called back. */
851 buffer->read_ahead = '%';
856 result->flags |= DIGRAPH;
857 ACCEPT_CHAR (CPP_CLOSE_BRACE);
862 /* Subroutine of lex_token to handle '.'. This is tricky, since we
863 want to avoid stepping back when lexing '...' or '.123'. In the
864 latter case we should also set a flag for parse_number.

RESULT becomes CPP_NUMBER when a digit follows the dot, with
parse_number called with its leading-period argument set to 1.
Otherwise it is CPP_DOT, upgraded to CPP_ELLIPSIS or, with the
cplusplus option, CPP_DOT_STAR.

To recognise the ellipsis without backing up, the character after a
second '.' is read into buffer->extra_char. If it is not '.', the
second '.' is put back in buffer->read_ahead and extra_char is
picked up by the next call.

NOTE(review): the test for the second '.' is not visible in this
listing. */
866 lex_dot (pfile, result)
870 cpp_buffer *buffer = pfile->buffer;
873 /* Parsing ..X could leave an extra character. */
874 if (buffer->extra_char == EOF)
875 c = get_effective_char (pfile);
/* Otherwise use the character parked by the previous call. */
878 c = buffer->read_ahead = buffer->extra_char;
879 buffer->extra_char = EOF;
882 /* All known character sets have 0...9 contiguous. */
883 if (c >= '0' && c <= '9')
885 result->type = CPP_NUMBER;
886 parse_number (pfile, &result->val.str, c, 1);
890 result->type = CPP_DOT;
893 buffer->extra_char = get_effective_char (pfile);
894 if (buffer->extra_char == '.')
896 buffer->extra_char = EOF;
897 ACCEPT_CHAR (CPP_ELLIPSIS);
900 /* We'll catch the extra_char when we're called back. */
901 buffer->read_ahead = '.';
903 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
904 ACCEPT_CHAR (CPP_DOT_STAR);
908 /* Allocate COUNT tokens for RUN. RUN->base is set to the start of
the new array and RUN->limit to one past its last element. */
910 _cpp_init_tokenrun (run, count)
914 run->base = xnewvec (cpp_token, count);
915 run->limit = run->base + count;
919 /* Returns the next tokenrun, or creates one if there is none.
A newly created run is linked after RUN, with its prev pointer set
back to RUN, and is given room for 250 tokens. */
924 if (run->next == NULL)
926 run->next = xnew (tokenrun);
927 run->next->prev = run;
928 _cpp_init_tokenrun (run->next, 250);
934 /* Lex a token into RESULT (external interface).

Takes the next slot from the current token run, moving on to the
next run when the current one is full, and fills it by calling
lex_token. A '#' at the beginning of a line is offered to
_cpp_handle_directive unless macro arguments are being parsed.

NOTE(review): the parameter is named DEST, and neither its use nor
the enclosing loop and return statements are visible in this
listing. */
936 _cpp_lex_token (pfile, dest)
/* The current run is full: move to (or create) the next one. */
944 if (pfile->cur_token == pfile->cur_run->limit)
946 pfile->cur_run = next_tokenrun (pfile->cur_run);
947 pfile->cur_token = pfile->cur_run->base;
949 result = pfile->cur_token++;
951 if (pfile->lookaheads)
954 result = lex_token (pfile, result);
956 if (result->flags & BOL)
958 /* Is this a directive? If _cpp_handle_directive returns
959 false, it is an assembler #. */
960 if (result->type == CPP_HASH
961 && !pfile->state.parsing_args
962 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
/* Tell the client, if it registered a callback, that a new line has
started. */
964 if (pfile->cb.line_change && !pfile->state.skipping)
965 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
968 /* We don't skip tokens in directives. */
969 if (pfile->state.in_directive)
972 /* Outside a directive, invalidate controlling macros. At file
973 EOF, lex_token takes care of popping the buffer, so we never
974 get here and MI optimisation works. */
975 pfile->mi_valid = false;
/* CPP_EOF is let through even while skipping. */
977 if (!pfile->state.skipping || result->type == CPP_EOF)
984 /* Lex a token into RESULT. When meeting a newline, returns CPP_EOF
985 if parsing a directive, otherwise returns to the start of the token
986 buffer if permissible. Returns the location of the lexed token. */
988 lex_token (pfile, result)
994 const unsigned char *comment_start;
997 buffer = pfile->buffer;
998 result->flags = buffer->saved_flags;
999 buffer->saved_flags = 0;
1001 pfile->lexer_pos.line = pfile->line;
1002 result->line = pfile->line;
1005 c = buffer->read_ahead;
1006 if (c == EOF && buffer->cur < buffer->rlimit)
1008 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1009 pfile->lexer_pos.col = result->col;
1010 buffer->read_ahead = EOF;
1016 buffer->saved_flags = BOL;
1017 if (!pfile->state.parsing_args && !pfile->state.in_directive)
1019 if (buffer->cur != buffer->line_base)
1021 /* Non-empty files should end in a newline. Don't warn
1022 for command line and _Pragma buffers. */
1023 if (!buffer->from_stage3)
1024 cpp_pedwarn (pfile, "no newline at end of file");
1025 handle_newline (pfile, '\n');
1028 /* Don't pop the last buffer. */
1031 unsigned char stop = buffer->return_at_eof;
1033 _cpp_pop_buffer (pfile);
1038 result->type = CPP_EOF;
1041 case ' ': case '\t': case '\f': case '\v': case '\0':
1042 skip_whitespace (pfile, c);
1043 result->flags |= PREV_WHITE;
1046 case '\n': case '\r':
1047 handle_newline (pfile, c);
1048 buffer->saved_flags = BOL;
1049 if (! pfile->state.in_directive)
1051 if (!pfile->keep_tokens)
1053 pfile->cur_run = &pfile->base_run;
1054 result = pfile->base_run.base;
1055 pfile->cur_token = result + 1;
1059 result->type = CPP_EOF;
1064 /* These could start an escaped newline, or '?' a trigraph. Let
1065 skip_escaped_newlines do all the work. */
1067 unsigned int line = pfile->line;
1069 c = skip_escaped_newlines (pfile, c);
1070 if (line != pfile->line)
1071 /* We had at least one escaped newline of some sort, and the
1072 next character is in buffer->read_ahead. Update the
1073 token's line and column. */
1074 goto update_tokens_line;
1076 /* We are either the original '?' or '\\', or a trigraph. */
1077 result->type = CPP_QUERY;
1078 buffer->read_ahead = EOF;
1086 case '0': case '1': case '2': case '3': case '4':
1087 case '5': case '6': case '7': case '8': case '9':
1088 result->type = CPP_NUMBER;
1089 parse_number (pfile, &result->val.str, c, 0);
1093 if (!CPP_OPTION (pfile, dollars_in_ident))
1095 /* Fall through... */
1098 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1099 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1100 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1101 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1103 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1104 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1105 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1106 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1108 result->type = CPP_NAME;
1109 result->val.node = parse_identifier (pfile);
1111 /* 'L' may introduce wide characters or strings. */
1112 if (result->val.node == pfile->spec_nodes.n_L)
1114 c = buffer->read_ahead;
1115 if (c == EOF && buffer->cur < buffer->rlimit)
1117 if (c == '\'' || c == '"')
1120 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1124 /* Convert named operators to their proper types. */
1125 else if (result->val.node->flags & NODE_OPERATOR)
1127 result->flags |= NAMED_OP;
1128 result->type = result->val.node->value.operator;
1134 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1136 parse_string (pfile, result, c);
1140 /* A potential block or line comment. */
1141 comment_start = buffer->cur;
1142 result->type = CPP_DIV;
1143 c = get_effective_char (pfile);
1145 ACCEPT_CHAR (CPP_DIV_EQ);
1146 if (c != '/' && c != '*')
1151 if (skip_block_comment (pfile))
1152 cpp_error (pfile, "unterminated comment");
1156 if (!CPP_OPTION (pfile, cplusplus_comments)
1157 && !CPP_IN_SYSTEM_HEADER (pfile))
1160 /* Warn about comments only if pedantically GNUC89, and not
1161 in system headers. */
1162 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1163 && ! buffer->warned_cplusplus_comments)
1166 "C++ style comments are not allowed in ISO C89");
1168 "(this will be reported only once per input file)");
1169 buffer->warned_cplusplus_comments = 1;
1172 /* Skip_line_comment updates buffer->read_ahead. */
1173 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1174 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1175 pfile->lexer_pos.col,
1176 "multi-line comment");
1179 /* Skipping the comment has updated buffer->read_ahead. */
1180 if (!pfile->state.save_comments)
1182 result->flags |= PREV_WHITE;
1183 goto update_tokens_line;
1186 /* Save the comment as a token in its own right. */
1187 save_comment (pfile, result, comment_start);
1188 /* Don't do MI optimisation. */
1192 if (pfile->state.angled_headers)
1194 result->type = CPP_HEADER_NAME;
1195 c = '>'; /* terminator. */
1199 result->type = CPP_LESS;
1200 c = get_effective_char (pfile);
1202 ACCEPT_CHAR (CPP_LESS_EQ);
1205 ACCEPT_CHAR (CPP_LSHIFT);
1206 if (get_effective_char (pfile) == '=')
1207 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1209 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1211 ACCEPT_CHAR (CPP_MIN);
1212 if (get_effective_char (pfile) == '=')
1213 ACCEPT_CHAR (CPP_MIN_EQ);
1215 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1217 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1218 result->flags |= DIGRAPH;
1220 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1222 ACCEPT_CHAR (CPP_OPEN_BRACE);
1223 result->flags |= DIGRAPH;
1228 result->type = CPP_GREATER;
1229 c = get_effective_char (pfile);
1231 ACCEPT_CHAR (CPP_GREATER_EQ);
1234 ACCEPT_CHAR (CPP_RSHIFT);
1235 if (get_effective_char (pfile) == '=')
1236 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1238 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1240 ACCEPT_CHAR (CPP_MAX);
1241 if (get_effective_char (pfile) == '=')
1242 ACCEPT_CHAR (CPP_MAX_EQ);
1247 lex_percent (pfile, result);
1251 lex_dot (pfile, result);
1255 result->type = CPP_PLUS;
1256 c = get_effective_char (pfile);
1258 ACCEPT_CHAR (CPP_PLUS_EQ);
1260 ACCEPT_CHAR (CPP_PLUS_PLUS);
1264 result->type = CPP_MINUS;
1265 c = get_effective_char (pfile);
1268 ACCEPT_CHAR (CPP_DEREF);
1269 if (CPP_OPTION (pfile, cplusplus)
1270 && get_effective_char (pfile) == '*')
1271 ACCEPT_CHAR (CPP_DEREF_STAR);
1274 ACCEPT_CHAR (CPP_MINUS_EQ);
1276 ACCEPT_CHAR (CPP_MINUS_MINUS);
1280 result->type = CPP_MULT;
1281 if (get_effective_char (pfile) == '=')
1282 ACCEPT_CHAR (CPP_MULT_EQ);
1286 result->type = CPP_EQ;
1287 if (get_effective_char (pfile) == '=')
1288 ACCEPT_CHAR (CPP_EQ_EQ);
1292 result->type = CPP_NOT;
1293 if (get_effective_char (pfile) == '=')
1294 ACCEPT_CHAR (CPP_NOT_EQ);
1298 result->type = CPP_AND;
1299 c = get_effective_char (pfile);
1301 ACCEPT_CHAR (CPP_AND_EQ);
1303 ACCEPT_CHAR (CPP_AND_AND);
1307 result->type = CPP_HASH;
1308 if (get_effective_char (pfile) == '#')
1309 ACCEPT_CHAR (CPP_PASTE);
1313 result->type = CPP_OR;
1314 c = get_effective_char (pfile);
1316 ACCEPT_CHAR (CPP_OR_EQ);
1318 ACCEPT_CHAR (CPP_OR_OR);
1322 result->type = CPP_XOR;
1323 if (get_effective_char (pfile) == '=')
1324 ACCEPT_CHAR (CPP_XOR_EQ);
1328 result->type = CPP_COLON;
1329 c = get_effective_char (pfile);
1330 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1331 ACCEPT_CHAR (CPP_SCOPE);
1332 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1334 result->flags |= DIGRAPH;
1335 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1339 case '~': result->type = CPP_COMPL; break;
1340 case ',': result->type = CPP_COMMA; break;
1341 case '(': result->type = CPP_OPEN_PAREN; break;
1342 case ')': result->type = CPP_CLOSE_PAREN; break;
1343 case '[': result->type = CPP_OPEN_SQUARE; break;
1344 case ']': result->type = CPP_CLOSE_SQUARE; break;
1345 case '{': result->type = CPP_OPEN_BRACE; break;
1346 case '}': result->type = CPP_CLOSE_BRACE; break;
1347 case ';': result->type = CPP_SEMICOLON; break;
1349 /* @ is a punctuator in Objective C. */
1350 case '@': result->type = CPP_ATSIGN; break;
1354 result->type = CPP_OTHER;
1362 /* An upper bound on the number of bytes needed to spell a token,
1363 including preceding whitespace.

The base length depends on how the token is spelled: the stored
string length for SPELL_STRING tokens, the identifier length for
SPELL_IDENT tokens, and zero for every other category. */
1365 cpp_token_len (token)
1366 const cpp_token *token;
1370 switch (TOKEN_SPELL (token))
1372 default: len = 0; break;
1373 case SPELL_STRING: len = token->val.str.len; break;
1374 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1376 /* 1 for whitespace, 4 for comment delimiters. */
1380 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1381 already contain enough space to hold the token's spelling.
1382 Returns a pointer to the character after the last character
1385 cpp_spell_token (pfile, token, buffer)
1386 cpp_reader *pfile; /* Would be nice to be rid of this... */
1387 const cpp_token *token;
1388 unsigned char *buffer;
1390 switch (TOKEN_SPELL (token)) /* Dispatch on the token's spelling category. */
1392 case SPELL_OPERATOR:
1394 const unsigned char *spelling;
1397 if (token->flags & DIGRAPH) /* Digraph form: look the spelling up in digraph_spellings. */
1399 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1400 else if (token->flags & NAMED_OP)
1403 spelling = TOKEN_NAME (token);
1405 while ((c = *spelling++) != '\0') /* Walk the spelling up to its terminating NUL. */
1412 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); /* Exactly NODE_LEN bytes; no NUL is added here. */
1413 buffer += NODE_LEN (token->val.node);
1418 int left, right, tag;
1419 switch (token->type) /* Choose the delimiters and prefix; a zero value means emit nothing. */
1421 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1422 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1423 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1424 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1425 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1426 default: left = '\0'; right = '\0'; tag = '\0'; break;
1428 if (tag) *buffer++ = tag;
1429 if (left) *buffer++ = left;
1430 memcpy (buffer, token->val.str.text, token->val.str.len); /* Body text is copied by its explicit length. */
1431 buffer += token->val.str.len;
1432 if (right) *buffer++ = right;
1437 *buffer++ = token->val.c; /* Token held as a single character value. */
1441 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token)); /* Reported through cpp_ice, naming the token. */
1448 /* Returns a token as a null-terminated string. The string is
1449 temporary, and automatically freed later. Useful for diagnostics. */
1451 cpp_token_as_text (pfile, token)
1453 const cpp_token *token;
1455 unsigned int len = cpp_token_len (token); /* Upper bound on the size of the spelling. */
1456 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end; /* Buffer is taken from the reader's ident_pool. */
1458 end = cpp_spell_token (pfile, token, start); /* Spell into START; END receives the pointer cpp_spell_token returns. */
1464 /* Used by C front ends. Should really move to using cpp_token_as_text.
Returns the name field of the token_spellings entry for TYPE; TYPE is
used directly as the table index. */
1466 cpp_type2name (type)
1467 enum cpp_ttype type;
1469 return (const char *) token_spellings[type].name;
1472 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1473 for efficiency - to avoid double-buffering. Also, outputs a space
1474 if PREV_WHITE is flagged. */
1476 cpp_output_token (token, fp)
1477 const cpp_token *token;
1480 if (token->flags & PREV_WHITE)
1483 switch (TOKEN_SPELL (token)) /* Dispatch on the token's spelling category. */
1485 case SPELL_OPERATOR:
1487 const unsigned char *spelling;
1489 if (token->flags & DIGRAPH) /* Digraph form: look the spelling up in digraph_spellings. */
1491 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1492 else if (token->flags & NAMED_OP)
1495 spelling = TOKEN_NAME (token);
1497 ufputs (spelling, fp);
1503 ufputs (NODE_NAME (token->val.node), fp); /* Identifier: write the node's name. */
1508 int left, right, tag;
1509 switch (token->type) /* Choose the delimiters and prefix; a zero value means emit nothing. */
1511 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1512 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1513 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1514 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1515 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1516 default: left = '\0'; right = '\0'; tag = '\0'; break;
1518 if (tag) putc (tag, fp);
1519 if (left) putc (left, fp);
1520 fwrite (token->val.str.text, 1, token->val.str.len, fp); /* Body text is written by its explicit length. */
1521 if (right) putc (right, fp);
1526 putc (token->val.c, fp); /* Token held as a single character value. */
1530 /* An error, most probably. */
1535 /* Compare two tokens. A and B are examined further only when their
type and flags are both equal; what else is compared then depends on
the spelling category of A. */
1537 _cpp_equiv_tokens (a, b)
1538 const cpp_token *a, *b;
1540 if (a->type == b->type && a->flags == b->flags)
1541 switch (TOKEN_SPELL (a))
1543 default: /* Keep compiler happy. */
1544 case SPELL_OPERATOR:
1547 return a->val.c == b->val.c; /* Character. */
1549 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); /* Macro arguments must also share an argument number. */
1551 return a->val.node == b->val.node; /* Same node pointer. */
1553 return (a->val.str.len == b->val.str.len /* Same length, then same bytes. */
1554 && !memcmp (a->val.str.text, b->val.str.text,
1561 /* Determine whether two tokens can be pasted together, and if so,
1562 what the resulting token is. Returns CPP_EOF if the tokens cannot
1563 be pasted, or the appropriate type for the merged token if they
1566 cpp_can_paste (pfile, token1, token2, digraph)
1568 const cpp_token *token1, *token2;
1571 enum cpp_ttype a = token1->type, b = token2->type;
1572 int cxx = CPP_OPTION (pfile, cplusplus);
1574 /* Treat named operators as if they were ordinary NAMEs. */
1575 if (token1->flags & NAMED_OP)
1577 if (token2->flags & NAMED_OP)
/* A type no greater than CPP_LAST_EQ followed by CPP_EQ yields the type
   at a fixed enum offset (CPP_EQ_EQ - CPP_EQ) from A. */
1580 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1581 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1586 if (b == a) return CPP_RSHIFT;
1587 if (b == CPP_QUERY && cxx) return CPP_MAX; /* Only when the cplusplus option is set. */
1588 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1591 if (b == a) return CPP_LSHIFT;
1592 if (b == CPP_QUERY && cxx) return CPP_MIN; /* Only when the cplusplus option is set. */
1593 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1594 if (CPP_OPTION (pfile, digraphs))
1597 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1599 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1603 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1604 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1605 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1608 if (b == a) return CPP_MINUS_MINUS;
1609 if (b == CPP_GREATER) return CPP_DEREF;
1612 if (b == a && cxx) return CPP_SCOPE;
1613 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1614 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1618 if (CPP_OPTION (pfile, digraphs))
1620 if (b == CPP_GREATER)
1621 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1623 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1627 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1630 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1631 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Both operands must agree on the DIGRAPH flag; *DIGRAPH then reports
   token1's DIGRAPH bit for the pasted result. */
1635 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1637 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1641 if (b == CPP_NAME) return CPP_NAME;
1643 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1645 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR; /* Left operand is the node recorded as n_L. */
1647 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING; /* Left operand is the node recorded as n_L. */
1651 if (b == CPP_NUMBER) return CPP_NUMBER;
1652 if (b == CPP_NAME) return CPP_NUMBER;
1653 if (b == CPP_DOT) return CPP_NUMBER;
1654 /* Numbers cannot have length zero, so this is safe. */
1655 if ((b == CPP_PLUS || b == CPP_MINUS)
1656 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1])) /* Tests the last character of token1's text. */
1667 /* Returns nonzero if a space should be inserted to avoid an
1668 accidental token paste for output. For simplicity, it is
1669 conservative, and occasionally advises a space where one is not
1670 needed, e.g. "." and ".2". */
1673 cpp_avoid_paste (pfile, token1, token2)
1675 const cpp_token *token1, *token2;
1677 enum cpp_ttype a = token1->type, b = token2->type;
1680 if (token1->flags & NAMED_OP)
1682 if (token2->flags & NAMED_OP)
/* C becomes the first character of token2's spelling when token2 is a
   digraph or belongs to the SPELL_OPERATOR category. */
1686 if (token2->flags & DIGRAPH)
1687 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1688 else if (token_spellings[b].category == SPELL_OPERATOR)
1689 c = token_spellings[b].name[0];
1691 /* Quickly get everything that can paste with an '='. */
1692 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case below lists the leading characters (or token types) of
   token2 that are treated as a paste risk after token1's type. */
1697 case CPP_GREATER: return c == '>' || c == '?';
1698 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1699 case CPP_PLUS: return c == '+';
1700 case CPP_MINUS: return c == '-' || c == '>';
1701 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1702 case CPP_MOD: return c == ':' || c == '>';
1703 case CPP_AND: return c == '&';
1704 case CPP_OR: return c == '|';
1705 case CPP_COLON: return c == ':' || c == '>';
1706 case CPP_DEREF: return c == '*';
1707 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1708 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1709 case CPP_NAME: return ((b == CPP_NUMBER
1710 && name_p (pfile, &token2->val.str))
1712 || b == CPP_CHAR || b == CPP_STRING); /* L */
1713 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1714 || c == '.' || c == '+' || c == '-');
1715 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1716 && token1->val.c == '@'
1717 && (b == CPP_NAME || b == CPP_STRING));
1724 /* Output all the remaining tokens on the current line, and a newline
1725 character, to FP. Leading whitespace is removed. */
1727 cpp_output_line (pfile, fp)
1733 cpp_get_token (pfile, &token);
1734 token.flags &= ~PREV_WHITE; /* Drop PREV_WHITE on the first token only. */
1735 while (token.type != CPP_EOF) /* Loop ends at the first CPP_EOF token. */
1737 cpp_output_token (&token, fp);
1738 cpp_get_token (pfile, &token);
1744 /* Returns the value of a hexadecimal digit. */
1749 if (c >= 'a' && c <= 'f') /* Lower-case letters map to 10 through 15. */
1750 return c - 'a' + 10;
1751 if (c >= 'A' && c <= 'F') /* Upper-case letters map to 10 through 15. */
1752 return c - 'A' + 10;
1753 if (c >= '0' && c <= '9')
1758 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1759 failure if cpplib is not parsing C++ or C99. Such failure is
1760 silent, and no variables are updated. Otherwise returns 0, and
1761 warns if -Wtraditional.
1763 [lex.charset]: The character designated by the universal character
1764 name \UNNNNNNNN is that character whose character short name in
1765 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1766 universal character name \uNNNN is that character whose character
1767 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1768 for a universal character name is less than 0x20 or in the range
1769 0x7F-0x9F (inclusive), or if the universal character name
1770 designates a character in the basic source character set, then the
1771 program is ill-formed.
1773 We assume that wchar_t is Unicode, so we don't need to do any
1774 mapping. Is this ever wrong?
1776 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1777 LIMIT is the end of the string or charconst. PSTR is updated to
1778 point after the UCS on return, and the UCS is written into PC. */
1781 maybe_read_ucs (pfile, pstr, limit, pc)
1783 const unsigned char **pstr;
1784 const unsigned char *limit;
1787 const unsigned char *p = *pstr;
1788 unsigned int code = 0;
1789 unsigned int c = *pc, length;
1791 /* Only attempt to interpret a UCS for C++ and C99. */
1792 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1795 if (CPP_WTRADITIONAL (pfile))
1796 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1798 length = (c == 'u' ? 4: 8); /* Four hex digits after u, otherwise eight. */
1800 if ((size_t) (limit - p) < length) /* Fewer than LENGTH bytes remain before LIMIT. */
1802 cpp_error (pfile, "incomplete universal-character-name");
1803 /* Skip to the end to avoid more diagnostics. */
1808 for (; length; length--, p++) /* LENGTH counts the digits still expected. */
1812 code = (code << 4) + hex_digit_value (c); /* Shift in one more hex digit. */
1816 "non-hex digit '%c' in universal-character-name", c);
1817 /* We shouldn't skip in case there are multibyte chars. */
1823 #ifdef TARGET_EBCDIC
1824 cpp_error (pfile, "universal-character-name on EBCDIC target");
1825 code = 0x3f; /* EBCDIC invalid character */
1827 /* True extended characters are OK. */
1829 && !(code & 0x80000000)
1830 && !(code >= 0xD800 && code <= 0xDFFF))
1832 /* The standard permits $, @ and ` to be specified as UCNs. We use
1833 hex escapes so that this also works with EBCDIC hosts. */
1834 else if (code == 0x24 || code == 0x40 || code == 0x60)
1836 /* Don't give another error if one occurred above. */
1837 else if (length == 0)
1838 cpp_error (pfile, "universal-character-name out of range");
1846 /* Interpret an escape sequence, and return its value. PSTR points to
1847 the input pointer, which is just after the backslash. LIMIT is how
1848 much text we have. MASK is a bitmask for the precision for the
1849 destination type (char or wchar_t). TRADITIONAL, if true, does not
1850 interpret escapes that did not exist in traditional C.
1852 Handles all relevant diagnostics.

For hex escapes, OVERFLOW collects any bits that the shift by four
would push out of I as digits are accumulated, and the final value is
also checked against MASK. The octal digit loop stops at LIMIT or once
COUNT reaches 3, and its result is checked against MASK as well. The
u/U forms are handed to maybe_read_ucs, whose return value is stored
in UNKNOWN. */
1855 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1857 const unsigned char **pstr;
1858 const unsigned char *limit;
1859 unsigned HOST_WIDE_INT mask;
1863 const unsigned char *str = *pstr;
1864 unsigned int c = *str++;
1868 case '\\': case '\'': case '"': case '?': break;
1869 case 'b': c = TARGET_BS; break;
1870 case 'f': c = TARGET_FF; break;
1871 case 'n': c = TARGET_NEWLINE; break;
1872 case 'r': c = TARGET_CR; break;
1873 case 't': c = TARGET_TAB; break;
1874 case 'v': c = TARGET_VT; break;
1876 case '(': case '{': case '[': case '%':
1877 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1878 '\%' is used to prevent SCCS from getting confused. */
1879 unknown = CPP_PEDANTIC (pfile);
1883 if (CPP_WTRADITIONAL (pfile))
1884 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1890 if (CPP_PEDANTIC (pfile))
1891 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1896 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1900 if (CPP_WTRADITIONAL (pfile))
1901 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1905 unsigned int i = 0, overflow = 0;
1906 int digits_found = 0;
1914 overflow |= i ^ (i << 4 >> 4);
1915 i = (i << 4) + hex_digit_value (c);
1920 cpp_error (pfile, "\\x used with no following hex digits");
1922 if (overflow | (i != (i & mask)))
1924 cpp_pedwarn (pfile, "hex escape sequence out of range");
1931 case '0': case '1': case '2': case '3':
1932 case '4': case '5': case '6': case '7':
1934 unsigned int i = c - '0';
1937 while (str < limit && ++count < 3)
1940 if (c < '0' || c > '7')
1943 i = (i << 3) + c - '0';
1946 if (i != (i & mask))
1948 cpp_pedwarn (pfile, "octal escape sequence out of range");
1963 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1965 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1969 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Unless already defined, the maximum char width is CHAR_TYPE_SIZE. */
1975 #ifndef MAX_CHAR_TYPE_SIZE
1976 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
/* Unless already defined, the maximum wchar width is WCHAR_TYPE_SIZE. */
1979 #ifndef MAX_WCHAR_TYPE_SIZE
1980 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1983 /* Interpret a (possibly wide) character constant in TOKEN.
1984 WARN_MULTI warns about multi-character charconsts, if not
1985 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1986 that did not exist in traditional C. PCHARS_SEEN points to a
1987 variable that is filled in with the number of characters seen. */
1989 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1991 const cpp_token *token;
1994 unsigned int *pchars_seen;
1996 const unsigned char *str = token->val.str.text;
1997 const unsigned char *limit = str + token->val.str.len; /* One past the last byte of the constant's text. */
1998 unsigned int chars_seen = 0;
1999 unsigned int width, max_chars, c;
2000 unsigned HOST_WIDE_INT mask;
2001 HOST_WIDE_INT result = 0;
2003 #ifdef MULTIBYTE_CHARS
2004 (void) local_mbtowc (NULL, NULL, 0);
2007 /* Width in bits. */
2008 if (token->type == CPP_CHAR)
2009 width = MAX_CHAR_TYPE_SIZE;
2011 width = MAX_WCHAR_TYPE_SIZE;
2013 if (width < HOST_BITS_PER_WIDE_INT)
2014 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; /* Low WIDTH bits set. */
2017 max_chars = HOST_BITS_PER_WIDE_INT / width; /* How many WIDTH-bit characters fit in a HOST_WIDE_INT. */
2021 #ifdef MULTIBYTE_CHARS
2025 char_len = local_mbtowc (&wc, str, limit - str);
2028 cpp_warning (pfile, "ignoring invalid multibyte character");
2041 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
2043 #ifdef MAP_CHARACTER
2045 c = MAP_CHARACTER (c);
2048 /* Merge character into result; ignore excess chars. */
2049 if (++chars_seen <= max_chars)
2051 if (width < HOST_BITS_PER_WIDE_INT)
2052 result = (result << width) | (c & mask); /* Shift earlier characters up and append C's low bits. */
2058 if (chars_seen == 0)
2059 cpp_error (pfile, "empty character constant");
2060 else if (chars_seen > max_chars)
2062 chars_seen = max_chars; /* Clamp the count that is reported back. */
2063 cpp_warning (pfile, "character constant too long");
2065 else if (chars_seen > 1 && !traditional && warn_multi)
2066 cpp_warning (pfile, "multi-character character constant");
2068 /* If char type is signed, sign-extend the constant. The
2069 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2070 if (token->type == CPP_CHAR && chars_seen)
2072 unsigned int nbits = chars_seen * width;
/* NOTE(review): this inner MASK shadows the outer one. The shift count
   below assumes NBITS <= HOST_BITS_PER_INT, yet NBITS can be as large as
   HOST_BITS_PER_WIDE_INT (chars_seen is at most max_chars) - confirm
   behaviour on hosts where HOST_WIDE_INT is wider than int. */
2073 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
2075 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2076 || ((result >> (nbits - 1)) & 1) == 0) /* Top bit of the NBITS-wide value is clear. */
2082 *pchars_seen = chars_seen;
/* Alignment used for chunk sizes in this file: the offset of member u
   within struct dummy. */
2098 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Nonzero when CHUNK is non-null, is not POOL's locked chunk, and spans
   at least twice SIZE bytes between its base and limit. */
2101 chunk_suitable (pool, chunk, size)
2106 /* Being at least twice SIZE means we can use memcpy in
2107 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2109 return (chunk && pool->locked != chunk
2110 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2113 /* Returns the end of the new pool. PTR points to a char in the old
2114 pool, and is updated to point to the same char in the new pool. */
2116 _cpp_next_chunk (pool, len, ptr)
2119 unsigned char **ptr;
2121 cpp_chunk *chunk = pool->cur->next; /* Candidate: the chunk linked after the current one. */
2123 /* LEN is the minimum size we want in the new pool. */
2124 len += POOL_ROOM (pool); /* Also cover the POOL_ROOM bytes copied across below. */
2125 if (! chunk_suitable (pool, chunk, len))
2127 chunk = new_chunk (POOL_SIZE (pool) * 2 + len); /* Candidate rejected: make a chunk of twice the pool size plus LEN. */
2129 chunk->next = pool->cur->next; /* Splice the new chunk in directly after the current one. */
2130 pool->cur->next = chunk;
2133 /* Update the pointer before changing chunk's front. */
2135 *ptr += chunk->base - POOL_FRONT (pool); /* Same offset, rebased from the old front to the new base. */
2137 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool)); /* Copy POOL_ROOM bytes from the old front to the new base. */
2138 chunk->front = chunk->base;
2141 return POOL_LIMIT (pool);
2148 unsigned char *base;
2151 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT); /* Round SIZE with POOL_ALIGN to DEFAULT_ALIGNMENT. */
2152 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk)); /* One allocation holds the data area and the descriptor. */
2153 /* Put the chunk descriptor at the end. Then chunk overruns will
2154 cause obvious chaos. */
2155 result = (cpp_chunk *) (base + size);
2156 result->base = base;
2157 result->front = base; /* Front starts at the base of the data area. */
2158 result->limit = base + size; /* Limit is also the address of the descriptor itself. */
/* Set up POOL with alignment ALIGN and a first chunk created by
   new_chunk (SIZE); that chunk also becomes the current one. */
2165 _cpp_init_pool (pool, size, align, temp)
2167 unsigned int size, align, temp;
2170 align = DEFAULT_ALIGNMENT;
2171 if (align & (align - 1)) /* True when ALIGN has more than one bit set, i.e. is not a power of two. */
2173 pool->align = align;
2174 pool->first = new_chunk (size);
2175 pool->cur = pool->first;
2179 pool->cur->next = pool->cur; /* Points the chunk's next link back at itself. */
/* Take one more lock on POOL. When the lock count was zero beforehand,
   the current chunk is recorded as the locked one. */
2183 _cpp_lock_pool (pool)
2186 if (pool->locks++ == 0)
2187 pool->locked = pool->cur;
/* Release one lock on POOL. The test below holds only when the lock
   count drops to zero. */
2191 _cpp_unlock_pool (pool)
2194 if (--pool->locks == 0)
/* Walk POOL's chunks, starting from the first one. */
2199 _cpp_free_pool (pool)
2202 cpp_chunk *chunk = pool->first, *next;
2210 while (chunk && chunk != pool->first); /* Stop at a null link or on arriving back at the first chunk. */
2213 /* Reserve LEN bytes from a memory pool. Returns the pool's front
pointer once at least LEN (rounded with POOL_ALIGN to the pool's
alignment) bytes of room are available there. */
2215 _cpp_pool_reserve (pool, len)
2219 len = POOL_ALIGN (len, pool->align);
2220 if (len > (unsigned int) POOL_ROOM (pool)) /* Current room is too small for the request. */
2221 _cpp_next_chunk (pool, len, 0); /* Passes 0 as PTR: no caller pointer to relocate. */
2223 return POOL_FRONT (pool);
2226 /* Allocate LEN bytes from a memory pool. The space is first obtained
with _cpp_pool_reserve and then passed to POOL_COMMIT. */
2228 _cpp_pool_alloc (pool, len)
2232 unsigned char *result = _cpp_pool_reserve (pool, len);
2234 POOL_COMMIT (pool, len); /* NOTE(review): presumably advances the pool front past LEN bytes - macro not shown here; confirm. */