1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
/* Accessors into the token_spellings table, indexed by TOKEN's type.
   TOKEN_SPELL yields the entry's spelling category; TOKEN_NAME yields
   its name string (the operator spelling for OP entries, the
   stringified enumerator for TK entries). */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
106 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
114 cpp_ideq (token, string)
115 const cpp_token *token;
118 if (token->type != CPP_NAME)
121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
127 handle_newline (pfile, newline_char)
129 cppchar_t newline_char;
132 cppchar_t next = EOF;
135 buffer = pfile->buffer;
136 buffer->col_adjust = 0;
137 buffer->line_base = buffer->cur;
139 /* Handle CR-LF and LF-CR combinations, get the next character. */
140 if (buffer->cur < buffer->rlimit)
142 next = *buffer->cur++;
143 if (next + newline_char == '\r' + '\n')
145 buffer->line_base = buffer->cur;
146 if (buffer->cur < buffer->rlimit)
147 next = *buffer->cur++;
153 buffer->read_ahead = next;
157 /* Subroutine of skip_escaped_newlines; called when a trigraph is
158 encountered. It warns if necessary, and returns true if the
159 trigraph should be honoured. FROM_CHAR is the third character of a
160 trigraph, and presumed to be the previous character for position
163 trigraph_ok (pfile, from_char)
167 int accept = CPP_OPTION (pfile, trigraphs);
169 /* Don't warn about trigraphs in comments. */
170 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
172 cpp_buffer *buffer = pfile->buffer;
175 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
176 "trigraph ??%c converted to %c",
178 (int) _cpp_trigraph_map[from_char]);
179 else if (buffer->cur != buffer->last_Wtrigraphs)
181 buffer->last_Wtrigraphs = buffer->cur;
182 cpp_warning_with_line (pfile, pfile->line,
183 CPP_BUF_COL (buffer) - 2,
184 "trigraph ??%c ignored", (int) from_char);
191 /* Sets result->type to T and resets buffer->read_ahead to EOF, so the
   character that was just read ahead is treated as consumed by the
   current token. Assumes local variables buffer and result are in
   scope at the point of use.
   NOTE(review): T is expanded without parentheses; callers pass either
   a plain enumerator or a conditional expression. */
192 #define ACCEPT_CHAR(t) \
193 do { result->type = t; buffer->read_ahead = EOF; } while (0)
195 /* When we move to multibyte character sets, add to these something
196 that saves and restores the state of the multibyte conversion
197 library. This probably involves saving and restoring a "cookie".
198 In the case of glibc it is an 8-byte structure, so is not a high
199 overhead operation. In any case, it's out of the fast path.

   As written, SAVE_STATE records buffer->cur in saved_cur and
   RESTORE_STATE assigns that value back to buffer->cur. Both assume
   local variables buffer and saved_cur are in scope. */
200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
208 skip_escaped_newlines (buffer, next)
212 /* Only do this if we apply stages 1 and 2. */
213 if (!buffer->from_stage3)
216 const unsigned char *saved_cur;
221 if (buffer->cur == buffer->rlimit)
227 next1 = *buffer->cur++;
228 if (next1 != '?' || buffer->cur == buffer->rlimit)
234 next1 = *buffer->cur++;
235 if (!_cpp_trigraph_map[next1]
236 || !trigraph_ok (buffer->pfile, next1))
242 /* We have a full trigraph here. */
243 next = _cpp_trigraph_map[next1];
244 if (next != '\\' || buffer->cur == buffer->rlimit)
249 /* We have a backslash, and room for at least one more character. */
253 next1 = *buffer->cur++;
254 if (!is_nvspace (next1))
258 while (buffer->cur < buffer->rlimit);
260 if (!is_vspace (next1))
266 if (space && !buffer->pfile->state.lexing_comment)
267 cpp_warning (buffer->pfile,
268 "backslash and newline separated by space");
270 next = handle_newline (buffer->pfile, next1);
272 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
274 while (next == '\\' || next == '?');
277 buffer->read_ahead = next;
281 /* Obtain the next character, after trigraph conversion and skipping
282 an arbitrary string of escaped newlines. The common case of no
283 trigraphs or escaped newlines falls through quickly. */
285 get_effective_char (buffer)
288 cppchar_t next = EOF;
290 if (buffer->cur < buffer->rlimit)
292 next = *buffer->cur++;
294 /* '?' can introduce trigraphs (and therefore backslash); '\\'
295 can introduce escaped newlines, which we want to skip, or
296 UCNs, which, depending upon lexer state, we will handle in
298 if (next == '?' || next == '\\')
299 next = skip_escaped_newlines (buffer, next);
302 buffer->read_ahead = next;
306 /* Skip a C-style block comment. We find the end of the comment by
307 seeing if an asterisk is before every '/' we encounter. Returns
308 non-zero if comment terminated by EOF, zero otherwise. */
310 skip_block_comment (pfile)
313 cpp_buffer *buffer = pfile->buffer;
314 cppchar_t c = EOF, prevc = EOF;
316 pfile->state.lexing_comment = 1;
317 while (buffer->cur != buffer->rlimit)
319 prevc = c, c = *buffer->cur++;
322 /* FIXME: For speed, create a new character class of characters
323 of interest inside block comments. */
324 if (c == '?' || c == '\\')
325 c = skip_escaped_newlines (buffer, c);
327 /* People like decorating comments with '*', so check for '/'
328 instead for efficiency. */
334 /* Warn about potential nested comments, but not if the '/'
335 comes immediately before the true comment delimiter.
336 Don't bother to get it right across escaped newlines. */
337 if (CPP_OPTION (pfile, warn_comments)
338 && buffer->cur != buffer->rlimit)
340 prevc = c, c = *buffer->cur++;
341 if (c == '*' && buffer->cur != buffer->rlimit)
343 prevc = c, c = *buffer->cur++;
345 cpp_warning_with_line (pfile, pfile->line,
346 CPP_BUF_COL (buffer) - 2,
347 "\"/*\" within comment");
352 else if (is_vspace (c))
354 prevc = c, c = handle_newline (pfile, c);
358 adjust_column (pfile);
361 pfile->state.lexing_comment = 0;
362 buffer->read_ahead = EOF;
363 return c != '/' || prevc != '*';
366 /* Skip a C++ line comment. Handles escaped newlines. Returns
367 non-zero if a multiline comment. The following new line, if any,
368 is left in buffer->read_ahead. */
370 skip_line_comment (pfile)
373 cpp_buffer *buffer = pfile->buffer;
374 unsigned int orig_line = pfile->line;
377 pfile->state.lexing_comment = 1;
381 if (buffer->cur == buffer->rlimit)
385 if (c == '?' || c == '\\')
386 c = skip_escaped_newlines (buffer, c);
388 while (!is_vspace (c));
390 pfile->state.lexing_comment = 0;
391 buffer->read_ahead = c; /* Leave any newline for caller. */
392 return orig_line != pfile->line;
395 /* pfile->buffer->cur is one beyond the \t character. Update
396 col_adjust so we track the column correctly. */
398 adjust_column (pfile)
401 cpp_buffer *buffer = pfile->buffer;
402 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
404 /* Round it up to a multiple of the tabstop, but subtract 1 since the
405 tab itself occupies a character position. */
406 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
407 - col % CPP_OPTION (pfile, tabstop)) - 1;
410 /* Skips whitespace, saving the next non-whitespace character.
411 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
412 tokens might be assigned an incorrect column. */
414 skip_whitespace (pfile, c)
418 cpp_buffer *buffer = pfile->buffer;
419 unsigned int warned = 0;
423 /* Horizontal space always OK. */
427 adjust_column (pfile);
428 /* Just \f \v or \0 left. */
433 cpp_warning (pfile, "null character(s) ignored");
437 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
438 cpp_pedwarn_with_line (pfile, pfile->line,
439 CPP_BUF_COL (buffer),
440 "%s in preprocessing directive",
441 c == '\f' ? "form feed" : "vertical tab");
444 if (buffer->cur == buffer->rlimit)
448 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
449 while (is_nvspace (c));
451 /* Remember the next character. */
452 buffer->read_ahead = c;
455 /* See if the characters of a number token are valid in a name (no
458 name_p (pfile, string)
460 const cpp_string *string;
464 for (i = 0; i < string->len; i++)
465 if (!is_idchar (string->text[i]))
471 /* Parse an identifier, skipping embedded backslash-newlines.
472 Calculate the hash value of the token while parsing, for improved
473 performance. The hashing algorithm *must* match cpp_lookup(). */
475 static cpp_hashnode *
476 parse_identifier (pfile, c)
480 cpp_hashnode *result;
481 cpp_buffer *buffer = pfile->buffer;
482 unsigned int saw_dollar = 0, len;
483 struct obstack *stack = &pfile->hash_table->stack;
489 obstack_1grow (stack, c);
495 if (buffer->cur == buffer->rlimit)
500 while (is_idchar (c));
502 /* Potential escaped newline? */
503 if (c != '?' && c != '\\')
505 c = skip_escaped_newlines (buffer, c);
507 while (is_idchar (c));
509 /* Remember the next character. */
510 buffer->read_ahead = c;
512 /* $ is not an identifier character in the standard, but is commonly
513 accepted as an extension. Don't warn about it in skipped
514 conditional blocks. */
515 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
516 cpp_pedwarn (pfile, "'$' character(s) in identifier");
518 /* Identifiers are null-terminated. */
519 len = obstack_object_size (stack);
520 obstack_1grow (stack, '\0');
522 /* This routine commits the memory if necessary. */
523 result = (cpp_hashnode *)
524 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
526 /* Some identifiers require diagnostics when lexed. */
527 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
529 /* It is allowed to poison the same identifier twice. */
530 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
531 cpp_error (pfile, "attempt to use poisoned \"%s\"",
534 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
535 replacement list of a variadic macro. */
536 if (result == pfile->spec_nodes.n__VA_ARGS__
537 && !pfile->state.va_args_ok)
538 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
544 /* Parse a number, skipping embedded backslash-newlines. */
546 parse_number (pfile, number, c, leading_period)
552 cpp_buffer *buffer = pfile->buffer;
553 cpp_pool *pool = &pfile->ident_pool;
554 unsigned char *dest, *limit;
556 dest = POOL_FRONT (pool);
557 limit = POOL_LIMIT (pool);
559 /* Place a leading period. */
563 limit = _cpp_next_chunk (pool, 0, &dest);
571 /* Need room for terminating null. */
572 if (dest + 1 >= limit)
573 limit = _cpp_next_chunk (pool, 0, &dest);
577 if (buffer->cur == buffer->rlimit)
582 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
584 /* Potential escaped newline? */
585 if (c != '?' && c != '\\')
587 c = skip_escaped_newlines (buffer, c);
589 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
591 /* Remember the next character. */
592 buffer->read_ahead = c;
594 /* Null-terminate the number. */
597 number->text = POOL_FRONT (pool);
598 number->len = dest - number->text;
599 POOL_COMMIT (pool, number->len + 1);
602 /* Subroutine of parse_string. Emits error for unterminated strings. */
604 unterminated (pfile, term)
608 cpp_error (pfile, "missing terminating %c character", term);
610 if (term == '\"' && pfile->mlstring_pos.line
611 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
613 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
614 pfile->mlstring_pos.col,
615 "possible start of unterminated string literal");
616 pfile->mlstring_pos.line = 0;
620 /* Subroutine of parse_string. */
622 unescaped_terminator_p (pfile, dest)
624 const unsigned char *dest;
626 const unsigned char *start, *temp;
628 /* In #include-style directives, terminators are not escapable. */
629 if (pfile->state.angled_headers)
632 start = POOL_FRONT (&pfile->ident_pool);
634 /* An odd number of consecutive backslashes represents an escaped
636 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
639 return ((dest - temp) & 1) == 0;
642 /* Parses a string, character constant, or angle-bracketed header file
643 name. Handles embedded trigraphs and escaped newlines. The stored
644 string is guaranteed NUL-terminated, but it is not guaranteed that
645 this is the first NUL since embedded NULs are preserved.
647 Multi-line strings are allowed, but they are deprecated. */
649 parse_string (pfile, token, terminator)
652 cppchar_t terminator;
654 cpp_buffer *buffer = pfile->buffer;
655 cpp_pool *pool = &pfile->ident_pool;
656 unsigned char *dest, *limit;
658 bool warned_nulls = false, warned_multi = false;
660 dest = POOL_FRONT (pool);
661 limit = POOL_LIMIT (pool);
665 if (buffer->cur == buffer->rlimit)
671 /* We need space for the terminating NUL. */
673 limit = _cpp_next_chunk (pool, 0, &dest);
677 unterminated (pfile, terminator);
681 /* Handle trigraphs, escaped newlines etc. */
682 if (c == '?' || c == '\\')
683 c = skip_escaped_newlines (buffer, c);
685 if (c == terminator && unescaped_terminator_p (pfile, dest))
690 else if (is_vspace (c))
692 /* In assembly language, silently terminate string and
693 character literals at end of line. This is a kludge
694 around not knowing where comments are. */
695 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
698 /* Character constants and header names may not extend over
699 multiple lines. In Standard C, neither may strings.
700 Unfortunately, we accept multiline strings as an
701 extension, except in #include family directives. */
702 if (terminator != '"' || pfile->state.angled_headers)
704 unterminated (pfile, terminator);
711 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
714 if (pfile->mlstring_pos.line == 0)
715 pfile->mlstring_pos = pfile->lexer_pos;
717 c = handle_newline (pfile, c);
721 else if (c == '\0' && !warned_nulls)
724 cpp_warning (pfile, "null character(s) preserved in literal");
730 /* Remember the next character. */
731 buffer->read_ahead = c;
734 token->val.str.text = POOL_FRONT (pool);
735 token->val.str.len = dest - token->val.str.text;
736 POOL_COMMIT (pool, token->val.str.len + 1);
739 /* The stored comment includes the comment start and any terminator. */
741 save_comment (pfile, token, from)
744 const unsigned char *from;
746 unsigned char *buffer;
749 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
750 /* C++ comments probably (not definitely) have moved past a new
751 line, which we don't want to save in the comment. */
752 if (pfile->buffer->read_ahead != EOF)
754 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
756 token->type = CPP_COMMENT;
757 token->val.str.len = len;
758 token->val.str.text = buffer;
761 memcpy (buffer + 1, from, len - 1);
764 /* Subroutine of lex_token to handle '%'. A little tricky, since we
765 want to avoid stepping back when lexing %:%X. */
767 lex_percent (buffer, result)
773 result->type = CPP_MOD;
774 /* Parsing %:%X could leave an extra character. */
775 if (buffer->extra_char == EOF)
776 c = get_effective_char (buffer);
779 c = buffer->read_ahead = buffer->extra_char;
780 buffer->extra_char = EOF;
784 ACCEPT_CHAR (CPP_MOD_EQ);
785 else if (CPP_OPTION (buffer->pfile, digraphs))
789 result->flags |= DIGRAPH;
790 ACCEPT_CHAR (CPP_HASH);
791 if (get_effective_char (buffer) == '%')
793 buffer->extra_char = get_effective_char (buffer);
794 if (buffer->extra_char == ':')
796 buffer->extra_char = EOF;
797 ACCEPT_CHAR (CPP_PASTE);
800 /* We'll catch the extra_char when we're called back. */
801 buffer->read_ahead = '%';
806 result->flags |= DIGRAPH;
807 ACCEPT_CHAR (CPP_CLOSE_BRACE);
812 /* Subroutine of lex_token to handle '.'. This is tricky, since we
813 want to avoid stepping back when lexing '...' or '.123'. In the
814 latter case we should also set a flag for parse_number. */
816 lex_dot (pfile, result)
820 cpp_buffer *buffer = pfile->buffer;
823 /* Parsing ..X could leave an extra character. */
824 if (buffer->extra_char == EOF)
825 c = get_effective_char (buffer);
828 c = buffer->read_ahead = buffer->extra_char;
829 buffer->extra_char = EOF;
832 /* All known character sets have 0...9 contiguous. */
833 if (c >= '0' && c <= '9')
835 result->type = CPP_NUMBER;
836 parse_number (pfile, &result->val.str, c, 1);
840 result->type = CPP_DOT;
843 buffer->extra_char = get_effective_char (buffer);
844 if (buffer->extra_char == '.')
846 buffer->extra_char = EOF;
847 ACCEPT_CHAR (CPP_ELLIPSIS);
850 /* We'll catch the extra_char when we're called back. */
851 buffer->read_ahead = '.';
853 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
854 ACCEPT_CHAR (CPP_DOT_STAR);
859 _cpp_lex_token (pfile, result)
865 const unsigned char *comment_start;
869 buffer = pfile->buffer;
870 result->flags = buffer->saved_flags;
871 buffer->saved_flags = 0;
872 bol = (buffer->cur <= buffer->line_base + 1
873 && pfile->lexer_pos.output_line == pfile->line);
875 pfile->lexer_pos.line = pfile->line;
876 result->line = pfile->line;
878 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
880 c = buffer->read_ahead;
881 if (c == EOF && buffer->cur < buffer->rlimit)
884 pfile->lexer_pos.col++;
886 result->col = pfile->lexer_pos.col;
889 buffer->read_ahead = EOF;
893 /* To prevent bogus diagnostics, only pop the buffer when
894 in-progress directives and arguments have been taken care of.
895 Decrement the line to terminate an in-progress directive. */
896 if (pfile->state.in_directive)
898 else if (! pfile->state.parsing_args)
900 /* Non-empty files should end in a newline. Don't warn for
901 command line and _Pragma buffers. */
902 if (pfile->lexer_pos.col != 0)
904 /* Account for the missing \n, prevent multiple warnings. */
906 pfile->lexer_pos.col = 0;
907 if (!buffer->from_stage3)
908 cpp_pedwarn (pfile, "no newline at end of file");
911 /* Don't pop the last file. */
914 unsigned char stop = buffer->return_at_eof;
916 _cpp_pop_buffer (pfile);
921 result->type = CPP_EOF;
924 case ' ': case '\t': case '\f': case '\v': case '\0':
925 skip_whitespace (pfile, c);
926 result->flags |= PREV_WHITE;
929 case '\n': case '\r':
930 if (pfile->state.in_directive)
932 result->type = CPP_EOF;
933 if (pfile->state.parsing_args)
934 buffer->read_ahead = c;
937 handle_newline (pfile, c);
938 /* Decrementing pfile->line allows directives to
939 recognise that the newline has been seen, and also
940 means that diagnostics don't point to the next line. */
941 pfile->lexer_pos.output_line = pfile->line--;
946 handle_newline (pfile, c);
947 /* This is a new line, so clear any white space flag. Newlines
948 in arguments are white space (6.10.3.10); parse_arg takes
950 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
952 if (pfile->state.parsing_args != 2)
953 pfile->lexer_pos.output_line = pfile->line;
958 /* These could start an escaped newline, or '?' a trigraph. Let
959 skip_escaped_newlines do all the work. */
961 unsigned int line = pfile->line;
963 c = skip_escaped_newlines (buffer, c);
964 if (line != pfile->line)
965 /* We had at least one escaped newline of some sort, and the
966 next character is in buffer->read_ahead. Update the
967 token's line and column. */
970 /* We are either the original '?' or '\\', or a trigraph. */
971 result->type = CPP_QUERY;
972 buffer->read_ahead = EOF;
980 case '0': case '1': case '2': case '3': case '4':
981 case '5': case '6': case '7': case '8': case '9':
982 result->type = CPP_NUMBER;
983 parse_number (pfile, &result->val.str, c, 0);
987 if (!CPP_OPTION (pfile, dollars_in_ident))
989 /* Fall through... */
992 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
993 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
994 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
995 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
997 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
998 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
999 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1000 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1002 result->type = CPP_NAME;
1003 result->val.node = parse_identifier (pfile, c);
1005 /* 'L' may introduce wide characters or strings. */
1006 if (result->val.node == pfile->spec_nodes.n_L)
1008 c = buffer->read_ahead; /* For make_string. */
1009 if (c == '\'' || c == '"')
1011 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1015 /* Convert named operators to their proper types. */
1016 else if (result->val.node->flags & NODE_OPERATOR)
1018 result->flags |= NAMED_OP;
1019 result->type = result->val.node->value.operator;
1025 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1027 parse_string (pfile, result, c);
1031 /* A potential block or line comment. */
1032 comment_start = buffer->cur;
1033 result->type = CPP_DIV;
1034 c = get_effective_char (buffer);
1036 ACCEPT_CHAR (CPP_DIV_EQ);
1037 if (c != '/' && c != '*')
1042 if (skip_block_comment (pfile))
1043 cpp_error (pfile, "unterminated comment");
1047 if (!CPP_OPTION (pfile, cplusplus_comments)
1048 && !CPP_IN_SYSTEM_HEADER (pfile))
1051 /* Warn about comments only if pedantically GNUC89, and not
1052 in system headers. */
1053 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1054 && ! buffer->warned_cplusplus_comments)
1057 "C++ style comments are not allowed in ISO C89");
1059 "(this will be reported only once per input file)");
1060 buffer->warned_cplusplus_comments = 1;
1063 /* Skip_line_comment updates buffer->read_ahead. */
1064 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1065 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1066 pfile->lexer_pos.col,
1067 "multi-line comment");
1070 /* Skipping the comment has updated buffer->read_ahead. */
1071 if (!pfile->state.save_comments)
1073 result->flags |= PREV_WHITE;
1077 /* Save the comment as a token in its own right. */
1078 save_comment (pfile, result, comment_start);
1079 /* Don't do MI optimisation. */
1083 if (pfile->state.angled_headers)
1085 result->type = CPP_HEADER_NAME;
1086 c = '>'; /* terminator. */
1090 result->type = CPP_LESS;
1091 c = get_effective_char (buffer);
1093 ACCEPT_CHAR (CPP_LESS_EQ);
1096 ACCEPT_CHAR (CPP_LSHIFT);
1097 if (get_effective_char (buffer) == '=')
1098 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1100 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1102 ACCEPT_CHAR (CPP_MIN);
1103 if (get_effective_char (buffer) == '=')
1104 ACCEPT_CHAR (CPP_MIN_EQ);
1106 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1108 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1109 result->flags |= DIGRAPH;
1111 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1113 ACCEPT_CHAR (CPP_OPEN_BRACE);
1114 result->flags |= DIGRAPH;
1119 result->type = CPP_GREATER;
1120 c = get_effective_char (buffer);
1122 ACCEPT_CHAR (CPP_GREATER_EQ);
1125 ACCEPT_CHAR (CPP_RSHIFT);
1126 if (get_effective_char (buffer) == '=')
1127 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1129 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1131 ACCEPT_CHAR (CPP_MAX);
1132 if (get_effective_char (buffer) == '=')
1133 ACCEPT_CHAR (CPP_MAX_EQ);
1138 lex_percent (buffer, result);
1139 if (result->type == CPP_HASH)
1144 lex_dot (pfile, result);
1148 result->type = CPP_PLUS;
1149 c = get_effective_char (buffer);
1151 ACCEPT_CHAR (CPP_PLUS_EQ);
1153 ACCEPT_CHAR (CPP_PLUS_PLUS);
1157 result->type = CPP_MINUS;
1158 c = get_effective_char (buffer);
1161 ACCEPT_CHAR (CPP_DEREF);
1162 if (CPP_OPTION (pfile, cplusplus)
1163 && get_effective_char (buffer) == '*')
1164 ACCEPT_CHAR (CPP_DEREF_STAR);
1167 ACCEPT_CHAR (CPP_MINUS_EQ);
1169 ACCEPT_CHAR (CPP_MINUS_MINUS);
1173 result->type = CPP_MULT;
1174 if (get_effective_char (buffer) == '=')
1175 ACCEPT_CHAR (CPP_MULT_EQ);
1179 result->type = CPP_EQ;
1180 if (get_effective_char (buffer) == '=')
1181 ACCEPT_CHAR (CPP_EQ_EQ);
1185 result->type = CPP_NOT;
1186 if (get_effective_char (buffer) == '=')
1187 ACCEPT_CHAR (CPP_NOT_EQ);
1191 result->type = CPP_AND;
1192 c = get_effective_char (buffer);
1194 ACCEPT_CHAR (CPP_AND_EQ);
1196 ACCEPT_CHAR (CPP_AND_AND);
1200 c = buffer->extra_char; /* Can be set by error condition below. */
1203 buffer->read_ahead = c;
1204 buffer->extra_char = EOF;
1207 c = get_effective_char (buffer);
1211 ACCEPT_CHAR (CPP_PASTE);
1215 result->type = CPP_HASH;
1219 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1220 tokens within the list of arguments that would otherwise act
1221 as preprocessing directives, the behavior is undefined.
1223 This implementation will report a hard error, terminate the
1224 macro invocation, and proceed to process the directive. */
1225 if (pfile->state.parsing_args)
1227 pfile->lexer_pos.output_line = pfile->line;
1228 if (pfile->state.parsing_args == 2)
1231 "directives may not be used inside a macro argument");
1232 result->type = CPP_EOF;
1235 /* in_directive can be true inside a _Pragma. */
1236 else if (!pfile->state.in_directive)
1238 /* This is the hash introducing a directive. If the return
1239 value is false, it is an assembler #. */
1240 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1246 result->type = CPP_OR;
1247 c = get_effective_char (buffer);
1249 ACCEPT_CHAR (CPP_OR_EQ);
1251 ACCEPT_CHAR (CPP_OR_OR);
1255 result->type = CPP_XOR;
1256 if (get_effective_char (buffer) == '=')
1257 ACCEPT_CHAR (CPP_XOR_EQ);
1261 result->type = CPP_COLON;
1262 c = get_effective_char (buffer);
1263 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1264 ACCEPT_CHAR (CPP_SCOPE);
1265 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1267 result->flags |= DIGRAPH;
1268 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1272 case '~': result->type = CPP_COMPL; break;
1273 case ',': result->type = CPP_COMMA; break;
1274 case '(': result->type = CPP_OPEN_PAREN; break;
1275 case ')': result->type = CPP_CLOSE_PAREN; break;
1276 case '[': result->type = CPP_OPEN_SQUARE; break;
1277 case ']': result->type = CPP_CLOSE_SQUARE; break;
1278 case '{': result->type = CPP_OPEN_BRACE; break;
1279 case '}': result->type = CPP_CLOSE_BRACE; break;
1280 case ';': result->type = CPP_SEMICOLON; break;
1282 /* @ is a punctuator in Objective C. */
1283 case '@': result->type = CPP_ATSIGN; break;
1287 result->type = CPP_OTHER;
1292 if (!pfile->state.in_directive && pfile->state.skipping)
1295 /* If not in a directive, this token invalidates controlling macros. */
1296 if (!pfile->state.in_directive)
1297 pfile->mi_valid = false;
1300 /* An upper bound on the number of bytes needed to spell a token,
1301 including preceding whitespace. */
1303 cpp_token_len (token)
1304 const cpp_token *token;
1308 switch (TOKEN_SPELL (token))
1310 default: len = 0; break;
1311 case SPELL_STRING: len = token->val.str.len; break;
1312 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1314 /* 1 for whitespace, 4 for comment delimiters. */
1318 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1319 already contain enough space to hold the token's spelling.
1320 Returns a pointer to the character after the last character
1323 cpp_spell_token (pfile, token, buffer)
1324 cpp_reader *pfile; /* Would be nice to be rid of this... */
1325 const cpp_token *token;
1326 unsigned char *buffer;
1328 switch (TOKEN_SPELL (token))
1330 case SPELL_OPERATOR:
1332 const unsigned char *spelling;
1335 if (token->flags & DIGRAPH)
1337 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1338 else if (token->flags & NAMED_OP)
1341 spelling = TOKEN_NAME (token);
1343 while ((c = *spelling++) != '\0')
1350 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1351 buffer += NODE_LEN (token->val.node);
1356 int left, right, tag;
1357 switch (token->type)
1359 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1360 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1361 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1362 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1363 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1364 default: left = '\0'; right = '\0'; tag = '\0'; break;
1366 if (tag) *buffer++ = tag;
1367 if (left) *buffer++ = left;
1368 memcpy (buffer, token->val.str.text, token->val.str.len);
1369 buffer += token->val.str.len;
1370 if (right) *buffer++ = right;
1375 *buffer++ = token->val.c;
1379 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1386 /* Returns a token as a null-terminated string. The string is
1387 temporary, and automatically freed later. Useful for diagnostics. */
1389 cpp_token_as_text (pfile, token)
1391 const cpp_token *token;
/* The allocation from the ident_pool of PFILE is sized with the upper
bound computed by cpp_token_len. */
1393 unsigned int len = cpp_token_len (token);
1394 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* END receives the pointer returned by cpp_spell_token after spelling
the token into START. */
1396 end = cpp_spell_token (pfile, token, start);
1402 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1404 cpp_type2name (type)
1405 enum cpp_ttype type;
/* Returns the name field of the token_spellings entry indexed by TYPE,
cast to a const char pointer. */
1407 return (const char *) token_spellings[type].name;
1410 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1411 for efficiency - to avoid double-buffering. Also, outputs a space
1412 if PREV_WHITE is flagged. */
1414 cpp_output_token (token, fp)
1415 const cpp_token *token;
1418 if (token->flags & PREV_WHITE)
/* Dispatch on the spelling category of the token. */
1421 switch (TOKEN_SPELL (token))
1423 case SPELL_OPERATOR:
1425 const unsigned char *spelling;
/* Digraph tokens index digraph_spellings by the offset of their type
from CPP_FIRST_DIGRAPH. */
1427 if (token->flags & DIGRAPH)
1429 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1430 else if (token->flags & NAMED_OP)
1433 spelling = TOKEN_NAME (token);
1435 ufputs (spelling, fp);
/* The name of the node is written with ufputs. */
1441 ufputs (NODE_NAME (token->val.node), fp);
/* LEFT and RIGHT are the delimiters and TAG is an optional prefix
character for the literal; a zero value means that nothing is
written for that slot. */
1446 int left, right, tag;
1447 switch (token->type)
1449 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1450 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1451 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1452 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1453 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1454 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Written in order: tag, opening delimiter, stored text (by explicit
length, via fwrite), closing delimiter. */
1456 if (tag) putc (tag, fp);
1457 if (left) putc (left, fp);
1458 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1459 if (right) putc (right, fp);
/* The token stores a single character directly. */
1464 putc (token->val.c, fp);
1468 /* An error, most probably. */
1473 /* Compare two tokens A and B for equivalence. Type and flags must
both match before the stored values are compared. */
1475 _cpp_equiv_tokens (a, b)
1476 const cpp_token *a, *b;
1478 if (a->type == b->type && a->flags == b->flags)
/* The spelling category of A selects which value field is compared. */
1479 switch (TOKEN_SPELL (a))
1481 default: /* Keep compiler happy. */
1482 case SPELL_OPERATOR:
1485 return a->val.c == b->val.c; /* Character. */
/* Only CPP_MACRO_ARG tokens need their argument numbers compared. */
1487 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Node-valued tokens compare node pointers. */
1489 return a->val.node == b->val.node;
/* String-valued tokens compare lengths first, then bytes. */
1491 return (a->val.str.len == b->val.str.len
1492 && !memcmp (a->val.str.text, b->val.str.text,
1499 /* Determine whether two tokens can be pasted together, and if so,
1500 what the resulting token is. Returns CPP_EOF if the tokens cannot
1501 be pasted, or the appropriate type for the merged token if they can. */
1504 cpp_can_paste (pfile, token1, token2, digraph)
1506 const cpp_token *token1, *token2;
/* A and B are the types of the left and right tokens; CXX holds the
value of the cplusplus option. */
1509 enum cpp_ttype a = token1->type, b = token2->type;
1510 int cxx = CPP_OPTION (pfile, cplusplus);
1512 /* Treat named operators as if they were ordinary NAMEs. */
1513 if (token1->flags & NAMED_OP)
1515 if (token2->flags & NAMED_OP)
/* A type no greater than CPP_LAST_EQ followed by CPP_EQ yields the
type offset from A by the enum distance between CPP_EQ and
CPP_EQ_EQ. */
1518 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1519 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1524 if (b == a) return CPP_RSHIFT;
1525 if (b == CPP_QUERY && cxx) return CPP_MAX;
1526 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1529 if (b == a) return CPP_LSHIFT;
1530 if (b == CPP_QUERY && cxx) return CPP_MIN;
1531 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
/* The two digraphs below are only formed when the digraphs option is
set, and each stores 1 through DIGRAPH. */
1532 if (CPP_OPTION (pfile, digraphs))
1535 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1537 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1541 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1542 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1543 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1546 if (b == a) return CPP_MINUS_MINUS;
1547 if (b == CPP_GREATER) return CPP_DEREF;
1550 if (b == a && cxx) return CPP_SCOPE;
1551 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1552 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1556 if (CPP_OPTION (pfile, digraphs))
1558 if (b == CPP_GREATER)
1559 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1561 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1565 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1568 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1569 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to CPP_PASTE only when their
DIGRAPH flags agree; that flag value is passed back through
DIGRAPH. */
1573 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1575 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1579 if (b == CPP_NAME) return CPP_NAME;
1581 && name_p (pfile, &token2->val.str)) return CPP_NAME;
/* A left token whose node is the special n_L node produces the wide
character and wide string types. */
1583 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1585 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
1589 if (b == CPP_NUMBER) return CPP_NUMBER;
1590 if (b == CPP_NAME) return CPP_NUMBER;
1591 if (b == CPP_DOT) return CPP_NUMBER;
1592 /* Numbers cannot have length zero, so this is safe. */
1593 if ((b == CPP_PLUS || b == CPP_MINUS)
1594 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1605 /* Returns nonzero if a space should be inserted to avoid an
1606 accidental token paste for output. For simplicity, it is
1607 conservative, and occasionally advises a space where one is not
1608 needed, e.g. "." and ".2". */
1611 cpp_avoid_paste (pfile, token1, token2)
1613 const cpp_token *token1, *token2;
/* A and B are the types of the left and right tokens. */
1615 enum cpp_ttype a = token1->type, b = token2->type;
1618 if (token1->flags & NAMED_OP)
1620 if (token2->flags & NAMED_OP)
/* C becomes the first character of the second token when that token
is spelled as a digraph or as an operator. */
1624 if (token2->flags & DIGRAPH)
1625 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1626 else if (token_spellings[b].category == SPELL_OPERATOR)
1627 c = token_spellings[b].name[0];
1629 /* Quickly get everything that can paste with an '='. */
1630 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Each case lists the leading characters (or token types) of the
second token that call for a separating space after the first. */
1635 case CPP_GREATER: return c == '>' || c == '?';
1636 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1637 case CPP_PLUS: return c == '+';
1638 case CPP_MINUS: return c == '-' || c == '>';
1639 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1640 case CPP_MOD: return c == ':' || c == '>';
1641 case CPP_AND: return c == '&';
1642 case CPP_OR: return c == '|';
1643 case CPP_COLON: return c == ':' || c == '>';
1644 case CPP_DEREF: return c == '*';
1645 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1646 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1647 case CPP_NAME: return ((b == CPP_NUMBER
1648 && name_p (pfile, &token2->val.str))
1650 || b == CPP_CHAR || b == CPP_STRING); /* L */
1651 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1652 || c == '.' || c == '+' || c == '-');
1653 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1654 && token1->val.c == '@'
1655 && (b == CPP_NAME || b == CPP_STRING));
1662 /* Output all the remaining tokens on the current line, and a newline
1663 character, to FP. Leading whitespace is removed. */
1665 cpp_output_line (pfile, fp)
/* Clearing PREV_WHITE on the first token is what drops the leading
whitespace. */
1671 cpp_get_token (pfile, &token);
1672 token.flags &= ~PREV_WHITE;
/* Write tokens until cpp_get_token delivers CPP_EOF. */
1673 while (token.type != CPP_EOF)
1675 cpp_output_token (&token, fp);
1676 cpp_get_token (pfile, &token);
1682 /* Returns the value of a hexadecimal digit. */
/* Lower-case and upper-case letters a to f both map to 10 through 15. */
1687 if (c >= 'a' && c <= 'f')
1688 return c - 'a' + 10;
1689 if (c >= 'A' && c <= 'F')
1690 return c - 'A' + 10;
/* Decimal digits are tested last. */
1691 if (c >= '0' && c <= '9')
1696 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1697 failure if cpplib is not parsing C++ or C99. Such failure is
1698 silent, and no variables are updated. Otherwise returns 0, and
1699 warns if -Wtraditional.
1701 [lex.charset]: The character designated by the universal character
1702 name \UNNNNNNNN is that character whose character short name in
1703 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1704 universal character name \uNNNN is that character whose character
1705 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1706 for a universal character name is less than 0x20 or in the range
1707 0x7F-0x9F (inclusive), or if the universal character name
1708 designates a character in the basic source character set, then the
1709 program is ill-formed.
1711 We assume that wchar_t is Unicode, so we don't need to do any
1712 mapping. Is this ever wrong?
1714 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1715 LIMIT is the end of the string or charconst. PSTR is updated to
1716 point after the UCS on return, and the UCS is written into PC. */
1719 maybe_read_ucs (pfile, pstr, limit, pc)
1721 const unsigned char **pstr;
1722 const unsigned char *limit;
1725 const unsigned char *p = *pstr;
1726 unsigned int code = 0;
1727 unsigned int c = *pc, length;
1729 /* Only attempt to interpret a UCS for C++ and C99. */
1730 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1733 if (CPP_WTRADITIONAL (pfile))
1734 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* The lower-case u form expects 4 hex digits; otherwise 8 are
expected. */
1736 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT. */
1738 if ((size_t) (limit - p) < length)
1740 cpp_error (pfile, "incomplete universal-character-name");
1741 /* Skip to the end to avoid more diagnostics. */
/* LENGTH counts the digits still expected; each digit shifts four
more bits into CODE. */
1746 for (; length; length--, p++)
1750 code = (code << 4) + hex_digit_value (c);
1754 "non-hex digit '%c' in universal-character-name", c);
1755 /* We shouldn't skip in case there are multibyte chars. */
1761 #ifdef TARGET_EBCDIC
1762 cpp_error (pfile, "universal-character-name on EBCDIC target");
1763 code = 0x3f; /* EBCDIC invalid character */
1765 /* True extended characters are OK. */
/* Codes with bit 31 set, and codes in the range 0xD800 to 0xDFFF,
are excluded from the accepted set. */
1767 && !(code & 0x80000000)
1768 && !(code >= 0xD800 && code <= 0xDFFF))
1770 /* The standard permits $, @ and ` to be specified as UCNs. We use
1771 hex escapes so that this also works with EBCDIC hosts. */
1772 else if (code == 0x24 || code == 0x40 || code == 0x60)
1774 /* Don't give another error if one occurred above. */
1775 else if (length == 0)
1776 cpp_error (pfile, "universal-character-name out of range");
1784 /* Interpret an escape sequence, and return its value. PSTR points to
1785 the input pointer, which is just after the backslash. LIMIT is how
1786 much text we have. MASK is a bitmask for the precision for the
1787 destination type (char or wchar_t). TRADITIONAL, if true, does not
1788 interpret escapes that did not exist in traditional C.
1790 Handles all relevant diagnostics. */
1793 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1795 const unsigned char **pstr;
1796 const unsigned char *limit;
1797 unsigned HOST_WIDE_INT mask;
/* C is the character that selects the escape; STR is left pointing
just past it. */
1801 const unsigned char *str = *pstr;
1802 unsigned int c = *str++;
/* These four characters stand for themselves. */
1806 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the corresponding TARGET_ character values. */
1807 case 'b': c = TARGET_BS; break;
1808 case 'f': c = TARGET_FF; break;
1809 case 'n': c = TARGET_NEWLINE; break;
1810 case 'r': c = TARGET_CR; break;
1811 case 't': c = TARGET_TAB; break;
1812 case 'v': c = TARGET_VT; break;
1814 case '(': case '{': case '[': case '%':
1815 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1816 '\%' is used to prevent SCCS from getting confused. */
1817 unknown = CPP_PEDANTIC (pfile);
1821 if (CPP_WTRADITIONAL (pfile))
1822 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1828 if (CPP_PEDANTIC (pfile))
1829 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are delegated to maybe_read_ucs, whose
return value is stored in UNKNOWN. */
1834 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1838 if (CPP_WTRADITIONAL (pfile))
1839 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1843 unsigned int i = 0, overflow = 0;
1844 int digits_found = 0;
/* OVERFLOW collects any high bits of I that the next 4-bit shift
would discard. */
1852 overflow |= i ^ (i << 4 >> 4);
1853 i = (i << 4) + hex_digit_value (c);
1858 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range when bits were lost or when I has bits outside MASK. */
1860 if (overflow | (i != (i & mask)))
1862 cpp_pedwarn (pfile, "hex escape sequence out of range");
1869 case '0': case '1': case '2': case '3':
1870 case '4': case '5': case '6': case '7':
/* I starts from the value of the first octal digit. */
1872 unsigned int i = c - '0';
1875 while (str < limit && ++count < 3)
1878 if (c < '0' || c > '7')
1881 i = (i << 3) + c - '0';
1884 if (i != (i & mask))
1886 cpp_pedwarn (pfile, "octal escape sequence out of range");
1901 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1903 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1907 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Default each MAX_ size macro to the corresponding plain type size
macro when no MAX_ value has been defined already. */
1913 #ifndef MAX_CHAR_TYPE_SIZE
1914 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1917 #ifndef MAX_WCHAR_TYPE_SIZE
1918 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1921 /* Interpret a (possibly wide) character constant in TOKEN.
1922 WARN_MULTI warns about multi-character charconsts, if not
1923 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1924 that did not exist in traditional C. PCHARS_SEEN points to a
1925 variable that is filled in with the number of characters seen. */
1927 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1929 const cpp_token *token;
1932 unsigned int *pchars_seen;
/* STR and LIMIT delimit the stored text of the token. */
1934 const unsigned char *str = token->val.str.text;
1935 const unsigned char *limit = str + token->val.str.len;
1936 unsigned int chars_seen = 0;
1937 unsigned int width, max_chars, c;
1938 unsigned HOST_WIDE_INT mask;
1939 HOST_WIDE_INT result = 0;
1941 #ifdef MULTIBYTE_CHARS
1942 (void) local_mbtowc (NULL, NULL, 0);
1945 /* Width in bits. */
1946 if (token->type == CPP_CHAR)
1947 width = MAX_CHAR_TYPE_SIZE;
1949 width = MAX_WCHAR_TYPE_SIZE;
/* MASK keeps the low WIDTH bits when WIDTH is narrower than
HOST_WIDE_INT. */
1951 if (width < HOST_BITS_PER_WIDE_INT)
1952 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* MAX_CHARS is how many WIDTH-bit characters fit in
HOST_BITS_PER_WIDE_INT bits. */
1955 max_chars = HOST_BITS_PER_WIDE_INT / width;
1959 #ifdef MULTIBYTE_CHARS
1963 char_len = local_mbtowc (&wc, str, limit - str);
1966 cpp_warning (pfile, "ignoring invalid multibyte character");
1979 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1981 #ifdef MAP_CHARACTER
1983 c = MAP_CHARACTER (c);
1986 /* Merge character into result; ignore excess chars. */
1987 if (++chars_seen <= max_chars)
1989 if (width < HOST_BITS_PER_WIDE_INT)
1990 result = (result << width) | (c & mask);
1996 if (chars_seen == 0)
1997 cpp_error (pfile, "empty character constant");
1998 else if (chars_seen > max_chars)
/* CHARS_SEEN is clamped to MAX_CHARS before it is used below and
reported to the caller. */
2000 chars_seen = max_chars;
2001 cpp_warning (pfile, "character constant too long");
2003 else if (chars_seen > 1 && !traditional && warn_multi)
2004 cpp_warning (pfile, "multi-character character constant");
2006 /* If char type is signed, sign-extend the constant. The
2007 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2008 if (token->type == CPP_CHAR && chars_seen)
/* NOTE(review): NBITS can be as large as HOST_BITS_PER_WIDE_INT, but
the shift below is done in unsigned int with a count of
HOST_BITS_PER_INT - NBITS. If HOST_BITS_PER_WIDE_INT exceeds
HOST_BITS_PER_INT that count looks like it can go negative - confirm.
This local MASK also shadows the function-level MASK declared
above. */
2010 unsigned int nbits = chars_seen * width;
2011 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* The test is true when __CHAR_UNSIGNED__ is a macro or when the top
bit of the NBITS-wide value is clear. */
2013 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2014 || ((result >> (nbits - 1)) & 1) == 0)
2020 *pchars_seen = chars_seen;
/* The default alignment is the offset of member U within struct dummy. */
2036 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Returns nonzero when CHUNK is non-null, is not the locked chunk of
POOL, and spans at least twice SIZE bytes between its base and its
limit. */
2039 chunk_suitable (pool, chunk, size)
2044 /* Being at least twice SIZE means we can use memcpy in
2045 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2047 return (chunk && pool->locked != chunk
2048 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2051 /* Returns the end of the new pool. PTR points to a char in the old
2052 pool, and is updated to point to the same char in the new pool. */
2054 _cpp_next_chunk (pool, len, ptr)
2057 unsigned char **ptr;
/* The candidate is the chunk that follows the current one. */
2059 cpp_chunk *chunk = pool->cur->next;
2061 /* LEN is the minimum size we want in the new pool. */
2062 len += POOL_ROOM (pool);
2063 if (! chunk_suitable (pool, chunk, len))
/* The candidate is not suitable, so a new chunk is created and
linked in directly after the current chunk. */
2065 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2067 chunk->next = pool->cur->next;
2068 pool->cur->next = chunk;
2071 /* Update the pointer before changing chunk's front. */
2073 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy POOL_ROOM bytes from the current front to the base of the
chosen chunk, which then becomes its front. */
2075 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2076 chunk->front = chunk->base;
2079 return POOL_LIMIT (pool);
/* SIZE is first adjusted with POOL_ALIGN to DEFAULT_ALIGNMENT. One
xmalloc call then provides the data area together with room for the
cpp_chunk descriptor. */
2086 unsigned char *base;
2089 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2090 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2091 /* Put the chunk descriptor at the end. Then chunk overruns will
2092 cause obvious chaos. */
2093 result = (cpp_chunk *) (base + size);
/* The data area runs from BASE up to the descriptor; FRONT starts at
BASE. */
2094 result->base = base;
2095 result->front = base;
2096 result->limit = base + size;
2103 _cpp_init_pool (pool, size, align, temp)
2105 unsigned int size, align, temp;
2108 align = DEFAULT_ALIGNMENT;
/* ALIGN & (ALIGN - 1) is nonzero exactly when ALIGN has more than
one bit set. */
2109 if (align & (align - 1))
/* The alignment is recorded and the first chunk of SIZE bytes is
created. */
2111 pool->align = align;
2112 pool->cur = new_chunk (size);
2116 pool->cur->next = pool->cur;
2120 _cpp_lock_pool (pool)
/* The lock count is incremented on every call; only the first lock
records the current chunk as the locked chunk. */
2123 if (pool->locks++ == 0)
2124 pool->locked = pool->cur;
2128 _cpp_unlock_pool (pool)
/* The lock count is decremented on every call; the guarded statement
runs only when the count reaches zero. */
2131 if (--pool->locks == 0)
2136 _cpp_free_pool (pool)
/* The walk starts at the current chunk of POOL. */
2139 cpp_chunk *chunk = pool->cur, *next;
/* It stops on a null chunk or on arriving back at the current chunk. */
2147 while (chunk && chunk != pool->cur);
2150 /* Reserve LEN bytes from a memory pool. */
2152 _cpp_pool_reserve (pool, len)
/* LEN is first adjusted with POOL_ALIGN to the alignment of POOL. */
2156 len = POOL_ALIGN (len, pool->align);
/* Move to another chunk when the current one has less than LEN bytes
of room; no pointer needs relocating, so PTR is passed as 0. */
2157 if (len > (unsigned int) POOL_ROOM (pool))
2158 _cpp_next_chunk (pool, len, 0);
/* The reserved space starts at the front of the pool. */
2160 return POOL_FRONT (pool);
2163 /* Allocate LEN bytes from a memory pool. */
2165 _cpp_pool_alloc (pool, len)
2169 unsigned char *result = _cpp_pool_reserve (pool, len);
2171 POOL_COMMIT (pool, len);