1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
/* Members of one spelling-table entry: CATEGORY is the spell_type that
   TOKEN_SPELL reports for a token type, NAME is the text that TOKEN_NAME
   reports for it.  */
67 enum spell_type category;
68 const unsigned char *name;
/* Spellings for tokens flagged DIGRAPH; the spelling routines index this
   array by (token type - CPP_FIRST_DIGRAPH).  */
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
/* Expanders for TTYPE_TABLE: an OP entry always gets SPELL_OPERATOR and
   the literal S as its name; a TK entry gets S as its category and the
   stringified enumerator E as its name.  */
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
/* Accessors that look a token up in token_spellings by its type.  */
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of this file's static helper routines.  */
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
106 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 /* Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
/* TOKEN is the token to test; STRING is the text its identifier name is
   compared against.  */
114 cpp_ideq (token, string)
115 const cpp_token *token;
/* Only CPP_NAME tokens carry a node whose name can be compared.  */
118 if (token->type != CPP_NAME)
/* The ustrcmp result is negated, so a match yields non-zero.  */
121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
/* PFILE supplies the buffer being read (pfile->buffer).  NEWLINE_CHAR is
   the line-terminator character the caller has just consumed.  */
127 handle_newline (pfile, newline_char)
129 cppchar_t newline_char;
/* Stays EOF if the buffer ends immediately after the newline.  */
132 cppchar_t next = EOF;
135 buffer = pfile->buffer;
/* A fresh line starts here: discard the tab adjustment accumulated by
   adjust_column and measure columns from the current read position.  */
136 buffer->col_adjust = 0;
137 buffer->line_base = buffer->cur;
139 /* Handle CR-LF and LF-CR combinations, get the next character. */
140 if (buffer->cur < buffer->rlimit)
142 next = *buffer->cur++;
/* The two character codes sum to CR + LF when NEWLINE_CHAR and NEXT are
   a CR/LF pair in either order (assuming NEWLINE_CHAR is one of the
   two); line_base is then moved past the second character as well.  */
143 if (next + newline_char == '\r' + '\n')
145 buffer->line_base = buffer->cur;
146 if (buffer->cur < buffer->rlimit)
147 next = *buffer->cur++;
/* Publish the look-ahead character for the rest of the lexer.  */
153 buffer->read_ahead = next;
157 /* Subroutine of skip_escaped_newlines; called when a trigraph is
158 encountered. It warns if necessary, and returns true if the
159 trigraph should be honoured. FROM_CHAR is the third character of a
160 trigraph, and presumed to be the previous character for position reporting. */
/* PFILE is the reader; FROM_CHAR indexes _cpp_trigraph_map to obtain the
   replacement character quoted in the conversion warning.  */
163 trigraph_ok (pfile, from_char)
/* Whether trigraphs are honoured at all is a reader option.  */
167 int accept = CPP_OPTION (pfile, trigraphs);
169 /* Don't warn about trigraphs in comments. */
170 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
172 cpp_buffer *buffer = pfile->buffer;
175 cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
176 "trigraph ??%c converted to %c",
178 (int) _cpp_trigraph_map[from_char]);
/* last_Wtrigraphs remembers the buffer position of the most recent
   ignored-trigraph warning so the same position is not reported twice.  */
179 else if (buffer->cur != buffer->last_Wtrigraphs)
181 buffer->last_Wtrigraphs = buffer->cur;
182 cpp_warning_with_line (pfile, pfile->line,
183 CPP_BUF_COL (buffer) - 2,
184 "trigraph ??%c ignored", (int) from_char);
/* Records T as the type of the token being built and resets
   buffer->read_ahead to EOF, i.e. the look-ahead character is taken as
   part of the token.  */
191 /* Assumes local variables buffer and result. */
192 #define ACCEPT_CHAR(t) \
193 do { result->type = t; buffer->read_ahead = EOF; } while (0)
195 /* When we move to multibyte character sets, add to these something
196 that saves and restores the state of the multibyte conversion
197 library. This probably involves saving and restoring a "cookie".
198 In the case of glibc it is an 8-byte structure, so is not a high
199 overhead operation. In any case, it's out of the fast path. */
/* For now the only state is the buffer read position, kept in a local
   variable named saved_cur that the code using these macros declares;
   a local named buffer is assumed as well.  */
200 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
201 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
203 /* Skips any escaped newlines introduced by NEXT, which is either a
204 '?' or a '\\'. Returns the next character, which will also have
205 been placed in buffer->read_ahead. This routine performs
206 preprocessing stages 1 and 2 of the ISO C standard. */
/* PFILE is the reader; NEXT is the '?' or '\\' the caller has just read
   from pfile->buffer.  */
208 skip_escaped_newlines (pfile, next)
212 cpp_buffer *buffer = pfile->buffer;
214 /* Only do this if we apply stages 1 and 2. */
215 if (!buffer->from_stage3)
/* Read position recorded by SAVE_STATE and put back by RESTORE_STATE.  */
218 const unsigned char *saved_cur;
/* Nothing can follow NEXT at the end of the buffer.  */
223 if (buffer->cur == buffer->rlimit)
/* A trigraph needs a second '?' and then one more character.  */
229 next1 = *buffer->cur++;
230 if (next1 != '?' || buffer->cur == buffer->rlimit)
/* The third character must have an entry in _cpp_trigraph_map, and
   trigraph_ok must agree to honour the trigraph.  */
236 next1 = *buffer->cur++;
237 if (!_cpp_trigraph_map[next1]
238 || !trigraph_ok (pfile, next1))
244 /* We have a full trigraph here. */
245 next = _cpp_trigraph_map[next1];
246 if (next != '\\' || buffer->cur == buffer->rlimit)
251 /* We have a backslash, and room for at least one more character. */
/* Step over any horizontal space between the backslash and the newline;
   the warning further down reports when some was found.  */
255 next1 = *buffer->cur++;
256 if (!is_nvspace (next1))
260 while (buffer->cur < buffer->rlimit);
/* Only a vertical-space character completes an escaped newline.  */
262 if (!is_vspace (next1))
268 if (space && !pfile->state.lexing_comment)
269 cpp_warning (pfile, "backslash and newline separated by space");
271 next = handle_newline (pfile, next1);
273 cpp_pedwarn (pfile, "backslash-newline at end of file");
/* The character after the splice may itself begin another trigraph or
   escaped newline, so go round again.  */
275 while (next == '\\' || next == '?');
278 buffer->read_ahead = next;
282 /* Obtain the next character, after trigraph conversion and skipping
283 an arbitrary string of escaped newlines. The common case of no
284 trigraphs or escaped newlines falls through quickly. */
/* PFILE is the reader; the character comes from pfile->buffer.  The value
   computed is also left in buffer->read_ahead, and is EOF when the
   buffer is already exhausted.  */
286 get_effective_char (pfile)
289 cpp_buffer *buffer = pfile->buffer;
290 cppchar_t next = EOF;
292 if (buffer->cur < buffer->rlimit)
294 next = *buffer->cur++;
296 /* '?' can introduce trigraphs (and therefore backslash); '\\'
297 can introduce escaped newlines, which we want to skip, or
298 UCNs, which, depending upon lexer state, we will handle in
300 if (next == '?' || next == '\\')
301 next = skip_escaped_newlines (pfile, next);
304 buffer->read_ahead = next;
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 non-zero if comment terminated by EOF, zero otherwise. */
/* PFILE is the reader; scanning starts at pfile->buffer->cur.  The
   visible return expression is zero only when the last two characters
   examined were a star followed by a slash.  On exit lexing_comment is
   cleared and read_ahead is reset to EOF.  */
312 skip_block_comment (pfile)
315 cpp_buffer *buffer = pfile->buffer;
316 cppchar_t c = EOF, prevc = EOF;
/* While set, trigraph_ok and skip_escaped_newlines suppress some of
   their warnings.  */
318 pfile->state.lexing_comment = 1;
319 while (buffer->cur != buffer->rlimit)
/* PREVC trails C by one character so the two-character terminator can
   be recognised.  */
321 prevc = c, c = *buffer->cur++;
324 /* FIXME: For speed, create a new character class of characters
325 of interest inside block comments. */
326 if (c == '?' || c == '\\')
327 c = skip_escaped_newlines (pfile, c);
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
336 /* Warn about potential nested comments, but not if the '/'
337 comes immediately before the true comment delimiter.
338 Don't bother to get it right across escaped newlines. */
339 if (CPP_OPTION (pfile, warn_comments)
340 && buffer->cur != buffer->rlimit)
342 prevc = c, c = *buffer->cur++;
343 if (c == '*' && buffer->cur != buffer->rlimit)
345 prevc = c, c = *buffer->cur++;
347 cpp_warning_with_line (pfile, pfile->line,
348 CPP_BUF_COL (buffer) - 2,
349 "\"/*\" within comment");
354 else if (is_vspace (c))
356 prevc = c, c = handle_newline (pfile, c);
360 adjust_column (pfile);
363 pfile->state.lexing_comment = 0;
364 buffer->read_ahead = EOF;
365 return c != '/' || prevc != '*';
368 /* Skip a C++ line comment. Handles escaped newlines. Returns
369 non-zero if a multiline comment. The following new line, if any,
370 is left in buffer->read_ahead. */
/* PFILE is the reader; scanning starts at pfile->buffer->cur.  */
372 skip_line_comment (pfile)
375 cpp_buffer *buffer = pfile->buffer;
/* Remembered so that a change in pfile->line during the scan can be
   reported to the caller through the return value.  */
376 unsigned int orig_line = pfile->line;
/* While set, trigraph_ok and skip_escaped_newlines suppress some of
   their warnings.  */
379 pfile->state.lexing_comment = 1;
383 if (buffer->cur == buffer->rlimit)
387 if (c == '?' || c == '\\')
388 c = skip_escaped_newlines (pfile, c);
/* Stop at the first vertical-space character.  */
390 while (!is_vspace (c));
392 pfile->state.lexing_comment = 0;
393 buffer->read_ahead = c; /* Leave any newline for caller. */
394 return orig_line != pfile->line;
397 /* pfile->buffer->cur is one beyond the \t character. Update
398 col_adjust so we track the column correctly. */
/* PFILE is the reader; the adjustment is applied to pfile->buffer.
   Worked example of the formula below: with a tabstop of 8 and COL equal
   to 2, col_adjust grows by 8 - 2 - 1 = 5.  */
400 adjust_column (pfile)
403 cpp_buffer *buffer = pfile->buffer;
404 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
406 /* Round it up to multiple of the tabstop, but subtract 1 since the
407 tab itself occupies a character position. */
408 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
409 - col % CPP_OPTION (pfile, tabstop)) - 1;
412 /* Skips whitespace, saving the next non-whitespace character.
413 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
414 tokens might be assigned an incorrect column. */
/* PFILE is the reader.  C is the whitespace character the caller has
   already read; scanning continues from pfile->buffer->cur.  */
416 skip_whitespace (pfile, c)
420 cpp_buffer *buffer = pfile->buffer;
/* NOTE(review): presumably limits the null-character warning to one per
   call; the lines that test and set it are not visible here.  */
421 unsigned int warned = 0;
425 /* Horizontal space always OK. */
429 adjust_column (pfile);
430 /* Just \f \v or \0 left. */
435 cpp_warning (pfile, "null character(s) ignored");
439 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
440 cpp_pedwarn_with_line (pfile, pfile->line,
441 CPP_BUF_COL (buffer),
442 "%s in preprocessing directive",
443 c == '\f' ? "form feed" : "vertical tab");
446 if (buffer->cur == buffer->rlimit)
450 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
451 while (is_nvspace (c));
453 /* Remember the next character. */
454 buffer->read_ahead = c;
457 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
/* STRING is the spelling to test; each of its string->len characters is
   checked with is_idchar.  */
460 name_p (pfile, string)
462 const cpp_string *string;
466 for (i = 0; i < string->len; i++)
467 if (!is_idchar (string->text[i]))
473 /* Parse an identifier, skipping embedded backslash-newlines.
474 Calculate the hash value of the token while parsing, for improved
475 performance. The hashing algorithm *must* match cpp_lookup(). */
/* PFILE is the reader.  C is the first character of the identifier,
   already read by the caller; it is the first character appended to the
   obstack.  RESULT is the node ht_lookup gives for the collected
   spelling (presumably what is returned; the return statement is not
   visible here).  */
477 static cpp_hashnode *
478 parse_identifier (pfile, c)
482 cpp_hashnode *result;
483 cpp_buffer *buffer = pfile->buffer;
484 unsigned int saw_dollar = 0, len;
/* The spelling is accumulated directly on the hash table's obstack.  */
485 struct obstack *stack = &pfile->hash_table->stack;
491 obstack_1grow (stack, c);
497 if (buffer->cur == buffer->rlimit)
502 while (is_idchar (c));
504 /* Potential escaped newline? */
505 if (c != '?' && c != '\\')
507 c = skip_escaped_newlines (pfile, c);
509 while (is_idchar (c));
511 /* Remember the next character. */
512 buffer->read_ahead = c;
514 /* $ is not an identifier character in the standard, but is commonly
515 accepted as an extension. Don't warn about it in skipped
516 conditional blocks. */
517 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
518 cpp_pedwarn (pfile, "'$' character(s) in identifier");
520 /* Identifiers are null-terminated. */
521 len = obstack_object_size (stack);
522 obstack_1grow (stack, '\0');
524 /* This routine commits the memory if necessary. */
525 result = (cpp_hashnode *)
526 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
528 /* Some identifiers require diagnostics when lexed. */
529 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
531 /* It is allowed to poison the same identifier twice. */
532 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
533 cpp_error (pfile, "attempt to use poisoned \"%s\"",
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
537 replacement list of a variadic macro. */
538 if (result == pfile->spec_nodes.n__VA_ARGS__
539 && !pfile->state.va_args_ok)
540 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
546 /* Parse a number, skipping embedded backslash-newlines. */
/* PFILE is the reader.  NUMBER receives the spelling: its text is set to
   the front of the reader's ident_pool and its len to the number of
   bytes stored.  C is the first character, already read by the caller.
   LEADING_PERIOD is the flag behind the leading-period step noted below
   (the test itself is not visible here).  */
548 parse_number (pfile, number, c, leading_period)
554 cpp_buffer *buffer = pfile->buffer;
555 cpp_pool *pool = &pfile->ident_pool;
/* DEST is the write cursor in the pool; LIMIT bounds the current chunk
   and is refreshed whenever _cpp_next_chunk is called.  */
556 unsigned char *dest, *limit;
558 dest = POOL_FRONT (pool);
559 limit = POOL_LIMIT (pool);
561 /* Place a leading period. */
565 limit = _cpp_next_chunk (pool, 0, &dest);
573 /* Need room for terminating null. */
574 if (dest + 1 >= limit)
575 limit = _cpp_next_chunk (pool, 0, &dest);
579 if (buffer->cur == buffer->rlimit)
/* VALID_SIGN (defined elsewhere) is given the previously stored
   character, dest[-1], along with the candidate C.  */
584 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
586 /* Potential escaped newline? */
587 if (c != '?' && c != '\\')
589 c = skip_escaped_newlines (pfile, c);
591 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
593 /* Remember the next character. */
594 buffer->read_ahead = c;
596 /* Null-terminate the number. */
599 number->text = POOL_FRONT (pool);
600 number->len = dest - number->text;
/* Commit the spelling plus one byte for its terminator.  */
601 POOL_COMMIT (pool, number->len + 1);
604 /* Subroutine of parse_string. Emits error for unterminated strings. */
/* PFILE is the reader; TERM is the terminator character that was
   expected, interpolated into the error message.  */
606 unterminated (pfile, term)
610 cpp_error (pfile, "missing terminating %c character", term);
/* For a string whose recorded multi-line start (mlstring_pos) is on a
   different line from the current lexer position, also point at where
   the string may have started, then clear the record.  */
612 if (term == '\"' && pfile->mlstring_pos.line
613 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
615 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
616 pfile->mlstring_pos.col,
617 "possible start of unterminated string literal");
618 pfile->mlstring_pos.line = 0;
622 /* Subroutine of parse_string. */
/* PFILE is the reader.  DEST points just past the characters stored so
   far; the front of the reader's ident_pool is taken as their start.
   The visible return value is non-zero when the run of backslashes
   immediately before DEST has even length (including zero).  */
624 unescaped_terminator_p (pfile, dest)
626 const unsigned char *dest;
628 const unsigned char *start, *temp;
630 /* In #include-style directives, terminators are not escapable. */
631 if (pfile->state.angled_headers)
634 start = POOL_FRONT (&pfile->ident_pool);
636 /* An odd number of consecutive backslashes represents an escaped
638 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
641 return ((dest - temp) & 1) == 0;
644 /* Parses a string, character constant, or angle-bracketed header file
645 name. Handles embedded trigraphs and escaped newlines. The stored
646 string is guaranteed NUL-terminated, but it is not guaranteed that
647 this is the first NUL since embedded NULs are preserved.
649 Multi-line strings are allowed, but they are deprecated. */
/* PFILE is the reader.  TOKEN receives the result in val.str: text is the
   front of the reader's ident_pool and len the number of bytes stored.
   TERMINATOR is the closing character to look for.  */
651 parse_string (pfile, token, terminator)
654 cppchar_t terminator;
656 cpp_buffer *buffer = pfile->buffer;
657 cpp_pool *pool = &pfile->ident_pool;
/* DEST is the write cursor in the pool; LIMIT bounds the current chunk
   and is refreshed whenever _cpp_next_chunk is called.  */
658 unsigned char *dest, *limit;
/* Each flag is named after one of the two warnings issued below;
   warned_nulls is tested before the null-character warning.  */
660 bool warned_nulls = false, warned_multi = false;
662 dest = POOL_FRONT (pool);
663 limit = POOL_LIMIT (pool);
667 if (buffer->cur == buffer->rlimit)
673 /* We need space for the terminating NUL. */
675 limit = _cpp_next_chunk (pool, 0, &dest);
679 unterminated (pfile, terminator);
683 /* Handle trigraphs, escaped newlines etc. */
684 if (c == '?' || c == '\\')
685 c = skip_escaped_newlines (pfile, c);
/* A terminator character only ends the literal if
   unescaped_terminator_p accepts it given what has been stored so far.  */
687 if (c == terminator && unescaped_terminator_p (pfile, dest))
692 else if (is_vspace (c))
694 /* In assembly language, silently terminate string and
695 character literals at end of line. This is a kludge
696 around not knowing where comments are. */
697 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
700 /* Character constants and header names may not extend over
701 multiple lines. In Standard C, neither may strings.
702 Unfortunately, we accept multiline strings as an
703 extension, except in #include family directives. */
704 if (terminator != '"' || pfile->state.angled_headers)
706 unterminated (pfile, terminator);
713 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
716 if (pfile->mlstring_pos.line == 0)
717 pfile->mlstring_pos = pfile->lexer_pos;
719 c = handle_newline (pfile, c);
723 else if (c == '\0' && !warned_nulls)
726 cpp_warning (pfile, "null character(s) preserved in literal");
732 /* Remember the next character. */
733 buffer->read_ahead = c;
736 token->val.str.text = POOL_FRONT (pool);
737 token->val.str.len = dest - token->val.str.text;
738 POOL_COMMIT (pool, token->val.str.len + 1);
741 /* The stored comment includes the comment start and any terminator. */
/* PFILE is the reader.  TOKEN is turned into a CPP_COMMENT whose val.str
   describes a copy allocated from the reader's ident_pool.  FROM points
   into the input buffer just after the comment's initial '/'; the copy
   below is placed at buffer + 1 for that reason.  */
743 save_comment (pfile, token, from)
746 const unsigned char *from;
/* Note: this local is the destination copy, not the reader's cpp_buffer
   (which is reached as pfile->buffer below).  */
748 unsigned char *buffer;
751 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
752 /* C++ comments probably (not definitely) have moved past a new
753 line, which we don't want to save in the comment. */
754 if (pfile->buffer->read_ahead != EOF)
756 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
758 token->type = CPP_COMMENT;
759 token->val.str.len = len;
760 token->val.str.text = buffer;
/* Everything after the first byte is copied verbatim from the input.  */
763 memcpy (buffer + 1, from, len - 1);
766 /* Subroutine of lex_token to handle '%'. A little tricky, since we
767 want to avoid stepping back when lexing %:%X. */
/* PFILE is the reader; RESULT is the token being built, whose type and
   flags are set here.  */
769 lex_percent (pfile, result)
773 cpp_buffer *buffer= pfile->buffer;
/* Start from the plain '%' operator; the checks below replace the type
   when a longer token is recognised.  */
776 result->type = CPP_MOD;
777 /* Parsing %:%X could leave an extra character. */
778 if (buffer->extra_char == EOF)
779 c = get_effective_char (pfile);
782 c = buffer->read_ahead = buffer->extra_char;
783 buffer->extra_char = EOF;
787 ACCEPT_CHAR (CPP_MOD_EQ);
/* The remaining spellings are digraphs and only apply when the digraphs
   option is enabled.  */
788 else if (CPP_OPTION (pfile, digraphs))
792 result->flags |= DIGRAPH;
793 ACCEPT_CHAR (CPP_HASH);
794 if (get_effective_char (pfile) == '%')
/* One more character decides between the four-character paste digraph
   and a hash digraph followed by a separate '%'; it is parked in
   extra_char until that is known.  */
796 buffer->extra_char = get_effective_char (pfile);
797 if (buffer->extra_char == ':')
799 buffer->extra_char = EOF;
800 ACCEPT_CHAR (CPP_PASTE);
803 /* We'll catch the extra_char when we're called back. */
804 buffer->read_ahead = '%';
809 result->flags |= DIGRAPH;
810 ACCEPT_CHAR (CPP_CLOSE_BRACE);
815 /* Subroutine of lex_token to handle '.'. This is tricky, since we
816 want to avoid stepping back when lexing '...' or '.123'. In the
817 latter case we should also set a flag for parse_number. */
/* PFILE is the reader; RESULT is the token being built, whose type (and,
   for a number, val.str) is set here.  */
819 lex_dot (pfile, result)
823 cpp_buffer *buffer = pfile->buffer;
826 /* Parsing ..X could leave an extra character. */
827 if (buffer->extra_char == EOF)
828 c = get_effective_char (pfile);
831 c = buffer->read_ahead = buffer->extra_char;
832 buffer->extra_char = EOF;
835 /* All known character sets have 0...9 contiguous. */
836 if (c >= '0' && c <= '9')
838 result->type = CPP_NUMBER;
/* The final argument is parse_number's leading_period flag.  */
839 parse_number (pfile, &result->val.str, c, 1);
843 result->type = CPP_DOT;
/* A further character decides whether this is an ellipsis; it is parked
   in extra_char until that is known.  */
846 buffer->extra_char = get_effective_char (pfile);
847 if (buffer->extra_char == '.')
849 buffer->extra_char = EOF;
850 ACCEPT_CHAR (CPP_ELLIPSIS);
853 /* We'll catch the extra_char when we're called back. */
854 buffer->read_ahead = '.';
856 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
857 ACCEPT_CHAR (CPP_DOT_STAR);
/* Lex one token from pfile->buffer into RESULT.  Visible effects:
   result->flags is seeded from buffer->saved_flags (which is then
   cleared), result->line and result->col are taken from the reader's
   line and lexer_pos, and result->type (plus val for names, numbers,
   strings and comments) is filled in by the case that matches the first
   character.  Outside a directive the token also clears
   pfile->mi_valid.  */
862 _cpp_lex_token (pfile, result)
868 const unsigned char *comment_start;
872 buffer = pfile->buffer;
873 result->flags = buffer->saved_flags;
874 buffer->saved_flags = 0;
875 bol = (buffer->cur <= buffer->line_base + 1
876 && pfile->lexer_pos.output_line == pfile->line);
/* Stamp the token with the current line before any character is
   consumed.  */
878 pfile->lexer_pos.line = pfile->line;
879 result->line = pfile->line;
881 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
/* Start from the pending look-ahead character; when there is none (EOF)
   and input remains, the column is advanced by one (the statement that
   reads the character is not visible here).  */
883 c = buffer->read_ahead;
884 if (c == EOF && buffer->cur < buffer->rlimit)
887 pfile->lexer_pos.col++;
889 result->col = pfile->lexer_pos.col;
892 buffer->read_ahead = EOF;
896 /* Non-empty files should end in a newline. Don't warn for
897 command line and _Pragma buffers. */
898 if (pfile->lexer_pos.col != 0)
900 /* Account for the missing \n, prevent multiple warnings. */
902 pfile->lexer_pos.col = 0;
903 if (!buffer->from_stage3)
904 cpp_pedwarn (pfile, "no newline at end of file");
907 /* To prevent bogus diagnostics, only pop the buffer when
908 in-progress directives and arguments have been taken care of.
909 Decrement the line to terminate an in-progress directive. */
910 if (pfile->state.in_directive)
911 pfile->lexer_pos.output_line = pfile->line--;
912 else if (! pfile->state.parsing_args)
914 /* Don't pop the last buffer. */
917 unsigned char stop = buffer->return_at_eof;
919 _cpp_pop_buffer (pfile);
924 result->type = CPP_EOF;
927 case ' ': case '\t': case '\f': case '\v': case '\0':
928 skip_whitespace (pfile, c);
929 result->flags |= PREV_WHITE;
932 case '\n': case '\r':
933 if (pfile->state.in_directive)
935 result->type = CPP_EOF;
936 if (pfile->state.parsing_args)
937 buffer->read_ahead = c;
940 handle_newline (pfile, c);
941 /* Decrementing pfile->line allows directives to
942 recognise that the newline has been seen, and also
943 means that diagnostics don't point to the next line. */
944 pfile->lexer_pos.output_line = pfile->line--;
949 handle_newline (pfile, c);
950 /* This is a new line, so clear any white space flag. Newlines
951 in arguments are white space (6.10.3.10); parse_arg takes
953 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
955 if (pfile->state.parsing_args != 2)
956 pfile->lexer_pos.output_line = pfile->line;
961 /* These could start an escaped newline, or '?' a trigraph. Let
962 skip_escaped_newlines do all the work. */
964 unsigned int line = pfile->line;
966 c = skip_escaped_newlines (pfile, c);
967 if (line != pfile->line)
968 /* We had at least one escaped newline of some sort, and the
969 next character is in buffer->read_ahead. Update the
970 token's line and column. */
973 /* We are either the original '?' or '\\', or a trigraph. */
974 result->type = CPP_QUERY;
975 buffer->read_ahead = EOF;
983 case '0': case '1': case '2': case '3': case '4':
984 case '5': case '6': case '7': case '8': case '9':
985 result->type = CPP_NUMBER;
986 parse_number (pfile, &result->val.str, c, 0);
990 if (!CPP_OPTION (pfile, dollars_in_ident))
992 /* Fall through... */
995 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
996 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
997 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
998 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1000 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1001 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1002 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1003 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1005 result->type = CPP_NAME;
1006 result->val.node = parse_identifier (pfile, c);
1008 /* 'L' may introduce wide characters or strings. */
1009 if (result->val.node == pfile->spec_nodes.n_L)
1011 c = buffer->read_ahead; /* For make_string. */
1012 if (c == '\'' || c == '"')
1014 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1018 /* Convert named operators to their proper types. */
1019 else if (result->val.node->flags & NODE_OPERATOR)
1021 result->flags |= NAMED_OP;
1022 result->type = result->val.node->value.operator;
1028 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1030 parse_string (pfile, result, c);
1034 /* A potential block or line comment. */
1035 comment_start = buffer->cur;
1036 result->type = CPP_DIV;
1037 c = get_effective_char (pfile);
1039 ACCEPT_CHAR (CPP_DIV_EQ);
1040 if (c != '/' && c != '*')
1045 if (skip_block_comment (pfile))
1046 cpp_error (pfile, "unterminated comment");
1050 if (!CPP_OPTION (pfile, cplusplus_comments)
1051 && !CPP_IN_SYSTEM_HEADER (pfile))
1054 /* Warn about comments only if pedantically GNUC89, and not
1055 in system headers. */
1056 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1057 && ! buffer->warned_cplusplus_comments)
1060 "C++ style comments are not allowed in ISO C89");
1062 "(this will be reported only once per input file)");
1063 buffer->warned_cplusplus_comments = 1;
1066 /* Skip_line_comment updates buffer->read_ahead. */
1067 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1068 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1069 pfile->lexer_pos.col,
1070 "multi-line comment");
1073 /* Skipping the comment has updated buffer->read_ahead. */
1074 if (!pfile->state.save_comments)
1076 result->flags |= PREV_WHITE;
1080 /* Save the comment as a token in its own right. */
1081 save_comment (pfile, result, comment_start);
1082 /* Don't do MI optimisation. */
1086 if (pfile->state.angled_headers)
1088 result->type = CPP_HEADER_NAME;
1089 c = '>'; /* terminator. */
1093 result->type = CPP_LESS;
1094 c = get_effective_char (pfile);
1096 ACCEPT_CHAR (CPP_LESS_EQ);
1099 ACCEPT_CHAR (CPP_LSHIFT);
1100 if (get_effective_char (pfile) == '=')
1101 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1103 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1105 ACCEPT_CHAR (CPP_MIN);
1106 if (get_effective_char (pfile) == '=')
1107 ACCEPT_CHAR (CPP_MIN_EQ);
1109 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1111 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1112 result->flags |= DIGRAPH;
1114 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1116 ACCEPT_CHAR (CPP_OPEN_BRACE);
1117 result->flags |= DIGRAPH;
1122 result->type = CPP_GREATER;
1123 c = get_effective_char (pfile);
1125 ACCEPT_CHAR (CPP_GREATER_EQ);
1128 ACCEPT_CHAR (CPP_RSHIFT);
1129 if (get_effective_char (pfile) == '=')
1130 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1132 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1134 ACCEPT_CHAR (CPP_MAX);
1135 if (get_effective_char (pfile) == '=')
1136 ACCEPT_CHAR (CPP_MAX_EQ);
1141 lex_percent (pfile, result);
1142 if (result->type == CPP_HASH)
1147 lex_dot (pfile, result);
1151 result->type = CPP_PLUS;
1152 c = get_effective_char (pfile);
1154 ACCEPT_CHAR (CPP_PLUS_EQ);
1156 ACCEPT_CHAR (CPP_PLUS_PLUS);
1160 result->type = CPP_MINUS;
1161 c = get_effective_char (pfile);
1164 ACCEPT_CHAR (CPP_DEREF);
1165 if (CPP_OPTION (pfile, cplusplus)
1166 && get_effective_char (pfile) == '*')
1167 ACCEPT_CHAR (CPP_DEREF_STAR);
1170 ACCEPT_CHAR (CPP_MINUS_EQ);
1172 ACCEPT_CHAR (CPP_MINUS_MINUS);
1176 result->type = CPP_MULT;
1177 if (get_effective_char (pfile) == '=')
1178 ACCEPT_CHAR (CPP_MULT_EQ);
1182 result->type = CPP_EQ;
1183 if (get_effective_char (pfile) == '=')
1184 ACCEPT_CHAR (CPP_EQ_EQ);
1188 result->type = CPP_NOT;
1189 if (get_effective_char (pfile) == '=')
1190 ACCEPT_CHAR (CPP_NOT_EQ);
1194 result->type = CPP_AND;
1195 c = get_effective_char (pfile);
1197 ACCEPT_CHAR (CPP_AND_EQ);
1199 ACCEPT_CHAR (CPP_AND_AND);
1203 c = buffer->extra_char; /* Can be set by error condition below. */
1206 buffer->read_ahead = c;
1207 buffer->extra_char = EOF;
1210 c = get_effective_char (pfile);
1214 ACCEPT_CHAR (CPP_PASTE);
1218 result->type = CPP_HASH;
1222 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1223 tokens within the list of arguments that would otherwise act
1224 as preprocessing directives, the behavior is undefined.
1226 This implementation will report a hard error, terminate the
1227 macro invocation, and proceed to process the directive. */
1228 if (pfile->state.parsing_args)
1230 pfile->lexer_pos.output_line = pfile->line;
1231 if (pfile->state.parsing_args == 2)
1234 "directives may not be used inside a macro argument");
1235 result->type = CPP_EOF;
1238 /* in_directive can be true inside a _Pragma. */
1239 else if (!pfile->state.in_directive)
1241 /* This is the hash introducing a directive. If the return
1242 value is false, it is an assembler #. */
1243 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1249 result->type = CPP_OR;
1250 c = get_effective_char (pfile);
1252 ACCEPT_CHAR (CPP_OR_EQ);
1254 ACCEPT_CHAR (CPP_OR_OR);
1258 result->type = CPP_XOR;
1259 if (get_effective_char (pfile) == '=')
1260 ACCEPT_CHAR (CPP_XOR_EQ);
1264 result->type = CPP_COLON;
1265 c = get_effective_char (pfile);
1266 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1267 ACCEPT_CHAR (CPP_SCOPE);
1268 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1270 result->flags |= DIGRAPH;
1271 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1275 case '~': result->type = CPP_COMPL; break;
1276 case ',': result->type = CPP_COMMA; break;
1277 case '(': result->type = CPP_OPEN_PAREN; break;
1278 case ')': result->type = CPP_CLOSE_PAREN; break;
1279 case '[': result->type = CPP_OPEN_SQUARE; break;
1280 case ']': result->type = CPP_CLOSE_SQUARE; break;
1281 case '{': result->type = CPP_OPEN_BRACE; break;
1282 case '}': result->type = CPP_CLOSE_BRACE; break;
1283 case ';': result->type = CPP_SEMICOLON; break;
1285 /* @ is a punctuator in Objective C. */
1286 case '@': result->type = CPP_ATSIGN; break;
1290 result->type = CPP_OTHER;
1295 if (!pfile->state.in_directive && pfile->state.skipping)
1298 /* If not in a directive, this token invalidates controlling macros. */
1299 if (!pfile->state.in_directive)
1300 pfile->mi_valid = false;
1303 /* An upper bound on the number of bytes needed to spell a token,
1304 including preceding whitespace. */
/* TOKEN is the token to measure.  */
1306 cpp_token_len (token)
1307 const cpp_token *token;
/* Start from the length of the token's variable-length spelling, if its
   spelling category has one; other categories contribute zero.  */
1311 switch (TOKEN_SPELL (token))
1313 default: len = 0; break;
1314 case SPELL_STRING: len = token->val.str.len; break;
1315 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1317 /* 1 for whitespace, 4 for comment delimiters. */
1321 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1322 already contain enough space to hold the token's spelling.
1323 Returns a pointer to the character after the last character written. */
/* PFILE is used only for the internal-error report at the end.  TOKEN is
   the token to spell.  BUFFER is the write cursor; it is advanced past
   each byte stored.  */
1326 cpp_spell_token (pfile, token, buffer)
1327 cpp_reader *pfile; /* Would be nice to be rid of this... */
1328 const cpp_token *token;
1329 unsigned char *buffer;
/* Dispatch on the spelling category recorded for the token's type.  */
1331 switch (TOKEN_SPELL (token))
1333 case SPELL_OPERATOR:
1335 const unsigned char *spelling;
/* Digraph-flagged operators are spelled from digraph_spellings, indexed
   relative to CPP_FIRST_DIGRAPH; the final branch uses the table name.  */
1338 if (token->flags & DIGRAPH)
1340 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1341 else if (token->flags & NAMED_OP)
1344 spelling = TOKEN_NAME (token);
1346 while ((c = *spelling++) != '\0')
/* Identifier spelling: copy NODE_LEN bytes of the node's name.  */
1353 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1354 buffer += NODE_LEN (token->val.node);
/* String-like tokens: optional tag, opening delimiter, the stored bytes,
   closing delimiter; a zero value means that part is omitted.  */
1359 int left, right, tag;
1360 switch (token->type)
1362 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1363 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1364 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1365 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1366 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1367 default: left = '\0'; right = '\0'; tag = '\0'; break;
1369 if (tag) *buffer++ = tag;
1370 if (left) *buffer++ = left;
1371 memcpy (buffer, token->val.str.text, token->val.str.len);
1372 buffer += token->val.str.len;
1373 if (right) *buffer++ = right;
1378 *buffer++ = token->val.c;
1382 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1389 /* Returns a token as a null-terminated string. The string is
1390 temporary, and automatically freed later. Useful for diagnostics. */
1392 cpp_token_as_text (pfile, token)
1394 const cpp_token *token;
1396 unsigned int len = cpp_token_len (token); /* Upper bound on the size of the spelling. */
1397 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end; /* Buffer comes from PFILE's ident_pool. */
1399 end = cpp_spell_token (pfile, token, start); /* END is one past the last byte written. */
1405 /* Returns the name recorded in token_spellings for TYPE. Used by C front ends. Should really move to using cpp_token_as_text. */
1407 cpp_type2name (type)
1408 enum cpp_ttype type;
1410 return (const char *) token_spellings[type].name; /* NOTE(review): TYPE indexes the table without a range check here. */
1413 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1414 for efficiency - to avoid double-buffering. Also, outputs a space
1415 if PREV_WHITE is flagged. */
1417 cpp_output_token (token, fp)
1418 const cpp_token *token;
1421 if (token->flags & PREV_WHITE) /* The token was preceded by whitespace. */
1424 switch (TOKEN_SPELL (token))
1426 case SPELL_OPERATOR:
1428 const unsigned char *spelling;
1430 if (token->flags & DIGRAPH) /* Digraph spellings live in their own table, indexed from CPP_FIRST_DIGRAPH. */
1432 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1433 else if (token->flags & NAMED_OP)
1436 spelling = TOKEN_NAME (token);
1438 ufputs (spelling, fp);
1444 ufputs (NODE_NAME (token->val.node), fp);
1449 int left, right, tag;
1450 switch (token->type) /* Same delimiter/prefix table as cpp_spell_token uses. */
1452 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1453 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1454 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1455 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1456 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1457 default: left = '\0'; right = '\0'; tag = '\0'; break;
1459 if (tag) putc (tag, fp); /* A zero TAG, LEFT or RIGHT means "emit nothing". */
1460 if (left) putc (left, fp);
1461 fwrite (token->val.str.text, 1, token->val.str.len, fp); /* Text is written by length, not as a C string. */
1462 if (right) putc (right, fp);
1467 putc (token->val.c, fp); /* Token spelled by its single stored character. */
1471 /* An error, most probably. */
1476 /* Compare two tokens A and B. Type and flags must match first; what else is compared depends on A's spelling category. */
1478 _cpp_equiv_tokens (a, b)
1479 const cpp_token *a, *b;
1481 if (a->type == b->type && a->flags == b->flags)
1482 switch (TOKEN_SPELL (a))
1484 default: /* Keep compiler happy. */
1485 case SPELL_OPERATOR:
1488 return a->val.c == b->val.c; /* Character. */
1490 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); /* Macro arguments must also agree on argument number. */
1492 return a->val.node == b->val.node; /* Same node pointer. */
1494 return (a->val.str.len == b->val.str.len
1495 && !memcmp (a->val.str.text, b->val.str.text,
1502 /* Determine whether two tokens can be pasted together, and if so,
1503 what the resulting token is. Returns CPP_EOF if the tokens cannot
1504 be pasted, or the appropriate type for the merged token if they
1507 cpp_can_paste (pfile, token1, token2, digraph)
1509 const cpp_token *token1, *token2;
1512 enum cpp_ttype a = token1->type, b = token2->type;
1513 int cxx = CPP_OPTION (pfile, cplusplus);
1515 /* Treat named operators as if they were ordinary NAMEs. */
1516 if (token1->flags & NAMED_OP)
1518 if (token2->flags & NAMED_OP)
1521 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ) /* Types up to CPP_LAST_EQ map to their '=' form at a fixed enum offset. */
1522 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
1527 if (b == a) return CPP_RSHIFT;
1528 if (b == CPP_QUERY && cxx) return CPP_MAX; /* Only pasted when compiling C++. */
1529 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1532 if (b == a) return CPP_LSHIFT;
1533 if (b == CPP_QUERY && cxx) return CPP_MIN; /* Only pasted when compiling C++. */
1534 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1535 if (CPP_OPTION (pfile, digraphs)) /* Digraph results also set *DIGRAPH to 1. */
1538 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1540 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1544 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1545 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1546 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1549 if (b == a) return CPP_MINUS_MINUS;
1550 if (b == CPP_GREATER) return CPP_DEREF;
1553 if (b == a && cxx) return CPP_SCOPE;
1554 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1555 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1559 if (CPP_OPTION (pfile, digraphs))
1561 if (b == CPP_GREATER)
1562 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1564 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1568 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1571 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1572 if (b == CPP_NUMBER) return CPP_NUMBER;
1576 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH)) /* Both tokens must use the same (digraph or plain) spelling. */
1578 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1582 if (b == CPP_NAME) return CPP_NAME;
1584 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1586 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR; /* TOKEN1 is the identifier node n_L. */
1588 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING; /* TOKEN1 is the identifier node n_L. */
1592 if (b == CPP_NUMBER) return CPP_NUMBER;
1593 if (b == CPP_NAME) return CPP_NUMBER;
1594 if (b == CPP_DOT) return CPP_NUMBER;
1595 /* Numbers cannot have length zero, so this is safe. */
1596 if ((b == CPP_PLUS || b == CPP_MINUS)
1597 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1])) /* Tests the last character of TOKEN1's text. */
1608 /* Returns nonzero if a space should be inserted to avoid an
1609 accidental token paste for output. For simplicity, it is
1610 conservative, and occasionally advises a space where one is not
1611 needed, e.g. "." and ".2". */
1614 cpp_avoid_paste (pfile, token1, token2)
1616 const cpp_token *token1, *token2;
1618 enum cpp_ttype a = token1->type, b = token2->type;
1621 if (token1->flags & NAMED_OP)
1623 if (token2->flags & NAMED_OP)
1627 if (token2->flags & DIGRAPH) /* C becomes the first character of TOKEN2's spelling when it is a digraph or an operator. */
1628 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1629 else if (token_spellings[b].category == SPELL_OPERATOR)
1630 c = token_spellings[b].name[0];
1632 /* Quickly get everything that can paste with an '='. */
1633 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1638 case CPP_GREATER: return c == '>' || c == '?';
1639 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1640 case CPP_PLUS: return c == '+';
1641 case CPP_MINUS: return c == '-' || c == '>';
1642 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1643 case CPP_MOD: return c == ':' || c == '>';
1644 case CPP_AND: return c == '&';
1645 case CPP_OR: return c == '|';
1646 case CPP_COLON: return c == ':' || c == '>';
1647 case CPP_DEREF: return c == '*';
1648 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1649 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1650 case CPP_NAME: return ((b == CPP_NUMBER
1651 && name_p (pfile, &token2->val.str))
1653 || b == CPP_CHAR || b == CPP_STRING); /* L */
1654 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1655 || c == '.' || c == '+' || c == '-');
1656 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1657 && token1->val.c == '@'
1658 && (b == CPP_NAME || b == CPP_STRING)); /* Only with the objc option: '@' before a name or string. */
1665 /* Output all the remaining tokens on the current line, and a newline
1666 character, to FP. Leading whitespace is removed. */
1668 cpp_output_line (pfile, fp)
1674 cpp_get_token (pfile, &token);
1675 token.flags &= ~PREV_WHITE; /* Drop the whitespace flag on the first token only. */
1676 while (token.type != CPP_EOF) /* Keep going until cpp_get_token hands back CPP_EOF. */
1678 cpp_output_token (&token, fp);
1679 cpp_get_token (pfile, &token);
1685 /* Returns the value of a hexadecimal digit. Both letter cases are accepted. NOTE(review): the name line is not visible here; callers in this file use hex_digit_value. */
1690 if (c >= 'a' && c <= 'f')
1691 return c - 'a' + 10; /* 'a'..'f' map to 10..15. */
1692 if (c >= 'A' && c <= 'F')
1693 return c - 'A' + 10; /* 'A'..'F' map to 10..15. */
1694 if (c >= '0' && c <= '9')
1699 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1700 failure if cpplib is not parsing C++ or C99. Such failure is
1701 silent, and no variables are updated. Otherwise returns 0, and
1702 warns if -Wtraditional.
1704 [lex.charset]: The character designated by the universal character
1705 name \UNNNNNNNN is that character whose character short name in
1706 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1707 universal character name \uNNNN is that character whose character
1708 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1709 for a universal character name is less than 0x20 or in the range
1710 0x7F-0x9F (inclusive), or if the universal character name
1711 designates a character in the basic source character set, then the
1712 program is ill-formed.
1714 We assume that wchar_t is Unicode, so we don't need to do any
1715 mapping. Is this ever wrong?
1717 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1718 LIMIT is the end of the string or charconst. PSTR is updated to
1719 point after the UCS on return, and the UCS is written into PC. */
1722 maybe_read_ucs (pfile, pstr, limit, pc)
1724 const unsigned char **pstr;
1725 const unsigned char *limit;
1728 const unsigned char *p = *pstr;
1729 unsigned int code = 0;
1730 unsigned int c = *pc, length;
1732 /* Only attempt to interpret a UCS for C++ and C99. */
1733 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1736 if (CPP_WTRADITIONAL (pfile))
1737 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
1739 length = (c == 'u' ? 4: 8); /* \u takes 4 hex digits, \U takes 8. */
1741 if ((size_t) (limit - p) < length) /* Fewer characters remain before LIMIT than the form requires. */
1743 cpp_error (pfile, "incomplete universal-character-name");
1744 /* Skip to the end to avoid more diagnostics. */
1749 for (; length; length--, p++) /* LENGTH reaches 0 only if the loop runs to completion. */
1753 code = (code << 4) + hex_digit_value (c); /* Accumulate four bits per digit. */
1757 "non-hex digit '%c' in universal-character-name", c);
1758 /* We shouldn't skip in case there are multibyte chars. */
1764 #ifdef TARGET_EBCDIC
1765 cpp_error (pfile, "universal-character-name on EBCDIC target");
1766 code = 0x3f; /* EBCDIC invalid character */
1768 /* True extended characters are OK. */
1770 && !(code & 0x80000000)
1771 && !(code >= 0xD800 && code <= 0xDFFF)) /* Excludes the range 0xD800-0xDFFF. */
1773 /* The standard permits $, @ and ` to be specified as UCNs. We use
1774 hex escapes so that this also works with EBCDIC hosts. */
1775 else if (code == 0x24 || code == 0x40 || code == 0x60)
1777 /* Don't give another error if one occurred above. */
1778 else if (length == 0)
1779 cpp_error (pfile, "universal-character-name out of range");
1787 /* Interpret an escape sequence, and return its value. PSTR points to
1788 the input pointer, which is just after the backslash. LIMIT is how
1789 much text we have. MASK is a bitmask for the precision for the
1790 destination type (char or wchar_t). TRADITIONAL, if true, does not
1791 interpret escapes that did not exist in traditional C.
1793 Handles all relevant diagnostics. */
1796 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1798 const unsigned char **pstr;
1799 const unsigned char *limit;
1800 unsigned HOST_WIDE_INT mask;
1804 const unsigned char *str = *pstr;
1805 unsigned int c = *str++; /* C is the character that selects the escape. */
1809 case '\\': case '\'': case '"': case '?': break; /* These stand for themselves; C is left unchanged. */
1810 case 'b': c = TARGET_BS; break;
1811 case 'f': c = TARGET_FF; break;
1812 case 'n': c = TARGET_NEWLINE; break;
1813 case 'r': c = TARGET_CR; break;
1814 case 't': c = TARGET_TAB; break;
1815 case 'v': c = TARGET_VT; break;
1817 case '(': case '{': case '[': case '%':
1818 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1819 '\%' is used to prevent SCCS from getting confused. */
1820 unknown = CPP_PEDANTIC (pfile); /* Treated as unknown only when pedantic. */
1824 if (CPP_WTRADITIONAL (pfile))
1825 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1831 if (CPP_PEDANTIC (pfile))
1832 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
1837 unknown = maybe_read_ucs (pfile, &str, limit, &c); /* maybe_read_ucs returns 1 when UCS parsing is not enabled. */
1841 if (CPP_WTRADITIONAL (pfile))
1842 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1846 unsigned int i = 0, overflow = 0;
1847 int digits_found = 0;
1855 overflow |= i ^ (i << 4 >> 4); /* Nonzero if the next shift by 4 would push set bits out of I. */
1856 i = (i << 4) + hex_digit_value (c);
1861 cpp_error (pfile, "\\x used with no following hex digits");
1863 if (overflow | (i != (i & mask))) /* Overflowed I, or the value does not fit within MASK. */
1865 cpp_pedwarn (pfile, "hex escape sequence out of range");
1872 case '0': case '1': case '2': case '3':
1873 case '4': case '5': case '6': case '7':
1875 unsigned int i = c - '0'; /* The first octal digit has already been read into C. */
1878 while (str < limit && ++count < 3) /* NOTE(review): presumably caps the escape at three octal digits; COUNT's initialisation is not visible here. */
1881 if (c < '0' || c > '7')
1884 i = (i << 3) + c - '0';
1887 if (i != (i & mask)) /* The value does not fit within MASK. */
1889 cpp_pedwarn (pfile, "octal escape sequence out of range");
1904 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1906 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1910 cpp_pedwarn (pfile, "escape sequence out of range for character");
1916 #ifndef MAX_CHAR_TYPE_SIZE /* Default to CHAR_TYPE_SIZE when not already defined. */
1917 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1920 #ifndef MAX_WCHAR_TYPE_SIZE /* Default to WCHAR_TYPE_SIZE when not already defined. */
1921 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1924 /* Interpret a (possibly wide) character constant in TOKEN.
1925 WARN_MULTI warns about multi-character charconsts, if not
1926 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1927 that did not exist in traditional C. PCHARS_SEEN points to a
1928 variable that is filled in with the number of characters seen. */
1930 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1932 const cpp_token *token;
1935 unsigned int *pchars_seen;
1937 const unsigned char *str = token->val.str.text;
1938 const unsigned char *limit = str + token->val.str.len; /* One past the last byte of the constant's text. */
1939 unsigned int chars_seen = 0;
1940 unsigned int width, max_chars, c;
1941 unsigned HOST_WIDE_INT mask;
1942 HOST_WIDE_INT result = 0;
1944 #ifdef MULTIBYTE_CHARS
1945 (void) local_mbtowc (NULL, NULL, 0); /* NOTE(review): presumably resets the multibyte conversion state - confirm against local_mbtowc. */
1948 /* Width in bits. */
1949 if (token->type == CPP_CHAR)
1950 width = MAX_CHAR_TYPE_SIZE;
1952 width = MAX_WCHAR_TYPE_SIZE;
1954 if (width < HOST_BITS_PER_WIDE_INT)
1955 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; /* Low WIDTH bits set. */
1958 max_chars = HOST_BITS_PER_WIDE_INT / width; /* How many WIDTH-bit characters fit in a HOST_WIDE_INT. */
1962 #ifdef MULTIBYTE_CHARS
1966 char_len = local_mbtowc (&wc, str, limit - str);
1969 cpp_warning (pfile, "ignoring invalid multibyte character");
1982 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1984 #ifdef MAP_CHARACTER
1986 c = MAP_CHARACTER (c);
1989 /* Merge character into result; ignore excess chars. */
1990 if (++chars_seen <= max_chars)
1992 if (width < HOST_BITS_PER_WIDE_INT)
1993 result = (result << width) | (c & mask); /* Earlier characters move up by WIDTH bits; the new one takes the low bits. */
1999 if (chars_seen == 0)
2000 cpp_error (pfile, "empty character constant");
2001 else if (chars_seen > max_chars)
2003 chars_seen = max_chars; /* Report only the characters that were actually merged. */
2004 cpp_warning (pfile, "character constant too long");
2006 else if (chars_seen > 1 && !traditional && warn_multi)
2007 cpp_warning (pfile, "multi-character character constant");
2009 /* If char type is signed, sign-extend the constant. The
2010 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2011 if (token->type == CPP_CHAR && chars_seen)
2013 unsigned int nbits = chars_seen * width;
2014 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits); /* NOTE(review): this MASK shadows the outer one, and the shift count goes negative if NBITS exceeds HOST_BITS_PER_INT (NBITS can reach HOST_BITS_PER_WIDE_INT) - confirm. */
2016 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2017 || ((result >> (nbits - 1)) & 1) == 0) /* char is unsigned, or the top bit of the NBITS-wide value is clear. */
2023 *pchars_seen = chars_seen;
2039 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u)) /* Offset of member U within struct dummy, which is declared outside this view. */
/* Returns nonzero if CHUNK is non-null, is not POOL's locked chunk, and spans at least twice SIZE bytes between its base and limit. */
2042 chunk_suitable (pool, chunk, size)
2047 /* Being at least twice SIZE means we can use memcpy in
2048 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2050 return (chunk && pool->locked != chunk
2051 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2054 /* Returns the end of the new pool. PTR points to a char in the old
2055 pool, and is updated to point to the same char in the new pool. */
2057 _cpp_next_chunk (pool, len, ptr)
2060 unsigned char **ptr;
2062 cpp_chunk *chunk = pool->cur->next; /* First candidate: the chunk already linked after the current one. */
2064 /* LEN is the minimum size we want in the new pool. */
2065 len += POOL_ROOM (pool); /* The POOL_ROOM bytes at POOL_FRONT are copied across below, so count them too. */
2066 if (! chunk_suitable (pool, chunk, len))
2068 chunk = new_chunk (POOL_SIZE (pool) * 2 + len); /* Allocate a fresh chunk sized from the pool size and LEN. */
2070 chunk->next = pool->cur->next; /* Splice the new chunk in directly after the current one. */
2071 pool->cur->next = chunk;
2074 /* Update the pointer before changing chunk's front. */
2076 *ptr += chunk->base - POOL_FRONT (pool); /* NOTE(review): _cpp_pool_reserve passes 0 for PTR, so this presumably sits under a null check not visible here. */
2078 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2079 chunk->front = chunk->base;
2082 return POOL_LIMIT (pool);
/* Allocates one xmalloc block holding SIZE bytes (rounded up with POOL_ALIGN to DEFAULT_ALIGNMENT) followed by the chunk descriptor. NOTE(review): the name line is not visible here; callers in this file use new_chunk. */
2089 unsigned char *base;
2092 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2093 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk)); /* Room for the data plus the trailing descriptor. */
2094 /* Put the chunk descriptor at the end. Then chunk overruns will
2095 cause obvious chaos. */
2096 result = (cpp_chunk *) (base + size);
2097 result->base = base;
2098 result->front = base; /* Nothing is in use yet. */
2099 result->limit = base + size; /* LIMIT coincides with the descriptor's own address. */
/* Sets up POOL with alignment ALIGN and a first chunk of SIZE bytes, which also becomes the current chunk. */
2106 _cpp_init_pool (pool, size, align, temp)
2108 unsigned int size, align, temp;
2111 align = DEFAULT_ALIGNMENT; /* NOTE(review): presumably applied only when the caller gave no alignment; the guarding condition is not visible here. */
2112 if (align & (align - 1)) /* Nonzero exactly when ALIGN has more than one bit set, i.e. is not a power of two. */
2114 pool->align = align;
2115 pool->first = new_chunk (size);
2116 pool->cur = pool->first;
2120 pool->cur->next = pool->cur; /* Links the chunk to itself. NOTE(review): presumably conditional on TEMP; the condition is not visible here. */
/* Increments POOL's lock count; on the transition from zero, records the current chunk as the locked one. */
2124 _cpp_lock_pool (pool)
2127 if (pool->locks++ == 0)
2128 pool->locked = pool->cur;
/* Decrements POOL's lock count. The action taken when the count reaches zero is not visible in this view. */
2132 _cpp_unlock_pool (pool)
2135 if (--pool->locks == 0)
/* Walks POOL's chunks starting from pool->first. The per-chunk work is not visible in this view. */
2140 _cpp_free_pool (pool)
2143 cpp_chunk *chunk = pool->first, *next;
2151 while (chunk && chunk != pool->first); /* Stop at a null link, or on arriving back at the first chunk. */
2154 /* Reserve LEN bytes from a memory pool. LEN is rounded up to the pool's alignment, and the pool's front pointer is returned. */
2156 _cpp_pool_reserve (pool, len)
2160 len = POOL_ALIGN (len, pool->align);
2161 if (len > (unsigned int) POOL_ROOM (pool)) /* Not enough room left in the current chunk. */
2162 _cpp_next_chunk (pool, len, 0); /* No caller pointer to relocate, so PTR is 0. */
2164 return POOL_FRONT (pool);
2167 /* Allocate LEN bytes from a memory pool. */
2169 _cpp_pool_alloc (pool, len)
2173 unsigned char *result = _cpp_pool_reserve (pool, len);
2175 POOL_COMMIT (pool, len);