1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_period and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
106 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
114 cpp_ideq (token, string)
115 const cpp_token *token;
118 if (token->type != CPP_NAME)
121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
124 /* Call when meeting a newline. NEWLINE_CHAR is the line terminator
   character that was just read. The visible code clears the buffer's
   col_adjust, moves line_base up to the current read position, and
   leaves the character that follows the line terminator in
   buffer->read_ahead. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
127 handle_newline (pfile, newline_char)
129 cppchar_t newline_char;
132 cppchar_t next = EOF;
135 pfile->pseudo_newlines++;
137 buffer = pfile->buffer;
138 buffer->col_adjust = 0;
140 buffer->line_base = buffer->cur;
142 /* Handle CR-LF and LF-CR combinations, get the next character.
   Comparing the sum against '\r' + '\n' accepts the pair in either
   order with one test: provided NEWLINE_CHAR is one of the two
   characters, the sums are equal only when NEXT is the other one. */
143 if (buffer->cur < buffer->rlimit)
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
/* The second character of the pair is part of the terminator, so the
   new line is taken to start after it. */
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
156 buffer->read_ahead = next;
160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
166 trigraph_ok (pfile, from_char)
170 int accept = CPP_OPTION (pfile, trigraphs);
172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
175 cpp_buffer *buffer = pfile->buffer;
177 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
178 "trigraph ??%c converted to %c",
180 (int) _cpp_trigraph_map[from_char]);
181 else if (buffer->cur != buffer->last_Wtrigraphs)
183 buffer->last_Wtrigraphs = buffer->cur;
184 cpp_warning_with_line (pfile, buffer->lineno,
185 CPP_BUF_COL (buffer) - 2,
186 "trigraph ??%c ignored", (int) from_char);
193 /* Assumes local variables buffer and result. Sets result->type to T
   and resets buffer->read_ahead to EOF, so that no lookahead character
   remains pending in the buffer afterwards. */
194 #define ACCEPT_CHAR(t) \
195 do { result->type = t; buffer->read_ahead = EOF; } while (0)
197 /* SAVE_STATE remembers buffer->cur in saved_cur and RESTORE_STATE
   rewinds buffer->cur to that remembered position. Both expect locals
   named buffer and saved_cur to be in scope.

   When we move to multibyte character sets, add to these something
198 that saves and restores the state of the multibyte conversion
199 library. This probably involves saving and restoring a "cookie".
200 In the case of glibc it is an 8-byte structure, so is not a high
201 overhead operation. In any case, it's out of the fast path. */
202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
205 /* Skips any escaped newlines introduced by NEXT, which is either a
206 '?' or a '\\'. Returns the next character, which will also have
207 been placed in buffer->read_ahead. This routine performs
208 preprocessing stages 1 and 2 of the ISO C standard. */
210 skip_escaped_newlines (buffer, next)
214 /* Only do this if we apply stages 1 and 2. */
215 if (!buffer->from_stage3)
218 const unsigned char *saved_cur;
223 if (buffer->cur == buffer->rlimit)
229 next1 = *buffer->cur++;
230 if (next1 != '?' || buffer->cur == buffer->rlimit)
236 next1 = *buffer->cur++;
237 if (!_cpp_trigraph_map[next1]
238 || !trigraph_ok (buffer->pfile, next1))
244 /* We have a full trigraph here. */
245 next = _cpp_trigraph_map[next1];
246 if (next != '\\' || buffer->cur == buffer->rlimit)
251 /* We have a backslash, and room for at least one more character. */
255 next1 = *buffer->cur++;
256 if (!is_nvspace (next1))
260 while (buffer->cur < buffer->rlimit);
262 if (!is_vspace (next1))
268 if (space && !buffer->pfile->state.lexing_comment)
269 cpp_warning (buffer->pfile,
270 "backslash and newline separated by space");
272 next = handle_newline (buffer->pfile, next1);
274 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
276 while (next == '\\' || next == '?');
279 buffer->read_ahead = next;
283 /* Obtain the next character, after trigraph conversion and skipping
284 an arbitrary string of escaped newlines. The common case of no
285 trigraphs or escaped newlines falls through quickly. */
287 get_effective_char (buffer)
290 cppchar_t next = EOF;
292 if (buffer->cur < buffer->rlimit)
294 next = *buffer->cur++;
296 /* '?' can introduce trigraphs (and therefore backslash); '\\'
297 can introduce escaped newlines, which we want to skip, or
298 UCNs, which, depending upon lexer state, we will handle in
300 if (next == '?' || next == '\\')
301 next = skip_escaped_newlines (buffer, next);
304 buffer->read_ahead = next;
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 non-zero if comment terminated by EOF, zero otherwise.

   pfile->state.lexing_comment is set for the duration of the scan and
   cleared again before returning, and buffer->read_ahead is reset to
   EOF on the way out. The return value is derived from the last two
   characters examined: it is zero only when they are an asterisk
   followed by a slash. */
312 skip_block_comment (pfile)
315 cpp_buffer *buffer = pfile->buffer;
316 cppchar_t c = EOF, prevc = EOF;
318 pfile->state.lexing_comment = 1;
319 while (buffer->cur != buffer->rlimit)
321 prevc = c, c = *buffer->cur++;
324 /* FIXME: For speed, create a new character class of characters
325 of interest inside block comments. */
326 if (c == '?' || c == '\\')
327 c = skip_escaped_newlines (buffer, c);
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
336 /* Warn about potential nested comments, but not if the '/'
337 comes immediately before the true comment delimiter.
338 Don't bother to get it right across escaped newlines. */
339 if (CPP_OPTION (pfile, warn_comments)
340 && buffer->cur != buffer->rlimit)
342 prevc = c, c = *buffer->cur++;
343 if (c == '*' && buffer->cur != buffer->rlimit)
345 prevc = c, c = *buffer->cur++;
347 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
348 CPP_BUF_COL (buffer),
349 "\"/*\" within comment");
354 else if (is_vspace (c))
356 prevc = c, c = handle_newline (pfile, c);
360 adjust_column (pfile);
363 pfile->state.lexing_comment = 0;
364 buffer->read_ahead = EOF;
365 return c != '/' || prevc != '*';
368 /* Skip a C++ line comment. Handles escaped newlines. Returns
369 non-zero if a multiline comment. The following new line, if any,
370 is left in buffer->read_ahead. */
372 skip_line_comment (pfile)
375 cpp_buffer *buffer = pfile->buffer;
376 unsigned int orig_lineno = buffer->lineno;
379 pfile->state.lexing_comment = 1;
383 if (buffer->cur == buffer->rlimit)
387 if (c == '?' || c == '\\')
388 c = skip_escaped_newlines (buffer, c);
390 while (!is_vspace (c));
392 pfile->state.lexing_comment = 0;
393 buffer->read_ahead = c; /* Leave any newline for caller. */
394 return orig_lineno != buffer->lineno;
397 /* pfile->buffer->cur is one beyond the \t character. Update
398 col_adjust so we track the column correctly.

   The amount added is the distance from the tab to the next multiple
   of the tabstop option, less one. For example, with a tabstop of 8
   and col == 2 the adjustment grows by (8 - 2) - 1 = 5, and with
   col == 8 it grows by (8 - 0) - 1 = 7.

   NOTE(review): the tabstop option is used as a modulus, so this
   assumes it is never zero -- confirm where the option is validated. */
400 adjust_column (pfile)
403 cpp_buffer *buffer = pfile->buffer;
404 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
406 /* Round it up to a multiple of the tabstop, but subtract 1 since the
407 tab itself occupies a character position. */
408 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
409 - col % CPP_OPTION (pfile, tabstop)) - 1;
412 /* Skips whitespace, saving the next non-whitespace character.
413 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
414 tokens might be assigned an incorrect column. */
416 skip_whitespace (pfile, c)
420 cpp_buffer *buffer = pfile->buffer;
421 unsigned int warned = 0;
425 /* Horizontal space always OK. */
429 adjust_column (pfile);
430 /* Just \f \v or \0 left. */
435 cpp_warning (pfile, "null character(s) ignored");
439 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
440 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
441 CPP_BUF_COL (buffer),
442 "%s in preprocessing directive",
443 c == '\f' ? "form feed" : "vertical tab");
446 if (buffer->cur == buffer->rlimit)
450 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
451 while (is_nvspace (c));
453 /* Remember the next character. */
454 buffer->read_ahead = c;
457 /* See if the characters of a number token are valid in a name (no
460 name_p (pfile, string)
462 const cpp_string *string;
466 for (i = 0; i < string->len; i++)
467 if (!is_idchar (string->text[i]))
473 /* Parse an identifier, skipping embedded backslash-newlines.
   C is the first character of the identifier. The spelling is
   accumulated on the obstack belonging to pfile->hash_table and then
   passed to ht_lookup, whose result is cast to a cpp_hashnode. The
   first character after the identifier is left in buffer->read_ahead.

   NOTE(review): the remark below about computing the hash while
   parsing does not correspond to anything visible in this routine,
   which only grows the obstack and calls ht_lookup -- confirm whether
   the remark is stale.

474 Calculate the hash value of the token while parsing, for improved
475 performance. The hashing algorithm *must* match cpp_lookup(). */
477 static cpp_hashnode *
478 parse_identifier (pfile, c)
482 cpp_hashnode *result;
483 cpp_buffer *buffer = pfile->buffer;
484 unsigned int saw_dollar = 0, len;
485 struct obstack *stack = &pfile->hash_table->stack;
491 obstack_1grow (stack, c);
497 if (buffer->cur == buffer->rlimit)
502 while (is_idchar (c));
504 /* Potential escaped newline? Only '?' and backslash can introduce
   one; they are passed to skip_escaped_newlines to find out. */
505 if (c != '?' && c != '\\')
507 c = skip_escaped_newlines (buffer, c);
509 while (is_idchar (c));
511 /* Remember the next character. */
512 buffer->read_ahead = c;
514 /* $ is not an identifier character in the standard, but is commonly
515 accepted as an extension. Don't warn about it in skipped
516 conditional blocks. */
517 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
518 cpp_pedwarn (pfile, "'$' character(s) in identifier");
520 /* Identifiers are null-terminated. LEN is measured before the NUL
   is appended, so it does not count the terminator. */
521 len = obstack_object_size (stack);
522 obstack_1grow (stack, '\0');
524 /* This routine commits the memory if necessary. */
525 result = (cpp_hashnode *)
526 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
528 /* Some identifiers require diagnostics when lexed. Nothing is
   reported while pfile->state.skipping is set. */
529 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
531 /* It is allowed to poison the same identifier twice. */
532 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
533 cpp_error (pfile, "attempt to use poisoned \"%s\"",
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
537 replacement list of a variadic macro. */
538 if (result == pfile->spec_nodes.n__VA_ARGS__
539 && !pfile->state.va_args_ok)
540 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
546 /* Parse a number, skipping embedded backslash-newlines. */
548 parse_number (pfile, number, c, leading_period)
554 cpp_buffer *buffer = pfile->buffer;
555 cpp_pool *pool = &pfile->ident_pool;
556 unsigned char *dest, *limit;
558 dest = POOL_FRONT (pool);
559 limit = POOL_LIMIT (pool);
561 /* Place a leading period. */
565 limit = _cpp_next_chunk (pool, 0, &dest);
573 /* Need room for terminating null. */
574 if (dest + 1 >= limit)
575 limit = _cpp_next_chunk (pool, 0, &dest);
579 if (buffer->cur == buffer->rlimit)
584 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
586 /* Potential escaped newline? */
587 if (c != '?' && c != '\\')
589 c = skip_escaped_newlines (buffer, c);
591 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
593 /* Remember the next character. */
594 buffer->read_ahead = c;
596 /* Null-terminate the number. */
599 number->text = POOL_FRONT (pool);
600 number->len = dest - number->text;
601 POOL_COMMIT (pool, number->len + 1);
604 /* Subroutine of parse_string. Emits error for unterminated strings. */
606 unterminated (pfile, term)
610 cpp_error (pfile, "missing terminating %c character", term);
612 if (term == '\"' && pfile->mlstring_pos.line
613 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
615 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
616 pfile->mlstring_pos.col,
617 "possible start of unterminated string literal");
618 pfile->mlstring_pos.line = 0;
622 /* Subroutine of parse_string. */
624 unescaped_terminator_p (pfile, dest)
626 const unsigned char *dest;
628 const unsigned char *start, *temp;
630 /* In #include-style directives, terminators are not escapable. */
631 if (pfile->state.angled_headers)
634 start = POOL_FRONT (&pfile->ident_pool);
636 /* An odd number of consecutive backslashes represents an escaped
638 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
641 return ((dest - temp) & 1) == 0;
644 /* Parses a string, character constant, or angle-bracketed header file
645 name. Handles embedded trigraphs and escaped newlines. The stored
646 string is guaranteed NUL-terminated, but it is not guaranteed that
647 this is the first NUL since embedded NULs are preserved.
649 Multi-line strings are allowed, but they are deprecated. */
651 parse_string (pfile, token, terminator)
654 cppchar_t terminator;
656 cpp_buffer *buffer = pfile->buffer;
657 cpp_pool *pool = &pfile->ident_pool;
658 unsigned char *dest, *limit;
660 bool warned_nulls = false, warned_multi = false;
662 dest = POOL_FRONT (pool);
663 limit = POOL_LIMIT (pool);
667 if (buffer->cur == buffer->rlimit)
673 /* We need space for the terminating NUL. */
675 limit = _cpp_next_chunk (pool, 0, &dest);
679 unterminated (pfile, terminator);
683 /* Handle trigraphs, escaped newlines etc. */
684 if (c == '?' || c == '\\')
685 c = skip_escaped_newlines (buffer, c);
687 if (c == terminator && unescaped_terminator_p (pfile, dest))
692 else if (is_vspace (c))
694 /* In assembly language, silently terminate string and
695 character literals at end of line. This is a kludge
696 around not knowing where comments are. */
697 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
700 /* Character constants and header names may not extend over
701 multiple lines. In Standard C, neither may strings.
702 Unfortunately, we accept multiline strings as an
703 extension, except in #include family directives. */
704 if (terminator != '"' || pfile->state.angled_headers)
706 unterminated (pfile, terminator);
713 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
716 if (pfile->mlstring_pos.line == 0)
717 pfile->mlstring_pos = pfile->lexer_pos;
719 c = handle_newline (pfile, c);
723 else if (c == '\0' && !warned_nulls)
726 cpp_warning (pfile, "null character(s) preserved in literal");
732 /* Remember the next character. */
733 buffer->read_ahead = c;
736 token->val.str.text = POOL_FRONT (pool);
737 token->val.str.len = dest - token->val.str.text;
738 POOL_COMMIT (pool, token->val.str.len + 1);
741 /* The stored comment includes the comment start and any terminator.

   Turns TOKEN into a CPP_COMMENT token whose text is a LEN-byte area
   obtained from pfile->ident_pool. The bytes starting at FROM are
   copied to offset 1 of that area, which leaves offset 0 free for the
   leading slash that LEN accounts for.
   NOTE(review): the statement that stores that leading slash is not
   visible here -- confirm it is present. */
743 save_comment (pfile, token, from)
746 const unsigned char *from;
748 unsigned char *buffer;
751 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
752 /* C++ comments probably (not definitely) have moved past a new
753 line, which we don't want to save in the comment. A lookahead
   character other than EOF is how that case is detected. */
754 if (pfile->buffer->read_ahead != EOF)
756 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
758 token->type = CPP_COMMENT;
759 token->val.str.len = len;
760 token->val.str.text = buffer;
763 memcpy (buffer + 1, from, len - 1);
766 /* Subroutine of lex_token to handle '%'. A little tricky, since we
767 want to avoid stepping back when lexing %:%X. */
769 lex_percent (buffer, result)
775 result->type = CPP_MOD;
776 /* Parsing %:%X could leave an extra character. */
777 if (buffer->extra_char == EOF)
778 c = get_effective_char (buffer);
781 c = buffer->read_ahead = buffer->extra_char;
782 buffer->extra_char = EOF;
786 ACCEPT_CHAR (CPP_MOD_EQ);
787 else if (CPP_OPTION (buffer->pfile, digraphs))
791 result->flags |= DIGRAPH;
792 ACCEPT_CHAR (CPP_HASH);
793 if (get_effective_char (buffer) == '%')
795 buffer->extra_char = get_effective_char (buffer);
796 if (buffer->extra_char == ':')
798 buffer->extra_char = EOF;
799 ACCEPT_CHAR (CPP_PASTE);
802 /* We'll catch the extra_char when we're called back. */
803 buffer->read_ahead = '%';
808 result->flags |= DIGRAPH;
809 ACCEPT_CHAR (CPP_CLOSE_BRACE);
814 /* Subroutine of lex_token to handle '.'. This is tricky, since we
815 want to avoid stepping back when lexing '...' or '.123'. In the
816 latter case we should also set a flag for parse_number. */
818 lex_dot (pfile, result)
822 cpp_buffer *buffer = pfile->buffer;
825 /* Parsing ..X could leave an extra character. */
826 if (buffer->extra_char == EOF)
827 c = get_effective_char (buffer);
830 c = buffer->read_ahead = buffer->extra_char;
831 buffer->extra_char = EOF;
834 /* All known character sets have 0...9 contiguous. */
835 if (c >= '0' && c <= '9')
837 result->type = CPP_NUMBER;
838 parse_number (pfile, &result->val.str, c, 1);
842 result->type = CPP_DOT;
845 buffer->extra_char = get_effective_char (buffer);
846 if (buffer->extra_char == '.')
848 buffer->extra_char = EOF;
849 ACCEPT_CHAR (CPP_ELLIPSIS);
852 /* We'll catch the extra_char when we're called back. */
853 buffer->read_ahead = '.';
855 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
856 ACCEPT_CHAR (CPP_DOT_STAR);
861 _cpp_lex_token (pfile, result)
867 const unsigned char *comment_start;
871 bol = pfile->state.next_bol;
873 buffer = pfile->buffer;
874 pfile->state.next_bol = 0;
875 result->flags = buffer->saved_flags;
876 buffer->saved_flags = 0;
878 pfile->lexer_pos.line = buffer->lineno;
879 result->line = pfile->line;
881 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
883 c = buffer->read_ahead;
884 if (c == EOF && buffer->cur < buffer->rlimit)
887 pfile->lexer_pos.col++;
889 result->col = pfile->lexer_pos.col;
892 buffer->read_ahead = EOF;
896 if (!pfile->state.in_directive)
898 unsigned char ret = pfile->buffer->return_at_eof;
900 /* Non-empty files should end in a newline. Don't warn for
901 command line and _Pragma buffers. */
902 if (pfile->lexer_pos.col != 0 && !buffer->from_stage3)
903 cpp_pedwarn (pfile, "no newline at end of file");
904 _cpp_pop_buffer (pfile);
905 if (pfile->buffer && !ret)
911 pfile->state.next_bol = 1;
912 result->type = CPP_EOF;
915 case ' ': case '\t': case '\f': case '\v': case '\0':
916 skip_whitespace (pfile, c);
917 result->flags |= PREV_WHITE;
920 case '\n': case '\r':
921 if (!pfile->state.in_directive)
923 handle_newline (pfile, c);
924 if (!pfile->state.parsing_args)
925 pfile->pseudo_newlines = 0;
927 pfile->lexer_pos.output_line = buffer->lineno;
928 /* This is a new line, so clear any white space flag.
929 Newlines in arguments are white space (6.10.3.10);
930 parse_arg takes care of that. */
931 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
935 /* Don't let directives spill over to the next line. */
936 buffer->read_ahead = c;
937 pfile->state.next_bol = 1;
938 result->type = CPP_EOF;
939 /* Don't break; pfile->state.skipping might be true. */
944 /* These could start an escaped newline, or '?' a trigraph. Let
945 skip_escaped_newlines do all the work. */
947 unsigned int lineno = buffer->lineno;
949 c = skip_escaped_newlines (buffer, c);
950 if (lineno != buffer->lineno)
951 /* We had at least one escaped newline of some sort, and the
952 next character is in buffer->read_ahead. Update the
953 token's line and column. */
956 /* We are either the original '?' or '\\', or a trigraph. */
957 result->type = CPP_QUERY;
958 buffer->read_ahead = EOF;
966 case '0': case '1': case '2': case '3': case '4':
967 case '5': case '6': case '7': case '8': case '9':
968 result->type = CPP_NUMBER;
969 parse_number (pfile, &result->val.str, c, 0);
973 if (!CPP_OPTION (pfile, dollars_in_ident))
975 /* Fall through... */
978 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
979 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
980 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
981 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
983 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
984 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
985 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
986 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
988 result->type = CPP_NAME;
989 result->val.node = parse_identifier (pfile, c);
991 /* 'L' may introduce wide characters or strings. */
992 if (result->val.node == pfile->spec_nodes.n_L)
994 c = buffer->read_ahead; /* For make_string. */
995 if (c == '\'' || c == '"')
997 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1001 /* Convert named operators to their proper types. */
1002 else if (result->val.node->flags & NODE_OPERATOR)
1004 result->flags |= NAMED_OP;
1005 result->type = result->val.node->value.operator;
1011 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1013 parse_string (pfile, result, c);
1017 /* A potential block or line comment. */
1018 comment_start = buffer->cur;
1019 result->type = CPP_DIV;
1020 c = get_effective_char (buffer);
1022 ACCEPT_CHAR (CPP_DIV_EQ);
1023 if (c != '/' && c != '*')
1028 if (skip_block_comment (pfile))
1029 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1030 pfile->lexer_pos.col,
1031 "unterminated comment");
1035 if (!CPP_OPTION (pfile, cplusplus_comments)
1036 && !CPP_IN_SYSTEM_HEADER (pfile))
1039 /* Warn about comments only if pedantically GNUC89, and not
1040 in system headers. */
1041 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1042 && ! buffer->warned_cplusplus_comments)
1045 "C++ style comments are not allowed in ISO C89");
1047 "(this will be reported only once per input file)");
1048 buffer->warned_cplusplus_comments = 1;
1051 /* Skip_line_comment updates buffer->read_ahead. */
1052 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1053 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1054 pfile->lexer_pos.col,
1055 "multi-line comment");
1058 /* Skipping the comment has updated buffer->read_ahead. */
1059 if (!pfile->state.save_comments)
1061 result->flags |= PREV_WHITE;
1065 /* Save the comment as a token in its own right. */
1066 save_comment (pfile, result, comment_start);
1067 /* Don't do MI optimisation. */
1071 if (pfile->state.angled_headers)
1073 result->type = CPP_HEADER_NAME;
1074 c = '>'; /* terminator. */
1078 result->type = CPP_LESS;
1079 c = get_effective_char (buffer);
1081 ACCEPT_CHAR (CPP_LESS_EQ);
1084 ACCEPT_CHAR (CPP_LSHIFT);
1085 if (get_effective_char (buffer) == '=')
1086 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1088 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1090 ACCEPT_CHAR (CPP_MIN);
1091 if (get_effective_char (buffer) == '=')
1092 ACCEPT_CHAR (CPP_MIN_EQ);
1094 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1096 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1097 result->flags |= DIGRAPH;
1099 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1101 ACCEPT_CHAR (CPP_OPEN_BRACE);
1102 result->flags |= DIGRAPH;
1107 result->type = CPP_GREATER;
1108 c = get_effective_char (buffer);
1110 ACCEPT_CHAR (CPP_GREATER_EQ);
1113 ACCEPT_CHAR (CPP_RSHIFT);
1114 if (get_effective_char (buffer) == '=')
1115 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1117 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1119 ACCEPT_CHAR (CPP_MAX);
1120 if (get_effective_char (buffer) == '=')
1121 ACCEPT_CHAR (CPP_MAX_EQ);
1126 lex_percent (buffer, result);
1127 if (result->type == CPP_HASH)
1132 lex_dot (pfile, result);
1136 result->type = CPP_PLUS;
1137 c = get_effective_char (buffer);
1139 ACCEPT_CHAR (CPP_PLUS_EQ);
1141 ACCEPT_CHAR (CPP_PLUS_PLUS);
1145 result->type = CPP_MINUS;
1146 c = get_effective_char (buffer);
1149 ACCEPT_CHAR (CPP_DEREF);
1150 if (CPP_OPTION (pfile, cplusplus)
1151 && get_effective_char (buffer) == '*')
1152 ACCEPT_CHAR (CPP_DEREF_STAR);
1155 ACCEPT_CHAR (CPP_MINUS_EQ);
1157 ACCEPT_CHAR (CPP_MINUS_MINUS);
1161 result->type = CPP_MULT;
1162 if (get_effective_char (buffer) == '=')
1163 ACCEPT_CHAR (CPP_MULT_EQ);
1167 result->type = CPP_EQ;
1168 if (get_effective_char (buffer) == '=')
1169 ACCEPT_CHAR (CPP_EQ_EQ);
1173 result->type = CPP_NOT;
1174 if (get_effective_char (buffer) == '=')
1175 ACCEPT_CHAR (CPP_NOT_EQ);
1179 result->type = CPP_AND;
1180 c = get_effective_char (buffer);
1182 ACCEPT_CHAR (CPP_AND_EQ);
1184 ACCEPT_CHAR (CPP_AND_AND);
1188 c = buffer->extra_char; /* Can be set by error condition below. */
1191 buffer->read_ahead = c;
1192 buffer->extra_char = EOF;
1195 c = get_effective_char (buffer);
1199 ACCEPT_CHAR (CPP_PASTE);
1203 result->type = CPP_HASH;
1207 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1208 tokens within the list of arguments that would otherwise act
1209 as preprocessing directives, the behavior is undefined.
1211 This implementation will report a hard error, terminate the
1212 macro invocation, and proceed to process the directive. */
1213 if (pfile->state.parsing_args)
1215 if (pfile->state.parsing_args == 2)
1217 "directives may not be used inside a macro argument");
1219 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1220 buffer->extra_char = buffer->read_ahead;
1221 buffer->read_ahead = '#';
1222 pfile->state.next_bol = 1;
1223 result->type = CPP_EOF;
1225 /* Get whitespace right - newline_in_args sets it. */
1226 if (pfile->lexer_pos.col == 1)
1227 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1231 /* This is the hash introducing a directive. */
1232 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1233 goto done_directive; /* bol still 1. */
1234 /* This is in fact an assembler #. */
1239 result->type = CPP_OR;
1240 c = get_effective_char (buffer);
1242 ACCEPT_CHAR (CPP_OR_EQ);
1244 ACCEPT_CHAR (CPP_OR_OR);
1248 result->type = CPP_XOR;
1249 if (get_effective_char (buffer) == '=')
1250 ACCEPT_CHAR (CPP_XOR_EQ);
1254 result->type = CPP_COLON;
1255 c = get_effective_char (buffer);
1256 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1257 ACCEPT_CHAR (CPP_SCOPE);
1258 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1260 result->flags |= DIGRAPH;
1261 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1265 case '~': result->type = CPP_COMPL; break;
1266 case ',': result->type = CPP_COMMA; break;
1267 case '(': result->type = CPP_OPEN_PAREN; break;
1268 case ')': result->type = CPP_CLOSE_PAREN; break;
1269 case '[': result->type = CPP_OPEN_SQUARE; break;
1270 case ']': result->type = CPP_CLOSE_SQUARE; break;
1271 case '{': result->type = CPP_OPEN_BRACE; break;
1272 case '}': result->type = CPP_CLOSE_BRACE; break;
1273 case ';': result->type = CPP_SEMICOLON; break;
1275 /* @ is a punctuator in Objective C. */
1276 case '@': result->type = CPP_ATSIGN; break;
1280 result->type = CPP_OTHER;
1285 if (!pfile->state.in_directive && pfile->state.skipping)
1288 /* If not in a directive, this token invalidates controlling macros. */
1289 if (!pfile->state.in_directive)
1290 pfile->mi_valid = false;
1293 /* An upper bound on the number of bytes needed to spell a token,
1294 including preceding whitespace.

   The base length depends on the token's spelling class: the stored
   string length for SPELL_STRING, the node's name length for
   SPELL_IDENT, and zero for every other class. */
1296 cpp_token_len (token)
1297 const cpp_token *token;
1301 switch (TOKEN_SPELL (token))
1303 default: len = 0; break;
1304 case SPELL_STRING: len = token->val.str.len; break;
1305 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1307 /* 1 for whitespace, 4 for comment delimiters. */
1311 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1312 already contain enough space to hold the token's spelling.
1313 Returns a pointer to the character after the last character
1316 cpp_spell_token (pfile, token, buffer)
1317 cpp_reader *pfile; /* Would be nice to be rid of this... */
1318 const cpp_token *token;
1319 unsigned char *buffer;
1321 switch (TOKEN_SPELL (token))
1323 case SPELL_OPERATOR:
1325 const unsigned char *spelling;
1328 if (token->flags & DIGRAPH)
1330 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1331 else if (token->flags & NAMED_OP)
1334 spelling = TOKEN_NAME (token);
1336 while ((c = *spelling++) != '\0')
1343 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1344 buffer += NODE_LEN (token->val.node);
1349 int left, right, tag;
1350 switch (token->type)
1352 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1353 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1354 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1355 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1356 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1357 default: left = '\0'; right = '\0'; tag = '\0'; break;
1359 if (tag) *buffer++ = tag;
1360 if (left) *buffer++ = left;
1361 memcpy (buffer, token->val.str.text, token->val.str.len);
1362 buffer += token->val.str.len;
1363 if (right) *buffer++ = right;
1368 *buffer++ = token->val.c;
1372 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1379 /* Returns a token as a null-terminated string. The string is
1380 temporary, and automatically freed later. Useful for diagnostics. */
1382 cpp_token_as_text (pfile, token)
1384 const cpp_token *token;
/* Size the buffer with cpp_token_len and take it from the ident_pool
   of PFILE.  */
1386 unsigned int len = cpp_token_len (token);
1387 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* END receives the pointer returned by cpp_spell_token, i.e. one past
   the last byte spelled.  NOTE(review): the NUL termination and the
   return statement are not visible in this excerpt.  */
1389 end = cpp_spell_token (pfile, token, start);
1395 /* Used by C front ends. Should really move to using cpp_token_as_text. */
/* Returns the name field of the token_spellings entry indexed by
   TYPE, cast to const char *.  TYPE is used as the index without a
   range check.  */
1397 cpp_type2name (type)
1398 enum cpp_ttype type;
1400 return (const char *) token_spellings[type].name;
1403 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1404 for efficiency - to avoid double-buffering. Also, outputs a space
1405 if PREV_WHITE is flagged. */
/* cpp_output_token mirrors the category dispatch of cpp_spell_token,
   but writes to the stream FP instead of a memory buffer.  */
1407 cpp_output_token (token, fp)
1408 const cpp_token *token;
/* PREV_WHITE is tested first; the action taken is not visible in this
   excerpt.  */
1411 if (token->flags & PREV_WHITE)
1414 switch (TOKEN_SPELL (token))
/* Operators: same spelling selection as in cpp_spell_token (digraph
   table, a NAMED_OP branch whose body is not visible here, or
   TOKEN_NAME), written with ufputs.  */
1416 case SPELL_OPERATOR:
1418 const unsigned char *spelling;
1420 if (token->flags & DIGRAPH)
1422 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1423 else if (token->flags & NAMED_OP)
1426 spelling = TOKEN_NAME (token);
1428 ufputs (spelling, fp);
/* The node name is written with ufputs.  */
1434 ufputs (NODE_NAME (token->val.node), fp);
/* String-like tokens: the token type selects an optional prefix TAG
   and the LEFT and RIGHT delimiters; zero means nothing is written.  */
1439 int left, right, tag;
1440 switch (token->type)
1442 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1443 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1444 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1445 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1446 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1447 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* The text is written with fwrite using its stored length, so it does
   not need to be NUL-terminated.  */
1449 if (tag) putc (tag, fp);
1450 if (left) putc (left, fp);
1451 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1452 if (right) putc (right, fp);
/* Single-character spelling taken from val.c.  */
1457 putc (token->val.c, fp);
1461 /* An error, most probably. */
1466 /* Compare two tokens. */
/* Tokens can only be equivalent when both type and flags match; what
   else is compared depends on the spelling category of A.  */
1468 _cpp_equiv_tokens (a, b)
1469 const cpp_token *a, *b;
1471 if (a->type == b->type && a->flags == b->flags)
1472 switch (TOKEN_SPELL (a))
1474 default: /* Keep compiler happy. */
1475 case SPELL_OPERATOR:
1478 return a->val.c == b->val.c; /* Character. */
/* Macro arguments additionally need matching argument numbers; any
   other type reaching this return compares equal.  */
1480 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compared by node pointer identity.  */
1482 return a->val.node == b->val.node;
/* Same length and, via memcmp, the same bytes (the length argument of
   the memcmp call is on a line not visible in this excerpt).  */
1484 return (a->val.str.len == b->val.str.len
1485 && !memcmp (a->val.str.text, b->val.str.text,
1492 /* Determine whether two tokens can be pasted together, and if so,
1493 what the resulting token is. Returns CPP_EOF if the tokens cannot
1494 be pasted, or the appropriate type for the merged token if they
1497 cpp_can_paste (pfile, token1, token2, digraph)
1499 const cpp_token *token1, *token2;
1502 enum cpp_ttype a = token1->type, b = token2->type;
1503 int cxx = CPP_OPTION (pfile, cplusplus);
/* A and B are the types of the left and right tokens.  CXX gates the
   results that are only returned when the cplusplus option is set.
   *DIGRAPH is written on the paths that build a digraph spelling.
   Several case labels of the dispatch on A are not visible in this
   excerpt; the notes below name the presumed left-hand type.  */
1505 /* Treat named operators as if they were ordinary NAMEs. */
1506 if (token1->flags & NAMED_OP)
1508 if (token2->flags & NAMED_OP)
/* Any type up to CPP_LAST_EQ followed by CPP_EQ maps to the type at a
   fixed enum distance, namely CPP_EQ_EQ - CPP_EQ.  This relies on the
   layout of enum cpp_ttype.  */
1511 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1512 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
/* Presumably A is CPP_GREATER here.  */
1517 if (b == a) return CPP_RSHIFT;
1518 if (b == CPP_QUERY && cxx) return CPP_MAX;
1519 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
/* Presumably A is CPP_LESS here; the digraph results additionally
   require the digraphs option.  */
1522 if (b == a) return CPP_LSHIFT;
1523 if (b == CPP_QUERY && cxx) return CPP_MIN;
1524 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
1525 if (CPP_OPTION (pfile, digraphs))
1528 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1530 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
/* Doubled operators.  */
1534 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1535 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1536 case CPP_OR: if (b == a) return CPP_OR_OR; break;
/* Presumably A is CPP_MINUS here.  */
1539 if (b == a) return CPP_MINUS_MINUS;
1540 if (b == CPP_GREATER) return CPP_DEREF;
/* Presumably A is CPP_COLON here.  */
1543 if (b == a && cxx) return CPP_SCOPE;
1544 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1545 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
/* Presumably A is CPP_MOD here; both results are digraphs.  */
1549 if (CPP_OPTION (pfile, digraphs))
1551 if (b == CPP_GREATER)
1552 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1554 {*digraph = 1; return CPP_HASH;} /* %: digraph */
/* Presumably A is CPP_DEREF, then CPP_DOT, on the next lines.  */
1558 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1561 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1562 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type paste to CPP_PASTE only when their
   DIGRAPH flags agree; the result inherits that flag.  Presumably A
   is CPP_HASH here.  */
1566 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1568 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
/* Presumably A is CPP_NAME here.  The first halves of the next three
   conditions are not visible; the visible halves require name_p to
   accept the text of TOKEN2, or TOKEN1 to be the special node n_L.  */
1572 if (b == CPP_NAME) return CPP_NAME;
1574 && name_p (pfile, &token2->val.str)) return CPP_NAME;
1576 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1578 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
/* Presumably A is CPP_NUMBER here: a following number, name or dot
   yields a number, as does a sign when VALID_SIGN accepts the last
   character of TOKEN1.  */
1582 if (b == CPP_NUMBER) return CPP_NUMBER;
1583 if (b == CPP_NAME) return CPP_NUMBER;
1584 if (b == CPP_DOT) return CPP_NUMBER;
1585 /* Numbers cannot have length zero, so this is safe. */
1586 if ((b == CPP_PLUS || b == CPP_MINUS)
1587 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1598 /* Returns nonzero if a space should be inserted to avoid an
1599 accidental token paste for output. For simplicity, it is
1600 conservative, and occasionally advises a space where one is not
1601 needed, e.g. "." and ".2". */
/* cpp_avoid_paste decides from the type of TOKEN1 and the first
   spelling character (or the type) of TOKEN2.  */
1604 cpp_avoid_paste (pfile, token1, token2)
1606 const cpp_token *token1, *token2;
1608 enum cpp_ttype a = token1->type, b = token2->type;
/* NAMED_OP is tested on both tokens; the actions taken are not
   visible in this excerpt.  */
1611 if (token1->flags & NAMED_OP)
1613 if (token2->flags & NAMED_OP)
/* C is the first character of the spelling of TOKEN2: taken from the
   digraph table when DIGRAPH is flagged, otherwise from
   token_spellings when B is in the operator category.
   NOTE(review): the value of C for any other token is set on lines
   not visible in this excerpt.  */
1617 if (token2->flags & DIGRAPH)
1618 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1619 else if (token_spellings[b].category == SPELL_OPERATOR)
1620 c = token_spellings[b].name[0];
1622 /* Quickly get everything that can paste with an '='. */
1623 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* Per left-hand type: nonzero when C (or B) could extend TOKEN1 into
   a different, longer token.  */
1628 case CPP_GREATER: return c == '>' || c == '?';
1629 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1630 case CPP_PLUS: return c == '+';
1631 case CPP_MINUS: return c == '-' || c == '>';
1632 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1633 case CPP_MOD: return c == ':' || c == '>';
1634 case CPP_AND: return c == '&';
1635 case CPP_OR: return c == '|';
1636 case CPP_COLON: return c == ':' || c == '>';
1637 case CPP_DEREF: return c == '*';
1638 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1639 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1640 case CPP_NAME: return ((b == CPP_NUMBER
1641 && name_p (pfile, &token2->val.str))
1643 || b == CPP_CHAR || b == CPP_STRING); /* L */
1644 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1645 || c == '.' || c == '+' || c == '-');
/* Only relevant with the objc option: an at-sign followed by a name
   or a string.  */
1646 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1647 && token1->val.c == '@'
1648 && (b == CPP_NAME || b == CPP_STRING));
1655 /* Output all the remaining tokens on the current line, and a newline
1656 character, to FP. Leading whitespace is removed. */
1658 cpp_output_line (pfile, fp)
/* Clearing PREV_WHITE on the first token is what drops the leading
   whitespace of the line.  */
1664 cpp_get_token (pfile, &token);
1665 token.flags &= ~PREV_WHITE;
/* Write tokens until cpp_get_token delivers CPP_EOF.  */
1666 while (token.type != CPP_EOF)
1668 cpp_output_token (&token, fp);
1669 cpp_get_token (pfile, &token);
1675 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header is not visible in this excerpt;
   presumably this is the body of hex_digit_value, which the escape
   parsing code calls.  Letters a-f and A-F map to 10 through 15.  */
1680 if (c >= 'a' && c <= 'f')
1681 return c - 'a' + 10;
1682 if (c >= 'A' && c <= 'F')
1683 return c - 'A' + 10;
/* Decimal digits; the return statement of this branch and whatever
   follows it are not visible in this excerpt.  */
1684 if (c >= '0' && c <= '9')
1689 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1690 failure if cpplib is not parsing C++ or C99. Such failure is
1691 silent, and no variables are updated. Otherwise returns 0, and
1692 warns if -Wtraditional.
1694 [lex.charset]: The character designated by the universal character
1695 name \UNNNNNNNN is that character whose character short name in
1696 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1697 universal character name \uNNNN is that character whose character
1698 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1699 for a universal character name is less than 0x20 or in the range
1700 0x7F-0x9F (inclusive), or if the universal character name
1701 designates a character in the basic source character set, then the
1702 program is ill-formed.
1704 We assume that wchar_t is Unicode, so we don't need to do any
1705 mapping. Is this ever wrong?
1707 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1708 LIMIT is the end of the string or charconst. PSTR is updated to
1709 point after the UCS on return, and the UCS is written into PC. */
1712 maybe_read_ucs (pfile, pstr, limit, pc)
1714 const unsigned char **pstr;
1715 const unsigned char *limit;
/* P walks the input, CODE accumulates the value, and C starts out as
   the character that PC points to.  */
1718 const unsigned char *p = *pstr;
1719 unsigned int code = 0;
1720 unsigned int c = *pc, length;
1722 /* Only attempt to interpret a UCS for C++ and C99. */
1723 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1726 if (CPP_WTRADITIONAL (pfile))
1727 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* A lower-case u takes four hex digits; anything else takes eight.  */
1729 length = (c == 'u' ? 4: 8);
/* Fewer bytes remain before LIMIT than the escape needs.  */
1731 if ((size_t) (limit - p) < length)
1733 cpp_error (pfile, "incomplete universal-character-name");
1734 /* Skip to the end to avoid more diagnostics. */
/* Accumulate the digits, four bits each.  LENGTH counts down, so it
   is zero once the loop has run to completion; the range check
   further down uses that to tell whether an error was already
   reported.  */
1739 for (; length; length--, p++)
1743 code = (code << 4) + hex_digit_value (c);
1747 "non-hex digit '%c' in universal-character-name", c);
1748 /* We shouldn't skip in case there are multibyte chars. */
1754 #ifdef TARGET_EBCDIC
1755 cpp_error (pfile, "universal-character-name on EBCDIC target");
1756 code = 0x3f; /* EBCDIC invalid character */
1758 /* True extended characters are OK. */
/* The visible part of the acceptance test excludes values with the
   top bit set and the range 0xD800-0xDFFF (the UTF-16 surrogates);
   its first operand is on a line not visible in this excerpt.  */
1760 && !(code & 0x80000000)
1761 && !(code >= 0xD800 && code <= 0xDFFF))
1763 /* The standard permits $, @ and ` to be specified as UCNs. We use
1764 hex escapes so that this also works with EBCDIC hosts. */
1765 else if (code == 0x24 || code == 0x40 || code == 0x60)
1767 /* Don't give another error if one occurred above. */
1768 else if (length == 0)
1769 cpp_error (pfile, "universal-character-name out of range");
1777 /* Interpret an escape sequence, and return its value. PSTR points to
1778 the input pointer, which is just after the backslash. LIMIT is how
1779 much text we have. MASK is a bitmask for the precision for the
1780 destination type (char or wchar_t). TRADITIONAL, if true, does not
1781 interpret escapes that did not exist in traditional C.
1783 Handles all relevant diagnostics. */
1786 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1788 const unsigned char **pstr;
1789 const unsigned char *limit;
1790 unsigned HOST_WIDE_INT mask;
/* C is the character at *PSTR, i.e. the one right after the
   backslash; STR already points past it.  */
1794 const unsigned char *str = *pstr;
1795 unsigned int c = *str++;
/* These four stand for themselves.  */
1799 case '\\': case '\'': case '"': case '?': break;
/* Simple escapes map to the TARGET_ control-character values.  */
1800 case 'b': c = TARGET_BS; break;
1801 case 'f': c = TARGET_FF; break;
1802 case 'n': c = TARGET_NEWLINE; break;
1803 case 'r': c = TARGET_CR; break;
1804 case 't': c = TARGET_TAB; break;
1805 case 'v': c = TARGET_VT; break;
1807 case '(': case '{': case '[': case '%':
1808 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1809 '\%' is used to prevent SCCS from getting confused. */
/* Counted as unknown only when pedantic.  */
1810 unknown = CPP_PEDANTIC (pfile);
1814 if (CPP_WTRADITIONAL (pfile))
1815 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1821 if (CPP_PEDANTIC (pfile))
1822 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* maybe_read_ucs may update both STR and C; its result decides
   whether the escape counts as unknown.  */
1827 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1831 if (CPP_WTRADITIONAL (pfile))
1832 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1836 unsigned int i = 0, overflow = 0;
1837 int digits_found = 0;
/* Before each digit is added, record in OVERFLOW any bits that the
   left shift by four would push out of I.  */
1845 overflow |= i ^ (i << 4 >> 4);
1846 i = (i << 4) + hex_digit_value (c);
1851 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range when the accumulator overflowed or when the value has
   bits outside MASK.  */
1853 if (overflow | (i != (i & mask)))
1855 cpp_pedwarn (pfile, "hex escape sequence out of range");
/* Octal escape; the first digit is already in C.  */
1862 case '0': case '1': case '2': case '3':
1863 case '4': case '5': case '6': case '7':
1865 unsigned int i = c - '0';
/* NOTE(review): the declaration of COUNT is not visible here;
   presumably it starts at zero, which gives at most three octal
   digits in total.  */
1868 while (str < limit && ++count < 3)
1871 if (c < '0' || c > '7')
1874 i = (i << 3) + c - '0';
1877 if (i != (i & mask))
1879 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two message forms for an unknown escape, showing either the
   character itself or its octal code; the condition that chooses
   between them is not visible in this excerpt.  */
1894 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1896 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1900 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Fall back to CHAR_TYPE_SIZE and WCHAR_TYPE_SIZE when the MAX_
   variants have not been defined already.  */
1906 #ifndef MAX_CHAR_TYPE_SIZE
1907 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
1910 #ifndef MAX_WCHAR_TYPE_SIZE
1911 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1914 /* Interpret a (possibly wide) character constant in TOKEN.
1915 WARN_MULTI warns about multi-character charconsts, if not
1916 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1917 that did not exist in traditional C. PCHARS_SEEN points to a
1918 variable that is filled in with the number of characters seen. */
1920 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1922 const cpp_token *token;
1925 unsigned int *pchars_seen;
/* STR and LIMIT delimit the stored text of the constant.  */
1927 const unsigned char *str = token->val.str.text;
1928 const unsigned char *limit = str + token->val.str.len;
1929 unsigned int chars_seen = 0;
1930 unsigned int width, max_chars, c;
1931 unsigned HOST_WIDE_INT mask;
1932 HOST_WIDE_INT result = 0;
1934 #ifdef MULTIBYTE_CHARS
1935 (void) local_mbtowc (NULL, NULL, 0);
1938 /* Width in bits. */
1939 if (token->type == CPP_CHAR)
1940 width = MAX_CHAR_TYPE_SIZE;
1942 width = MAX_WCHAR_TYPE_SIZE;
/* MASK keeps the low WIDTH bits.  The shift is only performed when
   WIDTH is narrower than HOST_WIDE_INT; the other branch is not
   visible in this excerpt.  */
1944 if (width < HOST_BITS_PER_WIDE_INT)
1945 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* Number of WIDTH-bit characters that fit in a HOST_WIDE_INT.  */
1948 max_chars = HOST_BITS_PER_WIDE_INT / width;
1952 #ifdef MULTIBYTE_CHARS
1956 char_len = local_mbtowc (&wc, str, limit - str);
1959 cpp_warning (pfile, "ignoring invalid multibyte character");
/* MASK and TRADITIONAL are passed through to the escape parser, which
   also advances STR.  */
1972 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1974 #ifdef MAP_CHARACTER
1976 c = MAP_CHARACTER (c);
1979 /* Merge character into result; ignore excess chars. */
1980 if (++chars_seen <= max_chars)
/* Earlier characters are shifted up by WIDTH, so the most recently
   read character occupies the low bits.  */
1982 if (width < HOST_BITS_PER_WIDE_INT)
1983 result = (result << width) | (c & mask);
1989 if (chars_seen == 0)
1990 cpp_error (pfile, "empty character constant");
1991 else if (chars_seen > max_chars)
/* Clamp the count to the number of characters merged into RESULT.  */
1993 chars_seen = max_chars;
1994 cpp_warning (pfile, "character constant too long");
1996 else if (chars_seen > 1 && !traditional && warn_multi)
1997 cpp_warning (pfile, "multi-character character constant");
1999 /* If char type is signed, sign-extend the constant. The
2000 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
2001 if (token->type == CPP_CHAR && chars_seen)
/* NOTE(review): this local MASK shadows the function-level MASK
   declared above.  CHARS_SEEN can be as large as MAX_CHARS, which is
   derived from HOST_BITS_PER_WIDE_INT, so NBITS may exceed
   HOST_BITS_PER_INT and make the shift count below negative; confirm
   whether hosts with HOST_WIDE_INT wider than int can reach this.  */
2003 unsigned int nbits = chars_seen * width;
2004 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* Tests whether __CHAR_UNSIGNED__ is a macro or the top bit of the
   NBITS-wide value is clear; the statements selected by this test are
   not visible in this excerpt.  */
2006 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
2007 || ((result >> (nbits - 1)) & 1) == 0)
2013 *pchars_seen = chars_seen;
/* Default pool alignment: the offset of member u within struct dummy
   (declared on lines not visible in this excerpt).  */
2029 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* chunk_suitable: CHUNK qualifies when it is non-null, is not the
   locked chunk of POOL, and spans at least twice SIZE bytes between
   its base and its limit.  */
2032 chunk_suitable (pool, chunk, size)
2037 /* Being at least twice SIZE means we can use memcpy in
2038 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2040 return (chunk && pool->locked != chunk
2041 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2044 /* Returns the end of the new pool. PTR points to a char in the old
2045 pool, and is updated to point to the same char in the new pool. */
2047 _cpp_next_chunk (pool, len, ptr)
2050 unsigned char **ptr;
/* The chunk that follows the current one is the first candidate.  */
2052 cpp_chunk *chunk = pool->cur->next;
2054 /* LEN is the minimum size we want in the new pool. */
2055 len += POOL_ROOM (pool);
/* If the candidate does not qualify, allocate a new chunk sized from
   twice POOL_SIZE plus LEN and link it in right after the current
   chunk.  */
2056 if (! chunk_suitable (pool, chunk, len))
2058 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2060 chunk->next = pool->cur->next;
2061 pool->cur->next = chunk;
2064 /* Update the pointer before changing chunk's front. */
/* NOTE(review): _cpp_pool_reserve passes a null PTR; the guard for
   that case is presumably on a line not visible in this excerpt.  */
2066 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy POOL_ROOM bytes from the old front to the base of the chosen
   chunk, then reset the front of that chunk to its base.  */
2068 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2069 chunk->front = chunk->base;
2072 return POOL_LIMIT (pool);
/* NOTE(review): the function header is not visible in this excerpt;
   presumably this is the body of new_chunk, which _cpp_next_chunk and
   _cpp_init_pool call.  */
2079 unsigned char *base;
/* One xmalloc block holds SIZE data bytes, with SIZE first rounded
   via POOL_ALIGN to DEFAULT_ALIGNMENT, followed by the descriptor.  */
2082 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2083 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2084 /* Put the chunk descriptor at the end. Then chunk overruns will
2085 cause obvious chaos. */
2086 result = (cpp_chunk *) (base + size);
/* The data area runs from BASE up to BASE + SIZE; FRONT starts at its
   beginning.  */
2087 result->base = base;
2088 result->front = base;
2089 result->limit = base + size;
/* _cpp_init_pool: sets the alignment of POOL and gives it a first
   chunk of SIZE bytes.  */
2096 _cpp_init_pool (pool, size, align, temp)
2098 unsigned int size, align, temp;
/* ALIGN falls back to DEFAULT_ALIGNMENT; the condition for that is
   not visible in this excerpt (presumably ALIGN being zero).  */
2101 align = DEFAULT_ALIGNMENT;
/* Nonzero here means ALIGN is not a power of two; the action taken is
   not visible in this excerpt.  */
2102 if (align & (align - 1))
2104 pool->align = align;
2105 pool->cur = new_chunk (size);
/* Link the first chunk to itself.  NOTE(review): any condition
   guarding this, presumably involving TEMP, is not visible here.  */
2109 pool->cur->next = pool->cur;
/* _cpp_lock_pool: increments the lock count of POOL; only the
   transition from zero records the current chunk as the locked one.  */
2113 _cpp_lock_pool (pool)
2116 if (pool->locks++ == 0)
2117 pool->locked = pool->cur;
/* _cpp_unlock_pool: decrements the lock count of POOL.  The action
   taken when the count returns to zero is not visible in this
   excerpt.  */
2121 _cpp_unlock_pool (pool)
2124 if (--pool->locks == 0)
/* _cpp_free_pool: walks the chunk list starting from the current
   chunk.  The loop body is not visible in this excerpt.  */
2129 _cpp_free_pool (pool)
2132 cpp_chunk *chunk = pool->cur, *next;
/* Stop at a null link or on arriving back at the current chunk.  */
2140 while (chunk && chunk != pool->cur);
2143 /* Reserve LEN bytes from a memory pool. */
2145 _cpp_pool_reserve (pool, len)
/* Round LEN via POOL_ALIGN to the alignment of POOL; move to another
   chunk when the current one has less room than that.  */
2149 len = POOL_ALIGN (len, pool->align);
2150 if (len > (unsigned int) POOL_ROOM (pool))
2151 _cpp_next_chunk (pool, len, 0);
/* The visible code returns the front of the pool without committing
   anything.  */
2153 return POOL_FRONT (pool);
2156 /* Allocate LEN bytes from a memory pool. */
2158 _cpp_pool_alloc (pool, len)
/* Reserve LEN bytes, then commit them with POOL_COMMIT (presumably
   advancing the front of the pool past them).  The return statement
   is not visible in this excerpt.  */
2162 unsigned char *result = _cpp_pool_reserve (pool, len);
2164 POOL_COMMIT (pool, len);