1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
23 /* This lexer works with a single pass of the file. Recently I
24 re-wrote it to minimize the places where we step backwards in the
25 input stream, to make future changes to support multi-byte
26 character sets fairly straight-forward.
28 There is now only one routine where we do step backwards:
29 skip_escaped_newlines. This routine could probably also be changed
30 so that it doesn't need to step back. One possibility is to use a
31 trick similar to that used in lex_dot and lex_percent. Two
32 extra characters might be needed, but skip_escaped_newlines itself
33 would probably be the only place that needs to be aware of that,
34 and changes to the remaining routines would probably only be needed
35 if they process a backslash. */
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
54 /* Tokens with SPELL_STRING store their spelling in the token list,
55 and its length in token->val.str.len. */
/* Per-token-type spelling data: a spelling category plus a name.
   token_spellings has N_TTYPES entries generated from TTYPE_TABLE
   through the OP and TK macros, and is indexed by token type via
   TOKEN_SPELL and TOKEN_NAME.  digraph_spellings holds the alternative
   spellings used for tokens flagged DIGRAPH.  */
67 enum spell_type category;
68 const unsigned char *name;
71 const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
74 #define OP(e, s) { SPELL_OPERATOR, U s },
75 #define TK(e, s) { s, U STRINGX (e) },
76 const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
80 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
81 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
/* Forward declarations of the file-local (static) helpers used by the
   lexer.  Each parameter list is wrapped in PARAMS.  */
83 static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
84 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
85 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
87 static int skip_block_comment PARAMS ((cpp_reader *));
88 static int skip_line_comment PARAMS ((cpp_reader *));
89 static void adjust_column PARAMS ((cpp_reader *));
90 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
91 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
92 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
93 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
94 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
95 static void unterminated PARAMS ((cpp_reader *, int));
96 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
97 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
98 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
99 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
100 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
101 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
102 const unsigned char *, unsigned int *));
104 static cpp_chunk *new_chunk PARAMS ((unsigned int));
105 static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
106 static unsigned int hex_digit_value PARAMS ((unsigned int));
110 Compares the token TOKEN to the NUL-terminated string STRING.
111 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
/* TOKEN is the token to test and STRING the spelling to compare with.
   The token type is checked against CPP_NAME first; for a name, the
   result is the negated ustrcmp of the hash node name against STRING,
   so a match yields nonzero.  */
114 cpp_ideq (token, string)
115 const cpp_token *token;
118 if (token->type != CPP_NAME)
121 return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
124 /* Call when meeting a newline. Returns the character after the newline
125 (or carriage-return newline combination), or EOF. */
/* PFILE is the reader and NEWLINE_CHAR the newline character just read.
   Updates pfile->pseudo_newlines, clears the col_adjust of the buffer
   and moves its line_base to the start of the new line.  The test
   comparing the sum of the next character and NEWLINE_CHAR with the
   sum of CR and LF detects a two-character newline in either order
   (given that NEWLINE_CHAR is itself CR or LF); in that case the second
   character is consumed as well.  The character that follows, or EOF
   at the end of the buffer, is stored in buffer->read_ahead.  */
127 handle_newline (pfile, newline_char)
129 cppchar_t newline_char;
132 cppchar_t next = EOF;
135 pfile->pseudo_newlines++;
137 buffer = pfile->buffer;
138 buffer->col_adjust = 0;
140 buffer->line_base = buffer->cur;
142 /* Handle CR-LF and LF-CR combinations, get the next character. */
143 if (buffer->cur < buffer->rlimit)
145 next = *buffer->cur++;
146 if (next + newline_char == '\r' + '\n')
148 buffer->line_base = buffer->cur;
149 if (buffer->cur < buffer->rlimit)
150 next = *buffer->cur++;
156 buffer->read_ahead = next;
160 /* Subroutine of skip_escaped_newlines; called when a trigraph is
161 encountered. It warns if necessary, and returns true if the
162 trigraph should be honoured. FROM_CHAR is the third character of a
163 trigraph, and presumed to be the previous character for position
/* PFILE is the reader and FROM_CHAR the third character of the
   trigraph.  The answer starts out as the value of the trigraphs
   option.  If warn_trigraphs is set and no comment is being lexed, a
   warning is issued two columns before the current position: either
   that the trigraph was converted, or that it was ignored.  The
   ignored warning is suppressed when buffer->cur equals
   buffer->last_Wtrigraphs, which is updated each time it is given, so
   the same position is not reported twice.  */
166 trigraph_ok (pfile, from_char)
170 int accept = CPP_OPTION (pfile, trigraphs);
172 /* Don't warn about trigraphs in comments. */
173 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
175 cpp_buffer *buffer = pfile->buffer;
177 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
178 "trigraph ??%c converted to %c",
180 (int) _cpp_trigraph_map[from_char]);
181 else if (buffer->cur != buffer->last_Wtrigraphs)
183 buffer->last_Wtrigraphs = buffer->cur;
184 cpp_warning_with_line (pfile, buffer->lineno,
185 CPP_BUF_COL (buffer) - 2,
186 "trigraph ??%c ignored", (int) from_char);
193 /* Assumes local variables buffer and result. */
/* ACCEPT_CHAR (T) makes T the type of RESULT and discards the pending
   read_ahead character of BUFFER, i.e. the character just peeked at
   becomes part of the token.  */
194 #define ACCEPT_CHAR(t) \
195 do { result->type = t; buffer->read_ahead = EOF; } while (0)
197 /* When we move to multibyte character sets, add to these something
198 that saves and restores the state of the multibyte conversion
199 library. This probably involves saving and restoring a "cookie".
200 In the case of glibc it is an 8-byte structure, so is not a high
201 overhead operation. In any case, it's out of the fast path. */
/* SAVE_STATE stores buffer->cur in, and RESTORE_STATE reloads it from,
   a local variable named saved_cur that the using function must
   declare.  */
202 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
203 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
205 /* Skips any escaped newlines introduced by NEXT, which is either a
206 '?' or a '\\'. Returns the next character, which will also have
207 been placed in buffer->read_ahead. This routine performs
208 preprocessing stages 1 and 2 of the ISO C standard. */
/* BUFFER is the buffer being lexed and NEXT the question mark or
   backslash just read.  Buffers marked from_stage3 are left alone.
   Otherwise the loop runs while NEXT is a backslash or a question
   mark:
   - a second question mark followed by a character with an entry in
     _cpp_trigraph_map forms a trigraph, which replaces NEXT when
     trigraph_ok allows it;
   - a backslash followed by optional non-vertical space and then a
     vertical-space character is an escaped newline, which is removed
     by calling handle_newline.
   A warning is given when space separates the backslash from the
   newline (not inside comments), and a pedwarn for a backslash-newline
   at the end of the file.  The final value of NEXT is stored in
   buffer->read_ahead.  */
210 skip_escaped_newlines (buffer, next)
214 /* Only do this if we apply stages 1 and 2. */
215 if (!buffer->from_stage3)
218 const unsigned char *saved_cur;
223 if (buffer->cur == buffer->rlimit)
229 next1 = *buffer->cur++;
230 if (next1 != '?' || buffer->cur == buffer->rlimit)
236 next1 = *buffer->cur++;
237 if (!_cpp_trigraph_map[next1]
238 || !trigraph_ok (buffer->pfile, next1))
244 /* We have a full trigraph here. */
245 next = _cpp_trigraph_map[next1];
246 if (next != '\\' || buffer->cur == buffer->rlimit)
251 /* We have a backslash, and room for at least one more character. */
255 next1 = *buffer->cur++;
256 if (!is_nvspace (next1))
260 while (buffer->cur < buffer->rlimit);
262 if (!is_vspace (next1))
268 if (space && !buffer->pfile->state.lexing_comment)
269 cpp_warning (buffer->pfile,
270 "backslash and newline separated by space");
272 next = handle_newline (buffer->pfile, next1);
274 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
276 while (next == '\\' || next == '?');
279 buffer->read_ahead = next;
283 /* Obtain the next character, after trigraph conversion and skipping
284 an arbitrary string of escaped newlines. The common case of no
285 trigraphs or escaped newlines falls through quickly. */
/* BUFFER is the buffer to read from.  The character obtained is EOF
   when buffer->cur has already reached buffer->rlimit; a question mark
   or backslash is first passed through skip_escaped_newlines.  The
   character is also stored in buffer->read_ahead.  */
287 get_effective_char (buffer)
290 cppchar_t next = EOF;
292 if (buffer->cur < buffer->rlimit)
294 next = *buffer->cur++;
296 /* '?' can introduce trigraphs (and therefore backslash); '\\'
297 can introduce escaped newlines, which we want to skip, or
298 UCNs, which, depending upon lexer state, we will handle in
300 if (next == '?' || next == '\\')
301 next = skip_escaped_newlines (buffer, next);
304 buffer->read_ahead = next;
308 /* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
310 non-zero if comment terminated by EOF, zero otherwise. */
/* PFILE is the reader.  pfile->state.lexing_comment is set for the
   duration of the scan and cleared afterwards, and buffer->read_ahead
   is reset to EOF on exit.  C holds the current character and PREVC
   the one before it.  Newlines go through handle_newline and
   adjust_column is called to keep column tracking right (presumably
   for tabs; the test itself is not visible here).  The result is zero
   only when the scan stopped on a slash whose predecessor was an
   asterisk.  */
312 skip_block_comment (pfile)
315 cpp_buffer *buffer = pfile->buffer;
316 cppchar_t c = EOF, prevc = EOF;
318 pfile->state.lexing_comment = 1;
319 while (buffer->cur != buffer->rlimit)
321 prevc = c, c = *buffer->cur++;
324 /* FIXME: For speed, create a new character class of characters
325 of interest inside block comments. */
326 if (c == '?' || c == '\\')
327 c = skip_escaped_newlines (buffer, c);
329 /* People like decorating comments with '*', so check for '/'
330 instead for efficiency. */
336 /* Warn about potential nested comments, but not if the '/'
337 comes immediately before the true comment delimiter.
338 Don't bother to get it right across escaped newlines. */
339 if (CPP_OPTION (pfile, warn_comments)
340 && buffer->cur != buffer->rlimit)
342 prevc = c, c = *buffer->cur++;
343 if (c == '*' && buffer->cur != buffer->rlimit)
345 prevc = c, c = *buffer->cur++;
347 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
348 CPP_BUF_COL (buffer),
349 "\"/*\" within comment");
354 else if (is_vspace (c))
356 prevc = c, c = handle_newline (pfile, c);
360 adjust_column (pfile);
363 pfile->state.lexing_comment = 0;
364 buffer->read_ahead = EOF;
365 return c != '/' || prevc != '*';
368 /* Skip a C++ line comment. Handles escaped newlines. Returns
369 non-zero if a multiline comment. The following new line, if any,
370 is left in buffer->read_ahead. */
/* PFILE is the reader.  Scans, with pfile->state.lexing_comment set,
   until a vertical-space character is found or the buffer is
   exhausted; question marks and backslashes go through
   skip_escaped_newlines.  The line number recorded on entry is
   compared with the final one, so the result is nonzero exactly when
   an escaped newline continued the comment onto another line.  */
372 skip_line_comment (pfile)
375 cpp_buffer *buffer = pfile->buffer;
376 unsigned int orig_lineno = buffer->lineno;
379 pfile->state.lexing_comment = 1;
383 if (buffer->cur == buffer->rlimit)
387 if (c == '?' || c == '\\')
388 c = skip_escaped_newlines (buffer, c);
390 while (!is_vspace (c));
392 pfile->state.lexing_comment = 0;
393 buffer->read_ahead = c; /* Leave any newline for caller. */
394 return orig_lineno != buffer->lineno;
397 /* pfile->buffer->cur is one beyond the \t character. Update
398 col_adjust so we track the column correctly. */
/* PFILE is the reader whose current buffer has just consumed a tab.
   The amount added to buffer->col_adjust is the distance from COL to
   the next multiple of the tabstop option, less one; for example a
   COL of 0 with a tabstop of 8 adds 7.  */
400 adjust_column (pfile)
403 cpp_buffer *buffer = pfile->buffer;
404 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
406 /* Round it up to multiple of the tabstop, but subtract 1 since the
407 tab itself occupies a character position. */
408 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
409 - col % CPP_OPTION (pfile, tabstop)) - 1;
412 /* Skips whitespace, saving the next non-whitespace character.
413 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
414 tokens might be assigned an incorrect column. */
/* PFILE is the reader and C the first whitespace character.  The loop
   continues while C is non-vertical space and stops early at the end
   of the buffer.  Along the way adjust_column is called (for tabs,
   going by the surrounding comments), embedded nulls draw a warning
   (WARNED presumably limits this to once per call; confirm against the
   full source), and form feeds or vertical tabs inside a directive
   draw a pedantic warning.  The character that ended the loop is left
   in buffer->read_ahead.  */
416 skip_whitespace (pfile, c)
420 cpp_buffer *buffer = pfile->buffer;
421 unsigned int warned = 0;
425 /* Horizontal space always OK. */
429 adjust_column (pfile);
430 /* Just \f \v or \0 left. */
435 cpp_warning (pfile, "null character(s) ignored");
439 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
440 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
441 CPP_BUF_COL (buffer),
442 "%s in preprocessing directive",
443 c == '\f' ? "form feed" : "vertical tab");
446 if (buffer->cur == buffer->rlimit)
450 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
451 while (is_nvspace (c));
453 /* Remember the next character. */
454 buffer->read_ahead = c;
457 /* See if the characters of a number token are valid in a name (no
/* PFILE is the reader and STRING the spelling to examine.  Each of the
   string->len characters of string->text is tested with is_idchar.  */
460 name_p (pfile, string)
462 const cpp_string *string;
466 for (i = 0; i < string->len; i++)
467 if (!is_idchar (string->text[i]))
473 /* Parse an identifier, skipping embedded backslash-newlines.
474 Calculate the hash value of the token while parsing, for improved
475 performance. The hashing algorithm *must* match cpp_lookup(). */
/* PFILE is the reader and C the first character of the identifier.
   Characters are accumulated on the obstack of pfile->hash_table; the
   outer loop resumes collecting after skip_escaped_newlines has dealt
   with a question mark or backslash.  The first character after the
   identifier is left in buffer->read_ahead.  The finished spelling is
   null-terminated and handed to ht_lookup with HT_ALLOCED, and the
   resulting node is kept in RESULT.  Outside skipped blocks this also
   diagnoses dollar signs (pedantic only), use of a poisoned
   identifier, and __VA_ARGS__ where va_args_ok is not set.  */
477 static cpp_hashnode *
478 parse_identifier (pfile, c)
482 cpp_hashnode *result;
483 cpp_buffer *buffer = pfile->buffer;
484 unsigned int saw_dollar = 0, len;
485 struct obstack *stack = &pfile->hash_table->stack;
491 obstack_1grow (stack, c);
497 if (buffer->cur == buffer->rlimit)
502 while (is_idchar (c));
504 /* Potential escaped newline? */
505 if (c != '?' && c != '\\')
507 c = skip_escaped_newlines (buffer, c);
509 while (is_idchar (c));
511 /* Remember the next character. */
512 buffer->read_ahead = c;
514 /* $ is not an identifier character in the standard, but is commonly
515 accepted as an extension. Don't warn about it in skipped
516 conditional blocks. */
517 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
518 cpp_pedwarn (pfile, "'$' character(s) in identifier");
520 /* Identifiers are null-terminated. */
521 len = obstack_object_size (stack);
522 obstack_1grow (stack, '\0');
524 /* This routine commits the memory if necessary. */
525 result = (cpp_hashnode *)
526 ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
528 /* Some identifiers require diagnostics when lexed. */
529 if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
531 /* It is allowed to poison the same identifier twice. */
532 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
533 cpp_error (pfile, "attempt to use poisoned \"%s\"",
536 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
537 replacement list of a variadic macro. */
538 if (result == pfile->spec_nodes.n__VA_ARGS__
539 && !pfile->state.va_args_ok)
540 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
546 /* Parse a number, skipping embedded backslash-newlines. */
/* PFILE is the reader, NUMBER receives the spelling, C is the first
   character, and LEADING_PERIOD is set by lex_dot when a period
   precedes C.  The text is built at the front of pfile->ident_pool,
   moving to a new chunk through _cpp_next_chunk when room for the
   terminating null would run out.  Collection continues across
   escaped newlines while the character is a number character, a
   period, or a sign accepted by VALID_SIGN after the previous
   character.  The first character after the number is left in
   buffer->read_ahead.  NUMBER->len excludes the terminating null,
   which is included in the POOL_COMMIT.  */
548 parse_number (pfile, number, c, leading_period)
554 cpp_buffer *buffer = pfile->buffer;
555 cpp_pool *pool = &pfile->ident_pool;
556 unsigned char *dest, *limit;
558 dest = POOL_FRONT (pool);
559 limit = POOL_LIMIT (pool);
561 /* Place a leading period. */
565 limit = _cpp_next_chunk (pool, 0, &dest);
573 /* Need room for terminating null. */
574 if (dest + 1 >= limit)
575 limit = _cpp_next_chunk (pool, 0, &dest);
579 if (buffer->cur == buffer->rlimit)
584 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
586 /* Potential escaped newline? */
587 if (c != '?' && c != '\\')
589 c = skip_escaped_newlines (buffer, c);
591 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
593 /* Remember the next character. */
594 buffer->read_ahead = c;
596 /* Null-terminate the number. */
599 number->text = POOL_FRONT (pool);
600 number->len = dest - number->text;
601 POOL_COMMIT (pool, number->len + 1);
604 /* Subroutine of parse_string. Emits error for unterminated strings. */
/* PFILE is the reader and TERM the terminator character that was not
   found.  For a double-quoted string, when pfile->mlstring_pos.line is
   nonzero and differs from the current lexer line, a second error
   points at that recorded position as the possible start of the
   literal, and the recorded line is then reset to zero.  */
606 unterminated (pfile, term)
610 cpp_error (pfile, "missing terminating %c character", term);
612 if (term == '\"' && pfile->mlstring_pos.line
613 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
615 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
616 pfile->mlstring_pos.col,
617 "possible start of unterminated string literal");
618 pfile->mlstring_pos.line = 0;
622 /* Subroutine of parse_string. */
/* PFILE is the reader and DEST points just past the characters stored
   so far at the front of pfile->ident_pool.  TEMP walks backwards over
   the run of backslashes that ends at DEST, never going before the
   front of the pool; the result is nonzero when the length of that run
   is even.  */
624 unescaped_terminator_p (pfile, dest)
626 const unsigned char *dest;
628 const unsigned char *start, *temp;
630 /* In #include-style directives, terminators are not escapable. */
631 if (pfile->state.angled_headers)
634 start = POOL_FRONT (&pfile->ident_pool);
636 /* An odd number of consecutive backslashes represents an escaped
638 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
641 return ((dest - temp) & 1) == 0;
644 /* Parses a string, character constant, or angle-bracketed header file
645 name. Handles embedded trigraphs and escaped newlines. The stored
646 string is guaranteed NUL-terminated, but it is not guaranteed that
647 this is the first NUL since embedded NULs are preserved.
649 Multi-line strings are allowed, but they are deprecated. */
/* PFILE is the reader, TOKEN receives the text in val.str, and
   TERMINATOR is the closing character being looked for.  The text is
   accumulated at the front of pfile->ident_pool, moving to a new chunk
   through _cpp_next_chunk when needed.  A terminator counts only when
   unescaped_terminator_p agrees.  A newline ends the literal silently
   in assembly language (except for header names), is an error for
   anything but a double-quoted string outside #include-style
   directives, and otherwise continues the string with a deprecation
   pedwarn, recording the start in pfile->mlstring_pos if none is
   recorded yet.  NULS counts embedded null characters for the warning.
   The stored length excludes the terminating NUL, which is included in
   the POOL_COMMIT.  */
651 parse_string (pfile, token, terminator)
654 cppchar_t terminator;
656 cpp_buffer *buffer = pfile->buffer;
657 cpp_pool *pool = &pfile->ident_pool;
658 unsigned char *dest, *limit;
660 unsigned int nulls = 0;
662 dest = POOL_FRONT (pool);
663 limit = POOL_LIMIT (pool);
667 if (buffer->cur == buffer->rlimit)
673 /* We need space for the terminating NUL. */
675 limit = _cpp_next_chunk (pool, 0, &dest);
679 unterminated (pfile, terminator);
683 /* Handle trigraphs, escaped newlines etc. */
684 if (c == '?' || c == '\\')
685 c = skip_escaped_newlines (buffer, c);
687 if (c == terminator && unescaped_terminator_p (pfile, dest))
692 else if (is_vspace (c))
694 /* In assembly language, silently terminate string and
695 character literals at end of line. This is a kludge
696 around not knowing where comments are. */
697 if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
700 /* Character constants and header names may not extend over
701 multiple lines. In Standard C, neither may strings.
702 Unfortunately, we accept multiline strings as an
703 extension, except in #include family directives. */
704 if (terminator != '"' || pfile->state.angled_headers)
706 unterminated (pfile, terminator);
710 cpp_pedwarn (pfile, "multi-line string literals are deprecated");
711 if (pfile->mlstring_pos.line == 0)
712 pfile->mlstring_pos = pfile->lexer_pos;
714 c = handle_newline (pfile, c);
721 cpp_warning (pfile, "null character(s) preserved in literal");
727 /* Remember the next character. */
728 buffer->read_ahead = c;
731 token->val.str.text = POOL_FRONT (pool);
732 token->val.str.len = dest - token->val.str.text;
733 POOL_COMMIT (pool, token->val.str.len + 1);
736 /* The stored comment includes the comment start and any terminator. */
/* PFILE is the reader, TOKEN is turned into a CPP_COMMENT, and FROM
   points into the input buffer just after the initial slash of the
   comment.  LEN bytes are allocated from pfile->ident_pool; the input
   text starting at FROM is copied to offset 1 of that storage, leaving
   offset 0 for the initial slash.  A pending read_ahead character
   triggers the length adjustment described below.  */
738 save_comment (pfile, token, from)
741 const unsigned char *from;
743 unsigned char *buffer;
746 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
747 /* C++ comments probably (not definitely) have moved past a new
748 line, which we don't want to save in the comment. */
749 if (pfile->buffer->read_ahead != EOF)
751 buffer = _cpp_pool_alloc (&pfile->ident_pool, len);
753 token->type = CPP_COMMENT;
754 token->val.str.len = len;
755 token->val.str.text = buffer;
758 memcpy (buffer + 1, from, len - 1);
761 /* Subroutine of lex_token to handle '%'. A little tricky, since we
762 want to avoid stepping back when lexing %:%X. */
/* BUFFER is the buffer being lexed and RESULT the token under
   construction, which starts as CPP_MOD.  The next character comes
   from buffer->extra_char when a previous call left one there,
   otherwise from get_effective_char.  RESULT may become CPP_MOD_EQ or,
   when the digraphs option is on, CPP_HASH, CPP_PASTE or
   CPP_CLOSE_BRACE with the DIGRAPH flag.  When a second percent sign
   is not followed by a colon, the percent sign is put back in
   read_ahead and the character after it stays in extra_char for the
   next call.  */
764 lex_percent (buffer, result)
770 result->type = CPP_MOD;
771 /* Parsing %:%X could leave an extra character. */
772 if (buffer->extra_char == EOF)
773 c = get_effective_char (buffer);
776 c = buffer->read_ahead = buffer->extra_char;
777 buffer->extra_char = EOF;
781 ACCEPT_CHAR (CPP_MOD_EQ);
782 else if (CPP_OPTION (buffer->pfile, digraphs))
786 result->flags |= DIGRAPH;
787 ACCEPT_CHAR (CPP_HASH);
788 if (get_effective_char (buffer) == '%')
790 buffer->extra_char = get_effective_char (buffer);
791 if (buffer->extra_char == ':')
793 buffer->extra_char = EOF;
794 ACCEPT_CHAR (CPP_PASTE);
797 /* We'll catch the extra_char when we're called back. */
798 buffer->read_ahead = '%';
803 result->flags |= DIGRAPH;
804 ACCEPT_CHAR (CPP_CLOSE_BRACE);
809 /* Subroutine of lex_token to handle '.'. This is tricky, since we
810 want to avoid stepping back when lexing '...' or '.123'. In the
811 latter case we should also set a flag for parse_number. */
/* PFILE is the reader and RESULT the token under construction.  The
   next character comes from buffer->extra_char when a previous call
   left one there, otherwise from get_effective_char.  A digit makes
   the token a CPP_NUMBER, parsed by parse_number with its
   leading-period argument set.  Otherwise the token is CPP_DOT, which
   may be upgraded to CPP_ELLIPSIS or, for C++, CPP_DOT_STAR.  When a
   second period is not followed by a third, the period is put back in
   read_ahead and the character after it stays in extra_char for the
   next call.  */
813 lex_dot (pfile, result)
817 cpp_buffer *buffer = pfile->buffer;
820 /* Parsing ..X could leave an extra character. */
821 if (buffer->extra_char == EOF)
822 c = get_effective_char (buffer);
825 c = buffer->read_ahead = buffer->extra_char;
826 buffer->extra_char = EOF;
829 /* All known character sets have 0...9 contiguous. */
830 if (c >= '0' && c <= '9')
832 result->type = CPP_NUMBER;
833 parse_number (pfile, &result->val.str, c, 1);
837 result->type = CPP_DOT;
840 buffer->extra_char = get_effective_char (buffer);
841 if (buffer->extra_char == '.')
843 buffer->extra_char = EOF;
844 ACCEPT_CHAR (CPP_ELLIPSIS);
847 /* We'll catch the extra_char when we're called back. */
848 buffer->read_ahead = '.';
850 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
851 ACCEPT_CHAR (CPP_DOT_STAR);
/* Lex the next token from the current buffer of PFILE into RESULT.
   The token flags are seeded from buffer->saved_flags, and its line
   and column are recorded along with pfile->lexer_pos.  The first
   character is the pending buffer->read_ahead or, if there is none and
   the buffer is not exhausted, the next buffer character.  The switch
   on that character handles end of buffer, whitespace, newlines
   (which end a directive with CPP_EOF), escaped newlines and
   trigraphs, numbers, identifiers (including the L prefix and named
   operators), string-like literals, comments, header names, and the
   punctuators.  Multi-character operators are recognised by peeking
   with get_effective_char and confirming with ACCEPT_CHAR.  A hash
   that starts a line is handed to _cpp_handle_directive, or reported
   as an error when a macro argument is being collected.  Finally, a
   token lexed outside a directive clears pfile->mi_valid.  */
856 _cpp_lex_token (pfile, result)
862 const unsigned char *comment_start;
866 bol = pfile->state.next_bol;
868 buffer = pfile->buffer;
869 pfile->state.next_bol = 0;
870 result->flags = buffer->saved_flags;
871 buffer->saved_flags = 0;
873 pfile->lexer_pos.line = buffer->lineno;
874 result->line = pfile->line;
876 pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);
878 c = buffer->read_ahead;
879 if (c == EOF && buffer->cur < buffer->rlimit)
882 pfile->lexer_pos.col++;
884 result->col = pfile->lexer_pos.col;
887 buffer->read_ahead = EOF;
891 /* Non-empty files should end in a newline. Checking "bol" too
892 prevents multiple warnings when hitting the EOF more than
893 once, like in a directive. Don't warn for command line and
895 if (pfile->lexer_pos.col != 0 && !bol && !buffer->from_stage3)
896 cpp_pedwarn (pfile, "no newline at end of file");
897 pfile->state.next_bol = 1;
898 pfile->state.skipping = 0; /* In case missing #endif. */
899 result->type = CPP_EOF;
900 /* Don't do MI optimisation. */
903 case ' ': case '\t': case '\f': case '\v': case '\0':
904 skip_whitespace (pfile, c);
905 result->flags |= PREV_WHITE;
908 case '\n': case '\r':
909 if (!pfile->state.in_directive)
911 handle_newline (pfile, c);
912 if (!pfile->state.parsing_args)
913 pfile->pseudo_newlines = 0;
915 pfile->lexer_pos.output_line = buffer->lineno;
916 /* This is a new line, so clear any white space flag.
917 Newlines in arguments are white space (6.10.3.10);
918 parse_arg takes care of that. */
919 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
923 /* Don't let directives spill over to the next line. */
924 buffer->read_ahead = c;
925 pfile->state.next_bol = 1;
926 result->type = CPP_EOF;
927 /* Don't break; pfile->state.skipping might be true. */
932 /* These could start an escaped newline, or '?' a trigraph. Let
933 skip_escaped_newlines do all the work. */
935 unsigned int lineno = buffer->lineno;
937 c = skip_escaped_newlines (buffer, c);
938 if (lineno != buffer->lineno)
939 /* We had at least one escaped newline of some sort, and the
940 next character is in buffer->read_ahead. Update the
941 token's line and column. */
944 /* We are either the original '?' or '\\', or a trigraph. */
945 result->type = CPP_QUERY;
946 buffer->read_ahead = EOF;
954 case '0': case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7': case '8': case '9':
956 result->type = CPP_NUMBER;
957 parse_number (pfile, &result->val.str, c, 0);
961 if (!CPP_OPTION (pfile, dollars_in_ident))
963 /* Fall through... */
966 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
967 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
968 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
969 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
971 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
972 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
973 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
974 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
976 result->type = CPP_NAME;
977 result->val.node = parse_identifier (pfile, c);
979 /* 'L' may introduce wide characters or strings. */
980 if (result->val.node == pfile->spec_nodes.n_L)
982 c = buffer->read_ahead; /* For make_string. */
983 if (c == '\'' || c == '"')
985 ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
989 /* Convert named operators to their proper types. */
990 else if (result->val.node->flags & NODE_OPERATOR)
992 result->flags |= NAMED_OP;
993 result->type = result->val.node->value.operator;
999 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1001 parse_string (pfile, result, c);
1005 /* A potential block or line comment. */
1006 comment_start = buffer->cur;
1007 result->type = CPP_DIV;
1008 c = get_effective_char (buffer);
1010 ACCEPT_CHAR (CPP_DIV_EQ);
1011 if (c != '/' && c != '*')
1016 if (skip_block_comment (pfile))
1017 cpp_error_with_line (pfile, pfile->lexer_pos.line,
1018 pfile->lexer_pos.col,
1019 "unterminated comment");
1023 if (!CPP_OPTION (pfile, cplusplus_comments)
1024 && !CPP_IN_SYSTEM_HEADER (pfile))
1027 /* Warn about comments only if pedantically GNUC89, and not
1028 in system headers. */
1029 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1030 && ! buffer->warned_cplusplus_comments)
1033 "C++ style comments are not allowed in ISO C89");
1035 "(this will be reported only once per input file)");
1036 buffer->warned_cplusplus_comments = 1;
1039 /* Skip_line_comment updates buffer->read_ahead. */
1040 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1041 cpp_warning_with_line (pfile, pfile->lexer_pos.line,
1042 pfile->lexer_pos.col,
1043 "multi-line comment");
1046 /* Skipping the comment has updated buffer->read_ahead. */
1047 if (!pfile->state.save_comments)
1049 result->flags |= PREV_WHITE;
1053 /* Save the comment as a token in its own right. */
1054 save_comment (pfile, result, comment_start);
1055 /* Don't do MI optimisation. */
1059 if (pfile->state.angled_headers)
1061 result->type = CPP_HEADER_NAME;
1062 c = '>'; /* terminator. */
1066 result->type = CPP_LESS;
1067 c = get_effective_char (buffer);
1069 ACCEPT_CHAR (CPP_LESS_EQ);
1072 ACCEPT_CHAR (CPP_LSHIFT);
1073 if (get_effective_char (buffer) == '=')
1074 ACCEPT_CHAR (CPP_LSHIFT_EQ);
1076 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1078 ACCEPT_CHAR (CPP_MIN);
1079 if (get_effective_char (buffer) == '=')
1080 ACCEPT_CHAR (CPP_MIN_EQ);
1082 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1084 ACCEPT_CHAR (CPP_OPEN_SQUARE);
1085 result->flags |= DIGRAPH;
1087 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1089 ACCEPT_CHAR (CPP_OPEN_BRACE);
1090 result->flags |= DIGRAPH;
1095 result->type = CPP_GREATER;
1096 c = get_effective_char (buffer);
1098 ACCEPT_CHAR (CPP_GREATER_EQ);
1101 ACCEPT_CHAR (CPP_RSHIFT);
1102 if (get_effective_char (buffer) == '=')
1103 ACCEPT_CHAR (CPP_RSHIFT_EQ);
1105 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1107 ACCEPT_CHAR (CPP_MAX);
1108 if (get_effective_char (buffer) == '=')
1109 ACCEPT_CHAR (CPP_MAX_EQ);
1114 lex_percent (buffer, result);
1115 if (result->type == CPP_HASH)
1120 lex_dot (pfile, result);
1124 result->type = CPP_PLUS;
1125 c = get_effective_char (buffer);
1127 ACCEPT_CHAR (CPP_PLUS_EQ);
1129 ACCEPT_CHAR (CPP_PLUS_PLUS);
1133 result->type = CPP_MINUS;
1134 c = get_effective_char (buffer);
1137 ACCEPT_CHAR (CPP_DEREF);
1138 if (CPP_OPTION (pfile, cplusplus)
1139 && get_effective_char (buffer) == '*')
1140 ACCEPT_CHAR (CPP_DEREF_STAR);
1143 ACCEPT_CHAR (CPP_MINUS_EQ);
1145 ACCEPT_CHAR (CPP_MINUS_MINUS);
1149 result->type = CPP_MULT;
1150 if (get_effective_char (buffer) == '=')
1151 ACCEPT_CHAR (CPP_MULT_EQ);
1155 result->type = CPP_EQ;
1156 if (get_effective_char (buffer) == '=')
1157 ACCEPT_CHAR (CPP_EQ_EQ);
1161 result->type = CPP_NOT;
1162 if (get_effective_char (buffer) == '=')
1163 ACCEPT_CHAR (CPP_NOT_EQ);
1167 result->type = CPP_AND;
1168 c = get_effective_char (buffer);
1170 ACCEPT_CHAR (CPP_AND_EQ);
1172 ACCEPT_CHAR (CPP_AND_AND);
1176 c = buffer->extra_char; /* Can be set by error condition below. */
1179 buffer->read_ahead = c;
1180 buffer->extra_char = EOF;
1183 c = get_effective_char (buffer);
1187 ACCEPT_CHAR (CPP_PASTE);
1191 result->type = CPP_HASH;
1195 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
1196 tokens within the list of arguments that would otherwise act
1197 as preprocessing directives, the behavior is undefined.
1199 This implementation will report a hard error, terminate the
1200 macro invocation, and proceed to process the directive. */
1201 if (pfile->state.parsing_args)
1203 if (pfile->state.parsing_args == 2)
1205 "directives may not be used inside a macro argument");
1207 /* Put a '#' in lookahead, return CPP_EOF for parse_arg. */
1208 buffer->extra_char = buffer->read_ahead;
1209 buffer->read_ahead = '#';
1210 pfile->state.next_bol = 1;
1211 result->type = CPP_EOF;
1213 /* Get whitespace right - newline_in_args sets it. */
1214 if (pfile->lexer_pos.col == 1)
1215 result->flags &= ~(PREV_WHITE | AVOID_LPASTE);
1219 /* This is the hash introducing a directive. */
1220 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1221 goto done_directive; /* bol still 1. */
1222 /* This is in fact an assembler #. */
1227 result->type = CPP_OR;
1228 c = get_effective_char (buffer);
1230 ACCEPT_CHAR (CPP_OR_EQ);
1232 ACCEPT_CHAR (CPP_OR_OR);
1236 result->type = CPP_XOR;
1237 if (get_effective_char (buffer) == '=')
1238 ACCEPT_CHAR (CPP_XOR_EQ);
1242 result->type = CPP_COLON;
1243 c = get_effective_char (buffer);
1244 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1245 ACCEPT_CHAR (CPP_SCOPE);
1246 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1248 result->flags |= DIGRAPH;
1249 ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1253 case '~': result->type = CPP_COMPL; break;
1254 case ',': result->type = CPP_COMMA; break;
1255 case '(': result->type = CPP_OPEN_PAREN; break;
1256 case ')': result->type = CPP_CLOSE_PAREN; break;
1257 case '[': result->type = CPP_OPEN_SQUARE; break;
1258 case ']': result->type = CPP_CLOSE_SQUARE; break;
1259 case '{': result->type = CPP_OPEN_BRACE; break;
1260 case '}': result->type = CPP_CLOSE_BRACE; break;
1261 case ';': result->type = CPP_SEMICOLON; break;
1263 /* @ is a punctuator in Objective C. */
1264 case '@': result->type = CPP_ATSIGN; break;
1268 result->type = CPP_OTHER;
1273 if (!pfile->state.in_directive && pfile->state.skipping)
1276 /* If not in a directive, this token invalidates controlling macros. */
1277 if (!pfile->state.in_directive)
1278 pfile->mi_valid = false;
1281 /* An upper bound on the number of bytes needed to spell a token,
1282 including preceding whitespace. */
/* TOKEN is the token to measure.  The base length depends on the
   spelling category: the stored string length for SPELL_STRING, the
   node length for SPELL_IDENT, and zero for any other category visible
   here.  The fixed allowance described in the last comment is added on
   top of that.  */
1284 cpp_token_len (token)
1285 const cpp_token *token;
1289 switch (TOKEN_SPELL (token))
1291 default: len = 0; break;
1292 case SPELL_STRING: len = token->val.str.len; break;
1293 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1295 /* 1 for whitespace, 4 for comment delimiters. */
1299 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1300 already contain enough space to hold the token's spelling.
1301 Returns a pointer to the character after the last character
/* PFILE is used for the internal-error report, TOKEN is the token to
   spell, and BUFFER is the destination, advanced as characters are
   written.  Operators use the digraph spelling when the DIGRAPH flag
   is set (indexed relative to CPP_FIRST_DIGRAPH) and otherwise the
   table name, with a separate branch for NAMED_OP.  Identifiers are
   copied from their hash node.  String-like tokens are written with an
   optional L tag and the delimiters that belong to their type.  A
   token with no usable spelling is reported through cpp_ice.  */
1304 cpp_spell_token (pfile, token, buffer)
1305 cpp_reader *pfile; /* Would be nice to be rid of this... */
1306 const cpp_token *token;
1307 unsigned char *buffer;
1309 switch (TOKEN_SPELL (token))
1311 case SPELL_OPERATOR:
1313 const unsigned char *spelling;
1316 if (token->flags & DIGRAPH)
1318 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1319 else if (token->flags & NAMED_OP)
1322 spelling = TOKEN_NAME (token);
1324 while ((c = *spelling++) != '\0')
1331 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1332 buffer += NODE_LEN (token->val.node);
1337 int left, right, tag;
1338 switch (token->type)
1340 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1341 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1342 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1343 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1344 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1345 default: left = '\0'; right = '\0'; tag = '\0'; break;
1347 if (tag) *buffer++ = tag;
1348 if (left) *buffer++ = left;
1349 memcpy (buffer, token->val.str.text, token->val.str.len);
1350 buffer += token->val.str.len;
1351 if (right) *buffer++ = right;
1356 *buffer++ = token->val.c;
1360 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1367 /* Returns a token as a null-terminated string. The string is
1368 temporary, and automatically freed later. Useful for diagnostics. */
1370 cpp_token_as_text (pfile, token)
1372 const cpp_token *token;
/* Size the buffer with cpp_token_len and take it from the ident_pool
   of PFILE.  */
1374 unsigned int len = cpp_token_len (token);
1375 unsigned char *start = _cpp_pool_alloc (&pfile->ident_pool, len), *end;
/* END receives the position just past the spelled text.  */
1377 end = cpp_spell_token (pfile, token, start);
1383 /* Used by C front ends. Should really move to using cpp_token_as_text. */
1385 cpp_type2name (type)
1386 enum cpp_ttype type;
/* Index token_spellings by TYPE and return the name field of that
   entry, cast to a plain char pointer.  */
1388 return (const char *) token_spellings[type].name;
1391 /* Writes the spelling of token to FP. Separate from cpp_spell_token
1392 for efficiency - to avoid double-buffering. Also, outputs a space
1393 if PREV_WHITE is flagged. */
1395 cpp_output_token (token, fp)
1396 const cpp_token *token;
/* PREV_WHITE is tested first, before any of the spelling is written.  */
1399 if (token->flags & PREV_WHITE)
/* The rest follows the same per-category layout as cpp_spell_token,
   writing to FP instead of into a buffer.  */
1402 switch (TOKEN_SPELL (token))
1404 case SPELL_OPERATOR:
1406 const unsigned char *spelling;
/* DIGRAPH tokens index digraph_spellings by their offset from
   CPP_FIRST_DIGRAPH.  NOTE(review): the listing skips lines 1412-1413
   between the NAMED_OP test and the TOKEN_NAME assignment; confirm the
   branch structure against the upstream file.  */
1408 if (token->flags & DIGRAPH)
1410 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1411 else if (token->flags & NAMED_OP)
1414 spelling = TOKEN_NAME (token);
1416 ufputs (spelling, fp);
/* Identifier spelling: write the name of the node.  */
1422 ufputs (NODE_NAME (token->val.node), fp);
/* LEFT and RIGHT are the delimiters for the token type and TAG is the
   L prefix used by the wide variants; a zero value means that nothing
   is written for that slot.  */
1427 int left, right, tag;
1428 switch (token->type)
1430 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1431 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1432 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1433 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1434 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1435 default: left = '\0'; right = '\0'; tag = '\0'; break;
/* Write TAG, LEFT, the stored text (by explicit length), then RIGHT.  */
1437 if (tag) putc (tag, fp);
1438 if (left) putc (left, fp);
1439 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1440 if (right) putc (right, fp);
/* The spelling is the single character held in val.c.  */
1445 putc (token->val.c, fp);
1449 /* An error, most probably. */
1454 /* Compare two tokens. */
1456 _cpp_equiv_tokens (a, b)
1457 const cpp_token *a, *b;
/* Only tokens whose type and flags both match are examined further;
   the remaining comparison depends on the spelling category of A.
   NOTE(review): several case labels of this switch are not visible in
   the listing (the numbering skips 1464-1465, 1467, 1469, 1471), so the
   pairing of labels to return statements should be confirmed against
   the upstream file.  */
1459 if (a->type == b->type && a->flags == b->flags)
1460 switch (TOKEN_SPELL (a))
1462 default: /* Keep compiler happy. */
1463 case SPELL_OPERATOR:
1466 return a->val.c == b->val.c; /* Character. */
/* Macro arguments additionally need matching argument numbers.  */
1468 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
/* Compares the node fields of the two tokens for equality.  */
1470 return a->val.node == b->val.node;
/* Strings match when the lengths agree and memcmp finds no difference.  */
1472 return (a->val.str.len == b->val.str.len
1473 && !memcmp (a->val.str.text, b->val.str.text,
1480 /* Determine whether two tokens can be pasted together, and if so,
1481 what the resulting token is. Returns CPP_EOF if the tokens cannot
1482 be pasted, or the appropriate type for the merged token if they
1485 cpp_can_paste (pfile, token1, token2, digraph)
1487 const cpp_token *token1, *token2;
1490 enum cpp_ttype a = token1->type, b = token2->type;
1491 int cxx = CPP_OPTION (pfile, cplusplus);
1493 /* Treat named operators as if they were ordinary NAMEs. */
1494 if (token1->flags & NAMED_OP)
1496 if (token2->flags & NAMED_OP)
/* Appending CPP_EQ to any type up to CPP_LAST_EQ yields the type that
   lies as far from A as CPP_EQ_EQ lies from CPP_EQ.  */
1499 if ((int) a <= (int) CPP_LAST_EQ && b == CPP_EQ)
1500 return (enum cpp_ttype) ((int) a + ((int) CPP_EQ_EQ - (int) CPP_EQ));
/* NOTE(review): most case labels of the switch on A are not visible in
   this listing; the groups below are annotated only where the code
   shown makes the intent evident.  Results guarded by CXX are produced
   only when the cplusplus option is set.  */
1505 if (b == a) return CPP_RSHIFT;
1506 if (b == CPP_QUERY && cxx) return CPP_MAX;
1507 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1510 if (b == a) return CPP_LSHIFT;
1511 if (b == CPP_QUERY && cxx) return CPP_MIN;
1512 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
/* Digraph results are produced only when the digraphs option is on;
   each one stores 1 through the DIGRAPH pointer before returning.  */
1513 if (CPP_OPTION (pfile, digraphs))
1516 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1518 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1522 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1523 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1524 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1527 if (b == a) return CPP_MINUS_MINUS;
1528 if (b == CPP_GREATER) return CPP_DEREF;
1531 if (b == a && cxx) return CPP_SCOPE;
1532 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
1533 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1537 if (CPP_OPTION (pfile, digraphs))
1539 if (b == CPP_GREATER)
1540 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1542 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1546 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1549 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1550 if (b == CPP_NUMBER) return CPP_NUMBER;
/* Two tokens of the same type whose DIGRAPH flags agree give CPP_PASTE;
   the DIGRAPH flag of TOKEN1 is stored through the DIGRAPH pointer.  */
1554 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1556 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1560 if (b == CPP_NAME) return CPP_NAME;
1562 && name_p (pfile, &token2->val.str)) return CPP_NAME;
/* When the node of TOKEN1 is the n_L special node, the results are the
   wide character and wide string types.  */
1564 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
1566 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
/* These pairings all produce CPP_NUMBER.  */
1570 if (b == CPP_NUMBER) return CPP_NUMBER;
1571 if (b == CPP_NAME) return CPP_NUMBER;
1572 if (b == CPP_DOT) return CPP_NUMBER;
1573 /* Numbers cannot have length zero, so this is safe. */
1574 if ((b == CPP_PLUS || b == CPP_MINUS)
1575 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
1586 /* Returns nonzero if a space should be inserted to avoid an
1587 accidental token paste for output. For simplicity, it is
1588 conservative, and occasionally advises a space where one is not
1589 needed, e.g. "." and ".2". */
1592 cpp_avoid_paste (pfile, token1, token2)
1594 const cpp_token *token1, *token2;
1596 enum cpp_ttype a = token1->type, b = token2->type;
1599 if (token1->flags & NAMED_OP)
1601 if (token2->flags & NAMED_OP)
/* C is set to the first character of the spelling of TOKEN2 when that
   token is a digraph or belongs to the operator spelling category.
   NOTE(review): the declaration and initial value of C are not visible
   in this listing.  */
1605 if (token2->flags & DIGRAPH)
1606 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1607 else if (token_spellings[b].category == SPELL_OPERATOR)
1608 c = token_spellings[b].name[0];
1610 /* Quickly get everything that can paste with an '='. */
1611 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
/* One entry per type of TOKEN1: the leading characters (or types) of
   TOKEN2 for which a separating space is advised.  */
1616 case CPP_GREATER: return c == '>' || c == '?';
1617 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1618 case CPP_PLUS: return c == '+';
1619 case CPP_MINUS: return c == '-' || c == '>';
1620 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1621 case CPP_MOD: return c == ':' || c == '>';
1622 case CPP_AND: return c == '&';
1623 case CPP_OR: return c == '|';
1624 case CPP_COLON: return c == ':' || c == '>';
1625 case CPP_DEREF: return c == '*';
1626 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1627 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1628 case CPP_NAME: return ((b == CPP_NUMBER
1629 && name_p (pfile, &token2->val.str))
1631 || b == CPP_CHAR || b == CPP_STRING); /* L */
1632 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1633 || c == '.' || c == '+' || c == '-');
1634 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1635 && token1->val.c == '@'
1636 && (b == CPP_NAME || b == CPP_STRING));
1643 /* Output all the remaining tokens on the current line, and a newline
1644 character, to FP. Leading whitespace is removed. */
1646 cpp_output_line (pfile, fp)
/* Clearing PREV_WHITE on the first token fetched is what drops the
   leading whitespace, since cpp_output_token tests that flag.  */
1652 cpp_get_token (pfile, &token);
1653 token.flags &= ~PREV_WHITE;
/* Write each token and fetch the next until CPP_EOF is seen.  */
1654 while (token.type != CPP_EOF)
1656 cpp_output_token (&token, fp);
1657 cpp_get_token (pfile, &token);
1663 /* Returns the value of a hexadecimal digit. */
/* NOTE(review): the function header is not visible in this listing (the
   numbering skips 1664-1667), nor is the code that follows the last
   test below.  C is the digit being converted.  Letters of either case
   map to 10 through 15.  */
1668 if (c >= 'a' && c <= 'f')
1669 return c - 'a' + 10;
1670 if (c >= 'A' && c <= 'F')
1671 return c - 'A' + 10;
1672 if (c >= '0' && c <= '9')
1677 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1678 failure if cpplib is not parsing C++ or C99. Such failure is
1679 silent, and no variables are updated. Otherwise returns 0, and
1680 warns if -Wtraditional.
1682 [lex.charset]: The character designated by the universal character
1683 name \UNNNNNNNN is that character whose character short name in
1684 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1685 universal character name \uNNNN is that character whose character
1686 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1687 for a universal character name is less than 0x20 or in the range
1688 0x7F-0x9F (inclusive), or if the universal character name
1689 designates a character in the basic source character set, then the
1690 program is ill-formed.
1692 We assume that wchar_t is Unicode, so we don't need to do any
1693 mapping. Is this ever wrong?
1695 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1696 LIMIT is the end of the string or charconst. PSTR is updated to
1697 point after the UCS on return, and the UCS is written into PC. */
1700 maybe_read_ucs (pfile, pstr, limit, pc)
1702 const unsigned char **pstr;
1703 const unsigned char *limit;
/* P walks the input, CODE accumulates the value, and C starts out as
   the introducing letter read through PC.  */
1706 const unsigned char *p = *pstr;
1707 unsigned int code = 0;
1708 unsigned int c = *pc, length;
1710 /* Only attempt to interpret a UCS for C++ and C99. */
1711 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1714 if (CPP_WTRADITIONAL (pfile))
1715 cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
/* The lower-case introducer takes 4 hex digits, the other form 8.  */
1717 length = (c == 'u' ? 4: 8);
/* Fewer than LENGTH bytes remain before LIMIT.  */
1719 if ((size_t) (limit - p) < length)
1721 cpp_error (pfile, "incomplete universal-character-name");
1722 /* Skip to the end to avoid more diagnostics. */
/* Consume up to LENGTH characters, folding each hex digit into CODE
   with the earliest digit most significant.  LENGTH counts down, so it
   is 0 only when the loop ran to completion.  */
1727 for (; length; length--, p++)
1731 code = (code << 4) + hex_digit_value (c);
1735 "non-hex digit '%c' in universal-character-name", c);
1736 /* We shouldn't skip in case there are multibyte chars. */
1742 #ifdef TARGET_EBCDIC
1743 cpp_error (pfile, "universal-character-name on EBCDIC target");
1744 code = 0x3f; /* EBCDIC invalid character */
1746 /* True extended characters are OK. */
/* The visible part of the test rejects values with bit 31 set and
   values in the range 0xD800 through 0xDFFF.  */
1748 && !(code & 0x80000000)
1749 && !(code >= 0xD800 && code <= 0xDFFF))
1751 /* The standard permits $, @ and ` to be specified as UCNs. We use
1752 hex escapes so that this also works with EBCDIC hosts. */
1753 else if (code == 0x24 || code == 0x40 || code == 0x60)
1755 /* Don't give another error if one occurred above. */
1756 else if (length == 0)
1757 cpp_error (pfile, "universal-character-name out of range");
1765 /* Interpret an escape sequence, and return its value. PSTR points to
1766 the input pointer, which is just after the backslash. LIMIT is how
1767 much text we have. MASK is a bitmask for the precision for the
1768 destination type (char or wchar_t). TRADITIONAL, if true, does not
1769 interpret escapes that did not exist in traditional C.
1771 Handles all relevant diagnostics. */
1774 cpp_parse_escape (pfile, pstr, limit, mask, traditional)
1776 const unsigned char **pstr;
1777 const unsigned char *limit;
1778 unsigned HOST_WIDE_INT mask;
/* C is the character that follows the backslash; STR is left pointing
   just past it.  */
1782 const unsigned char *str = *pstr;
1783 unsigned int c = *str++;
/* These characters stand for themselves, so C is left unchanged.  */
1787 case '\\': case '\'': case '"': case '?': break;
/* The letter escapes map to the target character constants.  */
1788 case 'b': c = TARGET_BS; break;
1789 case 'f': c = TARGET_FF; break;
1790 case 'n': c = TARGET_NEWLINE; break;
1791 case 'r': c = TARGET_CR; break;
1792 case 't': c = TARGET_TAB; break;
1793 case 'v': c = TARGET_VT; break;
1795 case '(': case '{': case '[': case '%':
1796 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1797 '\%' is used to prevent SCCS from getting confused. */
1798 unknown = CPP_PEDANTIC (pfile);
1802 if (CPP_WTRADITIONAL (pfile))
1803 cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
1809 if (CPP_PEDANTIC (pfile))
1810 cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
/* Universal character names are handed to maybe_read_ucs, which
   returns nonzero when it declines to interpret the sequence.  */
1815 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1819 if (CPP_WTRADITIONAL (pfile))
1820 cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
1824 unsigned int i = 0, overflow = 0;
1825 int digits_found = 0;
/* Before each shift, OVERFLOW picks up any of the top four bits of I
   that the shift by 4 would discard.  */
1833 overflow |= i ^ (i << 4 >> 4);
1834 i = (i << 4) + hex_digit_value (c);
1839 cpp_error (pfile, "\\x used with no following hex digits");
/* Out of range if bits were lost or I has bits outside MASK.  */
1841 if (overflow | (i != (i & mask)))
1843 cpp_pedwarn (pfile, "hex escape sequence out of range");
1850 case '0': case '1': case '2': case '3':
1851 case '4': case '5': case '6': case '7':
/* I starts as the value of the first octal digit; further digits are
   folded in three bits at a time.  The loop stops at LIMIT, when the
   digit counter reaches its cap, or at a character outside 0-7.  */
1853 unsigned int i = c - '0';
1856 while (str < limit && ++count < 3)
1859 if (c < '0' || c > '7')
1862 i = (i << 3) + c - '0';
1865 if (i != (i & mask))
1867 cpp_pedwarn (pfile, "octal escape sequence out of range");
/* Two forms of the same diagnostic: one prints the character itself,
   the other prints its value in octal.  */
1882 cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
1884 cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
1888 cpp_pedwarn (pfile, "escape sequence out of range for character");
/* Default MAX_CHAR_TYPE_SIZE to CHAR_TYPE_SIZE when it is not already
   defined.  */
1894 #ifndef MAX_CHAR_TYPE_SIZE
1895 #define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
/* Likewise, MAX_WCHAR_TYPE_SIZE defaults to WCHAR_TYPE_SIZE.  */
1898 #ifndef MAX_WCHAR_TYPE_SIZE
1899 #define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
1902 /* Interpret a (possibly wide) character constant in TOKEN.
1903 WARN_MULTI warns about multi-character charconsts, if not
1904 TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
1905 that did not exist in traditional C. PCHARS_SEEN points to a
1906 variable that is filled in with the number of characters seen. */
1908 cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
1910 const cpp_token *token;
1913 unsigned int *pchars_seen;
/* STR and LIMIT bracket the stored text of the constant.  */
1915 const unsigned char *str = token->val.str.text;
1916 const unsigned char *limit = str + token->val.str.len;
1917 unsigned int chars_seen = 0;
1918 unsigned int width, max_chars, c;
1919 unsigned HOST_WIDE_INT mask;
1920 HOST_WIDE_INT result = 0;
1922 #ifdef MULTIBYTE_CHARS
1923 (void) local_mbtowc (NULL, NULL, 0);
1926 /* Width in bits. */
1927 if (token->type == CPP_CHAR)
1928 width = MAX_CHAR_TYPE_SIZE;
1930 width = MAX_WCHAR_TYPE_SIZE;
/* When WIDTH is narrower than HOST_WIDE_INT, MASK has exactly the low
   WIDTH bits set.  */
1932 if (width < HOST_BITS_PER_WIDE_INT)
1933 mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
/* How many characters of WIDTH bits fit in one HOST_WIDE_INT.  */
1936 max_chars = HOST_BITS_PER_WIDE_INT / width;
1940 #ifdef MULTIBYTE_CHARS
1944 char_len = local_mbtowc (&wc, str, limit - str);
1947 cpp_warning (pfile, "ignoring invalid multibyte character");
1960 c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
1962 #ifdef MAP_CHARACTER
1964 c = MAP_CHARACTER (c);
1967 /* Merge character into result; ignore excess chars. */
1968 if (++chars_seen <= max_chars)
1970 if (width < HOST_BITS_PER_WIDE_INT)
1971 result = (result << width) | (c & mask);
1977 if (chars_seen == 0)
1978 cpp_error (pfile, "empty character constant");
1979 else if (chars_seen > max_chars)
/* Clamp the count to the number of characters actually merged.  */
1981 chars_seen = max_chars;
1982 cpp_warning (pfile, "character constant too long");
1984 else if (chars_seen > 1 && !traditional && warn_multi)
1985 cpp_warning (pfile, "multi-character character constant");
1987 /* If char type is signed, sign-extend the constant. The
1988 __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
1989 if (token->type == CPP_CHAR && chars_seen)
/* NBITS is the number of bits taken up by the merged characters.  The
   MASK declared here is a separate, narrower variable from the one
   declared at the top of the function.  */
1991 unsigned int nbits = chars_seen * width;
1992 unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
/* True when __CHAR_UNSIGNED__ is defined as a macro, or when bit
   NBITS - 1 of RESULT is clear.  */
1994 if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
1995 || ((result >> (nbits - 1)) & 1) == 0)
2001 *pchars_seen = chars_seen;
/* The default alignment is the offset of member U within struct dummy.
   NOTE(review): the definition of struct dummy is not visible in this
   listing.  */
2017 #define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
/* Nonzero when CHUNK is non-null, is not the chunk recorded in the
   locked field of POOL, and spans at least twice SIZE bytes from its
   base to its limit.  */
2020 chunk_suitable (pool, chunk, size)
2025 /* Being at least twice SIZE means we can use memcpy in
2026 _cpp_next_chunk rather than memmove. Besides, it's a good idea
2028 return (chunk && pool->locked != chunk
2029 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
2032 /* Returns the end of the new pool. PTR points to a char in the old
2033 pool, and is updated to point to the same char in the new pool. */
2035 _cpp_next_chunk (pool, len, ptr)
2038 unsigned char **ptr;
/* Start with the chunk that follows the current one.  */
2040 cpp_chunk *chunk = pool->cur->next;
2042 /* LEN is the minimum size we want in the new pool. */
2043 len += POOL_ROOM (pool);
/* If that chunk will not do, get a larger one from new_chunk and link
   it in directly after the current chunk.  */
2044 if (! chunk_suitable (pool, chunk, len))
2046 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
2048 chunk->next = pool->cur->next;
2049 pool->cur->next = chunk;
2052 /* Update the pointer before changing chunk's front. */
2054 *ptr += chunk->base - POOL_FRONT (pool);
/* Copy the POOL_ROOM bytes found at the old front to the base of the
   chosen chunk, whose front is then reset to its base.  */
2056 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
2057 chunk->front = chunk->base;
2060 return POOL_LIMIT (pool);
/* NOTE(review): the header of this function is not visible in this
   listing.  The call sites pass a single size argument to new_chunk,
   which suggests that this is its body; confirm against the upstream
   file.  */
2067 unsigned char *base;
/* Align SIZE with POOL_ALIGN, then allocate room for that many bytes
   plus one chunk descriptor.  */
2070 size = POOL_ALIGN (size, DEFAULT_ALIGNMENT);
2071 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
2072 /* Put the chunk descriptor at the end. Then chunk overruns will
2073 cause obvious chaos. */
2074 result = (cpp_chunk *) (base + size);
/* The data area runs from BASE up to the descriptor, and FRONT starts
   at BASE.  */
2075 result->base = base;
2076 result->front = base;
2077 result->limit = base + size;
/* Set up POOL with the alignment ALIGN and a first chunk obtained from
   new_chunk (SIZE).  NOTE(review): the tests that guard the assignment
   of DEFAULT_ALIGNMENT and the self-link at the end are not visible in
   this listing, nor is the action taken when the alignment check
   fires.  */
2084 _cpp_init_pool (pool, size, align, temp)
2086 unsigned int size, align, temp;
2089 align = DEFAULT_ALIGNMENT;
/* Nonzero when ALIGN has more than one bit set.  */
2090 if (align & (align - 1))
2092 pool->align = align;
2093 pool->cur = new_chunk (size);
/* Point the next link of the chunk back at the chunk itself.  */
2097 pool->cur->next = pool->cur;
/* Add one to the lock count of POOL.  When the count was 0 beforehand,
   the current chunk is recorded in the locked field.  */
2101 _cpp_lock_pool (pool)
2104 if (pool->locks++ == 0)
2105 pool->locked = pool->cur;
/* Take one off the lock count of POOL; the test is true when the count
   reaches 0.  NOTE(review): the statement run in that case is not
   visible in this listing.  */
2109 _cpp_unlock_pool (pool)
2112 if (--pool->locks == 0)
/* Presumably releases the chunks of POOL.  NOTE(review): the loop body
   is not visible in this listing (the numbering skips 2121-2127), so
   confirm against the upstream file.  */
2117 _cpp_free_pool (pool)
/* Start from the current chunk.  */
2120 cpp_chunk *chunk = pool->cur, *next;
/* The loop ends at a null link or on arriving back at the current
   chunk.  */
2128 while (chunk && chunk != pool->cur);
2131 /* Reserve LEN bytes from a memory pool. */
2133 _cpp_pool_reserve (pool, len)
/* Align the request to the alignment of POOL; when the current chunk
   has less room than that, switch chunks through _cpp_next_chunk, with
   no pointer to carry over.  */
2137 len = POOL_ALIGN (len, pool->align);
2138 if (len > (unsigned int) POOL_ROOM (pool))
2139 _cpp_next_chunk (pool, len, 0);
/* The reserved space begins at the front of the pool.  */
2141 return POOL_FRONT (pool);
2144 /* Allocate LEN bytes from a memory pool. */
2146 _cpp_pool_alloc (pool, len)
2150 unsigned char *result = _cpp_pool_reserve (pool, len);
2152 POOL_COMMIT (pool, len);