1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
25 #include "coretypes.h"
30 #ifdef MULTIBYTE_CHARS
35 /* Tokens with SPELL_STRING store their spelling in the token list,
36 and its length in token->val.str.len. */
49 enum spell_type category;
50 const unsigned char *name;
53 static const unsigned char *const digraph_spellings[] =
54 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
56 #define OP(e, s) { SPELL_OPERATOR, U s },
57 #define TK(e, s) { s, U STRINGX (e) },
58 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
62 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
63 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
64 #define BACKUP() do {buffer->cur = buffer->backup_to;} while (0)
66 static void handle_newline PARAMS ((cpp_reader *));
67 static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
68 static cppchar_t get_effective_char PARAMS ((cpp_reader *));
70 static int skip_block_comment PARAMS ((cpp_reader *));
71 static int skip_line_comment PARAMS ((cpp_reader *));
72 static void adjust_column PARAMS ((cpp_reader *));
73 static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
74 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
75 static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int,
77 static void parse_number PARAMS ((cpp_reader *, cpp_string *, int));
78 static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *));
79 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
80 static bool trigraph_p PARAMS ((cpp_reader *));
81 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
83 static bool continue_after_nul PARAMS ((cpp_reader *));
84 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
85 static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **,
86 const unsigned char *, cppchar_t *));
87 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
89 static unsigned int hex_digit_value PARAMS ((unsigned int));
90 static _cpp_buff *new_buff PARAMS ((size_t));
92 /* Change to the native locale for multibyte conversions. */
96 #ifdef MULTIBYTE_CHARS
97 setlocale (LC_CTYPE, "");
98 GET_ENVIRONMENT (literal_codeset, "LANG");
104 Compares the token TOKEN to the NUL-terminated string STRING.
105 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
107 cpp_ideq (token, string)
108 const cpp_token *token;
111 if (token->type != CPP_NAME)
114 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
117 /* Call when meeting a newline, assumed to be in buffer->cur[-1].
118 Returns with buffer->cur pointing to the character immediately
119 following the newline (combination). */
121 handle_newline (pfile)
124 cpp_buffer *buffer = pfile->buffer;
126 /* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
127 only accept CR-LF; maybe we should fall back to that behavior? */
128 if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
131 buffer->line_base = buffer->cur;
132 buffer->col_adjust = 0;
136 /* Subroutine of skip_escaped_newlines; called when a 3-character
137 sequence beginning with "??" is encountered. buffer->cur points to
140 Warn if necessary, and returns true if the sequence forms a
141 trigraph and the trigraph should be honored. */
146 cpp_buffer *buffer = pfile->buffer;
147 cppchar_t from_char = buffer->cur[1];
150 if (!_cpp_trigraph_map[from_char])
153 accept = CPP_OPTION (pfile, trigraphs);
155 /* Don't warn about trigraphs in comments. */
156 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
159 cpp_error_with_line (pfile, DL_WARNING,
160 pfile->line, CPP_BUF_COL (buffer) - 1,
161 "trigraph ??%c converted to %c",
163 (int) _cpp_trigraph_map[from_char]);
164 else if (buffer->cur != buffer->last_Wtrigraphs)
166 buffer->last_Wtrigraphs = buffer->cur;
167 cpp_error_with_line (pfile, DL_WARNING,
168 pfile->line, CPP_BUF_COL (buffer) - 1,
169 "trigraph ??%c ignored", (int) from_char);
176 /* Skips any escaped newlines introduced by '?' or a '\\', assumed to
177 lie in buffer->cur[-1]. Returns the next byte, which will be in
178 buffer->cur[-1]. This routine performs preprocessing stages 1 and
179 2 of the ISO C standard. */
181 skip_escaped_newlines (pfile)
184 cpp_buffer *buffer = pfile->buffer;
185 cppchar_t next = buffer->cur[-1];
187 /* Only do this if we apply stages 1 and 2. */
188 if (!buffer->from_stage3)
190 const unsigned char *saved_cur;
197 if (buffer->cur[0] != '?' || !trigraph_p (pfile))
200 /* Translate the trigraph. */
201 next = _cpp_trigraph_map[buffer->cur[1]];
207 if (buffer->cur == buffer->rlimit)
210 /* We have a backslash, and room for at least one more
211 character. Skip horizontal whitespace. */
212 saved_cur = buffer->cur;
214 next1 = *buffer->cur++;
215 while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
217 if (!is_vspace (next1))
219 buffer->cur = saved_cur;
223 if (saved_cur != buffer->cur - 1
224 && !pfile->state.lexing_comment)
225 cpp_error (pfile, DL_WARNING,
226 "backslash and newline separated by space");
228 handle_newline (pfile);
229 buffer->backup_to = buffer->cur;
230 if (buffer->cur == buffer->rlimit)
232 cpp_error (pfile, DL_PEDWARN,
233 "backslash-newline at end of file");
237 next = *buffer->cur++;
239 while (next == '\\' || next == '?');
245 /* Obtain the next character, after trigraph conversion and skipping
246 an arbitrarily long string of escaped newlines. The common case of
247 no trigraphs or escaped newlines falls through quickly. On return,
248 buffer->backup_to points to where to return to if the character is
249 not to be processed. */
251 get_effective_char (pfile)
255 cpp_buffer *buffer = pfile->buffer;
257 buffer->backup_to = buffer->cur;
258 next = *buffer->cur++;
259 if (__builtin_expect (next == '?' || next == '\\', 0))
260 next = skip_escaped_newlines (pfile);
265 /* Skip a C-style block comment. We find the end of the comment by
266 seeing if an asterisk is before every '/' we encounter. Returns
267 nonzero if comment terminated by EOF, zero otherwise. */
269 skip_block_comment (pfile)
272 cpp_buffer *buffer = pfile->buffer;
273 cppchar_t c = EOF, prevc = EOF;
275 pfile->state.lexing_comment = 1;
276 while (buffer->cur != buffer->rlimit)
278 prevc = c, c = *buffer->cur++;
280 /* FIXME: For speed, create a new character class of characters
281 of interest inside block comments. */
282 if (c == '?' || c == '\\')
283 c = skip_escaped_newlines (pfile);
285 /* People like decorating comments with '*', so check for '/'
286 instead for efficiency. */
292 /* Warn about potential nested comments, but not if the '/'
293 comes immediately before the true comment delimiter.
294 Don't bother to get it right across escaped newlines. */
295 if (CPP_OPTION (pfile, warn_comments)
296 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
297 cpp_error_with_line (pfile, DL_WARNING,
298 pfile->line, CPP_BUF_COL (buffer),
299 "\"/*\" within comment");
301 else if (is_vspace (c))
302 handle_newline (pfile);
304 adjust_column (pfile);
307 pfile->state.lexing_comment = 0;
308 return c != '/' || prevc != '*';
311 /* Skip a C++ line comment, leaving buffer->cur pointing to the
312 terminating newline. Handles escaped newlines. Returns nonzero
313 if a multiline comment. */
315 skip_line_comment (pfile)
318 cpp_buffer *buffer = pfile->buffer;
319 unsigned int orig_line = pfile->line;
321 #ifdef MULTIBYTE_CHARS
326 pfile->state.lexing_comment = 1;
327 #ifdef MULTIBYTE_CHARS
328 /* Reset multibyte conversion state. */
329 (void) local_mbtowc (NULL, NULL, 0);
333 if (buffer->cur == buffer->rlimit)
336 #ifdef MULTIBYTE_CHARS
337 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
338 buffer->rlimit - buffer->cur);
341 cpp_error (pfile, DL_WARNING,
342 "ignoring invalid multibyte character");
348 buffer->cur += char_len;
354 if (c == '?' || c == '\\')
355 c = skip_escaped_newlines (pfile);
357 while (!is_vspace (c));
359 /* Step back over the newline, except at EOF. */
363 pfile->state.lexing_comment = 0;
364 return orig_line != pfile->line;
367 /* pfile->buffer->cur is one beyond the \t character. Update
368 col_adjust so we track the column correctly. */
370 adjust_column (pfile)
373 cpp_buffer *buffer = pfile->buffer;
374 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
376 /* Round it up to multiple of the tabstop, but subtract 1 since the
377 tab itself occupies a character position. */
378 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
379 - col % CPP_OPTION (pfile, tabstop)) - 1;
382 /* Skips whitespace, saving the next non-whitespace character.
383 Adjusts pfile->buffer->col_adjust to account for tabs. Without this,
384 tokens might be assigned an incorrect column. */
386 skip_whitespace (pfile, c)
390 cpp_buffer *buffer = pfile->buffer;
391 unsigned int warned = 0;
395 /* Horizontal space always OK. */
399 adjust_column (pfile);
400 /* Just \f \v or \0 left. */
403 if (buffer->cur - 1 == buffer->rlimit)
407 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
411 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
412 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
413 CPP_BUF_COL (buffer),
414 "%s in preprocessing directive",
415 c == '\f' ? "form feed" : "vertical tab");
419 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
420 while (is_nvspace (c));
426 /* See if the characters of a number token are valid in a name (no
429 name_p (pfile, string)
431 const cpp_string *string;
435 for (i = 0; i < string->len; i++)
436 if (!is_idchar (string->text[i]))
442 /* Parse an identifier, skipping embedded backslash-newlines. This is
443 a critical inner loop. The common case is an identifier which has
444 not been split by backslash-newline, does not contain a dollar
445 sign, and has already been scanned (roughly 10:1 ratio of
446 seen:unseen identifiers in normal code; the distribution is
447 Poisson-like). Second most common case is a new identifier, not
448 split and no dollar sign. The other possibilities are rare and
449 have been relegated to parse_slow. */
450 static cpp_hashnode *
451 parse_identifier (pfile)
454 cpp_hashnode *result;
455 const uchar *cur, *base;
457 /* Fast-path loop. Skim over a normal identifier.
458 N.B. ISIDNUM does not include $. */
459 cur = pfile->buffer->cur;
460 while (ISIDNUM (*cur))
463 /* Check for slow-path cases. */
464 if (*cur == '?' || *cur == '\\' || *cur == '$')
468 base = parse_slow (pfile, cur, 0, &len);
469 result = (cpp_hashnode *)
470 ht_lookup (pfile->hash_table, base, len, HT_ALLOCED);
474 base = pfile->buffer->cur - 1;
475 pfile->buffer->cur = cur;
476 result = (cpp_hashnode *)
477 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
480 /* Rarely, identifiers require diagnostics when lexed.
481 XXX Has to be forced out of the fast path. */
482 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
483 && !pfile->state.skipping, 0))
485 /* It is allowed to poison the same identifier twice. */
486 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
487 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
490 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
491 replacement list of a variadic macro. */
492 if (result == pfile->spec_nodes.n__VA_ARGS__
493 && !pfile->state.va_args_ok)
494 cpp_error (pfile, DL_PEDWARN,
495 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
501 /* Slow path. This handles numbers and identifiers which have been
502 split, or contain dollar signs. The part of the token from
503 PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is
504 1 if it's a number, and 2 if it has a leading period. Returns a
505 pointer to the token's NUL-terminated spelling in permanent
506 storage, and sets PLEN to its length. */
508 parse_slow (pfile, cur, number_p, plen)
514 cpp_buffer *buffer = pfile->buffer;
515 const uchar *base = buffer->cur - 1;
516 struct obstack *stack = &pfile->hash_table->stack;
517 unsigned int c, prevc, saw_dollar = 0;
519 /* Place any leading period. */
521 obstack_1grow (stack, '.');
523 /* Copy the part of the token which is known to be okay. */
524 obstack_grow (stack, base, cur - base);
526 /* Now process the part which isn't. We are looking at one of
527 '$', '\\', or '?' on entry to this loop. */
533 /* Potential escaped newline? */
534 buffer->backup_to = buffer->cur - 1;
535 if (c == '?' || c == '\\')
536 c = skip_escaped_newlines (pfile);
542 if (c != '.' && !VALID_SIGN (c, prevc))
546 /* Handle normal identifier characters in this loop. */
550 obstack_1grow (stack, c);
557 while (is_idchar (c));
560 /* Step back over the unwanted char. */
563 /* $ is not an identifier character in the standard, but is commonly
564 accepted as an extension. Don't warn about it in skipped
565 conditional blocks. */
566 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping)
567 cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number");
569 /* Identifiers and numbers are null-terminated. */
570 *plen = obstack_object_size (stack);
571 obstack_1grow (stack, '\0');
572 return obstack_finish (stack);
575 /* Parse a number starting at pfile->buffer->cur - 1, skipping embedded
576 backslash-newlines. LEADING_PERIOD is nonzero if there was a "."
577 before C. Place the result in NUMBER. */
579 parse_number (pfile, number, leading_period)
586 /* Fast-path loop. Skim over a normal number.
587 N.B. ISIDNUM does not include $. */
588 cur = pfile->buffer->cur;
589 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
592 /* Check for slow-path cases. */
593 if (*cur == '?' || *cur == '\\' || *cur == '$')
594 number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len);
597 const uchar *base = pfile->buffer->cur - 1;
600 number->len = cur - base + leading_period;
601 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
602 dest[number->len] = '\0';
607 memcpy (dest, base, cur - base);
608 pfile->buffer->cur = cur;
612 /* Subroutine of parse_string. */
614 unescaped_terminator_p (pfile, dest)
616 const unsigned char *dest;
618 const unsigned char *start, *temp;
620 /* In #include-style directives, terminators are not escapable. */
621 if (pfile->state.angled_headers)
624 start = BUFF_FRONT (pfile->u_buff);
626 /* An odd number of consecutive backslashes represents an escaped
628 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
631 return ((dest - temp) & 1) == 0;
634 /* Parses a string, character constant, or angle-bracketed header file
635 name. Handles embedded trigraphs and escaped newlines. The stored
636 string is guaranteed NUL-terminated, but it is not guaranteed that
637 this is the first NUL since embedded NULs are preserved.
639 When this function returns, buffer->cur points to the next
640 character to be processed. */
642 parse_string (pfile, token, terminator)
645 cppchar_t terminator;
647 cpp_buffer *buffer = pfile->buffer;
648 unsigned char *dest, *limit;
650 bool warned_nulls = false;
651 #ifdef MULTIBYTE_CHARS
656 dest = BUFF_FRONT (pfile->u_buff);
657 limit = BUFF_LIMIT (pfile->u_buff);
659 #ifdef MULTIBYTE_CHARS
660 /* Reset multibyte conversion state. */
661 (void) local_mbtowc (NULL, NULL, 0);
665 /* We need room for another char, possibly the terminating NUL. */
666 if ((size_t) (limit - dest) < 1)
668 size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
669 _cpp_extend_buff (pfile, &pfile->u_buff, 2);
670 dest = BUFF_FRONT (pfile->u_buff) + len_so_far;
671 limit = BUFF_LIMIT (pfile->u_buff);
674 #ifdef MULTIBYTE_CHARS
675 char_len = local_mbtowc (&wc, (const char *) buffer->cur,
676 buffer->rlimit - buffer->cur);
679 cpp_error (pfile, DL_WARNING,
680 "ignoring invalid multibyte character");
686 buffer->cur += char_len;
693 /* Handle trigraphs, escaped newlines etc. */
694 if (c == '?' || c == '\\')
695 c = skip_escaped_newlines (pfile);
699 if (unescaped_terminator_p (pfile, dest))
702 else if (is_vspace (c))
704 /* No string literal may extend over multiple lines. In
705 assembly language, suppress the error except for <>
706 includes. This is a kludge around not knowing where
709 if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>')
710 cpp_error (pfile, DL_ERROR, "missing terminating %c character",
717 if (buffer->cur - 1 == buffer->rlimit)
722 cpp_error (pfile, DL_WARNING,
723 "null character(s) preserved in literal");
726 #ifdef MULTIBYTE_CHARS
729 for ( ; char_len > 0; --char_len)
730 *dest++ = (*buffer->cur - char_len);
739 token->val.str.text = BUFF_FRONT (pfile->u_buff);
740 token->val.str.len = dest - BUFF_FRONT (pfile->u_buff);
741 BUFF_FRONT (pfile->u_buff) = dest + 1;
744 /* The stored comment includes the comment start and any terminator. */
746 save_comment (pfile, token, from, type)
749 const unsigned char *from;
752 unsigned char *buffer;
753 unsigned int len, clen;
755 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
757 /* C++ comments probably (not definitely) have moved past a new
758 line, which we don't want to save in the comment. */
759 if (is_vspace (pfile->buffer->cur[-1]))
762 /* If we are currently in a directive, then we need to store all
763 C++ comments as C comments internally, and so we need to
764 allocate a little extra space in that case.
766 Note that the only time we encounter a directive here is
767 when we are saving comments in a "#define". */
768 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
770 buffer = _cpp_unaligned_alloc (pfile, clen);
772 token->type = CPP_COMMENT;
773 token->val.str.len = clen;
774 token->val.str.text = buffer;
777 memcpy (buffer + 1, from, len - 1);
779 /* Finish conversion to a C comment, if necessary. */
780 if (pfile->state.in_directive && type == '/')
783 buffer[clen - 2] = '*';
784 buffer[clen - 1] = '/';
788 /* Allocate COUNT tokens for RUN. */
790 _cpp_init_tokenrun (run, count)
794 run->base = xnewvec (cpp_token, count);
795 run->limit = run->base + count;
799 /* Returns the next tokenrun, or creates one if there is none. */
804 if (run->next == NULL)
806 run->next = xnew (tokenrun);
807 run->next->prev = run;
808 _cpp_init_tokenrun (run->next, 250);
814 /* Allocate a single token that is invalidated at the same time as the
815 rest of the tokens on the line. Has its line and col set to the
816 same as the last lexed token, so that diagnostics appear in the
819 _cpp_temp_token (pfile)
822 cpp_token *old, *result;
824 old = pfile->cur_token - 1;
825 if (pfile->cur_token == pfile->cur_run->limit)
827 pfile->cur_run = next_tokenrun (pfile->cur_run);
828 pfile->cur_token = pfile->cur_run->base;
831 result = pfile->cur_token++;
832 result->line = old->line;
833 result->col = old->col;
837 /* Lex a token into RESULT (external interface). Takes care of issues
838 like directive handling, token lookahead, multiple include
839 optimization and skipping. */
841 _cpp_lex_token (pfile)
848 if (pfile->cur_token == pfile->cur_run->limit)
850 pfile->cur_run = next_tokenrun (pfile->cur_run);
851 pfile->cur_token = pfile->cur_run->base;
854 if (pfile->lookaheads)
857 result = pfile->cur_token++;
860 result = _cpp_lex_direct (pfile);
862 if (result->flags & BOL)
864 /* Is this a directive? If _cpp_handle_directive returns
865 false, it is an assembler #. */
866 if (result->type == CPP_HASH
867 /* 6.10.3 p 11: Directives in a list of macro arguments
868 gives undefined behavior. This implementation
869 handles the directive as normal. */
870 && pfile->state.parsing_args != 1
871 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
873 if (pfile->cb.line_change && !pfile->state.skipping)
874 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
877 /* We don't skip tokens in directives. */
878 if (pfile->state.in_directive)
881 /* Outside a directive, invalidate controlling macros. At file
882 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
883 get here and MI optimisation works. */
884 pfile->mi_valid = false;
886 if (!pfile->state.skipping || result->type == CPP_EOF)
893 /* A NUL terminates the current buffer. For ISO preprocessing this is
894 EOF, but for traditional preprocessing it indicates we need a line
895 refill. Returns TRUE to continue preprocessing a new buffer, FALSE
896 to return a CPP_EOF to the caller. */
898 continue_after_nul (pfile)
901 cpp_buffer *buffer = pfile->buffer;
904 buffer->saved_flags = BOL;
905 if (CPP_OPTION (pfile, traditional))
907 if (pfile->state.in_directive)
910 _cpp_remove_overlay (pfile);
911 more = _cpp_read_logical_line_trad (pfile);
912 _cpp_overlay_buffer (pfile, pfile->out.base,
913 pfile->out.cur - pfile->out.base);
914 pfile->line = pfile->out.first_line;
918 /* Stop parsing arguments with a CPP_EOF. When we finally come
919 back here, do the work of popping the buffer. */
920 if (!pfile->state.parsing_args)
922 if (buffer->cur != buffer->line_base)
924 /* Non-empty files should end in a newline. Don't warn
925 for command line and _Pragma buffers. */
926 if (!buffer->from_stage3)
927 cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
928 handle_newline (pfile);
931 /* Similarly, finish an in-progress directive with CPP_EOF
932 before popping the buffer. */
933 if (!pfile->state.in_directive && buffer->prev)
935 more = !buffer->return_at_eof;
936 _cpp_pop_buffer (pfile);
944 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
946 if (get_effective_char (pfile) == CHAR) \
947 result->type = THEN_TYPE; \
951 result->type = ELSE_TYPE; \
955 /* Lex a token into pfile->cur_token, which is also incremented, to
956 get diagnostics pointing to the correct location.
958 Does not handle issues such as token lookahead, multiple-include
959 optimisation, directives, skipping etc. This function is only
960 suitable for use by _cpp_lex_token, and in special cases like
961 lex_expansion_token which doesn't care for any of these issues.
963 When meeting a newline, returns CPP_EOF if parsing a directive,
964 otherwise returns to the start of the token buffer if permissible.
965 Returns the location of the lexed token. */
967 _cpp_lex_direct (pfile)
972 const unsigned char *comment_start;
973 cpp_token *result = pfile->cur_token++;
976 buffer = pfile->buffer;
977 result->flags = buffer->saved_flags;
978 buffer->saved_flags = 0;
980 result->line = pfile->line;
984 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
989 case ' ': case '\t': case '\f': case '\v': case '\0':
990 result->flags |= PREV_WHITE;
991 if (skip_whitespace (pfile, c))
996 if (continue_after_nul (pfile))
998 result->type = CPP_EOF;
1001 case '\n': case '\r':
1002 handle_newline (pfile);
1003 buffer->saved_flags = BOL;
1004 if (! pfile->state.in_directive)
1006 if (pfile->state.parsing_args == 2)
1007 buffer->saved_flags |= PREV_WHITE;
1008 if (!pfile->keep_tokens)
1010 pfile->cur_run = &pfile->base_run;
1011 result = pfile->base_run.base;
1012 pfile->cur_token = result + 1;
1016 result->type = CPP_EOF;
1021 /* These could start an escaped newline, or '?' a trigraph. Let
1022 skip_escaped_newlines do all the work. */
1024 unsigned int line = pfile->line;
1026 c = skip_escaped_newlines (pfile);
1027 if (line != pfile->line)
1030 /* We had at least one escaped newline of some sort.
1031 Update the token's line and column. */
1032 goto update_tokens_line;
1036 /* We are either the original '?' or '\\', or a trigraph. */
1038 result->type = CPP_QUERY;
1045 case '0': case '1': case '2': case '3': case '4':
1046 case '5': case '6': case '7': case '8': case '9':
1047 result->type = CPP_NUMBER;
1048 parse_number (pfile, &result->val.str, 0);
1052 /* 'L' may introduce wide characters or strings. */
1054 const unsigned char *pos = buffer->cur;
1056 c = get_effective_char (pfile);
1057 if (c == '\'' || c == '"')
1059 result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1060 parse_string (pfile, result, c);
1069 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1070 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1071 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1072 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1074 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1075 case 'G': case 'H': case 'I': case 'J': case 'K':
1076 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1077 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1079 result->type = CPP_NAME;
1080 result->val.node = parse_identifier (pfile);
1082 /* Convert named operators to their proper types. */
1083 if (result->val.node->flags & NODE_OPERATOR)
1085 result->flags |= NAMED_OP;
1086 result->type = result->val.node->directive_index;
1092 result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1093 parse_string (pfile, result, c);
1097 /* A potential block or line comment. */
1098 comment_start = buffer->cur;
1099 c = get_effective_char (pfile);
1103 if (skip_block_comment (pfile))
1104 cpp_error (pfile, DL_ERROR, "unterminated comment");
1106 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
1107 || CPP_IN_SYSTEM_HEADER (pfile)))
1109 /* Warn about comments only if pedantically GNUC89, and not
1110 in system headers. */
1111 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
1112 && ! buffer->warned_cplusplus_comments)
1114 cpp_error (pfile, DL_PEDWARN,
1115 "C++ style comments are not allowed in ISO C90");
1116 cpp_error (pfile, DL_PEDWARN,
1117 "(this will be reported only once per input file)");
1118 buffer->warned_cplusplus_comments = 1;
1121 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1122 cpp_error (pfile, DL_WARNING, "multi-line comment");
1126 result->type = CPP_DIV_EQ;
1132 result->type = CPP_DIV;
1136 if (!pfile->state.save_comments)
1138 result->flags |= PREV_WHITE;
1139 goto update_tokens_line;
1142 /* Save the comment as a token in its own right. */
1143 save_comment (pfile, result, comment_start, c);
1147 if (pfile->state.angled_headers)
1149 result->type = CPP_HEADER_NAME;
1150 parse_string (pfile, result, '>');
1154 c = get_effective_char (pfile);
1156 result->type = CPP_LESS_EQ;
1158 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1159 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1160 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
1161 else if (c == ':' && CPP_OPTION (pfile, digraphs))
1163 result->type = CPP_OPEN_SQUARE;
1164 result->flags |= DIGRAPH;
1166 else if (c == '%' && CPP_OPTION (pfile, digraphs))
1168 result->type = CPP_OPEN_BRACE;
1169 result->flags |= DIGRAPH;
1174 result->type = CPP_LESS;
1179 c = get_effective_char (pfile);
1181 result->type = CPP_GREATER_EQ;
1183 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1184 else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1185 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
1189 result->type = CPP_GREATER;
1194 c = get_effective_char (pfile);
1196 result->type = CPP_MOD_EQ;
1197 else if (CPP_OPTION (pfile, digraphs) && c == ':')
1199 result->flags |= DIGRAPH;
1200 result->type = CPP_HASH;
1201 if (get_effective_char (pfile) == '%')
1203 const unsigned char *pos = buffer->cur;
1205 if (get_effective_char (pfile) == ':')
1206 result->type = CPP_PASTE;
1208 buffer->cur = pos - 1;
1213 else if (CPP_OPTION (pfile, digraphs) && c == '>')
1215 result->flags |= DIGRAPH;
1216 result->type = CPP_CLOSE_BRACE;
1221 result->type = CPP_MOD;
1226 result->type = CPP_DOT;
1227 c = get_effective_char (pfile);
1230 const unsigned char *pos = buffer->cur;
1232 if (get_effective_char (pfile) == '.')
1233 result->type = CPP_ELLIPSIS;
1235 buffer->cur = pos - 1;
1237 /* All known character sets have 0...9 contiguous. */
1238 else if (ISDIGIT (c))
1240 result->type = CPP_NUMBER;
1241 parse_number (pfile, &result->val.str, 1);
1243 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1244 result->type = CPP_DOT_STAR;
1250 c = get_effective_char (pfile);
1252 result->type = CPP_PLUS_PLUS;
1254 result->type = CPP_PLUS_EQ;
1258 result->type = CPP_PLUS;
1263 c = get_effective_char (pfile);
1266 result->type = CPP_DEREF;
1267 if (CPP_OPTION (pfile, cplusplus))
1269 if (get_effective_char (pfile) == '*')
1270 result->type = CPP_DEREF_STAR;
1276 result->type = CPP_MINUS_MINUS;
1278 result->type = CPP_MINUS_EQ;
1282 result->type = CPP_MINUS;
1287 c = get_effective_char (pfile);
1289 result->type = CPP_AND_AND;
1291 result->type = CPP_AND_EQ;
1295 result->type = CPP_AND;
1300 c = get_effective_char (pfile);
1302 result->type = CPP_OR_OR;
1304 result->type = CPP_OR_EQ;
1308 result->type = CPP_OR;
1313 c = get_effective_char (pfile);
1314 if (c == ':' && CPP_OPTION (pfile, cplusplus))
1315 result->type = CPP_SCOPE;
1316 else if (c == '>' && CPP_OPTION (pfile, digraphs))
1318 result->flags |= DIGRAPH;
1319 result->type = CPP_CLOSE_SQUARE;
1324 result->type = CPP_COLON;
1328 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1329 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1330 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1331 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1332 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1334 case '~': result->type = CPP_COMPL; break;
1335 case ',': result->type = CPP_COMMA; break;
1336 case '(': result->type = CPP_OPEN_PAREN; break;
1337 case ')': result->type = CPP_CLOSE_PAREN; break;
1338 case '[': result->type = CPP_OPEN_SQUARE; break;
1339 case ']': result->type = CPP_CLOSE_SQUARE; break;
1340 case '{': result->type = CPP_OPEN_BRACE; break;
1341 case '}': result->type = CPP_CLOSE_BRACE; break;
1342 case ';': result->type = CPP_SEMICOLON; break;
1344 /* @ is a punctuator in Objective-C. */
1345 case '@': result->type = CPP_ATSIGN; break;
1348 if (CPP_OPTION (pfile, dollars_in_ident))
1350 /* Fall through... */
1354 result->type = CPP_OTHER;
1362 /* An upper bound on the number of bytes needed to spell TOKEN,
1363 including preceding whitespace. */
1365 cpp_token_len (token)
1366 const cpp_token *token;
1370 switch (TOKEN_SPELL (token))
1372 default: len = 0; break;
1374 case SPELL_STRING: len = token->val.str.len; break;
1375 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1377 /* 1 for whitespace, 4 for comment delimiters. */
1381 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1382 already contain enough space to hold the token's spelling.
1383 Returns a pointer to the character after the last character
1386 cpp_spell_token (pfile, token, buffer)
1387 cpp_reader *pfile; /* Would be nice to be rid of this... */
1388 const cpp_token *token;
1389 unsigned char *buffer;
1391 switch (TOKEN_SPELL (token))
1393 case SPELL_OPERATOR:
1395 const unsigned char *spelling;
1398 if (token->flags & DIGRAPH)
1400 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1401 else if (token->flags & NAMED_OP)
1404 spelling = TOKEN_NAME (token);
1406 while ((c = *spelling++) != '\0')
1412 *buffer++ = token->val.c;
1417 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1418 buffer += NODE_LEN (token->val.node);
1422 memcpy (buffer, token->val.str.text, token->val.str.len);
1423 buffer += token->val.str.len;
1428 int left, right, tag;
1429 switch (token->type)
1431 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1432 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1433 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1434 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1435 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1437 cpp_error (pfile, DL_ICE, "unknown string token %s\n",
1438 TOKEN_NAME (token));
1441 if (tag) *buffer++ = tag;
1443 memcpy (buffer, token->val.str.text, token->val.str.len);
1444 buffer += token->val.str.len;
1450 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1457 /* Returns TOKEN spelt as a null-terminated string. The string is
1458 freed when the reader is destroyed. Useful for diagnostics. */
1460 cpp_token_as_text (pfile, token)
1462 const cpp_token *token;
1464 unsigned int len = cpp_token_len (token);
1465 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1467 end = cpp_spell_token (pfile, token, start);
1473 /* Used by C front ends, which really should move to using
1474 cpp_token_as_text. */
1476 cpp_type2name (type)
1477 enum cpp_ttype type;
1479 return (const char *) token_spellings[type].name;
1482 /* Writes the spelling of token to FP, without any preceding space.
1483 Separated from cpp_spell_token for efficiency - to avoid stdio
1484 double-buffering. */
1486 cpp_output_token (token, fp)
1487 const cpp_token *token;
1490 switch (TOKEN_SPELL (token))
1492 case SPELL_OPERATOR:
1494 const unsigned char *spelling;
1497 if (token->flags & DIGRAPH)
1499 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1500 else if (token->flags & NAMED_OP)
1503 spelling = TOKEN_NAME (token);
1508 while ((c = *++spelling) != '\0');
1513 putc (token->val.c, fp);
1518 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1522 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1527 int left, right, tag;
1528 switch (token->type)
1530 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1531 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1532 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1533 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1534 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1536 fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token));
1539 if (tag) putc (tag, fp);
1541 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1547 /* An error, most probably. */
1552 /* Compare two tokens. */
1554 _cpp_equiv_tokens (a, b)
1555 const cpp_token *a, *b;
1557 if (a->type == b->type && a->flags == b->flags)
1558 switch (TOKEN_SPELL (a))
1560 default: /* Keep compiler happy. */
1561 case SPELL_OPERATOR:
1564 return a->val.c == b->val.c; /* Character. */
1566 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1568 return a->val.node == b->val.node;
1571 return (a->val.str.len == b->val.str.len
1572 && !memcmp (a->val.str.text, b->val.str.text,
1579 /* Returns nonzero if a space should be inserted to avoid an
1580 accidental token paste for output. For simplicity, it is
1581 conservative, and occasionally advises a space where one is not
1582 needed, e.g. "." and ".2". */
1584 cpp_avoid_paste (pfile, token1, token2)
1586 const cpp_token *token1, *token2;
1588 enum cpp_ttype a = token1->type, b = token2->type;
1591 if (token1->flags & NAMED_OP)
1593 if (token2->flags & NAMED_OP)
1597 if (token2->flags & DIGRAPH)
1598 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1599 else if (token_spellings[b].category == SPELL_OPERATOR)
1600 c = token_spellings[b].name[0];
1602 /* Quickly get everything that can paste with an '='. */
1603 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1608 case CPP_GREATER: return c == '>' || c == '?';
1609 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1610 case CPP_PLUS: return c == '+';
1611 case CPP_MINUS: return c == '-' || c == '>';
1612 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1613 case CPP_MOD: return c == ':' || c == '>';
1614 case CPP_AND: return c == '&';
1615 case CPP_OR: return c == '|';
1616 case CPP_COLON: return c == ':' || c == '>';
1617 case CPP_DEREF: return c == '*';
1618 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1619 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1620 case CPP_NAME: return ((b == CPP_NUMBER
1621 && name_p (pfile, &token2->val.str))
1623 || b == CPP_CHAR || b == CPP_STRING); /* L */
1624 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1625 || c == '.' || c == '+' || c == '-');
1626 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1627 && token1->val.c == '@'
1628 && (b == CPP_NAME || b == CPP_STRING));
1635 /* Output all the remaining tokens on the current line, and a newline
1636 character, to FP. Leading whitespace is removed. If there are
1637 macros, special token padding is not performed. */
1639 cpp_output_line (pfile, fp)
1643 const cpp_token *token;
1645 token = cpp_get_token (pfile);
1646 while (token->type != CPP_EOF)
1648 cpp_output_token (token, fp);
1649 token = cpp_get_token (pfile);
1650 if (token->flags & PREV_WHITE)
1657 /* Returns the value of a hexadecimal digit. */
1663 return hex_value (c);
1668 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
1669 failure if cpplib is not parsing C++ or C99. Such failure is
1670 silent, and no variables are updated. Otherwise returns 0, and
1671 warns if -Wtraditional.
1673 [lex.charset]: The character designated by the universal character
1674 name \UNNNNNNNN is that character whose character short name in
1675 ISO/IEC 10646 is NNNNNNNN; the character designated by the
1676 universal character name \uNNNN is that character whose character
1677 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
1678 for a universal character name is less than 0x20 or in the range
1679 0x7F-0x9F (inclusive), or if the universal character name
1680 designates a character in the basic source character set, then the
1681 program is ill-formed.
1683 We assume that wchar_t is Unicode, so we don't need to do any
1684 mapping. Is this ever wrong?
1686 PC points to the 'u' or 'U', PSTR points to the byte after PC,
1687 LIMIT is the end of the string or charconst. PSTR is updated to
1688 point after the UCS on return, and the UCS is written into PC. */
1691 maybe_read_ucs (pfile, pstr, limit, pc)
1693 const unsigned char **pstr;
1694 const unsigned char *limit;
1697 const unsigned char *p = *pstr;
1698 unsigned int code = 0;
1699 unsigned int c = *pc, length;
1701 /* Only attempt to interpret a UCS for C++ and C99. */
1702 if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
1705 if (CPP_WTRADITIONAL (pfile))
1706 cpp_error (pfile, DL_WARNING,
1707 "the meaning of '\\%c' is different in traditional C", c);
1709 length = (c == 'u' ? 4: 8);
1711 if ((size_t) (limit - p) < length)
1713 cpp_error (pfile, DL_ERROR, "incomplete universal-character-name");
1714 /* Skip to the end to avoid more diagnostics. */
1719 for (; length; length--, p++)
1723 code = (code << 4) + hex_digit_value (c);
1726 cpp_error (pfile, DL_ERROR,
1727 "non-hex digit '%c' in universal-character-name", c);
1728 /* We shouldn't skip in case there are multibyte chars. */
1734 if (CPP_OPTION (pfile, EBCDIC))
1736 cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target");
1737 code = 0x3f; /* EBCDIC invalid character */
1739 /* True extended characters are OK. */
1740 else if (code >= 0xa0
1741 && !(code & 0x80000000)
1742 && !(code >= 0xD800 && code <= 0xDFFF))
1744 /* The standard permits $, @ and ` to be specified as UCNs. We use
1745 hex escapes so that this also works with EBCDIC hosts. */
1746 else if (code == 0x24 || code == 0x40 || code == 0x60)
1748 /* Don't give another error if one occurred above. */
1749 else if (length == 0)
1750 cpp_error (pfile, DL_ERROR, "universal-character-name out of range");
1757 /* Returns the value of an escape sequence, truncated to the correct
1758 target precision. PSTR points to the input pointer, which is just
1759 after the backslash. LIMIT points to the end of the text. WIDE is true
1760 if the escape sequence is part of a wide character constant or
1761 string literal. Handles all relevant diagnostics. */
1763 cpp_parse_escape (pfile, pstr, limit, wide)
1765 const unsigned char **pstr;
1766 const unsigned char *limit;
1769 /* Values of \a \b \e \f \n \r \t \v respectively. */
1770 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1771 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1774 const unsigned char *str = *pstr, *charconsts;
1778 if (CPP_OPTION (pfile, EBCDIC))
1779 charconsts = ebcdic;
1784 width = CPP_OPTION (pfile, wchar_precision);
1786 width = CPP_OPTION (pfile, char_precision);
1787 if (width < BITS_PER_CPPCHAR_T)
1788 mask = ((cppchar_t) 1 << width) - 1;
1795 case '\\': case '\'': case '"': case '?': break;
1796 case 'b': c = charconsts[1]; break;
1797 case 'f': c = charconsts[3]; break;
1798 case 'n': c = charconsts[4]; break;
1799 case 'r': c = charconsts[5]; break;
1800 case 't': c = charconsts[6]; break;
1801 case 'v': c = charconsts[7]; break;
1803 case '(': case '{': case '[': case '%':
1804 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1805 '\%' is used to prevent SCCS from getting confused. */
1806 unknown = CPP_PEDANTIC (pfile);
1810 if (CPP_WTRADITIONAL (pfile))
1811 cpp_error (pfile, DL_WARNING,
1812 "the meaning of '\\a' is different in traditional C");
1817 if (CPP_PEDANTIC (pfile))
1818 cpp_error (pfile, DL_PEDWARN,
1819 "non-ISO-standard escape sequence, '\\%c'", (int) c);
1824 unknown = maybe_read_ucs (pfile, &str, limit, &c);
1828 if (CPP_WTRADITIONAL (pfile))
1829 cpp_error (pfile, DL_WARNING,
1830 "the meaning of '\\x' is different in traditional C");
1833 cppchar_t i = 0, overflow = 0;
1834 int digits_found = 0;
1842 overflow |= i ^ (i << 4 >> 4);
1843 i = (i << 4) + hex_digit_value (c);
1848 cpp_error (pfile, DL_ERROR,
1849 "\\x used with no following hex digits");
1851 if (overflow | (i != (i & mask)))
1853 cpp_error (pfile, DL_PEDWARN,
1854 "hex escape sequence out of range");
1861 case '0': case '1': case '2': case '3':
1862 case '4': case '5': case '6': case '7':
1865 cppchar_t i = c - '0';
1867 while (str < limit && ++count < 3)
1870 if (c < '0' || c > '7')
1873 i = (i << 3) + c - '0';
1876 if (i != (i & mask))
1878 cpp_error (pfile, DL_PEDWARN,
1879 "octal escape sequence out of range");
1894 cpp_error (pfile, DL_PEDWARN,
1895 "unknown escape sequence '\\%c'", (int) c);
1897 cpp_error (pfile, DL_PEDWARN,
1898 "unknown escape sequence: '\\%03o'", (int) c);
1903 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1911 /* Interpret a (possibly wide) character constant in TOKEN.
1912 WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN
1913 points to a variable that is filled in with the number of
1914 characters seen, and UNSIGNEDP to a variable that indicates whether
1915 the result has unsigned type. */
1917 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1919 const cpp_token *token;
1920 unsigned int *pchars_seen;
1923 const unsigned char *str = token->val.str.text;
1924 const unsigned char *limit = str + token->val.str.len;
1925 unsigned int chars_seen = 0;
1926 size_t width, max_chars;
1927 cppchar_t c, mask, result = 0;
1930 #ifdef MULTIBYTE_CHARS
1931 (void) local_mbtowc (NULL, NULL, 0);
1934 /* Width in bits. */
1935 if (token->type == CPP_CHAR)
1937 width = CPP_OPTION (pfile, char_precision);
1938 max_chars = CPP_OPTION (pfile, int_precision) / width;
1939 unsigned_p = CPP_OPTION (pfile, unsigned_char);
1943 width = CPP_OPTION (pfile, wchar_precision);
1945 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
1948 if (width < BITS_PER_CPPCHAR_T)
1949 mask = ((cppchar_t) 1 << width) - 1;
1955 #ifdef MULTIBYTE_CHARS
1959 char_len = local_mbtowc (&wc, (const char *)str, limit - str);
1962 cpp_error (pfile, DL_WARNING,
1963 "ignoring invalid multibyte character");
1976 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1978 #ifdef MAP_CHARACTER
1980 c = MAP_CHARACTER (c);
1985 /* Truncate the character, scale the result and merge the two. */
1987 if (width < BITS_PER_CPPCHAR_T)
1988 result = (result << width) | c;
1993 if (chars_seen == 0)
1994 cpp_error (pfile, DL_ERROR, "empty character constant");
1995 else if (chars_seen > 1)
1997 /* Multichar charconsts are of type int and therefore signed. */
2000 if (chars_seen > max_chars)
2002 chars_seen = max_chars;
2003 cpp_error (pfile, DL_WARNING,
2004 "character constant too long for its type");
2006 else if (CPP_OPTION (pfile, warn_multichar))
2007 cpp_error (pfile, DL_WARNING, "multi-character character constant");
2010 /* Sign-extend or truncate the constant to cppchar_t. The value is
2011 in WIDTH bits, but for multi-char charconsts its value is the
2012 full target type's width. */
2015 if (width < BITS_PER_CPPCHAR_T)
2017 mask = ((cppchar_t) 1 << width) - 1;
2018 if (unsigned_p || !(result & (1 << (width - 1))))
2024 *pchars_seen = chars_seen;
2025 *unsignedp = unsigned_p;
2029 /* Memory buffers. Changing these three constants can have a dramatic
2030 effect on performance. The values here are reasonable defaults,
2031 but might be tuned. If you adjust them, be sure to test across a
2032 range of uses of cpplib, including heavy nested function-like macro
2033 expansion. Also check the change in peak memory usage (NJAMD is a
2034 good tool for this). */
2035 #define MIN_BUFF_SIZE 8000
2036 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
2037 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
2038 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
2040 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
2041 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
2044 /* Create a new allocation buffer. Place the control block at the end
2045 of the buffer, so that buffer overflows will cause immediate chaos. */
2051 unsigned char *base;
2053 if (len < MIN_BUFF_SIZE)
2054 len = MIN_BUFF_SIZE;
2055 len = CPP_ALIGN (len);
2057 base = xmalloc (len + sizeof (_cpp_buff));
2058 result = (_cpp_buff *) (base + len);
2059 result->base = base;
2061 result->limit = base + len;
2062 result->next = NULL;
2066 /* Place a chain of unwanted allocation buffers on the free list. */
2068 _cpp_release_buff (pfile, buff)
2072 _cpp_buff *end = buff;
2076 end->next = pfile->free_buffs;
2077 pfile->free_buffs = buff;
2080 /* Return a free buffer of size at least MIN_SIZE. */
2082 _cpp_get_buff (pfile, min_size)
2086 _cpp_buff *result, **p;
2088 for (p = &pfile->free_buffs;; p = &(*p)->next)
2093 return new_buff (min_size);
2095 size = result->limit - result->base;
2096 /* Return a buffer that's big enough, but don't waste one that's
2098 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
2103 result->next = NULL;
2104 result->cur = result->base;
2108 /* Creates a new buffer with enough space to hold the uncommitted
2109 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
2110 the excess bytes to the new buffer. Chains the new buffer after
2111 BUFF, and returns the new buffer. */
2113 _cpp_append_extend_buff (pfile, buff, min_extra)
2118 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
2119 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
2121 buff->next = new_buff;
2122 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
2126 /* Creates a new buffer with enough space to hold the uncommitted
2127 remaining bytes of the buffer pointed to by BUFF, and at least
2128 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
2129 Chains the new buffer before the buffer pointed to by BUFF, and
2130 updates the pointer to point to the new buffer. */
2132 _cpp_extend_buff (pfile, pbuff, min_extra)
2137 _cpp_buff *new_buff, *old_buff = *pbuff;
2138 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
2140 new_buff = _cpp_get_buff (pfile, size);
2141 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
2142 new_buff->next = old_buff;
2146 /* Free a chain of buffers starting at BUFF. */
2148 _cpp_free_buff (buff)
2153 for (; buff; buff = next)
2160 /* Allocate permanent, unaligned storage of length LEN. */
2162 _cpp_unaligned_alloc (pfile, len)
2166 _cpp_buff *buff = pfile->u_buff;
2167 unsigned char *result = buff->cur;
2169 if (len > (size_t) (buff->limit - result))
2171 buff = _cpp_get_buff (pfile, len);
2172 buff->next = pfile->u_buff;
2173 pfile->u_buff = buff;
2177 buff->cur = result + len;
2181 /* Allocate permanent, unaligned storage of length LEN from a_buff.
2182 That buffer is used for growing allocations when saving macro
2183 replacement lists in a #define, and when parsing an answer to an
2184 assertion in #assert, #unassert or #if (and therefore possibly
2185 whilst expanding macros). It therefore must not be used by any
2186 code that they might call: specifically the lexer and the guts of
2189 All existing other uses clearly fit this restriction: storing
2190 registered pragmas during initialization. */
2192 _cpp_aligned_alloc (pfile, len)
2196 _cpp_buff *buff = pfile->a_buff;
2197 unsigned char *result = buff->cur;
2199 if (len > (size_t) (buff->limit - result))
2201 buff = _cpp_get_buff (pfile, len);
2202 buff->next = pfile->a_buff;
2203 pfile->a_buff = buff;
2207 buff->cur = result + len;