1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
39 enum spell_type category;
40 const unsigned char *name;
43 static const unsigned char *const digraph_spellings[] =
44 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
46 #define OP(e, s) { SPELL_OPERATOR, U s },
47 #define TK(e, s) { s, U STRINGX (e) },
48 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
52 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
53 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
55 static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int));
56 static int skip_line_comment PARAMS ((cpp_reader *));
57 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
58 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
59 static void lex_number PARAMS ((cpp_reader *, cpp_string *));
60 static bool forms_identifier_p PARAMS ((cpp_reader *, int));
61 static void lex_string PARAMS ((cpp_reader *, cpp_token *, const uchar *));
62 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *,
64 static void create_literal PARAMS ((cpp_reader *, cpp_token *, const uchar *,
65 unsigned int, enum cpp_ttype));
66 static int name_p PARAMS ((cpp_reader *, const cpp_string *));
67 static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **));
68 static tokenrun *next_tokenrun PARAMS ((tokenrun *));
70 static unsigned int hex_digit_value PARAMS ((unsigned int));
71 static _cpp_buff *new_buff PARAMS ((size_t));
76 Compares the token TOKEN to the NUL-terminated string STRING.
77 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
79 cpp_ideq (token, string)
80 const cpp_token *token;
83 if (token->type != CPP_NAME)
86 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
89 /* Record a note TYPE at byte POS into the current cleaned logical
92 add_line_note (buffer, pos, type)
97 if (buffer->notes_used == buffer->notes_cap)
99 buffer->notes_cap = buffer->notes_cap * 2 + 200;
100 buffer->notes = (_cpp_line_note *)
101 xrealloc (buffer->notes, buffer->notes_cap * sizeof (_cpp_line_note));
104 buffer->notes[buffer->notes_used].pos = pos;
105 buffer->notes[buffer->notes_used].type = type;
106 buffer->notes_used++;
109 /* Returns with a logical line that contains no escaped newlines or
110 trigraphs. This is a time-critical inner loop. */
112 _cpp_clean_line (pfile)
119 buffer = pfile->buffer;
120 buffer->cur_note = buffer->notes_used = 0;
121 buffer->cur = buffer->line_base = buffer->next_line;
122 buffer->need_line = false;
123 s = buffer->next_line - 1;
125 if (!buffer->from_stage3)
134 if (c == '\n' || c == '\r')
136 /* Handle DOS line endings. */
137 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
139 if (s == buffer->rlimit)
144 while (p != buffer->next_line && is_nvspace (p[-1]))
146 if (p == buffer->next_line || p[-1] != '\\')
149 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
151 buffer->next_line = p - 1;
153 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
155 /* Add a note regardless, for the benefit of -Wtrigraphs. */
156 add_line_note (buffer, d, s[2]);
157 if (CPP_OPTION (pfile, trigraphs))
159 *d = _cpp_trigraph_map[s[2]];
169 while (*s != '\n' && *s != '\r');
172 /* Handle DOS line endings. */
173 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
178 /* A sentinel note that should never be processed. */
179 add_line_note (buffer, d + 1, '\n');
180 buffer->next_line = s + 1;
183 /* Process the notes created by add_line_note as far as the current
186 _cpp_process_line_notes (pfile, in_comment)
190 cpp_buffer *buffer = pfile->buffer;
194 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
197 if (note->pos > buffer->cur)
201 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
203 if (note->type == '\\' || note->type == ' ')
205 if (note->type == ' ' && !in_comment)
206 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
207 "backslash and newline separated by space");
209 if (buffer->next_line > buffer->rlimit)
211 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
212 "backslash-newline at end of file");
213 /* Prevent "no newline at end of file" warning. */
214 buffer->next_line = buffer->rlimit;
217 buffer->line_base = note->pos;
220 else if (_cpp_trigraph_map[note->type])
222 if (!in_comment && CPP_OPTION (pfile, warn_trigraphs))
224 if (CPP_OPTION (pfile, trigraphs))
225 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
226 "trigraph ??%c converted to %c",
228 (int) _cpp_trigraph_map[note->type]);
230 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
231 "trigraph ??%c ignored",
240 /* Skip a C-style block comment. We find the end of the comment by
241 seeing if an asterisk is before every '/' we encounter. Returns
242 nonzero if comment terminated by EOF, zero otherwise.
244 Buffer->cur points to the initial asterisk of the comment. */
246 _cpp_skip_block_comment (pfile)
249 cpp_buffer *buffer = pfile->buffer;
253 if (*buffer->cur == '/')
260 /* People like decorating comments with '*', so check for '/'
261 instead for efficiency. */
264 if (buffer->cur[-2] == '*')
267 /* Warn about potential nested comments, but not if the '/'
268 comes immediately before the true comment delimiter.
269 Don't bother to get it right across escaped newlines. */
270 if (CPP_OPTION (pfile, warn_comments)
271 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
272 cpp_error_with_line (pfile, DL_WARNING,
273 pfile->line, CPP_BUF_COL (buffer),
274 "\"/*\" within comment");
279 _cpp_process_line_notes (pfile, true);
280 if (buffer->next_line >= buffer->rlimit)
282 _cpp_clean_line (pfile);
290 /* Skip a C++ line comment, leaving buffer->cur pointing to the
291 terminating newline. Handles escaped newlines. Returns nonzero
292 if a multiline comment. */
294 skip_line_comment (pfile)
297 cpp_buffer *buffer = pfile->buffer;
298 unsigned int orig_line = pfile->line;
300 while (*buffer->cur != '\n')
303 _cpp_process_line_notes (pfile, true);
304 return orig_line != pfile->line;
307 /* Skips whitespace, saving the next non-whitespace character. */
309 skip_whitespace (pfile, c)
313 cpp_buffer *buffer = pfile->buffer;
314 bool saw_NUL = false;
318 /* Horizontal space always OK. */
319 if (c == ' ' || c == '\t')
321 /* Just \f \v or \0 left. */
324 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
325 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
326 CPP_BUF_COL (buffer),
327 "%s in preprocessing directive",
328 c == '\f' ? "form feed" : "vertical tab");
332 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
333 while (is_nvspace (c));
336 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
341 /* See if the characters of a number token are valid in a name (no
344 name_p (pfile, string)
346 const cpp_string *string;
350 for (i = 0; i < string->len; i++)
351 if (!is_idchar (string->text[i]))
357 /* Returns TRUE if the sequence starting at buffer->cur is valid in
358 an identifier. FIRST is TRUE if this starts an identifier. */
360 forms_identifier_p (pfile, first)
364 cpp_buffer *buffer = pfile->buffer;
366 if (*buffer->cur == '$')
368 if (!CPP_OPTION (pfile, dollars_in_ident))
372 if (CPP_PEDANTIC (pfile)
373 && !pfile->state.skipping
374 && !pfile->warned_dollar)
376 pfile->warned_dollar = true;
377 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
383 /* Is this a syntactically valid UCN? */
384 if (0 && *buffer->cur == '\\'
385 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
388 if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first))
396 /* Lex an identifier starting at BUFFER->CUR - 1. */
397 static cpp_hashnode *
398 lex_identifier (pfile, base)
402 cpp_hashnode *result;
407 cur = pfile->buffer->cur;
409 /* N.B. ISIDNUM does not include $. */
410 while (ISIDNUM (*cur))
413 pfile->buffer->cur = cur;
415 while (forms_identifier_p (pfile, false));
417 result = (cpp_hashnode *)
418 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
420 /* Rarely, identifiers require diagnostics when lexed. */
421 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
422 && !pfile->state.skipping, 0))
424 /* It is allowed to poison the same identifier twice. */
425 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
426 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
429 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
430 replacement list of a variadic macro. */
431 if (result == pfile->spec_nodes.n__VA_ARGS__
432 && !pfile->state.va_args_ok)
433 cpp_error (pfile, DL_PEDWARN,
434 "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
440 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
442 lex_number (pfile, number)
450 base = pfile->buffer->cur - 1;
453 cur = pfile->buffer->cur;
455 /* N.B. ISIDNUM does not include $. */
456 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
459 pfile->buffer->cur = cur;
461 while (forms_identifier_p (pfile, false));
463 number->len = cur - base;
464 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
465 memcpy (dest, base, number->len);
466 dest[number->len] = '\0';
470 /* Create a token of type TYPE with a literal spelling. */
472 create_literal (pfile, token, base, len, type)
479 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
481 memcpy (dest, base, len);
484 token->val.str.len = len;
485 token->val.str.text = dest;
488 /* Lexes a string, character constant, or angle-bracketed header file
489 name. The stored string contains the spelling, including opening
490 quote and any leading 'L'. It returns the type of the
491 literal, or CPP_OTHER if it was not properly terminated.
493 The spelling is NUL-terminated, but it is not guaranteed that this
494 is the first NUL since embedded NULs are preserved. */
496 lex_string (pfile, token, base)
501 bool saw_NUL = false;
503 cppchar_t terminator;
508 if (terminator == 'L')
510 if (terminator == '\"')
511 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
512 else if (terminator == '\'')
513 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
515 terminator = '>', type = CPP_HEADER_NAME;
519 cppchar_t c = *cur++;
521 /* In #include-style directives, terminators are not escapable. */
522 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
524 else if (c == terminator)
536 if (saw_NUL && !pfile->state.skipping)
537 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
539 pfile->buffer->cur = cur;
540 create_literal (pfile, token, base, cur - base, type);
543 /* The stored comment includes the comment start and any terminator. */
545 save_comment (pfile, token, from, type)
548 const unsigned char *from;
551 unsigned char *buffer;
552 unsigned int len, clen;
554 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
556 /* C++ comments probably (not definitely) have moved past a new
557 line, which we don't want to save in the comment. */
558 if (is_vspace (pfile->buffer->cur[-1]))
561 /* If we are currently in a directive, then we need to store all
562 C++ comments as C comments internally, and so we need to
563 allocate a little extra space in that case.
565 Note that the only time we encounter a directive here is
566 when we are saving comments in a "#define". */
567 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
569 buffer = _cpp_unaligned_alloc (pfile, clen);
571 token->type = CPP_COMMENT;
572 token->val.str.len = clen;
573 token->val.str.text = buffer;
576 memcpy (buffer + 1, from, len - 1);
578 /* Finish conversion to a C comment, if necessary. */
579 if (pfile->state.in_directive && type == '/')
582 buffer[clen - 2] = '*';
583 buffer[clen - 1] = '/';
587 /* Allocate COUNT tokens for RUN. */
589 _cpp_init_tokenrun (run, count)
593 run->base = xnewvec (cpp_token, count);
594 run->limit = run->base + count;
598 /* Returns the next tokenrun, or creates one if there is none. */
603 if (run->next == NULL)
605 run->next = xnew (tokenrun);
606 run->next->prev = run;
607 _cpp_init_tokenrun (run->next, 250);
613 /* Allocate a single token that is invalidated at the same time as the
614 rest of the tokens on the line. Has its line and col set to the
615 same as the last lexed token, so that diagnostics appear in the
618 _cpp_temp_token (pfile)
621 cpp_token *old, *result;
623 old = pfile->cur_token - 1;
624 if (pfile->cur_token == pfile->cur_run->limit)
626 pfile->cur_run = next_tokenrun (pfile->cur_run);
627 pfile->cur_token = pfile->cur_run->base;
630 result = pfile->cur_token++;
631 result->line = old->line;
632 result->col = old->col;
636 /* Lex a token into RESULT (external interface). Takes care of issues
637 like directive handling, token lookahead, multiple include
638 optimization and skipping. */
640 _cpp_lex_token (pfile)
647 if (pfile->cur_token == pfile->cur_run->limit)
649 pfile->cur_run = next_tokenrun (pfile->cur_run);
650 pfile->cur_token = pfile->cur_run->base;
653 if (pfile->lookaheads)
656 result = pfile->cur_token++;
659 result = _cpp_lex_direct (pfile);
661 if (result->flags & BOL)
663 /* Is this a directive? If _cpp_handle_directive returns
664 false, it is an assembler #. */
665 if (result->type == CPP_HASH
666 /* 6.10.3 p 11: Directives in a list of macro arguments
667 give undefined behavior. This implementation
668 handles the directive as normal. */
669 && pfile->state.parsing_args != 1
670 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
672 if (pfile->cb.line_change && !pfile->state.skipping)
673 (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args);
676 /* We don't skip tokens in directives. */
677 if (pfile->state.in_directive)
680 /* Outside a directive, invalidate controlling macros. At file
681 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
682 get here and MI optimisation works. */
683 pfile->mi_valid = false;
685 if (!pfile->state.skipping || result->type == CPP_EOF)
692 /* Returns true if a fresh line has been loaded. */
694 _cpp_get_fresh_line (pfile)
697 /* We can't get a new line until we leave the current directive. */
698 if (pfile->state.in_directive)
703 cpp_buffer *buffer = pfile->buffer;
705 if (!buffer->need_line)
708 if (buffer->next_line < buffer->rlimit)
710 _cpp_clean_line (pfile);
714 /* First, get out of parsing arguments state. */
715 if (pfile->state.parsing_args)
718 /* End of buffer. Non-empty files should end in a newline. */
719 if (buffer->buf != buffer->rlimit
720 && buffer->next_line > buffer->rlimit
721 && !buffer->from_stage3)
723 /* Only warn once. */
724 buffer->next_line = buffer->rlimit;
725 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
726 CPP_BUF_COLUMN (buffer, buffer->cur),
727 "no newline at end of file");
733 if (buffer->return_at_eof)
735 _cpp_pop_buffer (pfile);
739 _cpp_pop_buffer (pfile);
743 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
746 result->type = ELSE_TYPE; \
747 if (*buffer->cur == CHAR) \
748 buffer->cur++, result->type = THEN_TYPE; \
752 /* Lex a token into pfile->cur_token, which is also incremented, to
753 get diagnostics pointing to the correct location.
755 Does not handle issues such as token lookahead, multiple-include
756 optimisation, directives, skipping etc. This function is only
757 suitable for use by _cpp_lex_token, and in special cases like
758 lex_expansion_token which doesn't care for any of these issues.
760 When meeting a newline, returns CPP_EOF if parsing a directive,
761 otherwise returns to the start of the token buffer if permissible.
762 Returns the location of the lexed token. */
764 _cpp_lex_direct (pfile)
769 const unsigned char *comment_start;
770 cpp_token *result = pfile->cur_token++;
774 if (pfile->buffer->need_line)
776 if (!_cpp_get_fresh_line (pfile))
778 result->type = CPP_EOF;
781 if (!pfile->keep_tokens)
783 pfile->cur_run = &pfile->base_run;
784 result = pfile->base_run.base;
785 pfile->cur_token = result + 1;
788 if (pfile->state.parsing_args == 2)
789 result->flags |= PREV_WHITE;
791 buffer = pfile->buffer;
793 result->line = pfile->line;
796 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
797 && !pfile->overlaid_buffer)
799 _cpp_process_line_notes (pfile, false);
800 result->line = pfile->line;
803 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
807 case ' ': case '\t': case '\f': case '\v': case '\0':
808 result->flags |= PREV_WHITE;
809 skip_whitespace (pfile, c);
814 buffer->need_line = true;
817 case '0': case '1': case '2': case '3': case '4':
818 case '5': case '6': case '7': case '8': case '9':
819 result->type = CPP_NUMBER;
820 lex_number (pfile, &result->val.str);
824 /* 'L' may introduce wide characters or strings. */
825 if (*buffer->cur == '\'' || *buffer->cur == '"')
827 lex_string (pfile, result, buffer->cur - 1);
833 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
834 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
835 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
836 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
838 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
839 case 'G': case 'H': case 'I': case 'J': case 'K':
840 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
841 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
843 result->type = CPP_NAME;
844 result->val.node = lex_identifier (pfile, buffer->cur - 1);
846 /* Convert named operators to their proper types. */
847 if (result->val.node->flags & NODE_OPERATOR)
849 result->flags |= NAMED_OP;
850 result->type = result->val.node->directive_index;
856 lex_string (pfile, result, buffer->cur - 1);
860 /* A potential block or line comment. */
861 comment_start = buffer->cur;
866 if (_cpp_skip_block_comment (pfile))
867 cpp_error (pfile, DL_ERROR, "unterminated comment");
869 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
870 || CPP_IN_SYSTEM_HEADER (pfile)))
872 /* Warn about comments only if pedantically GNUC89, and not
873 in system headers. */
874 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
875 && ! buffer->warned_cplusplus_comments)
877 cpp_error (pfile, DL_PEDWARN,
878 "C++ style comments are not allowed in ISO C90");
879 cpp_error (pfile, DL_PEDWARN,
880 "(this will be reported only once per input file)");
881 buffer->warned_cplusplus_comments = 1;
884 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
885 cpp_error (pfile, DL_WARNING, "multi-line comment");
890 result->type = CPP_DIV_EQ;
895 result->type = CPP_DIV;
899 if (!pfile->state.save_comments)
901 result->flags |= PREV_WHITE;
902 goto update_tokens_line;
905 /* Save the comment as a token in its own right. */
906 save_comment (pfile, result, comment_start, c);
910 if (pfile->state.angled_headers)
912 lex_string (pfile, result, buffer->cur - 1);
916 result->type = CPP_LESS;
917 if (*buffer->cur == '=')
918 buffer->cur++, result->type = CPP_LESS_EQ;
919 else if (*buffer->cur == '<')
922 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
924 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
927 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
929 else if (CPP_OPTION (pfile, digraphs))
931 if (*buffer->cur == ':')
934 result->flags |= DIGRAPH;
935 result->type = CPP_OPEN_SQUARE;
937 else if (*buffer->cur == '%')
940 result->flags |= DIGRAPH;
941 result->type = CPP_OPEN_BRACE;
947 result->type = CPP_GREATER;
948 if (*buffer->cur == '=')
949 buffer->cur++, result->type = CPP_GREATER_EQ;
950 else if (*buffer->cur == '>')
953 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
955 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
958 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
963 result->type = CPP_MOD;
964 if (*buffer->cur == '=')
965 buffer->cur++, result->type = CPP_MOD_EQ;
966 else if (CPP_OPTION (pfile, digraphs))
968 if (*buffer->cur == ':')
971 result->flags |= DIGRAPH;
972 result->type = CPP_HASH;
973 if (*buffer->cur == '%' && buffer->cur[1] == ':')
974 buffer->cur += 2, result->type = CPP_PASTE;
976 else if (*buffer->cur == '>')
979 result->flags |= DIGRAPH;
980 result->type = CPP_CLOSE_BRACE;
986 result->type = CPP_DOT;
987 if (ISDIGIT (*buffer->cur))
989 result->type = CPP_NUMBER;
990 lex_number (pfile, &result->val.str);
992 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
993 buffer->cur += 2, result->type = CPP_ELLIPSIS;
994 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
995 buffer->cur++, result->type = CPP_DOT_STAR;
999 result->type = CPP_PLUS;
1000 if (*buffer->cur == '+')
1001 buffer->cur++, result->type = CPP_PLUS_PLUS;
1002 else if (*buffer->cur == '=')
1003 buffer->cur++, result->type = CPP_PLUS_EQ;
1007 result->type = CPP_MINUS;
1008 if (*buffer->cur == '>')
1011 result->type = CPP_DEREF;
1012 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1013 buffer->cur++, result->type = CPP_DEREF_STAR;
1015 else if (*buffer->cur == '-')
1016 buffer->cur++, result->type = CPP_MINUS_MINUS;
1017 else if (*buffer->cur == '=')
1018 buffer->cur++, result->type = CPP_MINUS_EQ;
1022 result->type = CPP_AND;
1023 if (*buffer->cur == '&')
1024 buffer->cur++, result->type = CPP_AND_AND;
1025 else if (*buffer->cur == '=')
1026 buffer->cur++, result->type = CPP_AND_EQ;
1030 result->type = CPP_OR;
1031 if (*buffer->cur == '|')
1032 buffer->cur++, result->type = CPP_OR_OR;
1033 else if (*buffer->cur == '=')
1034 buffer->cur++, result->type = CPP_OR_EQ;
1038 result->type = CPP_COLON;
1039 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1040 buffer->cur++, result->type = CPP_SCOPE;
1041 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1044 result->flags |= DIGRAPH;
1045 result->type = CPP_CLOSE_SQUARE;
1049 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1050 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1051 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1052 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1053 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1055 case '?': result->type = CPP_QUERY; break;
1056 case '~': result->type = CPP_COMPL; break;
1057 case ',': result->type = CPP_COMMA; break;
1058 case '(': result->type = CPP_OPEN_PAREN; break;
1059 case ')': result->type = CPP_CLOSE_PAREN; break;
1060 case '[': result->type = CPP_OPEN_SQUARE; break;
1061 case ']': result->type = CPP_CLOSE_SQUARE; break;
1062 case '{': result->type = CPP_OPEN_BRACE; break;
1063 case '}': result->type = CPP_CLOSE_BRACE; break;
1064 case ';': result->type = CPP_SEMICOLON; break;
1066 /* @ is a punctuator in Objective-C. */
1067 case '@': result->type = CPP_ATSIGN; break;
1072 const uchar *base = --buffer->cur;
1074 if (forms_identifier_p (pfile, true))
1076 result->type = CPP_NAME;
1077 result->val.node = lex_identifier (pfile, base);
1084 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1091 /* An upper bound on the number of bytes needed to spell TOKEN.
1092 Does not include preceding whitespace. */
1094 cpp_token_len (token)
1095 const cpp_token *token;
1099 switch (TOKEN_SPELL (token))
1101 default: len = 4; break;
1102 case SPELL_LITERAL: len = token->val.str.len; break;
1103 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1109 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1110 already contain enough space to hold the token's spelling.
1111 Returns a pointer to the character after the last character
1114 cpp_spell_token (pfile, token, buffer)
1115 cpp_reader *pfile; /* Would be nice to be rid of this... */
1116 const cpp_token *token;
1117 unsigned char *buffer;
1119 switch (TOKEN_SPELL (token))
1121 case SPELL_OPERATOR:
1123 const unsigned char *spelling;
1126 if (token->flags & DIGRAPH)
1128 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1129 else if (token->flags & NAMED_OP)
1132 spelling = TOKEN_NAME (token);
1134 while ((c = *spelling++) != '\0')
1141 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1142 buffer += NODE_LEN (token->val.node);
1146 memcpy (buffer, token->val.str.text, token->val.str.len);
1147 buffer += token->val.str.len;
1151 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1158 /* Returns TOKEN spelt as a null-terminated string. The string is
1159 freed when the reader is destroyed. Useful for diagnostics. */
1161 cpp_token_as_text (pfile, token)
1163 const cpp_token *token;
1165 unsigned int len = cpp_token_len (token) + 1;
1166 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1168 end = cpp_spell_token (pfile, token, start);
1174 /* Used by C front ends, which really should move to using
1175 cpp_token_as_text. */
1177 cpp_type2name (type)
1178 enum cpp_ttype type;
1180 return (const char *) token_spellings[type].name;
1183 /* Writes the spelling of token to FP, without any preceding space.
1184 Separated from cpp_spell_token for efficiency - to avoid stdio
1185 double-buffering. */
1187 cpp_output_token (token, fp)
1188 const cpp_token *token;
1191 switch (TOKEN_SPELL (token))
1193 case SPELL_OPERATOR:
1195 const unsigned char *spelling;
1198 if (token->flags & DIGRAPH)
1200 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1201 else if (token->flags & NAMED_OP)
1204 spelling = TOKEN_NAME (token);
1209 while ((c = *++spelling) != '\0');
1215 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1219 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1223 /* An error, most probably. */
1228 /* Compare two tokens. */
1230 _cpp_equiv_tokens (a, b)
1231 const cpp_token *a, *b;
1233 if (a->type == b->type && a->flags == b->flags)
1234 switch (TOKEN_SPELL (a))
1236 default: /* Keep compiler happy. */
1237 case SPELL_OPERATOR:
1240 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1242 return a->val.node == b->val.node;
1244 return (a->val.str.len == b->val.str.len
1245 && !memcmp (a->val.str.text, b->val.str.text,
1252 /* Returns nonzero if a space should be inserted to avoid an
1253 accidental token paste for output. For simplicity, it is
1254 conservative, and occasionally advises a space where one is not
1255 needed, e.g. "." and ".2". */
1257 cpp_avoid_paste (pfile, token1, token2)
1259 const cpp_token *token1, *token2;
1261 enum cpp_ttype a = token1->type, b = token2->type;
1264 if (token1->flags & NAMED_OP)
1266 if (token2->flags & NAMED_OP)
1270 if (token2->flags & DIGRAPH)
1271 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1272 else if (token_spellings[b].category == SPELL_OPERATOR)
1273 c = token_spellings[b].name[0];
1275 /* Quickly get everything that can paste with an '='. */
1276 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1281 case CPP_GREATER: return c == '>' || c == '?';
1282 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1283 case CPP_PLUS: return c == '+';
1284 case CPP_MINUS: return c == '-' || c == '>';
1285 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1286 case CPP_MOD: return c == ':' || c == '>';
1287 case CPP_AND: return c == '&';
1288 case CPP_OR: return c == '|';
1289 case CPP_COLON: return c == ':' || c == '>';
1290 case CPP_DEREF: return c == '*';
1291 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1292 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1293 case CPP_NAME: return ((b == CPP_NUMBER
1294 && name_p (pfile, &token2->val.str))
1296 || b == CPP_CHAR || b == CPP_STRING); /* L */
1297 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1298 || c == '.' || c == '+' || c == '-');
1300 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1302 || (CPP_OPTION (pfile, objc)
1303 && token1->val.str.text[0] == '@'
1304 && (b == CPP_NAME || b == CPP_STRING)));
1311 /* Output all the remaining tokens on the current line, and a newline
1312 character, to FP. Leading whitespace is removed. If there are
1313 macros, special token padding is not performed. */
1315 cpp_output_line (pfile, fp)
1319 const cpp_token *token;
1321 token = cpp_get_token (pfile);
1322 while (token->type != CPP_EOF)
1324 cpp_output_token (token, fp);
1325 token = cpp_get_token (pfile);
1326 if (token->flags & PREV_WHITE)
1333 /* Returns the value of a hexadecimal digit. */
1339 return hex_value (c);
1344 /* Read a possible universal character name starting at *PSTR. */
1346 maybe_read_ucn (pfile, pstr)
1350 cppchar_t result, c = (*pstr)[-1];
1352 result = _cpp_valid_ucn (pfile, pstr, false);
1355 if (CPP_WTRADITIONAL (pfile))
1356 cpp_error (pfile, DL_WARNING,
1357 "the meaning of '\\%c' is different in traditional C",
1360 if (CPP_OPTION (pfile, EBCDIC))
1362 cpp_error (pfile, DL_ERROR,
1363 "universal character with an EBCDIC target");
1364 result = 0x3f; /* EBCDIC invalid character */
1371 /* Returns the value of an escape sequence, truncated to the correct
1372 target precision. PSTR points to the input pointer, which is just
1373 after the backslash. LIMIT is how much text we have. WIDE is true
1374 if the escape sequence is part of a wide character constant or
1375 string literal. Handles all relevant diagnostics. */
/* Parse the escape sequence found at *PSTR.  STR starts out as *PSTR
   and LIMIT bounds how far the digit loops may read.
   NOTE(review): the listing numbers embedded at the start of each
   line skip values (1377, 1379, 1380, 1383, ...), so parts of this
   function, including its return type, some declarations and the
   switch header, are not present in this excerpt.  */
1377 cpp_parse_escape (pfile, pstr, limit, wide)
1379 const unsigned char **pstr;
1380 const unsigned char *limit;
1383 /* Values of \a \b \e \f \n \r \t \v respectively. */
1384 static const uchar ascii[] = { 7, 8, 27, 12, 10, 13, 9, 11 };
1385 static const uchar ebcdic[] = { 47, 22, 39, 12, 21, 13, 5, 11 };
1388 const unsigned char *str = *pstr, *charconsts;
1389 cppchar_t c, ucn, mask;
/* Use the EBCDIC table when the EBCDIC option is set.  The
   alternative assignment (presumably the ASCII table) is not in this
   excerpt.  */
1392 if (CPP_OPTION (pfile, EBCDIC))
1393 charconsts = ebcdic;
/* WIDTH comes from either the wide or the narrow character
   precision; presumably WIDE chooses between them (the test itself
   is not in this excerpt).  */
1398 width = CPP_OPTION (pfile, wchar_precision);
1400 width = CPP_OPTION (pfile, char_precision);
/* MASK has the low WIDTH bits set; the range checks below compare a
   value against its masked form.  */
1401 if (width < BITS_PER_CPPCHAR_T)
1402 mask = ((cppchar_t) 1 << width) - 1;
/* These four escapes stand for the character itself.  */
1409 case '\\': case '\'': case '"': case '?': break;
/* The indices used below follow the order of the two tables:
   \a \b \e \f \n \r \t \v.  */
1410 case 'b': c = charconsts[1]; break;
1411 case 'f': c = charconsts[3]; break;
1412 case 'n': c = charconsts[4]; break;
1413 case 'r': c = charconsts[5]; break;
1414 case 't': c = charconsts[6]; break;
1415 case 'v': c = charconsts[7]; break;
1417 case '(': case '{': case '[': case '%':
1418 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1419 '\%' is used to prevent SCCS from getting confused. */
/* For these characters UNKNOWN is set only under CPP_PEDANTIC.  */
1420 unknown = CPP_PEDANTIC (pfile);
1424 if (CPP_WTRADITIONAL (pfile))
1425 cpp_error (pfile, DL_WARNING,
1426 "the meaning of '\\a' is different in traditional C");
1431 if (CPP_PEDANTIC (pfile))
1432 cpp_error (pfile, DL_PEDWARN,
1433 "non-ISO-standard escape sequence, '\\%c'", (int) c);
/* Universal character names are delegated to maybe_read_ucn, which
   is handed the address of STR.  */
1438 ucn = maybe_read_ucn (pfile, &str);
1446 if (CPP_WTRADITIONAL (pfile))
1447 cpp_error (pfile, DL_WARNING,
1448 "the meaning of '\\x' is different in traditional C");
1451 cppchar_t i = 0, overflow = 0;
1452 int digits_found = 0;
/* OVERFLOW collects the bits that the following 4-bit left shift of
   I would discard: I << 4 >> 4 differs from I exactly when one of
   the top four bits of I is set (assuming cppchar_t is an unsigned
   type; TODO confirm).  */
1460 overflow |= i ^ (i << 4 >> 4);
1461 i = (i << 4) + hex_digit_value (c);
1466 cpp_error (pfile, DL_ERROR,
1467 "\\x used with no following hex digits");
/* Diagnose a value that overflowed the accumulator or that has bits
   set outside MASK.  */
1469 if (overflow | (i != (i & mask)))
1471 cpp_error (pfile, DL_PEDWARN,
1472 "hex escape sequence out of range");
1479 case '0': case '1': case '2': case '3':
1480 case '4': case '5': case '6': case '7':
/* The first octal digit is already in C.  The loop takes further
   digits while STR is below LIMIT and COUNT stays under 3; the
   initial value of COUNT is not visible in this excerpt.  */
1483 cppchar_t i = c - '0';
1485 while (str < limit && ++count < 3)
1488 if (c < '0' || c > '7')
1491 i = (i << 3) + c - '0';
1494 if (i != (i & mask))
1496 cpp_error (pfile, DL_PEDWARN,
1497 "octal escape sequence out of range");
/* Two spellings of the unknown-escape diagnostic: the character
   itself, or its three-digit octal code.  The condition that picks
   one of them is not in this excerpt.  */
1512 cpp_error (pfile, DL_PEDWARN,
1513 "unknown escape sequence '\\%c'", (int) c);
1515 cpp_error (pfile, DL_PEDWARN,
1516 "unknown escape sequence: '\\%03o'", (int) c);
/* NOTE(review): the test guarding this final range diagnostic is
   not in this excerpt.  */
1521 cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type");
1529 /* Interpret a (possibly wide) character constant in TOKEN.
1530 Multi-character charconsts draw a warning when the warn_multichar
1531 option is set. PCHARS_SEEN points to a variable that is filled in
1532 with the number of characters seen, and UNSIGNEDP to a variable
1533 that is set to whether the result has unsigned type. */
/* NOTE(review): the embedded listing numbers skip values, so parts of
   this function (return type, some declarations, the character loop
   and the final return) are not present in this excerpt.  */
1535 cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp)
1537 const cpp_token *token;
1538 unsigned int *pchars_seen;
1541 const unsigned char *str, *limit;
1542 unsigned int chars_seen = 0;
1543 size_t width, max_chars;
1544 cppchar_t c, mask, result = 0;
/* Start one byte into the spelling, or two bytes for CPP_WCHAR
   (presumably past the opening quote and, for wide constants, the
   prefix character); LIMIT stops one byte short of the end of the
   spelling.  */
1547 str = token->val.str.text + 1 + (token->type == CPP_WCHAR);
1548 limit = token->val.str.text + token->val.str.len - 1;
/* Narrow constants: character precision, as many characters as fit
   in int_precision bits, signedness from the unsigned_char option.  */
1550 if (token->type == CPP_CHAR)
1552 width = CPP_OPTION (pfile, char_precision);
1553 max_chars = CPP_OPTION (pfile, int_precision) / width;
1554 unsigned_p = CPP_OPTION (pfile, unsigned_char);
/* Otherwise the wchar settings are used; the assignment to MAX_CHARS
   for this case is not in this excerpt.  */
1558 width = CPP_OPTION (pfile, wchar_precision);
1560 unsigned_p = CPP_OPTION (pfile, unsigned_wchar);
/* MASK has the low WIDTH bits set.  */
1563 if (width < BITS_PER_CPPCHAR_T)
1564 mask = ((cppchar_t) 1 << width) - 1;
/* Escape sequences go through cpp_parse_escape, which is told
   whether the constant is wide and advances STR itself.  */
1573 c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR);
1575 #ifdef MAP_CHARACTER
1577 c = MAP_CHARACTER (c);
1582 /* Truncate the character, scale the result and merge the two. */
1584 if (width < BITS_PER_CPPCHAR_T)
1585 result = (result << width) | c;
1590 if (chars_seen == 0)
1591 cpp_error (pfile, DL_ERROR, "empty character constant");
1592 else if (chars_seen > 1)
1594 /* Multichar charconsts are of type int and therefore signed. */
/* More characters than fit: clamp the count and warn.  Otherwise the
   plain multi-character warning is issued only when the
   warn_multichar option is set.  */
1597 if (chars_seen > max_chars)
1599 chars_seen = max_chars;
1600 cpp_error (pfile, DL_WARNING,
1601 "character constant too long for its type");
1603 else if (CPP_OPTION (pfile, warn_multichar))
1604 cpp_error (pfile, DL_WARNING, "multi-character character constant");
1607 /* Sign-extend or truncate the constant to cppchar_t. The value is
1608 in WIDTH bits, but for multi-char charconsts its value is the
1609 full target type's width. */
1612 if (width < BITS_PER_CPPCHAR_T)
1614 mask = ((cppchar_t) 1 << width) - 1;
/* NOTE(review): the sign-bit test below shifts a plain int 1, unlike
   the (cppchar_t) 1 used for MASK just above.  If WIDTH - 1 can reach
   the top bit of int, the shift is not done in cppchar_t; worth
   confirming that WIDTH never gets that large here.  */
1615 if (unsigned_p || !(result & (1 << (width - 1))))
/* Report the character count and the signedness to the caller.  */
1621 *pchars_seen = chars_seen;
1622 *unsignedp = unsigned_p;
1626 /* Memory buffers. Changing these three constants can have a dramatic
1627 effect on performance. The values here are reasonable defaults,
1628 but might be tuned. If you adjust them, be sure to test across a
1629 range of uses of cpplib, including heavy nested function-like macro
1630 expansion. Also check the change in peak memory usage (NJAMD is a
1631 good tool for this). */
/* Lower bound applied to the length of a newly created buffer.  */
1632 #define MIN_BUFF_SIZE 8000
/* Largest capacity still accepted from the free list for a request
   of MIN_SIZE bytes: MIN_BUFF_SIZE plus one and a half times
   MIN_SIZE.  */
1633 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
/* Size requested when extending BUFF: MIN_EXTRA plus twice the
   distance from cur to limit.
   NOTE(review): MIN_EXTRA is not parenthesized in the expansion, so
   an argument containing an operator that binds less tightly than +
   would not group as intended.  */
1634 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1635 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
/* Preprocessing-time sanity check.  With the definitions above,
   BUFF_SIZE_UPPER_BOUND (0) equals MIN_BUFF_SIZE, so the error does
   not fire.  The line closing this conditional is not in this
   excerpt.  */
1637 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1638 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1641 /* Create a new allocation buffer. Place the control block at the end
1642 of the buffer, so that buffer overflows will cause immediate chaos. */
/* NOTE(review): the listing lines between 1642 and 1648, which would
   hold the function header, are not present in this excerpt, and
   neither is the return statement.  */
1648 unsigned char *base;
/* Never create a buffer shorter than MIN_BUFF_SIZE, then pass the
   length through CPP_ALIGN.  */
1650 if (len < MIN_BUFF_SIZE)
1651 len = MIN_BUFF_SIZE;
1652 len = CPP_ALIGN (len);
/* A single allocation holds LEN bytes of data followed by the
   _cpp_buff control block, which therefore sits at BASE + LEN.  */
1654 base = xmalloc (len + sizeof (_cpp_buff));
1655 result = (_cpp_buff *) (base + len);
1656 result->base = base;
/* The data area ends exactly where the control block begins.  */
1658 result->limit = base + len;
1659 result->next = NULL;
1663 /* Place a chain of unwanted allocation buffers on the free list. */
1665 _cpp_release_buff (pfile, buff)
/* END starts at the head of the chain.  The code that advances it is
   not in this excerpt; presumably it walks to the last buffer of the
   chain.  */
1669 _cpp_buff *end = buff;
/* Splice: the existing free list is hung after END, and BUFF becomes
   the new head of pfile->free_buffs.  */
1673 end->next = pfile->free_buffs;
1674 pfile->free_buffs = buff;
1677 /* Return a free buffer of size at least MIN_SIZE. */
/* NOTE(review): the embedded listing numbers skip values, so parts of
   this function are not present in this excerpt.  In particular the
   comment opened on listing line 1693 has lost its closing line.  */
1679 _cpp_get_buff (pfile, min_size)
1683 _cpp_buff *result, **p;
/* P visits each link field of the free list in turn, starting with
   pfile->free_buffs itself.  The loop has no exit condition in its
   header.  */
1685 for (p = &pfile->free_buffs;; p = &(*p)->next)
/* A brand-new buffer is created here; presumably this is the path
   taken when no listed buffer qualifies (the guarding test is not in
   this excerpt).  */
1690 return new_buff (min_size);
/* SIZE is the whole data capacity of the candidate, limit - base.
   The test further down accepts the candidate only when SIZE is at
   least MIN_SIZE and at most BUFF_SIZE_UPPER_BOUND (MIN_SIZE); the
   last two statements then clear its next link and rewind cur to
   base.  */
1692 size = result->limit - result->base;
1693 /* Return a buffer that's big enough, but don't waste one that's
1695 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1700 result->next = NULL;
1701 result->cur = result->base;
1705 /* Creates a new buffer with enough space to hold the uncommitted
1706 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1707 the excess bytes to the new buffer. Chains the new buffer after
1708 BUFF, and returns the new buffer. */
1710 _cpp_append_extend_buff (pfile, buff, min_extra)
/* NOTE(review): the embedded listing numbers skip values, so the
   parameter declarations, braces and return statement are not
   present in this excerpt.  */
/* Ask for MIN_EXTRA bytes plus twice the distance from cur to limit
   of BUFF, as computed by EXTENDED_BUFF_SIZE.  */
1715 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1716 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
/* Link the new buffer after BUFF, then copy BUFF_ROOM (buff) bytes
   from the current position of BUFF to the base of the new
   buffer.  */
1718 buff->next = new_buff;
1719 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1723 /* Creates a new buffer with enough space to hold the uncommitted
1724 remaining bytes of the buffer pointed to by BUFF, and at least
1725 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1726 Chains the new buffer before the buffer pointed to by BUFF, and
1727 updates the pointer to point to the new buffer. */
1729 _cpp_extend_buff (pfile, pbuff, min_extra)
/* NOTE(review): the embedded listing numbers skip values, so the
   parameter declarations, braces and the statement that stores the
   new buffer through PBUFF are not present in this excerpt.  */
/* OLD_BUFF is the buffer PBUFF currently designates.  The request
   size is MIN_EXTRA plus twice the distance from its cur to its
   limit, as computed by EXTENDED_BUFF_SIZE.  */
1734 _cpp_buff *new_buff, *old_buff = *pbuff;
1735 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
/* Copy BUFF_ROOM (old_buff) bytes from the current position of the
   old buffer to the base of the new one, then chain the old buffer
   behind the new one.  */
1737 new_buff = _cpp_get_buff (pfile, size);
1738 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1739 new_buff->next = old_buff;
1743 /* Free a chain of buffers starting at BUFF. */
1745 _cpp_free_buff (buff)
/* Runs while BUFF is non-null and steps to NEXT after each pass.
   The loop body, which would set NEXT and release the buffer, is not
   present in this excerpt.  */
1750 for (; buff; buff = next)
1757 /* Allocate permanent, unaligned storage of length LEN. */
1759 _cpp_unaligned_alloc (pfile, len)
/* NOTE(review): the embedded listing numbers skip values, so the
   parameter declarations, braces and return statement are not
   present in this excerpt.  */
/* Allocation is carved from the current position of pfile->u_buff.  */
1763 _cpp_buff *buff = pfile->u_buff;
1764 unsigned char *result = buff->cur;
/* Not enough room between RESULT and the limit: obtain a buffer of at
   least LEN bytes and push it on the front of the pfile->u_buff
   chain.  */
1766 if (len > (size_t) (buff->limit - result))
1768 buff = _cpp_get_buff (pfile, len);
1769 buff->next = pfile->u_buff;
1770 pfile->u_buff = buff;
/* Reserve LEN bytes by moving the cursor of the buffer in use.  */
1774 buff->cur = result + len;
1778 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1779 That buffer is used for growing allocations when saving macro
1780 replacement lists in a #define, and when parsing an answer to an
1781 assertion in #assert, #unassert or #if (and therefore possibly
1782 whilst expanding macros). It therefore must not be used by any
1783 code that they might call: specifically the lexer and the guts of
1786 All existing other uses clearly fit this restriction: storing
1787 registered pragmas during initialization. */
/* NOTE(review): the summary above says unaligned, although this
   routine is named _cpp_aligned_alloc and differs from
   _cpp_unaligned_alloc only in using a_buff; confirm the intended
   wording.  The embedded listing numbers also skip values (1783 to
   1786 inside the comment above, among others), so parts of the
   comment and of the function are not present in this excerpt.  */
1789 _cpp_aligned_alloc (pfile, len)
/* Allocation is carved from the current position of pfile->a_buff.  */
1793 _cpp_buff *buff = pfile->a_buff;
1794 unsigned char *result = buff->cur;
/* Not enough room between RESULT and the limit: obtain a buffer of at
   least LEN bytes and push it on the front of the pfile->a_buff
   chain.  */
1796 if (len > (size_t) (buff->limit - result))
1798 buff = _cpp_get_buff (pfile, len);
1799 buff->next = pfile->a_buff;
1800 pfile->a_buff = buff;
/* Reserve LEN bytes by moving the cursor of the buffer in use.  */
1804 buff->cur = result + len;