1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
37 enum spell_type category;
38 const unsigned char *name;
41 static const unsigned char *const digraph_spellings[] =
42 { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
44 #define OP(e, s) { SPELL_OPERATOR, U s },
45 #define TK(e, s) { s, U #e },
46 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
50 #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51 #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
53 static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54 static int skip_line_comment (cpp_reader *);
55 static void skip_whitespace (cpp_reader *, cppchar_t);
56 static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
57 static void lex_number (cpp_reader *, cpp_string *);
58 static bool forms_identifier_p (cpp_reader *, int);
59 static void lex_string (cpp_reader *, cpp_token *, const uchar *);
60 static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
61 static void create_literal (cpp_reader *, cpp_token *, const uchar *,
62 unsigned int, enum cpp_ttype);
63 static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
64 static int name_p (cpp_reader *, const cpp_string *);
65 static tokenrun *next_tokenrun (tokenrun *);
67 static _cpp_buff *new_buff (size_t);
72 /* Compares the token TOKEN to the NUL-terminated string STRING.
73 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
75 cpp_ideq (const cpp_token *token, const char *string)
77 if (token->type != CPP_NAME)
80 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
83 /* Record a note TYPE at byte POS into the current cleaned logical line. */
/* Store a note of kind TYPE for position POS in BUFFER's notes
   array, enlarging the array first when it is already full.  */
86 add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
/* Every slot is in use: raise the capacity to twice its current
   value plus 200 entries and reallocate the array to that size.  */
88 if (buffer->notes_used == buffer->notes_cap)
90 buffer->notes_cap = buffer->notes_cap * 2 + 200;
91 buffer->notes = xrealloc (buffer->notes,
92 buffer->notes_cap * sizeof (_cpp_line_note));
/* Fill in the first unused slot, the one indexed by notes_used.  */
95 buffer->notes[buffer->notes_used].pos = pos;
96 buffer->notes[buffer->notes_used].type = type;
100 /* Returns with a logical line that contains no escaped newlines or
101 trigraphs. This is a time-critical inner loop. */
103 _cpp_clean_line (cpp_reader *pfile)
109 buffer = pfile->buffer;
110 buffer->cur_note = buffer->notes_used = 0;
111 buffer->cur = buffer->line_base = buffer->next_line;
112 buffer->need_line = false;
113 s = buffer->next_line - 1;
115 if (!buffer->from_stage3)
124 if (c == '\n' || c == '\r')
126 /* Handle DOS line endings. */
127 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
129 if (s == buffer->rlimit)
134 while (p != buffer->next_line && is_nvspace (p[-1]))
136 if (p == buffer->next_line || p[-1] != '\\')
139 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
141 buffer->next_line = p - 1;
143 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
145 /* Add a note regardless, for the benefit of -Wtrigraphs. */
146 add_line_note (buffer, d, s[2]);
147 if (CPP_OPTION (pfile, trigraphs))
149 *d = _cpp_trigraph_map[s[2]];
159 while (*s != '\n' && *s != '\r');
162 /* Handle DOS line endings. */
163 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
168 /* A sentinel note that should never be processed. */
169 add_line_note (buffer, d + 1, '\n');
170 buffer->next_line = s + 1;
173 /* Return true if the trigraph indicated by NOTE should be warned
174 about in a comment. */
176 warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
180 /* Within comments we don't warn about trigraphs, unless the
181 trigraph forms an escaped newline, as that may change behavior. */
183 if (note->type != '/')
186 /* If -trigraphs, then this was an escaped newline iff the next note is at the same position. */
188 if (CPP_OPTION (pfile, trigraphs))
189 return note[1].pos == note->pos;
191 /* Otherwise, see if this forms an escaped newline. */
193 while (is_nvspace (*p))
196 /* There might have been escaped newlines between the trigraph and the
197 newline we found. Hence the position test. */
198 return (*p == '\n' && p < note[1].pos);
201 /* Process the notes created by add_line_note as far as the current location in the buffer. */
204 _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
206 cpp_buffer *buffer = pfile->buffer;
210 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
213 if (note->pos > buffer->cur)
217 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
219 if (note->type == '\\' || note->type == ' ')
221 if (note->type == ' ' && !in_comment)
222 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
223 "backslash and newline separated by space");
225 if (buffer->next_line > buffer->rlimit)
227 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, col,
228 "backslash-newline at end of file");
229 /* Prevent "no newline at end of file" warning. */
230 buffer->next_line = buffer->rlimit;
233 buffer->line_base = note->pos;
236 else if (_cpp_trigraph_map[note->type])
238 if (CPP_OPTION (pfile, warn_trigraphs)
239 && (!in_comment || warn_in_comment (pfile, note)))
241 if (CPP_OPTION (pfile, trigraphs))
242 cpp_error_with_line (pfile, DL_WARNING, pfile->line, col,
243 "trigraph ??%c converted to %c",
245 (int) _cpp_trigraph_map[note->type]);
249 (pfile, DL_WARNING, pfile->line, col,
250 "trigraph ??%c ignored, use -trigraphs to enable",
260 /* Skip a C-style block comment. We find the end of the comment by
261 seeing if an asterisk is before every '/' we encounter. Returns
262 nonzero if comment terminated by EOF, zero otherwise.
264 Buffer->cur points to the initial asterisk of the comment. */
266 _cpp_skip_block_comment (cpp_reader *pfile)
268 cpp_buffer *buffer = pfile->buffer;
272 if (*buffer->cur == '/')
279 /* People like decorating comments with '*', so check for '/'
280 instead for efficiency. */
283 if (buffer->cur[-2] == '*')
286 /* Warn about potential nested comments, but not if the '/'
287 comes immediately before the true comment delimiter.
288 Don't bother to get it right across escaped newlines. */
289 if (CPP_OPTION (pfile, warn_comments)
290 && buffer->cur[0] == '*' && buffer->cur[1] != '/')
291 cpp_error_with_line (pfile, DL_WARNING,
292 pfile->line, CPP_BUF_COL (buffer),
293 "\"/*\" within comment");
298 _cpp_process_line_notes (pfile, true);
299 if (buffer->next_line >= buffer->rlimit)
301 _cpp_clean_line (pfile);
306 _cpp_process_line_notes (pfile, true);
310 /* Skip a C++ line comment, leaving buffer->cur pointing to the
311 terminating newline. Handles escaped newlines. Returns nonzero
312 if a multiline comment. */
314 skip_line_comment (cpp_reader *pfile)
316 cpp_buffer *buffer = pfile->buffer;
317 unsigned int orig_line = pfile->line;
319 while (*buffer->cur != '\n')
322 _cpp_process_line_notes (pfile, true);
323 return orig_line != pfile->line;
326 /* Skips whitespace, saving the next non-whitespace character. */
328 skip_whitespace (cpp_reader *pfile, cppchar_t c)
330 cpp_buffer *buffer = pfile->buffer;
331 bool saw_NUL = false;
335 /* Horizontal space always OK. */
336 if (c == ' ' || c == '\t')
338 /* Just \f \v or \0 left. */
341 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
342 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line,
343 CPP_BUF_COL (buffer),
344 "%s in preprocessing directive",
345 c == '\f' ? "form feed" : "vertical tab");
349 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
350 while (is_nvspace (c));
353 cpp_error (pfile, DL_WARNING, "null character(s) ignored");
358 /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */
361 name_p (cpp_reader *pfile, const cpp_string *string)
365 for (i = 0; i < string->len; i++)
366 if (!is_idchar (string->text[i]))
372 /* Returns TRUE if the sequence starting at buffer->cur is valid in
373 an identifier. FIRST is TRUE if this starts an identifier. */
375 forms_identifier_p (cpp_reader *pfile, int first)
377 cpp_buffer *buffer = pfile->buffer;
379 if (*buffer->cur == '$')
381 if (!CPP_OPTION (pfile, dollars_in_ident))
385 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
387 CPP_OPTION (pfile, warn_dollars) = 0;
388 cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number");
394 /* Is this a syntactically valid UCN? */
395 if (0 && *buffer->cur == '\\'
396 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
399 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first))
407 /* Lex an identifier whose spelling starts at BASE. */
408 static cpp_hashnode *
409 lex_identifier (cpp_reader *pfile, const uchar *base)
411 cpp_hashnode *result;
416 cur = pfile->buffer->cur;
418 /* N.B. ISIDNUM does not include $. */
419 while (ISIDNUM (*cur))
422 pfile->buffer->cur = cur;
424 while (forms_identifier_p (pfile, false));
426 result = (cpp_hashnode *)
427 ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
429 /* Rarely, identifiers require diagnostics when lexed. */
430 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
431 && !pfile->state.skipping, 0))
433 /* It is allowed to poison the same identifier twice. */
434 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
435 cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"",
438 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
439 replacement list of a variadic macro. */
440 if (result == pfile->spec_nodes.n__VA_ARGS__
441 && !pfile->state.va_args_ok)
442 cpp_error (pfile, DL_PEDWARN,
443 "__VA_ARGS__ can only appear in the expansion"
444 " of a C99 variadic macro");
450 /* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
452 lex_number (cpp_reader *pfile, cpp_string *number)
458 base = pfile->buffer->cur - 1;
461 cur = pfile->buffer->cur;
463 /* N.B. ISIDNUM does not include $. */
464 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
467 pfile->buffer->cur = cur;
469 while (forms_identifier_p (pfile, false));
471 number->len = cur - base;
472 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
473 memcpy (dest, base, number->len);
474 dest[number->len] = '\0';
478 /* Create a token of type TYPE with a literal spelling. */
480 create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
481 unsigned int len, enum cpp_ttype type)
483 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
485 memcpy (dest, base, len);
488 token->val.str.len = len;
489 token->val.str.text = dest;
492 /* Lexes a string, character constant, or angle-bracketed header file
493 name. The stored string contains the spelling, including the opening
494 quote and any leading 'L'. It returns the type of the
495 literal, or CPP_OTHER if it was not properly terminated.
497 The spelling is NUL-terminated, but it is not guaranteed that this
498 is the first NUL since embedded NULs are preserved. */
500 lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
502 bool saw_NUL = false;
504 cppchar_t terminator;
509 if (terminator == 'L')
511 if (terminator == '\"')
512 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
513 else if (terminator == '\'')
514 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
516 terminator = '>', type = CPP_HEADER_NAME;
520 cppchar_t c = *cur++;
522 /* In #include-style directives, terminators are not escapable. */
523 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
525 else if (c == terminator)
537 if (saw_NUL && !pfile->state.skipping)
538 cpp_error (pfile, DL_WARNING, "null character(s) preserved in literal");
540 pfile->buffer->cur = cur;
541 create_literal (pfile, token, base, cur - base, type);
544 /* The stored comment includes the comment start and any terminator. */
546 save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
549 unsigned char *buffer;
550 unsigned int len, clen;
552 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
554 /* C++ comments probably (not definitely) have moved past a new
555 line, which we don't want to save in the comment. */
556 if (is_vspace (pfile->buffer->cur[-1]))
559 /* If we are currently in a directive, then we need to store all
560 C++ comments as C comments internally, and so we need to
561 allocate a little extra space in that case.
563 Note that the only time we encounter a directive here is
564 when we are saving comments in a "#define". */
565 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
567 buffer = _cpp_unaligned_alloc (pfile, clen);
569 token->type = CPP_COMMENT;
570 token->val.str.len = clen;
571 token->val.str.text = buffer;
574 memcpy (buffer + 1, from, len - 1);
576 /* Finish conversion to a C comment, if necessary. */
577 if (pfile->state.in_directive && type == '/')
580 buffer[clen - 2] = '*';
581 buffer[clen - 1] = '/';
585 /* Allocate COUNT tokens for RUN. */
587 _cpp_init_tokenrun (tokenrun *run, unsigned int count)
589 run->base = xnewvec (cpp_token, count);
590 run->limit = run->base + count;
594 /* Returns the next tokenrun, or creates one if there is none. */
/* Make sure RUN has a successor in the doubly linked list of token
   runs, creating one on demand.  */
596 next_tokenrun (tokenrun *run)
/* No successor yet: allocate one, point its prev link back at RUN,
   and give it storage for 250 tokens.  */
598 if (run->next == NULL)
600 run->next = xnew (tokenrun);
601 run->next->prev = run;
602 _cpp_init_tokenrun (run->next, 250);
608 /* Allocate a single token that is invalidated at the same time as the
609 rest of the tokens on the line. Has its line and col set to the
610 same as the last lexed token, so that diagnostics appear in the right place. */
/* Take the next token slot from PFILE's token runs and stamp it with
   the source position of the token that precedes it.  */
613 _cpp_temp_token (cpp_reader *pfile)
615 cpp_token *old, *result;
/* OLD is the slot just before cur_token; it is captured before
   cur_token can be moved to another run below.  */
617 old = pfile->cur_token - 1;
/* The current run is full: continue at the base of the next run.  */
618 if (pfile->cur_token == pfile->cur_run->limit)
620 pfile->cur_run = next_tokenrun (pfile->cur_run);
621 pfile->cur_token = pfile->cur_run->base;
/* Claim the slot and copy OLD's line and column into it.  */
624 result = pfile->cur_token++;
625 result->line = old->line;
626 result->col = old->col;
630 /* Lex a token into RESULT (external interface). Takes care of issues
631 like directive handling, token lookahead, multiple include
632 optimization and skipping. */
634 _cpp_lex_token (cpp_reader *pfile)
640 if (pfile->cur_token == pfile->cur_run->limit)
642 pfile->cur_run = next_tokenrun (pfile->cur_run);
643 pfile->cur_token = pfile->cur_run->base;
646 if (pfile->lookaheads)
649 result = pfile->cur_token++;
652 result = _cpp_lex_direct (pfile);
654 if (result->flags & BOL)
656 /* Is this a directive? If _cpp_handle_directive returns
657 false, it is an assembler #. */
658 if (result->type == CPP_HASH
659 /* 6.10.3 p 11: Directives in a list of macro arguments
660 gives undefined behavior. This implementation
661 handles the directive as normal. */
662 && pfile->state.parsing_args != 1
663 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
665 if (pfile->cb.line_change && !pfile->state.skipping)
666 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
669 /* We don't skip tokens in directives. */
670 if (pfile->state.in_directive)
673 /* Outside a directive, invalidate controlling macros. At file
674 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
675 get here and MI optimization works. */
676 pfile->mi_valid = false;
678 if (!pfile->state.skipping || result->type == CPP_EOF)
685 /* Returns true if a fresh line has been loaded. */
687 _cpp_get_fresh_line (cpp_reader *pfile)
689 /* We can't get a new line until we leave the current directive. */
690 if (pfile->state.in_directive)
695 cpp_buffer *buffer = pfile->buffer;
697 if (!buffer->need_line)
700 if (buffer->next_line < buffer->rlimit)
702 _cpp_clean_line (pfile);
706 /* First, get out of parsing arguments state. */
707 if (pfile->state.parsing_args)
710 /* End of buffer. Non-empty files should end in a newline. */
711 if (buffer->buf != buffer->rlimit
712 && buffer->next_line > buffer->rlimit
713 && !buffer->from_stage3)
715 /* Only warn once. */
716 buffer->next_line = buffer->rlimit;
717 cpp_error_with_line (pfile, DL_PEDWARN, pfile->line - 1,
718 CPP_BUF_COLUMN (buffer, buffer->cur),
719 "no newline at end of file");
725 if (buffer->return_at_eof)
727 _cpp_pop_buffer (pfile);
731 _cpp_pop_buffer (pfile);
735 #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
738 result->type = ELSE_TYPE; \
739 if (*buffer->cur == CHAR) \
740 buffer->cur++, result->type = THEN_TYPE; \
744 /* Lex a token into pfile->cur_token, which is also incremented, to
745 get diagnostics pointing to the correct location.
747 Does not handle issues such as token lookahead, multiple-include
748 optimization, directives, skipping etc. This function is only
749 suitable for use by _cpp_lex_token, and in special cases like
750 lex_expansion_token which doesn't care for any of these issues.
752 When meeting a newline, returns CPP_EOF if parsing a directive,
753 otherwise returns to the start of the token buffer if permissible.
754 Returns the location of the lexed token. */
756 _cpp_lex_direct (cpp_reader *pfile)
760 const unsigned char *comment_start;
761 cpp_token *result = pfile->cur_token++;
765 if (pfile->buffer->need_line)
767 if (!_cpp_get_fresh_line (pfile))
769 result->type = CPP_EOF;
770 if (!pfile->state.in_directive)
772 /* Tell the compiler the line number of the EOF token. */
773 result->line = pfile->line;
778 if (!pfile->keep_tokens)
780 pfile->cur_run = &pfile->base_run;
781 result = pfile->base_run.base;
782 pfile->cur_token = result + 1;
785 if (pfile->state.parsing_args == 2)
786 result->flags |= PREV_WHITE;
788 buffer = pfile->buffer;
790 result->line = pfile->line;
793 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
794 && !pfile->overlaid_buffer)
796 _cpp_process_line_notes (pfile, false);
797 result->line = pfile->line;
800 result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
804 case ' ': case '\t': case '\f': case '\v': case '\0':
805 result->flags |= PREV_WHITE;
806 skip_whitespace (pfile, c);
811 buffer->need_line = true;
814 case '0': case '1': case '2': case '3': case '4':
815 case '5': case '6': case '7': case '8': case '9':
816 result->type = CPP_NUMBER;
817 lex_number (pfile, &result->val.str);
821 /* 'L' may introduce wide characters or strings. */
822 if (*buffer->cur == '\'' || *buffer->cur == '"')
824 lex_string (pfile, result, buffer->cur - 1);
830 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
831 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
832 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
833 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
835 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
836 case 'G': case 'H': case 'I': case 'J': case 'K':
837 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
838 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
840 result->type = CPP_NAME;
841 result->val.node = lex_identifier (pfile, buffer->cur - 1);
843 /* Convert named operators to their proper types. */
844 if (result->val.node->flags & NODE_OPERATOR)
846 result->flags |= NAMED_OP;
847 result->type = result->val.node->directive_index;
853 lex_string (pfile, result, buffer->cur - 1);
857 /* A potential block or line comment. */
858 comment_start = buffer->cur;
863 if (_cpp_skip_block_comment (pfile))
864 cpp_error (pfile, DL_ERROR, "unterminated comment");
866 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
867 || CPP_IN_SYSTEM_HEADER (pfile)))
869 /* Warn about comments only if pedantically GNUC89, and not
870 in system headers. */
871 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
872 && ! buffer->warned_cplusplus_comments)
874 cpp_error (pfile, DL_PEDWARN,
875 "C++ style comments are not allowed in ISO C90");
876 cpp_error (pfile, DL_PEDWARN,
877 "(this will be reported only once per input file)");
878 buffer->warned_cplusplus_comments = 1;
881 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
882 cpp_error (pfile, DL_WARNING, "multi-line comment");
887 result->type = CPP_DIV_EQ;
892 result->type = CPP_DIV;
896 if (!pfile->state.save_comments)
898 result->flags |= PREV_WHITE;
899 goto update_tokens_line;
902 /* Save the comment as a token in its own right. */
903 save_comment (pfile, result, comment_start, c);
907 if (pfile->state.angled_headers)
909 lex_string (pfile, result, buffer->cur - 1);
913 result->type = CPP_LESS;
914 if (*buffer->cur == '=')
915 buffer->cur++, result->type = CPP_LESS_EQ;
916 else if (*buffer->cur == '<')
919 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
921 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
924 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
926 else if (CPP_OPTION (pfile, digraphs))
928 if (*buffer->cur == ':')
931 result->flags |= DIGRAPH;
932 result->type = CPP_OPEN_SQUARE;
934 else if (*buffer->cur == '%')
937 result->flags |= DIGRAPH;
938 result->type = CPP_OPEN_BRACE;
944 result->type = CPP_GREATER;
945 if (*buffer->cur == '=')
946 buffer->cur++, result->type = CPP_GREATER_EQ;
947 else if (*buffer->cur == '>')
950 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
952 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
955 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
960 result->type = CPP_MOD;
961 if (*buffer->cur == '=')
962 buffer->cur++, result->type = CPP_MOD_EQ;
963 else if (CPP_OPTION (pfile, digraphs))
965 if (*buffer->cur == ':')
968 result->flags |= DIGRAPH;
969 result->type = CPP_HASH;
970 if (*buffer->cur == '%' && buffer->cur[1] == ':')
971 buffer->cur += 2, result->type = CPP_PASTE;
973 else if (*buffer->cur == '>')
976 result->flags |= DIGRAPH;
977 result->type = CPP_CLOSE_BRACE;
983 result->type = CPP_DOT;
984 if (ISDIGIT (*buffer->cur))
986 result->type = CPP_NUMBER;
987 lex_number (pfile, &result->val.str);
989 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
990 buffer->cur += 2, result->type = CPP_ELLIPSIS;
991 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
992 buffer->cur++, result->type = CPP_DOT_STAR;
996 result->type = CPP_PLUS;
997 if (*buffer->cur == '+')
998 buffer->cur++, result->type = CPP_PLUS_PLUS;
999 else if (*buffer->cur == '=')
1000 buffer->cur++, result->type = CPP_PLUS_EQ;
1004 result->type = CPP_MINUS;
1005 if (*buffer->cur == '>')
1008 result->type = CPP_DEREF;
1009 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1010 buffer->cur++, result->type = CPP_DEREF_STAR;
1012 else if (*buffer->cur == '-')
1013 buffer->cur++, result->type = CPP_MINUS_MINUS;
1014 else if (*buffer->cur == '=')
1015 buffer->cur++, result->type = CPP_MINUS_EQ;
1019 result->type = CPP_AND;
1020 if (*buffer->cur == '&')
1021 buffer->cur++, result->type = CPP_AND_AND;
1022 else if (*buffer->cur == '=')
1023 buffer->cur++, result->type = CPP_AND_EQ;
1027 result->type = CPP_OR;
1028 if (*buffer->cur == '|')
1029 buffer->cur++, result->type = CPP_OR_OR;
1030 else if (*buffer->cur == '=')
1031 buffer->cur++, result->type = CPP_OR_EQ;
1035 result->type = CPP_COLON;
1036 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1037 buffer->cur++, result->type = CPP_SCOPE;
1038 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1041 result->flags |= DIGRAPH;
1042 result->type = CPP_CLOSE_SQUARE;
1046 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1047 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1048 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1049 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1050 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1052 case '?': result->type = CPP_QUERY; break;
1053 case '~': result->type = CPP_COMPL; break;
1054 case ',': result->type = CPP_COMMA; break;
1055 case '(': result->type = CPP_OPEN_PAREN; break;
1056 case ')': result->type = CPP_CLOSE_PAREN; break;
1057 case '[': result->type = CPP_OPEN_SQUARE; break;
1058 case ']': result->type = CPP_CLOSE_SQUARE; break;
1059 case '{': result->type = CPP_OPEN_BRACE; break;
1060 case '}': result->type = CPP_CLOSE_BRACE; break;
1061 case ';': result->type = CPP_SEMICOLON; break;
1063 /* @ is a punctuator in Objective-C. */
1064 case '@': result->type = CPP_ATSIGN; break;
1069 const uchar *base = --buffer->cur;
1071 if (forms_identifier_p (pfile, true))
1073 result->type = CPP_NAME;
1074 result->val.node = lex_identifier (pfile, base);
1081 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1088 /* An upper bound on the number of bytes needed to spell TOKEN.
1089 Does not include preceding whitespace. */
1091 cpp_token_len (const cpp_token *token)
1095 switch (TOKEN_SPELL (token))
1097 default: len = 4; break;
1098 case SPELL_LITERAL: len = token->val.str.len; break;
1099 case SPELL_IDENT: len = NODE_LEN (token->val.node); break;
1105 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
1106 already contain enough space to hold the token's spelling.
1107 Returns a pointer to the character after the last character written.
1108 FIXME: Would be nice if we didn't need the PFILE argument. */
1110 cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1111 unsigned char *buffer)
1113 switch (TOKEN_SPELL (token))
1115 case SPELL_OPERATOR:
1117 const unsigned char *spelling;
1120 if (token->flags & DIGRAPH)
1122 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1123 else if (token->flags & NAMED_OP)
1126 spelling = TOKEN_NAME (token);
1128 while ((c = *spelling++) != '\0')
1135 memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node));
1136 buffer += NODE_LEN (token->val.node);
1140 memcpy (buffer, token->val.str.text, token->val.str.len);
1141 buffer += token->val.str.len;
1145 cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token));
1152 /* Returns TOKEN spelt as a null-terminated string. The string is
1153 freed when the reader is destroyed. Useful for diagnostics. */
1155 cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1157 unsigned int len = cpp_token_len (token) + 1;
1158 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
1160 end = cpp_spell_token (pfile, token, start);
1166 /* Used by C front ends, which really should move to using
1167 cpp_token_as_text. */
1169 cpp_type2name (enum cpp_ttype type)
1171 return (const char *) token_spellings[type].name;
1174 /* Writes the spelling of token to FP, without any preceding space.
1175 Separated from cpp_spell_token for efficiency - to avoid stdio
1176 double-buffering. */
1178 cpp_output_token (const cpp_token *token, FILE *fp)
1180 switch (TOKEN_SPELL (token))
1182 case SPELL_OPERATOR:
1184 const unsigned char *spelling;
1187 if (token->flags & DIGRAPH)
1189 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
1190 else if (token->flags & NAMED_OP)
1193 spelling = TOKEN_NAME (token);
1198 while ((c = *++spelling) != '\0');
1204 fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp);
1208 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1212 /* An error, most probably. */
1217 /* Compare two tokens. */
1219 _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1221 if (a->type == b->type && a->flags == b->flags)
1222 switch (TOKEN_SPELL (a))
1224 default: /* Keep compiler happy. */
1225 case SPELL_OPERATOR:
1228 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
1230 return a->val.node == b->val.node;
1232 return (a->val.str.len == b->val.str.len
1233 && !memcmp (a->val.str.text, b->val.str.text,
1240 /* Returns nonzero if a space should be inserted to avoid an
1241 accidental token paste for output. For simplicity, it is
1242 conservative, and occasionally advises a space where one is not
1243 needed, e.g. "." and ".2". */
1245 cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1246 const cpp_token *token2)
1248 enum cpp_ttype a = token1->type, b = token2->type;
1251 if (token1->flags & NAMED_OP)
1253 if (token2->flags & NAMED_OP)
1257 if (token2->flags & DIGRAPH)
1258 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1259 else if (token_spellings[b].category == SPELL_OPERATOR)
1260 c = token_spellings[b].name[0];
1262 /* Quickly get everything that can paste with an '='. */
1263 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1268 case CPP_GREATER: return c == '>' || c == '?';
1269 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1270 case CPP_PLUS: return c == '+';
1271 case CPP_MINUS: return c == '-' || c == '>';
1272 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1273 case CPP_MOD: return c == ':' || c == '>';
1274 case CPP_AND: return c == '&';
1275 case CPP_OR: return c == '|';
1276 case CPP_COLON: return c == ':' || c == '>';
1277 case CPP_DEREF: return c == '*';
1278 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
1279 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1280 case CPP_NAME: return ((b == CPP_NUMBER
1281 && name_p (pfile, &token2->val.str))
1283 || b == CPP_CHAR || b == CPP_STRING); /* L */
1284 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1285 || c == '.' || c == '+' || c == '-');
1287 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1289 || (CPP_OPTION (pfile, objc)
1290 && token1->val.str.text[0] == '@'
1291 && (b == CPP_NAME || b == CPP_STRING)));
1298 /* Output all the remaining tokens on the current line, and a newline
1299 character, to FP. Leading whitespace is removed. If there are
1300 macros, special token padding is not performed. */
1302 cpp_output_line (cpp_reader *pfile, FILE *fp)
1304 const cpp_token *token;
1306 token = cpp_get_token (pfile);
1307 while (token->type != CPP_EOF)
1309 cpp_output_token (token, fp);
1310 token = cpp_get_token (pfile);
1311 if (token->flags & PREV_WHITE)
1318 /* Memory buffers. Changing these three constants can have a dramatic
1319 effect on performance. The values here are reasonable defaults,
1320 but might be tuned. If you adjust them, be sure to test across a
1321 range of uses of cpplib, including heavy nested function-like macro
1322 expansion. Also check the change in peak memory usage (NJAMD is a
1323 good tool for this). */
1324 #define MIN_BUFF_SIZE 8000
1325 #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
1326 #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1327 (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
1329 #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1330 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1333 /* Create a new allocation buffer. Place the control block at the end
1334 of the buffer, so that buffer overflows will cause immediate chaos. */
/* Allocate a buffer with at least LEN usable bytes in a single
   xmalloc call that also holds its _cpp_buff control block.  */
1336 new_buff (size_t len)
1339 unsigned char *base;
/* Never go below MIN_BUFF_SIZE; the size is then passed through
   CPP_ALIGN (presumably rounding it up to the alignment unit --
   confirm against the macro's definition).  */
1341 if (len < MIN_BUFF_SIZE)
1342 len = MIN_BUFF_SIZE;
1343 len = CPP_ALIGN (len);
/* The allocation is LEN data bytes followed immediately by the
   control block, so the control block lives at BASE + LEN.  */
1345 base = xmalloc (len + sizeof (_cpp_buff));
1346 result = (_cpp_buff *) (base + len);
1347 result->base = base;
/* LIMIT marks the end of the data area, which is also the address of
   the control block itself.  */
1349 result->limit = base + len;
1350 result->next = NULL;
1354 /* Place a chain of unwanted allocation buffers on the free list. */
1356 _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1358 _cpp_buff *end = buff;
1362 end->next = pfile->free_buffs;
1363 pfile->free_buffs = buff;
1366 /* Return a free buffer of size at least MIN_SIZE. */
1368 _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1370 _cpp_buff *result, **p;
1372 for (p = &pfile->free_buffs;; p = &(*p)->next)
1377 return new_buff (min_size);
1379 size = result->limit - result->base;
1380 /* Return a buffer that's big enough, but don't waste one that's much bigger than requested. */
1382 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
1387 result->next = NULL;
1388 result->cur = result->base;
1392 /* Creates a new buffer with enough space to hold the uncommitted
1393 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1394 the excess bytes to the new buffer. Chains the new buffer after
1395 BUFF, and returns the new buffer. */
1397 _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1399 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
1400 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
1402 buff->next = new_buff;
1403 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1407 /* Creates a new buffer with enough space to hold the uncommitted
1408 remaining bytes of the buffer pointed to by BUFF, and at least
1409 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1410 Chains the new buffer before the buffer pointed to by BUFF, and
1411 updates the pointer to point to the new buffer. */
1413 _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1415 _cpp_buff *new_buff, *old_buff = *pbuff;
1416 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1418 new_buff = _cpp_get_buff (pfile, size);
1419 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1420 new_buff->next = old_buff;
1424 /* Free a chain of buffers starting at BUFF. */
1426 _cpp_free_buff (_cpp_buff *buff)
1430 for (; buff; buff = next)
1437 /* Allocate permanent, unaligned storage of length LEN. */
/* Carve LEN bytes out of the buffer at the head of PFILE's u_buff
   chain, fetching a fresh buffer when the current one is too full.  */
1439 _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1441 _cpp_buff *buff = pfile->u_buff;
1442 unsigned char *result = buff->cur;
/* LEN exceeds the bytes left between CUR and LIMIT: obtain a buffer
   of at least LEN bytes and make it the new head of the u_buff
   chain, with the old head as its successor.  */
1444 if (len > (size_t) (buff->limit - result))
1446 buff = _cpp_get_buff (pfile, len);
1447 buff->next = pfile->u_buff;
1448 pfile->u_buff = buff;
/* Reserve the bytes by advancing the buffer's CUR pointer.  */
1452 buff->cur = result + len;
1456 /* Allocate permanent, unaligned storage of length LEN from a_buff.
1457 That buffer is used for growing allocations when saving macro
1458 replacement lists in a #define, and when parsing an answer to an
1459 assertion in #assert, #unassert or #if (and therefore possibly
1460 whilst expanding macros). It therefore must not be used by any
1461 code that they might call: specifically the lexer and the guts of the macro expander.
1464 All existing other uses clearly fit this restriction: storing
1465 registered pragmas during initialization. */
/* Carve LEN bytes out of the buffer at the head of PFILE's a_buff
   chain, fetching a fresh buffer when the current one is too full.
   NOTE(review): no explicit alignment step is visible in this
   function; presumably callers pass suitably aligned sizes --
   confirm.  */
1467 _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1469 _cpp_buff *buff = pfile->a_buff;
1470 unsigned char *result = buff->cur;
/* LEN exceeds the bytes left between CUR and LIMIT: obtain a buffer
   of at least LEN bytes and make it the new head of the a_buff
   chain, with the old head as its successor.  */
1472 if (len > (size_t) (buff->limit - result))
1474 buff = _cpp_get_buff (pfile, len);
1475 buff->next = pfile->a_buff;
1476 pfile->a_buff = buff;
/* Reserve the bytes by advancing the buffer's CUR pointer.  */
1480 buff->cur = result + len;