1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF) /* Peek N chars past `cur' without consuming; yields EOF when fewer than N+1 chars remain before `rlimit'. */
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF) /* Consume and yield the next char, or EOF once `cur' has reached `rlimit'. */
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N)) /* Move `cur' by N with no bounds check; N may be negative (see the FORWARD(-1) use in this file). */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N) /* These four shorthands act on CPP_BUFFER (pfile), so a `pfile' must be in scope at the point of use. */
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
59 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), (list)->name_cap / 2) /* Grows LIST's name buffer by half of its current capacity (expand_name_space adds its second argument to name_cap). */
65 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. The new capacity is N plus twice the old capacity. */
68 _cpp_grow_token_buffer (pfile, n)
72 long old_written = CPP_WRITTEN (pfile); /* Amount written so far, captured before the buffer is reallocated. */
73 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
74 pfile->token_buffer = (U_CHAR *)
75 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
76 CPP_SET_WRITTEN (pfile, old_written); /* Re-establish the write position against the new buffer (presumably needed because xrealloc may move it - TODO confirm against CPP_SET_WRITTEN). */
79 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
80 If BUFFER != NULL, then use the LENGTH characters in BUFFER
81 as the new input buffer.
82 Return the new buffer, or NULL on failure. */
85 cpp_push_buffer (pfile, buffer, length)
90 cpp_buffer *buf = CPP_BUFFER (pfile);
92 if (++pfile->buffer_stack_depth == CPP_STACK_MAX) /* The depth is incremented first, then compared against the limit. */
94 cpp_fatal (pfile, "macro or `#include' recursion too deep");
98 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer)); /* Zero-filled, so any field not assigned below starts out as 0/NULL. */
100 new->if_stack = pfile->if_stack; /* The new buffer starts from the reader's current conditional stack. */
101 new->buf = new->cur = buffer; /* Reading begins at the first byte of BUFFER. */
102 new->rlimit = buffer + length; /* One past the last readable byte (GETBUF stops when cur reaches rlimit). */
105 new->line_base = NULL;
107 CPP_BUFFER (pfile) = new; /* The new buffer becomes the top of the input stack. */
112 cpp_pop_buffer (pfile) /* Pop the top buffer off PFILE's input stack and return the buffer that is on top afterwards. */
115 cpp_buffer *buf = CPP_BUFFER (pfile);
116 if (ACTIVE_MARK_P (pfile)) /* Popping while a mark is active is reported as an internal error. */
117 cpp_ice (pfile, "mark active in cpp_pop_buffer");
121 _cpp_unwind_if_stack (pfile, buf);
123 free ((PTR) buf->buf);
124 if (pfile->system_include_depth)
125 pfile->system_include_depth--;
126 if (pfile->potential_control_macro)
128 buf->ihash->control_macro = pfile->potential_control_macro; /* Record the pending control-macro candidate on the buffer's ihash entry... */
129 pfile->potential_control_macro = 0; /* ...and clear it on the reader. */
131 pfile->input_stack_listing_current = 0;
135 HASHNODE *m = buf->macro; /* NOTE(review): the enclosing branch is not visible here; this part appears to handle buffers that came from a macro (buf->macro) - confirm. */
138 if ((m->type == T_FMACRO && buf->mapped)
139 || m->type == T_SPECLINE || m->type == T_FILE
140 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
141 || m->type == T_STDC)
142 free ((PTR) buf->buf); /* The text is freed only for the macro types tested above. */
144 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
146 pfile->buffer_stack_depth--; /* Undoes the increment made by cpp_push_buffer. */
147 return CPP_BUFFER (pfile);
150 /* Deal with the annoying semantics of fwrite. Writes LEN bytes from BUF to FP; the error path visible here reports through cpp_notice_from_errno using the out_fname option. */
152 safe_fwrite (pfile, buf, len, fp)
162 count = fwrite (buf, 1, len, fp); /* Element size is 1, so COUNT is a byte count. */
171 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
174 /* Notify the compiler proper that the current line number has jumped,
175 or the current file name has changed. */
178 output_line_command (pfile, print, line)
183 cpp_buffer *ip = cpp_file_buffer (pfile);
184 enum { same = 0, enter, leave, rname } change; /* Kind of file transition being reported. */
185 static const char * const codes[] = { "", " 1", " 2", "" }; /* Flag text in the same order as the enum: nothing for same/rname, " 1" for enter, " 2" for leave. */
187 if (CPP_OPTION (pfile, no_line_commands))
190 /* Determine whether the current filename has changed, and if so,
191 how. 'nominal_fname' values are unique, so they can be compared
192 by comparing pointers. */
193 if (ip->nominal_fname == print->last_fname)
197 if (pfile->buffer_stack_depth == print->last_bsd)
201 if (pfile->buffer_stack_depth > print->last_bsd)
205 print->last_bsd = pfile->buffer_stack_depth;
207 print->last_fname = ip->nominal_fname;
209 /* If the current file has not changed, we can output a few newlines
210 instead if we want to increase the line number by a small amount.
211 We cannot do this if print->lineno is zero, because that means we
212 haven't output any line commands yet. (The very first line
213 command output is a `same_file' command.) */
214 if (change == same && print->lineno != 0
215 && line >= print->lineno && line < print->lineno + 8) /* "Small" means fewer than 8 lines ahead of print->lineno. */
217 while (line > print->lineno)
219 putc ('\n', print->outf);
225 #ifndef NO_IMPLICIT_EXTERN_C
226 if (CPP_OPTION (pfile, cplusplus))
227 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
229 ip->system_header_p ? " 3" : "", /* " 3" is appended for any system header... */
230 (ip->system_header_p == 2) ? " 4" : ""); /* ...and " 4" as well when system_header_p is exactly 2. */
233 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
235 ip->system_header_p ? " 3" : "");
236 print->lineno = line; /* Remember the line number that has now been announced. */
239 /* Write the contents of the token_buffer to the output stream, and
240 clear the token_buffer. Also handles generating line commands and
241 keeping track of file transitions. */
244 cpp_output_tokens (pfile, print)
250 if (CPP_WRITTEN (pfile) - print->written) /* Non-zero when the amount written differs from print->written, i.e. there is pending text. */
252 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno) /* Looks at the last character written. */
254 safe_fwrite (pfile, pfile->token_buffer,
255 CPP_WRITTEN (pfile) - print->written, print->outf);
258 ip = cpp_file_buffer (pfile);
260 output_line_command (pfile, print, CPP_BUF_LINE (ip));
262 CPP_SET_WRITTEN (pfile, print->written); /* Rewind the token buffer to print->written. */
265 /* Helper for cpp_output_list - increases the column number to match
266 what we expect it to be. */
269 bump_column (print, from, to)
271 unsigned int from, to;
273 unsigned int tabs, spcs;
274 unsigned int delta = to - from; /* Unsigned subtraction: assumes TO >= FROM - TODO confirm at callers. */
276 /* Only if FROM is 0, advance by tabs. */
278 tabs = delta / 8, spcs = delta % 8; /* Whole 8-column steps as tabs, the remainder as spaces. */
280 tabs = 0, spcs = delta; /* Spaces only. */
282 while (tabs--) putc ('\t', print->outf);
283 while (spcs--) putc (' ', print->outf);
286 /* Write out the list L onto pfile->token_buffer. This function is
289 1) pfile->token_buffer is not going to continue to exist.
290 2) At the moment, tokens don't carry the information described
291 in cpplib.h; they are all strings.
292 3) The list has to be a complete line, and has to be written starting
293 at the beginning of a line. */
296 cpp_output_list (pfile, print, list)
299 const cpp_toklist *list;
302 unsigned int curcol = 1; /* Running output column, starting at 1. */
304 /* XXX Probably does not do what is intended. */
305 if (print->lineno != list->line)
306 output_line_command (pfile, print, list->line);
308 for (i = 0; i < list->tokens_used; i++)
310 if (TOK_TYPE (list, i) == CPP_VSPACE) /* For a vertical-space token, a line command is issued for the line number kept in its aux field. */
312 output_line_command (pfile, print, list->tokens[i].aux);
316 if (curcol < TOK_COL (list, i)) /* Padding is only ever added, never removed. */
318 /* Insert space to bring the column to what it should be. */
319 bump_column (print, curcol - 1, TOK_COL (list, i));
320 curcol = TOK_COL (list, i);
322 /* XXX We may have to insert space to prevent an accidental
324 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
325 curcol += TOK_LEN (list, i);
329 /* Scan a string (which may have escape marks), perform macro expansion,
330 and write the result to the token_buffer. */
333 _cpp_expand_to_buffer (pfile, buf, length)
339 enum cpp_ttype token;
344 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
348 /* Copy the buffer, because it might be in an unsafe place - for
349 example, a sequence on the token_buffer, where the pointers will
350 be invalidated if we enlarge the token_buffer. */
351 buf1 = alloca (length); /* Stack copy; it only lives until this function returns. */
352 memcpy (buf1, buf, length);
354 /* Set up the input on the input stack. */
355 stop = CPP_BUFFER (pfile); /* The buffer that was on top before the push; used as the stop condition below. */
356 if (cpp_push_buffer (pfile, buf1, length) == NULL)
358 CPP_BUFFER (pfile)->has_escapes = 1; /* Tell the lexer that this text may contain escape marks. */
360 /* Scan the input, create the output. */
363 token = cpp_get_token (pfile);
364 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop) /* End of input with the original buffer back on top: the pushed text is used up. */
369 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
372 cpp_scan_buffer_nooutput (pfile)
375 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile)); /* The buffer beneath the current one; the stop test below fires once it is on top at CPP_EOF. */
376 enum cpp_ttype token;
377 unsigned int old_written = CPP_WRITTEN (pfile); /* Rewind point for the token buffer. */
378 /* In no-output mode, we can ignore everything but directives. */
381 if (! pfile->only_seen_white) /* Once something other than whitespace has been seen on the line, skip to its end. */
382 _cpp_skip_rest_of_line (pfile);
383 token = cpp_get_token (pfile);
384 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
387 CPP_SET_WRITTEN (pfile, old_written); /* Throw away whatever the token just appended. */
390 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
393 cpp_scan_buffer (pfile, print)
397 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile)); /* The buffer beneath the current one; the stop test below fires once it is on top at CPP_EOF. */
398 enum cpp_ttype token;
402 token = cpp_get_token (pfile);
403 if (token == CPP_EOF || token == CPP_VSPACE
404 /* XXX Temporary kluge - force flush after #include only */
405 || (token == CPP_DIRECTIVE
406 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
408 cpp_output_tokens (pfile, print); /* Flushed at end of input, at end of line, or after a directive that changed the current file name. */
409 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
415 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
418 cpp_file_buffer (pfile)
423 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip)) /* Walk from the top of the stack towards the bottom. */
424 if (ip->ihash != NULL) /* A non-NULL ihash is the test used here to recognise a file buffer. */
429 /* Token-buffer helper functions. */
431 /* Expand a token list's string space. LEN is the number of bytes to add to the capacity, not the new total. */
433 expand_name_space (list, len)
437 list->name_cap += len;
438 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
441 /* Expand the number of tokens in a list. The capacity is doubled on each call. */
443 expand_token_space (list)
446 list->tokens_cap *= 2;
447 list->tokens = (cpp_token *)
448 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token)); /* list->tokens points just past a dummy leading token, so the allocation starts at tokens - 1 and holds tokens_cap + 1 entries. */
449 list->tokens++; /* Skip the dummy. */
452 /* Initialize a token list. We allocate an extra token in front of
453 the token list, as this allows us to always peek at the previous
454 token without worrying about underflowing the list. */
456 init_token_list (pfile, list, recycle)
461 /* Recycling a used list saves 3 free-malloc pairs. */
464 /* Initialize token space. Put a dummy token before the start
465 that will fail matches. */
466 list->tokens_cap = 256; /* 4K's worth. */
467 list->tokens = (cpp_token *)
468 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token)); /* The + 1 is the dummy token. */
469 list->tokens[0].type = CPP_EOF; /* The dummy lives in the first allocated slot. */
472 /* Initialize name space. */
473 list->name_cap = 1024;
474 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
476 /* Only create a comment space on demand. */
477 list->comments_cap = 0;
481 list->tokens_used = 0;
483 list->comments_used = 0;
485 list->line = pfile->buffer->lineno; /* The list is stamped with the current buffer's line number. */
486 list->dir_handler = 0;
490 /* Scan an entire line and create a token list for it. Does not
491 macro-expand or execute directives. */
494 _cpp_scan_line (pfile, list)
503 init_token_list (pfile, list, 1); /* Third argument is init_token_list's RECYCLE flag. */
505 written = CPP_WRITTEN (pfile); /* Token text is lexed into the token buffer starting here. */
510 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base; /* Column = offset of the read pointer from the start of the line, taken before lexing the token. */
511 type = _cpp_lex_token (pfile);
512 len = CPP_WRITTEN (pfile) - written; /* Length of the text the lexer just appended. */
513 CPP_SET_WRITTEN (pfile, written); /* Rewind so the next token is lexed at the same place. */
514 if (type == CPP_HSPACE)
516 if (CPP_PEDANTIC (pfile))
517 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
521 else if (type == CPP_COMMENT)
522 /* Only happens when processing -traditional macro definitions.
523 Do not give this a token entry, but do not change space_before
527 if (list->tokens_used >= list->tokens_cap)
528 expand_token_space (list);
529 if (list->name_used + len >= list->name_cap)
530 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
532 if (type == CPP_MACRO)
536 TOK_TYPE (list, i) = type;
537 TOK_COL (list, i) = col;
538 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
540 if (type == CPP_VSPACE)
543 TOK_LEN (list, i) = len;
544 TOK_OFFSET (list, i) = list->name_used;
545 memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
546 list->name_used += len;
550 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
552 /* XXX Temporary kluge: put back the newline. */
557 /* Skip a C-style block comment. We know it's a comment, and point is
558 at the second character of the starter. */
560 skip_block_comment (pfile)
563 unsigned int line, col;
564 const U_CHAR *limit, *cur;
567 line = CPP_BUF_LINE (CPP_BUFFER (pfile)); /* Position at entry, kept for the "unterminated comment" diagnostic. */
568 col = CPP_BUF_COL (CPP_BUFFER (pfile));
569 limit = CPP_BUFFER (pfile)->rlimit;
570 cur = CPP_BUFFER (pfile)->cur; /* Scanning uses this local pointer; it is stored back into the buffer at the end. */
575 if (c == '\n' || c == '\r')
577 /* \r cannot be a macro escape marker here. */
578 if (!ACTIVE_MARK_P (pfile))
579 CPP_BUMP_LINE_CUR (pfile, cur);
583 /* Check for terminator. */
584 if (cur < limit && *cur == '/')
587 /* Warn about comment starter embedded in comment. */
588 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
589 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
590 cur - CPP_BUFFER (pfile)->line_base,
591 "'/*' within comment");
595 cpp_error_with_line (pfile, line, col, "unterminated comment");
598 CPP_BUFFER (pfile)->cur = cur + 1; /* Store the local scan pointer back, advanced by one. */
601 /* Skip a C++/Chill line comment. We know it's a comment, and point
602 is at the second character of the initiator. */
604 skip_line_comment (pfile)
612 /* We don't have to worry about EOF in here. */
615 /* Don't consider final '\n' to be part of comment. */
621 /* \r cannot be a macro escape marker here. */
622 if (!ACTIVE_MARK_P (pfile)) /* The line count is only bumped when no mark is active. */
623 CPP_BUMP_LINE (pfile);
624 if (CPP_OPTION (pfile, warn_comments)) /* This warning is optional, controlled by warn_comments. */
625 cpp_warning (pfile, "backslash-newline within line comment");
630 /* Skip a comment - C, C++, or Chill style. M is the first character
631 of the comment marker. If this really is a comment, skip to its
632 end and return ' '. If this is not a comment, return M (which will
636 skip_comment (pfile, m)
640 if (m == '/' && PEEKC() == '*')
642 skip_block_comment (pfile);
645 else if (m == '/' && PEEKC() == '/')
647 if (CPP_BUFFER (pfile)->system_header_p)
649 /* We silently allow C++ comments in system headers, irrespective
650 of conformance mode, because lots of busted systems do that
651 and trying to clean it up in fixincludes is a nightmare. */
652 skip_line_comment (pfile);
655 else if (CPP_OPTION (pfile, cplusplus_comments))
657 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments) /* The diagnostics below are guarded by this per-buffer flag, which is set afterwards. */
659 if (CPP_WTRADITIONAL (pfile))
661 "C++ style comments are not allowed in traditional C");
662 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
664 "C++ style comments are not allowed in ISO C89");
665 if (CPP_WTRADITIONAL (pfile)
666 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))) /* Same two conditions as above: the follow-up note accompanies either message. */
668 "(this will be reported only once per input file)");
669 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
671 skip_line_comment (pfile);
677 else if (m == '-' && PEEKC() == '-'
678 && CPP_OPTION (pfile, chill)) /* "--" starts a line comment only when the chill option is set. */
680 skip_line_comment (pfile);
687 /* Identical to skip_comment except that it copies the comment into the
688 token_buffer. This is used if !discard_comments. */
690 copy_comment (pfile, m)
694 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
697 if (skip_comment (pfile, m) == m) /* Getting M back is tested here; the skip_comment header says M is returned when this was not a comment. */
700 limit = CPP_BUFFER (pfile)->cur; /* Read position after the comment has been skipped. */
701 CPP_RESERVE (pfile, limit - start + 2); /* Room for M plus the bytes visited by the loop below. */
702 CPP_PUTC_Q (pfile, m);
703 for (; start <= limit; start++) /* Inclusive bound: the byte at LIMIT is visited too. */
705 CPP_PUTC_Q (pfile, *start);
711 null_warning (pfile, count) /* Warn that embedded NUL characters were ignored; presumably COUNT selects the singular or plural wording (the test is on a line not visible here). */
716 cpp_warning (pfile, "embedded null character ignored");
718 cpp_warning (pfile, "embedded null characters ignored");
721 /* Skip whitespace \-newline and comments. Does not macro-expand. */
724 _cpp_skip_hspace (pfile)
727 unsigned int null_count = 0; /* Passed to null_warning at the end of the function. */
735 else if (is_hspace(c))
737 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile)) /* Form feed and vertical tab are accepted but diagnosed in pedantic mode. */
738 cpp_pedwarn (pfile, "%s in preprocessing directive",
739 c == '\f' ? "formfeed" : "vertical tab");
745 /* \r is a backslash-newline marker if !has_escapes, and
746 a deletable-whitespace or no-reexpansion marker otherwise. */
747 if (CPP_BUFFER (pfile)->has_escapes)
755 CPP_BUMP_LINE (pfile);
757 else if (c == '/' || c == '-') /* Either character may begin a comment; skip_comment decides. */
759 c = skip_comment (pfile, c);
769 null_warning (pfile, null_count);
772 /* Read and discard the rest of the current line. */
775 _cpp_skip_rest_of_line (pfile)
789 if (! CPP_BUFFER (pfile)->has_escapes) /* The line count is bumped only for buffers without escapes (NOTE(review): the enclosing case label is not visible; presumably the \r case - confirm). */
790 CPP_BUMP_LINE (pfile);
795 skip_string (pfile, c); /* Strings and comments are skipped as units by the helpers. */
800 skip_comment (pfile, c);
805 if (CPP_PEDANTIC (pfile))
806 cpp_pedwarn (pfile, "%s in preprocessing directive",
807 c == '\f' ? "formfeed" : "vertical tab");
814 /* Parse an identifier starting with C. Characters are appended to PFILE's token buffer. */
817 _cpp_parse_name (pfile, c)
829 if (c == '$' && CPP_PEDANTIC (pfile)) /* '$' is accepted here but diagnosed in pedantic mode. */
830 cpp_pedwarn (pfile, "`$' in identifier");
832 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
833 CPP_PUTC_Q (pfile, c);
841 /* Parse and skip over a string starting with C. A single quoted
842 string is treated like a double -- some programs (e.g., troff) are
843 perverse this way. (However, a single quoted string is not allowed
844 to extend over multiple lines.) */
846 skip_string (pfile, c)
850 unsigned int start_line, start_column;
851 unsigned int null_count = 0; /* Drives the null-character warnings at the end of the function. */
853 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile)); /* Position at entry, used by the diagnostics below. */
854 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
861 cpp_error_with_line (pfile, start_line, start_column,
862 "unterminated string or character constant");
863 if (pfile->multiline_string_line != start_line
864 && pfile->multiline_string_line != 0) /* A multi-line string recorded on a different line is reported as the possible real start. */
865 cpp_error_with_line (pfile,
866 pfile->multiline_string_line, -1,
867 "possible real start of unterminated constant");
868 pfile->multiline_string_line = 0;
876 CPP_BUMP_LINE (pfile);
877 /* In Fortran and assembly language, silently terminate
878 strings of either variety at end of line. This is a
879 kludge around not knowing where comments are in these
881 if (CPP_OPTION (pfile, lang_fortran)
882 || CPP_OPTION (pfile, lang_asm))
887 /* Character constants may not extend over multiple lines.
888 In Standard C, neither may strings. We accept multiline
889 strings as an extension. */
892 cpp_error_with_line (pfile, start_line, start_column,
893 "unterminated character constant");
897 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0) /* The pedantic warning is given only while no multi-line string is on record. */
898 cpp_pedwarn_with_line (pfile, start_line, start_column,
899 "string constant runs past end of line");
900 if (pfile->multiline_string_line == 0)
901 pfile->multiline_string_line = start_line; /* Record where the multi-line string began. */
905 if (CPP_BUFFER (pfile)->has_escapes)
907 cpp_ice (pfile, "\\r escape inside string constant");
911 /* Backslash newline is replaced by nothing at all. */
912 CPP_BUMP_LINE (pfile);
929 cpp_warning (pfile, "null character in string or character constant");
930 else if (null_count > 1)
931 cpp_warning (pfile, "null characters in string or character constant");
934 /* Parse a string and copy it to the output. C is the opening character; skip_string does the scanning. */
937 parse_string (pfile, c)
941 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
944 skip_string (pfile, c);
946 limit = CPP_BUFFER (pfile)->cur; /* Read position after the string has been skipped. */
947 CPP_RESERVE (pfile, limit - start + 2);
948 CPP_PUTC_Q (pfile, c); /* C itself is written first, then the bytes from START. */
949 for (; start < limit; start++) /* Exclusive bound: stops before LIMIT. */
951 CPP_PUTC_Q (pfile, *start);
954 /* Read an assertion into the token buffer, converting to
955 canonical form: `#predicate(a n swe r)' The next non-whitespace
956 character to read should be the first letter of the predicate.
957 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
958 with answer (see callers for why). In case of 0, an error has been
961 _cpp_parse_assertion (pfile)
965 _cpp_skip_hspace (pfile);
969 cpp_error (pfile, "assertion without predicate");
972 else if (! is_idstart(c))
974 cpp_error (pfile, "assertion predicate is not an identifier");
977 CPP_PUTC(pfile, '#');
979 _cpp_parse_name (pfile, c);
984 if (is_hspace(c) || c == '\r')
985 _cpp_skip_hspace (pfile);
991 CPP_PUTC(pfile, '(');
994 while ((c = GETC()) != ')')
1000 CPP_PUTC(pfile, ' ');
1004 else if (c == '\n' || c == EOF)
1006 if (c == '\n') FORWARD(-1);
1007 cpp_error (pfile, "un-terminated assertion answer");
1011 /* \r cannot be a macro escape here. */
1012 CPP_BUMP_LINE (pfile);
1015 CPP_PUTC (pfile, c);
1020 if (pfile->limit[-1] == ' ') /* A trailing space in the answer is overwritten by the closing paren. */
1021 pfile->limit[-1] = ')';
1022 else if (pfile->limit[-1] == '(') /* An open paren as the last character written means the answer was empty. */
1024 cpp_error (pfile, "empty token sequence in assertion");
1028 CPP_PUTC (pfile, ')');
1033 /* Get the next token, and add it to the text in pfile->token_buffer.
1034 Return the kind of token we got. */
1037 _cpp_lex_token (pfile)
1041 enum cpp_ttype token;
1043 if (CPP_BUFFER (pfile) == NULL)
1054 if (PEEKC () == '=')
1058 if (CPP_OPTION (pfile, discard_comments)) /* Comments are either skipped or copied into the token buffer, depending on discard_comments. */
1059 c = skip_comment (pfile, c);
1061 c = copy_comment (pfile, c);
1065 /* Comments are equivalent to spaces.
1066 For -traditional, a comment is equivalent to nothing. */
1067 if (!CPP_OPTION (pfile, discard_comments))
1069 else if (CPP_TRADITIONAL (pfile))
1071 if (pfile->parsing_define_directive)
1077 CPP_PUTC (pfile, c);
1082 CPP_PUTC (pfile, c);
1085 if (pfile->parsing_if_directive)
1087 CPP_ADJUST_WRITTEN (pfile, -1);
1088 if (_cpp_parse_assertion (pfile))
1089 return CPP_ASSERTION;
1093 if (pfile->parsing_define_directive)
1099 CPP_PUTC (pfile, c2);
1101 else if (c2 == '%' && PEEKN (1) == ':')
1103 /* Digraph: "%:" == "#". */
1105 CPP_RESERVE (pfile, 2);
1106 CPP_PUTC_Q (pfile, c2);
1107 CPP_PUTC_Q (pfile, GETC ());
1115 if (!pfile->only_seen_white)
1118 /* Remove the "#" or "%:" from the token buffer. */
1119 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1120 return CPP_DIRECTIVE;
1124 parse_string (pfile, c);
1125 return c == '\'' ? CPP_CHAR : CPP_STRING; /* A single quote yields a character constant; anything else reaching here yields a string. */
1128 if (!CPP_OPTION (pfile, dollars_in_ident))
1134 /* Digraph: ":>" == "]". */
1136 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1144 if (c2 == c || c2 == '=')
1149 /* Digraphs: "%:" == "#", "%>" == "}". */
1154 CPP_RESERVE (pfile, 2);
1155 CPP_PUTC_Q (pfile, c);
1156 CPP_PUTC_Q (pfile, c2);
1162 CPP_RESERVE (pfile, 2);
1163 CPP_PUTC_Q (pfile, c);
1164 CPP_PUTC_Q (pfile, c2);
1165 return CPP_OPEN_BRACE;
1167 /* else fall through */
1173 if (PEEKC () == '=')
1181 if (CPP_OPTION (pfile, chill))
1182 goto comment; /* Chill style comment */
1190 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1192 /* In C++, there's a ->* operator. */
1194 CPP_RESERVE (pfile, 4);
1195 CPP_PUTC_Q (pfile, c);
1196 CPP_PUTC_Q (pfile, GETC ());
1197 CPP_PUTC_Q (pfile, GETC ());
1205 if (pfile->parsing_include_directive)
1209 CPP_PUTC (pfile, c);
1213 if (c == '\n' || c == EOF)
1216 "missing '>' in `#include <FILENAME>'");
1221 if (!CPP_BUFFER (pfile)->has_escapes)
1223 /* Backslash newline is replaced by nothing. */
1224 CPP_ADJUST_WRITTEN (pfile, -1);
1225 CPP_BUMP_LINE (pfile);
1229 /* We might conceivably get \r- or \r<space> in
1230 here. Just delete 'em. */
1232 if (d != '-' && d != ' ')
1233 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1234 CPP_ADJUST_WRITTEN (pfile, -1);
1240 /* Digraphs: "<%" == "{", "<:" == "[". */
1245 CPP_RESERVE (pfile, 2);
1246 CPP_PUTC_Q (pfile, c);
1247 CPP_PUTC_Q (pfile, c2);
1248 return CPP_CLOSE_BRACE;
1252 /* else fall through */
1257 /* GNU C++ supports MIN and MAX operators <? and >?. */
1258 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1261 CPP_RESERVE (pfile, 3);
1262 CPP_PUTC_Q (pfile, c);
1263 CPP_PUTC_Q (pfile, c2);
1264 if (PEEKC () == '=')
1265 CPP_PUTC_Q (pfile, GETC ());
1272 CPP_PUTC (pfile, c);
1277 /* In C++ there's a .* operator. */
1278 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1281 if (c2 == '.' && PEEKN(1) == '.')
1283 CPP_RESERVE (pfile, 3);
1284 CPP_PUTC_Q (pfile, '.');
1285 CPP_PUTC_Q (pfile, '.');
1286 CPP_PUTC_Q (pfile, '.');
1288 return CPP_ELLIPSIS;
1293 CPP_RESERVE (pfile, 2);
1294 CPP_PUTC_Q (pfile, c);
1295 CPP_PUTC_Q (pfile, GETC ());
1300 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1302 CPP_PUTC (pfile, c);
1304 parse_string (pfile, c);
1305 return c == '\'' ? CPP_WCHAR : CPP_WSTRING; /* Wide counterparts of CPP_CHAR / CPP_STRING; this branch is not taken in traditional mode. */
1309 case '0': case '1': case '2': case '3': case '4':
1310 case '5': case '6': case '7': case '8': case '9':
1315 CPP_RESERVE (pfile, 2);
1316 CPP_PUTC_Q (pfile, c);
1320 if (!is_numchar(c) && c != '.'
1321 && ((c2 != 'e' && c2 != 'E'
1322 && ((c2 != 'p' && c2 != 'P')
1323 || CPP_OPTION (pfile, c89)))
1324 || (c != '+' && c != '-')))
1330 case 'b': case 'c': case 'd': case 'h': case 'o':
1331 case 'B': case 'C': case 'D': case 'H': case 'O':
1332 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1334 CPP_RESERVE (pfile, 2);
1335 CPP_PUTC_Q (pfile, c);
1336 CPP_PUTC_Q (pfile, '\'');
1342 goto chill_number_eof;
1345 CPP_PUTC (pfile, c);
1349 CPP_RESERVE (pfile, 2);
1350 CPP_PUTC_Q (pfile, c);
1363 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1364 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1365 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1366 case 'x': case 'y': case 'z':
1367 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1368 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1369 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1372 _cpp_parse_name (pfile, c);
1375 case ' ': case '\t': case '\v': case '\f': case '\0':
1384 CPP_PUTC (pfile, c);
1386 if (c == EOF || !is_hspace(c))
1391 null_warning (pfile, null_count);
1396 if (CPP_BUFFER (pfile)->has_escapes)
1401 if (pfile->output_escapes)
1402 CPP_PUTS (pfile, "\r-", 2);
1403 _cpp_parse_name (pfile, GETC ());
1408 /* "\r " means a space, but only if necessary to prevent
1409 accidental token concatenation. */
1410 CPP_RESERVE (pfile, 2);
1411 if (pfile->output_escapes)
1412 CPP_PUTC_Q (pfile, '\r');
1413 CPP_PUTC_Q (pfile, c);
1418 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1424 /* Backslash newline is ignored. */
1425 if (!ACTIVE_MARK_P (pfile))
1426 CPP_BUMP_LINE (pfile);
1431 CPP_PUTC (pfile, c);
1434 case '(': token = CPP_OPEN_PAREN; goto char1; /* Single-character punctuators: set the token kind, then jump to the shared code at char1. */
1435 case ')': token = CPP_CLOSE_PAREN; goto char1;
1436 case '{': token = CPP_OPEN_BRACE; goto char1;
1437 case '}': token = CPP_CLOSE_BRACE; goto char1;
1438 case ',': token = CPP_COMMA; goto char1;
1439 case ';': token = CPP_SEMICOLON; goto char1;
1445 CPP_PUTC (pfile, c);
1450 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1451 Caller is expected to have checked no_macro_expand. */
1453 maybe_macroexpand (pfile, written)
1457 U_CHAR *macro = pfile->token_buffer + written; /* Start of the candidate name inside the token buffer. */
1458 size_t len = CPP_WRITTEN (pfile) - written; /* Its length: everything written since WRITTEN. */
1459 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1461 /* _cpp_lookup never returns null. */
1462 if (hp->type == T_VOID)
1464 if (hp->disabled || hp->type == T_IDENTITY)
1466 if (pfile->output_escapes)
1468 /* Insert a no-reexpand marker before IDENT. */
1469 CPP_RESERVE (pfile, 2);
1470 CPP_ADJUST_WRITTEN (pfile, 2);
1471 macro = pfile->token_buffer + written; /* Recomputed after CPP_RESERVE (presumably because the token buffer may have been reallocated - confirm). */
1473 memmove (macro + 2, macro, len); /* Shift the name up by two bytes; memmove because source and destination overlap. */
1479 if (hp->type == T_EMPTY)
1481 /* Special case optimization: macro expands to nothing. */
1482 CPP_SET_WRITTEN (pfile, written);
1483 CPP_PUTC_Q (pfile, ' ');
1487 /* If macro wants an arglist, verify that a '(' follows. */
1488 if (hp->type == T_FMACRO)
1490 int macbuf_whitespace = 0; /* Set when whitespace was skipped inside a macro buffer; a space is emitted later if so. */
1493 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1495 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1498 _cpp_skip_hspace (pfile);
1505 if (point != CPP_BUFFER (pfile)->cur) /* The read pointer moved, so some whitespace was skipped. */
1506 macbuf_whitespace = 1;
1510 goto not_macro_call;
1511 cpp_pop_buffer (pfile);
1514 CPP_SET_MARK (pfile); /* NOTE(review): CPP_SET_MARK / CPP_GOTO_MARK appear to bracket a look-ahead that can be undone - confirm against their definitions. */
1517 _cpp_skip_hspace (pfile);
1524 CPP_GOTO_MARK (pfile);
1529 if (macbuf_whitespace)
1530 CPP_PUTC (pfile, ' ');
1536 /* This is now known to be a macro call.
1537 Expand the macro, reading arguments as needed,
1538 and push the expansion on the input stack. */
1539 _cpp_macroexpand (pfile, hp);
1540 CPP_SET_WRITTEN (pfile, written); /* Rewind to WRITTEN, dropping the macro name from the token buffer. */
1544 /* Complain about \v or \f in a preprocessing directive (constraint
1545 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1547 pedantic_whitespace (pfile, p, len) /* P and LEN describe the text to examine; callers in this file pass a span of the token buffer. */
1555 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1556 else if (*p == '\f') /* Form feed gets the parallel diagnostic. */
1557 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1565 cpp_get_token (pfile) /* Lex the next token with _cpp_lex_token and post-process it; the visible code maintains only_seen_white and potential_control_macro, runs directives, attempts macro expansion, and pops buffers. */
1568 enum cpp_ttype token;
1569 long written = CPP_WRITTEN (pfile); /* Start of this token's text in the token buffer; handed to maybe_macroexpand. */
1572 token = _cpp_lex_token (pfile);
1577 pfile->potential_control_macro = 0;
1578 pfile->only_seen_white = 0;
1582 if (pfile->only_seen_white == 0)
1583 pfile->only_seen_white = 1;
1584 CPP_BUMP_LINE (pfile);
1592 pfile->potential_control_macro = 0;
1593 if (_cpp_handle_directive (pfile))
1594 return CPP_DIRECTIVE;
1595 pfile->only_seen_white = 0;
1596 CPP_PUTC (pfile, '#'); /* Reached when _cpp_handle_directive returned zero: the '#' is written to the token buffer. */
1600 pfile->potential_control_macro = 0;
1601 pfile->only_seen_white = 0;
1602 if (! pfile->no_macro_expand
1603 && maybe_macroexpand (pfile, written))
1608 if (CPP_BUFFER (pfile) == NULL)
1610 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1612 cpp_pop_buffer (pfile);
1615 cpp_pop_buffer (pfile);
1620 /* Like cpp_get_token, but skip spaces and comments. */
1623 cpp_get_non_space_token (pfile)
1626 int old_written = CPP_WRITTEN (pfile); /* Rewind point for the text of skipped tokens. */
1629 enum cpp_ttype token = cpp_get_token (pfile);
1630 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE) /* True for any token other than a comment or horizontal/vertical space. */
1632 CPP_SET_WRITTEN (pfile, old_written); /* Remove the skipped token's text from the token buffer. */
1636 /* Like cpp_get_token, except that it does not execute directives,
1637 does not consume vertical space, and discards horizontal space. */
1639 _cpp_get_directive_token (pfile)
1643 enum cpp_ttype token;
1646 old_written = CPP_WRITTEN (pfile); /* Start of this token's text in the token buffer. */
1647 token = _cpp_lex_token (pfile);
1654 /* Put it back and return VSPACE. */
1656 CPP_ADJUST_WRITTEN (pfile, -1); /* Drop the one character just written. */
1660 if (CPP_PEDANTIC (pfile))
1661 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1662 CPP_WRITTEN (pfile) - old_written);
1663 CPP_SET_WRITTEN (pfile, old_written); /* Rewind to where this token's text began. */
1668 /* Don't execute the directive, but don't smash it to OTHER either. */
1669 CPP_PUTC (pfile, '#');
1670 return CPP_DIRECTIVE;
1673 if (! pfile->no_macro_expand
1674 && maybe_macroexpand (pfile, old_written))
1679 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1681 cpp_pop_buffer (pfile);
1685 /* This can happen for files that don't end with a newline,
1686 and for cpp_define and friends. Pretend they do, so
1687 callers don't have to deal. A warning will be issued by
1688 someone else, if necessary. */
1693 /* Determine the current line and column. Used only by read_and_prescan. */
1695 find_position (start, limit, linep)
1698 unsigned long *linep;
1700 unsigned long line = *linep; /* Starts from the line number the caller passes in through LINEP. */
1701 U_CHAR *lbase = start; /* Presumably the start of the line currently being scanned - confirm against the elided loop body. */
1702 while (start < limit)
1704 U_CHAR ch = *start++;
1705 if (ch == '\n' || ch == '\r') /* Either character is tested for here; the handling is on lines not visible in this listing. */
1715 /* The following table is used by _cpp_read_and_prescan. If we have
1716 designated initializers, it can be constant data; otherwise, it is
1717 set up at runtime by _cpp_init_input_buffer. */
1720 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1723 #if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
1724 #define init_chartab() /* nothing */
1725 #define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
1727 #define s(p, v) [p] = v, /* Designated-initializer form: each use becomes one array element. */
1729 #define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1730 static void init_chartab PARAMS ((void)) { \
1731 unsigned char *x = chartab;
1733 #define s(p, v) x[p] = v; /* Runtime form: each use becomes an assignment inside init_chartab. */
1736 /* Table of characters that can't be handled in the inner loop.
1737 Also contains the mapping between trigraph third characters and their
1739 #define SPECCASE_CR 1
1740 #define SPECCASE_BACKSLASH 2
1741 #define SPECCASE_QUESTION 3
1744 s('\r', SPECCASE_CR)
1745 s('\\', SPECCASE_BACKSLASH)
1746 s('?', SPECCASE_QUESTION)
1748 s('=', '#') s(')', ']') s('!', '|')
1749 s('(', '[') s('\'', '^') s('>', '}')
1750 s('/', '\\') s('<', '{') s('-', '~')
1757 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1758 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1760 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1761 much memory to allocate initially; more will be allocated if
1762 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1763 canonical form (\n). If enabled, convert and/or warn about
1764 trigraphs. Convert backslash-newline to a one-character escape
1765 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1766 token). If there is no newline at the end of the file, add one and
1767 warn. Returns -1 on failure, or the actual length of the data to
1770 This function does a lot of work, and can be a serious performance
1771 bottleneck. It has been tuned heavily; make sure you understand it
1772 before hacking. The common case - no trigraphs, Unix style line
1773 breaks, backslash-newline set off by whitespace, newline at EOF -
1774 has been optimized at the expense of the others. The performance
1775 penalty for DOS style line breaks (\r\n) is about 15%.
1777 Warnings lose particularly heavily since we have to determine the
1778 line number, which involves scanning from the beginning of the file
1779 or from the last warning. The penalty for the absence of a newline
1780 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1782 If your file has more than one kind of end-of-line marker, you
1783 will get messed-up line numbering.
1785 So that the cases of the switch statement do not have to concern
1786 themselves with the complications of reading beyond the end of the
1787 buffer, the buffer is guaranteed to have at least 3 characters in
1788 it (or however many are left in the file, if less) on entry to the
1789 switch. This is enough to handle trigraphs and the "\\\n\r" and
1792 The end of the buffer is marked by a '\\', which, being a special
1793 character, guarantees we will exit the fast-scan loops and perform
1797 _cpp_read_and_prescan (pfile, fp, desc, len)
1803 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1804 U_CHAR *ip, *op, *line_base;
1807 unsigned int deferred_newlines;
1812 deferred_newlines = 0;
1816 ibase = pfile->input_buffer + 3;
1818 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1822 U_CHAR *near_buff_end;
1824 count = read (desc, ibase, pfile->input_buffer_len);
1828 ibase[count] = '\\'; /* Marks end of buffer */
1831 near_buff_end = pfile->input_buffer + count;
1836 size_t delta_line_base;
1840 This could happen if the file is larger than half the
1841 maximum address space of the machine. */
1844 delta_op = op - buf;
1845 delta_line_base = line_base - buf;
1846 buf = (U_CHAR *) xrealloc (buf, len);
1847 op = buf + delta_op;
1848 line_base = buf + delta_line_base;
1855 /* Allow normal processing of the (at most 2) remaining
1856 characters. The end-of-buffer marker is still present
1857 and prevents false matches within the switch. */
1858 near_buff_end = ibase - 1;
1865 /* Deal with \-newline, potentially in the middle of a token. */
1866 if (deferred_newlines)
1868 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1870 /* Previous was not white space. Skip to white
1871 space, if we can, before outputting the \r's */
1873 while (ip[span] != ' '
1876 && NORMAL(ip[span]))
1878 memcpy (op, ip, span);
1881 if (! NORMAL(ip[0]))
1884 while (deferred_newlines)
1885 deferred_newlines--, *op++ = '\r';
1888 /* Copy as much as we can without special treatment. */
1890 while (NORMAL (ip[span])) span++;
1891 memcpy (op, ip, span);
1896 if (ip > near_buff_end) /* Do we have enough chars? */
1898 switch (chartab[*ip++])
1900 case SPECCASE_CR: /* \r */
1909 case SPECCASE_BACKSLASH: /* \ */
1912 deferred_newlines++;
1914 if (*ip == '\r') ip++;
1916 else if (*ip == '\r')
1918 deferred_newlines++;
1920 if (*ip == '\n') ip++;
1926 case SPECCASE_QUESTION: /* ? */
1930 *op++ = '?'; /* Normal non-trigraph case */
1939 if (CPP_OPTION (pfile, warn_trigraphs))
1942 line_base = find_position (line_base, op, &line);
1943 col = op - line_base + 1;
1944 if (CPP_OPTION (pfile, trigraphs))
1945 cpp_warning_with_line (pfile, line, col,
1946 "trigraph ??%c converted to %c", d, t);
1948 cpp_warning_with_line (pfile, line, col,
1949 "trigraph ??%c ignored", d);
1953 if (CPP_OPTION (pfile, trigraphs))
1955 op[-1] = t; /* Overwrite '?' */
1960 goto do_speccase; /* May need buffer refill */
1972 /* Copy previous char plus unprocessed (at most 2) chars
1973 to beginning of buffer, refill it with another
1974 read(), and continue processing */
1975 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
1985 line_base = find_position (line_base, op, &line);
1986 col = op - line_base + 1;
1987 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
1988 if (offset + 1 > len)
1991 if (offset + 1 > len)
1993 buf = (U_CHAR *) xrealloc (buf, len);
1999 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2003 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2004 (unsigned long)offset);
2009 cpp_error_from_errno (pfile, fp->ihash->name);
2014 /* Allocate pfile->input_buffer, and initialize chartab[]
2015 if it hasn't happened already. */
2018 _cpp_init_input_buffer (pfile)
2024 init_token_list (pfile, &pfile->directbuf, 0);
2026 /* Determine the appropriate size for the input buffer. Normal C
2027 source files are smaller than eight K. */
2028 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2029 address arithmetic all the time, and 3 for pushback during buffer
2030 refill, in case there's a potential trigraph or end-of-line
2031 digraph at the end of a block. */
2033 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2034 pfile->input_buffer = tmp;
2035 pfile->input_buffer_len = 8192;
2039 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2040 and extending for LEN characters to the NUL-terminated string
2041 STRING. Typical usage:
2043 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2049 cpp_idcmp (token, len, string)
2050 const U_CHAR *token;
2054 size_t len2 = strlen (string);
2057 if ((r = memcmp (token, string, MIN (len, len2))))
2060 /* The longer of the two strings sorts after the shorter. */
2063 else if (len < len2)
2071 /* Lexing algorithm.
2073 The original lexer in cpplib was made up of two passes: a first pass
2074 that replaced trigraphs and deleted escaped newlines, and a second
2075 pass that tokenized the result of the first pass. Tokenisation was
2076 performed by peeking at the next character in the input stream. For
2077 example, if the input stream contained "!=", the handler for the !
2078 character would peek at the next character, and if it were a '='
2079 would skip over it, and return a "!=" token, otherwise it would
2080 return just the "!" token.
2082 To implement a single-pass lexer, this peeking ahead is unworkable.
2083 An arbitrary number of escaped newlines, and trigraphs (in particular
2084 ??/ which translates to the escape \), could separate the '!' and '='
2085 in the input stream, yet the next token is still a "!=".
2087 Suppose instead that we lex by one logical line at a time, producing
2088 a token list or stack for each logical line, and when seeing the '!'
2089 push a CPP_NOT token on the list. Then if the '!' is part of a
2090 longer token ("!=") we know we must see the remainder of the token by
2091 the time we reach the end of the logical line. Thus we can have the
2092 '=' handler look at the previous token (at the end of the list / top
2093 of the stack) and see if it is a "!" token, and if so, instead of
2094 pushing a "=" token revise the existing token to be a "!=" token.
2096 This works in the presence of escaped newlines, because the '\' would
2097 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2098 newline ('\n' or '\r') handler looks at the token at the top of the
2099 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2100 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2101 the '=' handler would never see any intervening escaped newlines.
2103 To make trigraphs work in this context, since trigraphs have the highest
2104 precedence and are converted before anything else, the '?' handler does
2105 lookahead to see if it is a trigraph, and if so skips the trigraph
2106 and pushes the token it represents onto the top of the stack. This
2107 also works in the particular case of a CPP_BACKSLASH trigraph.
2109 To the preprocessor, whitespace is only significant to the point of
2110 knowing whether whitespace precedes a particular token. For example,
2111 the '=' handler needs to know whether there was whitespace between it
2112 and a "!" token on the top of the stack, to make the token conversion
2113 decision correctly. So each token has a PREV_WHITESPACE flag to
2114 indicate this - the standard permits consecutive whitespace to be
2115 regarded as a single space. The compiler front ends are not
2116 interested in whitespace at all; they just require a token stream.
2117 Another place where whitespace is significant to the preprocessor is
2118 a #define statement - if there is whitespace between the macro name
2119 and an initial "(" token the macro is "object-like", otherwise it is
2120 a function-like macro that takes arguments.
2122 However, all is not rosy. Parsing of identifiers, numbers, comments
2123 and strings becomes trickier because of the possibility of raw
2124 trigraphs and escaped newlines in the input stream.
2126 The trigraphs are three consecutive characters beginning with two
2127 question marks. A question mark is not valid as part of a number or
2128 identifier, so parsing of a number or identifier terminates normally
2129 upon reaching it, returning to the mainloop which handles the
2130 trigraph just like it would in any other position. Similarly for the
2131 backslash of a backslash-newline combination. So we just need the
2132 escaped-newline dropper in the mainloop to check if the token on the
2133 top of the stack after dropping the escaped newline is a number or
2134 identifier, and if so to continue processing it as if nothing had
2137 For strings, we replace trigraphs whenever we reach a quote or
2138 newline, because there might be a backslash trigraph escaping them.
2139 We need to be careful that we start trigraph replacing from where we
2140 left off previously, because it is possible for a first scan to leave
2141 "fake" trigraphs that a second scan would pick up as real (e.g. the
2142 sequence "????/\n=" would find a fake ??= trigraph after removing the
2145 For line comments, on reaching a newline we scan the previous
2146 character(s) to see if it is escaped, and continue if it is. Block
2147 comments ignore everything and just focus on finding the comment
2148 termination mark. The only difficult thing, and it is surprisingly
2149 tricky, is checking if an asterisk precedes the final slash since
2150 they could be separated by escaped newlines. If the preprocessor is
2151 invoked with the output comments option, we don't bother removing
2152 escaped newlines and replacing trigraphs for output.
2154 Finally, numbers can begin with a period, which is pushed initially
2155 as a CPP_DOT token in its own right. The digit handler checks if the
2156 previous token was a CPP_DOT not separated by whitespace, and if so
2157 pops it off the stack and pushes a period into the number's buffer
2158 before calling the number parser.
2162 static void expand_comment_space PARAMS ((cpp_toklist *));
2163 void init_trigraph_map PARAMS ((void));
2164 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
2166 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
2167 const unsigned char *));
2168 static int skip_block_comment PARAMS ((cpp_reader *));
2169 static int skip_line_comment PARAMS ((cpp_reader *));
2170 static void skip_whitespace PARAMS ((cpp_reader *, int));
2171 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2172 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2173 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
2175 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
2176 static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
2177 unsigned int, unsigned int, unsigned int));
2178 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
2180 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
2182 unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
2184 unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
2186 unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
2189 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
2192 /* Macros on a cpp_name. */
2193 #define INIT_NAME(list, name) \
2194 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
2196 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
2197 #define COLUMN(cur) ((cur) - buffer->line_base)
2199 /* Maybe put these in the ISTABLE eventually. */
2200 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
2201 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
2203 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
2204 character, if any, is in buffer. */
2205 #define handle_newline(cur, limit, c) \
2207 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
2209 CPP_BUMP_LINE_CUR (pfile, (cur)); \
2212 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
2213 #define PREV_TOKEN_TYPE (cur_token[-1].type)
2215 #define SPELL_TEXT 0
2216 #define SPELL_HANDLER 1
2217 #define SPELL_CHAR 2
2218 #define SPELL_NONE 3
2221 #define T(e, s) {SPELL_TEXT, s},
2222 #define H(e, s) {SPELL_HANDLER, s},
2223 #define C(e, s) {SPELL_CHAR, s},
2224 #define N(e, s) {SPELL_NONE, s},
2225 #define E(e, s) {SPELL_EOL, s},
2227 static const struct token_spelling
2231 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
2239 static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
2243 expand_comment_space (list) /* Grow LIST's comment array: 10 entries on first use, doubled thereafter. */
2246 if (list->comments_cap == 0)
2248 list->comments_cap = 10;
2249 list->comments = (cpp_token *)
2250 xmalloc (list->comments_cap * sizeof (cpp_token));
2254 list->comments_cap *= 2; /* comments_cap counts cpp_token elements, not bytes. */
2255 list->comments = (cpp_token *)
2256 xrealloc (list->comments, list->comments_cap * sizeof (cpp_token));
2261 cpp_free_token_list (list)
2265 free (list->comments);
2266 free (list->tokens - 1); /* Backup over dummy token. */
2267 free (list->namebuf);
2271 static unsigned char trigraph_map[256];
2274 init_trigraph_map ()
2276 trigraph_map['='] = '#';
2277 trigraph_map['('] = '[';
2278 trigraph_map[')'] = ']';
2279 trigraph_map['/'] = '\\';
2280 trigraph_map['\''] = '^';
2281 trigraph_map['<'] = '{';
2282 trigraph_map['>'] = '}';
2283 trigraph_map['!'] = '|';
2284 trigraph_map['-'] = '~';
2287 /* Call when a trigraph is encountered. It warns if necessary, and
2288 returns true if the trigraph should be honoured. END is the third
2289 character of a trigraph in the input stream. */
2291 trigraph_ok (pfile, end)
2293 const unsigned char *end;
2295 int accept = CPP_OPTION (pfile, trigraphs);
2297 if (CPP_OPTION (pfile, warn_trigraphs))
2299 unsigned int col = end - 1 - pfile->buffer->line_base;
2301 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2302 "trigraph ??%c converted to %c",
2303 (int) *end, (int) trigraph_map[*end]);
2305 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2306 "trigraph ??%c ignored", (int) *end);
2311 /* Scan a string for trigraphs, warning or replacing them inline as
2312 appropriate. When parsing a string, we must call this routine
2313 before processing a newline character (if trigraphs are enabled),
2314 since the newline might be escaped by a preceding backslash
2315 trigraph sequence. Returns a pointer to the end of the name after
2318 static unsigned char*
2319 trigraph_replace (pfile, src, limit)
2322 unsigned char* limit;
2324 unsigned char *dest;
2326 /* Starting with src[1], find two consecutive '?'. The case of no
2327 trigraphs is streamlined. */
2329 for (; src + 1 < limit; src += 2)
2334 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2337 else if (src + 2 == limit || src[1] != '?')
2340 /* Check if it really is a trigraph. */
2341 if (trigraph_map[src[2]] == 0)
2345 goto trigraph_found;
2349 /* Now we have a trigraph, we need to scan the remaining buffer, and
2350 copy-shifting its contents left if replacement is enabled. */
2351 for (; src + 2 < limit; dest++, src++)
2352 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2356 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2357 *dest = trigraph_map[*src];
2360 /* Copy remaining (at most 2) characters. */
2366 /* If CUR is a backslash or the end of a trigraphed backslash, return
2367 a pointer to its beginning, otherwise NULL. We don't read beyond
2368 the buffer start, because there is the start of the comment in the
2370 static const unsigned char *
2371 backslash_start (pfile, cur)
2373 const unsigned char *cur;
2377 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2378 && trigraph_ok (pfile, cur))
2383 /* Skip a C-style block comment. This is probably the trickiest
2384 handler. We find the end of the comment by seeing if an asterisk
2385 is before every '/' we encounter. The nasty complication is that a
2386 previous asterisk may be separated by one or more escaped newlines.
2387 Returns non-zero if comment terminated by EOF, zero otherwise. */
2389 skip_block_comment (pfile)
2392 cpp_buffer *buffer = pfile->buffer;
2393 const unsigned char *char_after_star = 0;
2394 register const unsigned char *cur = buffer->cur;
2397 /* Inner loop would think the comment has ended if the first comment
2398 character is a '/'. Avoid this and keep the inner loop clean by
2399 skipping such a character. */
2400 if (cur < buffer->rlimit && cur[0] == '/')
2403 for (; cur < buffer->rlimit; )
2405 unsigned char c = *cur++;
2407 /* People like decorating comments with '*', so check for
2408 '/' instead for efficiency. */
2411 if (cur[-2] == '*' || cur - 1 == char_after_star)
2414 /* Warn about potential nested comments, but not when
2415 the final character inside the comment is a '/'.
2416 Don't bother to get it right across escaped newlines. */
2417 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2418 && cur[0] == '*' && cur[1] != '/')
2421 cpp_warning (pfile, "'/*' within comment");
2424 else if (IS_NEWLINE(c))
2426 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2428 handle_newline (cur, buffer->rlimit, c);
2429 /* Work correctly if there is an asterisk before an
2430 arbitrarily long sequence of escaped newlines. */
2431 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2432 char_after_star = cur;
2434 char_after_star = 0;
2444 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2445 Returns non-zero if a multiline comment. */
2447 skip_line_comment (pfile)
2450 cpp_buffer *buffer = pfile->buffer;
2451 register const unsigned char *cur = buffer->cur;
2454 for (; cur < buffer->rlimit; )
2456 unsigned char c = *cur++;
2460 /* Check for a (trigraph?) backslash escaping the newline. */
2461 if (!backslash_start (pfile, cur - 2))
2464 handle_newline (cur, buffer->rlimit, c);
2470 buffer->cur = cur - 1; /* Leave newline for caller. */
2474 /* Skips whitespace, stopping at next non-whitespace character. */
2476 skip_whitespace (pfile, in_directive)
2480 cpp_buffer *buffer = pfile->buffer;
2481 register const unsigned char *cur = buffer->cur;
2482 unsigned short null_count = 0;
2484 for (; cur < buffer->rlimit; )
2486 unsigned char c = *cur++;
2488 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2490 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2494 /* Must be '\f' or '\v' */
2495 else if (in_directive && CPP_PEDANTIC (pfile))
2496 cpp_pedwarn (pfile, "%s in preprocessing directive",
2497 c == '\f' ? "formfeed" : "vertical tab");
2502 buffer->cur = cur - 1;
2504 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2505 : "embedded null character ignored");
2508 /* Parse (append) an identifier. */
2510 parse_name (pfile, list, name)
2515 const unsigned char *name_limit;
2516 unsigned char *namebuf;
2517 cpp_buffer *buffer = pfile->buffer;
2518 register const unsigned char *cur = buffer->cur;
2521 name_limit = list->namebuf + list->name_cap;
2522 namebuf = list->namebuf + list->name_used;
2524 for (; cur < buffer->rlimit && namebuf < name_limit; )
2526 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2532 if (c == '$' && CPP_PEDANTIC (pfile))
2535 cpp_pedwarn (pfile, "'$' character in identifier");
2539 /* Run out of name space? */
2540 if (cur < buffer->rlimit)
2542 list->name_used = namebuf - list->namebuf;
2543 auto_expand_name_space (list);
2549 name->len = namebuf - (list->namebuf + name->offset);
2550 list->name_used = namebuf - list->namebuf;
2553 /* Parse (append) a number. */
2555 #define VALID_SIGN(c, prevc) \
2556 (((c) == '+' || (c) == '-') && \
2557 ((prevc) == 'e' || (prevc) == 'E' \
2558 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2561 parse_number (pfile, list, name)
2566 const unsigned char *name_limit;
2567 unsigned char *namebuf;
2568 cpp_buffer *buffer = pfile->buffer;
2569 register const unsigned char *cur = buffer->cur;
2572 name_limit = list->namebuf + list->name_cap;
2573 namebuf = list->namebuf + list->name_used;
2575 for (; cur < buffer->rlimit && namebuf < name_limit; )
2577 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2579 /* Perhaps we should accept '$' here if we accept it for
2580 identifiers. We know namebuf[-1] is safe, because for c to
2581 be a sign we must have pushed at least one character. */
2582 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2589 /* Run out of name space? */
2590 if (cur < buffer->rlimit)
2592 list->name_used = namebuf - list->namebuf;
2593 auto_expand_name_space (list);
2599 name->len = namebuf - (list->namebuf + name->offset);
2600 list->name_used = namebuf - list->namebuf;
2603 /* Places a string terminated by an unescaped TERMINATOR into a
2604 cpp_name, which should be expandable and thus at the top of the
2605 list's stack. Handles embedded trigraphs, if necessary, and
2608 Can be used for character constants (terminator = '\''), string
2609 constants ('"'), angled headers ('>') and assertions (')'). */
2612 parse_string (pfile, list, name, terminator)
2616 unsigned int terminator;
2618 cpp_buffer *buffer = pfile->buffer;
2619 register const unsigned char *cur = buffer->cur;
2620 const unsigned char *name_limit;
2621 unsigned char *namebuf;
2622 unsigned int null_count = 0;
2623 int trigraphed_len = 0;
2626 name_limit = list->namebuf + list->name_cap;
2627 namebuf = list->namebuf + list->name_used;
2629 for (; cur < buffer->rlimit && namebuf < name_limit; )
2631 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2635 else if (c == terminator || IS_NEWLINE (c))
2637 unsigned char* name_start = list->namebuf + name->offset;
2639 /* Needed for trigraph_replace and multiline string warning. */
2642 /* Scan for trigraphs before checking if backslash-escaped. */
2643 if (CPP_OPTION (pfile, trigraphs)
2644 || CPP_OPTION (pfile, warn_trigraphs))
2646 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2648 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2649 if (trigraphed_len < 0)
2653 namebuf--; /* Drop the newline / terminator from the name. */
2656 /* Drop a backslash newline, and continue. */
2657 if (namebuf[-1] == '\\')
2659 handle_newline (cur, buffer->rlimit, c);
2666 /* In Fortran and assembly language, silently terminate
2667 strings of either variety at end of line. This is a
2668 kludge around not knowing where comments are in these
2670 if (CPP_OPTION (pfile, lang_fortran)
2671 || CPP_OPTION (pfile, lang_asm))
2674 /* Character constants, headers and asserts may not
2675 extend over multiple lines. In Standard C, neither
2676 may strings. We accept multiline strings as an
2677 extension, but not in directives. */
2678 if (terminator != '"' || IS_DIRECTIVE (list))
2681 cur++; /* Move forwards again. */
2683 if (pfile->multiline_string_line == 0)
2685 pfile->multiline_string_line = list->line;
2686 if (CPP_PEDANTIC (pfile))
2687 cpp_pedwarn (pfile, "multi-line string constant");
2691 handle_newline (cur, buffer->rlimit, c);
2695 unsigned char *temp;
2697 /* An odd number of consecutive backslashes represents
2698 an escaped terminator. */
2700 while (temp >= name_start && *temp == '\\')
2703 if ((namebuf - temp) & 1)
2710 /* Run out of name space? */
2711 if (cur < buffer->rlimit)
2713 list->name_used = namebuf - list->namebuf;
2714 auto_expand_name_space (list);
2718 /* We may not have trigraph-replaced the input for this code path,
2719 but as the input is in error by being unterminated we don't
2720 bother. Prevent warnings about no newlines at EOF. */
2721 if (IS_NEWLINE(cur[-1]))
2725 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2727 if (terminator == '\"' && pfile->multiline_string_line != list->line
2728 && pfile->multiline_string_line != 0)
2730 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2731 "possible start of unterminated string literal");
2732 pfile->multiline_string_line = 0;
2737 name->len = namebuf - (list->namebuf + name->offset);
2738 list->name_used = namebuf - list->namebuf;
2741 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2742 : "null character preserved"));
2745 /* The character C helps us distinguish comment types: '*' = C style,
2746 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2747 stored comment includes any C-style comment terminator. */
2749 copy_comment (list, from, len, tok_no, type)
2751 const unsigned char *from;
2753 unsigned int tok_no;
2758 if (list->comments_used == list->comments_cap)
2759 expand_comment_space (list);
2761 if (list->name_used + len > list->name_cap)
2762 expand_name_space (list, len);
2764 comment = &list->comments[list->comments_used++];
2765 comment->type = type;
2766 comment->aux = tok_no;
2767 comment->val.name.len = len;
2768 comment->val.name.offset = list->name_used;
2770 memcpy (list->namebuf + list->name_used, from, len);
2771 list->name_used += len;
2775 * The tokenizer's main loop. Returns a token list, representing a
2776 * logical line in the input file, terminated with a CPP_VSPACE
2777 * token. On EOF, a token list containing the single CPP_EOF token
2780 * Implementation relies almost entirely on lookback, rather than
2781 * looking forwards. This means that tokenization requires just
2782 * a single pass of the file, even in the presence of trigraphs and
2783 * escaped newlines, providing significant performance benefits.
2784 * Trigraph overhead is negligible if they are disabled, and low
2785 * even when enabled.
2788 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
2789 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
2790 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
2791 #define BACKUP_DIGRAPH(ttype) do { \
2792 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
2795 _cpp_lex_line (pfile, list)
2799 cpp_token *cur_token, *token_limit;
2800 cpp_buffer *buffer = pfile->buffer;
2801 register const unsigned char *cur = buffer->cur;
2802 unsigned char flags = 0;
2805 token_limit = list->tokens + list->tokens_cap;
2806 cur_token = list->tokens + list->tokens_used;
2808 for (; cur < buffer->rlimit && cur_token < token_limit;)
2810 unsigned char c = *cur++;
2812 /* Optimize whitespace skipping, in particular the case of a
2813 single whitespace character, as every other token is probably
2814 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2815 if (is_hspace ((unsigned int) c))
2817 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2819 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2820 skip_whitespace (pfile, IS_DIRECTIVE (list));
2823 flags = PREV_WHITESPACE;
2824 if (cur == buffer->rlimit)
2829 /* Initialize current token. Its type is set in the switch. */
2830 cur_token->col = COLUMN (cur);
2831 cur_token->flags = flags;
2836 case '0': case '1': case '2': case '3': case '4':
2837 case '5': case '6': case '7': case '8': case '9':
2838 /* Prepend an immediately previous CPP_DOT token. */
2839 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2842 if (list->name_cap == list->name_used)
2843 auto_expand_name_space (list);
2845 cur_token->val.name.len = 1;
2846 cur_token->val.name.offset = list->name_used;
2847 list->namebuf[list->name_used++] = '.';
2850 INIT_NAME (list, cur_token->val.name);
2851 cur--; /* Backup character. */
2855 parse_number (pfile, list, &cur_token->val.name);
2858 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2863 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2864 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2865 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2866 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2868 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2869 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2870 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2871 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2873 INIT_NAME (list, cur_token->val.name);
2874 cur--; /* Backup character. */
2875 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2879 parse_name (pfile, list, &cur_token->val.name);
2882 /* Find handler for newly created / extended directive. */
2883 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2884 _cpp_check_directive (list, cur_token);
2891 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2892 /* Do we have a wide string? */
2893 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2894 && cur_token[-1].val.name.len == 1
2895 && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
2896 && !CPP_TRADITIONAL (pfile))
2898 /* No need for 'L' any more. */
2900 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2904 /* Here c is one of ' " > or ). */
2905 INIT_NAME (list, cur_token->val.name);
2907 parse_string (pfile, list, &cur_token->val.name, c);
2913 cur_token->type = CPP_DIV;
2916 if (PREV_TOKEN_TYPE == CPP_DIV)
2918 /* We silently allow C++ comments in system headers,
2919 irrespective of conformance mode, because lots of
2920 broken systems do that and trying to clean it up
2921 in fixincludes is a nightmare. */
2922 if (buffer->system_header_p)
2923 goto do_line_comment;
2924 else if (CPP_OPTION (pfile, cplusplus_comments))
2926 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2927 && ! buffer->warned_cplusplus_comments)
2931 "C++ style comments are not allowed in ISO C89");
2933 "(this will be reported only once per input file)");
2934 buffer->warned_cplusplus_comments = 1;
2940 "comment start split across lines");
2941 if (skip_line_comment (pfile))
2942 cpp_error_with_line (pfile, list->line,
2944 "multi-line comment");
2945 if (!CPP_OPTION (pfile, discard_comments))
2946 copy_comment (list, cur, buffer->cur - cur,
2947 cur_token - 1 - list->tokens, c == '/'
2948 ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
2951 /* Back-up to first '-' or '/'. */
2953 if (!CPP_OPTION (pfile, traditional))
2954 flags = PREV_WHITESPACE;
2962 cur_token->type = CPP_MULT;
2965 if (PREV_TOKEN_TYPE == CPP_DIV)
2970 "comment start '/*' split across lines");
2971 if (skip_block_comment (pfile))
2972 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
2973 "unterminated comment");
2974 else if (buffer->cur[-2] != '*')
2976 "comment end '*/' split across lines");
2977 if (!CPP_OPTION (pfile, discard_comments))
2978 copy_comment (list, cur, buffer->cur - cur,
2979 cur_token - 1 - list->tokens, CPP_C_COMMENT);
2983 if (!CPP_OPTION (pfile, traditional))
2984 flags = PREV_WHITESPACE;
2986 else if (CPP_OPTION (pfile, cplusplus))
2988 /* In C++, there are .* and ->* operators. */
2989 if (PREV_TOKEN_TYPE == CPP_DEREF)
2990 BACKUP_TOKEN (CPP_DEREF_STAR);
2991 else if (PREV_TOKEN_TYPE == CPP_DOT)
2992 BACKUP_TOKEN (CPP_DOT_STAR);
3000 handle_newline (cur, buffer->rlimit, c);
3001 if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
3003 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3007 "backslash and newline separated by space");
3009 PUSH_TOKEN (CPP_VSPACE);
3012 /* Remove the escaped newline. Then continue to process
3013 any interrupted name or number. */
3018 if (cur_token->type == CPP_NAME)
3020 else if (cur_token->type == CPP_NUMBER)
3021 goto continue_number;
3027 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3029 if (CPP_OPTION (pfile, chill))
3030 goto do_line_comment;
3031 REVISE_TOKEN (CPP_MINUS_MINUS);
3034 PUSH_TOKEN (CPP_MINUS);
3037 /* The digraph flag checking ensures that ## and %:%:
3038 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3041 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3042 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3043 REVISE_TOKEN (CPP_PASTE);
3045 PUSH_TOKEN (CPP_HASH);
3049 cur_token->type = CPP_COLON;
3052 if (PREV_TOKEN_TYPE == CPP_COLON
3053 && CPP_OPTION (pfile, cplusplus))
3054 BACKUP_TOKEN (CPP_SCOPE);
3055 /* Digraph: "<:" is a '[' */
3056 else if (PREV_TOKEN_TYPE == CPP_LESS)
3057 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3058 /* Digraph: "%:" is a '#' */
3059 else if (PREV_TOKEN_TYPE == CPP_MOD)
3061 (--cur_token)->flags |= DIGRAPH;
3069 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3070 REVISE_TOKEN (CPP_AND_AND);
3072 PUSH_TOKEN (CPP_AND);
3077 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3078 REVISE_TOKEN (CPP_OR_OR);
3080 PUSH_TOKEN (CPP_OR);
3084 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3085 REVISE_TOKEN (CPP_PLUS_PLUS);
3087 PUSH_TOKEN (CPP_PLUS);
3091 /* This relies on equidistance of "?=" and "?" tokens. */
3092 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3093 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3095 PUSH_TOKEN (CPP_EQ);
3099 cur_token->type = CPP_GREATER;
3102 if (PREV_TOKEN_TYPE == CPP_GREATER)
3103 BACKUP_TOKEN (CPP_RSHIFT);
3104 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3105 BACKUP_TOKEN (CPP_DEREF);
3106 /* Digraph: ":>" is a ']' */
3107 else if (PREV_TOKEN_TYPE == CPP_COLON)
3108 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3109 /* Digraph: "%>" is a '}' */
3110 else if (PREV_TOKEN_TYPE == CPP_MOD)
3111 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3117 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3119 REVISE_TOKEN (CPP_LSHIFT);
3122 /* Is this the beginning of a header name? */
3123 if (list->dir_flags & SYNTAX_INCLUDE)
3125 c = '>'; /* Terminator. */
3126 cur_token->type = CPP_HEADER_NAME;
3127 goto do_parse_string;
3129 PUSH_TOKEN (CPP_LESS);
3133 /* Digraph: "<%" is a '{' */
3134 cur_token->type = CPP_MOD;
3135 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3136 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3141 /* Is this the beginning of an assertion string? */
3142 if (list->dir_flags & SYNTAX_ASSERT)
3144 c = ')'; /* Terminator. */
3145 cur_token->type = CPP_ASSERTION;
3146 goto do_parse_string;
3148 PUSH_TOKEN (CPP_OPEN_PAREN);
3152 if (cur + 1 < buffer->rlimit && *cur == '?'
3153 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3155 /* Handle trigraph. */
3159 case '(': goto make_open_square;
3160 case ')': goto make_close_square;
3161 case '<': goto make_open_brace;
3162 case '>': goto make_close_brace;
3163 case '=': goto make_hash;
3164 case '!': goto make_or;
3165 case '-': goto make_complement;
3166 case '/': goto make_backslash;
3167 case '\'': goto make_xor;
3170 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3172 /* GNU C++ defines <? and >? operators. */
3173 if (PREV_TOKEN_TYPE == CPP_LESS)
3175 REVISE_TOKEN (CPP_MIN);
3178 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3180 REVISE_TOKEN (CPP_MAX);
3184 PUSH_TOKEN (CPP_QUERY);
3188 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3190 && !(cur_token[-1].flags & PREV_WHITESPACE))
3193 PUSH_TOKEN (CPP_ELLIPSIS);
3196 PUSH_TOKEN (CPP_DOT);
3200 case '~': PUSH_TOKEN (CPP_COMPL); break;
3202 case '^': PUSH_TOKEN (CPP_XOR); break;
3204 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3206 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3208 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3210 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3212 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3213 case '!': PUSH_TOKEN (CPP_NOT); break;
3214 case ',': PUSH_TOKEN (CPP_COMMA); break;
3215 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3216 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3219 if (CPP_OPTION (pfile, dollars_in_ident))
3224 PUSH_TOKEN (CPP_OTHER);
3229 /* Run out of token space? */
3230 if (cur_token == token_limit)
3232 list->tokens_used = cur_token - list->tokens;
3233 expand_token_space (list);
3237 cur_token->type = CPP_EOF;
3238 cur_token->flags = flags;
3240 if (cur_token != &list->tokens[0])
3242 /* Next call back will get just a CPP_EOF. */
3244 cpp_warning (pfile, "no newline at end of file");
3245 PUSH_TOKEN (CPP_VSPACE);
3251 list->tokens_used = cur_token - list->tokens;
3253 /* FIXME: take this check out and put it in the caller.
3254 list->directive == 0 indicates an unknown directive (but null
3255 directive is OK). This is the first time we can be sure the
3256 directive is invalid, and thus warn about it, because it might
3257 have been split by escaped newlines. Also, don't complain about
3258 invalid directives in assembly source, we don't know where the
3259 comments are, and # may introduce assembler pseudo-ops. */
3261 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3262 && list->tokens[1].type != CPP_VSPACE
3263 && !CPP_OPTION (pfile, lang_asm))
3264 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3265 "invalid preprocessing directive");
3268 /* Token spelling functions. Used for output of a preprocessed file,
3269 stringizing and token pasting. They all assume sufficient buffer
3270 is allocated, and return exactly how much they used. */
3272 /* Spell the string or character-constant TOKEN into BUFFER.
   The quote character C is a double quote for CPP_STRING and
   CPP_WSTRING tokens and a single quote for every other type.  The
   token's text is the TOKEN->val.name.len bytes located at
   LIST->namebuf + TOKEN->val.name.offset.  Returns how far BUFFER
   has moved from its value on entry.  Needs buffer of 3 + len.

   NOTE(review): not every statement of this function is visible
   here -- nothing shown stores C into BUFFER or advances BUFFER.
   Presumably the 3 extra bytes are an L prefix plus the opening and
   closing quotes; confirm against the complete file.  */
3274 spell_string (buffer, list, token)
3275 unsigned char *buffer;
/* ORIG_BUFF keeps the entry value of BUFFER so the amount written
   can be computed for the return value.  */
3279 unsigned char c, *orig_buff = buffer;
/* Wide strings and wide character constants are singled out here;
   the statement this test guards is not visible in this excerpt
   (presumably it emits the L prefix -- TODO confirm).  */
3282 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
/* Pick the delimiter from the token type.  */
3284 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
/* Copy the body of the literal out of the list's name buffer.  */
3287 len = token->val.name.len;
3288 memcpy (buffer, list->namebuf + token->val.name.offset, len);
/* Number of bytes between the entry position and the current one.  */
3291 return buffer - orig_buff;
3294 /* Spell the comment TOKEN into BUFFER.  The token type is tested
   against CPP_C_COMMENT and then CPP_CPP_COMMENT, after which the
   comment text -- TOKEN->val.name.len bytes at LIST->namebuf +
   TOKEN->val.name.offset -- is copied into BUFFER.
   Needs buffer of len + 2.

   NOTE(review): the statements guarded by the two type tests, any
   final else branch, and the return statement are not visible in
   this excerpt.  Presumably each branch writes a two-character
   comment opener, which would account for the "+ 2"; confirm
   against the complete file.  */
3296 spell_comment (buffer, list, token)
3297 unsigned char *buffer;
/* Block comment.  */
3303 if (token->type == CPP_C_COMMENT)
/* Line comment.  */
3308 else if (token->type == CPP_CPP_COMMENT)
/* Copy the stored comment text out of the list's name buffer.  */
3319 len = token->val.name.len;
3320 memcpy (buffer, list->namebuf + token->val.name.offset, len);
3325 /* Spell the name TOKEN into BUFFER by copying its
   TOKEN->val.name.len bytes from LIST->namebuf +
   TOKEN->val.name.offset.  Needs buffer of len.

   NOTE(review): the declarations and the return statement are not
   visible in this excerpt; presumably the function returns LEN, in
   line with the other spelling functions reporting how much they
   used -- confirm against the complete file.  */
3327 spell_name (buffer, list, token)
3328 unsigned char *buffer;
/* The spelling lives in the list's shared name buffer; TOKEN only
   records where it starts and how long it is.  */
3334 len = token->val.name.len;
3335 memcpy (buffer, list->namebuf + token->val.name.offset, len);
/* Drive the lexer over the input of PFILE one line at a time.
   Sets up the trigraph map, allocates a token list, and then loops:
   the list is (re)initialized, one line is lexed into it with
   _cpp_lex_line, and the first token is checked for CPP_EOF.  A line
   that has a directive handler gets that handler called with PFILE;
   the visible code also hands lists to _cpp_output_list.

   NOTE(review): several statements are not visible in this excerpt,
   including whatever follows the CPP_EOF test (presumably the loop
   exit), the updates of RECYCLE, and the branch structure around
   the _cpp_output_list call (presumably the non-directive case).
   Confirm against the complete file before relying on this.  */
3342 _cpp_lex_file (pfile)
3348 init_trigraph_map ();
3349 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* RECYCLE starts at 0 and is passed to init_token_list on every
   iteration; the loop header has no termination condition.  */
3351 for (recycle = 0; ;)
3353 init_token_list (pfile, list, recycle);
3356 _cpp_lex_line (pfile, list);
/* End of input is signalled by CPP_EOF as the first token of the
   list.  */
3357 if (list->tokens[0].type == CPP_EOF)
/* A non-null dir_handler marks the line as a directive with a known
   handler.  */
3360 if (list->dir_handler)
/* When the handler returns nonzero a fresh list is allocated and the
   old one is not freed here -- presumably the handler has taken
   ownership of it; TODO confirm.  */
3362 if (list->dir_handler (pfile))
3364 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3369 _cpp_output_list (pfile, list);
/* This could be useful to other routines.  If you allocate this many
   bytes, you have enough room to spell the token.

   TOKEN is a pointer to the token to be spelled.  The result is 4,
   plus the length of the token's name when its spelling type is
   SPELL_HANDLER.  The argument is parenthesized at each use so that
   any pointer-valued expression (for example &tok or p + 1) may be
   passed.  It can be evaluated more than once, so it must not have
   side effects.  */
#define TOKEN_LEN(token) (4 + (token_spellings[(token)->type].type == \
			       SPELL_HANDLER ? (token)->val.name.len: 0))
3379 _cpp_output_list (pfile, list)
3383 unsigned int comment_no = 0;
3384 cpp_token *token, *comment_token = 0;
3386 if (list->comments_used > 0)
3387 comment_token = list->tokens + list->comments[0].aux;
3389 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
3390 for (token = &list->tokens[0];; token++)
3392 if (token->flags & PREV_WHITESPACE)
3394 /* Output comments if -C. Otherwise a space will do. */
3395 if (token == comment_token)
3397 cpp_token *comment = &list->comments[comment_no];
3400 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
3401 pfile->limit += spell_comment (pfile->limit, list, comment);
3402 comment_no++, comment++;
3403 if (comment_no == list->comments_used)
3405 comment_token = comment->aux + list->tokens;
3407 while (comment_token == token);
3410 CPP_PUTC_Q (pfile, ' ');
3413 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
3414 switch (token_spellings[token->type].type)
3418 const unsigned char *spelling;
3421 if (token->flags & DIGRAPH)
3422 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3424 spelling = token_spellings[token->type].speller;
3426 while ((c = *spelling++) != '\0')
3427 CPP_PUTC_Q (pfile, c);
3435 s = (speller) token_spellings[token->type].speller;
3436 pfile->limit += s (pfile->limit, list, token);
3441 *pfile->limit++ = token->aux;
3445 CPP_PUTC_Q (pfile, '\n');
3449 cpp_error (pfile, "Unwriteable token");