1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Look at the character N positions past BUFFER->cur without moving
   cur.  Yields that character when more than N characters lie between
   cur and rlimit, and EOF otherwise.  BUFFER is evaluated more than
   once, so it must not have side effects. */
30 #define PEEKBUF(BUFFER, N) \
31 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* Fetch the character at BUFFER->cur and step cur past it.  When cur
   has already reached rlimit, yields EOF and leaves cur unchanged.
   BUFFER is evaluated more than once. */
32 #define GETBUF(BUFFER) \
33 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* Move BUFFER->cur by N characters.  No bounds check is made against
   rlimit or the start of the buffer; N may be negative to back up
   (this file calls FORWARD (-1) to un-read a newline). */
34 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthand forms of the macros above, applied to CPP_BUFFER (pfile).
   They refer to `pfile' by name, so they can only be used where a
   variable of that name is in scope.
     PEEKN (N)   - character N positions ahead, or EOF; does not advance.
     FORWARD (N) - move the read position by N characters.
     GETC ()     - read one character and advance, or EOF.
     PEEKC ()    - same as PEEKN (0). */
36 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
37 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
38 #define GETC() GETBUF (CPP_BUFFER (pfile))
39 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
41 static void skip_block_comment PARAMS ((cpp_reader *));
42 static void skip_line_comment PARAMS ((cpp_reader *));
43 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
44 static int skip_comment PARAMS ((cpp_reader *, int));
45 static int copy_comment PARAMS ((cpp_reader *, int));
46 static void skip_string PARAMS ((cpp_reader *, int));
47 static void parse_string PARAMS ((cpp_reader *, int));
48 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
49 static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
50 static void null_warning PARAMS ((cpp_reader *, unsigned int));
52 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
54 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
56 static void bump_column PARAMS ((cpp_printer *, unsigned int,
58 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
59 static void expand_token_space PARAMS ((cpp_toklist *));
60 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
61 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Enlarge LIST's name buffer by half of its current capacity:
   expand_name_space adds its second argument to list->name_cap, and
   that argument here is name_cap / 2.  LIST is evaluated twice. */
64 #define auto_expand_name_space(list) \
65 expand_name_space ((list), (list)->name_cap / 2)
67 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
70 _cpp_grow_token_buffer (pfile, n)
74 long old_written = CPP_WRITTEN (pfile);
75 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
76 pfile->token_buffer = (U_CHAR *)
77 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
78 CPP_SET_WRITTEN (pfile, old_written);
82 null_cleanup (pbuf, pfile)
83 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
84 cpp_reader *pfile ATTRIBUTE_UNUSED;
89 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
90 If BUFFER != NULL, then use the LENGTH characters in BUFFER
91 as the new input buffer.
92 Return the new buffer, or NULL on failure. */
95 cpp_push_buffer (pfile, buffer, length)
100 cpp_buffer *buf = CPP_BUFFER (pfile);
102 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
104 cpp_fatal (pfile, "macro or `#include' recursion too deep");
108 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
110 new->if_stack = pfile->if_stack;
111 new->cleanup = null_cleanup;
112 new->buf = new->cur = buffer;
113 new->rlimit = buffer + length;
116 new->line_base = NULL;
118 CPP_BUFFER (pfile) = new;
123 cpp_pop_buffer (pfile)
126 cpp_buffer *buf = CPP_BUFFER (pfile);
127 if (ACTIVE_MARK_P (pfile))
128 cpp_ice (pfile, "mark active in cpp_pop_buffer");
129 (*buf->cleanup) (buf, pfile);
130 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
132 pfile->buffer_stack_depth--;
133 return CPP_BUFFER (pfile);
136 /* Deal with the annoying semantics of fwrite. */
138 safe_fwrite (pfile, buf, len, fp)
148 count = fwrite (buf, 1, len, fp);
157 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
160 /* Notify the compiler proper that the current line number has jumped,
161 or the current file name has changed. */
164 output_line_command (pfile, print, line)
169 cpp_buffer *ip = cpp_file_buffer (pfile);
170 enum { same = 0, enter, leave, rname } change;
171 static const char * const codes[] = { "", " 1", " 2", "" };
173 if (CPP_OPTION (pfile, no_line_commands))
176 /* Determine whether the current filename has changed, and if so,
177 how. 'nominal_fname' values are unique, so they can be compared
178 by comparing pointers. */
179 if (ip->nominal_fname == print->last_fname)
183 if (pfile->buffer_stack_depth == print->last_bsd)
187 if (pfile->buffer_stack_depth > print->last_bsd)
191 print->last_bsd = pfile->buffer_stack_depth;
193 print->last_fname = ip->nominal_fname;
195 /* If the current file has not changed, we can output a few newlines
196 instead if we want to increase the line number by a small amount.
197 We cannot do this if print->lineno is zero, because that means we
198 haven't output any line commands yet. (The very first line
199 command output is a `same_file' command.) */
200 if (change == same && print->lineno != 0
201 && line >= print->lineno && line < print->lineno + 8)
203 while (line > print->lineno)
205 putc ('\n', print->outf);
211 #ifndef NO_IMPLICIT_EXTERN_C
212 if (CPP_OPTION (pfile, cplusplus))
213 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
215 ip->system_header_p ? " 3" : "",
216 (ip->system_header_p == 2) ? " 4" : "");
219 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
221 ip->system_header_p ? " 3" : "");
222 print->lineno = line;
225 /* Write the contents of the token_buffer to the output stream, and
226 clear the token_buffer. Also handles generating line commands and
227 keeping track of file transitions. */
230 cpp_output_tokens (pfile, print)
236 if (CPP_WRITTEN (pfile) - print->written)
238 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
240 safe_fwrite (pfile, pfile->token_buffer,
241 CPP_WRITTEN (pfile) - print->written, print->outf);
244 ip = cpp_file_buffer (pfile);
246 output_line_command (pfile, print, CPP_BUF_LINE (ip));
248 CPP_SET_WRITTEN (pfile, print->written);
251 /* Helper for cpp_output_list - increases the column number to match
252 what we expect it to be. */
255 bump_column (print, from, to)
257 unsigned int from, to;
259 unsigned int tabs, spcs;
260 unsigned int delta = to - from;
262 /* Only if FROM is 0, advance by tabs. */
264 tabs = delta / 8, spcs = delta % 8;
266 tabs = 0, spcs = delta;
268 while (tabs--) putc ('\t', print->outf);
269 while (spcs--) putc (' ', print->outf);
272 /* Write out the list L onto pfile->token_buffer. This function is incomplete:
275 1) pfile->token_buffer is not going to continue to exist.
276 2) At the moment, tokens don't carry the information described
277 in cpplib.h; they are all strings.
278 3) The list has to be a complete line, and has to be written starting
279 at the beginning of a line. */
282 cpp_output_list (pfile, print, list)
285 const cpp_toklist *list;
288 unsigned int curcol = 1;
290 /* XXX Probably does not do what is intended. */
291 if (print->lineno != list->line)
292 output_line_command (pfile, print, list->line);
294 for (i = 0; i < list->tokens_used; i++)
296 if (list->tokens[i].type == CPP_VSPACE)
298 output_line_command (pfile, print, list->tokens[i].aux);
302 if (curcol < list->tokens[i].col)
304 /* Insert space to bring the column to what it should be. */
305 bump_column (print, curcol - 1, list->tokens[i].col);
306 curcol = list->tokens[i].col;
308 /* XXX We may have to insert space to prevent an accidental
310 safe_fwrite (pfile, list->namebuf + list->tokens[i].val.name.offset,
311 list->tokens[i].val.name.len, print->outf);
312 curcol += list->tokens[i].val.name.len;
316 /* Scan a string (which may have escape marks), perform macro expansion,
317 and write the result to the token_buffer. */
320 _cpp_expand_to_buffer (pfile, buf, length)
326 enum cpp_ttype token;
331 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
335 /* Copy the buffer, because it might be in an unsafe place - for
336 example, a sequence on the token_buffer, where the pointers will
337 be invalidated if we enlarge the token_buffer. */
338 buf1 = alloca (length);
339 memcpy (buf1, buf, length);
341 /* Set up the input on the input stack. */
342 ip = cpp_push_buffer (pfile, buf1, length);
347 /* Scan the input, create the output. */
350 token = cpp_get_token (pfile);
351 if (token == CPP_EOF)
353 if (token == CPP_POP && CPP_BUFFER (pfile) == ip)
355 cpp_pop_buffer (pfile);
361 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
362 Then pop the buffer. */
365 cpp_scan_buffer_nooutput (pfile)
368 cpp_buffer *buffer = CPP_BUFFER (pfile);
369 enum cpp_ttype token;
370 unsigned int old_written = CPP_WRITTEN (pfile);
371 /* In no-output mode, we can ignore everything but directives. */
374 if (! pfile->only_seen_white)
375 _cpp_skip_rest_of_line (pfile);
376 token = cpp_get_token (pfile);
377 if (token == CPP_EOF)
379 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
381 cpp_pop_buffer (pfile);
385 CPP_SET_WRITTEN (pfile, old_written);
388 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.
389 Then pop the buffer. */
392 cpp_scan_buffer (pfile, print)
396 cpp_buffer *buffer = CPP_BUFFER (pfile);
397 enum cpp_ttype token;
401 token = cpp_get_token (pfile);
402 if ((token == CPP_POP && !CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
403 || token == CPP_EOF || token == CPP_VSPACE
404 /* XXX Temporary kluge - force flush after #include only */
405 || (token == CPP_DIRECTIVE
406 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
408 cpp_output_tokens (pfile, print);
409 if (token == CPP_EOF)
411 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
413 cpp_pop_buffer (pfile);
420 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
423 cpp_file_buffer (pfile)
428 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
429 if (ip->ihash != NULL)
434 /* Token-buffer helper functions. */
436 /* Expand a token list's string space. */
438 expand_name_space (list, len)
442 list->name_cap += len;
443 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
446 /* Expand the number of tokens in a list. */
448 expand_token_space (list)
451 list->tokens_cap *= 2;
452 list->tokens = (cpp_token *)
453 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
454 list->tokens++; /* Skip the dummy. */
457 /* Initialize a token list. We allocate an extra token in front of
458 the token list, as this allows us to always peek at the previous
459 token without worrying about underflowing the list. */
461 init_token_list (pfile, list, recycle)
466 /* Recycling a used list saves 3 free-malloc pairs. */
469 /* Initialize token space. Put a dummy token before the start
470 that will fail matches. */
471 list->tokens_cap = 256; /* 4K's worth. */
472 list->tokens = (cpp_token *)
473 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
474 list->tokens[0].type = CPP_EOF;
477 /* Initialize name space. */
478 list->name_cap = 1024;
479 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
481 /* Only create a comment space on demand. */
482 list->comments_cap = 0;
486 list->tokens_used = 0;
488 list->comments_used = 0;
490 list->line = pfile->buffer->lineno;
491 list->dir_handler = 0;
495 /* Scan an entire line and create a token list for it. Does not
496 macro-expand or execute directives. */
499 _cpp_scan_line (pfile, list)
508 init_token_list (pfile, list, 1);
510 written = CPP_WRITTEN (pfile);
515 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
516 type = _cpp_lex_token (pfile);
517 len = CPP_WRITTEN (pfile) - written;
518 CPP_SET_WRITTEN (pfile, written);
519 if (type == CPP_HSPACE)
521 if (CPP_PEDANTIC (pfile))
522 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
526 else if (type == CPP_COMMENT)
527 /* Only happens when processing -traditional macro definitions.
528 Do not give this a token entry, but do not change space_before
532 if (list->tokens_used >= list->tokens_cap)
533 expand_token_space (list);
534 if (list->name_used + len >= list->name_cap)
535 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
537 if (type == CPP_MACRO)
541 list->tokens[i].type = type;
542 list->tokens[i].col = col;
543 list->tokens[i].flags = space_before ? PREV_WHITESPACE : 0;
545 if (type == CPP_VSPACE)
548 list->tokens[i].val.name.len = len;
549 list->tokens[i].val.name.offset = list->name_used;
550 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
551 list->name_used += len;
555 list->tokens[i].aux = CPP_BUFFER (pfile)->lineno + 1;
557 /* XXX Temporary kluge: put back the newline. */
562 /* Skip a C-style block comment. We know it's a comment, and point is
563 at the second character of the starter. */
565 skip_block_comment (pfile)
568 unsigned int line, col;
569 const U_CHAR *limit, *cur;
572 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
573 col = CPP_BUF_COL (CPP_BUFFER (pfile));
574 limit = CPP_BUFFER (pfile)->rlimit;
575 cur = CPP_BUFFER (pfile)->cur;
580 if (c == '\n' || c == '\r')
582 /* \r cannot be a macro escape marker here. */
583 if (!ACTIVE_MARK_P (pfile))
584 CPP_BUMP_LINE_CUR (pfile, cur);
588 /* Check for terminator. */
589 if (cur < limit && *cur == '/')
592 /* Warn about comment starter embedded in comment. */
593 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
594 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
595 cur - CPP_BUFFER (pfile)->line_base,
596 "'/*' within comment");
600 cpp_error_with_line (pfile, line, col, "unterminated comment");
603 CPP_BUFFER (pfile)->cur = cur + 1;
606 /* Skip a C++/Chill line comment. We know it's a comment, and point
607 is at the second character of the initiator. */
609 skip_line_comment (pfile)
617 /* We don't have to worry about EOF in here. */
620 /* Don't consider final '\n' to be part of comment. */
626 /* \r cannot be a macro escape marker here. */
627 if (!ACTIVE_MARK_P (pfile))
628 CPP_BUMP_LINE (pfile);
629 if (CPP_OPTION (pfile, warn_comments))
630 cpp_warning (pfile, "backslash-newline within line comment");
635 /* Skip a comment - C, C++, or Chill style. M is the first character
636 of the comment marker. If this really is a comment, skip to its
637 end and return ' '. If this is not a comment, return M (which will
641 skip_comment (pfile, m)
645 if (m == '/' && PEEKC() == '*')
647 skip_block_comment (pfile);
650 else if (m == '/' && PEEKC() == '/')
652 if (CPP_BUFFER (pfile)->system_header_p)
654 /* We silently allow C++ comments in system headers, irrespective
655 of conformance mode, because lots of busted systems do that
656 and trying to clean it up in fixincludes is a nightmare. */
657 skip_line_comment (pfile);
660 else if (CPP_OPTION (pfile, cplusplus_comments))
662 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
664 if (CPP_WTRADITIONAL (pfile))
666 "C++ style comments are not allowed in traditional C");
667 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
669 "C++ style comments are not allowed in ISO C89");
670 if (CPP_WTRADITIONAL (pfile)
671 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
673 "(this will be reported only once per input file)");
674 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
676 skip_line_comment (pfile);
682 else if (m == '-' && PEEKC() == '-'
683 && CPP_OPTION (pfile, chill))
685 skip_line_comment (pfile);
692 /* Identical to skip_comment except that it copies the comment into the
693 token_buffer. This is used if !discard_comments. */
695 copy_comment (pfile, m)
699 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
702 if (skip_comment (pfile, m) == m)
705 limit = CPP_BUFFER (pfile)->cur;
706 CPP_RESERVE (pfile, limit - start + 2);
707 CPP_PUTC_Q (pfile, m);
708 for (; start <= limit; start++)
710 CPP_PUTC_Q (pfile, *start);
716 null_warning (pfile, count)
721 cpp_warning (pfile, "embedded null character ignored");
723 cpp_warning (pfile, "embedded null characters ignored");
726 /* Skip whitespace \-newline and comments. Does not macro-expand. */
729 _cpp_skip_hspace (pfile)
732 unsigned int null_count = 0;
740 else if (is_hspace(c))
742 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
743 cpp_pedwarn (pfile, "%s in preprocessing directive",
744 c == '\f' ? "formfeed" : "vertical tab");
750 /* \r is a backslash-newline marker if !has_escapes, and
751 a deletable-whitespace or no-reexpansion marker otherwise. */
752 if (CPP_BUFFER (pfile)->has_escapes)
760 CPP_BUMP_LINE (pfile);
762 else if (c == '/' || c == '-')
764 c = skip_comment (pfile, c);
774 null_warning (pfile, null_count);
777 /* Read and discard the rest of the current line. */
780 _cpp_skip_rest_of_line (pfile)
794 if (! CPP_BUFFER (pfile)->has_escapes)
795 CPP_BUMP_LINE (pfile);
800 skip_string (pfile, c);
805 skip_comment (pfile, c);
810 if (CPP_PEDANTIC (pfile))
811 cpp_pedwarn (pfile, "%s in preprocessing directive",
812 c == '\f' ? "formfeed" : "vertical tab");
819 /* Parse an identifier starting with C. */
822 _cpp_parse_name (pfile, c)
834 if (c == '$' && CPP_PEDANTIC (pfile))
835 cpp_pedwarn (pfile, "`$' in identifier");
837 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
838 CPP_PUTC_Q (pfile, c);
846 /* Parse and skip over a string starting with C. A single quoted
847 string is treated like a double -- some programs (e.g., troff) are
848 perverse this way. (However, a single quoted string is not allowed
849 to extend over multiple lines.) */
851 skip_string (pfile, c)
855 unsigned int start_line, start_column;
856 unsigned int null_count = 0;
858 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
859 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
866 cpp_error_with_line (pfile, start_line, start_column,
867 "unterminated string or character constant");
868 if (pfile->multiline_string_line != start_line
869 && pfile->multiline_string_line != 0)
870 cpp_error_with_line (pfile,
871 pfile->multiline_string_line, -1,
872 "possible real start of unterminated constant");
873 pfile->multiline_string_line = 0;
881 CPP_BUMP_LINE (pfile);
882 /* In Fortran and assembly language, silently terminate
883 strings of either variety at end of line. This is a
884 kludge around not knowing where comments are in these
886 if (CPP_OPTION (pfile, lang_fortran)
887 || CPP_OPTION (pfile, lang_asm))
892 /* Character constants may not extend over multiple lines.
893 In Standard C, neither may strings. We accept multiline
894 strings as an extension. */
897 cpp_error_with_line (pfile, start_line, start_column,
898 "unterminated character constant");
902 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
903 cpp_pedwarn_with_line (pfile, start_line, start_column,
904 "string constant runs past end of line");
905 if (pfile->multiline_string_line == 0)
906 pfile->multiline_string_line = start_line;
910 if (CPP_BUFFER (pfile)->has_escapes)
912 cpp_ice (pfile, "\\r escape inside string constant");
916 /* Backslash newline is replaced by nothing at all. */
917 CPP_BUMP_LINE (pfile);
934 cpp_warning (pfile, "null character in string or character constant");
935 else if (null_count > 1)
936 cpp_warning (pfile, "null characters in string or character constant");
939 /* Parse a string and copy it to the output. */
942 parse_string (pfile, c)
946 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
949 skip_string (pfile, c);
951 limit = CPP_BUFFER (pfile)->cur;
952 CPP_RESERVE (pfile, limit - start + 2);
953 CPP_PUTC_Q (pfile, c);
954 for (; start < limit; start++)
956 CPP_PUTC_Q (pfile, *start);
959 /* Read an assertion into the token buffer, converting to
960 canonical form: `#predicate(a n swe r)' The next non-whitespace
961 character to read should be the first letter of the predicate.
962 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
963 with answer (see callers for why). In case of 0, an error has been
966 _cpp_parse_assertion (pfile)
970 _cpp_skip_hspace (pfile);
974 cpp_error (pfile, "assertion without predicate");
977 else if (! is_idstart(c))
979 cpp_error (pfile, "assertion predicate is not an identifier");
982 CPP_PUTC(pfile, '#');
984 _cpp_parse_name (pfile, c);
989 if (is_hspace(c) || c == '\r')
990 _cpp_skip_hspace (pfile);
996 CPP_PUTC(pfile, '(');
999 while ((c = GETC()) != ')')
1005 CPP_PUTC(pfile, ' ');
1009 else if (c == '\n' || c == EOF)
1011 if (c == '\n') FORWARD(-1);
1012 cpp_error (pfile, "un-terminated assertion answer");
1016 /* \r cannot be a macro escape here. */
1017 CPP_BUMP_LINE (pfile);
1020 CPP_PUTC (pfile, c);
1025 if (pfile->limit[-1] == ' ')
1026 pfile->limit[-1] = ')';
1027 else if (pfile->limit[-1] == '(')
1029 cpp_error (pfile, "empty token sequence in assertion");
1033 CPP_PUTC (pfile, ')');
1038 /* Get the next token, and add it to the text in pfile->token_buffer.
1039 Return the kind of token we got. */
1042 _cpp_lex_token (pfile)
1046 enum cpp_ttype token;
1048 if (CPP_BUFFER (pfile) == NULL)
1059 if (PEEKC () == '=')
1063 if (CPP_OPTION (pfile, discard_comments))
1064 c = skip_comment (pfile, c);
1066 c = copy_comment (pfile, c);
1070 /* Comments are equivalent to spaces.
1071 For -traditional, a comment is equivalent to nothing. */
1072 if (!CPP_OPTION (pfile, discard_comments))
1074 else if (CPP_TRADITIONAL (pfile))
1076 if (pfile->parsing_define_directive)
1082 CPP_PUTC (pfile, c);
1087 CPP_PUTC (pfile, c);
1090 if (pfile->parsing_if_directive)
1092 CPP_ADJUST_WRITTEN (pfile, -1);
1093 if (_cpp_parse_assertion (pfile))
1094 return CPP_ASSERTION;
1098 if (pfile->parsing_define_directive)
1104 CPP_PUTC (pfile, c2);
1106 else if (c2 == '%' && PEEKN (1) == ':')
1108 /* Digraph: "%:" == "#". */
1110 CPP_RESERVE (pfile, 2);
1111 CPP_PUTC_Q (pfile, c2);
1112 CPP_PUTC_Q (pfile, GETC ());
1120 if (!pfile->only_seen_white)
1123 /* Remove the "#" or "%:" from the token buffer. */
1124 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1125 return CPP_DIRECTIVE;
1129 parse_string (pfile, c);
1130 return c == '\'' ? CPP_CHAR : CPP_STRING;
1133 if (!CPP_OPTION (pfile, dollars_in_ident))
1139 /* Digraph: ":>" == "]". */
1141 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1149 if (c2 == c || c2 == '=')
1154 /* Digraphs: "%:" == "#", "%>" == "}". */
1159 CPP_RESERVE (pfile, 2);
1160 CPP_PUTC_Q (pfile, c);
1161 CPP_PUTC_Q (pfile, c2);
1167 CPP_RESERVE (pfile, 2);
1168 CPP_PUTC_Q (pfile, c);
1169 CPP_PUTC_Q (pfile, c2);
1170 return CPP_OPEN_BRACE;
1172 /* else fall through */
1178 if (PEEKC () == '=')
1186 if (CPP_OPTION (pfile, chill))
1187 goto comment; /* Chill style comment */
1195 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1197 /* In C++, there's a ->* operator. */
1199 CPP_RESERVE (pfile, 4);
1200 CPP_PUTC_Q (pfile, c);
1201 CPP_PUTC_Q (pfile, GETC ());
1202 CPP_PUTC_Q (pfile, GETC ());
1210 if (pfile->parsing_include_directive)
1214 CPP_PUTC (pfile, c);
1218 if (c == '\n' || c == EOF)
1221 "missing '>' in `#include <FILENAME>'");
1226 if (!CPP_BUFFER (pfile)->has_escapes)
1228 /* Backslash newline is replaced by nothing. */
1229 CPP_ADJUST_WRITTEN (pfile, -1);
1230 CPP_BUMP_LINE (pfile);
1234 /* We might conceivably get \r- or \r<space> in
1235 here. Just delete 'em. */
1237 if (d != '-' && d != ' ')
1238 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1239 CPP_ADJUST_WRITTEN (pfile, -1);
1245 /* Digraphs: "<%" == "{", "<:" == "[". */
1250 CPP_RESERVE (pfile, 2);
1251 CPP_PUTC_Q (pfile, c);
1252 CPP_PUTC_Q (pfile, c2);
1253 return CPP_CLOSE_BRACE;
1257 /* else fall through */
1262 /* GNU C++ supports MIN and MAX operators <? and >?. */
1263 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1266 CPP_RESERVE (pfile, 3);
1267 CPP_PUTC_Q (pfile, c);
1268 CPP_PUTC_Q (pfile, c2);
1269 if (PEEKC () == '=')
1270 CPP_PUTC_Q (pfile, GETC ());
1277 CPP_PUTC (pfile, c);
1282 /* In C++ there's a .* operator. */
1283 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1286 if (c2 == '.' && PEEKN(1) == '.')
1288 CPP_RESERVE (pfile, 3);
1289 CPP_PUTC_Q (pfile, '.');
1290 CPP_PUTC_Q (pfile, '.');
1291 CPP_PUTC_Q (pfile, '.');
1293 return CPP_ELLIPSIS;
1298 CPP_RESERVE (pfile, 2);
1299 CPP_PUTC_Q (pfile, c);
1300 CPP_PUTC_Q (pfile, GETC ());
1305 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1307 CPP_PUTC (pfile, c);
1309 parse_string (pfile, c);
1310 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1314 case '0': case '1': case '2': case '3': case '4':
1315 case '5': case '6': case '7': case '8': case '9':
1320 CPP_RESERVE (pfile, 2);
1321 CPP_PUTC_Q (pfile, c);
1325 if (!is_numchar(c) && c != '.'
1326 && ((c2 != 'e' && c2 != 'E'
1327 && ((c2 != 'p' && c2 != 'P')
1328 || CPP_OPTION (pfile, c89)))
1329 || (c != '+' && c != '-')))
1335 case 'b': case 'c': case 'd': case 'h': case 'o':
1336 case 'B': case 'C': case 'D': case 'H': case 'O':
1337 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1339 CPP_RESERVE (pfile, 2);
1340 CPP_PUTC_Q (pfile, c);
1341 CPP_PUTC_Q (pfile, '\'');
1347 goto chill_number_eof;
1350 CPP_PUTC (pfile, c);
1354 CPP_RESERVE (pfile, 2);
1355 CPP_PUTC_Q (pfile, c);
1368 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1369 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1370 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1371 case 'x': case 'y': case 'z':
1372 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1373 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1374 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1377 _cpp_parse_name (pfile, c);
1380 case ' ': case '\t': case '\v': case '\f': case '\0':
1389 CPP_PUTC (pfile, c);
1391 if (c == EOF || !is_hspace(c))
1396 null_warning (pfile, null_count);
1401 if (CPP_BUFFER (pfile)->has_escapes)
1406 if (pfile->output_escapes)
1407 CPP_PUTS (pfile, "\r-", 2);
1408 _cpp_parse_name (pfile, GETC ());
1413 /* "\r " means a space, but only if necessary to prevent
1414 accidental token concatenation. */
1415 CPP_RESERVE (pfile, 2);
1416 if (pfile->output_escapes)
1417 CPP_PUTC_Q (pfile, '\r');
1418 CPP_PUTC_Q (pfile, c);
1423 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1429 /* Backslash newline is ignored. */
1430 if (!ACTIVE_MARK_P (pfile))
1431 CPP_BUMP_LINE (pfile);
1436 CPP_PUTC (pfile, c);
1439 case '(': token = CPP_OPEN_PAREN; goto char1;
1440 case ')': token = CPP_CLOSE_PAREN; goto char1;
1441 case '{': token = CPP_OPEN_BRACE; goto char1;
1442 case '}': token = CPP_CLOSE_BRACE; goto char1;
1443 case ',': token = CPP_COMMA; goto char1;
1444 case ';': token = CPP_SEMICOLON; goto char1;
1450 CPP_PUTC (pfile, c);
1455 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1456 Caller is expected to have checked no_macro_expand. */
1458 maybe_macroexpand (pfile, written)
1462 U_CHAR *macro = pfile->token_buffer + written;
1463 size_t len = CPP_WRITTEN (pfile) - written;
1464 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1468 if (hp->disabled || hp->type == T_IDENTITY)
1470 if (pfile->output_escapes)
1472 /* Insert a no-reexpand marker before IDENT. */
1473 CPP_RESERVE (pfile, 2);
1474 CPP_ADJUST_WRITTEN (pfile, 2);
1475 macro = pfile->token_buffer + written;
1477 memmove (macro + 2, macro, len);
1483 if (hp->type == T_EMPTY)
1485 /* Special case optimization: macro expands to nothing. */
1486 CPP_SET_WRITTEN (pfile, written);
1487 CPP_PUTC_Q (pfile, ' ');
1491 /* If macro wants an arglist, verify that a '(' follows. */
1492 if (hp->type == T_FMACRO)
1494 int macbuf_whitespace = 0;
1497 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1499 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1502 _cpp_skip_hspace (pfile);
1509 if (point != CPP_BUFFER (pfile)->cur)
1510 macbuf_whitespace = 1;
1514 goto not_macro_call;
1515 cpp_pop_buffer (pfile);
1518 CPP_SET_MARK (pfile);
1521 _cpp_skip_hspace (pfile);
1528 CPP_GOTO_MARK (pfile);
1533 if (macbuf_whitespace)
1534 CPP_PUTC (pfile, ' ');
1540 /* This is now known to be a macro call.
1541 Expand the macro, reading arguments as needed,
1542 and push the expansion on the input stack. */
1543 _cpp_macroexpand (pfile, hp);
1544 CPP_SET_WRITTEN (pfile, written);
1548 /* Complain about \v or \f in a preprocessing directive (constraint
1549 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1551 pedantic_whitespace (pfile, p, len)
1559 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1560 else if (*p == '\f')
1561 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1569 cpp_get_token (pfile)
1572 enum cpp_ttype token;
1573 long written = CPP_WRITTEN (pfile);
1576 token = _cpp_lex_token (pfile);
1581 pfile->potential_control_macro = 0;
1582 pfile->only_seen_white = 0;
1586 if (pfile->only_seen_white == 0)
1587 pfile->only_seen_white = 1;
1588 CPP_BUMP_LINE (pfile);
1596 pfile->potential_control_macro = 0;
1597 if (_cpp_handle_directive (pfile))
1598 return CPP_DIRECTIVE;
1599 pfile->only_seen_white = 0;
1600 CPP_PUTC (pfile, '#');
1604 pfile->potential_control_macro = 0;
1605 pfile->only_seen_white = 0;
1606 if (! pfile->no_macro_expand
1607 && maybe_macroexpand (pfile, written))
1612 if (CPP_BUFFER (pfile) == NULL)
1614 if (CPP_BUFFER (pfile)->manual_pop)
1615 /* If we've been reading from redirected input, the
1616 frontend will pop the buffer. */
1619 if (CPP_BUFFER (pfile)->seen_eof)
1621 cpp_pop_buffer (pfile);
1626 _cpp_handle_eof (pfile);
1632 /* Like cpp_get_token, but skip spaces and comments. */
1635 cpp_get_non_space_token (pfile)
1638 int old_written = CPP_WRITTEN (pfile);
1641 enum cpp_ttype token = cpp_get_token (pfile);
1642 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1644 CPP_SET_WRITTEN (pfile, old_written);
1648 /* Like cpp_get_token, except that it does not execute directives,
1649 does not consume vertical space, discards horizontal space, and
1650 automatically pops off macro buffers. */
1652 _cpp_get_directive_token (pfile)
1656 enum cpp_ttype token;
1659 old_written = CPP_WRITTEN (pfile);
1660 token = _cpp_lex_token (pfile);
1667 /* Put it back and return VSPACE. */
1669 CPP_ADJUST_WRITTEN (pfile, -1);
1673 if (CPP_PEDANTIC (pfile))
1674 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1675 CPP_WRITTEN (pfile) - old_written);
1676 CPP_SET_WRITTEN (pfile, old_written);
1681 /* Don't execute the directive, but don't smash it to OTHER either. */
1682 CPP_PUTC (pfile, '#');
1683 return CPP_DIRECTIVE;
1686 if (! pfile->no_macro_expand
1687 && maybe_macroexpand (pfile, old_written))
1692 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1694 cpp_pop_buffer (pfile);
1698 /* This can happen for files that don't end with a newline,
1699 and for cpp_define and friends. Pretend they do, so
1700 callers don't have to deal. A warning will be issued by
1701 someone else, if necessary. */
1706 /* Determine the current line and column. Used only by read_and_prescan. */
1708 find_position (start, limit, linep)
1711 unsigned long *linep;
1713 unsigned long line = *linep;
1714 U_CHAR *lbase = start;
1715 while (start < limit)
1717 U_CHAR ch = *start++;
1718 if (ch == '\n' || ch == '\r')
1728 /* The following table is used by _cpp_read_and_prescan. If we have
1729 designated initializers, it can be constant data; otherwise, it is
1730 set up at runtime by _cpp_init_input_buffer. */
1733 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1736 #if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
1737 #define init_chartab() /* nothing */
1738 #define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
1740 #define s(p, v) [p] = v,
1742 #define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1743 static void init_chartab PARAMS ((void)) { \
1744 unsigned char *x = chartab;
1746 #define s(p, v) x[p] = v;
1749 /* Table of characters that can't be handled in the inner loop.
1750 Also contains the mapping between trigraph third characters and their
1752 #define SPECCASE_CR 1
1753 #define SPECCASE_BACKSLASH 2
1754 #define SPECCASE_QUESTION 3
1757 s('\r', SPECCASE_CR)
1758 s('\\', SPECCASE_BACKSLASH)
1759 s('?', SPECCASE_QUESTION)
1761 s('=', '#') s(')', ']') s('!', '|')
1762 s('(', '[') s('\'', '^') s('>', '}')
1763 s('/', '\\') s('<', '{') s('-', '~')
1770 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1771 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1773 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1774 much memory to allocate initially; more will be allocated if
1775 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1776 canonical form (\n). If enabled, convert and/or warn about
1777 trigraphs. Convert backslash-newline to a one-character escape
1778 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1779 token). If there is no newline at the end of the file, add one and
1780 warn. Returns -1 on failure, or the actual length of the data to
1783 This function does a lot of work, and can be a serious performance
1784 bottleneck. It has been tuned heavily; make sure you understand it
1785 before hacking. The common case - no trigraphs, Unix style line
1786 breaks, backslash-newline set off by whitespace, newline at EOF -
1787 has been optimized at the expense of the others. The performance
1788 penalty for DOS style line breaks (\r\n) is about 15%.
1790 Warnings lose particularly heavily since we have to determine the
1791 line number, which involves scanning from the beginning of the file
1792 or from the last warning. The penalty for the absence of a newline
1793 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1795 If your file has more than one kind of end-of-line marker, you
1796 will get messed-up line numbering.
1798 So that the cases of the switch statement do not have to concern
1799 themselves with the complications of reading beyond the end of the
1800 buffer, the buffer is guaranteed to have at least 3 characters in
1801 it (or however many are left in the file, if less) on entry to the
1802 switch. This is enough to handle trigraphs and the "\\\n\r" and
1805 The end of the buffer is marked by a '\\', which, being a special
1806 character, guarantees we will exit the fast-scan loops and perform
1810 _cpp_read_and_prescan (pfile, fp, desc, len)
1816 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1817 U_CHAR *ip, *op, *line_base;
1820 unsigned int deferred_newlines;
1825 deferred_newlines = 0;
1829 ibase = pfile->input_buffer + 3;
1831 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1835 U_CHAR *near_buff_end;
1837 count = read (desc, ibase, pfile->input_buffer_len);
1841 ibase[count] = '\\'; /* Marks end of buffer */
1844 near_buff_end = pfile->input_buffer + count;
1849 size_t delta_line_base;
1853 This could happen if the file is larger than half the
1854 maximum address space of the machine. */
1857 delta_op = op - buf;
1858 delta_line_base = line_base - buf;
1859 buf = (U_CHAR *) xrealloc (buf, len);
1860 op = buf + delta_op;
1861 line_base = buf + delta_line_base;
1868 /* Allow normal processing of the (at most 2) remaining
1869 characters. The end-of-buffer marker is still present
1870 and prevents false matches within the switch. */
1871 near_buff_end = ibase - 1;
1878 /* Deal with \-newline, potentially in the middle of a token. */
1879 if (deferred_newlines)
1881 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1883 /* Previous was not white space. Skip to white
1884 space, if we can, before outputting the \r's */
1886 while (ip[span] != ' '
1889 && NORMAL(ip[span]))
1891 memcpy (op, ip, span);
1894 if (! NORMAL(ip[0]))
1897 while (deferred_newlines)
1898 deferred_newlines--, *op++ = '\r';
1901 /* Copy as much as we can without special treatment. */
1903 while (NORMAL (ip[span])) span++;
1904 memcpy (op, ip, span);
1909 if (ip > near_buff_end) /* Do we have enough chars? */
1911 switch (chartab[*ip++])
1913 case SPECCASE_CR: /* \r */
1922 case SPECCASE_BACKSLASH: /* \ */
1925 deferred_newlines++;
1927 if (*ip == '\r') ip++;
1929 else if (*ip == '\r')
1931 deferred_newlines++;
1933 if (*ip == '\n') ip++;
1939 case SPECCASE_QUESTION: /* ? */
1943 *op++ = '?'; /* Normal non-trigraph case */
1952 if (CPP_OPTION (pfile, warn_trigraphs))
1955 line_base = find_position (line_base, op, &line);
1956 col = op - line_base + 1;
1957 if (CPP_OPTION (pfile, trigraphs))
1958 cpp_warning_with_line (pfile, line, col,
1959 "trigraph ??%c converted to %c", d, t);
1961 cpp_warning_with_line (pfile, line, col,
1962 "trigraph ??%c ignored", d);
1966 if (CPP_OPTION (pfile, trigraphs))
1968 op[-1] = t; /* Overwrite '?' */
1973 goto do_speccase; /* May need buffer refill */
1985 /* Copy previous char plus unprocessed (at most 2) chars
1986 to beginning of buffer, refill it with another
1987 read(), and continue processing */
1988 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
1998 line_base = find_position (line_base, op, &line);
1999 col = op - line_base + 1;
2000 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2001 if (offset + 1 > len)
2004 if (offset + 1 > len)
2006 buf = (U_CHAR *) xrealloc (buf, len);
2012 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2016 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2017 (unsigned long)offset);
2022 cpp_error_from_errno (pfile, fp->ihash->name);
2027 /* Allocate pfile->input_buffer, and initialize chartab[]
2028 if it hasn't happened already. */
2031 _cpp_init_input_buffer (pfile)
2037 init_token_list (pfile, &pfile->directbuf, 0);
2039 /* Determine the appropriate size for the input buffer. Normal C
2040 source files are smaller than eight K. */
2041 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2042 address arithmetic all the time, and 3 for pushback during buffer
2043 refill, in case there's a potential trigraph or end-of-line
2044 digraph at the end of a block. */
2046 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2047 pfile->input_buffer = tmp;
2048 pfile->input_buffer_len = 8192;
2052 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2053 and extending for LEN characters to the NUL-terminated string
2054 STRING. Typical usage:
2056 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2062 cpp_idcmp (token, len, string)
2063 const U_CHAR *token;
2067 size_t len2 = strlen (string);
2070 if ((r = memcmp (token, string, MIN (len, len2))))
2073 /* The longer of the two strings sorts after the shorter. */
2076 else if (len < len2)
2084 /* Lexing algorithm.
2086 The original lexer in cpplib was made up of two passes: a first pass
2087 that replaced trigraphs and deleted escaped newlines, and a second
2088 pass that tokenized the result of the first pass. Tokenisation was
2089 performed by peeking at the next character in the input stream. For
2090 example, if the input stream contained "!=", the handler for the !
2091 character would peek at the next character, and if it were a '='
2092 would skip over it, and return a "!=" token, otherwise it would
2093 return just the "!" token.
2095 To implement a single-pass lexer, this peeking ahead is unworkable.
2096 An arbitrary number of escaped newlines, and trigraphs (in particular
2097 ??/ which translates to the escape \), could separate the '!' and '='
2098 in the input stream, yet the next token is still a "!=".
2100 Suppose instead that we lex by one logical line at a time, producing
2101 a token list or stack for each logical line, and when seeing the '!'
2102 push a CPP_NOT token on the list. Then if the '!' is part of a
2103 longer token ("!=") we know we must see the remainder of the token by
2104 the time we reach the end of the logical line. Thus we can have the
2105 '=' handler look at the previous token (at the end of the list / top
2106 of the stack) and see if it is a "!" token, and if so, instead of
2107 pushing a "=" token revise the existing token to be a "!=" token.
2109 This works in the presence of escaped newlines, because the '\' would
2110 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2111 newline ('\n' or '\r') handler looks at the token at the top of the
2112 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2113 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2114 the '=' handler would never see any intervening escaped newlines.
2116 To make trigraphs work in this context, since trigraphs have the highest
2117 precedence and are converted before anything else, the '?' handler does
2118 lookahead to see if it is a trigraph, and if so skips the trigraph
2119 and pushes the token it represents onto the top of the stack. This
2120 also works in the particular case of a CPP_BACKSLASH trigraph.
2122 To the preprocessor, whitespace is only significant to the point of
2123 knowing whether whitespace precedes a particular token. For example,
2124 the '=' handler needs to know whether there was whitespace between it
2125 and a "!" token on the top of the stack, to make the token conversion
2126 decision correctly. So each token has a PREV_WHITESPACE flag to
2127 indicate this - the standard permits consecutive whitespace to be
2128 regarded as a single space. The compiler front ends are not
2129 interested in whitespace at all; they just require a token stream.
2130 Another place where whitespace is significant to the preprocessor is
2131 a #define statement - if there is whitespace between the macro name
2132 and an initial "(" token the macro is "object-like", otherwise it is
2133 a function-like macro that takes arguments.
2135 However, all is not rosy. Parsing of identifiers, numbers, comments
2136 and strings becomes trickier because of the possibility of raw
2137 trigraphs and escaped newlines in the input stream.
2139 The trigraphs are three consecutive characters beginning with two
2140 question marks. A question mark is not valid as part of a number or
2141 identifier, so parsing of a number or identifier terminates normally
2142 upon reaching it, returning to the mainloop which handles the
2143 trigraph just like it would in any other position. Similarly for the
2144 backslash of a backslash-newline combination. So we just need the
2145 escaped-newline dropper in the mainloop to check if the token on the
2146 top of the stack after dropping the escaped newline is a number or
2147 identifier, and if so to continue processing it as if nothing had
2150 For strings, we replace trigraphs whenever we reach a quote or
2151 newline, because there might be a backslash trigraph escaping them.
2152 We need to be careful that we start trigraph replacing from where we
2153 left off previously, because it is possible for a first scan to leave
2154 "fake" trigraphs that a second scan would pick up as real (e.g. the
2155 sequence "????/\n=" would find a fake ??= trigraph after removing the
2158 For line comments, on reaching a newline we scan the previous
2159 character(s) to see if it is escaped, and continue if it is. Block
2160 comments ignore everything and just focus on finding the comment
2161 termination mark. The only difficult thing, and it is surprisingly
2162 tricky, is checking if an asterisk precedes the final slash since
2163 they could be separated by escaped newlines. If the preprocessor is
2164 invoked with the output comments option, we don't bother removing
2165 escaped newlines and replacing trigraphs for output.
2167 Finally, numbers can begin with a period, which is pushed initially
2168 as a CPP_DOT token in its own right. The digit handler checks if the
2169 previous token was a CPP_DOT not separated by whitespace, and if so
2170 pops it off the stack and pushes a period into the number's buffer
2171 before calling the number parser.
2175 static void expand_comment_space PARAMS ((cpp_toklist *));
2176 void init_trigraph_map PARAMS ((void));
2177 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
2179 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
2180 const unsigned char *));
2181 static int skip_block_comment PARAMS ((cpp_reader *));
2182 static int skip_line_comment PARAMS ((cpp_reader *));
2183 static void skip_whitespace PARAMS ((cpp_reader *, int));
2184 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2185 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
2186 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
2188 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
2189 static void copy_comment PARAMS ((cpp_toklist *, const unsigned char *,
2190 unsigned int, unsigned int, unsigned int));
2191 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
2193 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
2195 unsigned int spell_string PARAMS ((unsigned char *, cpp_toklist *,
2197 unsigned int spell_comment PARAMS ((unsigned char *, cpp_toklist *,
2199 unsigned int spell_name PARAMS ((unsigned char *, cpp_toklist *,
2202 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
2205 /* Macros on a cpp_name. */
2206 #define INIT_NAME(list, name) \
2207 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
2209 #define IS_DIRECTIVE(list) (list->tokens[0].type == CPP_HASH)
2210 #define COLUMN(cur) ((cur) - buffer->line_base)
2212 /* Maybe put these in the ISTABLE eventually. */
2213 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
2214 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
2216 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
2217 character, if any, is in buffer. */
2218 #define handle_newline(cur, limit, c) \
2220 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
2222 CPP_BUMP_LINE_CUR (pfile, (cur)); \
/* IMMED_TOKEN is true when the PREV_WHITESPACE flag is clear on the
   token cur_token points at.  PREV_TOKEN_TYPE is the type of the token
   stored just before cur_token.  Both expect a local named cur_token
   to be in scope at the point of use.  */
2225 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
2226 #define PREV_TOKEN_TYPE (cur_token[-1].type)
2228 #define SPELL_TEXT 0
2229 #define SPELL_HANDLER 1
2230 #define SPELL_CHAR 2
2231 #define SPELL_NONE 3
2234 #define T(e, s) {SPELL_TEXT, s},
2235 #define H(e, s) {SPELL_HANDLER, s},
2236 #define C(e, s) {SPELL_CHAR, s},
2237 #define N(e, s) {SPELL_NONE, s},
2238 #define E(e, s) {SPELL_EOL, s},
2240 static const struct token_spelling
2244 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
2252 static const unsigned char *digraph_spellings [] = {"%:", "%:%:", "<:",
2256 expand_comment_space (list)
2259 if (list->comments_cap == 0)
2261 list->comments_cap = 10;
2262 list->comments = (cpp_token *)
2263 xmalloc (list->comments_cap * sizeof (cpp_token));
2267 list->comments_cap *= 2;
2268 list->comments = (cpp_token *)
2269 xrealloc (list->comments, list->comments_cap);
2274 cpp_free_token_list (list)
2278 free (list->comments);
2279 free (list->tokens - 1); /* Backup over dummy token. */
2280 free (list->namebuf);
2284 static unsigned char trigraph_map[256];
2287 init_trigraph_map ()
2289 trigraph_map['='] = '#';
2290 trigraph_map['('] = '[';
2291 trigraph_map[')'] = ']';
2292 trigraph_map['/'] = '\\';
2293 trigraph_map['\''] = '^';
2294 trigraph_map['<'] = '{';
2295 trigraph_map['>'] = '}';
2296 trigraph_map['!'] = '|';
2297 trigraph_map['-'] = '~';
2300 /* Call when a trigraph is encountered. It warns if necessary, and
2301 returns true if the trigraph should be honoured. END is the third
2302 character of a trigraph in the input stream. */
2304 trigraph_ok (pfile, end)
2306 const unsigned char *end;
2308 int accept = CPP_OPTION (pfile, trigraphs);
2310 if (CPP_OPTION (pfile, warn_trigraphs))
2312 unsigned int col = end - 1 - pfile->buffer->line_base;
2314 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2315 "trigraph ??%c converted to %c",
2316 (int) *end, (int) trigraph_map[*end]);
2318 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2319 "trigraph ??%c ignored", (int) *end);
2324 /* Scan a string for trigraphs, warning or replacing them inline as
2325 appropriate. When parsing a string, we must call this routine
2326 before processing a newline character (if trigraphs are enabled),
2327 since the newline might be escaped by a preceding backslash
2328 trigraph sequence. Returns a pointer to the end of the name after
2331 static unsigned char*
2332 trigraph_replace (pfile, src, limit)
2335 unsigned char* limit;
2337 unsigned char *dest;
2339 /* Starting with src[1], find two consecutive '?'. The case of no
2340 trigraphs is streamlined. */
2342 for (; src + 1 < limit; src += 2)
2347 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2350 else if (src + 2 == limit || src[1] != '?')
2353 /* Check if it really is a trigraph. */
2354 if (trigraph_map[src[2]] == 0)
2358 goto trigraph_found;
2362 /* Now we have a trigraph, we need to scan the remaining buffer, and
2363 copy-shifting its contents left if replacement is enabled. */
2364 for (; src + 2 < limit; dest++, src++)
2365 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2369 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2370 *dest = trigraph_map[*src];
2373 /* Copy remaining (at most 2) characters. */
2379 /* If CUR is a backslash or the end of a trigraphed backslash, return
2380 a pointer to its beginning, otherwise NULL. We don't read beyond
2381 the buffer start, because there is the start of the comment in the
2383 static const unsigned char *
2384 backslash_start (pfile, cur)
2386 const unsigned char *cur;
2390 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2391 && trigraph_ok (pfile, cur))
2396 /* Skip a C-style block comment. This is probably the trickiest
2397 handler. We find the end of the comment by seeing if an asterisk
2398 is before every '/' we encounter. The nasty complication is that a
2399 previous asterisk may be separated by one or more escaped newlines.
2400 Returns non-zero if comment terminated by EOF, zero otherwise. */
2402 skip_block_comment (pfile)
2405 cpp_buffer *buffer = pfile->buffer;
2406 const unsigned char *char_after_star = 0;
2407 register const unsigned char *cur = buffer->cur;
2410 /* Inner loop would think the comment has ended if the first comment
2411 character is a '/'. Avoid this and keep the inner loop clean by
2412 skipping such a character. */
2413 if (cur < buffer->rlimit && cur[0] == '/')
2416 for (; cur < buffer->rlimit; )
2418 unsigned char c = *cur++;
2420 /* People like decorating comments with '*', so check for
2421 '/' instead for efficiency. */
2424 if (cur[-2] == '*' || cur - 1 == char_after_star)
2427 /* Warn about potential nested comments, but not when
2428 the final character inside the comment is a '/'.
2429 Don't bother to get it right across escaped newlines. */
2430 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2431 && cur[0] == '*' && cur[1] != '/')
2434 cpp_warning (pfile, "'/*' within comment");
2437 else if (IS_NEWLINE(c))
2439 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2441 handle_newline (cur, buffer->rlimit, c);
2442 /* Work correctly if there is an asterisk before an
2443 arbitrarily long sequence of escaped newlines. */
2444 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2445 char_after_star = cur;
2447 char_after_star = 0;
2457 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2458 Returns non-zero if a multiline comment. */
2460 skip_line_comment (pfile)
2463 cpp_buffer *buffer = pfile->buffer;
2464 register const unsigned char *cur = buffer->cur;
2467 for (; cur < buffer->rlimit; )
2469 unsigned char c = *cur++;
2473 /* Check for a (trigraph?) backslash escaping the newline. */
2474 if (!backslash_start (pfile, cur - 2))
2477 handle_newline (cur, buffer->rlimit, c);
2483 buffer->cur = cur - 1; /* Leave newline for caller. */
2487 /* Skips whitespace, stopping at next non-whitespace character. */
2489 skip_whitespace (pfile, in_directive)
2493 cpp_buffer *buffer = pfile->buffer;
2494 register const unsigned char *cur = buffer->cur;
2495 unsigned short null_count = 0;
2497 for (; cur < buffer->rlimit; )
2499 unsigned char c = *cur++;
2501 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2503 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2507 /* Must be '\f' or '\v' */
2508 else if (in_directive && CPP_PEDANTIC (pfile))
2509 cpp_pedwarn (pfile, "%s in preprocessing directive",
2510 c == '\f' ? "formfeed" : "vertical tab");
2515 buffer->cur = cur - 1;
2517 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2518 : "embedded null character ignored");
2521 /* Parse (append) an identifier. */
2523 parse_name (pfile, list, name)
2528 const unsigned char *name_limit;
2529 unsigned char *namebuf;
2530 cpp_buffer *buffer = pfile->buffer;
2531 register const unsigned char *cur = buffer->cur;
2534 name_limit = list->namebuf + list->name_cap;
2535 namebuf = list->namebuf + list->name_used;
2537 for (; cur < buffer->rlimit && namebuf < name_limit; )
2539 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2545 if (c == '$' && CPP_PEDANTIC (pfile))
2548 cpp_pedwarn (pfile, "'$' character in identifier");
2552 /* Run out of name space? */
2553 if (cur < buffer->rlimit)
2555 list->name_used = namebuf - list->namebuf;
2556 auto_expand_name_space (list);
2562 name->len = namebuf - (list->namebuf + name->offset);
2563 list->name_used = namebuf - list->namebuf;
2566 /* Parse (append) a number. */
2568 #define VALID_SIGN(c, prevc) \
2569 (((c) == '+' || (c) == '-') && \
2570 ((prevc) == 'e' || (prevc) == 'E' \
2571 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2574 parse_number (pfile, list, name)
2579 const unsigned char *name_limit;
2580 unsigned char *namebuf;
2581 cpp_buffer *buffer = pfile->buffer;
2582 register const unsigned char *cur = buffer->cur;
2585 name_limit = list->namebuf + list->name_cap;
2586 namebuf = list->namebuf + list->name_used;
2588 for (; cur < buffer->rlimit && namebuf < name_limit; )
2590 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2592 /* Perhaps we should accept '$' here if we accept it for
2593 identifiers. We know namebuf[-1] is safe, because for c to
2594 be a sign we must have pushed at least one character. */
2595 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2602 /* Run out of name space? */
2603 if (cur < buffer->rlimit)
2605 list->name_used = namebuf - list->namebuf;
2606 auto_expand_name_space (list);
2612 name->len = namebuf - (list->namebuf + name->offset);
2613 list->name_used = namebuf - list->namebuf;
2616 /* Places a string terminated by an unescaped TERMINATOR into a
2617 cpp_name, which should be expandable and thus at the top of the
2618 list's stack. Handles embedded trigraphs, if necessary, and
2621 Can be used for character constants (terminator = '\''), string
2622 constants ('"'), angled headers ('>') and assertions (')'). */
2625 parse_string (pfile, list, name, terminator)
2629 unsigned int terminator;
2631 cpp_buffer *buffer = pfile->buffer;
2632 register const unsigned char *cur = buffer->cur;
2633 const unsigned char *name_limit;
2634 unsigned char *namebuf;
2635 unsigned int null_count = 0;
2636 int trigraphed_len = 0;
2639 name_limit = list->namebuf + list->name_cap;
2640 namebuf = list->namebuf + list->name_used;
2642 for (; cur < buffer->rlimit && namebuf < name_limit; )
2644 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2648 else if (c == terminator || IS_NEWLINE (c))
2650 unsigned char* name_start = list->namebuf + name->offset;
2652 /* Needed for trigraph_replace and multiline string warning. */
2655 /* Scan for trigraphs before checking if backslash-escaped. */
2656 if (CPP_OPTION (pfile, trigraphs)
2657 || CPP_OPTION (pfile, warn_trigraphs))
2659 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2661 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2662 if (trigraphed_len < 0)
2666 namebuf--; /* Drop the newline / terminator from the name. */
2669 /* Drop a backslash newline, and continue. */
2670 if (namebuf[-1] == '\\')
2672 handle_newline (cur, buffer->rlimit, c);
2679 /* In Fortran and assembly language, silently terminate
2680 strings of either variety at end of line. This is a
2681 kludge around not knowing where comments are in these
2683 if (CPP_OPTION (pfile, lang_fortran)
2684 || CPP_OPTION (pfile, lang_asm))
2687 /* Character constants, headers and asserts may not
2688 extend over multiple lines. In Standard C, neither
2689 may strings. We accept multiline strings as an
2690 extension, but not in directives. */
2691 if (terminator != '"' || IS_DIRECTIVE (list))
2694 cur++; /* Move forwards again. */
2696 if (pfile->multiline_string_line == 0)
2698 pfile->multiline_string_line = list->line;
2699 if (CPP_PEDANTIC (pfile))
2700 cpp_pedwarn (pfile, "multi-line string constant");
2704 handle_newline (cur, buffer->rlimit, c);
2708 unsigned char *temp;
2710 /* An odd number of consecutive backslashes represents
2711 an escaped terminator. */
2713 while (temp >= name_start && *temp == '\\')
2716 if ((namebuf - temp) & 1)
2723 /* Run out of name space? */
2724 if (cur < buffer->rlimit)
2726 list->name_used = namebuf - list->namebuf;
2727 auto_expand_name_space (list);
2731 /* We may not have trigraph-replaced the input for this code path,
2732 but as the input is in error by being unterminated we don't
2733 bother. Prevent warnings about no newlines at EOF. */
2734 if (IS_NEWLINE(cur[-1]))
2738 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2740 if (terminator == '\"' && pfile->multiline_string_line != list->line
2741 && pfile->multiline_string_line != 0)
2743 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2744 "possible start of unterminated string literal");
2745 pfile->multiline_string_line = 0;
2750 name->len = namebuf - (list->namebuf + name->offset);
2751 list->name_used = namebuf - list->namebuf;
2754 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2755 : "null character preserved"));
2758 /* The character C helps us distinguish comment types: '*' = C style,
2759 '-' = Chill-style and '/' = C++ style. For code simplicity, the
2760 stored comment includes any C-style comment terminator. */
2762 copy_comment (list, from, len, tok_no, type)
2764 const unsigned char *from;
2766 unsigned int tok_no;
2771 if (list->comments_used == list->comments_cap)
2772 expand_comment_space (list);
2774 if (list->name_used + len > list->name_cap)
2775 expand_name_space (list, len);
2777 comment = &list->comments[list->comments_used++];
2778 comment->type = type;
2779 comment->aux = tok_no;
2780 comment->val.name.len = len;
2781 comment->val.name.offset = list->name_used;
2783 memcpy (list->namebuf + list->name_used, from, len);
2784 list->name_used += len;
2788 * The tokenizer's main loop. Returns a token list, representing a
2789 * logical line in the input file, terminated with a CPP_VSPACE
2790 * token. On EOF, a token list containing the single CPP_EOF token
2793 * Implementation relies almost entirely on lookback, rather than
2794 * looking forwards. This means that tokenization requires just
2795 * a single pass of the file, even in the presence of trigraphs and
2796 * escaped newlines, providing significant performance benefits.
2797 * Trigraph overhead is negligible if they are disabled, and low
2798 * even when enabled.
/* Helpers for editing the token stack through a local cur_token pointer.
   PUSH_TOKEN stores TTYPE as the type of the token cur_token points at
   and then advances cur_token.  REVISE_TOKEN overwrites the type of the
   token just before cur_token.  BACKUP_TOKEN steps cur_token back by
   one and stores TTYPE as that token's type.  BACKUP_DIGRAPH does the
   same as BACKUP_TOKEN and additionally sets the DIGRAPH bit in the
   flags of the token cur_token then points at.  */
2801 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
2802 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
2803 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
2804 #define BACKUP_DIGRAPH(ttype) do { \
2805 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
2808 _cpp_lex_line (pfile, list)
2812 cpp_token *cur_token, *token_limit;
2813 cpp_buffer *buffer = pfile->buffer;
2814 register const unsigned char *cur = buffer->cur;
2815 unsigned char flags = 0;
2818 token_limit = list->tokens + list->tokens_cap;
2819 cur_token = list->tokens + list->tokens_used;
2821 for (; cur < buffer->rlimit && cur_token < token_limit;)
2823 unsigned char c = *cur++;
2825 /* Optimize whitespace skipping, in particular the case of a
2826 single whitespace character, as every other token is probably
2827 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2828 if (is_hspace ((unsigned int) c))
2830 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2832 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2833 skip_whitespace (pfile, IS_DIRECTIVE (list));
2836 flags = PREV_WHITESPACE;
2837 if (cur == buffer->rlimit)
2842 /* Initialize current token. Its type is set in the switch. */
2843 cur_token->col = COLUMN (cur);
2844 cur_token->flags = flags;
2849 case '0': case '1': case '2': case '3': case '4':
2850 case '5': case '6': case '7': case '8': case '9':
2851 /* Prepend an immediately previous CPP_DOT token. */
2852 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2855 if (list->name_cap == list->name_used)
2856 auto_expand_name_space (list);
2858 cur_token->val.name.len = 1;
2859 cur_token->val.name.offset = list->name_used;
2860 list->namebuf[list->name_used++] = '.';
2863 INIT_NAME (list, cur_token->val.name);
2864 cur--; /* Backup character. */
2868 parse_number (pfile, list, &cur_token->val.name);
2871 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2876 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2877 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2878 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2879 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2881 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2882 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2883 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2884 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2886 INIT_NAME (list, cur_token->val.name);
2887 cur--; /* Backup character. */
2888 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2892 parse_name (pfile, list, &cur_token->val.name);
2895 /* Find handler for newly created / extended directive. */
2896 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2897 _cpp_check_directive (list, cur_token);
2904 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2905 /* Do we have a wide string? */
2906 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2907 && cur_token[-1].val.name.len == 1
2908 && TOK_NAME (list, cur_token - 1)[0] == 'L'
2909 && !CPP_TRADITIONAL (pfile))
2911 /* No need for 'L' any more. */
2913 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2917 /* Here c is one of ' " > or ). */
2918 INIT_NAME (list, cur_token->val.name);
2920 parse_string (pfile, list, &cur_token->val.name, c);
2926 cur_token->type = CPP_DIV;
2929 if (PREV_TOKEN_TYPE == CPP_DIV)
2931 /* We silently allow C++ comments in system headers,
2932 irrespective of conformance mode, because lots of
2933 broken systems do that and trying to clean it up
2934 in fixincludes is a nightmare. */
2935 if (buffer->system_header_p)
2936 goto do_line_comment;
2937 else if (CPP_OPTION (pfile, cplusplus_comments))
2939 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2940 && ! buffer->warned_cplusplus_comments)
2944 "C++ style comments are not allowed in ISO C89");
2946 "(this will be reported only once per input file)");
2947 buffer->warned_cplusplus_comments = 1;
2953 "comment start split across lines");
2954 if (skip_line_comment (pfile))
2955 cpp_error_with_line (pfile, list->line,
2957 "multi-line comment");
2958 if (!CPP_OPTION (pfile, discard_comments))
2959 copy_comment (list, cur, buffer->cur - cur,
2960 cur_token - 1 - list->tokens, c == '/'
2961 ? CPP_CPP_COMMENT: CPP_CHILL_COMMENT);
2964 /* Back-up to first '-' or '/'. */
2966 if (!CPP_OPTION (pfile, traditional))
2967 flags = PREV_WHITESPACE;
2975 cur_token->type = CPP_MULT;
2978 if (PREV_TOKEN_TYPE == CPP_DIV)
2983 "comment start '/*' split across lines");
2984 if (skip_block_comment (pfile))
2985 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
2986 "unterminated comment");
2987 else if (buffer->cur[-2] != '*')
2989 "comment end '*/' split across lines");
2990 if (!CPP_OPTION (pfile, discard_comments))
2991 copy_comment (list, cur, buffer->cur - cur,
2992 cur_token - 1 - list->tokens, CPP_C_COMMENT);
2996 if (!CPP_OPTION (pfile, traditional))
2997 flags = PREV_WHITESPACE;
2999 else if (CPP_OPTION (pfile, cplusplus))
3001 /* In C++, there are .* and ->* operators. */
3002 if (PREV_TOKEN_TYPE == CPP_DEREF)
3003 BACKUP_TOKEN (CPP_DEREF_STAR);
3004 else if (PREV_TOKEN_TYPE == CPP_DOT)
3005 BACKUP_TOKEN (CPP_DOT_STAR);
3013 handle_newline (cur, buffer->rlimit, c);
3014 if (PREV_TOKEN_TYPE != CPP_BACKSLASH || !IMMED_TOKEN ())
3016 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3020 "backslash and newline separated by space");
3022 PUSH_TOKEN (CPP_VSPACE);
3025 /* Remove the escaped newline. Then continue to process
3026 any interrupted name or number. */
3031 if (cur_token->type == CPP_NAME)
3033 else if (cur_token->type == CPP_NUMBER)
3034 goto continue_number;
3040 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3042 if (CPP_OPTION (pfile, chill))
3043 goto do_line_comment;
3044 REVISE_TOKEN (CPP_MINUS_MINUS);
3047 PUSH_TOKEN (CPP_MINUS);
3050 /* The digraph flag checking ensures that ## and %:%:
3051 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3054 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3055 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3056 REVISE_TOKEN (CPP_PASTE);
3058 PUSH_TOKEN (CPP_HASH);
3062 cur_token->type = CPP_COLON;
3065 if (PREV_TOKEN_TYPE == CPP_COLON
3066 && CPP_OPTION (pfile, cplusplus))
3067 BACKUP_TOKEN (CPP_SCOPE);
3068 /* Digraph: "<:" is a '[' */
3069 else if (PREV_TOKEN_TYPE == CPP_LESS)
3070 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3071 /* Digraph: "%:" is a '#' */
3072 else if (PREV_TOKEN_TYPE == CPP_MOD)
3074 (--cur_token)->flags |= DIGRAPH;
3082 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3083 REVISE_TOKEN (CPP_AND_AND);
3085 PUSH_TOKEN (CPP_AND);
3090 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3091 REVISE_TOKEN (CPP_OR_OR);
3093 PUSH_TOKEN (CPP_OR);
3097 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3098 REVISE_TOKEN (CPP_PLUS_PLUS);
3100 PUSH_TOKEN (CPP_PLUS);
3104 /* This relies on equidistance of "?=" and "?" tokens. */
3105 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3106 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3108 PUSH_TOKEN (CPP_EQ);
3112 cur_token->type = CPP_GREATER;
3115 if (PREV_TOKEN_TYPE == CPP_GREATER)
3116 BACKUP_TOKEN (CPP_RSHIFT);
3117 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3118 BACKUP_TOKEN (CPP_DEREF);
3119 /* Digraph: ":>" is a ']' */
3120 else if (PREV_TOKEN_TYPE == CPP_COLON)
3121 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3122 /* Digraph: "%>" is a '}' */
3123 else if (PREV_TOKEN_TYPE == CPP_MOD)
3124 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3130 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3132 REVISE_TOKEN (CPP_LSHIFT);
3135 /* Is this the beginning of a header name? */
3136 if (list->dir_flags & SYNTAX_INCLUDE)
3138 c = '>'; /* Terminator. */
3139 cur_token->type = CPP_HEADER_NAME;
3140 goto do_parse_string;
3142 PUSH_TOKEN (CPP_LESS);
3146 /* Digraph: "<%" is a '{' */
3147 cur_token->type = CPP_MOD;
3148 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3149 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3154 /* Is this the beginning of an assertion string? */
3155 if (list->dir_flags & SYNTAX_ASSERT)
3157 c = ')'; /* Terminator. */
3158 cur_token->type = CPP_ASSERTION;
3159 goto do_parse_string;
3161 PUSH_TOKEN (CPP_OPEN_PAREN);
3165 if (cur + 1 < buffer->rlimit && *cur == '?'
3166 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3168 /* Handle trigraph. */
3172 case '(': goto make_open_square;
3173 case ')': goto make_close_square;
3174 case '<': goto make_open_brace;
3175 case '>': goto make_close_brace;
3176 case '=': goto make_hash;
3177 case '!': goto make_or;
3178 case '-': goto make_complement;
3179 case '/': goto make_backslash;
3180 case '\'': goto make_xor;
3183 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3185 /* GNU C++ defines <? and >? operators. */
3186 if (PREV_TOKEN_TYPE == CPP_LESS)
3188 REVISE_TOKEN (CPP_MIN);
3191 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3193 REVISE_TOKEN (CPP_MAX);
3197 PUSH_TOKEN (CPP_QUERY);
3201 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3203 && !(cur_token[-1].flags & PREV_WHITESPACE))
3206 PUSH_TOKEN (CPP_ELLIPSIS);
3209 PUSH_TOKEN (CPP_DOT);
3213 case '~': PUSH_TOKEN (CPP_COMPL); break;
3215 case '^': PUSH_TOKEN (CPP_XOR); break;
3217 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3219 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3221 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3223 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3225 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3226 case '!': PUSH_TOKEN (CPP_NOT); break;
3227 case ',': PUSH_TOKEN (CPP_COMMA); break;
3228 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3229 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3232 if (CPP_OPTION (pfile, dollars_in_ident))
3237 PUSH_TOKEN (CPP_OTHER);
3242 /* Run out of token space? */
3243 if (cur_token == token_limit)
3245 list->tokens_used = cur_token - list->tokens;
3246 expand_token_space (list);
3250 cur_token->type = CPP_EOF;
3251 cur_token->flags = flags;
3253 if (cur_token != &list->tokens[0])
3255 /* Next call back will get just a CPP_EOF. */
3257 cpp_warning (pfile, "no newline at end of file");
3258 PUSH_TOKEN (CPP_VSPACE);
3264 list->tokens_used = cur_token - list->tokens;
3266 /* FIXME: take this check out and put it in the caller.
3267 list->directive == 0 indicates an unknown directive (but null
3268 directive is OK). This is the first time we can be sure the
3269 directive is invalid, and thus warn about it, because it might
3270 have been split by escaped newlines. Also, don't complain about
3271 invalid directives in assembly source, we don't know where the
3272 comments are, and # may introduce assembler pseudo-ops. */
3274 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3275 && list->tokens[1].type != CPP_VSPACE
3276 && !CPP_OPTION (pfile, lang_asm))
3277 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3278 "invalid preprocessing directive");
3281 /* Token spelling functions. Used for output of a preprocessed file,
3282 stringizing and token pasting. They all assume sufficient buffer
3283 is allocated, and return exactly how much they used. */
3285 /* Needs buffer of 3 + len. */
/* Spell the string or character constant TOKEN into BUFFER and return
   the distance between the local write pointer and the original
   BUFFER, i.e. the number of bytes used.
   NOTE(review): some statements of this function are not visible in
   this excerpt (presumably the stores of the prefix and of the two
   delimiters, which would account for the 3 extra bytes -- confirm).  */
3287 spell_string (buffer, list, token)
3288 unsigned char *buffer;
3292 unsigned char c, *orig_buff = buffer;
/* Wide strings and wide character constants are tested for first;
   presumably this is where the L prefix is written -- TODO confirm,
   the guarded statement is not shown.  After that, C is set to the
   delimiter: a double quote for CPP_STRING and CPP_WSTRING, a single
   quote for every other token type that reaches this function.  */
3295 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3297 c = token->type == CPP_STRING || token->type == CPP_WSTRING ? '"': '\'';
/* Copy the LEN bytes of token text, located through TOK_NAME, into
   BUFFER.  */
3300 len = token->val.name.len;
3301 memcpy (buffer, TOK_NAME (list, token), len);
3304 return buffer - orig_buff;
3307 /* Needs buffer of len + 2. */
/* Spell the comment TOKEN into BUFFER.  The token type is tested for
   CPP_C_COMMENT and then for CPP_CPP_COMMENT; the statements guarded
   by those tests are not visible in this excerpt.
   NOTE(review): presumably each branch writes the matching two-byte
   comment opener, which would account for the 2 extra bytes, and a
   final branch handles the remaining comment type -- confirm.  */
3309 spell_comment (buffer, list, token)
3310 unsigned char *buffer;
3316 if (token->type == CPP_C_COMMENT)
3321 else if (token->type == CPP_CPP_COMMENT)
/* Copy the LEN bytes of comment text, located through TOK_NAME, into
   BUFFER.  */
3332 len = token->val.name.len;
3333 memcpy (buffer, TOK_NAME (list, token), len);
3338 /* Needs buffer of len. */
/* Spell TOKEN by copying its text into BUFFER.  The visible code adds
   no prefix or delimiter: exactly val.name.len bytes, located through
   TOK_NAME, are copied.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably it returns LEN, like the other spelling functions
   return the number of bytes they used -- confirm.  */
3340 spell_name (buffer, list, token)
3341 unsigned char *buffer;
3347 len = token->val.name.len;
3348 memcpy (buffer, TOK_NAME (list, token), len);
/* Driver loop: lex the input of PFILE one token list at a time.  Each
   iteration initializes LIST, fills it with _cpp_lex_line, and then
   either runs the directive handler recorded in the list or passes
   the list to _cpp_output_list.
   NOTE(review): several lines of this function (declarations, the
   loop exit, updates of RECYCLE) are not visible in this excerpt.  */
3355 _cpp_lex_file (pfile)
3361 init_trigraph_map ();
3362 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* RECYCLE is 0 on entry to the loop and is handed to init_token_list
   on every iteration.  The loop header has no exit condition.  */
3364 for (recycle = 0; ;)
3366 init_token_list (pfile, list, recycle);
3369 _cpp_lex_line (pfile, list);
/* A list whose first token is CPP_EOF is tested for here; presumably
   this is the loop exit -- TODO confirm, the guarded statement is not
   shown.  */
3370 if (list->tokens[0].type == CPP_EOF)
/* If the lexer recorded a directive handler, call it.  When the
   handler returns nonzero, LIST is pointed at a freshly allocated
   token list (the old pointer is not freed in the visible code;
   presumably the handler has taken ownership of it -- confirm).  */
3373 if (list->dir_handler)
3375 if (list->dir_handler (pfile))
3377 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* Write the tokens of the list to the output buffer.
   NOTE(review): presumably reached only when there is no directive
   handler (an else line not visible here) -- confirm.  */
3382 _cpp_output_list (pfile, list);
/* This could be useful to other routines.  If you allocate this many
   bytes, you have enough room to spell the token: a fixed allowance
   of 4 bytes, plus the length of the token text when the spelling
   type of the token is SPELL_HANDLER.

   TOKEN is a pointer to the token.  Every use of the argument is
   parenthesized so that any pointer expression (for instance
   `p + 1' or `&list->tokens[i]') may be passed, not just a plain
   identifier.  The argument is evaluated more than once, so it must
   not have side effects.  */
#define TOKEN_LEN(token) (4 + (token_spellings[(token)->type].type == \
			       SPELL_HANDLER ? (token)->val.name.len : 0))
/* Write the spelling of every token in LIST to the output buffer of
   PFILE, putting either saved comments or a single space in front of
   tokens that were preceded by whitespace.
   NOTE(review): case labels, braces and a few control-flow lines of
   this function are not visible in this excerpt; the notes below
   describe only what the visible lines do.  */
3392 _cpp_output_list (pfile, list)
3396 unsigned int comment_no = 0;
3397 cpp_token *token, *comment_token = 0;
/* COMMENT_TOKEN tracks the token that the next saved comment is
   attached to; the aux field of a saved comment is used as an index
   into list->tokens.  It stays null when the list has no comments.  */
3399 if (list->comments_used > 0)
3400 comment_token = list->tokens + list->comments[0].aux;
3402 CPP_RESERVE (pfile, 2); /* Always have room for " \n". */
/* Walk the tokens in order.  The loop header has no exit condition.  */
3403 for (token = &list->tokens[0];; token++)
3405 if (token->flags & PREV_WHITESPACE)
3407 /* Output comments if -C. Otherwise a space will do. */
3408 if (token == comment_token)
/* Spell saved comments starting at index COMMENT_NO.  After each one,
   advance to the next saved comment and recompute COMMENT_TOKEN; keep
   going while that comment is attached to this same token.  The test
   against comments_used detects that the last saved comment has been
   written (the guarded statement is not shown; presumably it leaves
   the loop -- confirm).  */
3410 cpp_token *comment = &list->comments[comment_no];
3413 CPP_RESERVE (pfile, 2 + TOKEN_LEN (comment));
3414 pfile->limit += spell_comment (pfile->limit, list, comment);
3415 comment_no++, comment++;
3416 if (comment_no == list->comments_used)
3418 comment_token = comment->aux + list->tokens;
3420 while (comment_token == token);
3423 CPP_PUTC_Q (pfile, ' ');
/* Reserve room for the token itself, then dispatch on the spelling
   class recorded for its type in token_spellings.  */
3426 CPP_RESERVE (pfile, 2 + TOKEN_LEN (token));
3427 switch (token_spellings[token->type].type)
/* Fixed spelling: taken from digraph_spellings when the token carries
   the DIGRAPH flag, otherwise from the speller field of the table,
   and copied byte by byte up to the terminating NUL.  */
3431 const unsigned char *spelling;
3434 if (token->flags & DIGRAPH)
3435 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3437 spelling = token_spellings[token->type].speller;
3439 while ((c = *spelling++) != '\0')
3440 CPP_PUTC_Q (pfile, c);
/* Spelling function: the speller field is cast to a function pointer
   and called; the output limit advances by the count it returns.  */
3448 s = (speller) token_spellings[token->type].speller;
3449 pfile->limit += s (pfile->limit, list, token);
/* Single character: the byte to write is kept in the aux field.  */
3454 *pfile->limit++ = token->aux;
/* Write a newline.  NOTE(review): presumably this is the end-of-line
   token and the function returns right after it -- confirm.  */
3458 CPP_PUTC_Q (pfile, '\n');
/* Remaining spelling class: the token cannot be written out.  */
3462 cpp_error (pfile, "Unwriteable token");