1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
8 This program is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Raw access to an input buffer.  PEEKBUF and GETBUF yield EOF rather
   than reading at or beyond BUFFER->rlimit; FORWARDBUF moves BUFFER->cur
   by N with no bounds check.  PEEKBUF and GETBUF expand BUFFER more than
   once, so the argument must be free of side effects.  */
28 #define PEEKBUF(BUFFER, N) \
29 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
30 #define GETBUF(BUFFER) \
31 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
32 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands that apply the macros above to CPP_BUFFER (pfile); a
   variable named pfile must be in scope wherever they are used.  */
34 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
35 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
36 #define GETC() GETBUF (CPP_BUFFER (pfile))
37 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
/* Forward declarations for the static helpers defined later in this
   file.  */
39 static void skip_block_comment PARAMS ((cpp_reader *));
40 static void skip_line_comment PARAMS ((cpp_reader *));
41 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
42 static int skip_comment PARAMS ((cpp_reader *, int));
43 static int copy_comment PARAMS ((cpp_reader *, int));
44 static void skip_string PARAMS ((cpp_reader *, int));
45 static void parse_string PARAMS ((cpp_reader *, int));
46 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
47 static int null_cleanup PARAMS ((cpp_buffer *, cpp_reader *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.
   The new size is N plus twice the old size.  The write position is
   read before the xrealloc call and set again afterwards.  */
53 _cpp_grow_token_buffer (pfile, n)
57 long old_written = CPP_WRITTEN (pfile);
58 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
59 pfile->token_buffer = (U_CHAR *)
60 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
61 CPP_SET_WRITTEN (pfile, old_written);
/* Buffer cleanup callback whose two parameters are both marked unused.
   cpp_push_buffer installs it as the cleanup hook of each buffer it
   creates.  */
65 null_cleanup (pbuf, pfile)
66 cpp_buffer *pbuf ATTRIBUTE_UNUSED;
67 cpp_reader *pfile ATTRIBUTE_UNUSED;
72 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
73 If BUFFER != NULL, then use the LENGTH characters in BUFFER
74 as the new input buffer.
75 Return the new buffer, or NULL on failure. */
78 cpp_push_buffer (pfile, buffer, length)
83 cpp_buffer *buf = CPP_BUFFER (pfile);
/* The depth counter is incremented as part of the test, so it is bumped
   even on the call that reaches CPP_STACK_MAX.  */
85 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
87 cpp_fatal (pfile, "macro or `#include' recursion too deep");
91 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
/* The new buffer takes over the reader's current if_stack, starts with
   the null cleanup hook, and spans BUFFER up to BUFFER + LENGTH with the
   read position at the first byte.  */
93 new->if_stack = pfile->if_stack;
94 new->cleanup = null_cleanup;
95 new->buf = new->cur = buffer;
96 new->rlimit = buffer + length;
99 new->line_base = NULL;
101 CPP_BUFFER (pfile) = new;
/* Pop the current buffer off the input stack of PFILE: run the cleanup
   hook of the buffer, make its predecessor current, and decrement the
   stack depth.  Returns the buffer that is current afterwards.  An
   active mark is reported through cpp_ice before anything else.  */
106 cpp_pop_buffer (pfile)
109 cpp_buffer *buf = CPP_BUFFER (pfile);
110 if (ACTIVE_MARK_P (pfile))
111 cpp_ice (pfile, "mark active in cpp_pop_buffer");
112 (*buf->cleanup) (buf, pfile);
113 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
115 pfile->buffer_stack_depth--;
116 return CPP_BUFFER (pfile);
119 /* Scan until CPP_BUFFER (PFILE) is exhausted into PFILE->token_buffer.
120 Pop the buffer when done. */
123 cpp_scan_buffer (pfile)
126 cpp_buffer *buffer = CPP_BUFFER (pfile);
127 enum cpp_ttype token;
128 if (CPP_OPTION (pfile, no_output))
/* Remember the write position; it is put back with CPP_SET_WRITTEN
   further down in this branch.  */
130 long old_written = CPP_WRITTEN (pfile);
131 /* In no-output mode, we can ignore everything but directives. */
134 if (! pfile->only_seen_white)
135 _cpp_skip_rest_of_line (pfile);
136 token = cpp_get_token (pfile);
137 if (token == CPP_EOF) /* Should not happen ... */
/* CPP_POP while the buffer we started with is still on top: pop it,
   but only if another buffer lies beneath it.  */
139 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
141 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
142 cpp_pop_buffer (pfile);
146 CPP_SET_WRITTEN (pfile, old_written);
152 token = cpp_get_token (pfile);
153 if (token == CPP_EOF) /* Should not happen ... */
155 if (token == CPP_POP && CPP_BUFFER (pfile) == buffer)
157 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) != NULL)
158 cpp_pop_buffer (pfile);
/* cpp_expand_to_buffer: the LENGTH bytes at BUF are copied into
   alloca storage of LENGTH + 1 bytes and pushed as a new input buffer.
   Around the cpp_scan_buffer call, no_output is forced to 0 and
   no_line_commands is incremented; both are put back afterwards, and
   the token buffer is then NUL-terminated.  A negative LENGTH is
   reported through cpp_ice.  */
166 * Rescan a string (which may have escape marks) into pfile's buffer.
167 * Place the result in pfile->token_buffer.
169 * The input is copied before it is scanned, so it is safe to pass
170 * it something from the token_buffer that will get overwritten
171 * (because it follows CPP_WRITTEN). This is used by do_include.
175 cpp_expand_to_buffer (pfile, buf, length)
180 register cpp_buffer *ip;
186 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
190 /* Set up the input on the input stack. */
192 buf1 = (U_CHAR *) alloca (length + 1);
193 memcpy (buf1, buf, length);
196 ip = cpp_push_buffer (pfile, buf1, length);
201 /* Scan the input, create the output. */
202 save_no_output = CPP_OPTION (pfile, no_output);
203 CPP_OPTION (pfile, no_output) = 0;
204 CPP_OPTION (pfile, no_line_commands)++;
205 cpp_scan_buffer (pfile);
206 CPP_OPTION (pfile, no_line_commands)--;
207 CPP_OPTION (pfile, no_output) = save_no_output;
209 CPP_NUL_TERMINATE (pfile);
212 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
215 cpp_file_buffer (pfile)
/* Walk from the current buffer through its predecessors; a non-NULL
   ihash field is what identifies a buffer as belonging to a file.  */
220 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
221 if (ip->ihash != NULL)
226 /* Skip a C-style block comment. We know it's a comment, and point is
227 at the second character of the starter. */
229 skip_block_comment (pfile)
232 unsigned int line, col;
233 const U_CHAR *limit, *cur;
/* LINE and COL record where the comment begins; they are used by the
   unterminated-comment error.  LIMIT and CUR are local copies of the
   buffer bounds, and CUR is written back to the buffer at the end.  */
236 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
237 col = CPP_BUF_COL (CPP_BUFFER (pfile));
238 limit = CPP_BUFFER (pfile)->rlimit;
239 cur = CPP_BUFFER (pfile)->cur;
244 if (c == '\n' || c == '\r')
246 /* \r cannot be a macro escape marker here. */
247 if (!ACTIVE_MARK_P (pfile))
248 CPP_BUMP_LINE_CUR (pfile, cur);
252 /* Check for terminator. */
253 if (cur < limit && *cur == '/')
256 /* Warn about comment starter embedded in comment. */
257 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
258 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
259 cur - CPP_BUFFER (pfile)->line_base,
260 "'/*' within comment");
264 cpp_error_with_line (pfile, line, col, "unterminated comment");
267 CPP_BUFFER (pfile)->cur = cur + 1;
270 /* Skip a C++/Chill line comment. We know it's a comment, and point
271 is at the second character of the initiator. */
273 skip_line_comment (pfile)
281 /* We don't have to worry about EOF in here. */
284 /* Don't consider final '\n' to be part of comment. */
290 /* \r cannot be a macro escape marker here. */
291 if (!ACTIVE_MARK_P (pfile))
292 CPP_BUMP_LINE (pfile);
/* The line counter above is advanced only when no mark is active; the
   warning below is issued only when warn_comments is set.  */
293 if (CPP_OPTION (pfile, warn_comments))
294 cpp_warning (pfile, "backslash-newline within line comment");
299 /* Skip a comment - C, C++, or Chill style. M is the first character
300 of the comment marker. If this really is a comment, skip to its
301 end and return ' '. If this is not a comment, return M (which will
305 skip_comment (pfile, m)
309 if (m == '/' && PEEKC() == '*')
311 skip_block_comment (pfile);
314 else if (m == '/' && PEEKC() == '/')
316 if (CPP_BUFFER (pfile)->system_header_p)
318 /* We silently allow C++ comments in system headers, irrespective
319 of conformance mode, because lots of busted systems do that
320 and trying to clean it up in fixincludes is a nightmare. */
321 skip_line_comment (pfile);
324 else if (CPP_OPTION (pfile, cplusplus_comments))
/* With c89 and pedantic both in effect the complaint is made once per
   buffer; warned_cplusplus_comments records that it has been given.  */
326 if (CPP_OPTION (pfile, c89)
327 && CPP_PEDANTIC (pfile)
328 && ! CPP_BUFFER (pfile)->warned_cplusplus_comments)
331 "C++ style comments are not allowed in ISO C89");
333 "(this will be reported only once per input file)");
334 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
336 skip_line_comment (pfile);
/* Two dashes start a line comment only when the chill option is set.  */
342 else if (m == '-' && PEEKC() == '-'
343 && CPP_OPTION (pfile, chill))
345 skip_line_comment (pfile);
352 /* Identical to skip_comment except that it copies the comment into the
353 token_buffer. This is used if !discard_comments. */
355 copy_comment (pfile, m)
359 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
/* skip_comment hands back M itself when no comment starts here.  */
362 if (skip_comment (pfile, m) == m)
/* START was taken before skip_comment ran and LIMIT is the read
   position after it, so the span between them is the skipped text.  */
365 limit = CPP_BUFFER (pfile)->cur;
366 CPP_RESERVE (pfile, limit - start + 2);
367 CPP_PUTC_Q (pfile, m);
/* NOTE(review): the bound is inclusive (start <= limit), so the byte at
   the new read position is visited as well, whereas parse_string stops
   at start < limit.  Confirm that this is intended.  */
368 for (; start <= limit; start++)
370 CPP_PUTC_Q (pfile, *start);
/* Warn about ignored embedded null characters.  A singular and a plural
   wording are provided; presumably COUNT selects between them.  */
376 null_warning (pfile, count)
381 cpp_warning (pfile, "embedded null character ignored");
383 cpp_warning (pfile, "embedded null characters ignored");
386 /* Skip whitespace \-newline and comments. Does not macro-expand. */
389 _cpp_skip_hspace (pfile)
392 unsigned int null_count = 0;
400 else if (is_hspace(c))
/* Formfeed and vertical tab are diagnosed only in pedantic mode.  */
402 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
403 cpp_pedwarn (pfile, "%s in preprocessing directive",
404 c == '\f' ? "formfeed" : "vertical tab");
410 /* \r is a backslash-newline marker if !has_escapes, and
411 a deletable-whitespace or no-reexpansion marker otherwise. */
412 if (CPP_BUFFER (pfile)->has_escapes)
420 CPP_BUMP_LINE (pfile);
/* A slash or a dash may start a comment; skip_comment decides, and its
   return value replaces C.  */
422 else if (c == '/' || c == '-')
424 c = skip_comment (pfile, c);
434 null_warning (pfile, null_count);
437 /* Read and discard the rest of the current line. */
440 _cpp_skip_rest_of_line (pfile)
/* The line counter is bumped only for buffers without escapes.  */
454 if (! CPP_BUFFER (pfile)->has_escapes)
455 CPP_BUMP_LINE (pfile);
460 skip_string (pfile, c);
465 skip_comment (pfile, c);
/* Formfeed and vertical tab are diagnosed only in pedantic mode.  */
470 if (CPP_PEDANTIC (pfile))
471 cpp_pedwarn (pfile, "%s in preprocessing directive",
472 c == '\f' ? "formfeed" : "vertical tab");
479 /* Parse an identifier starting with C. */
482 _cpp_parse_name (pfile, c)
/* A dollar sign in the name draws a warning in pedantic mode.  */
494 if (c == '$' && CPP_PEDANTIC (pfile))
495 cpp_pedwarn (pfile, "`$' in identifier");
497 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
498 CPP_PUTC_Q (pfile, c);
503 CPP_NUL_TERMINATE_Q (pfile);
507 /* Parse and skip over a string starting with C. A single quoted
508 string is treated like a double -- some programs (e.g., troff) are
509 perverse this way. (However, a single quoted string is not allowed
510 to extend over multiple lines.) */
512 skip_string (pfile, c)
516 unsigned int start_line, start_column;
517 unsigned int null_count = 0;
/* Record where the constant starts; the diagnostics below report this
   position rather than the place where the problem was noticed.  */
519 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
520 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
527 cpp_error_with_line (pfile, start_line, start_column,
528 "unterminated string or character constant");
529 if (pfile->multiline_string_line != start_line
530 && pfile->multiline_string_line != 0)
531 cpp_error_with_line (pfile,
532 pfile->multiline_string_line, -1,
533 "possible real start of unterminated constant");
534 pfile->multiline_string_line = 0;
542 CPP_BUMP_LINE (pfile);
543 /* In Fortran and assembly language, silently terminate
544 strings of either variety at end of line. This is a
545 kludge around not knowing where comments are in these
547 if (CPP_OPTION (pfile, lang_fortran)
548 || CPP_OPTION (pfile, lang_asm))
553 /* Character constants may not extend over multiple lines.
554 In Standard C, neither may strings. We accept multiline
555 strings as an extension. */
558 cpp_error_with_line (pfile, start_line, start_column,
559 "unterminated character constant");
/* multiline_string_line is set to the starting line of a string that
   runs past end of line; the pedantic warning is given only while it
   is still zero.  */
563 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
564 cpp_pedwarn_with_line (pfile, start_line, start_column,
565 "string constant runs past end of line");
566 if (pfile->multiline_string_line == 0)
567 pfile->multiline_string_line = start_line;
571 if (CPP_BUFFER (pfile)->has_escapes)
573 cpp_ice (pfile, "\\r escape inside string constant");
577 /* Backslash newline is replaced by nothing at all. */
578 CPP_BUMP_LINE (pfile);
595 cpp_warning (pfile, "null character in string or character constant");
596 else if (null_count > 1)
597 cpp_warning (pfile, "null characters in string or character constant");
600 /* Parse a string and copy it to the output. */
603 parse_string (pfile, c)
607 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
610 skip_string (pfile, c);
/* START was taken before skip_string ran and LIMIT is the read position
   after it.  Room for limit - start + 2 bytes is reserved, C is written
   first, and the bytes from START up to but excluding LIMIT follow.  */
612 limit = CPP_BUFFER (pfile)->cur;
613 CPP_RESERVE (pfile, limit - start + 2);
614 CPP_PUTC_Q (pfile, c);
615 for (; start < limit; start++)
617 CPP_PUTC_Q (pfile, *start);
620 /* Read an assertion into the token buffer, converting to
621 canonical form: `#predicate(a n swe r)' The next non-whitespace
622 character to read should be the first letter of the predicate.
623 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
624 with answer (see callers for why). In case of 0, an error has been
627 _cpp_parse_assertion (pfile)
631 _cpp_skip_hspace (pfile);
635 cpp_error (pfile, "assertion without predicate");
638 else if (! is_idstart(c))
640 cpp_error (pfile, "assertion predicate is not an identifier");
643 CPP_PUTC(pfile, '#');
645 _cpp_parse_name (pfile, c);
650 if (is_hspace(c) || c == '\r')
651 _cpp_skip_hspace (pfile);
657 CPP_PUTC(pfile, '(');
660 while ((c = GETC()) != ')')
666 CPP_PUTC(pfile, ' ');
670 else if (c == '\n' || c == EOF)
672 if (c == '\n') FORWARD(-1);
673 cpp_error (pfile, "un-terminated assertion answer");
677 /* \r cannot be a macro escape here. */
678 CPP_BUMP_LINE (pfile);
/* Look at the last byte written: a space is overwritten with the
   closing parenthesis, while an opening parenthesis means that no
   answer tokens were written and is reported as an error.  */
686 if (pfile->limit[-1] == ' ')
687 pfile->limit[-1] = ')';
688 else if (pfile->limit[-1] == '(')
690 cpp_error (pfile, "empty token sequence in assertion");
694 CPP_PUTC (pfile, ')');
699 /* Get the next token, and add it to the text in pfile->token_buffer.
700 Return the kind of token we got. */
703 _cpp_lex_token (pfile)
706 register int c, c2, c3;
707 enum cpp_ttype token;
/* With discard_comments set the comment is skipped; otherwise
   copy_comment puts its text into the token buffer.  */
721 if (CPP_OPTION (pfile, discard_comments))
722 c = skip_comment (pfile, c);
724 c = copy_comment (pfile, c);
728 /* Comments are equivalent to spaces.
729 For -traditional, a comment is equivalent to nothing. */
730 if (!CPP_OPTION (pfile, discard_comments))
732 else if (CPP_TRADITIONAL (pfile)
733 && ! is_space (PEEKC ()))
735 if (pfile->parsing_define_directive)
/* While an #if directive is being parsed, try to read an assertion.  */
747 if (pfile->parsing_if_directive)
749 if (_cpp_parse_assertion (pfile))
750 return CPP_ASSERTION;
754 if (pfile->parsing_define_directive && ! CPP_TRADITIONAL (pfile))
756 CPP_RESERVE (pfile, 3);
757 CPP_PUTC_Q (pfile, '#');
758 CPP_NUL_TERMINATE_Q (pfile);
760 return CPP_STRINGIZE;
763 CPP_PUTC_Q (pfile, '#');
764 CPP_NUL_TERMINATE_Q (pfile);
768 if (!pfile->only_seen_white)
770 return CPP_DIRECTIVE;
774 parse_string (pfile, c);
775 return c == '\'' ? CPP_CHAR : CPP_STRING;
778 if (!CPP_OPTION (pfile, dollars_in_ident))
783 if (CPP_OPTION (pfile, cplusplus) && PEEKC () == ':')
791 if (c2 == c || c2 == '=')
808 if (CPP_OPTION (pfile, chill))
809 goto comment; /* Chill style comment */
817 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
819 /* In C++, there's a ->* operator. */
821 CPP_RESERVE (pfile, 4);
822 CPP_PUTC_Q (pfile, c);
823 CPP_PUTC_Q (pfile, GETC ());
824 CPP_PUTC_Q (pfile, GETC ());
825 CPP_NUL_TERMINATE_Q (pfile);
833 if (pfile->parsing_include_directive)
841 if (c == '\n' || c == EOF)
844 "missing '>' in `#include <FILENAME>'");
849 if (!CPP_BUFFER (pfile)->has_escapes)
851 /* Backslash newline is replaced by nothing. */
852 CPP_ADJUST_WRITTEN (pfile, -1);
853 CPP_BUMP_LINE (pfile);
857 /* We might conceivably get \r- or \r<space> in
858 here. Just delete 'em. */
860 if (d != '-' && d != ' ')
861 cpp_ice (pfile, "unrecognized escape \\r%c", d);
862 CPP_ADJUST_WRITTEN (pfile, -1);
868 /* else fall through */
873 /* GNU C++ supports MIN and MAX operators <? and >?. */
874 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
877 CPP_RESERVE (pfile, 4);
879 CPP_PUTC (pfile, c2);
882 CPP_PUTC_Q (pfile, GETC ());
883 CPP_NUL_TERMINATE_Q (pfile);
890 CPP_RESERVE(pfile, 2);
891 CPP_PUTC_Q (pfile, '.');
896 /* In C++ there's a .* operator. */
897 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
900 if (c2 == '.' && PEEKN(1) == '.')
902 CPP_RESERVE(pfile, 4);
903 CPP_PUTC_Q (pfile, '.');
904 CPP_PUTC_Q (pfile, '.');
905 CPP_PUTC_Q (pfile, '.');
907 CPP_NUL_TERMINATE_Q (pfile);
914 CPP_RESERVE(pfile, 3);
915 CPP_PUTC_Q (pfile, c);
916 CPP_PUTC_Q (pfile, GETC ());
917 CPP_NUL_TERMINATE_Q (pfile);
922 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
926 parse_string (pfile, c);
927 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
931 case '0': case '1': case '2': case '3': case '4':
932 case '5': case '6': case '7': case '8': case '9':
937 CPP_RESERVE (pfile, 2);
938 CPP_PUTC_Q (pfile, c);
/* The test below is true when C cannot extend the number.  C extends
   it when it is a numchar or a dot, or when it is a plus or minus sign
   while C2 is e or E, or is p or P with c89 off.  */
942 if (!is_numchar(c) && c != '.'
943 && ((c2 != 'e' && c2 != 'E'
944 && ((c2 != 'p' && c2 != 'P')
945 || CPP_OPTION (pfile, c89)))
946 || (c != '+' && c != '-')))
951 CPP_NUL_TERMINATE_Q (pfile);
953 case 'b': case 'c': case 'd': case 'h': case 'o':
954 case 'B': case 'C': case 'D': case 'H': case 'O':
955 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
957 CPP_RESERVE (pfile, 2);
958 CPP_PUTC_Q (pfile, c);
959 CPP_PUTC_Q (pfile, '\'');
965 goto chill_number_eof;
972 CPP_RESERVE (pfile, 2);
973 CPP_PUTC_Q (pfile, c);
974 CPP_NUL_TERMINATE_Q (pfile);
981 CPP_NUL_TERMINATE (pfile);
988 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
989 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
990 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
991 case 'x': case 'y': case 'z':
992 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
993 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
994 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
997 _cpp_parse_name (pfile, c);
1000 case ' ': case '\t': case '\v': case '\f': case '\0':
1009 CPP_PUTC (pfile, c);
1011 if (c == EOF || !is_hspace(c))
1016 null_warning (pfile, null_count);
1021 if (CPP_BUFFER (pfile)->has_escapes)
1026 if (pfile->output_escapes)
1027 CPP_PUTS (pfile, "\r-", 2);
1028 _cpp_parse_name (pfile, GETC ());
1033 /* "\r " means a space, but only if necessary to prevent
1034 accidental token concatenation. */
1035 CPP_RESERVE (pfile, 2);
1036 if (pfile->output_escapes)
1037 CPP_PUTC_Q (pfile, '\r');
1038 CPP_PUTC_Q (pfile, c);
1043 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1049 /* Backslash newline is ignored. */
1050 CPP_BUMP_LINE (pfile);
1055 CPP_PUTC (pfile, c);
1058 case '(': token = CPP_LPAREN; goto char1;
1059 case ')': token = CPP_RPAREN; goto char1;
1060 case '{': token = CPP_LBRACE; goto char1;
1061 case '}': token = CPP_RBRACE; goto char1;
1062 case ',': token = CPP_COMMA; goto char1;
1063 case ';': token = CPP_SEMICOLON; goto char1;
1069 CPP_PUTC (pfile, c);
1074 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1075 Caller is expected to have checked no_macro_expand. */
1077 maybe_macroexpand (pfile, written)
1081 U_CHAR *macro = pfile->token_buffer + written;
1082 size_t len = CPP_WRITTEN (pfile) - written;
1083 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1087 if (hp->type == T_DISABLED)
1089 if (pfile->output_escapes)
1091 /* Insert a no-reexpand marker before IDENT. */
1092 CPP_RESERVE (pfile, 2);
1093 CPP_ADJUST_WRITTEN (pfile, 2);
1094 macro = pfile->token_buffer + written;
1096 memmove (macro + 2, macro, len);
1102 if (hp->type == T_EMPTY)
1104 /* Special case optimization: macro expands to nothing. */
1105 CPP_SET_WRITTEN (pfile, written);
1106 CPP_PUTC_Q (pfile, ' ');
1110 /* If macro wants an arglist, verify that a '(' follows. */
1111 if (hp->type == T_MACRO && hp->value.defn->nargs >= 0)
1113 int macbuf_whitespace = 0;
/* While the current buffer is a macro buffer: skip horizontal space,
   note in macbuf_whitespace whether the read position moved, and pop
   the buffer unless control leaves through not_macro_call.  */
1116 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1118 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1121 _cpp_skip_hspace (pfile);
1128 if (point != CPP_BUFFER (pfile)->cur)
1129 macbuf_whitespace = 1;
1133 goto not_macro_call;
1134 cpp_pop_buffer (pfile);
1137 CPP_SET_MARK (pfile);
1140 _cpp_skip_hspace (pfile);
1147 CPP_GOTO_MARK (pfile);
/* Space that was skipped inside macro buffers is represented by a
   single space in the output.  */
1152 if (macbuf_whitespace)
1153 CPP_PUTC (pfile, ' ');
1159 /* This is now known to be a macro call.
1160 Expand the macro, reading arguments as needed,
1161 and push the expansion on the input stack. */
1162 _cpp_macroexpand (pfile, hp);
1163 CPP_SET_WRITTEN (pfile, written);
/* Lex one token with _cpp_lex_token and post-process it.  Depending on
   the token, the code below hands a directive to _cpp_handle_directive,
   offers the text written since entry to maybe_macroexpand (unless
   no_macro_expand is set), emits a line command when the line numbers
   of the buffer and the reader disagree, or deals with the end of the
   current buffer.  The only_seen_white and potential_control_macro
   flags are updated along the way.  */
1168 cpp_get_token (pfile)
1171 enum cpp_ttype token;
1172 long written = CPP_WRITTEN (pfile);
1175 token = _cpp_lex_token (pfile);
1180 pfile->potential_control_macro = 0;
1181 pfile->only_seen_white = 0;
1185 if (pfile->only_seen_white == 0)
1186 pfile->only_seen_white = 1;
1187 CPP_BUMP_LINE (pfile);
1188 if (! CPP_OPTION (pfile, no_line_commands))
1191 if (CPP_BUFFER (pfile)->lineno != pfile->lineno)
1192 _cpp_output_line_command (pfile, same_file);
1201 pfile->potential_control_macro = 0;
1202 if (_cpp_handle_directive (pfile))
1203 return CPP_DIRECTIVE;
1204 pfile->only_seen_white = 0;
1205 CPP_PUTC (pfile, '#');
1209 pfile->potential_control_macro = 0;
1210 pfile->only_seen_white = 0;
1211 if (! pfile->no_macro_expand
1212 && maybe_macroexpand (pfile, written))
1217 if (CPP_BUFFER (pfile)->manual_pop)
1218 /* If we've been reading from redirected input, the
1219 frontend will pop the buffer. */
1221 else if (CPP_BUFFER (pfile)->seen_eof)
1223 if (CPP_PREV_BUFFER (CPP_BUFFER (pfile)) == NULL)
1226 cpp_pop_buffer (pfile);
1231 _cpp_handle_eof (pfile);
1237 /* Like cpp_get_token, but skip spaces and comments. */
1240 cpp_get_non_space_token (pfile)
1243 int old_written = CPP_WRITTEN (pfile);
1246 enum cpp_ttype token = cpp_get_token (pfile);
1247 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
/* Rewind the token buffer to the position it had on entry.  */
1249 CPP_SET_WRITTEN (pfile, old_written);
1253 /* Like cpp_get_token, except that it does not execute directives,
1254 does not consume vertical space, and automatically pops off macro
1257 XXX This function will exist only till collect_expansion doesn't
1258 need to see whitespace anymore, then it'll be merged with
1259 _cpp_get_directive_token (below). */
1261 _cpp_get_define_token (pfile)
1265 enum cpp_ttype token;
1268 old_written = CPP_WRITTEN (pfile);
1269 token = _cpp_lex_token (pfile);
1276 /* Put it back and return VSPACE. */
1278 CPP_ADJUST_WRITTEN (pfile, -1);
/* In pedantic mode, look through the text written since old_written
   for vertical tabs and formfeeds and warn about them.  */
1282 if (CPP_PEDANTIC (pfile))
1285 p = pfile->token_buffer + old_written;
1286 limit = CPP_PWRITTEN (pfile);
1289 if (*p == '\v' || *p == '\f')
1290 cpp_pedwarn (pfile, "%s in preprocessing directive",
1291 *p == '\f' ? "formfeed" : "vertical tab");
1298 /* Don't execute the directive, but don't smash it to OTHER either. */
1299 CPP_PUTC (pfile, '#');
1300 return CPP_DIRECTIVE;
1303 if (! pfile->no_macro_expand
1304 && maybe_macroexpand (pfile, old_written))
1309 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1311 cpp_pop_buffer (pfile);
1315 /* This can happen for files that don't end with a newline,
1316 and for cpp_define and friends. Pretend they do, so
1317 callers don't have to deal. A warning will be issued by
1318 someone else, if necessary. */
/* _cpp_get_directive_token fetches tokens with _cpp_get_define_token.
   When the token is CPP_COMMENT or CPP_HSPACE, the token buffer is set
   back to the position saved on entry.  */
1323 /* Just like _cpp_get_define_token except that it discards horizontal
1327 _cpp_get_directive_token (pfile)
1330 int old_written = CPP_WRITTEN (pfile);
1333 enum cpp_ttype token = _cpp_get_define_token (pfile);
1334 if (token != CPP_COMMENT && token != CPP_HSPACE)
1336 CPP_SET_WRITTEN (pfile, old_written);
1340 /* Determine the current line and column. Used only by read_and_prescan. */
1342 find_position (start, limit, linep)
1345 unsigned long *linep;
/* Counting starts from the line number already stored in *LINEP, and
   LBASE starts out at START.  The scan covers START up to but excluding
   LIMIT and tests each byte for a newline or carriage return.  */
1347 unsigned long line = *linep;
1348 U_CHAR *lbase = start;
1349 while (start < limit)
1351 U_CHAR ch = *start++;
1352 if (ch == '\n' || ch == '\r')
1362 /* The following table is used by _cpp_read_and_prescan. If we have
1363 designated initializers, it can be constant data; otherwise, it is
1364 set up at runtime by _cpp_init_input_buffer. */
1367 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1370 #if (GCC_VERSION >= 2007) || (__STDC_VERSION__ >= 199901L)
1371 #define init_chartab() /* nothing */
1372 #define CHARTAB static const unsigned char chartab[UCHAR_MAX + 1] = {
/* s(P, V) sets table entry P to V.  This definition writes it as a
   designated initializer; the other definition of s further down writes
   it as an assignment through the pointer x.  */
1374 #define s(p, v) [p] = v,
1376 #define CHARTAB static unsigned char chartab[UCHAR_MAX + 1] = { 0 }; \
1377 static void init_chartab PARAMS ((void)) { \
1378 unsigned char *x = chartab;
1380 #define s(p, v) x[p] = v;
1383 /* Table of characters that can't be handled in the inner loop.
1384 Also contains the mapping between trigraph third characters and their
1386 #define SPECCASE_CR 1
1387 #define SPECCASE_BACKSLASH 2
1388 #define SPECCASE_QUESTION 3
1391 s('\r', SPECCASE_CR)
1392 s('\\', SPECCASE_BACKSLASH)
1393 s('?', SPECCASE_QUESTION)
1395 s('=', '#') s(')', ']') s('!', '|')
1396 s('(', '[') s('\'', '^') s('>', '}')
1397 s('/', '\\') s('<', '{') s('-', '~')
1404 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1405 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1407 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1408 much memory to allocate initially; more will be allocated if
1409 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1410 canonical form (\n). If enabled, convert and/or warn about
1411 trigraphs. Convert backslash-newline to a one-character escape
1412 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1413 token). If there is no newline at the end of the file, add one and
1414 warn. Returns -1 on failure, or the actual length of the data to
1417 This function does a lot of work, and can be a serious performance
1418 bottleneck. It has been tuned heavily; make sure you understand it
1419 before hacking. The common case - no trigraphs, Unix style line
1420 breaks, backslash-newline set off by whitespace, newline at EOF -
1421 has been optimized at the expense of the others. The performance
1422 penalty for DOS style line breaks (\r\n) is about 15%.
1424 Warnings lose particularly heavily since we have to determine the
1425 line number, which involves scanning from the beginning of the file
1426 or from the last warning. The penalty for the absence of a newline
1427 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1429 If your file has more than one kind of end-of-line marker, you
1430 will get messed-up line numbering.
1432 So that the cases of the switch statement do not have to concern
1433 themselves with the complications of reading beyond the end of the
1434 buffer, the buffer is guaranteed to have at least 3 characters in
1435 it (or however many are left in the file, if less) on entry to the
1436 switch. This is enough to handle trigraphs and the "\\\n\r" and
1439 The end of the buffer is marked by a '\\', which, being a special
1440 character, guarantees we will exit the fast-scan loops and perform
1444 _cpp_read_and_prescan (pfile, fp, desc, len)
1450 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1451 U_CHAR *ip, *op, *line_base;
1454 unsigned int deferred_newlines;
1459 deferred_newlines = 0;
1463 ibase = pfile->input_buffer + 3;
1465 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1469 U_CHAR *near_buff_end;
1471 /* Copy previous char plus unprocessed (at most 2) chars
1472 to beginning of buffer, refill it with another
1473 read(), and continue processing */
1474 memcpy(ip - count - 1, ip - 1, 3);
1477 count = read (desc, ibase, pfile->input_buffer_len);
1481 ibase[count] = '\\'; /* Marks end of buffer */
1484 near_buff_end = pfile->input_buffer + count;
1489 size_t delta_line_base;
1493 This could happen if the file is larger than half the
1494 maximum address space of the machine. */
/* op and line_base point into buf, so they are turned into offsets
   before the xrealloc call and rebuilt from the new buf afterwards.  */
1497 delta_op = op - buf;
1498 delta_line_base = line_base - buf;
1499 buf = (U_CHAR *) xrealloc (buf, len);
1500 op = buf + delta_op;
1501 line_base = buf + delta_line_base;
1508 /* Allow normal processing of the (at most 2) remaining
1509 characters. The end-of-buffer marker is still present
1510 and prevents false matches within the switch. */
1511 near_buff_end = ibase - 1;
1518 /* Deal with \-newline, potentially in the middle of a token. */
1519 if (deferred_newlines)
1521 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1523 /* Previous was not white space. Skip to white
1524 space, if we can, before outputting the \r's */
1526 while (ip[span] != ' '
1529 && NORMAL(ip[span]))
1531 memcpy (op, ip, span);
1534 if (! NORMAL(ip[0]))
/* Each deferred backslash-newline is written out as one \r byte.  */
1537 while (deferred_newlines)
1538 deferred_newlines--, *op++ = '\r';
1541 /* Copy as much as we can without special treatment. */
1543 while (NORMAL (ip[span])) span++;
1544 memcpy (op, ip, span);
1549 if (ip > near_buff_end) /* Do we have enough chars? */
1551 switch (chartab[*ip++])
1553 case SPECCASE_CR: /* \r */
1562 case SPECCASE_BACKSLASH: /* \ */
1565 deferred_newlines++;
1567 if (*ip == '\r') ip++;
1569 else if (*ip == '\r')
1571 deferred_newlines++;
1573 if (*ip == '\n') ip++;
1579 case SPECCASE_QUESTION: /* ? */
1583 *op++ = '?'; /* Normal non-trigraph case */
1592 if (CPP_OPTION (pfile, warn_trigraphs))
1595 line_base = find_position (line_base, op, &line);
1596 col = op - line_base + 1;
1597 if (CPP_OPTION (pfile, trigraphs))
1598 cpp_warning_with_line (pfile, line, col,
1599 "trigraph ??%c converted to %c", d, t);
1601 cpp_warning_with_line (pfile, line, col,
1602 "trigraph ??%c ignored", d);
1606 if (CPP_OPTION (pfile, trigraphs))
1608 op[-1] = t; /* Overwrite '?' */
1613 goto do_speccase; /* May need buffer refill */
1633 line_base = find_position (line_base, op, &line);
1634 col = op - line_base + 1;
/* NOTE(review): this message and the file-is-too-large error further
   down end in a newline, unlike the other diagnostics in this file.
   This looks unintended; confirm before changing.  */
1635 cpp_warning_with_line (pfile, line, col, "no newline at end of file\n");
1636 if (offset + 1 > len)
1639 if (offset + 1 > len)
1641 buf = (U_CHAR *) xrealloc (buf, len);
1647 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
1651 cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset);
1656 cpp_error_from_errno (pfile, fp->ihash->name);
1661 /* Allocate pfile->input_buffer, and initialize chartab[]
1662 if it hasn't happened already. */
1665 _cpp_init_input_buffer (pfile)
1672 /* Determine the appropriate size for the input buffer. Normal C
1673 source files are smaller than eight K. */
1674 /* 8Kbytes of buffer proper, 1 to detect running off the end without
1675 address arithmetic all the time, and 3 for pushback during buffer
1676 refill, in case there's a potential trigraph or end-of-line
1677 digraph at the end of a block. */
1679 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
1680 pfile->input_buffer = tmp;
/* input_buffer_len counts only the 8192 data bytes, not the 1 + 3
   extra bytes included in the allocation above.  */
1681 pfile->input_buffer_len = 8192;