1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
59 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), (list)->name_cap / 2)
67 static void expand_comment_space PARAMS ((cpp_toklist *));
68 void init_trigraph_map PARAMS ((void));
69 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
73 static int skip_block_comment2 PARAMS ((cpp_reader *));
74 static int skip_line_comment2 PARAMS ((cpp_reader *));
75 static void skip_whitespace PARAMS ((cpp_reader *, int));
76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
81 static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
82 unsigned int, unsigned int, unsigned int));
83 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
85 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
87 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
88 cpp_toklist *, unsigned char *,
91 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
94 /* Macros on a cpp_name. */
95 #define INIT_NAME(list, name) \
96 do {(name).len = 0; (name).offset = (list)->name_used;} while (0)
98 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
99 #define COLUMN(cur) ((cur) - buffer->line_base)
101 /* Maybe put these in the ISTABLE eventually. */
102 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
103 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
105 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
106 character, if any, is in buffer. */
107 #define handle_newline(cur, limit, c) \
109 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
111 CPP_BUMP_LINE_CUR (pfile, (cur)); \
114 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
115 #define PREV_TOKEN_TYPE (cur_token[-1].type)
117 /* Order here matters. Those beyond SPELL_NONE store their spelling
118 in the token list, and its length in the token->val.name.len. */
119 #define SPELL_OPERATOR 0
122 #define SPELL_IDENT 3
123 #define SPELL_STRING 4
125 #define T(e, s) {SPELL_OPERATOR, s},
126 #define I(e, s) {SPELL_IDENT, s},
127 #define S(e, s) {SPELL_STRING, s},
128 #define C(e, s) {SPELL_CHAR, s},
129 #define N(e, s) {SPELL_NONE, s},
131 static const struct token_spelling
135 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
143 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
144 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
145 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
146 #define BACKUP_DIGRAPH(ttype) do { \
147 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
149 /* An upper bound on the number of bytes needed to spell a token,
150 including preceding whitespace. */
151 #define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
152 SPELL_NONE ? token->val.name.len: 0))
156 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.
    The new size is N plus twice the previous token_buffer_size.  The
    amount written so far is read before the xrealloc and set again
    afterwards (presumably because the write position is tracked relative
    to the buffer, which may move - confirm against CPP_SET_WRITTEN).  */
159 _cpp_grow_token_buffer (pfile, n)
163 long old_written = CPP_WRITTEN (pfile);
164 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
165 pfile->token_buffer = (U_CHAR *)
166 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
167 CPP_SET_WRITTEN (pfile, old_written);
170 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
171 If BUFFER != NULL, then use the LENGTH characters in BUFFER
172 as the new input buffer.
173 Return the new buffer, or NULL on failure.
    The stack depth is incremented first; reaching CPP_STACK_MAX is
    reported through cpp_fatal as excessive macro or #include recursion.  */
176 cpp_push_buffer (pfile, buffer, length)
178 const U_CHAR *buffer;
181 cpp_buffer *buf = CPP_BUFFER (pfile);
183 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
185 cpp_fatal (pfile, "macro or `#include' recursion too deep");
    /* The buffer structure is zero-filled by xcalloc; only the fields
       below are given explicit values.  */
189 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
191 new->if_stack = pfile->if_stack;
    /* Reading starts at BUFFER and stops LENGTH characters later.  */
192 new->buf = new->cur = buffer;
193 new->rlimit = buffer + length;
196 new->line_base = NULL;
198 CPP_BUFFER (pfile) = new;
203 cpp_pop_buffer (pfile)
206 cpp_buffer *buf = CPP_BUFFER (pfile);
207 if (ACTIVE_MARK_P (pfile))
208 cpp_ice (pfile, "mark active in cpp_pop_buffer");
212 _cpp_unwind_if_stack (pfile, buf);
214 free ((PTR) buf->buf);
215 if (pfile->system_include_depth)
216 pfile->system_include_depth--;
217 if (pfile->potential_control_macro)
219 buf->ihash->control_macro = pfile->potential_control_macro;
220 pfile->potential_control_macro = 0;
222 pfile->input_stack_listing_current = 0;
226 HASHNODE *m = buf->macro;
229 if ((m->type == T_FMACRO && buf->mapped)
230 || m->type == T_SPECLINE || m->type == T_FILE
231 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
232 || m->type == T_STDC)
233 free ((PTR) buf->buf);
235 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
237 pfile->buffer_stack_depth--;
238 return CPP_BUFFER (pfile);
241 /* Deal with the annoying semantics of fwrite. */
243 safe_fwrite (pfile, buf, len, fp)
253 count = fwrite (buf, 1, len, fp);
262 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
265 /* Notify the compiler proper that the current line number has jumped,
266 or the current file name has changed. */
269 output_line_command (pfile, print, line)
274 cpp_buffer *ip = cpp_file_buffer (pfile);
275 enum { same = 0, enter, leave, rname } change;
276 static const char * const codes[] = { "", " 1", " 2", "" };
278 if (CPP_OPTION (pfile, no_line_commands))
281 /* Determine whether the current filename has changed, and if so,
282 how. 'nominal_fname' values are unique, so they can be compared
283 by comparing pointers. */
284 if (ip->nominal_fname == print->last_fname)
288 if (pfile->buffer_stack_depth == print->last_bsd)
292 if (pfile->buffer_stack_depth > print->last_bsd)
296 print->last_bsd = pfile->buffer_stack_depth;
298 print->last_fname = ip->nominal_fname;
300 /* If the current file has not changed, we can output a few newlines
301 instead if we want to increase the line number by a small amount.
302 We cannot do this if print->lineno is zero, because that means we
303 haven't output any line commands yet. (The very first line
304 command output is a `same_file' command.) */
305 if (change == same && print->lineno != 0
306 && line >= print->lineno && line < print->lineno + 8)
308 while (line > print->lineno)
310 putc ('\n', print->outf);
316 #ifndef NO_IMPLICIT_EXTERN_C
317 if (CPP_OPTION (pfile, cplusplus))
318 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
320 ip->system_header_p ? " 3" : "",
321 (ip->system_header_p == 2) ? " 4" : "");
324 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
326 ip->system_header_p ? " 3" : "");
327 print->lineno = line;
330 /* Write the contents of the token_buffer to the output stream, and
331 clear the token_buffer. Also handles generating line commands and
332 keeping track of file transitions. */
335 cpp_output_tokens (pfile, print)
341 if (CPP_WRITTEN (pfile) - print->written)
343 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
345 safe_fwrite (pfile, pfile->token_buffer,
346 CPP_WRITTEN (pfile) - print->written, print->outf);
349 ip = cpp_file_buffer (pfile);
351 output_line_command (pfile, print, CPP_BUF_LINE (ip));
353 CPP_SET_WRITTEN (pfile, print->written);
356 /* Helper for cpp_output_list - increases the column number to match
357 what we expect it to be. */
360 bump_column (print, from, to)
362 unsigned int from, to;
364 unsigned int tabs, spcs;
365 unsigned int delta = to - from;
367 /* Only if FROM is 0, advance by tabs. */
369 tabs = delta / 8, spcs = delta % 8;
371 tabs = 0, spcs = delta;
373 while (tabs--) putc ('\t', print->outf);
374 while (spcs--) putc (' ', print->outf);
377 /* Write out the list L onto pfile->token_buffer. This function is
380 1) pfile->token_buffer is not going to continue to exist.
381 2) At the moment, tokens don't carry the information described
382 in cpplib.h; they are all strings.
383 3) The list has to be a complete line, and has to be written starting
384 at the beginning of a line. */
387 cpp_output_list (pfile, print, list)
390 const cpp_toklist *list;
393 unsigned int curcol = 1;
395 /* XXX Probably does not do what is intended. */
396 if (print->lineno != list->line)
397 output_line_command (pfile, print, list->line);
399 for (i = 0; i < list->tokens_used; i++)
401 if (TOK_TYPE (list, i) == CPP_VSPACE)
403 output_line_command (pfile, print, list->tokens[i].aux);
407 if (curcol < TOK_COL (list, i))
409 /* Insert space to bring the column to what it should be. */
410 bump_column (print, curcol - 1, TOK_COL (list, i));
411 curcol = TOK_COL (list, i);
413 /* XXX We may have to insert space to prevent an accidental
415 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
416 curcol += TOK_LEN (list, i);
420 /* Scan a string (which may have escape marks), perform macro expansion,
421 and write the result to the token_buffer. */
424 _cpp_expand_to_buffer (pfile, buf, length)
430 enum cpp_ttype token;
435 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
439 /* Copy the buffer, because it might be in an unsafe place - for
440 example, a sequence on the token_buffer, where the pointers will
441 be invalidated if we enlarge the token_buffer. */
442 buf1 = alloca (length);
443 memcpy (buf1, buf, length);
445 /* Set up the input on the input stack. */
446 stop = CPP_BUFFER (pfile);
447 if (cpp_push_buffer (pfile, buf1, length) == NULL)
449 CPP_BUFFER (pfile)->has_escapes = 1;
451 /* Scan the input, create the output. */
454 token = cpp_get_token (pfile);
455 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
460 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.
    The stopping point is the buffer beneath the current one on entry:
    scanning is finished once cpp_get_token returns CPP_EOF and that
    buffer has become the current buffer.  The token buffer's written
    position is put back to its value on entry.  */
463 cpp_scan_buffer_nooutput (pfile)
466 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
467 enum cpp_ttype token;
468 unsigned int old_written = CPP_WRITTEN (pfile);
469 /* In no-output mode, we can ignore everything but directives. */
472 if (! pfile->only_seen_white)
473 _cpp_skip_rest_of_line (pfile);
474 token = cpp_get_token (pfile);
475 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
478 CPP_SET_WRITTEN (pfile, old_written);
481 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
484 cpp_scan_buffer (pfile, print)
488 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
489 enum cpp_ttype token;
493 token = cpp_get_token (pfile);
494 if (token == CPP_EOF || token == CPP_VSPACE
495 /* XXX Temporary kluge - force flush after #include only */
496 || (token == CPP_DIRECTIVE
497 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
499 cpp_output_tokens (pfile, print);
500 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
506 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).
    The buffer stack is walked from CPP_BUFFER (pfile) through
    CPP_PREV_BUFFER; a buffer is taken to be a file buffer when its
    ihash field is non-NULL.  */
509 cpp_file_buffer (pfile)
514 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
515 if (ip->ihash != NULL)
520 /* Token-buffer helper functions. */
522 /* Expand a token list's string space.  LIST->name_cap grows by LEN
    and LIST->namebuf is reallocated to the new capacity.  */
524 expand_name_space (list, len)
528 list->name_cap += len;
529 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
532 /* Expand the number of tokens in a list.  The capacity is doubled.
    LIST->tokens points just past a dummy leading token, so the
    reallocation is applied to tokens - 1 and sized for tokens_cap + 1
    entries; the pointer is then advanced past the dummy again.  */
534 expand_token_space (list)
537 list->tokens_cap *= 2;
538 list->tokens = (cpp_token *)
539 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
540 list->tokens++; /* Skip the dummy. */
543 /* Initialize a token list. We allocate an extra token in front of
544 the token list, as this allows us to always peek at the previous
545 token without worrying about underflowing the list. */
547 init_token_list (pfile, list, recycle)
552 /* Recycling a used list saves 3 free-malloc pairs. */
555 /* Initialize token space. Put a dummy token before the start
556 that will fail matches. */
557 list->tokens_cap = 256; /* 4K's worth. */
558 list->tokens = (cpp_token *)
559 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
560 list->tokens[0].type = CPP_EOF;
563 /* Initialize name space. */
564 list->name_cap = 1024;
565 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
567 /* Only create a comment space on demand. */
568 list->comments_cap = 0;
572 list->tokens_used = 0;
574 list->comments_used = 0;
576 list->line = pfile->buffer->lineno;
577 list->dir_handler = 0;
581 /* Scan an entire line and create a token list for it. Does not
582 macro-expand or execute directives. */
585 _cpp_scan_line (pfile, list)
594 init_token_list (pfile, list, 1);
596 written = CPP_WRITTEN (pfile);
601 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
602 type = _cpp_lex_token (pfile);
603 len = CPP_WRITTEN (pfile) - written;
604 CPP_SET_WRITTEN (pfile, written);
605 if (type == CPP_HSPACE)
607 if (CPP_PEDANTIC (pfile))
608 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
612 else if (type == CPP_COMMENT)
613 /* Only happens when processing -traditional macro definitions.
614 Do not give this a token entry, but do not change space_before
618 if (list->tokens_used >= list->tokens_cap)
619 expand_token_space (list);
620 if (list->name_used + len >= list->name_cap)
621 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
623 if (type == CPP_MACRO)
627 TOK_TYPE (list, i) = type;
628 TOK_COL (list, i) = col;
629 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
631 if (type == CPP_VSPACE)
634 TOK_LEN (list, i) = len;
635 TOK_OFFSET (list, i) = list->name_used;
636 memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
637 list->name_used += len;
641 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
643 /* XXX Temporary kluge: put back the newline. */
648 /* Skip a C-style block comment. We know it's a comment, and point is
649 at the second character of the starter.
    The line and column at entry are recorded so that an unterminated
    comment is reported at the position where the comment began.  On
    exit the buffer cursor is set to one past the local scan pointer.  */
651 skip_block_comment (pfile)
654 unsigned int line, col;
655 const U_CHAR *limit, *cur;
658 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
659 col = CPP_BUF_COL (CPP_BUFFER (pfile));
660 limit = CPP_BUFFER (pfile)->rlimit;
661 cur = CPP_BUFFER (pfile)->cur;
666 if (c == '\n' || c == '\r')
668 /* \r cannot be a macro escape marker here. */
669 if (!ACTIVE_MARK_P (pfile))
670 CPP_BUMP_LINE_CUR (pfile, cur);
674 /* Check for terminator. */
675 if (cur < limit && *cur == '/')
678 /* Warn about comment starter embedded in comment. */
679 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
680 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
681 cur - CPP_BUFFER (pfile)->line_base,
682 "'/*' within comment");
686 cpp_error_with_line (pfile, line, col, "unterminated comment");
689 CPP_BUFFER (pfile)->cur = cur + 1;
692 /* Skip a C++/Chill line comment. We know it's a comment, and point
693 is at the second character of the initiator. */
695 skip_line_comment (pfile)
703 /* We don't have to worry about EOF in here. */
706 /* Don't consider final '\n' to be part of comment. */
712 /* \r cannot be a macro escape marker here. */
713 if (!ACTIVE_MARK_P (pfile))
714 CPP_BUMP_LINE (pfile);
715 if (CPP_OPTION (pfile, warn_comments))
716 cpp_warning (pfile, "backslash-newline within line comment");
721 /* Skip a comment - C, C++, or Chill style. M is the first character
722 of the comment marker. If this really is a comment, skip to its
723 end and return ' '. If this is not a comment, return M (which will
727 skip_comment (pfile, m)
731 if (m == '/' && PEEKC() == '*')
733 skip_block_comment (pfile);
736 else if (m == '/' && PEEKC() == '/')
738 if (CPP_BUFFER (pfile)->system_header_p)
740 /* We silently allow C++ comments in system headers, irrespective
741 of conformance mode, because lots of busted systems do that
742 and trying to clean it up in fixincludes is a nightmare. */
743 skip_line_comment (pfile);
746 else if (CPP_OPTION (pfile, cplusplus_comments))
748 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
750 if (CPP_WTRADITIONAL (pfile))
752 "C++ style comments are not allowed in traditional C");
753 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
755 "C++ style comments are not allowed in ISO C89");
756 if (CPP_WTRADITIONAL (pfile)
757 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
759 "(this will be reported only once per input file)");
760 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
762 skip_line_comment (pfile);
768 else if (m == '-' && PEEKC() == '-'
769 && CPP_OPTION (pfile, chill))
771 skip_line_comment (pfile);
778 /* Identical to skip_comment except that it copies the comment into the
779 token_buffer. This is used if !discard_comments.
    If skip_comment returns M, the text was not a comment.  Otherwise M
    is written first, followed by the buffer text from the cursor
    position on entry up to the cursor position after the skip.  */
781 copy_comment (pfile, m)
785 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
788 if (skip_comment (pfile, m) == m)
791 limit = CPP_BUFFER (pfile)->cur;
    /* Room for M plus the limit - start + 1 characters visited below.  */
792 CPP_RESERVE (pfile, limit - start + 2);
793 CPP_PUTC_Q (pfile, m);
    /* NOTE(review): this bound is inclusive of LIMIT, whereas the
       otherwise similar loop in parse_string stops before LIMIT.  The
       reserve above matches the inclusive count, so this looks
       deliberate, but it does examine the character at the
       post-comment cursor - confirm that is always inside the buffer.  */
794 for (; start <= limit; start++)
796 CPP_PUTC_Q (pfile, *start);
802 null_warning (pfile, count)
807 cpp_warning (pfile, "embedded null character ignored");
809 cpp_warning (pfile, "embedded null characters ignored");
812 /* Skip whitespace \-newline and comments. Does not macro-expand. */
815 _cpp_skip_hspace (pfile)
818 unsigned int null_count = 0;
826 else if (is_hspace(c))
828 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
829 cpp_pedwarn (pfile, "%s in preprocessing directive",
830 c == '\f' ? "formfeed" : "vertical tab");
836 /* \r is a backslash-newline marker if !has_escapes, and
837 a deletable-whitespace or no-reexpansion marker otherwise. */
838 if (CPP_BUFFER (pfile)->has_escapes)
846 CPP_BUMP_LINE (pfile);
848 else if (c == '/' || c == '-')
850 c = skip_comment (pfile, c);
860 null_warning (pfile, null_count);
863 /* Read and discard the rest of the current line. */
866 _cpp_skip_rest_of_line (pfile)
880 if (! CPP_BUFFER (pfile)->has_escapes)
881 CPP_BUMP_LINE (pfile);
886 skip_string (pfile, c);
891 skip_comment (pfile, c);
896 if (CPP_PEDANTIC (pfile))
897 cpp_pedwarn (pfile, "%s in preprocessing directive",
898 c == '\f' ? "formfeed" : "vertical tab");
905 /* Parse an identifier starting with C. */
908 _cpp_parse_name (pfile, c)
920 if (c == '$' && CPP_PEDANTIC (pfile))
921 cpp_pedwarn (pfile, "`$' in identifier");
923 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
924 CPP_PUTC_Q (pfile, c);
932 /* Parse and skip over a string starting with C. A single quoted
933 string is treated like a double -- some programs (e.g., troff) are
934 perverse this way. (However, a single quoted string is not allowed
935 to extend over multiple lines.) */
937 skip_string (pfile, c)
941 unsigned int start_line, start_column;
942 unsigned int null_count = 0;
944 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
945 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
952 cpp_error_with_line (pfile, start_line, start_column,
953 "unterminated string or character constant");
954 if (pfile->multiline_string_line != start_line
955 && pfile->multiline_string_line != 0)
956 cpp_error_with_line (pfile,
957 pfile->multiline_string_line, -1,
958 "possible real start of unterminated constant");
959 pfile->multiline_string_line = 0;
967 CPP_BUMP_LINE (pfile);
968 /* In Fortran and assembly language, silently terminate
969 strings of either variety at end of line. This is a
970 kludge around not knowing where comments are in these
972 if (CPP_OPTION (pfile, lang_fortran)
973 || CPP_OPTION (pfile, lang_asm))
978 /* Character constants may not extend over multiple lines.
979 In Standard C, neither may strings. We accept multiline
980 strings as an extension. */
983 cpp_error_with_line (pfile, start_line, start_column,
984 "unterminated character constant");
988 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
989 cpp_pedwarn_with_line (pfile, start_line, start_column,
990 "string constant runs past end of line");
991 if (pfile->multiline_string_line == 0)
992 pfile->multiline_string_line = start_line;
996 if (CPP_BUFFER (pfile)->has_escapes)
998 cpp_ice (pfile, "\\r escape inside string constant");
1002 /* Backslash newline is replaced by nothing at all. */
1003 CPP_BUMP_LINE (pfile);
1019 if (null_count == 1)
1020 cpp_warning (pfile, "null character in string or character constant");
1021 else if (null_count > 1)
1022 cpp_warning (pfile, "null characters in string or character constant");
1025 /* Parse a string and copy it to the output.  The string is first
     passed over with skip_string; then C is written to the token buffer,
     followed by the buffer text from the cursor position on entry up to
     (not including) the cursor position after the skip.  */
1028 parse_string (pfile, c)
1032 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1033 const U_CHAR *limit;
1035 skip_string (pfile, c);
1037 limit = CPP_BUFFER (pfile)->cur;
1038 CPP_RESERVE (pfile, limit - start + 2);
1039 CPP_PUTC_Q (pfile, c);
1040 for (; start < limit; start++)
1042 CPP_PUTC_Q (pfile, *start);
1045 /* Read an assertion into the token buffer, converting to
1046 canonical form: `#predicate(a n swe r)' The next non-whitespace
1047 character to read should be the first letter of the predicate.
1048 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
1049 with answer (see callers for why). In case of 0, an error has been
1052 _cpp_parse_assertion (pfile)
1056 _cpp_skip_hspace (pfile);
1060 cpp_error (pfile, "assertion without predicate");
1063 else if (! is_idstart(c))
1065 cpp_error (pfile, "assertion predicate is not an identifier");
1068 CPP_PUTC(pfile, '#');
1070 _cpp_parse_name (pfile, c);
1075 if (is_hspace(c) || c == '\r')
1076 _cpp_skip_hspace (pfile);
1082 CPP_PUTC(pfile, '(');
1085 while ((c = GETC()) != ')')
1091 CPP_PUTC(pfile, ' ');
1095 else if (c == '\n' || c == EOF)
1097 if (c == '\n') FORWARD(-1);
1098 cpp_error (pfile, "un-terminated assertion answer");
1102 /* \r cannot be a macro escape here. */
1103 CPP_BUMP_LINE (pfile);
1106 CPP_PUTC (pfile, c);
1111 if (pfile->limit[-1] == ' ')
1112 pfile->limit[-1] = ')';
1113 else if (pfile->limit[-1] == '(')
1115 cpp_error (pfile, "empty token sequence in assertion");
1119 CPP_PUTC (pfile, ')');
1124 /* Get the next token, and add it to the text in pfile->token_buffer.
1125 Return the kind of token we got. */
1128 _cpp_lex_token (pfile)
1132 enum cpp_ttype token;
1134 if (CPP_BUFFER (pfile) == NULL)
1145 if (PEEKC () == '=')
1149 if (CPP_OPTION (pfile, discard_comments))
1150 c = skip_comment (pfile, c);
1152 c = copy_comment (pfile, c);
1156 /* Comments are equivalent to spaces.
1157 For -traditional, a comment is equivalent to nothing. */
1158 if (!CPP_OPTION (pfile, discard_comments))
1160 else if (CPP_TRADITIONAL (pfile))
1162 if (pfile->parsing_define_directive)
1168 CPP_PUTC (pfile, c);
1173 CPP_PUTC (pfile, c);
1176 if (pfile->parsing_if_directive)
1178 CPP_ADJUST_WRITTEN (pfile, -1);
1179 if (_cpp_parse_assertion (pfile))
1180 return CPP_ASSERTION;
1184 if (pfile->parsing_define_directive)
1190 CPP_PUTC (pfile, c2);
1192 else if (c2 == '%' && PEEKN (1) == ':')
1194 /* Digraph: "%:" == "#". */
1196 CPP_RESERVE (pfile, 2);
1197 CPP_PUTC_Q (pfile, c2);
1198 CPP_PUTC_Q (pfile, GETC ());
1206 if (!pfile->only_seen_white)
1209 /* Remove the "#" or "%:" from the token buffer. */
1210 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1211 return CPP_DIRECTIVE;
1215 parse_string (pfile, c);
1216 return c == '\'' ? CPP_CHAR : CPP_STRING;
1219 if (!CPP_OPTION (pfile, dollars_in_ident))
1225 /* Digraph: ":>" == "]". */
1227 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1235 if (c2 == c || c2 == '=')
1240 /* Digraphs: "%:" == "#", "%>" == "}". */
1245 CPP_RESERVE (pfile, 2);
1246 CPP_PUTC_Q (pfile, c);
1247 CPP_PUTC_Q (pfile, c2);
1253 CPP_RESERVE (pfile, 2);
1254 CPP_PUTC_Q (pfile, c);
1255 CPP_PUTC_Q (pfile, c2);
1256 return CPP_OPEN_BRACE;
1258 /* else fall through */
1264 if (PEEKC () == '=')
1272 if (CPP_OPTION (pfile, chill))
1273 goto comment; /* Chill style comment */
1281 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1283 /* In C++, there's a ->* operator. */
1285 CPP_RESERVE (pfile, 4);
1286 CPP_PUTC_Q (pfile, c);
1287 CPP_PUTC_Q (pfile, GETC ());
1288 CPP_PUTC_Q (pfile, GETC ());
1296 if (pfile->parsing_include_directive)
1300 CPP_PUTC (pfile, c);
1304 if (c == '\n' || c == EOF)
1307 "missing '>' in `#include <FILENAME>'");
1312 if (!CPP_BUFFER (pfile)->has_escapes)
1314 /* Backslash newline is replaced by nothing. */
1315 CPP_ADJUST_WRITTEN (pfile, -1);
1316 CPP_BUMP_LINE (pfile);
1320 /* We might conceivably get \r- or \r<space> in
1321 here. Just delete 'em. */
1323 if (d != '-' && d != ' ')
1324 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1325 CPP_ADJUST_WRITTEN (pfile, -1);
1331 /* Digraphs: "<%" == "{", "<:" == "[". */
1336 CPP_RESERVE (pfile, 2);
1337 CPP_PUTC_Q (pfile, c);
1338 CPP_PUTC_Q (pfile, c2);
1339 return CPP_CLOSE_BRACE;
1343 /* else fall through */
1348 /* GNU C++ supports MIN and MAX operators <? and >?. */
1349 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1352 CPP_RESERVE (pfile, 3);
1353 CPP_PUTC_Q (pfile, c);
1354 CPP_PUTC_Q (pfile, c2);
1355 if (PEEKC () == '=')
1356 CPP_PUTC_Q (pfile, GETC ());
1363 CPP_PUTC (pfile, c);
1368 /* In C++ there's a .* operator. */
1369 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1372 if (c2 == '.' && PEEKN(1) == '.')
1374 CPP_RESERVE (pfile, 3);
1375 CPP_PUTC_Q (pfile, '.');
1376 CPP_PUTC_Q (pfile, '.');
1377 CPP_PUTC_Q (pfile, '.');
1379 return CPP_ELLIPSIS;
1384 CPP_RESERVE (pfile, 2);
1385 CPP_PUTC_Q (pfile, c);
1386 CPP_PUTC_Q (pfile, GETC ());
1391 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1393 CPP_PUTC (pfile, c);
1395 parse_string (pfile, c);
1396 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1400 case '0': case '1': case '2': case '3': case '4':
1401 case '5': case '6': case '7': case '8': case '9':
1406 CPP_RESERVE (pfile, 2);
1407 CPP_PUTC_Q (pfile, c);
1411 if (!is_numchar(c) && c != '.'
1412 && ((c2 != 'e' && c2 != 'E'
1413 && ((c2 != 'p' && c2 != 'P')
1414 || CPP_OPTION (pfile, c89)))
1415 || (c != '+' && c != '-')))
1421 case 'b': case 'c': case 'd': case 'h': case 'o':
1422 case 'B': case 'C': case 'D': case 'H': case 'O':
1423 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1425 CPP_RESERVE (pfile, 2);
1426 CPP_PUTC_Q (pfile, c);
1427 CPP_PUTC_Q (pfile, '\'');
1433 goto chill_number_eof;
1436 CPP_PUTC (pfile, c);
1440 CPP_RESERVE (pfile, 2);
1441 CPP_PUTC_Q (pfile, c);
1454 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1455 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1456 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1457 case 'x': case 'y': case 'z':
1458 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1459 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1460 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1463 _cpp_parse_name (pfile, c);
1466 case ' ': case '\t': case '\v': case '\f': case '\0':
1475 CPP_PUTC (pfile, c);
1477 if (c == EOF || !is_hspace(c))
1482 null_warning (pfile, null_count);
1487 if (CPP_BUFFER (pfile)->has_escapes)
1492 if (pfile->output_escapes)
1493 CPP_PUTS (pfile, "\r-", 2);
1494 _cpp_parse_name (pfile, GETC ());
1499 /* "\r " means a space, but only if necessary to prevent
1500 accidental token concatenation. */
1501 CPP_RESERVE (pfile, 2);
1502 if (pfile->output_escapes)
1503 CPP_PUTC_Q (pfile, '\r');
1504 CPP_PUTC_Q (pfile, c);
1509 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1515 /* Backslash newline is ignored. */
1516 if (!ACTIVE_MARK_P (pfile))
1517 CPP_BUMP_LINE (pfile);
1522 CPP_PUTC (pfile, c);
1525 case '(': token = CPP_OPEN_PAREN; goto char1;
1526 case ')': token = CPP_CLOSE_PAREN; goto char1;
1527 case '{': token = CPP_OPEN_BRACE; goto char1;
1528 case '}': token = CPP_CLOSE_BRACE; goto char1;
1529 case ',': token = CPP_COMMA; goto char1;
1530 case ';': token = CPP_SEMICOLON; goto char1;
1536 CPP_PUTC (pfile, c);
1541 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1542 Caller is expected to have checked no_macro_expand. */
1544 maybe_macroexpand (pfile, written)
1548 U_CHAR *macro = pfile->token_buffer + written;
1549 size_t len = CPP_WRITTEN (pfile) - written;
1550 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1552 /* _cpp_lookup never returns null. */
1553 if (hp->type == T_VOID)
1555 if (hp->disabled || hp->type == T_IDENTITY)
1557 if (pfile->output_escapes)
1559 /* Insert a no-reexpand marker before IDENT. */
1560 CPP_RESERVE (pfile, 2);
1561 CPP_ADJUST_WRITTEN (pfile, 2);
1562 macro = pfile->token_buffer + written;
1564 memmove (macro + 2, macro, len);
1570 if (hp->type == T_EMPTY)
1572 /* Special case optimization: macro expands to nothing. */
1573 CPP_SET_WRITTEN (pfile, written);
1574 CPP_PUTC_Q (pfile, ' ');
1578 /* If macro wants an arglist, verify that a '(' follows. */
1579 if (hp->type == T_FMACRO)
1581 int macbuf_whitespace = 0;
1584 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1586 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1589 _cpp_skip_hspace (pfile);
1596 if (point != CPP_BUFFER (pfile)->cur)
1597 macbuf_whitespace = 1;
1601 goto not_macro_call;
1602 cpp_pop_buffer (pfile);
1605 CPP_SET_MARK (pfile);
1608 _cpp_skip_hspace (pfile);
1615 CPP_GOTO_MARK (pfile);
1620 if (macbuf_whitespace)
1621 CPP_PUTC (pfile, ' ');
1627 /* This is now known to be a macro call.
1628 Expand the macro, reading arguments as needed,
1629 and push the expansion on the input stack. */
1630 _cpp_macroexpand (pfile, hp);
1631 CPP_SET_WRITTEN (pfile, written);
1635 /* Complain about \v or \f in a preprocessing directive (constraint
1636 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
/* P and LEN describe the text to examine; the caller in this file
   passes a pointer into the token buffer and a byte count.  The
   characters are inspected through *P.  */
1638 pedantic_whitespace (pfile, p, len)
1646 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1647 else if (*p == '\f')
1648 cpp_pedwarn (pfile, "form feed in preprocessing directive");
/* Lex one token with _cpp_lex_token and act on it: maintain the
   only_seen_white and potential_control_macro state in PFILE, pass a
   '#' to _cpp_handle_directive, try macro expansion on a name, and pop
   input buffers.  */
1656 cpp_get_token (pfile)
1659 enum cpp_ttype token;
/* Output position before the token is lexed; handed to
   maybe_macroexpand below as the start of the name.  */
1660 long written = CPP_WRITTEN (pfile);
1663 token = _cpp_lex_token (pfile);
1668 pfile->potential_control_macro = 0;
1669 pfile->only_seen_white = 0;
1673 if (pfile->only_seen_white == 0)
1674 pfile->only_seen_white = 1;
1675 CPP_BUMP_LINE (pfile);
1683 pfile->potential_control_macro = 0;
/* If _cpp_handle_directive returns non-zero the result is
   CPP_DIRECTIVE; otherwise the '#' is written to the output.  */
1684 if (_cpp_handle_directive (pfile))
1685 return CPP_DIRECTIVE;
1686 pfile->only_seen_white = 0;
1687 CPP_PUTC (pfile, '#');
1691 pfile->potential_control_macro = 0;
1692 pfile->only_seen_white = 0;
/* Macro expansion is attempted only when no_macro_expand is clear.  */
1693 if (! pfile->no_macro_expand
1694 && maybe_macroexpand (pfile, written))
1699 if (CPP_BUFFER (pfile) == NULL)
1701 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1703 cpp_pop_buffer (pfile);
1706 cpp_pop_buffer (pfile);
1711 /* Like cpp_get_token, but skip spaces and comments. */
/* The token types treated as skippable are CPP_COMMENT, CPP_HSPACE and
   CPP_VSPACE.  The output position is saved on entry so that text can
   be discarded by rewinding to it; presumably that rewind applies only
   to skipped tokens (TODO confirm).  */
1714 cpp_get_non_space_token (pfile)
1717 int old_written = CPP_WRITTEN (pfile);
1720 enum cpp_ttype token = cpp_get_token (pfile);
1721 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1723 CPP_SET_WRITTEN (pfile, old_written);
1727 /* Like cpp_get_token, except that it does not execute directives,
1728 does not consume vertical space, and discards horizontal space. */
/* Lexes with _cpp_lex_token directly, remembering the output position
   beforehand so that text can be dropped again with CPP_SET_WRITTEN.  */
1730 _cpp_get_directive_token (pfile)
1734 enum cpp_ttype token;
/* AT_BOL is true when the read pointer is still at line_base, i.e.
   nothing on the current line has been consumed; it is sampled before
   the token is lexed.  */
1738 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1739 old_written = CPP_WRITTEN (pfile);
1740 token = _cpp_lex_token (pfile);
1747 /* Put it back and return VSPACE. */
1749 CPP_ADJUST_WRITTEN (pfile, -1);
1753 /* The purpose of this rather strange check is to prevent pedantic
1754 warnings for ^L in an #ifdefed out block. */
1755 if (CPP_PEDANTIC (pfile) && ! at_bol)
1756 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1757 CPP_WRITTEN (pfile) - old_written);
/* The text just lexed is removed from the output again.  */
1758 CPP_SET_WRITTEN (pfile, old_written);
1763 /* Don't execute the directive, but don't smash it to OTHER either. */
1764 CPP_PUTC (pfile, '#');
1765 return CPP_DIRECTIVE;
/* As in cpp_get_token, expansion is attempted only when
   no_macro_expand is clear; the name starts at OLD_WRITTEN.  */
1768 if (! pfile->no_macro_expand
1769 && maybe_macroexpand (pfile, old_written))
1774 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1776 cpp_pop_buffer (pfile);
1780 /* This can happen for files that don't end with a newline,
1781 and for cpp_define and friends. Pretend they do, so
1782 callers don't have to deal. A warning will be issued by
1783 someone else, if necessary. */
1788 /* Determine the current line and column. Used only by read_and_prescan. */
/* Scans the bytes in [START, LIMIT) looking for '\n' and '\r'.  *LINEP
   supplies the line number to start counting from.  Declared at the
   top of this file as returning U_CHAR *; the callers in this file
   store the result in their line_base variable.  */
1790 find_position (start, limit, linep)
1793 unsigned long *linep;
1795 unsigned long line = *linep;
/* LBASE starts out at START.  */
1796 U_CHAR *lbase = start;
1797 while (start < limit)
1799 U_CHAR ch = *start++;
1800 if (ch == '\n' || ch == '\r')
1810 /* The following table is used by _cpp_read_and_prescan. If we have
1811 designated initializers, it can be constant data; otherwise, it is
1812 set up at runtime by _cpp_init_input_buffer. */
1815 #define UCHAR_MAX 255 /* assume 8-bit bytes */
/* With GCC_VERSION >= 2007 init_chartab() expands to nothing, CHARTAB
   opens the initializer of a const array, and each s(p, v) is a
   designated initializer for element P.  */
1818 #if (GCC_VERSION >= 2007)
1819 #define init_chartab() /* nothing */
1820 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1822 #define s(p, v) [p] = v,
/* Alternative definitions: the array is zero-initialized, CHARTAB also
   opens the body of a function init_chartab, and each s(p, v) is an
   assignment to element P through X.  */
1824 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1825 static void init_chartab PARAMS ((void)) { \
1826 unsigned char *x = chartab;
1828 #define s(p, v) x[p] = v;
1831 /* Table of characters that can't be handled in the inner loop.
1832 Also contains the mapping between trigraph third characters and their
/* Codes 1 to 3 are stored in the table for the three characters that
   get a case of their own in the prescan switch.  */
1834 #define SPECCASE_CR 1
1835 #define SPECCASE_BACKSLASH 2
1836 #define SPECCASE_QUESTION 3
1839 s('\r', SPECCASE_CR)
1840 s('\\', SPECCASE_BACKSLASH)
1841 s('?', SPECCASE_QUESTION)
/* For the third character of each trigraph the entry is the
   replacement character itself.  */
1843 s('=', '#') s(')', ']') s('!', '|')
1844 s('(', '[') s('\'', '^') s('>', '}')
1845 s('/', '\\') s('<', '{') s('-', '~')
/* NORMAL (c): the table entry for C is 0 or greater than
   SPECCASE_QUESTION, i.e. C is not one of the three special characters.
   NONTRI (c): the value C itself is at most SPECCASE_QUESTION.  */
1852 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1853 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1855 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1856 much memory to allocate initially; more will be allocated if
1857 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1858 canonical form (\n). If enabled, convert and/or warn about
1859 trigraphs. Convert backslash-newline to a one-character escape
1860 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1861 token). If there is no newline at the end of the file, add one and
1862 warn. Returns -1 on failure, or the actual length of the data to
1865 This function does a lot of work, and can be a serious performance
1866 bottleneck. It has been tuned heavily; make sure you understand it
1867 before hacking. The common case - no trigraphs, Unix style line
1868 breaks, backslash-newline set off by whitespace, newline at EOF -
1869 has been optimized at the expense of the others. The performance
1870 penalty for DOS style line breaks (\r\n) is about 15%.
1872 Warnings lose particularly heavily since we have to determine the
1873 line number, which involves scanning from the beginning of the file
1874 or from the last warning. The penalty for the absence of a newline
1875 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1877 If your file has more than one kind of end-of-line marker, you
1878 will get messed-up line numbering.
1880 So that the cases of the switch statement do not have to concern
1881 themselves with the complications of reading beyond the end of the
1882 buffer, the buffer is guaranteed to have at least 3 characters in
1883 it (or however many are left in the file, if less) on entry to the
1884 switch. This is enough to handle trigraphs and the "\\\n\r" and
1887 The end of the buffer is marked by a '\\', which, being a special
1888 character, guarantees we will exit the fast-scan loops and perform
/* BUF is the output buffer: LEN bytes to start with, enlarged with
   xrealloc.  Input is read from descriptor DESC in chunks into
   pfile->input_buffer; IP walks the input and OP the output.  */
1892 _cpp_read_and_prescan (pfile, fp, desc, len)
1898 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1899 U_CHAR *ip, *op, *line_base;
/* Count of backslash-newlines seen but not yet written out as '\r'.  */
1902 unsigned int deferred_newlines;
1907 deferred_newlines = 0;
/* Each read() stores its data starting 3 bytes into input_buffer.  */
1911 ibase = pfile->input_buffer + 3;
1913 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1917 U_CHAR *near_buff_end;
1919 count = read (desc, ibase, pfile->input_buffer_len);
1923 ibase[count] = '\\'; /* Marks end of buffer */
1926 near_buff_end = pfile->input_buffer + count;
1931 size_t delta_line_base;
1935 This could happen if the file is larger than half the
1936 maximum address space of the machine. */
/* xrealloc may move BUF, so OP and LINE_BASE are saved as offsets and
   rebuilt from the new BUF afterwards.  */
1939 delta_op = op - buf;
1940 delta_line_base = line_base - buf;
1941 buf = (U_CHAR *) xrealloc (buf, len);
1942 op = buf + delta_op;
1943 line_base = buf + delta_line_base;
1950 /* Allow normal processing of the (at most 2) remaining
1951 characters. The end-of-buffer marker is still present
1952 and prevents false matches within the switch. */
1953 near_buff_end = ibase - 1;
1960 /* Deal with \-newline, potentially in the middle of a token. */
1961 if (deferred_newlines)
1963 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1965 /* Previous was not white space. Skip to white
1966 space, if we can, before outputting the \r's */
1968 while (ip[span] != ' '
1971 && NORMAL(ip[span]))
1973 memcpy (op, ip, span);
1976 if (! NORMAL(ip[0]))
/* One '\r' is written to the output per deferred backslash-newline.  */
1979 while (deferred_newlines)
1980 deferred_newlines--, *op++ = '\r';
1983 /* Copy as much as we can without special treatment. */
1985 while (NORMAL (ip[span])) span++;
1986 memcpy (op, ip, span);
1991 if (ip > near_buff_end) /* Do we have enough chars? */
/* Dispatch on the table entry of the special character just read.  */
1993 switch (chartab[*ip++])
1995 case SPECCASE_CR: /* \r */
2004 case SPECCASE_BACKSLASH: /* \ */
2007 deferred_newlines++;
2009 if (*ip == '\r') ip++;
2011 else if (*ip == '\r')
2013 deferred_newlines++;
2015 if (*ip == '\n') ip++;
2021 case SPECCASE_QUESTION: /* ? */
2025 *op++ = '?'; /* Normal non-trigraph case */
/* Trigraph warnings need a line and column, obtained by scanning the
   output written so far with find_position.  */
2034 if (CPP_OPTION (pfile, warn_trigraphs))
2037 line_base = find_position (line_base, op, &line);
2038 col = op - line_base + 1;
2039 if (CPP_OPTION (pfile, trigraphs))
2040 cpp_warning_with_line (pfile, line, col,
2041 "trigraph ??%c converted to %c", d, t);
2043 cpp_warning_with_line (pfile, line, col,
2044 "trigraph ??%c ignored", d);
2048 if (CPP_OPTION (pfile, trigraphs))
2050 op[-1] = t; /* Overwrite '?' */
2055 goto do_speccase; /* May need buffer refill */
2067 /* Copy previous char plus unprocessed (at most 2) chars
2068 to beginning of buffer, refill it with another
2069 read(), and continue processing */
2070 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2080 line_base = find_position (line_base, op, &line);
2081 col = op - line_base + 1;
2082 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2083 if (offset + 1 > len)
2086 if (offset + 1 > len)
2088 buf = (U_CHAR *) xrealloc (buf, len);
/* If LEN - OFFSET is below 20 the buffer is kept as it is; otherwise
   it is reallocated down to OP - BUF bytes.  */
2094 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2098 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2099 (unsigned long)offset);
2104 cpp_error_from_errno (pfile, fp->ihash->name);
2109 /* Allocate pfile->input_buffer, and initialize chartab[]
2110 if it hasn't happened already. */
/* Besides the input buffer, this also sets up pfile->directbuf with
   init_token_list.  */
2113 _cpp_init_input_buffer (pfile)
2119 init_token_list (pfile, &pfile->directbuf, 0);
2121 /* Determine the appropriate size for the input buffer. Normal C
2122 source files are smaller than eight K. */
2123 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2124 address arithmetic all the time, and 3 for pushback during buffer
2125 refill, in case there's a potential trigraph or end-of-line
2126 digraph at the end of a block. */
/* input_buffer_len records only the 8192 data bytes; the 4 extra
   bytes allocated are not counted in it.  */
2128 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2129 pfile->input_buffer = tmp;
2130 pfile->input_buffer_len = 8192;
2134 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2135 and extending for LEN characters to the NUL-terminated string
2136 STRING. Typical usage:
2138 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
/* TOKEN is given with an explicit length LEN; STRING's length is taken
   with strlen.  The two are compared with memcmp over the shorter of
   the two lengths, and the lengths themselves are compared when those
   bytes are equal.  */
2144 cpp_idcmp (token, len, string)
2145 const U_CHAR *token;
2149 size_t len2 = strlen (string);
2152 if ((r = memcmp (token, string, MIN (len, len2))))
2155 /* The longer of the two strings sorts after the shorter. */
2158 else if (len < len2)
2166 /* Lexing algorithm.
2168 The original lexer in cpplib was made up of two passes: a first pass
2169 that replaced trigraphs and deleted escaped newlines, and a second
2170 pass that tokenized the result of the first pass. Tokenisation was
2171 performed by peeking at the next character in the input stream. For
2172 example, if the input stream contained "!=", the handler for the !
2173 character would peek at the next character, and if it were a '='
2174 would skip over it, and return a "!=" token, otherwise it would
2175 return just the "!" token.
2177 To implement a single-pass lexer, this peeking ahead is unworkable.
2178 An arbitrary number of escaped newlines, and trigraphs (in particular
2179 ??/ which translates to the escape \), could separate the '!' and '='
2180 in the input stream, yet the next token is still a "!=".
2182 Suppose instead that we lex by one logical line at a time, producing
2183 a token list or stack for each logical line, and when seeing the '!'
2184 push a CPP_NOT token on the list. Then if the '!' is part of a
2185 longer token ("!=") we know we must see the remainder of the token by
2186 the time we reach the end of the logical line. Thus we can have the
2187 '=' handler look at the previous token (at the end of the list / top
2188 of the stack) and see if it is a "!" token, and if so, instead of
2189 pushing a "=" token revise the existing token to be a "!=" token.
2191 This works in the presence of escaped newlines, because the '\' would
2192 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2193 newline ('\n' or '\r') handler looks at the token at the top of the
2194 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2195 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2196 the '=' handler would never see any intervening escaped newlines.
2198 To make trigraphs work in this context, as in precedence trigraphs
2199 are highest and converted before anything else, the '?' handler does
2200 lookahead to see if it is a trigraph, and if so skips the trigraph
2201 and pushes the token it represents onto the top of the stack. This
2202 also works in the particular case of a CPP_BACKSLASH trigraph.
2204 To the preprocessor, whitespace is only significant to the point of
2205 knowing whether whitespace precedes a particular token. For example,
2206 the '=' handler needs to know whether there was whitespace between it
2207 and a "!" token on the top of the stack, to make the token conversion
2208 decision correctly. So each token has a PREV_WHITESPACE flag to
2209 indicate this - the standard permits consecutive whitespace to be
2210 regarded as a single space. The compiler front ends are not
2211 interested in whitespace at all; they just require a token stream.
2212 Another place where whitespace is significant to the preprocessor is
2213 a #define statement - if there is whitespace between the macro name
2214 and an initial "(" token the macro is "object-like", otherwise it is
2215 a function-like macro that takes arguments.
2217 However, all is not rosy. Parsing of identifiers, numbers, comments
2218 and strings becomes trickier because of the possibility of raw
2219 trigraphs and escaped newlines in the input stream.
2221 The trigraphs are three consecutive characters beginning with two
2222 question marks. A question mark is not valid as part of a number or
2223 identifier, so parsing of a number or identifier terminates normally
2224 upon reaching it, returning to the mainloop which handles the
2225 trigraph just like it would in any other position. Similarly for the
2226 backslash of a backslash-newline combination. So we just need the
2227 escaped-newline dropper in the mainloop to check if the token on the
2228 top of the stack after dropping the escaped newline is a number or
2229 identifier, and if so to continue the processing it as if nothing had
2232 For strings, we replace trigraphs whenever we reach a quote or
2233 newline, because there might be a backslash trigraph escaping them.
2234 We need to be careful that we start trigraph replacing from where we
2235 left off previously, because it is possible for a first scan to leave
2236 "fake" trigraphs that a second scan would pick up as real (e.g. the
2237 sequence "????/\n=" would find a fake ??= trigraph after removing the
2240 For line comments, on reaching a newline we scan the previous
2241 character(s) to see if it escaped, and continue if it is. Block
2242 comments ignore everything and just focus on finding the comment
2243 termination mark. The only difficult thing, and it is surprisingly
2244 tricky, is checking if an asterisk precedes the final slash since
2245 they could be separated by escaped newlines. If the preprocessor is
2246 invoked with the output comments option, we don't bother removing
2247 escaped newlines and replacing trigraphs for output.
2249 Finally, numbers can begin with a period, which is pushed initially
2250 as a CPP_DOT token in its own right. The digit handler checks if the
2251 previous token was a CPP_DOT not separated by whitespace, and if so
2252 pops it off the stack and pushes a period into the number's buffer
2253 before calling the number parser.
/* Spellings of the six digraphs.  */
2257 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2258 U":>", U"<%", U"%>"};
/* Indexed by the third character of a trigraph; holds the replacement
   character.  Entries not assigned by init_trigraph_map stay 0, which
   the trigraph code in this file treats as "not a trigraph".  */
2259 static unsigned char trigraph_map[256];
/* Make room for more saved comments in LIST.  The first call allocates
   space for 10 cpp_token entries; later calls double comments_cap.
   comments_cap counts tokens, not bytes, so both allocation sizes must
   be scaled by sizeof (cpp_token).  */
2262 expand_comment_space (list)
2265 if (list->comments_cap == 0)
2267 list->comments_cap = 10;
2268 list->comments = (cpp_token *)
2269 xmalloc (list->comments_cap * sizeof (cpp_token));
2273 list->comments_cap *= 2;
2274 list->comments = (cpp_token *)
2275 xrealloc (list->comments, list->comments_cap * sizeof (cpp_token));
/* Free the comment array, the token array and the name buffer of
   LIST.  */
2280 cpp_free_token_list (list)
2284 free (list->comments);
2285 free (list->tokens - 1); /* Backup over dummy token. */
2286 free (list->namebuf);
/* Fill in trigraph_map: for each of the nine trigraphs, the entry
   indexed by its third character is set to the replacement
   character.  */
2291 init_trigraph_map ()
2293 trigraph_map['='] = '#';
2294 trigraph_map['('] = '[';
2295 trigraph_map[')'] = ']';
2296 trigraph_map['/'] = '\\';
2297 trigraph_map['\''] = '^';
2298 trigraph_map['<'] = '{';
2299 trigraph_map['>'] = '}';
2300 trigraph_map['!'] = '|';
2301 trigraph_map['-'] = '~';
2304 /* Call when a trigraph is encountered. It warns if necessary, and
2305 returns true if the trigraph should be honoured. END is the third
2306 character of a trigraph in the input stream. */
/* ACCEPT mirrors the trigraphs option.  A warning is issued only when
   warn_trigraphs is set, with one of two messages.  */
2308 trigraph_ok (pfile, end)
2310 const unsigned char *end;
2312 int accept = CPP_OPTION (pfile, trigraphs);
2314 if (CPP_OPTION (pfile, warn_trigraphs))
/* Column reported in the warning: the offset from the start of the
   current line of the character just before END.  */
2316 unsigned int col = end - 1 - pfile->buffer->line_base;
2318 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2319 "trigraph ??%c converted to %c",
2320 (int) *end, (int) trigraph_map[*end]);
2322 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2323 "trigraph ??%c ignored", (int) *end);
2328 /* Scan a string for trigraphs, warning or replacing them inline as
2329 appropriate. When parsing a string, we must call this routine
2330 before processing a newline character (if trigraphs are enabled),
2331 since the newline might be escaped by a preceding backslash
2332 trigraph sequence. Returns a pointer to the end of the name after
/* SRC and LIMIT bound the text to scan.  The first loop only searches
   for a trigraph; the second copies from SRC to DEST, which is what
   lets replaced text become shorter.  */
2335 static unsigned char*
2336 trigraph_replace (pfile, src, limit)
2339 unsigned char* limit;
2341 unsigned char *dest;
2343 /* Starting with src[1], find two consecutive '?'. The case of no
2344 trigraphs is streamlined. */
/* The search advances two characters per iteration.  */
2346 for (; src + 1 < limit; src += 2)
2351 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2354 else if (src + 2 == limit || src[1] != '?')
2357 /* Check if it really is a trigraph. */
2358 if (trigraph_map[src[2]] == 0)
2362 goto trigraph_found;
2366 /* Now we have a trigraph, we need to scan the remaining buffer, and
2367 copy-shifting its contents left if replacement is enabled. */
2368 for (; src + 2 < limit; dest++, src++)
2369 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
/* trigraph_ok is handed the corresponding position in the input
   buffer, computed back from buffer->cur by the distance still left
   to LIMIT.  */
2373 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2374 *dest = trigraph_map[*src];
2377 /* Copy remaining (at most 2) characters. */
2383 /* If CUR is a backslash or the end of a trigraphed backslash, return
2384 a pointer to its beginning, otherwise NULL. We don't read beyond
2385 the buffer start, because there is the start of the comment in the
2387 static const unsigned char *
2388 backslash_start (pfile, cur)
2390 const unsigned char *cur;
/* Trigraph form: CUR is the '/' of "??/", and it only counts if
   trigraph_ok accepts it.  */
2394 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2395 && trigraph_ok (pfile, cur))
2400 /* Skip a C-style block comment. This is probably the trickiest
2401 handler. We find the end of the comment by seeing if an asterisk
2402 is before every '/' we encounter. The nasty complication is that a
2403 previous asterisk may be separated by one or more escaped newlines.
2404 Returns non-zero if comment terminated by EOF, zero otherwise. */
2406 skip_block_comment2 (pfile)
2409 cpp_buffer *buffer = pfile->buffer;
/* When non-null, the position just after a '*' and any escaped
   newlines that followed it; a '/' found exactly there is treated
   like one directly preceded by '*'.  */
2410 const unsigned char *char_after_star = 0;
2411 register const unsigned char *cur = buffer->cur;
2414 /* Inner loop would think the comment has ended if the first comment
2415 character is a '/'. Avoid this and keep the inner loop clean by
2416 skipping such a character. */
2417 if (cur < buffer->rlimit && cur[0] == '/')
2420 for (; cur < buffer->rlimit; )
2422 unsigned char c = *cur++;
2424 /* People like decorating comments with '*', so check for
2425 '/' instead for efficiency. */
2428 if (cur[-2] == '*' || cur - 1 == char_after_star)
2431 /* Warn about potential nested comments, but not when
2432 the final character inside the comment is a '/'.
2433 Don't bother to get it right across escaped newlines. */
2434 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2435 && cur[0] == '*' && cur[1] != '/')
2438 cpp_warning (pfile, "'/*' within comment");
2441 else if (IS_NEWLINE(c))
2443 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2445 handle_newline (cur, buffer->rlimit, c);
2446 /* Work correctly if there is an asterisk before an
2447 arbitrarily long sequence of escaped newlines. */
2448 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2449 char_after_star = cur;
2451 char_after_star = 0;
2461 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2462 Returns non-zero if a multiline comment. */
/* Scans forward from buffer->cur and stores the final position back
   into buffer->cur.  */
2464 skip_line_comment2 (pfile)
2467 cpp_buffer *buffer = pfile->buffer;
2468 register const unsigned char *cur = buffer->cur;
2471 for (; cur < buffer->rlimit; )
2473 unsigned char c = *cur++;
2477 /* Check for a (trigraph?) backslash escaping the newline. */
/* CUR has already moved past C, so CUR - 2 is the character just
   before it.  */
2478 if (!backslash_start (pfile, cur - 2))
2481 handle_newline (cur, buffer->rlimit, c);
2487 buffer->cur = cur - 1; /* Leave newline for caller. */
2491 /* Skips whitespace, stopping at next non-whitespace character. */
/* IN_DIRECTIVE enables, together with CPP_PEDANTIC, the pedwarn for
   form feed and vertical tab.  */
2493 skip_whitespace (pfile, in_directive)
2497 cpp_buffer *buffer = pfile->buffer;
2498 register const unsigned char *cur = buffer->cur;
/* Selects the singular or plural wording of the warning at the end.  */
2499 unsigned short null_count = 0;
2501 for (; cur < buffer->rlimit; )
2503 unsigned char c = *cur++;
2505 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2507 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2511 /* Must be '\f' or '\v' */
2512 else if (in_directive && CPP_PEDANTIC (pfile))
2513 cpp_pedwarn (pfile, "%s in preprocessing directive",
2514 c == '\f' ? "formfeed" : "vertical tab");
/* Step back one character before storing the position.  */
2519 buffer->cur = cur - 1;
2521 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2522 : "embedded null character ignored");
2525 /* Parse (append) an identifier. */
/* Characters are copied from the input buffer to the write position of
   LIST's name buffer (namebuf + name_used); NAME_LIMIT is the end of
   the space currently allocated (namebuf + name_cap).  */
2527 parse_name (pfile, list, name)
2532 const unsigned char *name_limit;
2533 unsigned char *namebuf;
2534 cpp_buffer *buffer = pfile->buffer;
2535 register const unsigned char *cur = buffer->cur;
2538 name_limit = list->namebuf + list->name_cap;
2539 namebuf = list->namebuf + list->name_used;
2541 for (; cur < buffer->rlimit && namebuf < name_limit; )
2543 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2549 if (c == '$' && CPP_PEDANTIC (pfile))
2552 cpp_pedwarn (pfile, "'$' character in identifier");
2556 /* Run out of name space? */
2557 if (cur < buffer->rlimit)
/* The amount used is recorded before auto_expand_name_space is asked
   to enlarge the buffer.  */
2559 list->name_used = namebuf - list->namebuf;
2560 auto_expand_name_space (list);
/* NAME's length is measured from its own offset, so characters that
   were already part of it before this call are included.  */
2566 name->len = namebuf - (list->namebuf + name->offset);
2567 list->name_used = namebuf - list->namebuf;
2570 /* Parse (append) a number. */
/* True if C is a '+' or '-' that may follow PREVC inside a number:
   after 'e' or 'E', or after 'p' or 'P' unless the c89 option is set.
   Relies on a variable named pfile at the point of use.  */
2572 #define VALID_SIGN(c, prevc) \
2573 (((c) == '+' || (c) == '-') && \
2574 ((prevc) == 'e' || (prevc) == 'E' \
2575 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
/* Same buffer handling as parse_name: characters go to the write
   position of LIST's name buffer, bounded by NAME_LIMIT.  */
2578 parse_number (pfile, list, name)
2583 const unsigned char *name_limit;
2584 unsigned char *namebuf;
2585 cpp_buffer *buffer = pfile->buffer;
2586 register const unsigned char *cur = buffer->cur;
2589 name_limit = list->namebuf + list->name_cap;
2590 namebuf = list->namebuf + list->name_used;
2592 for (; cur < buffer->rlimit && namebuf < name_limit; )
2594 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2596 /* Perhaps we should accept '$' here if we accept it for
2597 identifiers. We know namebuf[-1] is safe, because for c to
2598 be a sign we must have pushed at least one character. */
2599 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2606 /* Run out of name space? */
2607 if (cur < buffer->rlimit)
/* The amount used is recorded before auto_expand_name_space is asked
   to enlarge the buffer.  */
2609 list->name_used = namebuf - list->namebuf;
2610 auto_expand_name_space (list);
/* NAME's length is measured from its own offset, so characters pushed
   before this call (for example a leading '.') are included.  */
2616 name->len = namebuf - (list->namebuf + name->offset);
2617 list->name_used = namebuf - list->namebuf;
2620 /* Places a string terminated by an unescaped TERMINATOR into a
2621 cpp_name, which should be expandable and thus at the top of the
2622 list's stack. Handles embedded trigraphs, if necessary, and
2625 Can be used for character constants (terminator = '\''), string
2626 constants ('"'), angled headers ('>') and assertions (')'). */
/* Characters are copied from the input buffer into LIST's name buffer
   until TERMINATOR, a newline, the end of the input or the end of the
   name space stops the loop.  */
2629 parse_string2 (pfile, list, name, terminator)
2633 unsigned int terminator;
2635 cpp_buffer *buffer = pfile->buffer;
2636 register const unsigned char *cur = buffer->cur;
2637 const unsigned char *name_limit;
2638 unsigned char *namebuf;
/* Selects the singular or plural wording of the warning at the end.  */
2639 unsigned int null_count = 0;
/* How far into the name trigraph_replace has already been run; the
   next scan starts at name_start + trigraphed_len.  */
2640 int trigraphed_len = 0;
2643 name_limit = list->namebuf + list->name_cap;
2644 namebuf = list->namebuf + list->name_used;
2646 for (; cur < buffer->rlimit && namebuf < name_limit; )
2648 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2652 else if (c == terminator || IS_NEWLINE (c))
2654 unsigned char* name_start = list->namebuf + name->offset;
2656 /* Needed for trigraph_replace and multiline string warning. */
2659 /* Scan for trigraphs before checking if backslash-escaped. */
2660 if (CPP_OPTION (pfile, trigraphs)
2661 || CPP_OPTION (pfile, warn_trigraphs))
2663 namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2665 trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2666 if (trigraphed_len < 0)
2670 namebuf--; /* Drop the newline / terminator from the name. */
2673 /* Drop a backslash newline, and continue. */
2674 if (namebuf[-1] == '\\')
2676 handle_newline (cur, buffer->rlimit, c);
2683 /* In Fortran and assembly language, silently terminate
2684 strings of either variety at end of line. This is a
2685 kludge around not knowing where comments are in these
2687 if (CPP_OPTION (pfile, lang_fortran)
2688 || CPP_OPTION (pfile, lang_asm))
2691 /* Character constants, headers and asserts may not
2692 extend over multiple lines. In Standard C, neither
2693 may strings. We accept multiline strings as an
2694 extension, but not in directives. */
2695 if (terminator != '"' || IS_DIRECTIVE (list))
2698 cur++; /* Move forwards again. */
2700 if (pfile->multiline_string_line == 0)
2702 pfile->multiline_string_line = list->line;
2703 if (CPP_PEDANTIC (pfile))
2704 cpp_pedwarn (pfile, "multi-line string constant");
2708 handle_newline (cur, buffer->rlimit, c);
2712 unsigned char *temp;
2714 /* An odd number of consecutive backslashes represents
2715 an escaped terminator. */
2717 while (temp >= name_start && *temp == '\\')
2720 if ((namebuf - temp) & 1)
2727 /* Run out of name space? */
2728 if (cur < buffer->rlimit)
2730 list->name_used = namebuf - list->namebuf;
2731 auto_expand_name_space (list);
2735 /* We may not have trigraph-replaced the input for this code path,
2736 but as the input is in error by being unterminated we don't
2737 bother. Prevent warnings about no newlines at EOF. */
2738 if (IS_NEWLINE(cur[-1]))
2742 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2744 if (terminator == '\"' && pfile->multiline_string_line != list->line
2745 && pfile->multiline_string_line != 0)
2747 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2748 "possible start of unterminated string literal");
2749 pfile->multiline_string_line = 0;
2754 name->len = namebuf - (list->namebuf + name->offset);
2755 list->name_used = namebuf - list->namebuf;
2758 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2759 : "null character preserved"));
2762 /* The character TYPE helps us distinguish comment types: '*' = C
2763 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2764 the stored comment includes the comment start and any terminator. */
/* Number of bytes added to a saved comment's length for its opening
   characters.  */
2766 #define COMMENT_START_LEN 2
/* Record a comment in LIST as a CPP_COMMENT token.  FROM points at LEN
   bytes of comment text; TOK_NO is stored in the token's aux field.  */
2768 save_comment (list, from, len, tok_no, type)
2770 const unsigned char *from;
2772 unsigned int tok_no;
2776 unsigned char *buffer;
/* From here on LEN also counts the comment start characters; the
   memcpy below subtracts them again when copying from FROM.  */
2778 len += COMMENT_START_LEN;
/* Grow the comment array when it is full, and the name buffer when
   LEN more bytes would not fit.  */
2780 if (list->comments_used == list->comments_cap)
2781 expand_comment_space (list);
2783 if (list->name_used + len > list->name_cap)
2784 expand_name_space (list, len);
2786 comment = &list->comments[list->comments_used++];
2787 comment->type = CPP_COMMENT;
2788 comment->aux = tok_no;
2789 comment->val.name.len = len;
2790 comment->val.name.offset = list->name_used;
/* The comment text is stored in the name buffer at the current write
   position, which then advances by LEN.  */
2792 buffer = list->namebuf + list->name_used;
2804 memcpy (buffer, from, len - COMMENT_START_LEN);
2805 list->name_used += len;
2809 * The tokenizer's main loop. Returns a token list, representing a
2810 * logical line in the input file, terminated with a CPP_VSPACE
2811 * token. On EOF, a token list containing the single CPP_EOF token
2814 * Implementation relies almost entirely on lookback, rather than
2815 * looking forwards. This means that tokenization requires just
2816 * a single pass of the file, even in the presence of trigraphs and
2817 * escaped newlines, providing significant performance benefits.
2818 * Trigraph overhead is negligible if they are disabled, and low
2819 * even when enabled.
2823 _cpp_lex_line (pfile, list)
2827 cpp_token *cur_token, *token_limit;
2828 cpp_buffer *buffer = pfile->buffer;
2829 register const unsigned char *cur = buffer->cur;
2830 unsigned char flags = 0;
2833 token_limit = list->tokens + list->tokens_cap;
2834 cur_token = list->tokens + list->tokens_used;
2836 for (; cur < buffer->rlimit && cur_token < token_limit;)
2838 unsigned char c = *cur++;
2840 /* Optimize whitespace skipping, in particular the case of a
2841 single whitespace character, as every other token is probably
2842 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2843 if (is_hspace ((unsigned int) c))
2845 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2847 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2848 skip_whitespace (pfile, IS_DIRECTIVE (list));
2851 flags = PREV_WHITESPACE;
2852 if (cur == buffer->rlimit)
2857 /* Initialize current token. Its type is set in the switch. */
2858 cur_token->col = COLUMN (cur);
2859 cur_token->flags = flags;
2864 case '0': case '1': case '2': case '3': case '4':
2865 case '5': case '6': case '7': case '8': case '9':
2866 /* Prepend an immediately previous CPP_DOT token. */
2867 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2870 if (list->name_cap == list->name_used)
2871 auto_expand_name_space (list);
2873 cur_token->val.name.len = 1;
2874 cur_token->val.name.offset = list->name_used;
2875 list->namebuf[list->name_used++] = '.';
2878 INIT_NAME (list, cur_token->val.name);
2879 cur--; /* Backup character. */
2883 parse_number (pfile, list, &cur_token->val.name);
2886 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2891 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2892 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2893 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2894 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2896 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2897 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2898 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2899 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2901 INIT_NAME (list, cur_token->val.name);
2902 cur--; /* Backup character. */
2903 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2907 parse_name (pfile, list, &cur_token->val.name);
2910 /* Find handler for newly created / extended directive. */
2911 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2912 _cpp_check_directive (list, cur_token);
2919 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2920 /* Do we have a wide string? */
2921 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2922 && cur_token[-1].val.name.len == 1
2923 && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
2924 && !CPP_TRADITIONAL (pfile))
2926 /* No need for 'L' any more. */
2928 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2932 /* Here c is one of ' " > or ). */
2933 INIT_NAME (list, cur_token->val.name);
2935 parse_string2 (pfile, list, &cur_token->val.name, c);
2941 cur_token->type = CPP_DIV;
2944 if (PREV_TOKEN_TYPE == CPP_DIV)
2946 /* We silently allow C++ comments in system headers,
2947 irrespective of conformance mode, because lots of
2948 broken systems do that and trying to clean it up
2949 in fixincludes is a nightmare. */
2950 if (buffer->system_header_p)
2951 goto do_line_comment;
2952 else if (CPP_OPTION (pfile, cplusplus_comments))
2954 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2955 && ! buffer->warned_cplusplus_comments)
2959 "C++ style comments are not allowed in ISO C89");
2961 "(this will be reported only once per input file)");
2962 buffer->warned_cplusplus_comments = 1;
2968 "comment start split across lines");
2969 if (skip_line_comment2 (pfile))
2970 cpp_error_with_line (pfile, list->line,
2972 "multi-line comment");
2973 if (!CPP_OPTION (pfile, discard_comments))
2974 save_comment (list, cur, buffer->cur - cur,
2975 cur_token - 1 - list->tokens, c);
2978 /* Back-up to first '-' or '/'. */
2980 if (!CPP_OPTION (pfile, traditional))
2981 flags = PREV_WHITESPACE;
2989 cur_token->type = CPP_MULT;
2992 if (PREV_TOKEN_TYPE == CPP_DIV)
2997 "comment start '/*' split across lines");
2998 if (skip_block_comment2 (pfile))
2999 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3000 "unterminated comment");
3001 else if (buffer->cur[-2] != '*')
3003 "comment end '*/' split across lines");
3004 if (!CPP_OPTION (pfile, discard_comments))
3005 save_comment (list, cur, buffer->cur - cur,
3006 cur_token - 1 - list->tokens, c);
3010 if (!CPP_OPTION (pfile, traditional))
3011 flags = PREV_WHITESPACE;
3013 else if (CPP_OPTION (pfile, cplusplus))
3015 /* In C++, there are .* and ->* operators. */
3016 if (PREV_TOKEN_TYPE == CPP_DEREF)
3017 BACKUP_TOKEN (CPP_DEREF_STAR);
3018 else if (PREV_TOKEN_TYPE == CPP_DOT)
3019 BACKUP_TOKEN (CPP_DOT_STAR);
3027 handle_newline (cur, buffer->rlimit, c);
3028 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3030 /* Remove the escaped newline. Then continue to process
3031 any interrupted name or number. */
3036 if (cur_token->type == CPP_NAME)
3038 else if (cur_token->type == CPP_NUMBER)
3039 goto continue_number;
3042 /* Remember whitespace setting. */
3043 flags = cur_token->flags;
3046 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3049 cpp_warning (pfile, "backslash and newline separated by space");
3051 PUSH_TOKEN (CPP_VSPACE);
3055 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3057 if (CPP_OPTION (pfile, chill))
3058 goto do_line_comment;
3059 REVISE_TOKEN (CPP_MINUS_MINUS);
3062 PUSH_TOKEN (CPP_MINUS);
3065 /* The digraph flag checking ensures that ## and %:%:
3066 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3069 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3070 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3071 REVISE_TOKEN (CPP_PASTE);
3073 PUSH_TOKEN (CPP_HASH);
3077 cur_token->type = CPP_COLON;
3080 if (PREV_TOKEN_TYPE == CPP_COLON
3081 && CPP_OPTION (pfile, cplusplus))
3082 BACKUP_TOKEN (CPP_SCOPE);
3083 /* Digraph: "<:" is a '[' */
3084 else if (PREV_TOKEN_TYPE == CPP_LESS)
3085 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3086 /* Digraph: "%:" is a '#' */
3087 else if (PREV_TOKEN_TYPE == CPP_MOD)
3089 (--cur_token)->flags |= DIGRAPH;
3097 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3098 REVISE_TOKEN (CPP_AND_AND);
3100 PUSH_TOKEN (CPP_AND);
3105 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3106 REVISE_TOKEN (CPP_OR_OR);
3108 PUSH_TOKEN (CPP_OR);
3112 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3113 REVISE_TOKEN (CPP_PLUS_PLUS);
3115 PUSH_TOKEN (CPP_PLUS);
3119 /* This relies on equidistance of "?=" and "?" tokens. */
3120 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3121 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3123 PUSH_TOKEN (CPP_EQ);
3127 cur_token->type = CPP_GREATER;
3130 if (PREV_TOKEN_TYPE == CPP_GREATER)
3131 BACKUP_TOKEN (CPP_RSHIFT);
3132 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3133 BACKUP_TOKEN (CPP_DEREF);
3134 /* Digraph: ":>" is a ']' */
3135 else if (PREV_TOKEN_TYPE == CPP_COLON)
3136 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3137 /* Digraph: "%>" is a '}' */
3138 else if (PREV_TOKEN_TYPE == CPP_MOD)
3139 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3145 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3147 REVISE_TOKEN (CPP_LSHIFT);
3150 /* Is this the beginning of a header name? */
3151 if (list->dir_flags & SYNTAX_INCLUDE)
3153 c = '>'; /* Terminator. */
3154 cur_token->type = CPP_HEADER_NAME;
3155 goto do_parse_string;
3157 PUSH_TOKEN (CPP_LESS);
3161 /* Digraph: "<%" is a '{' */
3162 cur_token->type = CPP_MOD;
3163 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3164 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3169 /* Is this the beginning of an assertion string? */
3170 if (list->dir_flags & SYNTAX_ASSERT)
3172 c = ')'; /* Terminator. */
3173 cur_token->type = CPP_ASSERTION;
3174 goto do_parse_string;
3176 PUSH_TOKEN (CPP_OPEN_PAREN);
3180 if (cur + 1 < buffer->rlimit && *cur == '?'
3181 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3183 /* Handle trigraph. */
3187 case '(': goto make_open_square;
3188 case ')': goto make_close_square;
3189 case '<': goto make_open_brace;
3190 case '>': goto make_close_brace;
3191 case '=': goto make_hash;
3192 case '!': goto make_or;
3193 case '-': goto make_complement;
3194 case '/': goto make_backslash;
3195 case '\'': goto make_xor;
3198 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3200 /* GNU C++ defines <? and >? operators. */
3201 if (PREV_TOKEN_TYPE == CPP_LESS)
3203 REVISE_TOKEN (CPP_MIN);
3206 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3208 REVISE_TOKEN (CPP_MAX);
3212 PUSH_TOKEN (CPP_QUERY);
3216 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3218 && !(cur_token[-1].flags & PREV_WHITESPACE))
3221 PUSH_TOKEN (CPP_ELLIPSIS);
3224 PUSH_TOKEN (CPP_DOT);
3228 case '~': PUSH_TOKEN (CPP_COMPL); break;
3230 case '^': PUSH_TOKEN (CPP_XOR); break;
3232 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3234 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3236 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3238 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3240 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3241 case '!': PUSH_TOKEN (CPP_NOT); break;
3242 case ',': PUSH_TOKEN (CPP_COMMA); break;
3243 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3244 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3247 if (CPP_OPTION (pfile, dollars_in_ident))
3252 PUSH_TOKEN (CPP_OTHER);
3257 /* Run out of token space? */
3258 if (cur_token == token_limit)
3260 list->tokens_used = cur_token - list->tokens;
3261 expand_token_space (list);
3265 cur_token->type = CPP_EOF;
3266 cur_token->flags = flags;
3268 if (cur_token != &list->tokens[0])
3270 /* Next call back will get just a CPP_EOF. */
3272 cpp_warning (pfile, "no newline at end of file");
3273 PUSH_TOKEN (CPP_VSPACE);
3279 list->tokens_used = cur_token - list->tokens;
3281 /* FIXME: take this check out and put it in the caller.
3282 list->directive == 0 indicates an unknown directive (but null
3283 directive is OK). This is the first time we can be sure the
3284 directive is invalid, and thus warn about it, because it might
3285 have been split by escaped newlines. Also, don't complain about
3286 invalid directives in assembly source, we don't know where the
3287 comments are, and # may introduce assembler pseudo-ops. */
3289 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3290 && list->tokens[1].type != CPP_VSPACE
3291 && !CPP_OPTION (pfile, lang_asm))
3292 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3293 "invalid preprocessing directive");
3296 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3297 already contain enough space to hold the token's spelling. If
3298 WHITESPACE is true, and the token was preceded by whitespace,
3299 output a single space before the token proper. Returns a pointer
3300 to the character after the last character written.

   LIST supplies the name buffer (list->namebuf) that the token's
   val.name offset and length index into. PFILE is handed to cpp_ice
   when the token turns out to be unspellable. */
3302 static unsigned char *
3303 spell_token (pfile, token, list, buffer, whitespace)
3304 cpp_reader *pfile; /* Would be nice to be rid of this... */
3306 cpp_toklist *list; /* FIXME: get rid of this... */
3307 unsigned char *buffer;
3310 /* Whitespace will not be wanted by handlers of the # and ##
3311 operators calling this function, but will be wanted by the
3312 function that writes out the preprocessed file. */
3313 if (whitespace && token->flags & PREV_WHITESPACE)
/* The token type selects a spelling category from the
   token_spellings table; each category is written differently. */
3316 switch (token_spellings[token->type].type)
3318 case SPELL_OPERATOR:
3320 const unsigned char *spelling;
/* A token carrying the DIGRAPH flag is spelled from
   digraph_spellings, indexed relative to CPP_FIRST_DIGRAPH; the
   other assignment below uses the speller recorded in
   token_spellings. */
3323 if (token->flags & DIGRAPH)
3324 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3326 spelling = token_spellings[token->type].speller;
/* Step through the spelling one character at a time until its
   terminating NUL is reached. */
3328 while ((c = *spelling++) != '\0')
/* Copy val.name.len bytes of spelling out of the list's name
   buffer, then advance BUFFER past them. */
3334 memcpy (buffer, list->namebuf + token->val.name.offset,
3335 token->val.name.len);
3336 buffer += token->val.name.len;
/* NOTE(review): the statements guarded by the next two tests are
   not visible in this excerpt. Presumably the first emits an L
   prefix for wide tokens and the second selects a double quote
   instead of a single quote as delimiter -- confirm against the
   full file. */
3343 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3346 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
/* The literal's contents come from the name buffer, copied the
   same way as in the memcpy above. */
3349 memcpy (buffer, list->namebuf + token->val.name.offset,
3350 token->val.name.len);
3351 buffer += token->val.name.len;
/* This token's spelling is the single character held in its aux
   field. */
3357 *buffer++ = token->aux;
/* Reached for a token that cannot be spelled; the failure is
   reported through cpp_ice. */
3361 cpp_ice (pfile, "Unspellable token");
3368 /* Temporary function for illustrative purposes.

   Sets up the trigraph map, allocates a token list, and then
   repeatedly initializes the list and lexes one line into it with
   _cpp_lex_line. A line whose first token is CPP_EOF is tested for
   specially; a line that has a directive handler has that handler
   called with PFILE; _cpp_output_list writes a lexed line out.
   NOTE(review): the statements controlling loop exit and the choice
   between handler and output are not visible in this excerpt --
   presumably CPP_EOF ends the loop and only non-directive lines are
   output; confirm against the full file. */
3370 _cpp_lex_file (pfile)
3376 init_trigraph_map ();
3377 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* RECYCLE starts at 0 for the freshly allocated list and is handed
   to init_token_list on every iteration. The loop has no condition
   or increment of its own. */
3379 for (recycle = 0; ;)
3381 init_token_list (pfile, list, recycle);
3384 _cpp_lex_line (pfile, list);
3385 if (list->tokens[0].type == CPP_EOF)
3388 if (list->dir_handler)
/* A nonzero return from the handler is followed by allocating a
   brand-new list; the old pointer is simply overwritten here.
   NOTE(review): presumably the handler has kept the old list --
   confirm who is responsible for freeing it. */
3390 if (list->dir_handler (pfile))
3392 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3397 _cpp_output_list (pfile, list);
3401 /* Temporary function for illustrative purposes.

   Spells the tokens of LIST, starting from tokens[0], at
   pfile->limit, which is advanced to the value spell_token returns.
   Room is reserved with CPP_RESERVE before each spell_token call.
   Entries of list->comments are spelled ahead of the token they are
   attached to. */
3403 _cpp_output_list (pfile, list)
/* COMMENT_BEFORE is the token that the next unwritten comment
   precedes. It is initialized to 0 and only set when the list
   holds at least one comment. */
3407 cpp_token *token, *comment, *comment_before = 0;
/* The aux field of a comment is used as an index into
   list->tokens. */
3409 if (list->comments_used > 0)
3411 comment = &list->comments[0];
3412 comment_before = &list->tokens[comment->aux];
3415 token = &list->tokens[0];
3418 /* Output comments if -C. */
3419 while (token == comment_before)
3421 /* Make space for the comment, and copy it out. */
3422 CPP_RESERVE (pfile, TOKEN_LEN (comment));
/* Final argument 0: no leading space is requested for comments. */
3423 pfile->limit = spell_token (pfile, comment, list, pfile->limit, 0);
3425 /* Stop if no comments left, or no more comments appear
3426 before the current token. */
3428 if (comment == list->comments + list->comments_used)
3430 comment_before = &list->tokens[comment->aux];
/* Final argument 1: an ordinary token gets a leading space when it
   was preceded by whitespace. */
3433 CPP_RESERVE (pfile, TOKEN_LEN (token));
3434 pfile->limit = spell_token (pfile, token, list, pfile->limit, 1);
/* The test inspects the current token and then steps to the next
   one, so a CPP_VSPACE token is the last one examined.
   NOTE(review): this looks like the tail of a do/while whose
   opening line is not visible in this excerpt -- confirm. */
3436 while (token++->type != CPP_VSPACE);