1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Accessors for a cpp_buffer's read pointer.  PEEKBUF yields the
   character N places past `cur' without consuming it, or EOF when
   `rlimit' is N or fewer characters away.  GETBUF yields the
   character at `cur' and steps past it, or yields EOF once `cur' has
   reached `rlimit'.  FORWARDBUF moves `cur' by N and performs no
   bounds check.  */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* The same operations applied to CPP_BUFFER (pfile); a variable named
   `pfile' must be in scope wherever these are used.  */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
/* Forward declarations, written with the PARAMS macro, for the
   helper functions this file uses.  */
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
59 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Grow LIST's name buffer by one byte plus half of its current
   capacity.  */
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), 1 + (list)->name_cap / 2)
67 static void expand_comment_space PARAMS ((cpp_toklist *));
68 void init_trigraph_map PARAMS ((void));
69 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
73 static int skip_block_comment2 PARAMS ((cpp_reader *));
74 static int skip_line_comment2 PARAMS ((cpp_reader *));
75 static void skip_whitespace PARAMS ((cpp_reader *, int));
76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
81 static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
82 unsigned int, unsigned int, unsigned int));
83 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
85 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
87 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
88 unsigned char *, int));
90 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
93 /* Macros on a cpp_name. */
94 #define INIT_NAME(list, name) \
96 (name).text = (list)->namebuf + (list)->name_used;} while (0)
/* IS_DIRECTIVE is true when the first token of LIST has type
   CPP_HASH.  COLUMN is the distance of CUR from the start of the
   current line and needs a variable named `buffer' in scope.  */
98 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
99 #define COLUMN(cur) ((cur) - buffer->line_base)
101 /* Maybe put these in the ISTABLE eventually. */
102 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
103 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
105 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
106 character, if any, is in buffer. The test '\r' + '\n' - c picks
out whichever of the two newline characters C is not. */
107 #define handle_newline(cur, limit, c) \
109 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
111 CPP_BUMP_LINE_CUR (pfile, (cur)); \
114 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
115 #define PREV_TOKEN_TYPE (cur_token[-1].type)
/* Token cursor helpers; all need a `cur_token' pointer in scope.
   PUSH_TOKEN stores TTYPE in the current slot and advances the
   cursor.  REVISE_TOKEN overwrites the type of the slot before the
   cursor.  BACKUP_TOKEN steps the cursor back one slot and stores
   TTYPE there; BACKUP_DIGRAPH does the same and also sets that
   slot's DIGRAPH flag.  */
117 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
118 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
119 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
120 #define BACKUP_DIGRAPH(ttype) do { \
121 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
123 /* An upper bound on the number of bytes needed to spell a token,
124 including preceding whitespace. Tokens whose spelling kind is at
or below SPELL_NONE contribute only the fixed 5 bytes. */
125 #define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
126 SPELL_NONE ? token->val.name.len: 0))
130 /* Order here matters. Those beyond SPELL_NONE store their spelling
131 in the token list, and its length in the token->val.name.len. */
132 #define SPELL_OPERATOR 0
133 #define SPELL_CHAR 2 /* FIXME: revert order after transition. */
135 #define SPELL_IDENT 3
136 #define SPELL_STRING 4
/* Each of these turns one TTYPE_TABLE entry (E, S) into a
   { spelling kind, spelling } initializer for token_spellings; E is
   not used.  Only T casts S to const U_CHAR *.  */
138 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
139 #define I(e, s) {SPELL_IDENT, s},
140 #define S(e, s) {SPELL_STRING, s},
141 #define C(e, s) {SPELL_CHAR, s},
142 #define N(e, s) {SPELL_NONE, s},
/* The table has N_TTYPES + 1 entries: the expansion of TTYPE_TABLE
   followed by a terminating {0, 0}.  */
144 static const struct token_spelling
147 const U_CHAR *spelling;
148 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
156 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
/* The buffer grows to N plus twice its previous size.  The count of
   bytes already written is read before the xrealloc and re-applied
   with CPP_SET_WRITTEN afterwards.  */
159 _cpp_grow_token_buffer (pfile, n)
163 long old_written = CPP_WRITTEN (pfile);
164 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
165 pfile->token_buffer = (U_CHAR *)
166 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
167 CPP_SET_WRITTEN (pfile, old_written);
170 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
171 If BUFFER != NULL, then use the LENGTH characters in BUFFER
172 as the new input buffer.
173 Return the new buffer, or NULL on failure. */
/* The stack depth is incremented before it is compared with
   CPP_STACK_MAX; reaching that limit is reported through cpp_fatal.
   The new buffer is zero-filled by xcalloc, records PFILE's current
   if_stack, and starts with `buf' and `cur' at BUFFER, `rlimit' at
   BUFFER + LENGTH and `line_base' at NULL.  */
176 cpp_push_buffer (pfile, buffer, length)
178 const U_CHAR *buffer;
181 cpp_buffer *buf = CPP_BUFFER (pfile);
183 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
185 cpp_fatal (pfile, "macro or `#include' recursion too deep");
189 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
191 new->if_stack = pfile->if_stack;
192 new->buf = new->cur = buffer;
193 new->rlimit = buffer + length;
196 new->line_base = NULL;
198 CPP_BUFFER (pfile) = new;
/* Remove the current buffer from PFILE's buffer stack and return the
   buffer that is current afterwards.  An active mark at this point is
   reported as an internal error.  The cleanup shown here unwinds the
   buffer's conditional stack, frees buf->buf, decrements a non-zero
   system_include_depth, and stores a pending potential_control_macro
   into the buffer's ihash entry before clearing it.  For a buffer
   that carries a macro, buf->buf is freed only for the macro types
   tested below (T_FMACRO additionally requires buf->mapped).  */
203 cpp_pop_buffer (pfile)
206 cpp_buffer *buf = CPP_BUFFER (pfile);
207 if (ACTIVE_MARK_P (pfile))
208 cpp_ice (pfile, "mark active in cpp_pop_buffer");
212 _cpp_unwind_if_stack (pfile, buf);
214 free ((PTR) buf->buf);
215 if (pfile->system_include_depth)
216 pfile->system_include_depth--;
217 if (pfile->potential_control_macro)
219 buf->ihash->control_macro = pfile->potential_control_macro;
220 pfile->potential_control_macro = 0;
222 pfile->input_stack_listing_current = 0;
226 HASHNODE *m = buf->macro;
229 if ((m->type == T_FMACRO && buf->mapped)
230 || m->type == T_SPECLINE || m->type == T_FILE
231 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
232 || m->type == T_STDC)
233 free ((PTR) buf->buf);
235 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
237 pfile->buffer_stack_depth--;
238 return CPP_BUFFER (pfile);
241 /* Deal with the annoying semantics of fwrite. */
/* Writes LEN bytes from BUF to FP using fwrite with an element size
   of 1.  The error path shown here reports through
   cpp_notice_from_errno, naming the configured output file.  */
243 safe_fwrite (pfile, buf, len, fp)
253 count = fwrite (buf, 1, len, fp);
262 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
265 /* Notify the compiler proper that the current line number has jumped,
266 or the current file name has changed. */
/* PRINT carries the state of the last line command written
   (last_fname, last_bsd, lineno) and the output stream; LINE is the
   line number to announce.  The `codes' strings are listed in the
   same order as the `change' enumerators (presumably indexed by
   `change' -- confirm): " 1" lines up with `enter' and " 2" with
   `leave'.  A system header adds " 3", and in C++ mode a
   system_header_p value of 2 adds " 4" as well.  */
269 output_line_command (pfile, print, line)
274 cpp_buffer *ip = cpp_file_buffer (pfile);
275 enum { same = 0, enter, leave, rname } change;
276 static const char * const codes[] = { "", " 1", " 2", "" };
278 if (CPP_OPTION (pfile, no_line_commands))
281 /* Determine whether the current filename has changed, and if so,
282 how. 'nominal_fname' values are unique, so they can be compared
283 by comparing pointers. */
284 if (ip->nominal_fname == print->last_fname)
288 if (pfile->buffer_stack_depth == print->last_bsd)
292 if (pfile->buffer_stack_depth > print->last_bsd)
296 print->last_bsd = pfile->buffer_stack_depth;
298 print->last_fname = ip->nominal_fname;
300 /* If the current file has not changed, we can output a few newlines
301 instead if we want to increase the line number by a small amount.
302 We cannot do this if print->lineno is zero, because that means we
303 haven't output any line commands yet. (The very first line
304 command output is a `same_file' command.) */
305 if (change == same && print->lineno != 0
306 && line >= print->lineno && line < print->lineno + 8)
308 while (line > print->lineno)
310 putc ('\n', print->outf);
316 #ifndef NO_IMPLICIT_EXTERN_C
317 if (CPP_OPTION (pfile, cplusplus))
318 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
320 ip->system_header_p ? " 3" : "",
321 (ip->system_header_p == 2) ? " 4" : "");
324 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
326 ip->system_header_p ? " 3" : "");
327 print->lineno = line;
330 /* Write the contents of the token_buffer to the output stream, and
331 clear the token_buffer. Also handles generating line commands and
332 keeping track of file transitions. */
/* Output is attempted only when CPP_WRITTEN differs from
   print->written.  The line command is issued for the line of the
   buffer returned by cpp_file_buffer, and CPP_SET_WRITTEN rewinds the
   token buffer to print->written.  */
335 cpp_output_tokens (pfile, print)
341 if (CPP_WRITTEN (pfile) - print->written)
343 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
345 safe_fwrite (pfile, pfile->token_buffer,
346 CPP_WRITTEN (pfile) - print->written, print->outf);
349 ip = cpp_file_buffer (pfile);
351 output_line_command (pfile, print, CPP_BUF_LINE (ip));
353 CPP_SET_WRITTEN (pfile, print->written);
356 /* Helper for cpp_output_list - increases the column number to match
357 what we expect it to be. */
/* Writes TO - FROM columns of padding to print->outf.  In the tab
   case the gap is split into one tab per 8 columns plus the remainder
   in spaces; otherwise it is written as spaces only.  */
360 bump_column (print, from, to)
362 unsigned int from, to;
364 unsigned int tabs, spcs;
365 unsigned int delta = to - from;
367 /* Only if FROM is 0, advance by tabs. */
369 tabs = delta / 8, spcs = delta % 8;
371 tabs = 0, spcs = delta;
373 while (tabs--) putc ('\t', print->outf);
374 while (spcs--) putc (' ', print->outf);
/* Column handling in cpp_output_list: the running column starts at 1.
   When a token's recorded column is ahead of it, bump_column pads the
   gap first; after each token is written with safe_fwrite the running
   column advances by the token's length.  CPP_VSPACE tokens trigger a
   line command for the line number stored in the token's aux field.  */
377 /* Write out the list L onto pfile->token_buffer. This function is
380 1) pfile->token_buffer is not going to continue to exist.
381 2) At the moment, tokens don't carry the information described
382 in cpplib.h; they are all strings.
383 3) The list has to be a complete line, and has to be written starting
384 at the beginning of a line. */
387 cpp_output_list (pfile, print, list)
390 const cpp_toklist *list;
393 unsigned int curcol = 1;
395 /* XXX Probably does not do what is intended. */
396 if (print->lineno != list->line)
397 output_line_command (pfile, print, list->line);
399 for (i = 0; i < list->tokens_used; i++)
401 if (TOK_TYPE (list, i) == CPP_VSPACE)
403 output_line_command (pfile, print, list->tokens[i].aux);
407 if (curcol < TOK_COL (list, i))
409 /* Insert space to bring the column to what it should be. */
410 bump_column (print, curcol - 1, TOK_COL (list, i));
411 curcol = TOK_COL (list, i);
413 /* XXX We may have to insert space to prevent an accidental
415 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
416 curcol += TOK_LEN (list, i);
420 /* Scan a string (which may have escape marks), perform macro expansion,
421 and write the result to the token_buffer. BUF and LENGTH give the
text to scan; a negative LENGTH is reported as an internal error.
The text is copied to alloca'd storage, pushed as a new input
buffer flagged has_escapes, and read with cpp_get_token until
CPP_EOF is returned with the original buffer current again. */
424 _cpp_expand_to_buffer (pfile, buf, length)
430 enum cpp_ttype token;
435 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
439 /* Copy the buffer, because it might be in an unsafe place - for
440 example, a sequence on the token_buffer, where the pointers will
441 be invalidated if we enlarge the token_buffer. */
442 buf1 = alloca (length);
443 memcpy (buf1, buf, length);
445 /* Set up the input on the input stack. */
446 stop = CPP_BUFFER (pfile);
447 if (cpp_push_buffer (pfile, buf1, length) == NULL)
449 CPP_BUFFER (pfile)->has_escapes = 1;
451 /* Scan the input, create the output. */
454 token = cpp_get_token (pfile);
455 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
460 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
/* STOP is the buffer underneath the current one on entry; the scan is
   over when cpp_get_token returns CPP_EOF with STOP current.  Once
   something other than whitespace has been seen on a line the rest of
   that line is skipped.  CPP_SET_WRITTEN with the entry value throws
   away whatever the scan appended to the token buffer.  */
463 cpp_scan_buffer_nooutput (pfile)
466 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
467 enum cpp_ttype token;
468 unsigned int old_written = CPP_WRITTEN (pfile);
469 /* In no-output mode, we can ignore everything but directives. */
472 if (! pfile->only_seen_white)
473 _cpp_skip_rest_of_line (pfile);
474 token = cpp_get_token (pfile);
475 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
478 CPP_SET_WRITTEN (pfile, old_written);
481 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
/* The token buffer is flushed through cpp_output_tokens at CPP_EOF,
   at CPP_VSPACE, and after a CPP_DIRECTIVE that left a different
   nominal file name current than the one PRINT last recorded.  The
   scan ends when CPP_EOF arrives with the buffer that was beneath the
   starting buffer current.  */
484 cpp_scan_buffer (pfile, print)
488 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
489 enum cpp_ttype token;
493 token = cpp_get_token (pfile);
494 if (token == CPP_EOF || token == CPP_VSPACE
495 /* XXX Temporary kluge - force flush after #include only */
496 || (token == CPP_DIRECTIVE
497 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
499 cpp_output_tokens (pfile, print);
500 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
506 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
/* Walks from the current buffer along the CPP_PREV_BUFFER links and
   tests each buffer for a non-null `ihash'.  */
509 cpp_file_buffer (pfile)
514 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
515 if (ip->ihash != NULL)
520 /* Token-buffer helper functions. */
522 /* Expand a token list's string space. */
/* LEN is the number of bytes added to LIST's name_cap.  xrealloc may
   move the buffer; when it does, every token whose spelling kind is
   above SPELL_NONE has its text pointer shifted by the distance the
   buffer moved.  */
524 expand_name_space (list, len)
528 const U_CHAR *old_namebuf;
530 old_namebuf = list->namebuf;
531 list->name_cap += len;
532 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
534 /* Fix up token text pointers. */
535 if (list->namebuf != old_namebuf)
539 for (i = 0; i < list->tokens_used; i++)
540 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
541 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
545 /* Expand the number of tokens in a list. */
/* The capacity doubles.  The allocation begins one element before
   list->tokens (the dummy slot), so the pointer is stepped back for
   xrealloc, one extra element is requested, and the pointer is
   stepped forward again afterwards.  */
547 expand_token_space (list)
550 list->tokens_cap *= 2;
551 list->tokens = (cpp_token *)
552 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
553 list->tokens++; /* Skip the dummy. */
556 /* Initialize a token list. We allocate an extra token in front of
557 the token list, as this allows us to always peek at the previous
558 token without worrying about underflowing the list. */
/* Fresh storage is 256 token slots plus the dummy (typed CPP_EOF), a
   1024-byte name buffer, and no comment space.  The used counters and
   dir_handler are zeroed and the list's line is taken from
   pfile->buffer->lineno.  */
560 init_token_list (pfile, list, recycle)
565 /* Recycling a used list saves 3 free-malloc pairs. */
568 /* Initialize token space. Put a dummy token before the start
569 that will fail matches. */
570 list->tokens_cap = 256; /* 4K's worth. */
571 list->tokens = (cpp_token *)
572 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
573 list->tokens[0].type = CPP_EOF;
576 /* Initialize name space. */
577 list->name_cap = 1024;
578 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
580 /* Only create a comment space on demand. */
581 list->comments_cap = 0;
585 list->tokens_used = 0;
587 list->comments_used = 0;
589 list->line = pfile->buffer->lineno;
590 list->dir_handler = 0;
/* How _cpp_scan_line collects tokens: each one is lexed into PFILE's
   token buffer by _cpp_lex_token, its length is the change in the
   written count, and the written count is then rewound.  Spellings of
   tokens whose kind is above SPELL_NONE are copied into the list's
   name buffer (grown first if needed); other tokens point at the
   fixed spelling in token_spellings.  The list is initialized with
   the recycle argument set to 1.  */
594 /* Scan an entire line and create a token list for it. Does not
595 macro-expand or execute directives. */
598 _cpp_scan_line (pfile, list)
607 init_token_list (pfile, list, 1);
609 written = CPP_WRITTEN (pfile);
614 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
615 type = _cpp_lex_token (pfile);
616 len = CPP_WRITTEN (pfile) - written;
617 CPP_SET_WRITTEN (pfile, written);
618 if (type == CPP_HSPACE)
620 if (CPP_PEDANTIC (pfile))
621 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
625 else if (type == CPP_COMMENT)
626 /* Only happens when processing -traditional macro definitions.
627 Do not give this a token entry, but do not change space_before
631 if (list->tokens_used >= list->tokens_cap)
632 expand_token_space (list);
633 if (list->name_used + len >= list->name_cap)
634 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
636 if (type == CPP_MACRO)
640 TOK_TYPE (list, i) = type;
641 TOK_COL (list, i) = col;
642 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
644 if (type == CPP_VSPACE)
647 TOK_LEN (list, i) = len;
648 if (token_spellings[type].type > SPELL_NONE)
650 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
651 TOK_NAME (list, i) = list->namebuf + list->name_used;
652 list->name_used += len;
655 TOK_NAME (list, i) = token_spellings[type].spelling;
659 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
661 /* XXX Temporary kluge: put back the newline. */
666 /* Skip a C-style block comment. We know it's a comment, and point is
667 at the second character of the starter. */
/* The line and column on entry are kept for the "unterminated
   comment" error.  Scanning uses a local pointer; on exit the
   buffer's `cur' is set one past that pointer.  */
669 skip_block_comment (pfile)
672 unsigned int line, col;
673 const U_CHAR *limit, *cur;
676 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
677 col = CPP_BUF_COL (CPP_BUFFER (pfile));
678 limit = CPP_BUFFER (pfile)->rlimit;
679 cur = CPP_BUFFER (pfile)->cur;
684 if (c == '\n' || c == '\r')
686 /* \r cannot be a macro escape marker here. */
687 if (!ACTIVE_MARK_P (pfile))
688 CPP_BUMP_LINE_CUR (pfile, cur);
692 /* Check for terminator. */
693 if (cur < limit && *cur == '/')
696 /* Warn about comment starter embedded in comment. */
697 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
698 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
699 cur - CPP_BUFFER (pfile)->line_base,
700 "'/*' within comment");
704 cpp_error_with_line (pfile, line, col, "unterminated comment");
707 CPP_BUFFER (pfile)->cur = cur + 1;
710 /* Skip a C++/Chill line comment. We know it's a comment, and point
711 is at the second character of the initiator. */
/* A backslash-newline inside the comment bumps the line count unless
   a mark is active and, when warn_comments is set, draws a
   warning.  */
713 skip_line_comment (pfile)
721 /* We don't have to worry about EOF in here. */
724 /* Don't consider final '\n' to be part of comment. */
730 /* \r cannot be a macro escape marker here. */
731 if (!ACTIVE_MARK_P (pfile))
732 CPP_BUMP_LINE (pfile);
733 if (CPP_OPTION (pfile, warn_comments))
734 cpp_warning (pfile, "backslash-newline within line comment");
/* Which markers skip_comment accepts: `/' followed by `*' is always a
   block comment.  `/' followed by `/' is skipped without any
   diagnostic in a system header; elsewhere it is skipped only when
   the cplusplus_comments option is set, after a once-per-buffer
   diagnostic if CPP_WTRADITIONAL holds or c89 and CPP_PEDANTIC both
   hold.  `-' followed by `-' is a comment only when the chill option
   is set.  */
739 /* Skip a comment - C, C++, or Chill style. M is the first character
740 of the comment marker. If this really is a comment, skip to its
741 end and return ' '. If this is not a comment, return M (which will
745 skip_comment (pfile, m)
749 if (m == '/' && PEEKC() == '*')
751 skip_block_comment (pfile);
754 else if (m == '/' && PEEKC() == '/')
756 if (CPP_BUFFER (pfile)->system_header_p)
758 /* We silently allow C++ comments in system headers, irrespective
759 of conformance mode, because lots of busted systems do that
760 and trying to clean it up in fixincludes is a nightmare. */
761 skip_line_comment (pfile);
764 else if (CPP_OPTION (pfile, cplusplus_comments))
766 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
768 if (CPP_WTRADITIONAL (pfile))
770 "C++ style comments are not allowed in traditional C");
771 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
773 "C++ style comments are not allowed in ISO C89");
774 if (CPP_WTRADITIONAL (pfile)
775 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
777 "(this will be reported only once per input file)");
778 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
780 skip_line_comment (pfile);
786 else if (m == '-' && PEEKC() == '-'
787 && CPP_OPTION (pfile, chill))
789 skip_line_comment (pfile);
796 /* Identical to skip_comment except that it copies the comment into the
797 token_buffer. This is used if !discard_comments. */
/* When skip_comment hands M back, no comment was present.  Otherwise
   room is reserved for the span plus two bytes, M is written first,
   and the copy loop then runs from the buffer position on entry
   through the position where skip_comment stopped, inclusive.  */
799 copy_comment (pfile, m)
803 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
806 if (skip_comment (pfile, m) == m)
809 limit = CPP_BUFFER (pfile)->cur;
810 CPP_RESERVE (pfile, limit - start + 2);
811 CPP_PUTC_Q (pfile, m);
812 for (; start <= limit; start++)
814 CPP_PUTC_Q (pfile, *start);
/* Warn that embedded null characters were ignored.  Two wordings
   exist, singular and plural; presumably COUNT selects between
   them -- confirm against the full source.  */
820 null_warning (pfile, count)
825 cpp_warning (pfile, "embedded null character ignored");
827 cpp_warning (pfile, "embedded null characters ignored");
830 /* Skip whitespace \-newline and comments. Does not macro-expand. */
/* Formfeed and vertical tab draw a pedantic warning.  A `/' or `-' is
   handed to skip_comment, whose result replaces the character.  A
   tally of null characters is passed to null_warning.  */
833 _cpp_skip_hspace (pfile)
836 unsigned int null_count = 0;
844 else if (is_hspace(c))
846 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
847 cpp_pedwarn (pfile, "%s in preprocessing directive",
848 c == '\f' ? "formfeed" : "vertical tab");
854 /* \r is a backslash-newline marker if !has_escapes, and
855 a deletable-whitespace or no-reexpansion marker otherwise. */
856 if (CPP_BUFFER (pfile)->has_escapes)
864 CPP_BUMP_LINE (pfile);
866 else if (c == '/' || c == '-')
868 c = skip_comment (pfile, c);
878 null_warning (pfile, null_count);
881 /* Read and discard the rest of the current line. */
/* Along the way string and character constants go through
   skip_string and comment markers through skip_comment.  The line
   count is bumped only for buffers without escapes.  Under
   CPP_PEDANTIC a formfeed or vertical tab draws a warning.  */
884 _cpp_skip_rest_of_line (pfile)
898 if (! CPP_BUFFER (pfile)->has_escapes)
899 CPP_BUMP_LINE (pfile);
904 skip_string (pfile, c);
909 skip_comment (pfile, c);
914 if (CPP_PEDANTIC (pfile))
915 cpp_pedwarn (pfile, "%s in preprocessing directive",
916 c == '\f' ? "formfeed" : "vertical tab");
923 /* Parse an identifier starting with C. */
/* A `$' draws a pedantic warning.  Two bytes are reserved before each
   unchecked store so that room for a trailing NUL always remains.  */
926 _cpp_parse_name (pfile, c)
938 if (c == '$' && CPP_PEDANTIC (pfile))
939 cpp_pedwarn (pfile, "`$' in identifier");
941 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
942 CPP_PUTC_Q (pfile, c);
/* Bookkeeping used by skip_string: the line and column on entry are
   kept for diagnostics.  pfile->multiline_string_line records the
   starting line of the first string found to run past a line end
   while it is still zero, and is reset to zero when an unterminated
   constant is reported.  Null characters are tallied and reported
   once at the end, with singular or plural wording.  */
950 /* Parse and skip over a string starting with C. A single quoted
951 string is treated like a double -- some programs (e.g., troff) are
952 perverse this way. (However, a single quoted string is not allowed
953 to extend over multiple lines.) */
955 skip_string (pfile, c)
959 unsigned int start_line, start_column;
960 unsigned int null_count = 0;
962 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
963 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
970 cpp_error_with_line (pfile, start_line, start_column,
971 "unterminated string or character constant");
972 if (pfile->multiline_string_line != start_line
973 && pfile->multiline_string_line != 0)
974 cpp_error_with_line (pfile,
975 pfile->multiline_string_line, -1,
976 "possible real start of unterminated constant");
977 pfile->multiline_string_line = 0;
985 CPP_BUMP_LINE (pfile);
986 /* In Fortran and assembly language, silently terminate
987 strings of either variety at end of line. This is a
988 kludge around not knowing where comments are in these
990 if (CPP_OPTION (pfile, lang_fortran)
991 || CPP_OPTION (pfile, lang_asm))
996 /* Character constants may not extend over multiple lines.
997 In Standard C, neither may strings. We accept multiline
998 strings as an extension. */
1001 cpp_error_with_line (pfile, start_line, start_column,
1002 "unterminated character constant");
1006 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1007 cpp_pedwarn_with_line (pfile, start_line, start_column,
1008 "string constant runs past end of line");
1009 if (pfile->multiline_string_line == 0)
1010 pfile->multiline_string_line = start_line;
1014 if (CPP_BUFFER (pfile)->has_escapes)
1016 cpp_ice (pfile, "\\r escape inside string constant");
1020 /* Backslash newline is replaced by nothing at all. */
1021 CPP_BUMP_LINE (pfile);
1037 if (null_count == 1)
1038 cpp_warning (pfile, "null character in string or character constant");
1039 else if (null_count > 1)
1040 cpp_warning (pfile, "null characters in string or character constant");
1043 /* Parse a string and copy it to the output. */
/* skip_string finds the end of the constant.  Room is reserved for
   the span plus two bytes, C is written first, and the copy loop then
   runs from the buffer position on entry up to, but not including,
   the position where skip_string stopped.  */
1046 parse_string (pfile, c)
1050 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1051 const U_CHAR *limit;
1053 skip_string (pfile, c);
1055 limit = CPP_BUFFER (pfile)->cur;
1056 CPP_RESERVE (pfile, limit - start + 2);
1057 CPP_PUTC_Q (pfile, c);
1058 for (; start < limit; start++)
1060 CPP_PUTC_Q (pfile, *start);
/* Shape of the text _cpp_parse_assertion builds: a `#', then the
   predicate name written by _cpp_parse_name, then for an answer a `('
   and the answer text.  At the end of an answer a trailing space in
   the token buffer is overwritten with `)'; if the last byte is
   still the `(' the answer is rejected as empty; otherwise `)' is
   appended.  */
1063 /* Read an assertion into the token buffer, converting to
1064 canonical form: `#predicate(a n swe r)' The next non-whitespace
1065 character to read should be the first letter of the predicate.
1066 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
1067 with answer (see callers for why). In case of 0, an error has been
1070 _cpp_parse_assertion (pfile)
1074 _cpp_skip_hspace (pfile);
1078 cpp_error (pfile, "assertion without predicate");
1081 else if (! is_idstart(c))
1083 cpp_error (pfile, "assertion predicate is not an identifier");
1086 CPP_PUTC(pfile, '#');
1088 _cpp_parse_name (pfile, c);
1093 if (is_hspace(c) || c == '\r')
1094 _cpp_skip_hspace (pfile);
1100 CPP_PUTC(pfile, '(');
1103 while ((c = GETC()) != ')')
1109 CPP_PUTC(pfile, ' ');
1113 else if (c == '\n' || c == EOF)
1115 if (c == '\n') FORWARD(-1);
1116 cpp_error (pfile, "un-terminated assertion answer");
1120 /* \r cannot be a macro escape here. */
1121 CPP_BUMP_LINE (pfile);
1124 CPP_PUTC (pfile, c);
1129 if (pfile->limit[-1] == ' ')
1130 pfile->limit[-1] = ')';
1131 else if (pfile->limit[-1] == '(')
1133 cpp_error (pfile, "empty token sequence in assertion");
1137 CPP_PUTC (pfile, ')');
1142 /* Get the next token, and add it to the text in pfile->token_buffer.
1143 Return the kind of token we got. */
/* The spelling of every token, digraphs included, is appended to the
   token buffer exactly as it appeared in the input.  The brace
   digraphs report the kind of the punctuator they stand for: "<%"
   gives CPP_OPEN_BRACE and "%>" gives CPP_CLOSE_BRACE, the same kinds
   the plain '{' and '}' cases return.  */
1146 _cpp_lex_token (pfile)
1150 enum cpp_ttype token;
1152 if (CPP_BUFFER (pfile) == NULL)
1163 if (PEEKC () == '=')
1167 if (CPP_OPTION (pfile, discard_comments))
1168 c = skip_comment (pfile, c);
1170 c = copy_comment (pfile, c);
1174 /* Comments are equivalent to spaces.
1175 For -traditional, a comment is equivalent to nothing. */
1176 if (!CPP_OPTION (pfile, discard_comments))
1178 else if (CPP_TRADITIONAL (pfile))
1180 if (pfile->parsing_define_directive)
1186 CPP_PUTC (pfile, c);
1191 CPP_PUTC (pfile, c);
1194 if (pfile->parsing_if_directive)
1196 CPP_ADJUST_WRITTEN (pfile, -1);
1197 if (_cpp_parse_assertion (pfile))
1198 return CPP_ASSERTION;
1202 if (pfile->parsing_define_directive)
1208 CPP_PUTC (pfile, c2);
1210 else if (c2 == '%' && PEEKN (1) == ':')
1212 /* Digraph: "%:" == "#". */
1214 CPP_RESERVE (pfile, 2);
1215 CPP_PUTC_Q (pfile, c2);
1216 CPP_PUTC_Q (pfile, GETC ());
1224 if (!pfile->only_seen_white)
1227 /* Remove the "#" or "%:" from the token buffer. */
1228 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1229 return CPP_DIRECTIVE;
1233 parse_string (pfile, c);
1234 return c == '\'' ? CPP_CHAR : CPP_STRING;
1237 if (!CPP_OPTION (pfile, dollars_in_ident))
1243 /* Digraph: ":>" == "]". */
1245 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1253 if (c2 == c || c2 == '=')
1258 /* Digraphs: "%:" == "#", "%>" == "}". */
1263 CPP_RESERVE (pfile, 2);
1264 CPP_PUTC_Q (pfile, c);
1265 CPP_PUTC_Q (pfile, c2);
1271 CPP_RESERVE (pfile, 2);
1272 CPP_PUTC_Q (pfile, c);
1273 CPP_PUTC_Q (pfile, c2);
/* "%>" stands for '}', so the token is a closing brace. */
1274 return CPP_CLOSE_BRACE;
1276 /* else fall through */
1282 if (PEEKC () == '=')
1290 if (CPP_OPTION (pfile, chill))
1291 goto comment; /* Chill style comment */
1299 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1301 /* In C++, there's a ->* operator. */
1303 CPP_RESERVE (pfile, 4);
1304 CPP_PUTC_Q (pfile, c);
1305 CPP_PUTC_Q (pfile, GETC ());
1306 CPP_PUTC_Q (pfile, GETC ());
1314 if (pfile->parsing_include_directive)
1318 CPP_PUTC (pfile, c);
1322 if (c == '\n' || c == EOF)
1325 "missing '>' in `#include <FILENAME>'");
1330 if (!CPP_BUFFER (pfile)->has_escapes)
1332 /* Backslash newline is replaced by nothing. */
1333 CPP_ADJUST_WRITTEN (pfile, -1);
1334 CPP_BUMP_LINE (pfile);
1338 /* We might conceivably get \r- or \r<space> in
1339 here. Just delete 'em. */
1341 if (d != '-' && d != ' ')
1342 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1343 CPP_ADJUST_WRITTEN (pfile, -1);
1349 /* Digraphs: "<%" == "{", "<:" == "[". */
1354 CPP_RESERVE (pfile, 2);
1355 CPP_PUTC_Q (pfile, c);
1356 CPP_PUTC_Q (pfile, c2);
/* "<%" stands for '{', so the token is an opening brace. */
1357 return CPP_OPEN_BRACE;
1361 /* else fall through */
1366 /* GNU C++ supports MIN and MAX operators <? and >?. */
1367 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1370 CPP_RESERVE (pfile, 3);
1371 CPP_PUTC_Q (pfile, c);
1372 CPP_PUTC_Q (pfile, c2);
1373 if (PEEKC () == '=')
1374 CPP_PUTC_Q (pfile, GETC ());
1381 CPP_PUTC (pfile, c);
1386 /* In C++ there's a .* operator. */
1387 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1390 if (c2 == '.' && PEEKN(1) == '.')
1392 CPP_RESERVE (pfile, 3);
1393 CPP_PUTC_Q (pfile, '.');
1394 CPP_PUTC_Q (pfile, '.');
1395 CPP_PUTC_Q (pfile, '.');
1397 return CPP_ELLIPSIS;
1402 CPP_RESERVE (pfile, 2);
1403 CPP_PUTC_Q (pfile, c);
1404 CPP_PUTC_Q (pfile, GETC ());
1409 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1411 CPP_PUTC (pfile, c);
1413 parse_string (pfile, c);
1414 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1418 case '0': case '1': case '2': case '3': case '4':
1419 case '5': case '6': case '7': case '8': case '9':
1424 CPP_RESERVE (pfile, 2);
1425 CPP_PUTC_Q (pfile, c);
1429 if (!is_numchar(c) && c != '.'
1430 && ((c2 != 'e' && c2 != 'E'
1431 && ((c2 != 'p' && c2 != 'P')
1432 || CPP_OPTION (pfile, c89)))
1433 || (c != '+' && c != '-')))
1439 case 'b': case 'c': case 'd': case 'h': case 'o':
1440 case 'B': case 'C': case 'D': case 'H': case 'O':
1441 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1443 CPP_RESERVE (pfile, 2);
1444 CPP_PUTC_Q (pfile, c);
1445 CPP_PUTC_Q (pfile, '\'');
1451 goto chill_number_eof;
1454 CPP_PUTC (pfile, c);
1458 CPP_RESERVE (pfile, 2);
1459 CPP_PUTC_Q (pfile, c);
1472 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1473 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1474 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1475 case 'x': case 'y': case 'z':
1476 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1477 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1478 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1481 _cpp_parse_name (pfile, c);
1484 case ' ': case '\t': case '\v': case '\f': case '\0':
1493 CPP_PUTC (pfile, c);
1495 if (c == EOF || !is_hspace(c))
1500 null_warning (pfile, null_count);
1505 if (CPP_BUFFER (pfile)->has_escapes)
1510 if (pfile->output_escapes)
1511 CPP_PUTS (pfile, "\r-", 2);
1512 _cpp_parse_name (pfile, GETC ());
1517 /* "\r " means a space, but only if necessary to prevent
1518 accidental token concatenation. */
1519 CPP_RESERVE (pfile, 2);
1520 if (pfile->output_escapes)
1521 CPP_PUTC_Q (pfile, '\r');
1522 CPP_PUTC_Q (pfile, c);
1527 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1533 /* Backslash newline is ignored. */
1534 if (!ACTIVE_MARK_P (pfile))
1535 CPP_BUMP_LINE (pfile);
1540 CPP_PUTC (pfile, c);
1543 case '(': token = CPP_OPEN_PAREN; goto char1;
1544 case ')': token = CPP_CLOSE_PAREN; goto char1;
1545 case '{': token = CPP_OPEN_BRACE; goto char1;
1546 case '}': token = CPP_CLOSE_BRACE; goto char1;
1547 case ',': token = CPP_COMMA; goto char1;
1548 case ';': token = CPP_SEMICOLON; goto char1;
1554 CPP_PUTC (pfile, c);
1559 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1560 Caller is expected to have checked no_macro_expand. */
/* The name is looked up with _cpp_lookup and handled by node type.
   A disabled or T_IDENTITY node is left unexpanded; when
   output_escapes is set its text is shifted up two bytes to make room
   for a no-reexpand marker.  A T_EMPTY node has its text replaced by
   a single space.  A T_FMACRO node is first checked for a following
   `(': macro buffers that run out are popped while looking, the file
   buffer is searched under a mark that CPP_GOTO_MARK can return to,
   and whitespace passed over in macro buffers is replaced by one
   space when the name turns out not to be a call.  Otherwise
   _cpp_macroexpand is called and the written count is rewound to
   WRITTEN.  */
1562 maybe_macroexpand (pfile, written)
1566 U_CHAR *macro = pfile->token_buffer + written;
1567 size_t len = CPP_WRITTEN (pfile) - written;
1568 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1570 /* _cpp_lookup never returns null. */
1571 if (hp->type == T_VOID)
1573 if (hp->disabled || hp->type == T_IDENTITY)
1575 if (pfile->output_escapes)
1577 /* Insert a no-reexpand marker before IDENT. */
1578 CPP_RESERVE (pfile, 2);
1579 CPP_ADJUST_WRITTEN (pfile, 2);
1580 macro = pfile->token_buffer + written;
1582 memmove (macro + 2, macro, len);
1588 if (hp->type == T_EMPTY)
1590 /* Special case optimization: macro expands to nothing. */
1591 CPP_SET_WRITTEN (pfile, written);
1592 CPP_PUTC_Q (pfile, ' ');
1596 /* If macro wants an arglist, verify that a '(' follows. */
1597 if (hp->type == T_FMACRO)
1599 int macbuf_whitespace = 0;
1602 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1604 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1607 _cpp_skip_hspace (pfile);
1614 if (point != CPP_BUFFER (pfile)->cur)
1615 macbuf_whitespace = 1;
1619 goto not_macro_call;
1620 cpp_pop_buffer (pfile);
1623 CPP_SET_MARK (pfile);
1626 _cpp_skip_hspace (pfile);
1633 CPP_GOTO_MARK (pfile);
1638 if (macbuf_whitespace)
1639 CPP_PUTC (pfile, ' ');
1645 /* This is now known to be a macro call.
1646 Expand the macro, reading arguments as needed,
1647 and push the expansion on the input stack. */
1648 _cpp_macroexpand (pfile, hp);
1649 CPP_SET_WRITTEN (pfile, written);
1653 /* Complain about \v or \f in a preprocessing directive (constraint
1654 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
/* P points at the text to examine and LEN is its length; the visible
   caller passes the bytes written to token_buffer by the token just
   lexed.  NOTE(review): the loop over P and the test selecting the
   vertical-tab branch are not present in this listing.  */
1656 pedantic_whitespace (pfile, p, len)
1664 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1665 else if (*p == '\f')
1666 cpp_pedwarn (pfile, "form feed in preprocessing directive");
/* Lex one token with _cpp_lex_token and post-process it: maintain the
   only_seen_white / potential_control_macro state, hand a directive to
   _cpp_handle_directive, attempt macro expansion of a name, and pop
   exhausted buffers.
   NOTE(review): the switch on TOKEN, its case labels and most return
   statements are not present in this listing, so which token kind
   reaches each group of statements below must be confirmed against the
   full source.  */
1674 cpp_get_token (pfile)
1677 enum cpp_ttype token;
/* Remember where this token's text starts in token_buffer.  */
1678 long written = CPP_WRITTEN (pfile);
1681 token = _cpp_lex_token (pfile);
1686 pfile->potential_control_macro = 0;
1687 pfile->only_seen_white = 0;
1691 if (pfile->only_seen_white == 0)
1692 pfile->only_seen_white = 1;
1693 CPP_BUMP_LINE (pfile);
1701 pfile->potential_control_macro = 0;
1702 if (_cpp_handle_directive (pfile))
1703 return CPP_DIRECTIVE;
/* Not handled as a directive: output the '#' character itself.  */
1704 pfile->only_seen_white = 0;
1705 CPP_PUTC (pfile, '#');
1709 pfile->potential_control_macro = 0;
1710 pfile->only_seen_white = 0;
/* Macro expansion is attempted only when not suppressed by
   no_macro_expand.  */
1711 if (! pfile->no_macro_expand
1712 && maybe_macroexpand (pfile, written))
1717 if (CPP_BUFFER (pfile) == NULL)
1719 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1721 cpp_pop_buffer (pfile);
1724 cpp_pop_buffer (pfile);
1729 /* Like cpp_get_token, but skip spaces and comments. */
/* Text written to token_buffer by a skipped token is discarded by
   resetting the write position to OLD_WRITTEN.
   NOTE(review): the enclosing loop and the return of TOKEN are not
   present in this listing.  OLD_WRITTEN is an int here whereas
   cpp_get_token keeps the same quantity in a long -- worth confirming
   the two cannot disagree for large buffers.  */
1732 cpp_get_non_space_token (pfile)
1735 int old_written = CPP_WRITTEN (pfile);
1738 enum cpp_ttype token = cpp_get_token (pfile);
1739 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1741 CPP_SET_WRITTEN (pfile, old_written);
1745 /* Like cpp_get_token, except that it does not execute directives,
1746 does not consume vertical space, and discards horizontal space. */
/* NOTE(review): the switch on TOKEN, its case labels, the declarations
   of AT_BOL and OLD_WRITTEN and most return statements are not present
   in this listing; the grouping of the statements below by token kind
   must be confirmed against the full source.  */
1748 _cpp_get_directive_token (pfile)
1752 enum cpp_ttype token;
/* AT_BOL records whether lexing starts exactly at the beginning of a
   line; it is consulted below before issuing pedantic warnings.  */
1756 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1757 old_written = CPP_WRITTEN (pfile);
1758 token = _cpp_lex_token (pfile);
1765 /* Put it back and return VSPACE. */
1767 CPP_ADJUST_WRITTEN (pfile, -1);
1771 /* The purpose of this rather strange check is to prevent pedantic
1772 warnings for ^L in an #ifdefed out block. */
1773 if (CPP_PEDANTIC (pfile) && ! at_bol)
1774 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1775 CPP_WRITTEN (pfile) - old_written);
/* Drop the text just written, so it is not kept in token_buffer.  */
1776 CPP_SET_WRITTEN (pfile, old_written);
1781 /* Don't execute the directive, but don't smash it to OTHER either. */
1782 CPP_PUTC (pfile, '#');
1783 return CPP_DIRECTIVE;
1786 if (! pfile->no_macro_expand
1787 && maybe_macroexpand (pfile, old_written))
1792 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1794 cpp_pop_buffer (pfile);
1798 /* This can happen for files that don't end with a newline,
1799 and for cpp_define and friends. Pretend they do, so
1800 callers don't have to deal. A warning will be issued by
1801 someone else, if necessary. */
1806 /* Determine the current line and column. Used only by read_and_prescan. */
/* Scans the bytes in [START, LIMIT).  *LINEP supplies the line number to
   start counting from.  Either '\n' or '\r' is treated as ending a line.
   The callers in this file store the returned pointer as their new line
   base and derive the column from it.
   NOTE(review): the body of the newline branch, the store back through
   LINEP and the return statement are not present in this listing.  */
1808 find_position (start, limit, linep)
1811 unsigned long *linep;
1813 unsigned long line = *linep;
1814 U_CHAR *lbase = start;
1815 while (start < limit)
1817 U_CHAR ch = *start++;
1818 if (ch == '\n' || ch == '\r')
1828 /* The following table is used by _cpp_read_and_prescan. If we have
1829 designated initializers, it can be constant data; otherwise, it is
1830 set up at runtime by _cpp_init_input_buffer. */
1833 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1836 #if (GCC_VERSION >= 2007)
1837 #define init_chartab() /* nothing */
1838 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1840 #define s(p, v) [p] = v,
1842 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1843 static void init_chartab PARAMS ((void)) { \
1844 unsigned char *x = chartab;
1846 #define s(p, v) x[p] = v;
1849 /* Table of characters that can't be handled in the inner loop.
1850 Also contains the mapping between trigraph third characters and their
1852 #define SPECCASE_CR 1
1853 #define SPECCASE_BACKSLASH 2
1854 #define SPECCASE_QUESTION 3
1857 s('\r', SPECCASE_CR)
1858 s('\\', SPECCASE_BACKSLASH)
1859 s('?', SPECCASE_QUESTION)
1861 s('=', '#') s(')', ']') s('!', '|')
1862 s('(', '[') s('\'', '^') s('>', '}')
1863 s('/', '\\') s('<', '{') s('-', '~')
1870 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1871 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1873 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1874 much memory to allocate initially; more will be allocated if
1875 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1876 canonical form (\n). If enabled, convert and/or warn about
1877 trigraphs. Convert backslash-newline to a one-character escape
1878 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1879 token). If there is no newline at the end of the file, add one and
1880 warn. Returns -1 on failure, or the actual length of the data to
1883 This function does a lot of work, and can be a serious performance
1884 bottleneck. It has been tuned heavily; make sure you understand it
1885 before hacking. The common case - no trigraphs, Unix style line
1886 breaks, backslash-newline set off by whitespace, newline at EOF -
1887 has been optimized at the expense of the others. The performance
1888 penalty for DOS style line breaks (\r\n) is about 15%.
1890 Warnings lose particularly heavily since we have to determine the
1891 line number, which involves scanning from the beginning of the file
1892 or from the last warning. The penalty for the absence of a newline
1893 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1895 If your file has more than one kind of end-of-line marker, you
1896 will get messed-up line numbering.
1898 So that the cases of the switch statement do not have to concern
1899 themselves with the complications of reading beyond the end of the
1900 buffer, the buffer is guaranteed to have at least 3 characters in
1901 it (or however many are left in the file, if less) on entry to the
1902 switch. This is enough to handle trigraphs and the "\\\n\r" and
1905 The end of the buffer is marked by a '\\', which, being a special
1906 character, guarantees we will exit the fast-scan loops and perform
1910 _cpp_read_and_prescan (pfile, fp, desc, len)
1916 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1917 U_CHAR *ip, *op, *line_base;
1920 unsigned int deferred_newlines;
1925 deferred_newlines = 0;
1929 ibase = pfile->input_buffer + 3;
1931 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1935 U_CHAR *near_buff_end;
1937 count = read (desc, ibase, pfile->input_buffer_len);
1941 ibase[count] = '\\'; /* Marks end of buffer */
1944 near_buff_end = pfile->input_buffer + count;
1949 size_t delta_line_base;
1953 This could happen if the file is larger than half the
1954 maximum address space of the machine. */
1957 delta_op = op - buf;
1958 delta_line_base = line_base - buf;
1959 buf = (U_CHAR *) xrealloc (buf, len);
1960 op = buf + delta_op;
1961 line_base = buf + delta_line_base;
1968 /* Allow normal processing of the (at most 2) remaining
1969 characters. The end-of-buffer marker is still present
1970 and prevents false matches within the switch. */
1971 near_buff_end = ibase - 1;
1978 /* Deal with \-newline, potentially in the middle of a token. */
1979 if (deferred_newlines)
1981 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1983 /* Previous was not white space. Skip to white
1984 space, if we can, before outputting the \r's */
1986 while (ip[span] != ' '
1989 && NORMAL(ip[span]))
1991 memcpy (op, ip, span);
1994 if (! NORMAL(ip[0]))
1997 while (deferred_newlines)
1998 deferred_newlines--, *op++ = '\r';
2001 /* Copy as much as we can without special treatment. */
2003 while (NORMAL (ip[span])) span++;
2004 memcpy (op, ip, span);
2009 if (ip > near_buff_end) /* Do we have enough chars? */
2011 switch (chartab[*ip++])
2013 case SPECCASE_CR: /* \r */
2022 case SPECCASE_BACKSLASH: /* \ */
2025 deferred_newlines++;
2027 if (*ip == '\r') ip++;
2029 else if (*ip == '\r')
2031 deferred_newlines++;
2033 if (*ip == '\n') ip++;
2039 case SPECCASE_QUESTION: /* ? */
2043 *op++ = '?'; /* Normal non-trigraph case */
2052 if (CPP_OPTION (pfile, warn_trigraphs))
2055 line_base = find_position (line_base, op, &line);
2056 col = op - line_base + 1;
2057 if (CPP_OPTION (pfile, trigraphs))
2058 cpp_warning_with_line (pfile, line, col,
2059 "trigraph ??%c converted to %c", d, t);
2061 cpp_warning_with_line (pfile, line, col,
2062 "trigraph ??%c ignored", d);
2066 if (CPP_OPTION (pfile, trigraphs))
2068 op[-1] = t; /* Overwrite '?' */
2073 goto do_speccase; /* May need buffer refill */
2085 /* Copy previous char plus unprocessed (at most 2) chars
2086 to beginning of buffer, refill it with another
2087 read(), and continue processing */
2088 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2098 line_base = find_position (line_base, op, &line);
2099 col = op - line_base + 1;
2100 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2101 if (offset + 1 > len)
2104 if (offset + 1 > len)
2106 buf = (U_CHAR *) xrealloc (buf, len);
2112 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2116 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2117 (unsigned long)offset);
2122 cpp_error_from_errno (pfile, fp->ihash->name);
2127 /* Allocate pfile->input_buffer, and initialize chartab[]
2128 if it hasn't happened already. */
/* Also initializes the token list pfile->directbuf.
   NOTE(review): the call that initializes chartab and the declaration of
   TMP are not present in this listing.  */
2131 _cpp_init_input_buffer (pfile)
2137 init_token_list (pfile, &pfile->directbuf, 0);
2139 /* Determine the appropriate size for the input buffer. Normal C
2140 source files are smaller than eight K. */
2141 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2142 address arithmetic all the time, and 3 for pushback during buffer
2143 refill, in case there's a potential trigraph or end-of-line
2144 digraph at the end of a block. */
2146 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2147 pfile->input_buffer = tmp;
/* input_buffer_len counts only the 8192 data bytes, not the 4 bytes of
   slack allocated above.  */
2148 pfile->input_buffer_len = 8192;
2152 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2153 and extending for LEN characters to the NUL-terminated string
2154 STRING. Typical usage:
2156 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2162 cpp_idcmp (token, len, string)
2163 const U_CHAR *token;
2167 size_t len2 = strlen (string);
2170 if ((r = memcmp (token, string, MIN (len, len2))))
2173 /* The longer of the two strings sorts after the shorter. */
2176 else if (len < len2)
2184 /* Lexing algorithm.
2186 The original lexer in cpplib was made up of two passes: a first pass
2187 that replaced trigraphs and deleted escaped newlines, and a second
2188 pass that tokenized the result of the first pass. Tokenisation was
2189 performed by peeking at the next character in the input stream. For
2190 example, if the input stream contained "!=", the handler for the !
2191 character would peek at the next character, and if it were a '='
2192 would skip over it, and return a "!=" token, otherwise it would
2193 return just the "!" token.
2195 To implement a single-pass lexer, this peeking ahead is unworkable.
2196 An arbitrary number of escaped newlines, and trigraphs (in particular
2197 ??/ which translates to the escape \), could separate the '!' and '='
2198 in the input stream, yet the next token is still a "!=".
2200 Suppose instead that we lex by one logical line at a time, producing
2201 a token list or stack for each logical line, and when seeing the '!'
2202 push a CPP_NOT token on the list. Then if the '!' is part of a
2203 longer token ("!=") we know we must see the remainder of the token by
2204 the time we reach the end of the logical line. Thus we can have the
2205 '=' handler look at the previous token (at the end of the list / top
2206 of the stack) and see if it is a "!" token, and if so, instead of
2207 pushing a "=" token revise the existing token to be a "!=" token.
2209 This works in the presence of escaped newlines, because the '\' would
2210 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2211 newline ('\n' or '\r') handler looks at the token at the top of the
2212 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2213 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2214 the '=' handler would never see any intervening escaped newlines.
2216 To make trigraphs work in this context, as in precedence trigraphs
2217 are highest and converted before anything else, the '?' handler does
2218 lookahead to see if it is a trigraph, and if so skips the trigraph
2219 and pushes the token it represents onto the top of the stack. This
2220 also works in the particular case of a CPP_BACKSLASH trigraph.
2222 To the preprocessor, whitespace is only significant to the point of
2223 knowing whether whitespace precedes a particular token. For example,
2224 the '=' handler needs to know whether there was whitespace between it
2225 and a "!" token on the top of the stack, to make the token conversion
2226 decision correctly. So each token has a PREV_WHITESPACE flag to
2227 indicate this - the standard permits consecutive whitespace to be
2228 regarded as a single space. The compiler front ends are not
2229 interested in whitespace at all; they just require a token stream.
2230 Another place where whitespace is significant to the preprocessor is
2231 a #define statement - if there is whitespace between the macro name
2232 and an initial "(" token the macro is "object-like", otherwise it is
2233 a function-like macro that takes arguments.
2235 However, all is not rosy. Parsing of identifiers, numbers, comments
2236 and strings becomes trickier because of the possibility of raw
2237 trigraphs and escaped newlines in the input stream.
2239 The trigraphs are three consecutive characters beginning with two
2240 question marks. A question mark is not valid as part of a number or
2241 identifier, so parsing of a number or identifier terminates normally
2242 upon reaching it, returning to the mainloop which handles the
2243 trigraph just like it would in any other position. Similarly for the
2244 backslash of a backslash-newline combination. So we just need the
2245 escaped-newline dropper in the mainloop to check if the token on the
2246 top of the stack after dropping the escaped newline is a number or
2247 identifier, and if so to continue processing it as if nothing had
2250 For strings, we replace trigraphs whenever we reach a quote or
2251 newline, because there might be a backslash trigraph escaping them.
2252 We need to be careful that we start trigraph replacing from where we
2253 left off previously, because it is possible for a first scan to leave
2254 "fake" trigraphs that a second scan would pick up as real (e.g. the
2255 sequence "????/\n=" would find a fake ??= trigraph after removing the
2258 For line comments, on reaching a newline we scan the previous
2259 character(s) to see if it is escaped, and continue if it is. Block
2260 comments ignore everything and just focus on finding the comment
2261 termination mark. The only difficult thing, and it is surprisingly
2262 tricky, is checking if an asterisk precedes the final slash since
2263 they could be separated by escaped newlines. If the preprocessor is
2264 invoked with the output comments option, we don't bother removing
2265 escaped newlines and replacing trigraphs for output.
2267 Finally, numbers can begin with a period, which is pushed initially
2268 as a CPP_DOT token in its own right. The digit handler checks if the
2269 previous token was a CPP_DOT not separated by whitespace, and if so
2270 pops it off the stack and pushes a period into the number's buffer
2271 before calling the number parser.
/* Spellings of the six digraphs.  NOTE(review): how this table is indexed
   is not visible in this part of the file.  */
2275 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2276 U":>", U"<%", U"%>"};
/* Indexed by the third character of a candidate trigraph: holds the
   character the trigraph stands for, or zero if that character does not
   complete a trigraph.  Filled in by init_trigraph_map.  */
2277 static unsigned char trigraph_map[256];
/* Grow LIST's array of saved comment tokens.  The first call allocates
   room for 10 cpp_token entries; each later call doubles the capacity.
   comments_cap counts tokens, not bytes, so both allocation sizes must be
   scaled by sizeof (cpp_token).  The reallocation previously passed the
   bare token count, which shrank the array instead of growing it.
   NOTE(review): the return type, the parameter declaration, the braces
   and the `else' joining the two branches are not present in this
   listing.  */
2280 expand_comment_space (list)
2283 if (list->comments_cap == 0)
2285 list->comments_cap = 10;
2286 list->comments = (cpp_token *)
2287 xmalloc (list->comments_cap * sizeof (cpp_token));
2291 list->comments_cap *= 2;
2292 list->comments = (cpp_token *)
2293 xrealloc (list->comments, list->comments_cap * sizeof (cpp_token));
/* Release the three buffers owned by LIST: the saved comments, the token
   array and the name buffer.  list->tokens points one element past the
   start of its allocation (a dummy token precedes it), so the pointer is
   stepped back before being freed.
   NOTE(review): whether LIST itself is also freed is not visible in this
   listing.  */
2298 cpp_free_token_list (list)
2302 free (list->comments);
2303 free (list->tokens - 1); /* Backup over dummy token. */
2304 free (list->namebuf);
/* Fill in trigraph_map: for each of the nine characters that can follow
   "??" to form a trigraph, store the character the trigraph represents.
   Entries not assigned here are left untouched.  */
2309 init_trigraph_map ()
2311 trigraph_map['='] = '#';
2312 trigraph_map['('] = '[';
2313 trigraph_map[')'] = ']';
2314 trigraph_map['/'] = '\\';
2315 trigraph_map['\''] = '^';
2316 trigraph_map['<'] = '{';
2317 trigraph_map['>'] = '}';
2318 trigraph_map['!'] = '|';
2319 trigraph_map['-'] = '~';
2322 /* Call when a trigraph is encountered. It warns if necessary, and
2323 returns true if the trigraph should be honoured. END is the third
2324 character of a trigraph in the input stream. */
/* NOTE(review): the test that chooses between the two warnings and the
   return statement are not present in this listing; presumably ACCEPT
   selects the message and is the value returned -- confirm.  */
2326 trigraph_ok (pfile, end)
2328 const unsigned char *end;
/* Trigraphs are honoured only when the trigraphs option is on.  */
2330 int accept = CPP_OPTION (pfile, trigraphs);
2332 if (CPP_OPTION (pfile, warn_trigraphs))
/* Offset within the current line of the character just before END.  */
2334 unsigned int col = end - 1 - pfile->buffer->line_base;
2336 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2337 "trigraph ??%c converted to %c",
2338 (int) *end, (int) trigraph_map[*end]);
2340 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2341 "trigraph ??%c ignored", (int) *end);
2346 /* Scan a string for trigraphs, warning or replacing them inline as
2347 appropriate. When parsing a string, we must call this routine
2348 before processing a newline character (if trigraphs are enabled),
2349 since the newline might be escaped by a preceding backslash
2350 trigraph sequence. Returns a pointer to the end of the name after
2353 static unsigned char*
2354 trigraph_replace (pfile, src, limit)
2357 unsigned char* limit;
2359 unsigned char *dest;
2361 /* Starting with src[1], find two consecutive '?'. The case of no
2362 trigraphs is streamlined. */
2364 for (; src + 1 < limit; src += 2)
2369 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2372 else if (src + 2 == limit || src[1] != '?')
2375 /* Check if it really is a trigraph. */
2376 if (trigraph_map[src[2]] == 0)
2380 goto trigraph_found;
2384 /* Now we have a trigraph, we need to scan the remaining buffer, and
2385 copy-shifting its contents left if replacement is enabled. */
2386 for (; src + 2 < limit; dest++, src++)
2387 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2391 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2392 *dest = trigraph_map[*src];
2395 /* Copy remaining (at most 2) characters. */
2401 /* If CUR is a backslash or the end of a trigraphed backslash, return
2402 a pointer to its beginning, otherwise NULL. We don't read beyond
2403 the buffer start, because there is the start of the comment in the
2405 static const unsigned char *
2406 backslash_start (pfile, cur)
2408 const unsigned char *cur;
2412 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2413 && trigraph_ok (pfile, cur))
2418 /* Skip a C-style block comment. This is probably the trickiest
2419 handler. We find the end of the comment by seeing if an asterisk
2420 is before every '/' we encounter. The nasty complication is that a
2421 previous asterisk may be separated by one or more escaped newlines.
2422 Returns non-zero if comment terminated by EOF, zero otherwise. */
/* NOTE(review): the return type, parameter declaration, braces, the test
   for '/' and the statements that store CUR back and return are not
   present in this listing.  */
2424 skip_block_comment2 (pfile)
2427 cpp_buffer *buffer = pfile->buffer;
/* When non-null, the position immediately after an asterisk that was
   followed only by escaped newlines; a '/' found exactly there still
   terminates the comment.  */
2428 const unsigned char *char_after_star = 0;
2429 register const unsigned char *cur = buffer->cur;
2432 /* Inner loop would think the comment has ended if the first comment
2433 character is a '/'. Avoid this and keep the inner loop clean by
2434 skipping such a character. */
2435 if (cur < buffer->rlimit && cur[0] == '/')
2438 for (; cur < buffer->rlimit; )
2440 unsigned char c = *cur++;
2442 /* People like decorating comments with '*', so check for
2443 '/' instead for efficiency. */
/* CUR has already been advanced past C, so cur[-2] is the character
   before the one just read.  */
2446 if (cur[-2] == '*' || cur - 1 == char_after_star)
2449 /* Warn about potential nested comments, but not when
2450 the final character inside the comment is a '/'.
2451 Don't bother to get it right across escaped newlines. */
2452 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2453 && cur[0] == '*' && cur[1] != '/')
2456 cpp_warning (pfile, "'/*' within comment");
2459 else if (IS_NEWLINE(c))
/* Non-null if the newline is preceded by a backslash, either literal or
   spelled as a trigraph.  */
2461 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2463 handle_newline (cur, buffer->rlimit, c);
2464 /* Work correctly if there is an asterisk before an
2465 arbitrarily long sequence of escaped newlines. */
2466 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2467 char_after_star = cur;
2469 char_after_star = 0;
2479 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2480 Returns non-zero if a multiline comment. */
/* On return buffer->cur is left pointing at the terminating newline so
   that the caller sees it.
   NOTE(review): the return type, parameter declaration, braces, the
   newline test and the return statement are not present in this
   listing.  */
2482 skip_line_comment2 (pfile)
2485 cpp_buffer *buffer = pfile->buffer;
2486 register const unsigned char *cur = buffer->cur;
2489 for (; cur < buffer->rlimit; )
2491 unsigned char c = *cur++;
2495 /* Check for a (trigraph?) backslash escaping the newline. */
/* CUR is already past the newline, so cur - 2 is the character just
   before it.  */
2496 if (!backslash_start (pfile, cur - 2))
2499 handle_newline (cur, buffer->rlimit, c);
2505 buffer->cur = cur - 1; /* Leave newline for caller. */
2509 /* Skips whitespace, stopping at next non-whitespace character. */
/* IN_DIRECTIVE is non-zero when lexing a preprocessing directive; in that
   case, and only when pedantic, form feeds and vertical tabs draw a
   pedwarn.  Embedded null characters are counted and reported once.
   NOTE(review): the return type, parameter declarations, braces, the
   statement that increments NULL_COUNT and the condition guarding the
   final warning are not present in this listing.  */
2511 skip_whitespace (pfile, in_directive)
2515 cpp_buffer *buffer = pfile->buffer;
2516 register const unsigned char *cur = buffer->cur;
2517 unsigned short null_count = 0;
2519 for (; cur < buffer->rlimit; )
2521 unsigned char c = *cur++;
2523 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2525 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2529 /* Must be '\f' or '\v' */
2530 else if (in_directive && CPP_PEDANTIC (pfile))
2531 cpp_pedwarn (pfile, "%s in preprocessing directive",
2532 c == '\f' ? "formfeed" : "vertical tab");
/* Step back so buffer->cur is one before the scan position.  */
2537 buffer->cur = cur - 1;
2539 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2540 : "embedded null character ignored");
2543 /* Parse (append) an identifier. */
/* Characters are copied from the input buffer onto the end of LIST's name
   buffer; on completion NAME->len and LIST->name_used are updated to
   cover what was appended.  If the name buffer fills before the input is
   exhausted, it is enlarged with auto_expand_name_space.
   NOTE(review): the return type, parameter declarations, braces, the
   identifier-character test, the pointer increments and the control flow
   that retries after expansion are not present in this listing.  */
2545 parse_name (pfile, list, name)
2550 const unsigned char *name_limit;
2551 unsigned char *namebuf;
2552 cpp_buffer *buffer = pfile->buffer;
2553 register const unsigned char *cur = buffer->cur;
/* NAME_LIMIT is the end of the allocated name buffer; NAMEBUF is the
   first unused byte in it.  */
2556 name_limit = list->namebuf + list->name_cap;
2557 namebuf = list->namebuf + list->name_used;
2559 for (; cur < buffer->rlimit && namebuf < name_limit; )
2561 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2567 if (c == '$' && CPP_PEDANTIC (pfile))
2570 cpp_pedwarn (pfile, "'$' character in identifier");
2574 /* Run out of name space? */
2575 if (cur < buffer->rlimit)
/* Record how much has been used before asking for more space.  */
2577 list->name_used = namebuf - list->namebuf;
2578 auto_expand_name_space (list);
2584 name->len = namebuf - name->text;
2585 list->name_used = namebuf - list->namebuf;
2588 /* Parse (append) a number. */
/* VALID_SIGN (C, PREVC) is true when C is '+' or '-' and PREVC is an
   exponent letter: 'e' or 'E' always, 'p' or 'P' only when the c89
   option is off.  The expansion refers to a variable named pfile, so the
   macro can only be used where one is in scope.  */
2590 #define VALID_SIGN(c, prevc) \
2591 (((c) == '+' || (c) == '-') && \
2592 ((prevc) == 'e' || (prevc) == 'E' \
2593 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
/* Characters are copied from the input buffer onto the end of LIST's name
   buffer; on completion NAME->len and LIST->name_used are updated to
   cover what was appended.
   NOTE(review): the return type, parameter declarations, braces, the
   pointer increments and the control flow that retries after expanding
   the name buffer are not present in this listing.  */
2596 parse_number (pfile, list, name)
2601 const unsigned char *name_limit;
2602 unsigned char *namebuf;
2603 cpp_buffer *buffer = pfile->buffer;
2604 register const unsigned char *cur = buffer->cur;
2607 name_limit = list->namebuf + list->name_cap;
2608 namebuf = list->namebuf + list->name_used;
2610 for (; cur < buffer->rlimit && namebuf < name_limit; )
2612 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2614 /* Perhaps we should accept '$' here if we accept it for
2615 identifiers. We know namebuf[-1] is safe, because for c to
2616 be a sign we must have pushed at least one character. */
2617 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2624 /* Run out of name space? */
2625 if (cur < buffer->rlimit)
2627 list->name_used = namebuf - list->namebuf;
2628 auto_expand_name_space (list);
2634 name->len = namebuf - name->text;
2635 list->name_used = namebuf - list->namebuf;
2638 /* Places a string terminated by an unescaped TERMINATOR into a
2639 cpp_name, which should be expandable and thus at the top of the
2640 list's stack. Handles embedded trigraphs, if necessary, and
2643 Can be used for character constants (terminator = '\''), string
2644 constants ('"') and angled headers ('>'). Multi-line strings are
2645 allowed, except for within directives. */
2648 parse_string2 (pfile, list, name, terminator)
2652 unsigned int terminator;
2654 cpp_buffer *buffer = pfile->buffer;
2655 register const unsigned char *cur = buffer->cur;
2656 const unsigned char *name_limit;
2657 unsigned char *namebuf;
2658 unsigned int null_count = 0;
2659 int trigraphed_len = 0;
2662 name_limit = list->namebuf + list->name_cap;
2663 namebuf = list->namebuf + list->name_used;
2665 for (; cur < buffer->rlimit && namebuf < name_limit; )
2667 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2671 else if (c == terminator || IS_NEWLINE (c))
2673 /* Needed for trigraph_replace and multiline string warning. */
2676 /* Scan for trigraphs before checking if backslash-escaped. */
2677 if (CPP_OPTION (pfile, trigraphs)
2678 || CPP_OPTION (pfile, warn_trigraphs))
2680 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2682 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2683 if (trigraphed_len < 0)
2687 namebuf--; /* Drop the newline / terminator from the name. */
2690 /* Drop a backslash newline, and continue. */
2691 if (namebuf[-1] == '\\')
2693 handle_newline (cur, buffer->rlimit, c);
2700 /* In Fortran and assembly language, silently terminate
2701 strings of either variety at end of line. This is a
2702 kludge around not knowing where comments are in these
2704 if (CPP_OPTION (pfile, lang_fortran)
2705 || CPP_OPTION (pfile, lang_asm))
2708 /* Character constants, headers and asserts may not
2709 extend over multiple lines. In Standard C, neither
2710 may strings. We accept multiline strings as an
2711 extension, but not in directives. */
2712 if (terminator != '"' || IS_DIRECTIVE (list))
2715 cur++; /* Move forwards again. */
2717 if (pfile->multiline_string_line == 0)
2719 pfile->multiline_string_line = list->line;
2720 if (CPP_PEDANTIC (pfile))
2721 cpp_pedwarn (pfile, "multi-line string constant");
2725 handle_newline (cur, buffer->rlimit, c);
2729 unsigned char *temp;
2731 /* An odd number of consecutive backslashes represents
2732 an escaped terminator. */
2734 while (temp >= name->text && *temp == '\\')
2737 if ((namebuf - temp) & 1)
2744 /* Run out of name space? */
2745 if (cur < buffer->rlimit)
2747 list->name_used = namebuf - list->namebuf;
2748 auto_expand_name_space (list);
2752 /* We may not have trigraph-replaced the input for this code path,
2753 but as the input is in error by being unterminated we don't
2754 bother. Prevent warnings about no newlines at EOF. */
2755 if (IS_NEWLINE(cur[-1]))
2759 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2761 if (terminator == '\"' && pfile->multiline_string_line != list->line
2762 && pfile->multiline_string_line != 0)
2764 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2765 "possible start of unterminated string literal");
2766 pfile->multiline_string_line = 0;
2771 name->len = namebuf - name->text;
2772 list->name_used = namebuf - list->namebuf;
2775 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2776 : "null character preserved"));
2779 /* The character TYPE helps us distinguish comment types: '*' = C
2780 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2781 the stored comment includes the comment start and any terminator. */
/* FROM and LEN give the comment text to copy (LEN excludes the two-byte
   comment start); TOK_NO is stored in the saved token's aux field.
   NOTE(review): the return type, some parameter and local declarations,
   and the statements between filling in the token and the memcpy are not
   present in this listing; presumably they write the comment-start
   characters and advance BUFFER past them -- confirm.  */
2783 #define COMMENT_START_LEN 2
2785 save_comment (list, from, len, tok_no, type)
2787 const unsigned char *from;
2789 unsigned int tok_no;
2793 unsigned char *buffer;
/* From here on LEN is the full stored length, comment start included.  */
2795 len += COMMENT_START_LEN;
/* Make room for one more comment token and for LEN bytes of text.  */
2797 if (list->comments_used == list->comments_cap)
2798 expand_comment_space (list);
2800 if (list->name_used + len > list->name_cap)
2801 expand_name_space (list, len);
2803 buffer = list->namebuf + list->name_used;
2805 comment = &list->comments[list->comments_used++];
2806 comment->type = CPP_COMMENT;
2807 comment->aux = tok_no;
2808 comment->val.name.len = len;
2809 comment->val.name.text = buffer;
/* Only the caller-supplied text is copied here, i.e. the stored length
   less the comment start.  */
2822 memcpy (buffer, from, len - COMMENT_START_LEN);
2823 list->name_used += len;
2827 * The tokenizer's main loop. Returns a token list, representing a
2828 * logical line in the input file, terminated with a CPP_VSPACE
2829 * token. On EOF, a token list containing the single CPP_EOF token
2832 * Implementation relies almost entirely on lookback, rather than
2833 * looking forwards. This means that tokenization requires just
2834 * a single pass of the file, even in the presence of trigraphs and
2835 * escaped newlines, providing significant performance benefits.
2836 * Trigraph overhead is negligible if they are disabled, and low
2837 * even when enabled.
2841 _cpp_lex_line (pfile, list)
2845 cpp_token *cur_token, *token_limit;
2846 cpp_buffer *buffer = pfile->buffer;
2847 register const unsigned char *cur = buffer->cur;
2848 unsigned char flags = 0;
2851 token_limit = list->tokens + list->tokens_cap;
2852 cur_token = list->tokens + list->tokens_used;
2854 for (; cur < buffer->rlimit && cur_token < token_limit;)
2856 unsigned char c = *cur++;
2858 /* Optimize whitespace skipping, in particular the case of a
2859 single whitespace character, as every other token is probably
2860 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2861 if (is_hspace ((unsigned int) c))
2863 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2865 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2866 skip_whitespace (pfile, IS_DIRECTIVE (list));
2869 flags = PREV_WHITESPACE;
2870 if (cur == buffer->rlimit)
2875 /* Initialize current token. Its type is set in the switch. */
2876 cur_token->col = COLUMN (cur);
2877 cur_token->flags = flags;
2882 case '0': case '1': case '2': case '3': case '4':
2883 case '5': case '6': case '7': case '8': case '9':
2884 cur--; /* Backup character. */
2885 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2887 /* Prepend an immediately previous CPP_DOT token. */
2889 if (list->name_cap == list->name_used)
2890 auto_expand_name_space (list);
2892 cur_token->val.name.len = 1;
2893 cur_token->val.name.text = list->namebuf + list->name_used;
2894 list->namebuf[list->name_used++] = '.';
2897 INIT_NAME (list, cur_token->val.name);
2901 parse_number (pfile, list, &cur_token->val.name);
2904 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2909 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2910 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2911 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2912 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2914 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2915 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2916 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2917 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2919 cur--; /* Backup character. */
2920 INIT_NAME (list, cur_token->val.name);
2921 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2925 parse_name (pfile, list, &cur_token->val.name);
2928 /* Find handler for newly created / extended directive. */
2929 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2930 _cpp_check_directive (list, cur_token);
2937 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2938 /* Do we have a wide string? */
2939 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2940 && cur_token[-1].val.name.len == 1
2941 && cur_token[-1].val.name.text[0] == 'L'
2942 && !CPP_TRADITIONAL (pfile))
2944 /* No need for 'L' any more. */
2946 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2950 /* Here c is one of ' " > or ). */
2951 INIT_NAME (list, cur_token->val.name);
2953 parse_string2 (pfile, list, &cur_token->val.name, c);
2959 cur_token->type = CPP_DIV;
2962 if (PREV_TOKEN_TYPE == CPP_DIV)
2964 /* We silently allow C++ comments in system headers,
2965 irrespective of conformance mode, because lots of
2966 broken systems do that and trying to clean it up
2967 in fixincludes is a nightmare. */
2968 if (buffer->system_header_p)
2969 goto do_line_comment;
2970 else if (CPP_OPTION (pfile, cplusplus_comments))
2972 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2973 && ! buffer->warned_cplusplus_comments)
2977 "C++ style comments are not allowed in ISO C89");
2979 "(this will be reported only once per input file)");
2980 buffer->warned_cplusplus_comments = 1;
2986 "comment start split across lines");
2987 if (skip_line_comment2 (pfile))
2988 cpp_error_with_line (pfile, list->line,
2990 "multi-line comment");
2991 if (!CPP_OPTION (pfile, discard_comments))
2992 save_comment (list, cur, buffer->cur - cur,
2993 cur_token - 1 - list->tokens, c);
2996 /* Back-up to first '-' or '/'. */
2998 if (!CPP_OPTION (pfile, traditional))
2999 flags = PREV_WHITESPACE;
3007 cur_token->type = CPP_MULT;
3010 if (PREV_TOKEN_TYPE == CPP_DIV)
3015 "comment start '/*' split across lines");
3016 if (skip_block_comment2 (pfile))
3017 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3018 "unterminated comment");
3019 else if (buffer->cur[-2] != '*')
3021 "comment end '*/' split across lines");
3022 if (!CPP_OPTION (pfile, discard_comments))
3023 save_comment (list, cur, buffer->cur - cur,
3024 cur_token - 1 - list->tokens, c);
3028 if (!CPP_OPTION (pfile, traditional))
3029 flags = PREV_WHITESPACE;
3032 else if (CPP_OPTION (pfile, cplusplus))
3034 /* In C++, there are .* and ->* operators. */
3035 if (PREV_TOKEN_TYPE == CPP_DEREF)
3036 BACKUP_TOKEN (CPP_DEREF_STAR);
3037 else if (PREV_TOKEN_TYPE == CPP_DOT)
3038 BACKUP_TOKEN (CPP_DOT_STAR);
3046 handle_newline (cur, buffer->rlimit, c);
3047 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3049 /* Remove the escaped newline. Then continue to process
3050 any interrupted name or number. */
3055 if (cur_token->type == CPP_NAME)
3057 else if (cur_token->type == CPP_NUMBER)
3058 goto continue_number;
3061 /* Remember whitespace setting. */
3062 flags = cur_token->flags;
3065 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3068 cpp_warning (pfile, "backslash and newline separated by space");
3070 PUSH_TOKEN (CPP_VSPACE);
3074 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3076 if (CPP_OPTION (pfile, chill))
3077 goto do_line_comment;
3078 REVISE_TOKEN (CPP_MINUS_MINUS);
3081 PUSH_TOKEN (CPP_MINUS);
3084 /* The digraph flag checking ensures that ## and %:%:
3085 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3088 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3089 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3090 REVISE_TOKEN (CPP_PASTE);
3092 PUSH_TOKEN (CPP_HASH);
3096 cur_token->type = CPP_COLON;
3099 if (PREV_TOKEN_TYPE == CPP_COLON
3100 && CPP_OPTION (pfile, cplusplus))
3101 BACKUP_TOKEN (CPP_SCOPE);
3102 /* Digraph: "<:" is a '[' */
3103 else if (PREV_TOKEN_TYPE == CPP_LESS)
3104 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3105 /* Digraph: "%:" is a '#' */
3106 else if (PREV_TOKEN_TYPE == CPP_MOD)
3108 (--cur_token)->flags |= DIGRAPH;
3116 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3117 REVISE_TOKEN (CPP_AND_AND);
3119 PUSH_TOKEN (CPP_AND);
3124 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3125 REVISE_TOKEN (CPP_OR_OR);
3127 PUSH_TOKEN (CPP_OR);
3131 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3132 REVISE_TOKEN (CPP_PLUS_PLUS);
3134 PUSH_TOKEN (CPP_PLUS);
3138 /* This relies on equidistance of "?=" and "?" tokens. */
3139 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3140 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3142 PUSH_TOKEN (CPP_EQ);
3146 cur_token->type = CPP_GREATER;
3149 if (PREV_TOKEN_TYPE == CPP_GREATER)
3150 BACKUP_TOKEN (CPP_RSHIFT);
3151 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3152 BACKUP_TOKEN (CPP_DEREF);
3153 /* Digraph: ":>" is a ']' */
3154 else if (PREV_TOKEN_TYPE == CPP_COLON)
3155 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3156 /* Digraph: "%>" is a '}' */
3157 else if (PREV_TOKEN_TYPE == CPP_MOD)
3158 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3164 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3166 REVISE_TOKEN (CPP_LSHIFT);
3169 /* Is this the beginning of a header name? */
3170 if (list->dir_flags & SYNTAX_INCLUDE)
3172 c = '>'; /* Terminator. */
3173 cur_token->type = CPP_HEADER_NAME;
3174 goto do_parse_string;
3176 PUSH_TOKEN (CPP_LESS);
3180 /* Digraph: "<%" is a '{' */
3181 cur_token->type = CPP_MOD;
3182 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3183 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3188 if (cur + 1 < buffer->rlimit && *cur == '?'
3189 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3191 /* Handle trigraph. */
3195 case '(': goto make_open_square;
3196 case ')': goto make_close_square;
3197 case '<': goto make_open_brace;
3198 case '>': goto make_close_brace;
3199 case '=': goto make_hash;
3200 case '!': goto make_or;
3201 case '-': goto make_complement;
3202 case '/': goto make_backslash;
3203 case '\'': goto make_xor;
3206 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3208 /* GNU C++ defines <? and >? operators. */
3209 if (PREV_TOKEN_TYPE == CPP_LESS)
3211 REVISE_TOKEN (CPP_MIN);
3214 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3216 REVISE_TOKEN (CPP_MAX);
3220 PUSH_TOKEN (CPP_QUERY);
3224 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3226 && !(cur_token[-1].flags & PREV_WHITESPACE))
3229 PUSH_TOKEN (CPP_ELLIPSIS);
3232 PUSH_TOKEN (CPP_DOT);
3236 case '~': PUSH_TOKEN (CPP_COMPL); break;
3238 case '^': PUSH_TOKEN (CPP_XOR); break;
3240 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3242 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3244 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3246 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3248 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3249 case '!': PUSH_TOKEN (CPP_NOT); break;
3250 case ',': PUSH_TOKEN (CPP_COMMA); break;
3251 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3252 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3253 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3256 if (CPP_OPTION (pfile, dollars_in_ident))
3261 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3262 PUSH_TOKEN (CPP_OTHER);
3267 /* Run out of token space? */
3268 if (cur_token == token_limit)
3270 list->tokens_used = cur_token - list->tokens;
3271 expand_token_space (list);
3275 cur_token->type = CPP_EOF;
3276 cur_token->flags = flags;
3278 if (cur_token != &list->tokens[0])
3280 /* Next call back will get just a CPP_EOF. */
3282 cpp_warning (pfile, "no newline at end of file");
3283 PUSH_TOKEN (CPP_VSPACE);
3289 list->tokens_used = cur_token - list->tokens;
3291 /* FIXME: take this check out and put it in the caller.
3292 list->directive == 0 indicates an unknown directive (but null
3293 directive is OK). This is the first time we can be sure the
3294 directive is invalid, and thus warn about it, because it might
3295 have been split by escaped newlines. Also, don't complain about
3296 invalid directives in assembly source, we don't know where the
3297 comments are, and # may introduce assembler pseudo-ops. */
3299 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3300 && list->tokens[1].type != CPP_VSPACE
3301 && !CPP_OPTION (pfile, lang_asm))
3302 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3303 "invalid preprocessing directive");
3306 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3307 already contain enough space to hold the token's spelling. If
3308 WHITESPACE is true, and the token was preceded by whitespace,
3309 output a single space before the token proper. Returns a pointer
3310 to the character after the last character written.

   How a token is spelled is selected by
   token_spellings[token->type].type.  PFILE is needed for the cpp_ice
   call that reports a token with no spelling.  */
3312 static unsigned char *
3313 spell_token (pfile, token, buffer, whitespace)
3314 cpp_reader *pfile; /* Would be nice to be rid of this... */
3316 unsigned char *buffer;
3319 /* Whitespace will not be wanted by handlers of the # and ##
3320 operators calling this function, but will be wanted by the
3321 function that writes out the preprocessed file. */
3322 if (whitespace && token->flags & PREV_WHITESPACE)
/* Dispatch on the spelling class recorded for this token type.  */
3325 switch (token_spellings[token->type].type)
3327 case SPELL_OPERATOR:
3329 const unsigned char *spelling;
/* A token flagged DIGRAPH takes its spelling from digraph_spellings[],
   indexed relative to CPP_FIRST_DIGRAPH; any other operator uses the
   spelling stored in token_spellings[].  */
3332 if (token->flags & DIGRAPH)
3333 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3335 spelling = token_spellings[token->type].spelling;
/* Step through the chosen spelling until its terminating NUL.  */
3337 while ((c = *spelling++) != '\0')
/* Spell the token from its stored text: copy val.name.len bytes and
   advance BUFFER past them.  */
3343 memcpy (buffer, token->val.name.text, token->val.name.len);
3344 buffer += token->val.name.len;
/* Both tests look only at the token type: the first picks out the wide
   kinds (CPP_WSTRING, CPP_WCHAR), the second the string kinds
   (CPP_STRING, CPP_WSTRING) as opposed to character constants.  */
3351 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3354 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
/* The stored text is copied length-counted, exactly as above.  */
3357 memcpy (buffer, token->val.name.text, token->val.name.len);
3358 buffer += token->val.name.len;
/* Here the spelling is the single character held in token->aux.  */
3364 *buffer++ = token->aux;
/* Reaching this call means the token has no spelling; it is reported
   through cpp_ice.  */
3368 cpp_ice (pfile, "Unspellable token");
3375 /* Temporary function for illustrative purposes.

   Driver loop for the line lexer: after init_trigraph_map, each
   iteration initializes the token list, lexes one line into it with
   _cpp_lex_line, and then looks at the result -- whether the line is
   just a CPP_EOF token, and whether a directive handler was found for
   it.  Token lists are written out with _cpp_output_list.  */
3377 _cpp_lex_file (pfile)
3383 init_trigraph_map ();
/* Allocate the token list that the loop below fills.  */
3384 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* RECYCLE is handed to init_token_list; it is 0 on the first pass,
   when LIST has only just been allocated.  */
3386 for (recycle = 0; ;)
3388 init_token_list (pfile, list, recycle);
3391 _cpp_lex_line (pfile, list);
/* _cpp_lex_line stores CPP_EOF in the current token when it runs out
   of input, so CPP_EOF in tokens[0] means the line held no tokens.  */
3392 if (list->tokens[0].type == CPP_EOF)
/* dir_handler is non-null when the line was recognized as a directive
   with a known handler.  */
3395 if (list->dir_handler)
/* A nonzero return from the handler is followed by allocating a brand
   new list.  NOTE(review): presumably the handler has kept the old
   list in that case -- confirm against the directive handlers.  */
3397 if (list->dir_handler (pfile))
3399 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3404 _cpp_output_list (pfile, list);
3408 /* Temporary function for illustrative purposes.

   Spell the tokens of LIST, starting at list->tokens[0], onto the end
   of the text at pfile->limit.  The walk stops after the first token
   of type CPP_VSPACE, which is itself spelled.  Comments saved in
   list->comments are spelled immediately before the token that their
   aux field indexes.  */
3410 _cpp_output_list (pfile, list)
/* COMMENT_BEFORE starts out null so that, when no comments were saved,
   the comment loop below never matches a token.  */
3414 cpp_token *token, *comment, *comment_before = 0;
3416 if (list->comments_used > 0)
3418 comment = &list->comments[0];
/* comment->aux is an index into list->tokens: the token in front of
   which this comment is to be written.  */
3419 comment_before = &list->tokens[comment->aux];
3422 token = &list->tokens[0];
3425 /* Output comments if -C. */
3426 while (token == comment_before)
3428 /* Make space for the comment, and copy it out. */
3429 CPP_RESERVE (pfile, TOKEN_LEN (comment));
/* Comments are spelled with WHITESPACE == 0, so spell_token adds no
   leading space.  */
3430 pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
3432 /* Stop if no comments left, or no more comments appear
3433 before the current token. */
3435 if (comment == list->comments + list->comments_used)
3437 comment_before = &list->tokens[comment->aux];
/* Reserve room for the token, then spell it with WHITESPACE == 1 so
   that a token flagged PREV_WHITESPACE gets a leading space.  */
3440 CPP_RESERVE (pfile, TOKEN_LEN (token));
3441 pfile->limit = spell_token (pfile, token, pfile->limit, 1);
/* The post-increment tests the token just spelled and then moves on
   to the next one.  */
3443 while (token++->type != CPP_VSPACE);