1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Raw access to the read pointer (cur) of a cpp_buffer.  PEEKBUF and
   GETBUF mention BUFFER more than once in their expansions, so pass an
   expression without side effects.  */

/* Yield the byte N positions past the read pointer without consuming
   anything, or EOF when N or fewer bytes remain before rlimit.  */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* Consume and yield the next byte.  Once cur has reached rlimit, yield
   EOF and leave cur where it is.  */
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* Move the read pointer by N bytes.  No bounds check is made; a negative
   N steps backwards.  */
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands for the macros above applied to CPP_BUFFER (pfile).  A
   variable named `pfile' must be in scope wherever they are used.  */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void init_token_list PARAMS ((cpp_reader *, cpp_toklist *, int));
59 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Ask expand_name_space to grow LIST's name buffer by one byte more than
   half of its current capacity (name_cap).  LIST appears twice in the
   expansion, so it must not have side effects.  */
62 #define auto_expand_name_space(list) \
63 expand_name_space ((list), 1 + (list)->name_cap / 2)
67 static void expand_comment_space PARAMS ((cpp_toklist *));
68 void init_trigraph_map PARAMS ((void));
69 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
73 static int skip_block_comment2 PARAMS ((cpp_reader *));
74 static int skip_line_comment2 PARAMS ((cpp_reader *));
75 static void skip_whitespace PARAMS ((cpp_reader *, int));
76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
81 static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
82 unsigned int, unsigned int, unsigned int));
83 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
85 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
87 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
88 unsigned char *, int));
90 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
93 /* Macros on a cpp_name. */
94 #define INIT_NAME(list, name) \
96 (name).text = (list)->namebuf + (list)->name_used;} while (0)
/* Nonzero when the first token of LIST (index 0) has type CPP_HASH.  */
98 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
/* Offset of the pointer CUR from buffer->line_base.  This relies on a
   variable named `buffer' being in scope at the point of use.  */
99 #define COLUMN(cur) ((cur) - buffer->line_base)
101 /* Maybe put these in the ISTABLE eventually. */
/* IS_HSPACE accepts only space and tab; IS_NEWLINE accepts LF and CR.
   Both mention C twice, so C must not have side effects.  */
102 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
103 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
105 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
106 character, if any, is in buffer. */
107 #define handle_newline(cur, limit, c) \
109 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
111 CPP_BUMP_LINE_CUR (pfile, (cur)); \
/* Token-cursor helpers.  All of them operate on a pointer named
   `cur_token', which must be in scope at the point of use.  */

/* Nonzero when the token at cur_token does not carry PREV_WHITESPACE.  */
114 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
/* Type of the token immediately before cur_token.  */
115 #define PREV_TOKEN_TYPE (cur_token[-1].type)
/* PUSH_TOKEN stores TTYPE in the token at cur_token and then advances
   the cursor.  REVISE_TOKEN overwrites the type of the previous token
   and leaves the cursor alone.  BACKUP_TOKEN moves the cursor back one
   token and stores TTYPE there.  Each expands to a bare, unparenthesized
   assignment, so use them only as complete statements.  */
117 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
118 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
119 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
/* As BACKUP_TOKEN, and additionally set the DIGRAPH flag on the token
   the cursor now points at.  */
120 #define BACKUP_DIGRAPH(ttype) do { \
121 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
123 /* An upper bound on the number of bytes needed to spell a token,
124 including preceding whitespace. */
/* The bound is a fixed 5 bytes, plus token->val.name.len when the
   spelling class recorded in token_spellings for the token's type is
   greater than SPELL_NONE.  TOKEN is used unparenthesized as the left
   operand of `->', so pass a plain pointer expression.  */
125 #define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
126 SPELL_NONE ? token->val.name.len: 0))
130 /* Order here matters. Those beyond SPELL_NONE store their spelling
131 in the token list, and its length in the token->val.name.len. */
132 #define SPELL_OPERATOR 0
133 #define SPELL_CHAR 2 /* FIXME: revert order after transition. */
135 #define SPELL_IDENT 3
136 #define SPELL_STRING 4
/* Each helper expands to one brace-enclosed initializer, followed by a
   comma, that pairs a SPELL_* class with the spelling S.  The first
   argument E is not used in any of the expansions.  Only T casts S to
   const U_CHAR *.
   NOTE(review): presumably these are the per-entry macros that
   TTYPE_TABLE invokes to fill token_spellings -- confirm against the
   definition of TTYPE_TABLE.  */
138 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
139 #define I(e, s) {SPELL_IDENT, s},
140 #define S(e, s) {SPELL_STRING, s},
141 #define C(e, s) {SPELL_CHAR, s},
142 #define N(e, s) {SPELL_NONE, s},
144 static const struct token_spelling
147 const U_CHAR *spelling;
148 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
156 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
159 _cpp_grow_token_buffer (pfile, n)
163 long old_written = CPP_WRITTEN (pfile);
164 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
165 pfile->token_buffer = (U_CHAR *)
166 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
167 CPP_SET_WRITTEN (pfile, old_written);
170 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
171 If BUFFER != NULL, then use the LENGTH characters in BUFFER
172 as the new input buffer.
173 Return the new buffer, or NULL on failure. */
176 cpp_push_buffer (pfile, buffer, length)
178 const U_CHAR *buffer;
181 cpp_buffer *buf = CPP_BUFFER (pfile);
183 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
185 cpp_fatal (pfile, "macro or `#include' recursion too deep");
189 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
191 new->if_stack = pfile->if_stack;
192 new->buf = new->cur = buffer;
193 new->rlimit = buffer + length;
196 new->line_base = NULL;
198 CPP_BUFFER (pfile) = new;
203 cpp_pop_buffer (pfile)
206 cpp_buffer *buf = CPP_BUFFER (pfile);
207 if (ACTIVE_MARK_P (pfile))
208 cpp_ice (pfile, "mark active in cpp_pop_buffer");
212 _cpp_unwind_if_stack (pfile, buf);
214 free ((PTR) buf->buf);
215 if (pfile->system_include_depth)
216 pfile->system_include_depth--;
217 if (pfile->potential_control_macro)
219 buf->ihash->control_macro = pfile->potential_control_macro;
220 pfile->potential_control_macro = 0;
222 pfile->input_stack_listing_current = 0;
226 HASHNODE *m = buf->macro;
229 if ((m->type == T_FMACRO && buf->mapped)
230 || m->type == T_SPECLINE || m->type == T_FILE
231 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
232 || m->type == T_STDC)
233 free ((PTR) buf->buf);
235 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
237 pfile->buffer_stack_depth--;
238 return CPP_BUFFER (pfile);
241 /* Deal with the annoying semantics of fwrite. */
243 safe_fwrite (pfile, buf, len, fp)
253 count = fwrite (buf, 1, len, fp);
262 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
265 /* Notify the compiler proper that the current line number has jumped,
266 or the current file name has changed. */
269 output_line_command (pfile, print, line)
274 cpp_buffer *ip = cpp_file_buffer (pfile);
275 enum { same = 0, enter, leave, rname } change;
276 static const char * const codes[] = { "", " 1", " 2", "" };
278 if (CPP_OPTION (pfile, no_line_commands))
281 /* Determine whether the current filename has changed, and if so,
282 how. 'nominal_fname' values are unique, so they can be compared
283 by comparing pointers. */
284 if (ip->nominal_fname == print->last_fname)
288 if (pfile->buffer_stack_depth == print->last_bsd)
292 if (pfile->buffer_stack_depth > print->last_bsd)
296 print->last_bsd = pfile->buffer_stack_depth;
298 print->last_fname = ip->nominal_fname;
300 /* If the current file has not changed, we can output a few newlines
301 instead if we want to increase the line number by a small amount.
302 We cannot do this if print->lineno is zero, because that means we
303 haven't output any line commands yet. (The very first line
304 command output is a `same_file' command.) */
305 if (change == same && print->lineno != 0
306 && line >= print->lineno && line < print->lineno + 8)
308 while (line > print->lineno)
310 putc ('\n', print->outf);
316 #ifndef NO_IMPLICIT_EXTERN_C
317 if (CPP_OPTION (pfile, cplusplus))
318 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
320 ip->system_header_p ? " 3" : "",
321 (ip->system_header_p == 2) ? " 4" : "");
324 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
326 ip->system_header_p ? " 3" : "");
327 print->lineno = line;
330 /* Write the contents of the token_buffer to the output stream, and
331 clear the token_buffer. Also handles generating line commands and
332 keeping track of file transitions. */
335 cpp_output_tokens (pfile, print)
341 if (CPP_WRITTEN (pfile) - print->written)
343 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
345 safe_fwrite (pfile, pfile->token_buffer,
346 CPP_WRITTEN (pfile) - print->written, print->outf);
349 ip = cpp_file_buffer (pfile);
351 output_line_command (pfile, print, CPP_BUF_LINE (ip));
353 CPP_SET_WRITTEN (pfile, print->written);
356 /* Helper for cpp_output_list - increases the column number to match
357 what we expect it to be. */
360 bump_column (print, from, to)
362 unsigned int from, to;
364 unsigned int tabs, spcs;
365 unsigned int delta = to - from;
367 /* Only if FROM is 0, advance by tabs. */
369 tabs = delta / 8, spcs = delta % 8;
371 tabs = 0, spcs = delta;
373 while (tabs--) putc ('\t', print->outf);
374 while (spcs--) putc (' ', print->outf);
377 /* Write out the list L onto pfile->token_buffer. This function is
380 1) pfile->token_buffer is not going to continue to exist.
381 2) At the moment, tokens don't carry the information described
382 in cpplib.h; they are all strings.
383 3) The list has to be a complete line, and has to be written starting
384 at the beginning of a line. */
387 cpp_output_list (pfile, print, list)
390 const cpp_toklist *list;
393 unsigned int curcol = 1;
395 /* XXX Probably does not do what is intended. */
396 if (print->lineno != list->line)
397 output_line_command (pfile, print, list->line);
399 for (i = 0; i < list->tokens_used; i++)
401 if (TOK_TYPE (list, i) == CPP_VSPACE)
403 output_line_command (pfile, print, list->tokens[i].aux);
407 if (curcol < TOK_COL (list, i))
409 /* Insert space to bring the column to what it should be. */
410 bump_column (print, curcol - 1, TOK_COL (list, i));
411 curcol = TOK_COL (list, i);
413 /* XXX We may have to insert space to prevent an accidental
415 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
416 curcol += TOK_LEN (list, i);
420 /* Scan a string (which may have escape marks), perform macro expansion,
421 and write the result to the token_buffer. */
424 _cpp_expand_to_buffer (pfile, buf, length)
430 enum cpp_ttype token;
435 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
439 /* Copy the buffer, because it might be in an unsafe place - for
440 example, a sequence on the token_buffer, where the pointers will
441 be invalidated if we enlarge the token_buffer. */
442 buf1 = alloca (length);
443 memcpy (buf1, buf, length);
445 /* Set up the input on the input stack. */
446 stop = CPP_BUFFER (pfile);
447 if (cpp_push_buffer (pfile, buf1, length) == NULL)
449 CPP_BUFFER (pfile)->has_escapes = 1;
451 /* Scan the input, create the output. */
454 token = cpp_get_token (pfile);
455 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
460 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
463 cpp_scan_buffer_nooutput (pfile)
466 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
467 enum cpp_ttype token;
468 unsigned int old_written = CPP_WRITTEN (pfile);
469 /* In no-output mode, we can ignore everything but directives. */
472 if (! pfile->only_seen_white)
473 _cpp_skip_rest_of_line (pfile);
474 token = cpp_get_token (pfile);
475 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
478 CPP_SET_WRITTEN (pfile, old_written);
481 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
484 cpp_scan_buffer (pfile, print)
488 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
489 enum cpp_ttype token;
493 token = cpp_get_token (pfile);
494 if (token == CPP_EOF || token == CPP_VSPACE
495 /* XXX Temporary kluge - force flush after #include only */
496 || (token == CPP_DIRECTIVE
497 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
499 cpp_output_tokens (pfile, print);
500 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
506 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
509 cpp_file_buffer (pfile)
514 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
515 if (ip->ihash != NULL)
520 /* Token-buffer helper functions. */
522 /* Expand a token list's string space. */
524 expand_name_space (list, len)
528 const U_CHAR *old_namebuf;
531 old_namebuf = list->namebuf;
532 list->name_cap += len;
533 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
535 /* Fix up token text pointers. */
536 delta = list->namebuf - old_namebuf;
541 for (i = 0; i < list->tokens_used; i++)
542 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
543 list->tokens[i].val.name.text += delta;
547 /* Expand the number of tokens in a list. */
549 expand_token_space (list)
552 list->tokens_cap *= 2;
553 list->tokens = (cpp_token *)
554 xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
555 list->tokens++; /* Skip the dummy. */
558 /* Initialize a token list. We allocate an extra token in front of
559 the token list, as this allows us to always peek at the previous
560 token without worrying about underflowing the list. */
562 init_token_list (pfile, list, recycle)
567 /* Recycling a used list saves 3 free-malloc pairs. */
570 /* Initialize token space. Put a dummy token before the start
571 that will fail matches. */
572 list->tokens_cap = 256; /* 4K's worth. */
573 list->tokens = (cpp_token *)
574 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
575 list->tokens[0].type = CPP_EOF;
578 /* Initialize name space. */
579 list->name_cap = 1024;
580 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
582 /* Only create a comment space on demand. */
583 list->comments_cap = 0;
587 list->tokens_used = 0;
589 list->comments_used = 0;
591 list->line = pfile->buffer->lineno;
592 list->dir_handler = 0;
596 /* Scan an entire line and create a token list for it. Does not
597 macro-expand or execute directives. */
600 _cpp_scan_line (pfile, list)
609 init_token_list (pfile, list, 1);
611 written = CPP_WRITTEN (pfile);
616 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
617 type = _cpp_lex_token (pfile);
618 len = CPP_WRITTEN (pfile) - written;
619 CPP_SET_WRITTEN (pfile, written);
620 if (type == CPP_HSPACE)
622 if (CPP_PEDANTIC (pfile))
623 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
627 else if (type == CPP_COMMENT)
628 /* Only happens when processing -traditional macro definitions.
629 Do not give this a token entry, but do not change space_before
633 if (list->tokens_used >= list->tokens_cap)
634 expand_token_space (list);
635 if (list->name_used + len >= list->name_cap)
636 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
638 if (type == CPP_MACRO)
642 TOK_TYPE (list, i) = type;
643 TOK_COL (list, i) = col;
644 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
646 if (type == CPP_VSPACE)
649 TOK_LEN (list, i) = len;
650 if (token_spellings[type].type > SPELL_NONE)
652 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
653 TOK_NAME (list, i) = list->namebuf + list->name_used;
654 list->name_used += len;
657 TOK_NAME (list, i) = token_spellings[type].spelling;
661 TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;
663 /* XXX Temporary kluge: put back the newline. */
668 /* Skip a C-style block comment. We know it's a comment, and point is
669 at the second character of the starter. */
671 skip_block_comment (pfile)
674 unsigned int line, col;
675 const U_CHAR *limit, *cur;
678 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
679 col = CPP_BUF_COL (CPP_BUFFER (pfile));
680 limit = CPP_BUFFER (pfile)->rlimit;
681 cur = CPP_BUFFER (pfile)->cur;
686 if (c == '\n' || c == '\r')
688 /* \r cannot be a macro escape marker here. */
689 if (!ACTIVE_MARK_P (pfile))
690 CPP_BUMP_LINE_CUR (pfile, cur);
694 /* Check for terminator. */
695 if (cur < limit && *cur == '/')
698 /* Warn about comment starter embedded in comment. */
699 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
700 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
701 cur - CPP_BUFFER (pfile)->line_base,
702 "'/*' within comment");
706 cpp_error_with_line (pfile, line, col, "unterminated comment");
709 CPP_BUFFER (pfile)->cur = cur + 1;
712 /* Skip a C++/Chill line comment. We know it's a comment, and point
713 is at the second character of the initiator. */
715 skip_line_comment (pfile)
723 /* We don't have to worry about EOF in here. */
726 /* Don't consider final '\n' to be part of comment. */
732 /* \r cannot be a macro escape marker here. */
733 if (!ACTIVE_MARK_P (pfile))
734 CPP_BUMP_LINE (pfile);
735 if (CPP_OPTION (pfile, warn_comments))
736 cpp_warning (pfile, "backslash-newline within line comment");
741 /* Skip a comment - C, C++, or Chill style. M is the first character
742 of the comment marker. If this really is a comment, skip to its
743 end and return ' '. If this is not a comment, return M (which will
747 skip_comment (pfile, m)
751 if (m == '/' && PEEKC() == '*')
753 skip_block_comment (pfile);
756 else if (m == '/' && PEEKC() == '/')
758 if (CPP_BUFFER (pfile)->system_header_p)
760 /* We silently allow C++ comments in system headers, irrespective
761 of conformance mode, because lots of busted systems do that
762 and trying to clean it up in fixincludes is a nightmare. */
763 skip_line_comment (pfile);
766 else if (CPP_OPTION (pfile, cplusplus_comments))
768 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
770 if (CPP_WTRADITIONAL (pfile))
772 "C++ style comments are not allowed in traditional C");
773 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
775 "C++ style comments are not allowed in ISO C89");
776 if (CPP_WTRADITIONAL (pfile)
777 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
779 "(this will be reported only once per input file)");
780 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
782 skip_line_comment (pfile);
788 else if (m == '-' && PEEKC() == '-'
789 && CPP_OPTION (pfile, chill))
791 skip_line_comment (pfile);
798 /* Identical to skip_comment except that it copies the comment into the
799 token_buffer. This is used if !discard_comments. */
801 copy_comment (pfile, m)
805 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
808 if (skip_comment (pfile, m) == m)
811 limit = CPP_BUFFER (pfile)->cur;
812 CPP_RESERVE (pfile, limit - start + 2);
813 CPP_PUTC_Q (pfile, m);
814 for (; start <= limit; start++)
816 CPP_PUTC_Q (pfile, *start);
822 null_warning (pfile, count)
827 cpp_warning (pfile, "embedded null character ignored");
829 cpp_warning (pfile, "embedded null characters ignored");
832 /* Skip whitespace \-newline and comments. Does not macro-expand. */
835 _cpp_skip_hspace (pfile)
838 unsigned int null_count = 0;
846 else if (is_hspace(c))
848 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
849 cpp_pedwarn (pfile, "%s in preprocessing directive",
850 c == '\f' ? "formfeed" : "vertical tab");
856 /* \r is a backslash-newline marker if !has_escapes, and
857 a deletable-whitespace or no-reexpansion marker otherwise. */
858 if (CPP_BUFFER (pfile)->has_escapes)
866 CPP_BUMP_LINE (pfile);
868 else if (c == '/' || c == '-')
870 c = skip_comment (pfile, c);
880 null_warning (pfile, null_count);
883 /* Read and discard the rest of the current line. */
886 _cpp_skip_rest_of_line (pfile)
900 if (! CPP_BUFFER (pfile)->has_escapes)
901 CPP_BUMP_LINE (pfile);
906 skip_string (pfile, c);
911 skip_comment (pfile, c);
916 if (CPP_PEDANTIC (pfile))
917 cpp_pedwarn (pfile, "%s in preprocessing directive",
918 c == '\f' ? "formfeed" : "vertical tab");
925 /* Parse an identifier starting with C. */
928 _cpp_parse_name (pfile, c)
940 if (c == '$' && CPP_PEDANTIC (pfile))
941 cpp_pedwarn (pfile, "`$' in identifier");
943 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
944 CPP_PUTC_Q (pfile, c);
952 /* Parse and skip over a string starting with C. A single quoted
953 string is treated like a double -- some programs (e.g., troff) are
954 perverse this way. (However, a single quoted string is not allowed
955 to extend over multiple lines.) */
957 skip_string (pfile, c)
961 unsigned int start_line, start_column;
962 unsigned int null_count = 0;
964 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
965 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
972 cpp_error_with_line (pfile, start_line, start_column,
973 "unterminated string or character constant");
974 if (pfile->multiline_string_line != start_line
975 && pfile->multiline_string_line != 0)
976 cpp_error_with_line (pfile,
977 pfile->multiline_string_line, -1,
978 "possible real start of unterminated constant");
979 pfile->multiline_string_line = 0;
987 CPP_BUMP_LINE (pfile);
988 /* In Fortran and assembly language, silently terminate
989 strings of either variety at end of line. This is a
990 kludge around not knowing where comments are in these
992 if (CPP_OPTION (pfile, lang_fortran)
993 || CPP_OPTION (pfile, lang_asm))
998 /* Character constants may not extend over multiple lines.
999 In Standard C, neither may strings. We accept multiline
1000 strings as an extension. */
1003 cpp_error_with_line (pfile, start_line, start_column,
1004 "unterminated character constant");
1008 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1009 cpp_pedwarn_with_line (pfile, start_line, start_column,
1010 "string constant runs past end of line");
1011 if (pfile->multiline_string_line == 0)
1012 pfile->multiline_string_line = start_line;
1016 if (CPP_BUFFER (pfile)->has_escapes)
1018 cpp_ice (pfile, "\\r escape inside string constant");
1022 /* Backslash newline is replaced by nothing at all. */
1023 CPP_BUMP_LINE (pfile);
1039 if (null_count == 1)
1040 cpp_warning (pfile, "null character in string or character constant");
1041 else if (null_count > 1)
1042 cpp_warning (pfile, "null characters in string or character constant");
1045 /* Parse a string and copy it to the output. */
1048 parse_string (pfile, c)
1052 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1053 const U_CHAR *limit;
1055 skip_string (pfile, c);
1057 limit = CPP_BUFFER (pfile)->cur;
1058 CPP_RESERVE (pfile, limit - start + 2);
1059 CPP_PUTC_Q (pfile, c);
1060 for (; start < limit; start++)
1062 CPP_PUTC_Q (pfile, *start);
1065 /* Read an assertion into the token buffer, converting to
1066 canonical form: `#predicate(a n swe r)' The next non-whitespace
1067 character to read should be the first letter of the predicate.
1068 Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
1069 with answer (see callers for why). In case of 0, an error has been
1072 _cpp_parse_assertion (pfile)
1076 _cpp_skip_hspace (pfile);
1080 cpp_error (pfile, "assertion without predicate");
1083 else if (! is_idstart(c))
1085 cpp_error (pfile, "assertion predicate is not an identifier");
1088 CPP_PUTC(pfile, '#');
1090 _cpp_parse_name (pfile, c);
1095 if (is_hspace(c) || c == '\r')
1096 _cpp_skip_hspace (pfile);
1102 CPP_PUTC(pfile, '(');
1105 while ((c = GETC()) != ')')
1111 CPP_PUTC(pfile, ' ');
1115 else if (c == '\n' || c == EOF)
1117 if (c == '\n') FORWARD(-1);
1118 cpp_error (pfile, "un-terminated assertion answer");
1122 /* \r cannot be a macro escape here. */
1123 CPP_BUMP_LINE (pfile);
1126 CPP_PUTC (pfile, c);
1131 if (pfile->limit[-1] == ' ')
1132 pfile->limit[-1] = ')';
1133 else if (pfile->limit[-1] == '(')
1135 cpp_error (pfile, "empty token sequence in assertion");
1139 CPP_PUTC (pfile, ')');
1144 /* Get the next token, and add it to the text in pfile->token_buffer.
1145 Return the kind of token we got. */
1148 _cpp_lex_token (pfile)
1152 enum cpp_ttype token;
1154 if (CPP_BUFFER (pfile) == NULL)
1165 if (PEEKC () == '=')
1169 if (CPP_OPTION (pfile, discard_comments))
1170 c = skip_comment (pfile, c);
1172 c = copy_comment (pfile, c);
1176 /* Comments are equivalent to spaces.
1177 For -traditional, a comment is equivalent to nothing. */
1178 if (!CPP_OPTION (pfile, discard_comments))
1180 else if (CPP_TRADITIONAL (pfile))
1182 if (pfile->parsing_define_directive)
1188 CPP_PUTC (pfile, c);
1193 CPP_PUTC (pfile, c);
1196 if (pfile->parsing_if_directive)
1198 CPP_ADJUST_WRITTEN (pfile, -1);
1199 if (_cpp_parse_assertion (pfile))
1200 return CPP_ASSERTION;
1204 if (pfile->parsing_define_directive)
1210 CPP_PUTC (pfile, c2);
1212 else if (c2 == '%' && PEEKN (1) == ':')
1214 /* Digraph: "%:" == "#". */
1216 CPP_RESERVE (pfile, 2);
1217 CPP_PUTC_Q (pfile, c2);
1218 CPP_PUTC_Q (pfile, GETC ());
1226 if (!pfile->only_seen_white)
1229 /* Remove the "#" or "%:" from the token buffer. */
1230 CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1231 return CPP_DIRECTIVE;
1235 parse_string (pfile, c);
1236 return c == '\'' ? CPP_CHAR : CPP_STRING;
1239 if (!CPP_OPTION (pfile, dollars_in_ident))
1245 /* Digraph: ":>" == "]". */
1247 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1255 if (c2 == c || c2 == '=')
1260 /* Digraphs: "%:" == "#", "%>" == "}". */
1265 CPP_RESERVE (pfile, 2);
1266 CPP_PUTC_Q (pfile, c);
1267 CPP_PUTC_Q (pfile, c2);
1273 CPP_RESERVE (pfile, 2);
1274 CPP_PUTC_Q (pfile, c);
1275 CPP_PUTC_Q (pfile, c2);
1276 return CPP_OPEN_BRACE;
1278 /* else fall through */
1284 if (PEEKC () == '=')
1292 if (CPP_OPTION (pfile, chill))
1293 goto comment; /* Chill style comment */
1301 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1303 /* In C++, there's a ->* operator. */
1305 CPP_RESERVE (pfile, 4);
1306 CPP_PUTC_Q (pfile, c);
1307 CPP_PUTC_Q (pfile, GETC ());
1308 CPP_PUTC_Q (pfile, GETC ());
1316 if (pfile->parsing_include_directive)
1320 CPP_PUTC (pfile, c);
1324 if (c == '\n' || c == EOF)
1327 "missing '>' in `#include <FILENAME>'");
1332 if (!CPP_BUFFER (pfile)->has_escapes)
1334 /* Backslash newline is replaced by nothing. */
1335 CPP_ADJUST_WRITTEN (pfile, -1);
1336 CPP_BUMP_LINE (pfile);
1340 /* We might conceivably get \r- or \r<space> in
1341 here. Just delete 'em. */
1343 if (d != '-' && d != ' ')
1344 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1345 CPP_ADJUST_WRITTEN (pfile, -1);
1351 /* Digraphs: "<%" == "{", "<:" == "[". */
1356 CPP_RESERVE (pfile, 2);
1357 CPP_PUTC_Q (pfile, c);
1358 CPP_PUTC_Q (pfile, c2);
1359 return CPP_CLOSE_BRACE;
1363 /* else fall through */
1368 /* GNU C++ supports MIN and MAX operators <? and >?. */
1369 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1372 CPP_RESERVE (pfile, 3);
1373 CPP_PUTC_Q (pfile, c);
1374 CPP_PUTC_Q (pfile, c2);
1375 if (PEEKC () == '=')
1376 CPP_PUTC_Q (pfile, GETC ());
1383 CPP_PUTC (pfile, c);
1388 /* In C++ there's a .* operator. */
1389 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1392 if (c2 == '.' && PEEKN(1) == '.')
1394 CPP_RESERVE (pfile, 3);
1395 CPP_PUTC_Q (pfile, '.');
1396 CPP_PUTC_Q (pfile, '.');
1397 CPP_PUTC_Q (pfile, '.');
1399 return CPP_ELLIPSIS;
1404 CPP_RESERVE (pfile, 2);
1405 CPP_PUTC_Q (pfile, c);
1406 CPP_PUTC_Q (pfile, GETC ());
1411 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1413 CPP_PUTC (pfile, c);
1415 parse_string (pfile, c);
1416 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1420 case '0': case '1': case '2': case '3': case '4':
1421 case '5': case '6': case '7': case '8': case '9':
1426 CPP_RESERVE (pfile, 2);
1427 CPP_PUTC_Q (pfile, c);
1431 if (!is_numchar(c) && c != '.'
1432 && ((c2 != 'e' && c2 != 'E'
1433 && ((c2 != 'p' && c2 != 'P')
1434 || CPP_OPTION (pfile, c89)))
1435 || (c != '+' && c != '-')))
1441 case 'b': case 'c': case 'd': case 'h': case 'o':
1442 case 'B': case 'C': case 'D': case 'H': case 'O':
1443 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1445 CPP_RESERVE (pfile, 2);
1446 CPP_PUTC_Q (pfile, c);
1447 CPP_PUTC_Q (pfile, '\'');
1453 goto chill_number_eof;
1456 CPP_PUTC (pfile, c);
1460 CPP_RESERVE (pfile, 2);
1461 CPP_PUTC_Q (pfile, c);
1474 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1475 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1476 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1477 case 'x': case 'y': case 'z':
1478 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1479 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1480 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1483 _cpp_parse_name (pfile, c);
1486 case ' ': case '\t': case '\v': case '\f': case '\0':
1495 CPP_PUTC (pfile, c);
1497 if (c == EOF || !is_hspace(c))
1502 null_warning (pfile, null_count);
1507 if (CPP_BUFFER (pfile)->has_escapes)
1512 if (pfile->output_escapes)
1513 CPP_PUTS (pfile, "\r-", 2);
1514 _cpp_parse_name (pfile, GETC ());
1519 /* "\r " means a space, but only if necessary to prevent
1520 accidental token concatenation. */
1521 CPP_RESERVE (pfile, 2);
1522 if (pfile->output_escapes)
1523 CPP_PUTC_Q (pfile, '\r');
1524 CPP_PUTC_Q (pfile, c);
1529 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1535 /* Backslash newline is ignored. */
1536 if (!ACTIVE_MARK_P (pfile))
1537 CPP_BUMP_LINE (pfile);
1542 CPP_PUTC (pfile, c);
1545 case '(': token = CPP_OPEN_PAREN; goto char1;
1546 case ')': token = CPP_CLOSE_PAREN; goto char1;
1547 case '{': token = CPP_OPEN_BRACE; goto char1;
1548 case '}': token = CPP_CLOSE_BRACE; goto char1;
1549 case ',': token = CPP_COMMA; goto char1;
1550 case ';': token = CPP_SEMICOLON; goto char1;
1556 CPP_PUTC (pfile, c);
1561 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1562 Caller is expected to have checked no_macro_expand. */
1564 maybe_macroexpand (pfile, written)
1568 U_CHAR *macro = pfile->token_buffer + written;
1569 size_t len = CPP_WRITTEN (pfile) - written;
1570 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1572 /* _cpp_lookup never returns null. */
1573 if (hp->type == T_VOID)
1575 if (hp->disabled || hp->type == T_IDENTITY)
1577 if (pfile->output_escapes)
1579 /* Insert a no-reexpand marker before IDENT. */
1580 CPP_RESERVE (pfile, 2);
1581 CPP_ADJUST_WRITTEN (pfile, 2);
1582 macro = pfile->token_buffer + written;
1584 memmove (macro + 2, macro, len);
1590 if (hp->type == T_EMPTY)
1592 /* Special case optimization: macro expands to nothing. */
1593 CPP_SET_WRITTEN (pfile, written);
1594 CPP_PUTC_Q (pfile, ' ');
1598 /* If macro wants an arglist, verify that a '(' follows. */
1599 if (hp->type == T_FMACRO)
1601 int macbuf_whitespace = 0;
1604 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1606 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1609 _cpp_skip_hspace (pfile);
1616 if (point != CPP_BUFFER (pfile)->cur)
1617 macbuf_whitespace = 1;
1621 goto not_macro_call;
1622 cpp_pop_buffer (pfile);
1625 CPP_SET_MARK (pfile);
1628 _cpp_skip_hspace (pfile);
1635 CPP_GOTO_MARK (pfile);
1640 if (macbuf_whitespace)
1641 CPP_PUTC (pfile, ' ');
1647 /* This is now known to be a macro call.
1648 Expand the macro, reading arguments as needed,
1649 and push the expansion on the input stack. */
1650 _cpp_macroexpand (pfile, hp);
1651 CPP_SET_WRITTEN (pfile, written);
1655 /* Complain about \v or \f in a preprocessing directive (constraint
1656 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1658 pedantic_whitespace (pfile, p, len)
1666 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1667 else if (*p == '\f')
1668 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1676 cpp_get_token (pfile)
1679 enum cpp_ttype token;
1680 long written = CPP_WRITTEN (pfile);
1683 token = _cpp_lex_token (pfile);
1688 pfile->potential_control_macro = 0;
1689 pfile->only_seen_white = 0;
1693 if (pfile->only_seen_white == 0)
1694 pfile->only_seen_white = 1;
1695 CPP_BUMP_LINE (pfile);
1703 pfile->potential_control_macro = 0;
1704 if (_cpp_handle_directive (pfile))
1705 return CPP_DIRECTIVE;
1706 pfile->only_seen_white = 0;
1707 CPP_PUTC (pfile, '#');
1711 pfile->potential_control_macro = 0;
1712 pfile->only_seen_white = 0;
1713 if (! pfile->no_macro_expand
1714 && maybe_macroexpand (pfile, written))
1719 if (CPP_BUFFER (pfile) == NULL)
1721 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1723 cpp_pop_buffer (pfile);
1726 cpp_pop_buffer (pfile);
1731 /* Like cpp_get_token, but skip spaces and comments. */
1734 cpp_get_non_space_token (pfile)
1737 int old_written = CPP_WRITTEN (pfile);
1740 enum cpp_ttype token = cpp_get_token (pfile);
1741 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1743 CPP_SET_WRITTEN (pfile, old_written);
1747 /* Like cpp_get_token, except that it does not execute directives,
1748 does not consume vertical space, and discards horizontal space. */
1750 _cpp_get_directive_token (pfile)
1754 enum cpp_ttype token;
1758 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1759 old_written = CPP_WRITTEN (pfile);
1760 token = _cpp_lex_token (pfile);
1767 /* Put it back and return VSPACE. */
1769 CPP_ADJUST_WRITTEN (pfile, -1);
1773 /* The purpose of this rather strange check is to prevent pedantic
1774 warnings for ^L in an #ifdefed out block. */
1775 if (CPP_PEDANTIC (pfile) && ! at_bol)
1776 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1777 CPP_WRITTEN (pfile) - old_written);
1778 CPP_SET_WRITTEN (pfile, old_written);
1783 /* Don't execute the directive, but don't smash it to OTHER either. */
1784 CPP_PUTC (pfile, '#');
1785 return CPP_DIRECTIVE;
1788 if (! pfile->no_macro_expand
1789 && maybe_macroexpand (pfile, old_written))
1794 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1796 cpp_pop_buffer (pfile);
1800 /* This can happen for files that don't end with a newline,
1801 and for cpp_define and friends. Pretend they do, so
1802 callers don't have to deal. A warning will be issued by
1803 someone else, if necessary. */
1808 /* Determine the current line and column. Used only by read_and_prescan. */
1810 find_position (start, limit, linep)
1813 unsigned long *linep;
1815 unsigned long line = *linep;
1816 U_CHAR *lbase = start;
1817 while (start < limit)
1819 U_CHAR ch = *start++;
1820 if (ch == '\n' || ch == '\r')
1830 /* The following table is used by _cpp_read_and_prescan. If we have
1831 designated initializers, it can be constant data; otherwise, it is
1832 set up at runtime by _cpp_init_input_buffer. */
1835 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1838 #if (GCC_VERSION >= 2007)
1839 #define init_chartab() /* nothing */
1840 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1842 #define s(p, v) [p] = v,
1844 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1845 static void init_chartab PARAMS ((void)) { \
1846 unsigned char *x = chartab;
1848 #define s(p, v) x[p] = v;
1851 /* Table of characters that can't be handled in the inner loop.
1852 Also contains the mapping between trigraph third characters and their
1854 #define SPECCASE_CR 1
1855 #define SPECCASE_BACKSLASH 2
1856 #define SPECCASE_QUESTION 3
1859 s('\r', SPECCASE_CR)
1860 s('\\', SPECCASE_BACKSLASH)
1861 s('?', SPECCASE_QUESTION)
1863 s('=', '#') s(')', ']') s('!', '|')
1864 s('(', '[') s('\'', '^') s('>', '}')
1865 s('/', '\\') s('<', '{') s('-', '~')
1872 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1873 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1875 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1876 much memory to allocate initially; more will be allocated if
1877 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1878 canonical form (\n). If enabled, convert and/or warn about
1879 trigraphs. Convert backslash-newline to a one-character escape
1880 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1881 token). If there is no newline at the end of the file, add one and
1882 warn. Returns -1 on failure, or the actual length of the data to
1885 This function does a lot of work, and can be a serious performance
1886 bottleneck. It has been tuned heavily; make sure you understand it
1887 before hacking. The common case - no trigraphs, Unix style line
1888 breaks, backslash-newline set off by whitespace, newline at EOF -
1889 has been optimized at the expense of the others. The performance
1890 penalty for DOS style line breaks (\r\n) is about 15%.
1892 Warnings lose particularly heavily since we have to determine the
1893 line number, which involves scanning from the beginning of the file
1894 or from the last warning. The penalty for the absence of a newline
1895 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1897 If your file has more than one kind of end-of-line marker, you
1898 will get messed-up line numbering.
1900 So that the cases of the switch statement do not have to concern
1901 themselves with the complications of reading beyond the end of the
1902 buffer, the buffer is guaranteed to have at least 3 characters in
1903 it (or however many are left in the file, if less) on entry to the
1904 switch. This is enough to handle trigraphs and the "\\\n\r" and
1907 The end of the buffer is marked by a '\\', which, being a special
1908 character, guarantees we will exit the fast-scan loops and perform
1912 _cpp_read_and_prescan (pfile, fp, desc, len)
1918 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1919 U_CHAR *ip, *op, *line_base;
1922 unsigned int deferred_newlines;
1927 deferred_newlines = 0;
1931 ibase = pfile->input_buffer + 3;
1933 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1937 U_CHAR *near_buff_end;
1939 count = read (desc, ibase, pfile->input_buffer_len);
1943 ibase[count] = '\\'; /* Marks end of buffer */
1946 near_buff_end = pfile->input_buffer + count;
1951 size_t delta_line_base;
1955 This could happen if the file is larger than half the
1956 maximum address space of the machine. */
1959 delta_op = op - buf;
1960 delta_line_base = line_base - buf;
1961 buf = (U_CHAR *) xrealloc (buf, len);
1962 op = buf + delta_op;
1963 line_base = buf + delta_line_base;
1970 /* Allow normal processing of the (at most 2) remaining
1971 characters. The end-of-buffer marker is still present
1972 and prevents false matches within the switch. */
1973 near_buff_end = ibase - 1;
1980 /* Deal with \-newline, potentially in the middle of a token. */
1981 if (deferred_newlines)
1983 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1985 /* Previous was not white space. Skip to white
1986 space, if we can, before outputting the \r's */
1988 while (ip[span] != ' '
1991 && NORMAL(ip[span]))
1993 memcpy (op, ip, span);
1996 if (! NORMAL(ip[0]))
1999 while (deferred_newlines)
2000 deferred_newlines--, *op++ = '\r';
2003 /* Copy as much as we can without special treatment. */
2005 while (NORMAL (ip[span])) span++;
2006 memcpy (op, ip, span);
2011 if (ip > near_buff_end) /* Do we have enough chars? */
2013 switch (chartab[*ip++])
2015 case SPECCASE_CR: /* \r */
2024 case SPECCASE_BACKSLASH: /* \ */
2027 deferred_newlines++;
2029 if (*ip == '\r') ip++;
2031 else if (*ip == '\r')
2033 deferred_newlines++;
2035 if (*ip == '\n') ip++;
2041 case SPECCASE_QUESTION: /* ? */
2045 *op++ = '?'; /* Normal non-trigraph case */
2054 if (CPP_OPTION (pfile, warn_trigraphs))
2057 line_base = find_position (line_base, op, &line);
2058 col = op - line_base + 1;
2059 if (CPP_OPTION (pfile, trigraphs))
2060 cpp_warning_with_line (pfile, line, col,
2061 "trigraph ??%c converted to %c", d, t);
2063 cpp_warning_with_line (pfile, line, col,
2064 "trigraph ??%c ignored", d);
2068 if (CPP_OPTION (pfile, trigraphs))
2070 op[-1] = t; /* Overwrite '?' */
2075 goto do_speccase; /* May need buffer refill */
2087 /* Copy previous char plus unprocessed (at most 2) chars
2088 to beginning of buffer, refill it with another
2089 read(), and continue processing */
2090 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2100 line_base = find_position (line_base, op, &line);
2101 col = op - line_base + 1;
2102 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2103 if (offset + 1 > len)
2106 if (offset + 1 > len)
2108 buf = (U_CHAR *) xrealloc (buf, len);
2114 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2118 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2119 (unsigned long)offset);
2124 cpp_error_from_errno (pfile, fp->ihash->name);
2129 /* Allocate pfile->input_buffer, and initialize chartab[]
2130 if it hasn't happened already. */
2133 _cpp_init_input_buffer (pfile)
2139 init_token_list (pfile, &pfile->directbuf, 0);
2141 /* Determine the appropriate size for the input buffer. Normal C
2142 source files are smaller than eight K. */
2143 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2144 address arithmetic all the time, and 3 for pushback during buffer
2145 refill, in case there's a potential trigraph or end-of-line
2146 digraph at the end of a block. */
2148 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2149 pfile->input_buffer = tmp;
2150 pfile->input_buffer_len = 8192;
2154 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2155 and extending for LEN characters to the NUL-terminated string
2156 STRING. Typical usage:
2158 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2164 cpp_idcmp (token, len, string)
2165 const U_CHAR *token;
2169 size_t len2 = strlen (string);
2172 if ((r = memcmp (token, string, MIN (len, len2))))
2175 /* The longer of the two strings sorts after the shorter. */
2178 else if (len < len2)
2186 /* Lexing algorithm.
2188 The original lexer in cpplib was made up of two passes: a first pass
2189 that replaced trigraphs and deleted escaped newlines, and a second
2190 pass that tokenized the result of the first pass. Tokenisation was
2191 performed by peeking at the next character in the input stream. For
2192 example, if the input stream contained "!=", the handler for the !
2193 character would peek at the next character, and if it were a '='
2194 would skip over it, and return a "!=" token, otherwise it would
2195 return just the "!" token.
2197 To implement a single-pass lexer, this peeking ahead is unworkable.
2198 An arbitrary number of escaped newlines, and trigraphs (in particular
2199 ??/ which translates to the escape \), could separate the '!' and '='
2200 in the input stream, yet the next token is still a "!=".
2202 Suppose instead that we lex by one logical line at a time, producing
2203 a token list or stack for each logical line, and when seeing the '!'
2204 push a CPP_NOT token on the list. Then if the '!' is part of a
2205 longer token ("!=") we know we must see the remainder of the token by
2206 the time we reach the end of the logical line. Thus we can have the
2207 '=' handler look at the previous token (at the end of the list / top
2208 of the stack) and see if it is a "!" token, and if so, instead of
2209 pushing a "=" token revise the existing token to be a "!=" token.
2211 This works in the presence of escaped newlines, because the '\' would
2212 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2213 newline ('\n' or '\r') handler looks at the token at the top of the
2214 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2215 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2216 the '=' handler would never see any intervening escaped newlines.
2218 To make trigraphs work in this context, as in precedence trigraphs
2219 are highest and converted before anything else, the '?' handler does
2220 lookahead to see if it is a trigraph, and if so skips the trigraph
2221 and pushes the token it represents onto the top of the stack. This
2222 also works in the particular case of a CPP_BACKSLASH trigraph.
2224 To the preprocessor, whitespace is only significant to the point of
2225 knowing whether whitespace precedes a particular token. For example,
2226 the '=' handler needs to know whether there was whitespace between it
2227 and a "!" token on the top of the stack, to make the token conversion
2228 decision correctly. So each token has a PREV_WHITESPACE flag to
2229 indicate this - the standard permits consecutive whitespace to be
2230 regarded as a single space. The compiler front ends are not
2231 interested in whitespace at all; they just require a token stream.
2232 Another place where whitespace is significant to the preprocessor is
2233 a #define statement - if there is whitespace between the macro name
2234 and an initial "(" token the macro is "object-like", otherwise it is
2235 a function-like macro that takes arguments.
2237 However, all is not rosy. Parsing of identifiers, numbers, comments
2238 and strings becomes trickier because of the possibility of raw
2239 trigraphs and escaped newlines in the input stream.
2241 The trigraphs are three consecutive characters beginning with two
2242 question marks. A question mark is not valid as part of a number or
2243 identifier, so parsing of a number or identifier terminates normally
2244 upon reaching it, returning to the mainloop which handles the
2245 trigraph just like it would in any other position. Similarly for the
2246 backslash of a backslash-newline combination. So we just need the
2247 escaped-newline dropper in the mainloop to check if the token on the
2248 top of the stack after dropping the escaped newline is a number or
2249 identifier, and if so to continue processing it as if nothing had
2252 For strings, we replace trigraphs whenever we reach a quote or
2253 newline, because there might be a backslash trigraph escaping them.
2254 We need to be careful that we start trigraph replacing from where we
2255 left off previously, because it is possible for a first scan to leave
2256 "fake" trigraphs that a second scan would pick up as real (e.g. the
2257 sequence "????/\n=" would find a fake ??= trigraph after removing the
2260 For line comments, on reaching a newline we scan the previous
2261 character(s) to see if it is escaped, and continue if it is. Block
2262 comments ignore everything and just focus on finding the comment
2263 termination mark. The only difficult thing, and it is surprisingly
2264 tricky, is checking if an asterisk precedes the final slash since
2265 they could be separated by escaped newlines. If the preprocessor is
2266 invoked with the output comments option, we don't bother removing
2267 escaped newlines and replacing trigraphs for output.
2269 Finally, numbers can begin with a period, which is pushed initially
2270 as a CPP_DOT token in its own right. The digit handler checks if the
2271 previous token was a CPP_DOT not separated by whitespace, and if so
2272 pops it off the stack and pushes a period into the number's buffer
2273 before calling the number parser.
2277 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2278 U":>", U"<%", U"%>"};
2279 static unsigned char trigraph_map[256];
2282 expand_comment_space (list)
2285 if (list->comments_cap == 0)
2287 list->comments_cap = 10;
2288 list->comments = (cpp_token *)
2289 xmalloc (list->comments_cap * sizeof (cpp_token));
2293 list->comments_cap *= 2;
2294 list->comments = (cpp_token *)
2295 xrealloc (list->comments, list->comments_cap);
2300 cpp_free_token_list (list)
2304 free (list->comments);
2305 free (list->tokens - 1); /* Backup over dummy token. */
2306 free (list->namebuf);
2311 init_trigraph_map ()
2313 trigraph_map['='] = '#';
2314 trigraph_map['('] = '[';
2315 trigraph_map[')'] = ']';
2316 trigraph_map['/'] = '\\';
2317 trigraph_map['\''] = '^';
2318 trigraph_map['<'] = '{';
2319 trigraph_map['>'] = '}';
2320 trigraph_map['!'] = '|';
2321 trigraph_map['-'] = '~';
2324 /* Call when a trigraph is encountered. It warns if necessary, and
2325 returns true if the trigraph should be honoured. END is the third
2326 character of a trigraph in the input stream. */
2328 trigraph_ok (pfile, end)
2330 const unsigned char *end;
2332 int accept = CPP_OPTION (pfile, trigraphs);
2334 if (CPP_OPTION (pfile, warn_trigraphs))
2336 unsigned int col = end - 1 - pfile->buffer->line_base;
2338 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2339 "trigraph ??%c converted to %c",
2340 (int) *end, (int) trigraph_map[*end]);
2342 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2343 "trigraph ??%c ignored", (int) *end);
2348 /* Scan a string for trigraphs, warning or replacing them inline as
2349 appropriate. When parsing a string, we must call this routine
2350 before processing a newline character (if trigraphs are enabled),
2351 since the newline might be escaped by a preceding backslash
2352 trigraph sequence. Returns a pointer to the end of the name after
2355 static unsigned char*
2356 trigraph_replace (pfile, src, limit)
2359 unsigned char* limit;
2361 unsigned char *dest;
2363 /* Starting with src[1], find two consecutive '?'. The case of no
2364 trigraphs is streamlined. */
2366 for (; src + 1 < limit; src += 2)
2371 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2374 else if (src + 2 == limit || src[1] != '?')
2377 /* Check if it really is a trigraph. */
2378 if (trigraph_map[src[2]] == 0)
2382 goto trigraph_found;
2386 /* Now we have a trigraph, we need to scan the remaining buffer, and
2387 copy-shifting its contents left if replacement is enabled. */
2388 for (; src + 2 < limit; dest++, src++)
2389 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2393 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2394 *dest = trigraph_map[*src];
2397 /* Copy remaining (at most 2) characters. */
2403 /* If CUR is a backslash or the end of a trigraphed backslash, return
2404 a pointer to its beginning, otherwise NULL. We don't read beyond
2405 the buffer start, because there is the start of the comment in the
2407 static const unsigned char *
2408 backslash_start (pfile, cur)
2410 const unsigned char *cur;
2414 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2415 && trigraph_ok (pfile, cur))
2420 /* Skip a C-style block comment. This is probably the trickiest
2421 handler. We find the end of the comment by seeing if an asterisk
2422 is before every '/' we encounter. The nasty complication is that a
2423 previous asterisk may be separated by one or more escaped newlines.
2424 Returns non-zero if comment terminated by EOF, zero otherwise. */
2426 skip_block_comment2 (pfile)
2429 cpp_buffer *buffer = pfile->buffer;
2430 const unsigned char *char_after_star = 0;
2431 register const unsigned char *cur = buffer->cur;
2434 /* Inner loop would think the comment has ended if the first comment
2435 character is a '/'. Avoid this and keep the inner loop clean by
2436 skipping such a character. */
2437 if (cur < buffer->rlimit && cur[0] == '/')
2440 for (; cur < buffer->rlimit; )
2442 unsigned char c = *cur++;
2444 /* People like decorating comments with '*', so check for
2445 '/' instead for efficiency. */
2448 if (cur[-2] == '*' || cur - 1 == char_after_star)
2451 /* Warn about potential nested comments, but not when
2452 the final character inside the comment is a '/'.
2453 Don't bother to get it right across escaped newlines. */
2454 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2455 && cur[0] == '*' && cur[1] != '/')
2458 cpp_warning (pfile, "'/*' within comment");
2461 else if (IS_NEWLINE(c))
2463 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2465 handle_newline (cur, buffer->rlimit, c);
2466 /* Work correctly if there is an asterisk before an
2467 arbitrarily long sequence of escaped newlines. */
2468 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2469 char_after_star = cur;
2471 char_after_star = 0;
2481 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2482 Returns non-zero if a multiline comment. */
2484 skip_line_comment2 (pfile)
2487 cpp_buffer *buffer = pfile->buffer;
2488 register const unsigned char *cur = buffer->cur;
2491 for (; cur < buffer->rlimit; )
2493 unsigned char c = *cur++;
2497 /* Check for a (trigraph?) backslash escaping the newline. */
2498 if (!backslash_start (pfile, cur - 2))
2501 handle_newline (cur, buffer->rlimit, c);
2507 buffer->cur = cur - 1; /* Leave newline for caller. */
2511 /* Skips whitespace, stopping at next non-whitespace character. */
2513 skip_whitespace (pfile, in_directive)
2517 cpp_buffer *buffer = pfile->buffer;
2518 register const unsigned char *cur = buffer->cur;
2519 unsigned short null_count = 0;
2521 for (; cur < buffer->rlimit; )
2523 unsigned char c = *cur++;
2525 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2527 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2531 /* Must be '\f' or '\v' */
2532 else if (in_directive && CPP_PEDANTIC (pfile))
2533 cpp_pedwarn (pfile, "%s in preprocessing directive",
2534 c == '\f' ? "formfeed" : "vertical tab");
2539 buffer->cur = cur - 1;
2541 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2542 : "embedded null character ignored");
2545 /* Parse (append) an identifier. */
2547 parse_name (pfile, list, name)
2552 const unsigned char *name_limit;
2553 unsigned char *namebuf;
2554 cpp_buffer *buffer = pfile->buffer;
2555 register const unsigned char *cur = buffer->cur;
2558 name_limit = list->namebuf + list->name_cap;
2559 namebuf = list->namebuf + list->name_used;
2561 for (; cur < buffer->rlimit && namebuf < name_limit; )
2563 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2569 if (c == '$' && CPP_PEDANTIC (pfile))
2572 cpp_pedwarn (pfile, "'$' character in identifier");
2576 /* Run out of name space? */
2577 if (cur < buffer->rlimit)
2579 list->name_used = namebuf - list->namebuf;
2580 auto_expand_name_space (list);
2586 name->len = namebuf - name->text;
2587 list->name_used = namebuf - list->namebuf;
2590 /* Parse (append) a number. */
2592 #define VALID_SIGN(c, prevc) \
2593 (((c) == '+' || (c) == '-') && \
2594 ((prevc) == 'e' || (prevc) == 'E' \
2595 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2598 parse_number (pfile, list, name)
2603 const unsigned char *name_limit;
2604 unsigned char *namebuf;
2605 cpp_buffer *buffer = pfile->buffer;
2606 register const unsigned char *cur = buffer->cur;
2609 name_limit = list->namebuf + list->name_cap;
2610 namebuf = list->namebuf + list->name_used;
2612 for (; cur < buffer->rlimit && namebuf < name_limit; )
2614 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2616 /* Perhaps we should accept '$' here if we accept it for
2617 identifiers. We know namebuf[-1] is safe, because for c to
2618 be a sign we must have pushed at least one character. */
2619 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2626 /* Run out of name space? */
2627 if (cur < buffer->rlimit)
2629 list->name_used = namebuf - list->namebuf;
2630 auto_expand_name_space (list);
2636 name->len = namebuf - name->text;
2637 list->name_used = namebuf - list->namebuf;
2640 /* Places a string terminated by an unescaped TERMINATOR into a
2641 cpp_name, which should be expandable and thus at the top of the
2642 list's stack. Handles embedded trigraphs, if necessary, and
2645 Can be used for character constants (terminator = '\''), string
2646 constants ('"'), angled headers ('>') and assertions (')'). */
2649 parse_string2 (pfile, list, name, terminator)
2653 unsigned int terminator;
2655 cpp_buffer *buffer = pfile->buffer;
2656 register const unsigned char *cur = buffer->cur;
2657 const unsigned char *name_limit;
2658 unsigned char *namebuf;
2659 unsigned int null_count = 0;
2660 int trigraphed_len = 0;
2663 name_limit = list->namebuf + list->name_cap;
2664 namebuf = list->namebuf + list->name_used;
2666 for (; cur < buffer->rlimit && namebuf < name_limit; )
2668 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2672 else if (c == terminator || IS_NEWLINE (c))
2674 /* Needed for trigraph_replace and multiline string warning. */
2677 /* Scan for trigraphs before checking if backslash-escaped. */
2678 if (CPP_OPTION (pfile, trigraphs)
2679 || CPP_OPTION (pfile, warn_trigraphs))
2681 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2683 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2684 if (trigraphed_len < 0)
2688 namebuf--; /* Drop the newline / terminator from the name. */
2691 /* Drop a backslash newline, and continue. */
2692 if (namebuf[-1] == '\\')
2694 handle_newline (cur, buffer->rlimit, c);
2701 /* In Fortran and assembly language, silently terminate
2702 strings of either variety at end of line. This is a
2703 kludge around not knowing where comments are in these
2705 if (CPP_OPTION (pfile, lang_fortran)
2706 || CPP_OPTION (pfile, lang_asm))
2709 /* Character constants, headers and asserts may not
2710 extend over multiple lines. In Standard C, neither
2711 may strings. We accept multiline strings as an
2712 extension, but not in directives. */
2713 if (terminator != '"' || IS_DIRECTIVE (list))
2716 cur++; /* Move forwards again. */
2718 if (pfile->multiline_string_line == 0)
2720 pfile->multiline_string_line = list->line;
2721 if (CPP_PEDANTIC (pfile))
2722 cpp_pedwarn (pfile, "multi-line string constant");
2726 handle_newline (cur, buffer->rlimit, c);
2730 unsigned char *temp;
2732 /* An odd number of consecutive backslashes represents
2733 an escaped terminator. */
2735 while (temp >= name->text && *temp == '\\')
2738 if ((namebuf - temp) & 1)
2745 /* Run out of name space? */
2746 if (cur < buffer->rlimit)
2748 list->name_used = namebuf - list->namebuf;
2749 auto_expand_name_space (list);
2753 /* We may not have trigraph-replaced the input for this code path,
2754 but as the input is in error by being unterminated we don't
2755 bother. Prevent warnings about no newlines at EOF. */
2756 if (IS_NEWLINE(cur[-1]))
2760 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2762 if (terminator == '\"' && pfile->multiline_string_line != list->line
2763 && pfile->multiline_string_line != 0)
2765 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2766 "possible start of unterminated string literal");
2767 pfile->multiline_string_line = 0;
2772 name->len = namebuf - name->text;
2773 list->name_used = namebuf - list->namebuf;
2776 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2777 : "null character preserved"));
2780 /* The character TYPE helps us distinguish comment types: '*' = C
2781 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2782 the stored comment includes the comment start and any terminator. */
2784 #define COMMENT_START_LEN 2
2786 save_comment (list, from, len, tok_no, type)
2788 const unsigned char *from;
2790 unsigned int tok_no;
2794 unsigned char *buffer;
2796 len += COMMENT_START_LEN;
2798 if (list->comments_used == list->comments_cap)
2799 expand_comment_space (list);
2801 if (list->name_used + len > list->name_cap)
2802 expand_name_space (list, len);
2804 buffer = list->namebuf + list->name_used;
2806 comment = &list->comments[list->comments_used++];
2807 comment->type = CPP_COMMENT;
2808 comment->aux = tok_no;
2809 comment->val.name.len = len;
2810 comment->val.name.text = buffer;
2823 memcpy (buffer, from, len - COMMENT_START_LEN);
2824 list->name_used += len;
2828 * The tokenizer's main loop. Returns a token list, representing a
2829 * logical line in the input file, terminated with a CPP_VSPACE
2830 * token. On EOF, a token list containing the single CPP_EOF token
2833 * Implementation relies almost entirely on lookback, rather than
2834 * looking forwards. This means that tokenization requires just
2835 * a single pass of the file, even in the presence of trigraphs and
2836 * escaped newlines, providing significant performance benefits.
2837 * Trigraph overhead is negligible if they are disabled, and low
2838 * even when enabled.
2842 _cpp_lex_line (pfile, list)
2846 cpp_token *cur_token, *token_limit;
2847 cpp_buffer *buffer = pfile->buffer;
2848 register const unsigned char *cur = buffer->cur;
2849 unsigned char flags = 0;
2852 token_limit = list->tokens + list->tokens_cap;
2853 cur_token = list->tokens + list->tokens_used;
2855 for (; cur < buffer->rlimit && cur_token < token_limit;)
2857 unsigned char c = *cur++;
2859 /* Optimize whitespace skipping, in particular the case of a
2860 single whitespace character, as every other token is probably
2861 whitespace. (' ' '\t' '\v' '\f' '\0'). */
2862 if (is_hspace ((unsigned int) c))
2864 if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2866 buffer->cur = cur - (c == '\0'); /* Get the null warning. */
2867 skip_whitespace (pfile, IS_DIRECTIVE (list));
2870 flags = PREV_WHITESPACE;
2871 if (cur == buffer->rlimit)
2876 /* Initialize current token. Its type is set in the switch. */
2877 cur_token->col = COLUMN (cur);
2878 cur_token->flags = flags;
2883 case '0': case '1': case '2': case '3': case '4':
2884 case '5': case '6': case '7': case '8': case '9':
2885 cur--; /* Backup character. */
2886 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2888 /* Prepend an immediately previous CPP_DOT token. */
2890 if (list->name_cap == list->name_used)
2891 auto_expand_name_space (list);
2893 cur_token->val.name.len = 1;
2894 cur_token->val.name.text = list->namebuf + list->name_used;
2895 list->namebuf[list->name_used++] = '.';
2898 INIT_NAME (list, cur_token->val.name);
2902 parse_number (pfile, list, &cur_token->val.name);
2905 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
2910 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2911 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2912 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2913 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2915 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2916 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2917 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2918 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2920 cur--; /* Backup character. */
2921 INIT_NAME (list, cur_token->val.name);
2922 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2926 parse_name (pfile, list, &cur_token->val.name);
2929 /* Find handler for newly created / extended directive. */
2930 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2931 _cpp_check_directive (list, cur_token);
2938 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2939 /* Do we have a wide string? */
2940 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2941 && cur_token[-1].val.name.len == 1
2942 && cur_token[-1].val.name.text[0] == 'L'
2943 && !CPP_TRADITIONAL (pfile))
2945 /* No need for 'L' any more. */
2947 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2951 /* Here c is one of ' " > or ). */
2952 INIT_NAME (list, cur_token->val.name);
2954 parse_string2 (pfile, list, &cur_token->val.name, c);
2960 cur_token->type = CPP_DIV;
2963 if (PREV_TOKEN_TYPE == CPP_DIV)
2965 /* We silently allow C++ comments in system headers,
2966 irrespective of conformance mode, because lots of
2967 broken systems do that and trying to clean it up
2968 in fixincludes is a nightmare. */
2969 if (buffer->system_header_p)
2970 goto do_line_comment;
2971 else if (CPP_OPTION (pfile, cplusplus_comments))
2973 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2974 && ! buffer->warned_cplusplus_comments)
2978 "C++ style comments are not allowed in ISO C89");
2980 "(this will be reported only once per input file)");
2981 buffer->warned_cplusplus_comments = 1;
2987 "comment start split across lines");
2988 if (skip_line_comment2 (pfile))
2989 cpp_error_with_line (pfile, list->line,
2991 "multi-line comment");
2992 if (!CPP_OPTION (pfile, discard_comments))
2993 save_comment (list, cur, buffer->cur - cur,
2994 cur_token - 1 - list->tokens, c);
2997 /* Back-up to first '-' or '/'. */
2999 if (!CPP_OPTION (pfile, traditional))
3000 flags = PREV_WHITESPACE;
3008 cur_token->type = CPP_MULT;
3011 if (PREV_TOKEN_TYPE == CPP_DIV)
3016 "comment start '/*' split across lines");
3017 if (skip_block_comment2 (pfile))
3018 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3019 "unterminated comment");
3020 else if (buffer->cur[-2] != '*')
3022 "comment end '*/' split across lines");
3023 if (!CPP_OPTION (pfile, discard_comments))
3024 save_comment (list, cur, buffer->cur - cur,
3025 cur_token - 1 - list->tokens, c);
3029 if (!CPP_OPTION (pfile, traditional))
3030 flags = PREV_WHITESPACE;
3033 else if (CPP_OPTION (pfile, cplusplus))
3035 /* In C++, there are .* and ->* operators. */
3036 if (PREV_TOKEN_TYPE == CPP_DEREF)
3037 BACKUP_TOKEN (CPP_DEREF_STAR);
3038 else if (PREV_TOKEN_TYPE == CPP_DOT)
3039 BACKUP_TOKEN (CPP_DOT_STAR);
3047 handle_newline (cur, buffer->rlimit, c);
3048 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3050 /* Remove the escaped newline. Then continue to process
3051 any interrupted name or number. */
3056 if (cur_token->type == CPP_NAME)
3058 else if (cur_token->type == CPP_NUMBER)
3059 goto continue_number;
3062 /* Remember whitespace setting. */
3063 flags = cur_token->flags;
3066 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3069 cpp_warning (pfile, "backslash and newline separated by space");
3071 PUSH_TOKEN (CPP_VSPACE);
3075 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3077 if (CPP_OPTION (pfile, chill))
3078 goto do_line_comment;
3079 REVISE_TOKEN (CPP_MINUS_MINUS);
3082 PUSH_TOKEN (CPP_MINUS);
3085 /* The digraph flag checking ensures that ## and %:%:
3086 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3089 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3090 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3091 REVISE_TOKEN (CPP_PASTE);
3093 PUSH_TOKEN (CPP_HASH);
3097 cur_token->type = CPP_COLON;
3100 if (PREV_TOKEN_TYPE == CPP_COLON
3101 && CPP_OPTION (pfile, cplusplus))
3102 BACKUP_TOKEN (CPP_SCOPE);
3103 /* Digraph: "<:" is a '[' */
3104 else if (PREV_TOKEN_TYPE == CPP_LESS)
3105 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3106 /* Digraph: "%:" is a '#' */
3107 else if (PREV_TOKEN_TYPE == CPP_MOD)
3109 (--cur_token)->flags |= DIGRAPH;
3117 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3118 REVISE_TOKEN (CPP_AND_AND);
3120 PUSH_TOKEN (CPP_AND);
3125 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3126 REVISE_TOKEN (CPP_OR_OR);
3128 PUSH_TOKEN (CPP_OR);
3132 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3133 REVISE_TOKEN (CPP_PLUS_PLUS);
3135 PUSH_TOKEN (CPP_PLUS);
3139 /* This relies on equidistance of "?=" and "?" tokens. */
3140 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3141 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3143 PUSH_TOKEN (CPP_EQ);
3147 cur_token->type = CPP_GREATER;
3150 if (PREV_TOKEN_TYPE == CPP_GREATER)
3151 BACKUP_TOKEN (CPP_RSHIFT);
3152 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3153 BACKUP_TOKEN (CPP_DEREF);
3154 /* Digraph: ":>" is a ']' */
3155 else if (PREV_TOKEN_TYPE == CPP_COLON)
3156 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3157 /* Digraph: "%>" is a '}' */
3158 else if (PREV_TOKEN_TYPE == CPP_MOD)
3159 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3165 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3167 REVISE_TOKEN (CPP_LSHIFT);
3170 /* Is this the beginning of a header name? */
3171 if (list->dir_flags & SYNTAX_INCLUDE)
3173 c = '>'; /* Terminator. */
3174 cur_token->type = CPP_HEADER_NAME;
3175 goto do_parse_string;
3177 PUSH_TOKEN (CPP_LESS);
3181 /* Digraph: "<%" is a '{' */
3182 cur_token->type = CPP_MOD;
3183 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3184 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3189 /* Is this the beginning of an assertion string? */
3190 if (list->dir_flags & SYNTAX_ASSERT)
3192 c = ')'; /* Terminator. */
3193 cur_token->type = CPP_ASSERTION;
3194 goto do_parse_string;
3196 PUSH_TOKEN (CPP_OPEN_PAREN);
3200 if (cur + 1 < buffer->rlimit && *cur == '?'
3201 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3203 /* Handle trigraph. */
3207 case '(': goto make_open_square;
3208 case ')': goto make_close_square;
3209 case '<': goto make_open_brace;
3210 case '>': goto make_close_brace;
3211 case '=': goto make_hash;
3212 case '!': goto make_or;
3213 case '-': goto make_complement;
3214 case '/': goto make_backslash;
3215 case '\'': goto make_xor;
3218 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3220 /* GNU C++ defines <? and >? operators. */
3221 if (PREV_TOKEN_TYPE == CPP_LESS)
3223 REVISE_TOKEN (CPP_MIN);
3226 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3228 REVISE_TOKEN (CPP_MAX);
3232 PUSH_TOKEN (CPP_QUERY);
3236 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3238 && !(cur_token[-1].flags & PREV_WHITESPACE))
3241 PUSH_TOKEN (CPP_ELLIPSIS);
3244 PUSH_TOKEN (CPP_DOT);
3248 case '~': PUSH_TOKEN (CPP_COMPL); break;
3250 case '^': PUSH_TOKEN (CPP_XOR); break;
3252 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3254 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3256 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3258 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3260 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3261 case '!': PUSH_TOKEN (CPP_NOT); break;
3262 case ',': PUSH_TOKEN (CPP_COMMA); break;
3263 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3264 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3267 if (CPP_OPTION (pfile, dollars_in_ident))
3272 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3273 PUSH_TOKEN (CPP_OTHER);
3278 /* Run out of token space? */
3279 if (cur_token == token_limit)
3281 list->tokens_used = cur_token - list->tokens;
3282 expand_token_space (list);
3286 cur_token->type = CPP_EOF;
3287 cur_token->flags = flags;
3289 if (cur_token != &list->tokens[0])
3291 /* Next call back will get just a CPP_EOF. */
3293 cpp_warning (pfile, "no newline at end of file");
3294 PUSH_TOKEN (CPP_VSPACE);
3300 list->tokens_used = cur_token - list->tokens;
3302 /* FIXME: take this check out and put it in the caller.
3303 list->directive == 0 indicates an unknown directive (but null
3304 directive is OK). This is the first time we can be sure the
3305 directive is invalid, and thus warn about it, because it might
3306 have been split by escaped newlines. Also, don't complain about
3307 invalid directives in assembly source, we don't know where the
3308 comments are, and # may introduce assembler pseudo-ops. */
3310 if (IS_DIRECTIVE (list) && list->dir_handler == 0
3311 && list->tokens[1].type != CPP_VSPACE
3312 && !CPP_OPTION (pfile, lang_asm))
3313 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3314 "invalid preprocessing directive");
3317 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3318 already contain enough space to hold the token's spelling. If
3319 WHITESPACE is true, and the token was preceded by whitespace,
3320 output a single space before the token proper. Returns a pointer
3321 to the character after the last character written. */
/* How a token is spelled is selected by the spelling class recorded
   for its type in token_spellings[].  In the lines visible here PFILE
   is used only to report a token whose spelling class is not
   handled.  */
3323 static unsigned char *
3324 spell_token (pfile, token, buffer, whitespace)
3325 cpp_reader *pfile; /* Would be nice to be rid of this... */
3327 unsigned char *buffer;
3330 /* Whitespace will not be wanted by handlers of the # and ##
3331 operators calling this function, but will be wanted by the
3332 function that writes out the preprocessed file. */
3333 if (whitespace && token->flags & PREV_WHITESPACE)
/* Dispatch on the spelling class of this token's type.  */
3336 switch (token_spellings[token->type].type)
3338 case SPELL_OPERATOR:
3340 const unsigned char *spelling;
/* A token flagged DIGRAPH takes its spelling from digraph_spellings[],
   which is indexed relative to CPP_FIRST_DIGRAPH; the other assignment
   reads the token type's entry in token_spellings[].
   NOTE(review): the line joining the two assignments is not visible
   in this excerpt -- presumably an else; confirm.  */
3343 if (token->flags & DIGRAPH)
3344 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3346 spelling = token_spellings[token->type].spelling;
/* Walk the NUL-terminated spelling one character at a time.  */
3348 while ((c = *spelling++) != '\0')
/* Copy the text stored with the token verbatim (val.name.len bytes)
   and advance BUFFER past it.  */
3354 memcpy (buffer, token->val.name.text, token->val.name.len);
3355 buffer += token->val.name.len;
/* String and character tokens: the wide types are tested first, then
   the string types, before the stored text is copied.
   NOTE(review): the statements guarded by these two tests are not
   visible here -- presumably they emit the 'L' prefix and the
   delimiter; confirm.  */
3362 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3365 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3368 memcpy (buffer, token->val.name.text, token->val.name.len);
3369 buffer += token->val.name.len;
/* Here the spelling is the single character kept in token->aux.  */
3375 *buffer++ = token->aux;
/* Spelling class not handled above: report it through cpp_ice.  */
3379 cpp_ice (pfile, "Unspellable token");
3386 /* Temporary function for illustrative purposes.  Sets up the
   trigraph map, allocates one token list, and then repeatedly
   lexes a line of PFILE's input into that list.  */
3388 _cpp_lex_file (pfile)
3394 init_trigraph_map ();
3395 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* RECYCLE starts at 0 and is passed to init_token_list on every
   iteration; the loop has no exit condition in its header.  */
3397 for (recycle = 0; ;)
3399 init_token_list (pfile, list, recycle);
3402 _cpp_lex_line (pfile, list);
/* A line whose first token is CPP_EOF marks the end of the input.
   NOTE(review): the guarded statement is not visible here --
   presumably it leaves the loop; confirm.  */
3403 if (list->tokens[0].type == CPP_EOF)
/* The line is a directive with a known handler: run the handler.  */
3406 if (list->dir_handler)
/* A non-zero return from the handler makes us allocate a fresh list
   rather than reuse the current one.
   NOTE(review): presumably the handler has kept the old list, and
   RECYCLE is reset alongside -- neither is visible here; confirm.  */
3408 if (list->dir_handler (pfile))
3410 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
/* Write the tokens of this line to the output.
   NOTE(review): presumably only for non-directive lines; the
   branch structure is not visible in this excerpt.  */
3415 _cpp_output_list (pfile, list);
3419 /* Temporary function for illustrative purposes.  Spells the tokens
   of LIST, starting at list->tokens[0], into PFILE's output area at
   pfile->limit, writing any saved comments ahead of the token they
   are attached to.  */
3421 _cpp_output_list (pfile, list)
/* COMMENT_BEFORE starts out null, so with no saved comments it never
   compares equal to TOKEN and COMMENT is never read.  */
3425 cpp_token *token, *comment, *comment_before = 0;
3427 if (list->comments_used > 0)
/* A comment's aux field is used as an index into list->tokens: it
   selects the token that the comment is written in front of.  */
3429 comment = &list->comments[0];
3430 comment_before = &list->tokens[comment->aux];
3433 token = &list->tokens[0];
3436 /* Output comments if -C. */
3437 while (token == comment_before)
3439 /* Make space for the comment, and copy it out. */
3440 CPP_RESERVE (pfile, TOKEN_LEN (comment));
/* Comments are spelled with the whitespace argument 0, so no leading
   space is added; spell_token returns the new end of the output.  */
3441 pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
3443 /* Stop if no comments left, or no more comments appear
3444 before the current token. */
/* NOTE(review): the statement advancing COMMENT and the statement
   guarded by this test are not visible in this excerpt.  */
3446 if (comment == list->comments + list->comments_used)
3448 comment_before = &list->tokens[comment->aux];
/* Ordinary tokens are spelled with the whitespace argument 1, so a
   token preceded by whitespace gets a single leading space.  */
3451 CPP_RESERVE (pfile, TOKEN_LEN (token));
3452 pfile->limit = spell_token (pfile, token, pfile->limit, 1);
/* Test the type of the token just handled, then step to the next
   one; the CPP_VSPACE token is the last one processed.  */
3454 while (token++->type != CPP_VSPACE);