1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30 # include <sys/mman.h>
/* Low-level character access on a cpp_buffer.  PEEKBUF and GETBUF
   evaluate BUFFER more than once, so the argument must be free of
   side effects.  */
/* PEEKBUF: the character N positions past BUFFER->cur, or EOF when N
   or fewer characters remain before BUFFER->rlimit.  Does not move
   BUFFER->cur.  */
33 #define PEEKBUF(BUFFER, N) \
34 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* GETBUF: yield the character at BUFFER->cur and advance past it, or
   yield EOF (without advancing) once cur has reached rlimit.  */
35 #define GETBUF(BUFFER) \
36 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* FORWARDBUF: move BUFFER->cur by N characters.  No bounds check is
   made against rlimit.  */
37 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands that apply the macros above to CPP_BUFFER (pfile); they
   expect a variable named `pfile' to be in scope at the point of use.  */
39 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
40 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
41 #define GETC() GETBUF (CPP_BUFFER (pfile))
42 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
44 static void skip_block_comment PARAMS ((cpp_reader *));
45 static void skip_line_comment PARAMS ((cpp_reader *));
46 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
47 static int skip_comment PARAMS ((cpp_reader *, int));
48 static int copy_comment PARAMS ((cpp_reader *, int));
49 static void skip_string PARAMS ((cpp_reader *, int));
50 static void parse_string PARAMS ((cpp_reader *, int));
51 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
52 static void null_warning PARAMS ((cpp_reader *, unsigned int));
54 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
56 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
58 static void bump_column PARAMS ((cpp_printer *, unsigned int,
60 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
61 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Grow LIST's name space by half of its current capacity (name_cap)
   plus one, by calling expand_name_space.  LIST is evaluated twice.  */
64 #define auto_expand_name_space(list) \
65 expand_name_space ((list), 1 + (list)->name_cap / 2)
69 void init_trigraph_map PARAMS ((void));
70 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
72 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
73 const unsigned char *));
74 static int skip_block_comment2 PARAMS ((cpp_reader *));
75 static int skip_line_comment2 PARAMS ((cpp_reader *));
76 static void skip_whitespace PARAMS ((cpp_reader *, int));
77 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
79 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
81 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
82 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
83 const unsigned char *,
84 unsigned int, unsigned int));
85 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
87 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
89 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
90 unsigned char *, int));
92 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
95 /* Macros on a cpp_name. */
96 #define INIT_TOKEN_NAME(list, token) \
97 do {(token)->val.name.len = 0; \
98 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
99 (list)->tokens_used = token - (list)->tokens + 1; \
102 /* Maybe put these in the ISTABLE eventually. */
/* IS_HSPACE is true for space and tab only; IS_NEWLINE is true for LF
   and CR.  Both may evaluate C twice, so do not pass an argument with
   side effects.  */
103 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
104 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
106 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
107 character, if any, is in buffer. */
108 #define handle_newline(cur, limit, c) \
110 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
112 CPP_BUMP_LINE_CUR (pfile, (cur)); \
113 pfile->col_adjust = 0; \
/* Helpers for filling in a token array through a cursor.  Every macro
   here refers to a variable named `cur_token' that must be in scope
   where the macro is used; PREV_TOKEN_TYPE and REVISE_TOKEN access
   cur_token[-1], the slot just before the cursor.

   The assignment macros are fully parenthesized, both the TTYPE
   argument and the whole replacement list, so that each one behaves
   as a single expression whatever operators surround the call.  */

/* Nonzero if the token at the cursor does not have PREV_WHITESPACE
   set in its flags.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))

/* Type of the token in the slot before the cursor.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Store TTYPE as the type of the token at the cursor, then advance
   the cursor by one slot.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))

/* Overwrite the type of the token before the cursor; the cursor does
   not move.  */
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))

/* Step the cursor back one slot and store TTYPE as that token's type.  */
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))

/* As BACKUP_TOKEN, and additionally set the DIGRAPH flag on the token
   the cursor now points at.  Statement macro; needs a trailing `;'.  */
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN (ttype); cur_token->flags |= DIGRAPH; } while (0)
125 /* An upper bound on the number of bytes needed to spell a token,
126 including preceding whitespace. */
/* Every token is given a fixed allowance of 5; the stored spelling
   length (val.name.len) is added only for token types whose spelling
   class in token_spellings is greater than SPELL_NONE.
   NOTE(review): the derivation of the constant 5 is not visible
   here -- confirm before relying on it.  TOKEN is evaluated twice.  */
127 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
128 SPELL_NONE ? (token)->val.name.len: 0))
132 /* Order here matters. Those beyond SPELL_NONE store their spelling
133 in the token list, and its length in the token->val.name.len. */
138 SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
/* Entry constructors for a table of token spellings.  Each expands to
   one brace-enclosed initializer {spelling class, spelling} followed
   by a comma; the first argument E is not used by the expansion.
   Only T casts the spelling to const U_CHAR *.  Presumably these are
   invoked by TTYPE_TABLE, once per token type -- its definition is
   not visible here, so confirm.  */
143 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
144 #define I(e, s) {SPELL_IDENT, s},
145 #define S(e, s) {SPELL_STRING, s},
146 #define C(e, s) {SPELL_CHAR, s},
147 #define N(e, s) {SPELL_NONE, s},
149 static const struct token_spelling
151 ENUM_BITFIELD(spell_type) type : CHAR_BIT;
152 const U_CHAR *spelling;
153 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
/* The new capacity is N plus twice the old capacity, so repeated calls
   grow the buffer geometrically.  The amount already written is
   sampled before the reallocation and re-applied afterwards with
   CPP_SET_WRITTEN.  NOTE(review): presumably this is needed because
   the write position is kept as a pointer into the old buffer --
   confirm against the definition of CPP_SET_WRITTEN.  */
164 _cpp_grow_token_buffer (pfile, n)
168 long old_written = CPP_WRITTEN (pfile);
169 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
170 pfile->token_buffer = (U_CHAR *)
171 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
172 CPP_SET_WRITTEN (pfile, old_written);
175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
176 If BUFFER != NULL, then use the LENGTH characters in BUFFER
177 as the new input buffer.
178 Return the new buffer, or NULL on failure. */
181 cpp_push_buffer (pfile, buffer, length)
183 const U_CHAR *buffer;
186 cpp_buffer *buf = CPP_BUFFER (pfile);
188 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
190 cpp_fatal (pfile, "macro or `#include' recursion too deep");
194 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
196 new->buf = new->cur = buffer;
197 new->rlimit = buffer + length;
200 new->line_base = NULL;
202 CPP_BUFFER (pfile) = new;
207 cpp_pop_buffer (pfile)
210 cpp_buffer *buf = CPP_BUFFER (pfile);
211 if (ACTIVE_MARK_P (pfile))
212 cpp_ice (pfile, "mark active in cpp_pop_buffer");
216 _cpp_unwind_if_stack (pfile, buf);
218 free ((PTR) buf->buf);
219 if (pfile->system_include_depth)
220 pfile->system_include_depth--;
221 if (pfile->potential_control_macro)
223 if (buf->inc->cmacro != NEVER_REREAD)
224 buf->inc->cmacro = pfile->potential_control_macro;
225 pfile->potential_control_macro = 0;
227 pfile->input_stack_listing_current = 0;
228 /* If the file will not be included again, then close it. */
229 if (DO_NOT_REREAD (buf->inc))
231 close (buf->inc->fd);
237 cpp_hashnode *m = buf->macro;
240 if ((m->type == T_FMACRO && buf->mapped)
241 || m->type == T_SPECLINE || m->type == T_FILE
242 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
243 || m->type == T_STDC)
244 free ((PTR) buf->buf);
246 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
248 pfile->buffer_stack_depth--;
249 return CPP_BUFFER (pfile);
252 /* Deal with the annoying semantics of fwrite. */
254 safe_fwrite (pfile, buf, len, fp)
264 count = fwrite (buf, 1, len, fp);
273 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
276 /* Notify the compiler proper that the current line number has jumped,
277 or the current file name has changed. */
280 output_line_command (pfile, print, line)
285 cpp_buffer *ip = cpp_file_buffer (pfile);
286 enum { same = 0, enter, leave, rname } change;
287 static const char * const codes[] = { "", " 1", " 2", "" };
289 if (CPP_OPTION (pfile, no_line_commands))
292 /* Determine whether the current filename has changed, and if so,
293 how. 'nominal_fname' values are unique, so they can be compared
294 by comparing pointers. */
295 if (ip->nominal_fname == print->last_fname)
299 if (pfile->buffer_stack_depth == print->last_bsd)
303 if (pfile->buffer_stack_depth > print->last_bsd)
307 print->last_bsd = pfile->buffer_stack_depth;
309 print->last_fname = ip->nominal_fname;
311 /* If the current file has not changed, we can output a few newlines
312 instead if we want to increase the line number by a small amount.
313 We cannot do this if print->lineno is zero, because that means we
314 haven't output any line commands yet. (The very first line
315 command output is a `same_file' command.) */
316 if (change == same && print->lineno != 0
317 && line >= print->lineno && line < print->lineno + 8)
319 while (line > print->lineno)
321 putc ('\n', print->outf);
327 #ifndef NO_IMPLICIT_EXTERN_C
328 if (CPP_OPTION (pfile, cplusplus))
329 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
331 ip->inc->sysp ? " 3" : "",
332 (ip->inc->sysp == 2) ? " 4" : "");
335 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
337 ip->inc->sysp ? " 3" : "");
338 print->lineno = line;
341 /* Write the contents of the token_buffer to the output stream, and
342 clear the token_buffer. Also handles generating line commands and
343 keeping track of file transitions. */
346 cpp_output_tokens (pfile, print)
352 if (CPP_WRITTEN (pfile) - print->written)
354 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
356 safe_fwrite (pfile, pfile->token_buffer,
357 CPP_WRITTEN (pfile) - print->written, print->outf);
360 ip = cpp_file_buffer (pfile);
362 output_line_command (pfile, print, CPP_BUF_LINE (ip));
364 CPP_SET_WRITTEN (pfile, print->written);
367 /* Helper for cpp_output_list - increases the column number to match
368 what we expect it to be. */
371 bump_column (print, from, to)
373 unsigned int from, to;
375 unsigned int tabs, spcs;
376 unsigned int delta = to - from;
378 /* Only if FROM is 0, advance by tabs. */
380 tabs = delta / 8, spcs = delta % 8;
382 tabs = 0, spcs = delta;
384 while (tabs--) putc ('\t', print->outf);
385 while (spcs--) putc (' ', print->outf);
388 /* Write out the list L onto pfile->token_buffer. This function is
391 1) pfile->token_buffer is not going to continue to exist.
392 2) At the moment, tokens don't carry the information described
393 in cpplib.h; they are all strings.
394 3) The list has to be a complete line, and has to be written starting
395 at the beginning of a line. */
398 cpp_output_list (pfile, print, list)
401 const cpp_toklist *list;
404 unsigned int curcol = 1;
406 /* XXX Probably does not do what is intended. */
407 if (print->lineno != list->line)
408 output_line_command (pfile, print, list->line);
410 for (i = 0; i < list->tokens_used; i++)
412 if (TOK_TYPE (list, i) == CPP_VSPACE)
414 output_line_command (pfile, print, list->tokens[i].aux);
418 if (curcol < TOK_COL (list, i))
420 /* Insert space to bring the column to what it should be. */
421 bump_column (print, curcol - 1, TOK_COL (list, i));
422 curcol = TOK_COL (list, i);
424 /* XXX We may have to insert space to prevent an accidental
426 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
427 curcol += TOK_LEN (list, i);
431 /* Scan a string (which may have escape marks), perform macro expansion,
432 and write the result to the token_buffer. */
435 _cpp_expand_to_buffer (pfile, buf, length)
441 enum cpp_ttype token;
446 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
450 /* Copy the buffer, because it might be in an unsafe place - for
451 example, a sequence on the token_buffer, where the pointers will
452 be invalidated if we enlarge the token_buffer. */
453 buf1 = alloca (length);
454 memcpy (buf1, buf, length);
456 /* Set up the input on the input stack. */
457 stop = CPP_BUFFER (pfile);
458 if (cpp_push_buffer (pfile, buf1, length) == NULL)
460 CPP_BUFFER (pfile)->has_escapes = 1;
462 /* Scan the input, create the output. */
465 token = cpp_get_token (pfile);
466 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
471 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
474 cpp_scan_buffer_nooutput (pfile)
477 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
478 enum cpp_ttype token;
479 unsigned int old_written = CPP_WRITTEN (pfile);
480 /* In no-output mode, we can ignore everything but directives. */
483 if (! pfile->only_seen_white)
484 _cpp_skip_rest_of_line (pfile);
485 token = cpp_get_token (pfile);
486 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
489 CPP_SET_WRITTEN (pfile, old_written);
492 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
495 cpp_scan_buffer (pfile, print)
499 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
500 enum cpp_ttype token;
504 token = cpp_get_token (pfile);
505 if (token == CPP_VSPACE || token == CPP_EOF
506 /* XXX Temporary kluge - force flush after #include only */
507 || (token == CPP_DIRECTIVE
508 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
510 cpp_output_tokens (pfile, print);
511 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
517 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
520 cpp_file_buffer (pfile)
525 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
531 /* Token-buffer helper functions. */
533 /* Expand a token list's string space. It is *vital* that
534 list->tokens_used is correct, to get pointer fix-up right. */
/* LEN is added to list->name_cap and list->namebuf is reallocated to
   the new capacity.  */
536 expand_name_space (list, len)
540 const U_CHAR *old_namebuf;
/* Remember where the buffer was, so a move can be detected below.  */
542 old_namebuf = list->namebuf;
543 list->name_cap += len;
544 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
546 /* Fix up token text pointers. */
/* Needed only when xrealloc returned a different address.  The first
   tokens_used tokens are visited, and only those whose spelling class
   is greater than SPELL_NONE are adjusted; each text pointer is
   shifted by the distance the buffer moved.  */
547 if (list->namebuf != old_namebuf)
551 for (i = 0; i < list->tokens_used; i++)
552 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
553 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
557 /* Expand the number of tokens in a list. */
559 _cpp_expand_token_space (list, count)
565 list->tokens_cap += count;
566 n = list->tokens_cap;
567 if (list->flags & LIST_OFFSET)
569 list->tokens = (cpp_token *)
570 xrealloc (list->tokens, n * sizeof (cpp_token));
571 if (list->flags & LIST_OFFSET)
572 list->tokens++; /* Skip the dummy. */
575 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
576 an extra token in front of the token list, as this allows the lexer
577 to always peek at the previous token without worrying about
578 underflowing the list, and some initial space. Otherwise, no
579 token- or name-space is allocated, and there is no dummy token. */
581 _cpp_init_toklist (list, flags)
585 /* We malloc zero bytes because we may want to realloc later, and
586 some old implementations don't like realloc-ing a null pointer. */
587 if (flags == NO_DUMMY_TOKEN)
589 list->tokens_cap = 0;
590 list->tokens = (cpp_token *) malloc (0);
596 /* Initialize token space. Put a dummy token before the start
597 that will fail matches. */
598 list->tokens_cap = 256; /* 4K's worth. */
599 list->tokens = (cpp_token *)
600 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
601 list->tokens[0].type = CPP_EOF;
604 /* Initialize name space. */
605 list->name_cap = 1024;
606 list->flags = LIST_OFFSET;
609 /* Allocate name space. */
610 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
612 _cpp_clear_toklist (list);
615 /* Clear a token list. */
617 _cpp_clear_toklist (list)
620 list->tokens_used = 0;
623 list->flags &= LIST_OFFSET; /* clear all but that one */
626 /* Free a token list. Does not free the list itself, which may be
627 embedded in a larger structure. */
629 _cpp_free_toklist (list)
632 if (list->flags & LIST_OFFSET)
633 free (list->tokens - 1); /* Backup over dummy token. */
636 free (list->namebuf);
639 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
640 COPY is assumed not to be initialized. The comment space is not
643 _cpp_slice_toklist (copy, start, finish)
645 const cpp_token *start, *finish;
651 copy->tokens_cap = n;
652 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
653 memcpy (copy->tokens, start, n * sizeof (cpp_token));
656 for (i = 0; i < n; i++)
657 if (token_spellings[start[i].type].type > SPELL_NONE)
658 bytes += start[i].val.name.len;
660 copy->namebuf = xmalloc (bytes);
662 for (i = 0; i < n; i++)
663 if (token_spellings[start[i].type].type > SPELL_NONE)
665 memcpy (copy->namebuf + bytes,
666 start[i].val.name.text, start[i].val.name.len);
667 copy->tokens[i].val.name.text = copy->namebuf + bytes;
668 bytes += start[i].val.name.len;
671 copy->tokens_cap = n;
672 copy->tokens_used = n;
673 copy->name_used = bytes;
674 copy->name_cap = bytes;
680 /* Shrink a token list down to the minimum size. */
682 _cpp_squeeze_toklist (list)
686 const U_CHAR *old_namebuf;
688 if (list->flags & LIST_OFFSET)
691 memmove (list->tokens, list->tokens + 1,
692 list->tokens_used * sizeof (cpp_token));
693 list->tokens = xrealloc (list->tokens,
694 list->tokens_used * sizeof (cpp_token));
695 list->flags &= ~LIST_OFFSET;
698 list->tokens = xrealloc (list->tokens,
699 list->tokens_used * sizeof (cpp_token));
700 list->tokens_cap = list->tokens_used;
702 old_namebuf = list->namebuf;
703 list->namebuf = xrealloc (list->namebuf, list->name_used);
704 list->name_cap = list->name_used;
706 /* Fix up token text pointers. */
707 delta = list->namebuf - old_namebuf;
712 for (i = 0; i < list->tokens_used; i++)
713 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
714 list->tokens[i].val.name.text += delta;
718 /* Compare two tokens. */
720 _cpp_equiv_tokens (a, b)
721 const cpp_token *a, *b;
723 if (a->type != b->type
724 || a->flags != b->flags
728 if (token_spellings[a->type].type > SPELL_NONE)
730 if (a->val.name.len != b->val.name.len
731 || ustrncmp(a->val.name.text,
739 /* Compare two token lists. */
741 _cpp_equiv_toklists (a, b)
742 const cpp_toklist *a, *b;
746 if (a->tokens_used != b->tokens_used)
749 for (i = 0; i < a->tokens_used; i++)
750 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
755 /* Scan until we encounter a token of type STOP or a newline, and
756 create a token list for it. Does not macro-expand or execute
757 directives. The final token is not included in the list or
758 consumed from the input. Returns the type of the token stopped at. */
761 _cpp_scan_until (pfile, list, stop)
771 _cpp_clear_toklist (list);
772 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
774 written = CPP_WRITTEN (pfile);
779 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
780 type = _cpp_lex_token (pfile);
781 len = CPP_WRITTEN (pfile) - written;
782 CPP_SET_WRITTEN (pfile, written);
783 if (type == CPP_HSPACE)
785 if (CPP_PEDANTIC (pfile))
786 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
790 else if (type == CPP_COMMENT)
791 /* Only happens when processing -traditional macro definitions.
792 Do not give this a token entry, but do not change space_before
796 if (list->tokens_used >= list->tokens_cap)
797 _cpp_expand_token_space (list, 256);
798 if (list->name_used + len >= list->name_cap)
799 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
801 if (type == CPP_MACRO)
804 if (type == CPP_VSPACE || type == stop)
808 TOK_TYPE (list, i) = type;
809 TOK_COL (list, i) = col;
810 TOK_AUX (list, i) = 0;
811 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
813 TOK_LEN (list, i) = len;
814 if (token_spellings[type].type > SPELL_NONE)
816 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
817 TOK_NAME (list, i) = list->namebuf + list->name_used;
818 list->name_used += len;
821 TOK_NAME (list, i) = token_spellings[type].spelling;
826 /* XXX Temporary kluge: put back the newline (or whatever). */
829 /* Don't consider the first token to have white before. */
830 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
834 /* Skip a C-style block comment. We know it's a comment, and point is
835 at the second character of the starter. */
837 skip_block_comment (pfile)
840 unsigned int line, col;
841 const U_CHAR *limit, *cur;
844 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
845 col = CPP_BUF_COL (CPP_BUFFER (pfile));
846 limit = CPP_BUFFER (pfile)->rlimit;
847 cur = CPP_BUFFER (pfile)->cur;
852 if (c == '\n' || c == '\r')
854 /* \r cannot be a macro escape marker here. */
855 if (!ACTIVE_MARK_P (pfile))
856 CPP_BUMP_LINE_CUR (pfile, cur);
860 /* Check for terminator. */
861 if (cur < limit && *cur == '/')
864 /* Warn about comment starter embedded in comment. */
865 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
866 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
867 cur - CPP_BUFFER (pfile)->line_base,
868 "'/*' within comment");
872 cpp_error_with_line (pfile, line, col, "unterminated comment");
875 CPP_BUFFER (pfile)->cur = cur + 1;
878 /* Skip a C++/Chill line comment. We know it's a comment, and point
879 is at the second character of the initiator. */
881 skip_line_comment (pfile)
889 /* We don't have to worry about EOF in here. */
892 /* Don't consider final '\n' to be part of comment. */
898 /* \r cannot be a macro escape marker here. */
899 if (!ACTIVE_MARK_P (pfile))
900 CPP_BUMP_LINE (pfile);
901 if (CPP_OPTION (pfile, warn_comments))
902 cpp_warning (pfile, "backslash-newline within line comment");
907 /* Skip a comment - C, C++, or Chill style. M is the first character
908 of the comment marker. If this really is a comment, skip to its
909 end and return ' '. If this is not a comment, return M (which will
913 skip_comment (pfile, m)
917 if (m == '/' && PEEKC() == '*')
919 skip_block_comment (pfile);
922 else if (m == '/' && PEEKC() == '/')
924 if (CPP_IN_SYSTEM_HEADER (pfile))
926 /* We silently allow C++ comments in system headers, irrespective
927 of conformance mode, because lots of busted systems do that
928 and trying to clean it up in fixincludes is a nightmare. */
929 skip_line_comment (pfile);
932 else if (CPP_OPTION (pfile, cplusplus_comments))
934 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
936 if (CPP_WTRADITIONAL (pfile))
938 "C++ style comments are not allowed in traditional C");
939 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
941 "C++ style comments are not allowed in ISO C89");
942 if (CPP_WTRADITIONAL (pfile)
943 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
945 "(this will be reported only once per input file)");
946 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
948 skip_line_comment (pfile);
954 else if (m == '-' && PEEKC() == '-'
955 && CPP_OPTION (pfile, chill))
957 skip_line_comment (pfile);
964 /* Identical to skip_comment except that it copies the comment into the
965 token_buffer. This is used if !discard_comments. */
967 copy_comment (pfile, m)
971 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
974 if (skip_comment (pfile, m) == m)
977 limit = CPP_BUFFER (pfile)->cur;
978 CPP_RESERVE (pfile, limit - start + 2);
979 CPP_PUTC_Q (pfile, m);
980 for (; start <= limit; start++)
982 CPP_PUTC_Q (pfile, *start);
988 null_warning (pfile, count)
993 cpp_warning (pfile, "embedded null character ignored");
995 cpp_warning (pfile, "embedded null characters ignored");
998 /* Skip whitespace \-newline and comments. Does not macro-expand. */
1001 _cpp_skip_hspace (pfile)
1004 unsigned int null_count = 0;
1012 else if (is_hspace(c))
1014 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1015 cpp_pedwarn (pfile, "%s in preprocessing directive",
1016 c == '\f' ? "formfeed" : "vertical tab");
1022 /* \r is a backslash-newline marker if !has_escapes, and
1023 a deletable-whitespace or no-reexpansion marker otherwise. */
1024 if (CPP_BUFFER (pfile)->has_escapes)
1032 CPP_BUMP_LINE (pfile);
1034 else if (c == '/' || c == '-')
1036 c = skip_comment (pfile, c);
1046 null_warning (pfile, null_count);
1049 /* Read and discard the rest of the current line. */
1052 _cpp_skip_rest_of_line (pfile)
1066 if (! CPP_BUFFER (pfile)->has_escapes)
1067 CPP_BUMP_LINE (pfile);
1072 skip_string (pfile, c);
1077 skip_comment (pfile, c);
1082 if (CPP_PEDANTIC (pfile))
1083 cpp_pedwarn (pfile, "%s in preprocessing directive",
1084 c == '\f' ? "formfeed" : "vertical tab");
1091 /* Parse an identifier starting with C. */
1094 _cpp_parse_name (pfile, c)
1106 /* $ is not a legal identifier character in the standard, but is
1107 commonly accepted as an extension. Don't warn about it in
1108 skipped conditional blocks. */
1109 if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1110 cpp_pedwarn (pfile, "`$' in identifier");
1112 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1113 CPP_PUTC_Q (pfile, c);
1121 /* Parse and skip over a string starting with C. A single quoted
1122 string is treated like a double -- some programs (e.g., troff) are
1123 perverse this way. (However, a single quoted string is not allowed
1124 to extend over multiple lines.) */
1126 skip_string (pfile, c)
1130 unsigned int start_line, start_column;
1131 unsigned int null_count = 0;
1133 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1134 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1141 cpp_error_with_line (pfile, start_line, start_column,
1142 "unterminated string or character constant");
1143 if (pfile->multiline_string_line != start_line
1144 && pfile->multiline_string_line != 0)
1145 cpp_error_with_line (pfile,
1146 pfile->multiline_string_line, -1,
1147 "possible real start of unterminated constant");
1148 pfile->multiline_string_line = 0;
1156 CPP_BUMP_LINE (pfile);
1157 /* In Fortran and assembly language, silently terminate
1158 strings of either variety at end of line. This is a
1159 kludge around not knowing where comments are in these
1161 if (CPP_OPTION (pfile, lang_fortran)
1162 || CPP_OPTION (pfile, lang_asm))
1167 /* Character constants may not extend over multiple lines.
1168 In Standard C, neither may strings. We accept multiline
1169 strings as an extension. */
1172 cpp_error_with_line (pfile, start_line, start_column,
1173 "unterminated character constant");
1177 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1178 cpp_pedwarn_with_line (pfile, start_line, start_column,
1179 "string constant runs past end of line");
1180 if (pfile->multiline_string_line == 0)
1181 pfile->multiline_string_line = start_line;
1185 if (CPP_BUFFER (pfile)->has_escapes)
1187 cpp_ice (pfile, "\\r escape inside string constant");
1191 /* Backslash newline is replaced by nothing at all. */
1192 CPP_BUMP_LINE (pfile);
1208 if (null_count == 1)
1209 cpp_warning (pfile, "null character in string or character constant");
1210 else if (null_count > 1)
1211 cpp_warning (pfile, "null characters in string or character constant");
1214 /* Parse a string and copy it to the output. */
1217 parse_string (pfile, c)
1221 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1222 const U_CHAR *limit;
1224 skip_string (pfile, c);
1226 limit = CPP_BUFFER (pfile)->cur;
1227 CPP_RESERVE (pfile, limit - start + 2);
1228 CPP_PUTC_Q (pfile, c);
1229 for (; start < limit; start++)
1231 CPP_PUTC_Q (pfile, *start);
1234 /* Get the next token, and add it to the text in pfile->token_buffer.
1235 Return the kind of token we got. */
1238 _cpp_lex_token (pfile)
1242 enum cpp_ttype token;
1244 if (CPP_BUFFER (pfile) == NULL)
1255 if (PEEKC () == '=')
1259 if (CPP_OPTION (pfile, discard_comments))
1260 c = skip_comment (pfile, c);
1262 c = copy_comment (pfile, c);
1266 /* Comments are equivalent to spaces.
1267 For -traditional, a comment is equivalent to nothing. */
1268 if (!CPP_OPTION (pfile, discard_comments))
1270 else if (CPP_TRADITIONAL (pfile))
1274 CPP_PUTC (pfile, c);
1279 CPP_PUTC (pfile, c);
1286 CPP_PUTC (pfile, c2);
1289 else if (c2 == '%' && PEEKN (1) == ':')
1291 /* Digraph: "%:" == "#". */
1293 CPP_RESERVE (pfile, 2);
1294 CPP_PUTC_Q (pfile, c2);
1295 CPP_PUTC_Q (pfile, GETC ());
1303 parse_string (pfile, c);
1304 return c == '\'' ? CPP_CHAR : CPP_STRING;
1307 if (!CPP_OPTION (pfile, dollars_in_ident))
1313 /* Digraph: ":>" == "]". */
1315 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1323 if (c2 == c || c2 == '=')
1328 /* Digraphs: "%:" == "#", "%>" == "}". */
1333 CPP_RESERVE (pfile, 2);
1334 CPP_PUTC_Q (pfile, c);
1335 CPP_PUTC_Q (pfile, c2);
1341 CPP_RESERVE (pfile, 2);
1342 CPP_PUTC_Q (pfile, c);
1343 CPP_PUTC_Q (pfile, c2);
1344 return CPP_OPEN_BRACE;
1346 /* else fall through */
1352 if (PEEKC () == '=')
1360 if (CPP_OPTION (pfile, chill))
1361 goto comment; /* Chill style comment */
1369 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1371 /* In C++, there's a ->* operator. */
1373 CPP_RESERVE (pfile, 4);
1374 CPP_PUTC_Q (pfile, c);
1375 CPP_PUTC_Q (pfile, GETC ());
1376 CPP_PUTC_Q (pfile, GETC ());
1384 if (pfile->parsing_include_directive)
1388 CPP_PUTC (pfile, c);
1392 if (c == '\n' || c == EOF)
1395 "missing '>' in `#include <FILENAME>'");
1400 if (!CPP_BUFFER (pfile)->has_escapes)
1402 /* Backslash newline is replaced by nothing. */
1403 CPP_ADJUST_WRITTEN (pfile, -1);
1404 CPP_BUMP_LINE (pfile);
1408 /* We might conceivably get \r- or \r<space> in
1409 here. Just delete 'em. */
1411 if (d != '-' && d != ' ')
1412 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1413 CPP_ADJUST_WRITTEN (pfile, -1);
1419 /* Digraphs: "<%" == "{", "<:" == "[". */
1424 CPP_RESERVE (pfile, 2);
1425 CPP_PUTC_Q (pfile, c);
1426 CPP_PUTC_Q (pfile, c2);
1427 return CPP_CLOSE_BRACE;
1431 /* else fall through */
1436 /* GNU C++ supports MIN and MAX operators <? and >?. */
1437 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1440 CPP_RESERVE (pfile, 3);
1441 CPP_PUTC_Q (pfile, c);
1442 CPP_PUTC_Q (pfile, c2);
1443 if (PEEKC () == '=')
1444 CPP_PUTC_Q (pfile, GETC ());
1451 CPP_PUTC (pfile, c);
1456 /* In C++ there's a .* operator. */
1457 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1460 if (c2 == '.' && PEEKN(1) == '.')
1462 CPP_RESERVE (pfile, 3);
1463 CPP_PUTC_Q (pfile, '.');
1464 CPP_PUTC_Q (pfile, '.');
1465 CPP_PUTC_Q (pfile, '.');
1467 return CPP_ELLIPSIS;
1472 CPP_RESERVE (pfile, 2);
1473 CPP_PUTC_Q (pfile, c);
1474 CPP_PUTC_Q (pfile, GETC ());
1479 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1481 CPP_PUTC (pfile, c);
1483 parse_string (pfile, c);
1484 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1488 case '0': case '1': case '2': case '3': case '4':
1489 case '5': case '6': case '7': case '8': case '9':
1494 CPP_RESERVE (pfile, 2);
1495 CPP_PUTC_Q (pfile, c);
1499 if (!is_numchar(c) && c != '.'
1500 && ((c2 != 'e' && c2 != 'E'
1501 && ((c2 != 'p' && c2 != 'P')
1502 || CPP_OPTION (pfile, c89)))
1503 || (c != '+' && c != '-')))
1509 case 'b': case 'c': case 'd': case 'h': case 'o':
1510 case 'B': case 'C': case 'D': case 'H': case 'O':
1511 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1513 CPP_RESERVE (pfile, 2);
1514 CPP_PUTC_Q (pfile, c);
1515 CPP_PUTC_Q (pfile, '\'');
1521 goto chill_number_eof;
1524 CPP_PUTC (pfile, c);
1528 CPP_RESERVE (pfile, 2);
1529 CPP_PUTC_Q (pfile, c);
1542 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1543 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1544 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1545 case 'x': case 'y': case 'z':
1546 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1547 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1548 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1551 _cpp_parse_name (pfile, c);
1554 case ' ': case '\t': case '\v': case '\f': case '\0':
1563 CPP_PUTC (pfile, c);
1565 if (c == EOF || !is_hspace(c))
1570 null_warning (pfile, null_count);
1575 if (CPP_BUFFER (pfile)->has_escapes)
1580 if (pfile->output_escapes)
1581 CPP_PUTS (pfile, "\r-", 2);
1582 _cpp_parse_name (pfile, GETC ());
1587 /* "\r " means a space, but only if necessary to prevent
1588 accidental token concatenation. */
1589 CPP_RESERVE (pfile, 2);
1590 if (pfile->output_escapes)
1591 CPP_PUTC_Q (pfile, '\r');
1592 CPP_PUTC_Q (pfile, c);
1597 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1603 /* Backslash newline is ignored. */
1604 if (!ACTIVE_MARK_P (pfile))
1605 CPP_BUMP_LINE (pfile);
1610 CPP_PUTC (pfile, c);
1613 case '(': token = CPP_OPEN_PAREN; goto char1;
1614 case ')': token = CPP_CLOSE_PAREN; goto char1;
1615 case '{': token = CPP_OPEN_BRACE; goto char1;
1616 case '}': token = CPP_CLOSE_BRACE; goto char1;
1617 case ',': token = CPP_COMMA; goto char1;
1618 case ';': token = CPP_SEMICOLON; goto char1;
1624 CPP_PUTC (pfile, c);
1629 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1630 Caller is expected to have checked no_macro_expand.
The candidate name is the token_buffer text from offset WRITTEN up to the
current write position; it is looked up with cpp_lookup and then handled
according to the hash node's type. */
1632 maybe_macroexpand (pfile, written)
1636 U_CHAR *macro = pfile->token_buffer + written;
1637 size_t len = CPP_WRITTEN (pfile) - written;
1638 cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
1640 /* cpp_lookup never returns null. */
1641 if (hp->type == T_VOID)
1643 if (hp->disabled || hp->type == T_IDENTITY)
1645 if (pfile->output_escapes)
1647 /* Insert a no-reexpand marker before IDENT. */
1648 CPP_RESERVE (pfile, 2);
1649 CPP_ADJUST_WRITTEN (pfile, 2);
1650 macro = pfile->token_buffer + written; /* Recomputed after CPP_RESERVE; presumably the buffer may have moved. */
1652 memmove (macro + 2, macro, len); /* Shift the name right to open a two-byte gap for the marker. */
1658 if (hp->type == T_EMPTY)
1660 /* Special case optimization: macro expands to nothing. */
1661 CPP_SET_WRITTEN (pfile, written);
1662 CPP_PUTC_Q (pfile, ' '); /* The name is replaced by a single space. */
1666 /* If macro wants an arglist, verify that a '(' follows. */
1667 if (hp->type == T_FMACRO)
1669 int macbuf_whitespace = 0;
1672 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1674 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1677 _cpp_skip_hspace (pfile);
1684 if (point != CPP_BUFFER (pfile)->cur)
1685 macbuf_whitespace = 1; /* Horizontal space was skipped inside a macro buffer. */
1689 goto not_macro_call;
1690 cpp_pop_buffer (pfile);
1693 CPP_SET_MARK (pfile);
1696 _cpp_skip_hspace (pfile);
1703 CPP_GOTO_MARK (pfile);
1708 if (macbuf_whitespace)
1709 CPP_PUTC (pfile, ' '); /* Emit one space for the whitespace skipped above. */
1711 /* K+R treated this as a hard error. */
1712 if (CPP_WTRADITIONAL (pfile))
1714 "function macro %s must be used with arguments in traditional C",
1721 /* This is now known to be a macro call.
1722 Expand the macro, reading arguments as needed,
1723 and push the expansion on the input stack. */
1724 _cpp_macroexpand (pfile, hp);
1725 CPP_SET_WRITTEN (pfile, written); /* Drop the macro name from the output. */
1729 /* Complain about \v or \f in a preprocessing directive (constraint
1730 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC.
P points at the text to examine and LEN is its length; the caller visible
in this file passes the text the lexer has just appended to the token
buffer. */
1732 pedantic_whitespace (pfile, p, len)
1740 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1741 else if (*p == '\f')
1742 cpp_pedwarn (pfile, "form feed in preprocessing directive");
/* Token reader built on _cpp_lex_token. The visible code post-processes
the lexed token: it maintains the only_seen_white and
potential_control_macro state, hands directives to _cpp_handle_directive,
offers names to maybe_macroexpand unless no_macro_expand is set, and pops
exhausted buffers with cpp_pop_buffer. */
1750 cpp_get_token (pfile)
1753 enum cpp_ttype token;
1754 long written = CPP_WRITTEN (pfile); /* Output position on entry; locates this token's text. */
1758 token = _cpp_lex_token (pfile);
1763 if (pfile->skipping)
1765 pfile->potential_control_macro = 0;
1766 pfile->only_seen_white = 0;
1774 if (pfile->only_seen_white == 0)
1775 pfile->only_seen_white = 1;
1776 CPP_BUMP_LINE (pfile);
1780 pfile->potential_control_macro = 0;
1781 if (!pfile->only_seen_white)
1783 /* XXX shouldn't have to do this - remove the hash or %: from
1784 the token buffer. */
1785 if (CPP_PWRITTEN (pfile)[-1] == '#')
1786 CPP_ADJUST_WRITTEN (pfile, -1);
1788 CPP_ADJUST_WRITTEN (pfile, -2); /* Two characters, matching the two-character %: spelling. */
1790 if (_cpp_handle_directive (pfile))
1792 token = CPP_DIRECTIVE;
1795 pfile->only_seen_white = 0;
1796 CPP_PUTC (pfile, '#');
1800 if (pfile->skipping)
1802 pfile->potential_control_macro = 0;
1803 pfile->only_seen_white = 0;
1804 if (! pfile->no_macro_expand
1805 && maybe_macroexpand (pfile, written))
1810 /* Do not run this case through the 'skipping' logic. */
1812 if (CPP_BUFFER (pfile) == NULL)
1814 macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
1816 cpp_pop_buffer (pfile);
1822 if (pfile->skipping)
1824 CPP_SET_WRITTEN (pfile, written); /* While skipping, discard what the lexer wrote. */
1830 /* Like cpp_get_token, but skip spaces and comments.
A token counts as skippable when its type is CPP_COMMENT, CPP_HSPACE or
CPP_VSPACE; the output position saved on entry is restored so the
skipped text does not accumulate in the token buffer. */
1833 cpp_get_non_space_token (pfile)
1836 int old_written = CPP_WRITTEN (pfile); /* Output position to restore for skipped tokens. */
1839 enum cpp_ttype token = cpp_get_token (pfile);
1840 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1842 CPP_SET_WRITTEN (pfile, old_written);
1846 /* Like cpp_get_token, except that it does not execute directives,
1847 does not consume vertical space, and discards horizontal space.
Tokens come straight from _cpp_lex_token; names are still offered to
maybe_macroexpand unless no_macro_expand is set. */
1849 _cpp_get_directive_token (pfile)
1853 enum cpp_ttype token;
1857 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base); /* True when reading starts at the beginning of a line. */
1858 old_written = CPP_WRITTEN (pfile);
1859 token = _cpp_lex_token (pfile);
1866 /* Put it back and return VSPACE. */
1868 CPP_ADJUST_WRITTEN (pfile, -1);
1872 /* The purpose of this rather strange check is to prevent pedantic
1873 warnings for ^L in an #ifdefed out block. */
1874 if (CPP_PEDANTIC (pfile) && ! at_bol)
1875 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1876 CPP_WRITTEN (pfile) - old_written);
1877 CPP_SET_WRITTEN (pfile, old_written); /* Discard the text that was just lexed. */
1882 if (! pfile->no_macro_expand
1883 && maybe_macroexpand (pfile, old_written))
1888 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1890 cpp_pop_buffer (pfile);
1894 /* This can happen for files that don't end with a newline,
1895 and for cpp_define and friends. Pretend they do, so
1896 callers don't have to deal. A warning will be issued by
1897 someone else, if necessary. */
1902 /* Determine the current line and column. Used by _cpp_prescan when it
needs a position for a warning.
Scans the bytes in [START, LIMIT). *LINEP supplies the line count on
entry, and both '\n' and '\r' are treated as line terminators. The
caller uses the returned pointer as the base of the current line when
it computes a column. */
1904 find_position (start, limit, linep)
1907 unsigned long *linep;
1909 unsigned long line = *linep;
1910 U_CHAR *lbase = start; /* Start of the line currently being scanned. */
1911 while (start < limit)
1913 U_CHAR ch = *start++;
1914 if (ch == '\n' || ch == '\r')
1924 /* The following table is used by _cpp_prescan. If we have
1925 designated initializers, it can be constant data; otherwise, it is
1926 set up at runtime by _cpp_init_input_buffer. */
1928 #if (GCC_VERSION >= 2007)
1929 #define init_chartab() /* nothing */
1930 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1932 #define s(p, v) [p] = v,
1934 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1935 static void init_chartab PARAMS ((void)) { \
1936 unsigned char *x = chartab;
1938 #define s(p, v) x[p] = v;
1941 /* Table of characters that can't be handled in the inner loop.
1942 Also contains the mapping between trigraph third characters and their replacements. */
1944 #define SPECCASE_CR 1
1945 #define SPECCASE_BACKSLASH 2
1946 #define SPECCASE_QUESTION 3
1949 s('\r', SPECCASE_CR)
1950 s('\\', SPECCASE_BACKSLASH)
1951 s('?', SPECCASE_QUESTION)
1953 s('=', '#') s(')', ']') s('!', '|')
1954 s('(', '[') s('\'', '^') s('>', '}')
1955 s('/', '\\') s('<', '{') s('-', '~')
1962 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1963 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1965 /* Prescan pass over a file already loaded into BUF. This is
1966 translation phases 1 and 2 (C99 5.1.1.2).
1968 Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1969 canonical form (\n). If enabled, convert and/or warn about
1970 trigraphs. Convert backslash-newline to a one-character escape
1971 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1972 token). If there is no newline at the end of the file, add one and
1973 warn. Returns -1 on failure, or the actual length of the data to
1976 This function does a lot of work, and can be a serious performance
1977 bottleneck. It has been tuned heavily; make sure you understand it
1978 before hacking. The common case - no trigraphs, Unix style line
1979 breaks, backslash-newline set off by whitespace, newline at EOF -
1980 has been optimized at the expense of the others. The performance
1981 penalty for DOS style line breaks (\r\n) is about 15%.
1983 Warnings lose particularly heavily since we have to determine the
1984 line number, which involves scanning from the beginning of the file
1985 or from the last warning. The penalty for the absence of a newline
1986 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1988 If your file has more than one kind of end-of-line marker, you
1989 will get messed-up line numbering. */
1992 _cpp_prescan (pfile, fp, len)
1998 const U_CHAR *ibase, *ip, *ilimit;
2001 unsigned int deferred_newlines;
2003 /* Allocate an extra byte in case we must add a trailing \n. */
2004 buf = (U_CHAR *) xmalloc (len + 1);
2005 line_base = op = buf; /* OP is the output cursor into the new buffer. */
2006 ip = ibase = fp->buf; /* IP is the input cursor into the file's data. */
2007 ilimit = ibase + len;
2009 deferred_newlines = 0;
2015 /* Deal with \-newline, potentially in the middle of a token. */
2016 if (deferred_newlines)
2018 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2020 /* Previous was not white space. Skip to white
2021 space, if we can, before outputting the \r's */
2029 memcpy (op, ip, iq - ip);
2035 while (deferred_newlines)
2036 deferred_newlines--, *op++ = '\r'; /* One \r marker per deferred backslash-newline. */
2039 /* Copy as much as we can without special treatment. */
2041 while (iq < ilimit && NORMAL (*iq)) iq++;
2042 memcpy (op, ip, iq - ip);
2050 switch (chartab[*ip++])
2052 case SPECCASE_CR: /* \r */
2055 if (ip < ilimit && *ip == '\n')
2061 case SPECCASE_BACKSLASH: /* \ */
2067 deferred_newlines++;
2069 if (*ip == '\r') ip++;
2072 else if (*ip == '\r')
2074 deferred_newlines++;
2076 if (*ip == '\n') ip++;
2084 case SPECCASE_QUESTION: /* ? */
2088 *op++ = '?'; /* Normal non-trigraph case */
2089 if (ip > ilimit - 2 || ip[0] != '?') /* A trigraph needs a second '?' and one more character. */
2097 if (CPP_OPTION (pfile, warn_trigraphs))
2100 line_base = find_position (line_base, op, &line);
2101 col = op - line_base + 1;
2102 if (CPP_OPTION (pfile, trigraphs))
2103 cpp_warning_with_line (pfile, line, col,
2104 "trigraph ??%c converted to %c", d, t);
2106 cpp_warning_with_line (pfile, line, col,
2107 "trigraph ??%c ignored", d);
2111 if (CPP_OPTION (pfile, trigraphs))
2113 op[-1] = t; /* Overwrite '?' */
2130 #ifdef HAVE_MMAP_FILE
2132 munmap ((caddr_t) fp->buf, len);
2135 free ((PTR) fp->buf);
2140 line_base = find_position (line_base, op, &line);
2141 col = op - line_base + 1;
2142 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2150 /* Allocate pfile->input_buffer, and initialize chartab[]
2151 if it hasn't happened already. Also initializes the pfile->directbuf
token list. */
2154 _cpp_init_input_buffer (pfile)
2160 _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2162 /* Determine the appropriate size for the input buffer. Normal C
2163 source files are smaller than eight K. */
2164 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2165 address arithmetic all the time, and 3 for pushback during buffer
2166 refill, in case there's a potential trigraph or end-of-line
2167 digraph at the end of a block. */
2169 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2170 pfile->input_buffer = tmp;
2171 pfile->input_buffer_len = 8192; /* Recorded length excludes the 1 + 3 slack bytes. */
2175 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2176 and extending for LEN characters to the NUL-terminated string
2177 STRING. Typical usage:
2179 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
/* Compare the LEN-byte TOKEN with the NUL-terminated STRING. The common
prefix is compared with memcmp; if that is equal, the lengths decide. */
2185 cpp_idcmp (token, len, string)
2186 const U_CHAR *token;
2190 size_t len2 = strlen (string); /* Length of the NUL-terminated side. */
2193 if ((r = memcmp (token, string, MIN (len, len2)))) /* Only the shorter length is compared. */
2196 /* The longer of the two strings sorts after the shorter. */
2199 else if (len < len2)
2207 /* Lexing algorithm.
2209 The original lexer in cpplib was made up of two passes: a first pass
2210 that replaced trigraphs and deleted escaped newlines, and a second
2211 pass that tokenized the result of the first pass. Tokenisation was
2212 performed by peeking at the next character in the input stream. For
2213 example, if the input stream contained "!=", the handler for the !
2214 character would peek at the next character, and if it were a '='
2215 would skip over it, and return a "!=" token, otherwise it would
2216 return just the "!" token.
2218 To implement a single-pass lexer, this peeking ahead is unworkable.
2219 An arbitrary number of escaped newlines, and trigraphs (in particular
2220 ??/ which translates to the escape \), could separate the '!' and '='
2221 in the input stream, yet the next token is still a "!=".
2223 Suppose instead that we lex by one logical line at a time, producing
2224 a token list or stack for each logical line, and when seeing the '!'
2225 push a CPP_NOT token on the list. Then if the '!' is part of a
2226 longer token ("!=") we know we must see the remainder of the token by
2227 the time we reach the end of the logical line. Thus we can have the
2228 '=' handler look at the previous token (at the end of the list / top
2229 of the stack) and see if it is a "!" token, and if so, instead of
2230 pushing a "=" token revise the existing token to be a "!=" token.
2232 This works in the presence of escaped newlines, because the '\' would
2233 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2234 newline ('\n' or '\r') handler looks at the token at the top of the
2235 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2236 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2237 the '=' handler would never see any intervening escaped newlines.
2239 To make trigraphs work in this context, as in precedence trigraphs
2240 are highest and converted before anything else, the '?' handler does
2241 lookahead to see if it is a trigraph, and if so skips the trigraph
2242 and pushes the token it represents onto the top of the stack. This
2243 also works in the particular case of a CPP_BACKSLASH trigraph.
2245 To the preprocessor, whitespace is only significant to the point of
2246 knowing whether whitespace precedes a particular token. For example,
2247 the '=' handler needs to know whether there was whitespace between it
2248 and a "!" token on the top of the stack, to make the token conversion
2249 decision correctly. So each token has a PREV_WHITESPACE flag to
2250 indicate this - the standard permits consecutive whitespace to be
2251 regarded as a single space. The compiler front ends are not
2252 interested in whitespace at all; they just require a token stream.
2253 Another place where whitespace is significant to the preprocessor is
2254 a #define statement - if there is whitespace between the macro name
2255 and an initial "(" token the macro is "object-like", otherwise it is
2256 a function-like macro that takes arguments.
2258 However, all is not rosy. Parsing of identifiers, numbers, comments
2259 and strings becomes trickier because of the possibility of raw
2260 trigraphs and escaped newlines in the input stream.
2262 The trigraphs are three consecutive characters beginning with two
2263 question marks. A question mark is not valid as part of a number or
2264 identifier, so parsing of a number or identifier terminates normally
2265 upon reaching it, returning to the mainloop which handles the
2266 trigraph just like it would in any other position. Similarly for the
2267 backslash of a backslash-newline combination. So we just need the
2268 escaped-newline dropper in the mainloop to check if the token on the
2269 top of the stack after dropping the escaped newline is a number or
2270 identifier, and if so to continue processing it as if nothing had happened.
2273 For strings, we replace trigraphs whenever we reach a quote or
2274 newline, because there might be a backslash trigraph escaping them.
2275 We need to be careful that we start trigraph replacing from where we
2276 left off previously, because it is possible for a first scan to leave
2277 "fake" trigraphs that a second scan would pick up as real (e.g. the
2278 sequence "????/\n=" would find a fake ??= trigraph after removing the escaped newline).
2281 For line comments, on reaching a newline we scan the previous
2282 character(s) to see if it is escaped, and continue if it is. Block
2283 comments ignore everything and just focus on finding the comment
2284 termination mark. The only difficult thing, and it is surprisingly
2285 tricky, is checking if an asterisk precedes the final slash since
2286 they could be separated by escaped newlines. If the preprocessor is
2287 invoked with the output comments option, we don't bother removing
2288 escaped newlines and replacing trigraphs for output.
2290 Finally, numbers can begin with a period, which is pushed initially
2291 as a CPP_DOT token in its own right. The digit handler checks if the
2292 previous token was a CPP_DOT not separated by whitespace, and if so
2293 pops it off the stack and pushes a period into the number's buffer
2294 before calling the number parser. */
2298 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2299 U":>", U"<%", U"%>"};
2300 static unsigned char trigraph_map[256];
2303 init_trigraph_map ()
2305 trigraph_map['='] = '#';
2306 trigraph_map['('] = '[';
2307 trigraph_map[')'] = ']';
2308 trigraph_map['/'] = '\\';
2309 trigraph_map['\''] = '^';
2310 trigraph_map['<'] = '{';
2311 trigraph_map['>'] = '}';
2312 trigraph_map['!'] = '|';
2313 trigraph_map['-'] = '~';
2316 /* Call when a trigraph is encountered. It warns if necessary, and
2317 returns true if the trigraph should be honoured. END is the third
2318 character of a trigraph in the input stream. */
2320 trigraph_ok (pfile, end)
2322 const unsigned char *end;
2324 int accept = CPP_OPTION (pfile, trigraphs); /* Trigraphs are honoured only when the option is set. */
2326 if (CPP_OPTION (pfile, warn_trigraphs))
2328 unsigned int col = end - 1 - pfile->buffer->line_base; /* Offset of the character before END from the start of the line. */
2330 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2331 "trigraph ??%c converted to %c",
2332 (int) *end, (int) trigraph_map[*end]);
2334 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2335 "trigraph ??%c ignored", (int) *end);
2340 /* Scan a string for trigraphs, warning or replacing them inline as
2341 appropriate. When parsing a string, we must call this routine
2342 before processing a newline character (if trigraphs are enabled),
2343 since the newline might be escaped by a preceding backslash
2344 trigraph sequence. Returns a pointer to the end of the name after replacement. */
2347 static unsigned char*
2348 trigraph_replace (pfile, src, limit)
2351 unsigned char* limit;
2353 unsigned char *dest;
2355 /* Starting with src[1], find two consecutive '?'. The case of no
2356 trigraphs is streamlined. */
2358 for (; src + 1 < limit; src += 2)
2363 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2366 else if (src + 2 == limit || src[1] != '?')
2369 /* Check if it really is a trigraph. */
2370 if (trigraph_map[src[2]] == 0)
2374 goto trigraph_found;
2378 /* Now we have a trigraph, we need to scan the remaining buffer, and
2379 copy-shifting its contents left if replacement is enabled. */
2380 for (; src + 2 < limit; dest++, src++)
2381 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2385 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2386 *dest = trigraph_map[*src];
2389 /* Copy remaining (at most 2) characters. */
2395 /* If CUR is a backslash or the end of a trigraphed backslash, return
2396 a pointer to its beginning, otherwise NULL. We don't read beyond
2397 the buffer start, because there is the start of the comment in the buffer. */
2399 static const unsigned char *
2400 backslash_start (pfile, cur)
2402 const unsigned char *cur;
2406 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2407 && trigraph_ok (pfile, cur))
2412 /* Skip a C-style block comment. This is probably the trickiest
2413 handler. We find the end of the comment by seeing if an asterisk
2414 is before every '/' we encounter. The nasty complication is that a
2415 previous asterisk may be separated by one or more escaped newlines.
2416 Returns non-zero if comment terminated by EOF, zero otherwise. */
2418 skip_block_comment2 (pfile)
2421 cpp_buffer *buffer = pfile->buffer;
2422 const unsigned char *char_after_star = 0; /* Position just after escaped newlines that followed an asterisk, or 0. */
2423 register const unsigned char *cur = buffer->cur;
2426 /* Inner loop would think the comment has ended if the first comment
2427 character is a '/'. Avoid this and keep the inner loop clean by
2428 skipping such a character. */
2429 if (cur < buffer->rlimit && cur[0] == '/')
2432 for (; cur < buffer->rlimit; )
2434 unsigned char c = *cur++;
2436 /* People like decorating comments with '*', so check for
2437 '/' instead for efficiency. */
2440 if (cur[-2] == '*' || cur - 1 == char_after_star) /* Asterisk directly before, or separated only by escaped newlines. */
2443 /* Warn about potential nested comments, but not when
2444 the final character inside the comment is a '/'.
2445 Don't bother to get it right across escaped newlines. */
2446 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2447 && cur[0] == '*' && cur[1] != '/')
2450 cpp_warning (pfile, "'/*' within comment");
2453 else if (IS_NEWLINE(c))
2455 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2457 handle_newline (cur, buffer->rlimit, c);
2458 /* Work correctly if there is an asterisk before an
2459 arbitrarily long sequence of escaped newlines. */
2460 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2461 char_after_star = cur;
2463 char_after_star = 0;
2473 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2474 Returns non-zero if a multiline comment.
Scanning starts at buffer->cur. A newline ends the comment unless
backslash_start reports a backslash (possibly written as a trigraph)
immediately before it. */
2476 skip_line_comment2 (pfile)
2479 cpp_buffer *buffer = pfile->buffer;
2480 register const unsigned char *cur = buffer->cur;
2483 for (; cur < buffer->rlimit; )
2485 unsigned char c = *cur++;
2489 /* Check for a (trigraph?) backslash escaping the newline. */
2490 if (!backslash_start (pfile, cur - 2))
2493 handle_newline (cur, buffer->rlimit, c);
2499 buffer->cur = cur - 1; /* Leave newline for caller. */
2503 /* Skips whitespace, stopping at next non-whitespace character.
2504 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2505 to be assigned the correct column.
When IN_DIRECTIVE is non-zero and CPP_PEDANTIC is set, a form feed or
vertical tab draws a pedwarn. A warning about ignored embedded null
characters is worded according to null_count. */
2507 skip_whitespace (pfile, in_directive)
2511 cpp_buffer *buffer = pfile->buffer;
2512 register const unsigned char *cur = buffer->cur;
2513 unsigned short null_count = 0;
2515 for (; cur < buffer->rlimit; )
2517 unsigned char c = *cur++;
2521 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2522 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2523 - col % CPP_OPTION(pfile, tabstop)); /* Distance to the next tab stop, less the one column already counted. */
2525 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2527 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2531 /* Must be '\f' or '\v'. */
2532 else if (in_directive && CPP_PEDANTIC (pfile))
2533 cpp_pedwarn (pfile, "%s in preprocessing directive",
2534 c == '\f' ? "formfeed" : "vertical tab");
2539 buffer->cur = cur - 1; /* Step back onto the character that stopped the scan. */
2541 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2542 : "embedded null character ignored");
2545 /* Parse (append) an identifier.
Characters are copied from the buffer at buffer->cur into LIST's name
buffer, starting at offset list->name_used. If the name buffer fills
before the input runs out, auto_expand_name_space is called to grow it.
On completion NAME's length and list->name_used are brought up to date. */
2547 parse_name (pfile, list, name)
2552 const unsigned char *name_limit;
2553 unsigned char *namebuf;
2554 cpp_buffer *buffer = pfile->buffer;
2555 register const unsigned char *cur = buffer->cur;
2558 name_limit = list->namebuf + list->name_cap; /* One past the last usable byte of name space. */
2559 namebuf = list->namebuf + list->name_used; /* Next free byte of name space. */
2561 for (; cur < buffer->rlimit && namebuf < name_limit; )
2563 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2569 if (c == '$' && CPP_PEDANTIC (pfile))
2572 cpp_pedwarn (pfile, "'$' character in identifier");
2576 /* Run out of name space? */
2577 if (cur < buffer->rlimit)
2579 list->name_used = namebuf - list->namebuf;
2580 auto_expand_name_space (list);
2586 name->len = namebuf - name->text;
2587 list->name_used = namebuf - list->namebuf;
2590 /* Parse (append) a number.
Characters are copied from the buffer at buffer->cur into LIST's name
buffer for as long as each one is a number character, a period, or a
sign accepted by VALID_SIGN. The name buffer is grown through
auto_expand_name_space when it fills before the input runs out. */
/* Non-zero when C is '+' or '-' and PREVC is an exponent letter: 'e' or
'E' always, 'p' or 'P' only when the c89 option is off. The expansion
refers to pfile, which must be in scope at the point of use. */
2592 #define VALID_SIGN(c, prevc) \
2593 (((c) == '+' || (c) == '-') && \
2594 ((prevc) == 'e' || (prevc) == 'E' \
2595 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2598 parse_number (pfile, list, name)
2603 const unsigned char *name_limit;
2604 unsigned char *namebuf;
2605 cpp_buffer *buffer = pfile->buffer;
2606 register const unsigned char *cur = buffer->cur;
2609 name_limit = list->namebuf + list->name_cap;
2610 namebuf = list->namebuf + list->name_used;
2612 for (; cur < buffer->rlimit && namebuf < name_limit; )
2614 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2616 /* Perhaps we should accept '$' here if we accept it for
2617 identifiers. We know namebuf[-1] is safe, because for c to
2618 be a sign we must have pushed at least one character. */
2619 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2626 /* Run out of name space? */
2627 if (cur < buffer->rlimit)
2629 list->name_used = namebuf - list->namebuf;
2630 auto_expand_name_space (list);
2636 name->len = namebuf - name->text;
2637 list->name_used = namebuf - list->namebuf;
2640 /* Places a string terminated by an unescaped TERMINATOR into a
2641 cpp_name, which should be expandable and thus at the top of the
2642 list's stack. Handles embedded trigraphs, if necessary, and
backslash-newlines, which are dropped.
2645 Can be used for character constants (terminator = '\''), string
2646 constants ('"') and angled headers ('>'). Multi-line strings are
2647 allowed, except for within directives.
An unterminated string is reported with cpp_error, and embedded null
characters are preserved with a warning. */
2650 parse_string2 (pfile, list, name, terminator, multiline_ok)
2654 unsigned int terminator;
2657 cpp_buffer *buffer = pfile->buffer;
2658 register const unsigned char *cur = buffer->cur;
2659 const unsigned char *name_limit;
2660 unsigned char *namebuf;
2661 unsigned int null_count = 0;
2662 int trigraphed_len = 0; /* Offset into NAME's text from which the next trigraph scan starts. */
2665 name_limit = list->namebuf + list->name_cap;
2666 namebuf = list->namebuf + list->name_used;
2668 for (; cur < buffer->rlimit && namebuf < name_limit; )
2670 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2674 else if (c == terminator || IS_NEWLINE (c))
2676 /* Needed for trigraph_replace and multiline string warning. */
2679 /* Scan for trigraphs before checking if backslash-escaped. */
2680 if (CPP_OPTION (pfile, trigraphs)
2681 || CPP_OPTION (pfile, warn_trigraphs))
2683 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2685 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2686 if (trigraphed_len < 0)
2690 namebuf--; /* Drop the newline / terminator from the name. */
2693 /* Drop a backslash newline, and continue. */
2694 if (namebuf[-1] == '\\')
2696 handle_newline (cur, buffer->rlimit, c);
2703 /* In Fortran and assembly language, silently terminate
2704 strings of either variety at end of line. This is a
2705 kludge around not knowing where comments are in these
2707 if (CPP_OPTION (pfile, lang_fortran)
2708 || CPP_OPTION (pfile, lang_asm))
2711 /* Character constants, headers and asserts may not
2712 extend over multiple lines. In Standard C, neither
2713 may strings. We accept multiline strings as an
2714 extension, but not in directives. */
2718 cur++; /* Move forwards again. */
2720 if (pfile->multiline_string_line == 0)
2722 pfile->multiline_string_line = list->line; /* Kept for the unterminated-string diagnostic issued later in this function. */
2723 if (CPP_PEDANTIC (pfile))
2724 cpp_pedwarn (pfile, "multi-line string constant");
2728 handle_newline (cur, buffer->rlimit, c);
2732 unsigned char *temp;
2734 /* An odd number of consecutive backslashes represents
2735 an escaped terminator. */
2737 while (temp >= name->text && *temp == '\\')
2740 if ((namebuf - temp) & 1)
2747 /* Run out of name space? */
2748 if (cur < buffer->rlimit)
2750 list->name_used = namebuf - list->namebuf;
2751 auto_expand_name_space (list);
2755 /* We may not have trigraph-replaced the input for this code path,
2756 but as the input is in error by being unterminated we don't
2757 bother. Prevent warnings about no newlines at EOF. */
2758 if (IS_NEWLINE(cur[-1]))
2762 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2764 if (terminator == '\"' && pfile->multiline_string_line != list->line
2765 && pfile->multiline_string_line != 0)
2767 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2768 "possible start of unterminated string literal");
2769 pfile->multiline_string_line = 0;
2774 name->len = namebuf - name->text;
2775 list->name_used = namebuf - list->namebuf;
2778 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2779 : "null character preserved"));
2782 /* The character TYPE helps us distinguish comment types: '*' = C
2783 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2784 the stored comment includes the comment start and any terminator.
TOKEN becomes a CPP_COMMENT token whose text occupies LEN plus
COMMENT_START_LEN bytes of LIST's name space, which is expanded first if
there is not enough room. LEN bytes are copied from FROM. */
2786 #define COMMENT_START_LEN 2
2788 save_comment (list, token, from, len, type)
2791 const unsigned char *from;
2795 unsigned char *buffer;
2797 len += COMMENT_START_LEN; /* From here on LEN includes the two-character comment start. */
2799 if (list->name_used + len > list->name_cap)
2800 expand_name_space (list, len);
2802 INIT_TOKEN_NAME (list, token);
2803 token->type = CPP_COMMENT;
2804 token->val.name.len = len;
2806 buffer = list->namebuf + list->name_used;
2807 list->name_used += len;
2809 /* Copy the comment. */
2820 memcpy (buffer, from, len - COMMENT_START_LEN); /* Only the caller-supplied bytes; the start characters are not part of FROM. */
2824 * The tokenizer's main loop. Returns a token list, representing a
2825 * logical line in the input file. On EOF after some tokens have
2826 * been processed, we return immediately. Then in next call, or if
2827 * EOF occurred at the beginning of a logical line, a single CPP_EOF
2828 * token is placed in the list.
2830 * Implementation relies almost entirely on lookback, rather than
2831 * looking forwards. This means that tokenization requires just
2832 * a single pass of the file, even in the presence of trigraphs and
2833 * escaped newlines, providing significant performance benefits.
2834 * Trigraph overhead is negligible if they are disabled, and low
2835 * even when enabled.
2838 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2841 _cpp_lex_line (pfile, list)
2845 cpp_token *cur_token, *token_limit;
2846 cpp_buffer *buffer = pfile->buffer;
2847 register const unsigned char *cur = buffer->cur;
2848 unsigned char flags = 0;
2849 unsigned int first_token = list->tokens_used;
2851 list->line = CPP_BUF_LINE (buffer);
2852 pfile->col_adjust = 0;
2854 token_limit = list->tokens + list->tokens_cap;
2855 cur_token = list->tokens + list->tokens_used;
2857 for (; cur < buffer->rlimit && cur_token < token_limit;)
2859 unsigned char c = *cur++;
2861 /* Optimize whitespace skipping, as most tokens are probably
2862 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2864 if (is_hspace ((unsigned int) c))
2866 /* Step back to get the null warning and tab correction. */
2867 buffer->cur = cur - 1;
2868 skip_whitespace (pfile, IS_DIRECTIVE ());
2871 flags = PREV_WHITESPACE;
2872 if (cur == buffer->rlimit)
2877 /* Initialize current token. Its type is set in the switch. */
2878 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2879 cur_token->flags = flags;
2884 case '0': case '1': case '2': case '3': case '4':
2885 case '5': case '6': case '7': case '8': case '9':
2889 cur--; /* Backup character. */
2890 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2893 INIT_TOKEN_NAME (list, cur_token);
2894 /* Prepend an immediately previous CPP_DOT token. */
2897 if (list->name_cap == list->name_used)
2898 auto_expand_name_space (list);
2900 cur_token->val.name.len = 1;
2901 list->namebuf[list->name_used++] = '.';
2905 cur_token->type = CPP_NUMBER; /* Before parse_number. */
2907 parse_number (pfile, list, &cur_token->val.name);
2915 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2916 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2917 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2918 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2920 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2921 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2922 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2923 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2925 cur--; /* Backup character. */
2926 INIT_TOKEN_NAME (list, cur_token);
2927 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2931 parse_name (pfile, list, &cur_token->val.name);
2934 /* Find handler for newly created / extended directive. */
2935 if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2936 _cpp_check_directive (list, cur_token);
2943 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2944 /* Do we have a wide string? */
2945 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2946 && cur_token[-1].val.name.len == 1
2947 && cur_token[-1].val.name.text[0] == 'L'
2948 && !CPP_TRADITIONAL (pfile))
2950 /* No need for 'L' any more. */
2952 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2956 /* Here c is one of ' " or >. */
2957 INIT_TOKEN_NAME (list, cur_token);
2959 parse_string2 (pfile, list, &cur_token->val.name, c,
2960 c == '"' && !IS_DIRECTIVE());
2966 cur_token->type = CPP_DIV;
2969 if (PREV_TOKEN_TYPE == CPP_DIV)
2971 /* We silently allow C++ comments in system headers,
2972 irrespective of conformance mode, because lots of
2973 broken systems do that and trying to clean it up
2974 in fixincludes is a nightmare. */
2975 if (CPP_IN_SYSTEM_HEADER (pfile))
2976 goto do_line_comment;
2977 else if (CPP_OPTION (pfile, cplusplus_comments))
2979 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2980 && ! buffer->warned_cplusplus_comments)
2984 "C++ style comments are not allowed in ISO C89");
2986 "(this will be reported only once per input file)");
2987 buffer->warned_cplusplus_comments = 1;
2993 "comment start split across lines");
2994 if (skip_line_comment2 (pfile))
2995 cpp_error_with_line (pfile, list->line,
2997 "multi-line comment");
2999 /* Back-up to first '-' or '/'. */
3001 if (!CPP_OPTION (pfile, discard_comments)
3002 && (!IS_DIRECTIVE() || list->dirno == 0))
3003 save_comment (list, cur_token++, cur,
3004 buffer->cur - cur, c);
3007 if (!CPP_OPTION (pfile, traditional))
3008 flags = PREV_WHITESPACE;
3017 cur_token->type = CPP_MULT;
3020 if (PREV_TOKEN_TYPE == CPP_DIV)
3025 "comment start '/*' split across lines");
3026 if (skip_block_comment2 (pfile))
3027 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3028 "unterminated comment");
3029 else if (buffer->cur[-2] != '*')
3031 "comment end '*/' split across lines");
3033 /* Back up to opening '/'. */
3035 if (!CPP_OPTION (pfile, discard_comments)
3036 && (!IS_DIRECTIVE() || list->dirno == 0))
3037 save_comment (list, cur_token++, cur,
3038 buffer->cur - cur, c);
3041 if (!CPP_OPTION (pfile, traditional))
3042 flags = PREV_WHITESPACE;
3045 else if (CPP_OPTION (pfile, cplusplus))
3047 /* In C++, there are .* and ->* operators. */
3048 if (PREV_TOKEN_TYPE == CPP_DEREF)
3049 BACKUP_TOKEN (CPP_DEREF_STAR);
3050 else if (PREV_TOKEN_TYPE == CPP_DOT)
3051 BACKUP_TOKEN (CPP_DOT_STAR);
3059 handle_newline (cur, buffer->rlimit, c);
3060 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3062 /* Remove the escaped newline. Then continue to process
3063 any interrupted name or number. */
3068 if (cur_token->type == CPP_NAME)
3070 else if (cur_token->type == CPP_NUMBER)
3071 goto continue_number;
3074 /* Remember whitespace setting. */
3075 flags = cur_token->flags;
3078 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3081 cpp_warning (pfile, "backslash and newline separated by space");
3083 /* Skip vertical space until we have at least one token to
3085 if (cur_token != &list->tokens[first_token])
3087 list->line = CPP_BUF_LINE (buffer);
3091 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3093 if (CPP_OPTION (pfile, chill))
3094 goto do_line_comment;
3095 REVISE_TOKEN (CPP_MINUS_MINUS);
3098 PUSH_TOKEN (CPP_MINUS);
3101 /* The digraph flag checking ensures that ## and %:%:
3102 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3105 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3106 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3107 REVISE_TOKEN (CPP_PASTE);
3109 PUSH_TOKEN (CPP_HASH);
3113 cur_token->type = CPP_COLON;
3116 if (PREV_TOKEN_TYPE == CPP_COLON
3117 && CPP_OPTION (pfile, cplusplus))
3118 BACKUP_TOKEN (CPP_SCOPE);
3119 /* Digraph: "<:" is a '[' */
3120 else if (PREV_TOKEN_TYPE == CPP_LESS)
3121 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3122 /* Digraph: "%:" is a '#' */
3123 else if (PREV_TOKEN_TYPE == CPP_MOD)
3125 (--cur_token)->flags |= DIGRAPH;
3133 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3134 REVISE_TOKEN (CPP_AND_AND);
3136 PUSH_TOKEN (CPP_AND);
3141 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3142 REVISE_TOKEN (CPP_OR_OR);
3144 PUSH_TOKEN (CPP_OR);
3148 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3149 REVISE_TOKEN (CPP_PLUS_PLUS);
3151 PUSH_TOKEN (CPP_PLUS);
3155 /* This relies on equidistance of "?=" and "?" tokens. */
3156 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3157 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3159 PUSH_TOKEN (CPP_EQ);
3163 cur_token->type = CPP_GREATER;
3166 if (PREV_TOKEN_TYPE == CPP_GREATER)
3167 BACKUP_TOKEN (CPP_RSHIFT);
3168 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3169 BACKUP_TOKEN (CPP_DEREF);
3170 /* Digraph: ":>" is a ']' */
3171 else if (PREV_TOKEN_TYPE == CPP_COLON)
3172 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3173 /* Digraph: "%>" is a '}' */
3174 else if (PREV_TOKEN_TYPE == CPP_MOD)
3175 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3181 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3183 REVISE_TOKEN (CPP_LSHIFT);
3186 /* Is this the beginning of a header name? */
3187 if (list->flags & SYNTAX_INCLUDE)
3189 c = '>'; /* Terminator. */
3190 cur_token->type = CPP_HEADER_NAME;
3191 goto do_parse_string;
3193 PUSH_TOKEN (CPP_LESS);
3197 /* Digraph: "<%" is a '{' */
3198 cur_token->type = CPP_MOD;
3199 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3200 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3205 if (cur + 1 < buffer->rlimit && *cur == '?'
3206 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3208 /* Handle trigraph. */
3212 case '(': goto make_open_square;
3213 case ')': goto make_close_square;
3214 case '<': goto make_open_brace;
3215 case '>': goto make_close_brace;
3216 case '=': goto make_hash;
3217 case '!': goto make_or;
3218 case '-': goto make_complement;
3219 case '/': goto make_backslash;
3220 case '\'': goto make_xor;
3223 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3225 /* GNU C++ defines <? and >? operators. */
3226 if (PREV_TOKEN_TYPE == CPP_LESS)
3228 REVISE_TOKEN (CPP_MIN);
3231 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3233 REVISE_TOKEN (CPP_MAX);
3237 PUSH_TOKEN (CPP_QUERY);
3241 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3243 && !(cur_token[-1].flags & PREV_WHITESPACE))
3246 PUSH_TOKEN (CPP_ELLIPSIS);
3249 PUSH_TOKEN (CPP_DOT);
3253 case '~': PUSH_TOKEN (CPP_COMPL); break;
3255 case '^': PUSH_TOKEN (CPP_XOR); break;
3257 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3259 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3261 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3263 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3265 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3266 case '!': PUSH_TOKEN (CPP_NOT); break;
3267 case ',': PUSH_TOKEN (CPP_COMMA); break;
3268 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3269 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3270 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3273 if (CPP_OPTION (pfile, dollars_in_ident))
3278 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3279 PUSH_TOKEN (CPP_OTHER);
3284 /* Run out of token space? */
3285 if (cur_token == token_limit)
3287 list->tokens_used = cur_token - list->tokens;
3288 _cpp_expand_token_space (list, 256);
3292 cur_token->flags = flags;
3293 if (cur_token == &list->tokens[first_token])
3295 /* FIXME: move this warning to callers who care. */
3296 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3297 cpp_warning (pfile, "no newline at end of file");
3298 cur_token++->type = CPP_EOF;
3302 list->tokens[first_token].flags |= BOL;
3304 list->tokens_used = cur_token - list->tokens;
3307 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3308 already contain enough space to hold the token's spelling. If
3309 WHITESPACE is true, and the token was preceded by whitespace,
3310 output a single space before the token proper. Returns a pointer
3311 to the character after the last character written. */
/* NOTE(review): this listing appears to have lost lines (braces, some
   declarations, case labels and guarded statements).  The comments
   added below describe only the statements that are visible.  */
3313 static unsigned char *
3314 spell_token (pfile, token, buffer, whitespace)
3315 cpp_reader *pfile; /* Would be nice to be rid of this... */
3316 const cpp_token *token;
3317 unsigned char *buffer;
3320 /* Whitespace will not be wanted by handlers of the # and ##
3321 operators calling this function, but will be wanted by the
3322 function that writes out the preprocessed file. */
3323 if (whitespace && token->flags & PREV_WHITESPACE)
/* Dispatch on the spelling class recorded for this token type in
   token_spellings[].  */
3326 switch (token_spellings[token->type].type)
3328 case SPELL_OPERATOR:
3330 const unsigned char *spelling;
/* A token flagged DIGRAPH takes its spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH; the other assignment uses
   the token_spellings entry for the token type (presumably the else
   branch -- the joining line is not visible here).  */
3333 if (token->flags & DIGRAPH)
3334 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3336 spelling = token_spellings[token->type].spelling;
/* Walk the chosen spelling one character at a time, stopping at its
   terminating NUL.  */
3338 while ((c = *spelling++) != '\0')
/* Copy the token's stored text (val.name.len bytes) verbatim and step
   BUFFER past it.  */
3344 memcpy (buffer, token->val.name.text, token->val.name.len);
3345 buffer += token->val.name.len;
/* Wide string/char tokens are tested for first, then string versus
   char; the statements these tests guard are not visible here
   (presumably an 'L' prefix and the choice of quote character --
   TODO confirm).  The stored text is then copied verbatim.  */
3352 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3355 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3358 memcpy (buffer, token->val.name.text, token->val.name.len);
3359 buffer += token->val.name.len;
/* Emit the single byte held in token->aux.  */
3365 *buffer++ = token->aux;
/* A token with no usable spelling is reported through cpp_ice.  */
3369 cpp_ice (pfile, "Unspellable token");
3376 /* Temporary function for illustrative purposes. */
/* NOTE(review): the return type, parameter/local declarations and the
   loop/branch structure of this function are not visible in this
   listing; the comments below cover only the visible calls.  */
3378 _cpp_lex_file (pfile)
/* Set up the trigraph table, then allocate and initialize one token
   list that the calls below operate on.  */
3383 init_trigraph_map ();
3384 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3385 _cpp_init_toklist (list, DUMMY_TOKEN);
/* Lex one logical line into LIST.  A first token of type CPP_EOF is
   checked for next (presumably to stop processing -- the guarded
   statement is not visible).  */
3389 _cpp_lex_line (pfile, list);
3390 if (list->tokens[0].type == CPP_EOF)
/* Hand the list to the directive handler (the condition guarding
   this call, if any, is not visible).  */
3395 _cpp_handle_directive (pfile, list);
/* Write the list out, then clear it.  */
3398 _cpp_output_list (pfile, list);
3399 _cpp_clear_toklist (list);
3403 /* Temporary function for illustrative purposes. */
3405 _cpp_output_list (pfile, list)
3411 for (i = 0; i < list->tokens_used; i++)
3413 CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
3414 pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);