1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30 # include <sys/mman.h>
33 #define PEEKBUF(BUFFER, N) \
34 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
35 #define GETBUF(BUFFER) \
36 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
37 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
39 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
40 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
41 #define GETC() GETBUF (CPP_BUFFER (pfile))
42 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
44 static void skip_block_comment PARAMS ((cpp_reader *));
45 static void skip_line_comment PARAMS ((cpp_reader *));
46 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
47 static int skip_comment PARAMS ((cpp_reader *, int));
48 static int copy_comment PARAMS ((cpp_reader *, int));
49 static void skip_string PARAMS ((cpp_reader *, int));
50 static void parse_string PARAMS ((cpp_reader *, int));
51 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
52 static void null_warning PARAMS ((cpp_reader *, unsigned int));
54 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
56 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
58 static void bump_column PARAMS ((cpp_printer *, unsigned int,
60 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
61 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
64 #define auto_expand_name_space(list) \
65 expand_name_space ((list), 1 + (list)->name_cap / 2)
69 void init_trigraph_map PARAMS ((void));
70 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
72 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
73 const unsigned char *));
74 static int skip_block_comment2 PARAMS ((cpp_reader *));
75 static int skip_line_comment2 PARAMS ((cpp_reader *));
76 static void skip_whitespace PARAMS ((cpp_reader *, int));
77 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
78 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
79 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
81 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
82 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
83 const unsigned char *,
84 unsigned int, unsigned int));
85 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
87 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
89 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
90 unsigned char *, int));
92 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
95 /* Macros on a cpp_name. */
96 #define INIT_TOKEN_NAME(list, token) \
97 do {(token)->val.name.len = 0; \
98 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
99 (list)->tokens_used = token - (list)->tokens + 1; \
102 /* Maybe put these in the ISTABLE eventually. */
103 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
104 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
106 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
107 character, if any, is in buffer. */
108 #define handle_newline(cur, limit, c) \
110 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
112 CPP_BUMP_LINE_CUR (pfile, (cur)); \
113 pfile->col_adjust = 0; \
116 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
117 #define PREV_TOKEN_TYPE (cur_token[-1].type)
119 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
120 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
121 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
122 #define BACKUP_DIGRAPH(ttype) do { \
123 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
125 /* An upper bound on the number of bytes needed to spell a token,
126 including preceding whitespace. */
127 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
128 SPELL_NONE ? (token)->val.name.len: 0))
132 /* Order here matters. Those beyond SPELL_NONE store their spelling
133 in the token list, and its length in the token->val.name.len. */
138 SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
143 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
144 #define I(e, s) {SPELL_IDENT, s},
145 #define S(e, s) {SPELL_STRING, s},
146 #define C(e, s) {SPELL_CHAR, s},
147 #define N(e, s) {SPELL_NONE, s},
149 static const struct token_spelling
151 ENUM_BITFIELD(spell_type) type : CHAR_BIT;
152 const U_CHAR *spelling;
153 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
164 _cpp_grow_token_buffer (pfile, n)
168 long old_written = CPP_WRITTEN (pfile);
169 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
170 pfile->token_buffer = (U_CHAR *)
171 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
172 CPP_SET_WRITTEN (pfile, old_written);
175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
176 If BUFFER != NULL, then use the LENGTH characters in BUFFER
177 as the new input buffer.
178 Return the new buffer, or NULL on failure. */
181 cpp_push_buffer (pfile, buffer, length)
183 const U_CHAR *buffer;
186 cpp_buffer *buf = CPP_BUFFER (pfile);
188 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
190 cpp_fatal (pfile, "macro or `#include' recursion too deep");
194 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
196 new->if_stack = pfile->if_stack;
197 new->buf = new->cur = buffer;
198 new->rlimit = buffer + length;
201 new->line_base = NULL;
203 CPP_BUFFER (pfile) = new;
208 cpp_pop_buffer (pfile)
211 cpp_buffer *buf = CPP_BUFFER (pfile);
212 if (ACTIVE_MARK_P (pfile))
213 cpp_ice (pfile, "mark active in cpp_pop_buffer");
217 _cpp_unwind_if_stack (pfile, buf);
219 free ((PTR) buf->buf);
220 if (pfile->system_include_depth)
221 pfile->system_include_depth--;
222 if (pfile->potential_control_macro)
224 buf->ihash->control_macro = pfile->potential_control_macro;
225 pfile->potential_control_macro = 0;
227 pfile->input_stack_listing_current = 0;
231 cpp_hashnode *m = buf->macro;
234 if ((m->type == T_FMACRO && buf->mapped)
235 || m->type == T_SPECLINE || m->type == T_FILE
236 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
237 || m->type == T_STDC)
238 free ((PTR) buf->buf);
240 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
242 pfile->buffer_stack_depth--;
243 return CPP_BUFFER (pfile);
246 /* Deal with the annoying semantics of fwrite. */
248 safe_fwrite (pfile, buf, len, fp)
258 count = fwrite (buf, 1, len, fp);
267 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
270 /* Notify the compiler proper that the current line number has jumped,
271 or the current file name has changed. */
274 output_line_command (pfile, print, line)
279 cpp_buffer *ip = cpp_file_buffer (pfile);
280 enum { same = 0, enter, leave, rname } change;
281 static const char * const codes[] = { "", " 1", " 2", "" };
283 if (CPP_OPTION (pfile, no_line_commands))
286 /* Determine whether the current filename has changed, and if so,
287 how. 'nominal_fname' values are unique, so they can be compared
288 by comparing pointers. */
289 if (ip->nominal_fname == print->last_fname)
293 if (pfile->buffer_stack_depth == print->last_bsd)
297 if (pfile->buffer_stack_depth > print->last_bsd)
301 print->last_bsd = pfile->buffer_stack_depth;
303 print->last_fname = ip->nominal_fname;
305 /* If the current file has not changed, we can output a few newlines
306 instead if we want to increase the line number by a small amount.
307 We cannot do this if print->lineno is zero, because that means we
308 haven't output any line commands yet. (The very first line
309 command output is a `same_file' command.) */
310 if (change == same && print->lineno != 0
311 && line >= print->lineno && line < print->lineno + 8)
313 while (line > print->lineno)
315 putc ('\n', print->outf);
321 #ifndef NO_IMPLICIT_EXTERN_C
322 if (CPP_OPTION (pfile, cplusplus))
323 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
325 ip->system_header_p ? " 3" : "",
326 (ip->system_header_p == 2) ? " 4" : "");
329 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
331 ip->system_header_p ? " 3" : "");
332 print->lineno = line;
335 /* Write the contents of the token_buffer to the output stream, and
336 clear the token_buffer. Also handles generating line commands and
337 keeping track of file transitions. */
340 cpp_output_tokens (pfile, print)
346 if (CPP_WRITTEN (pfile) - print->written)
348 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
350 safe_fwrite (pfile, pfile->token_buffer,
351 CPP_WRITTEN (pfile) - print->written, print->outf);
354 ip = cpp_file_buffer (pfile);
356 output_line_command (pfile, print, CPP_BUF_LINE (ip));
358 CPP_SET_WRITTEN (pfile, print->written);
361 /* Helper for cpp_output_list - increases the column number to match
362 what we expect it to be. */
365 bump_column (print, from, to)
367 unsigned int from, to;
369 unsigned int tabs, spcs;
370 unsigned int delta = to - from;
372 /* Only if FROM is 0, advance by tabs. */
374 tabs = delta / 8, spcs = delta % 8;
376 tabs = 0, spcs = delta;
378 while (tabs--) putc ('\t', print->outf);
379 while (spcs--) putc (' ', print->outf);
382 /* Write out the list L onto pfile->token_buffer. This function is
385 1) pfile->token_buffer is not going to continue to exist.
386 2) At the moment, tokens don't carry the information described
387 in cpplib.h; they are all strings.
388 3) The list has to be a complete line, and has to be written starting
389 at the beginning of a line. */
392 cpp_output_list (pfile, print, list)
395 const cpp_toklist *list;
398 unsigned int curcol = 1;
400 /* XXX Probably does not do what is intended. */
401 if (print->lineno != list->line)
402 output_line_command (pfile, print, list->line);
404 for (i = 0; i < list->tokens_used; i++)
406 if (TOK_TYPE (list, i) == CPP_VSPACE)
408 output_line_command (pfile, print, list->tokens[i].aux);
412 if (curcol < TOK_COL (list, i))
414 /* Insert space to bring the column to what it should be. */
415 bump_column (print, curcol - 1, TOK_COL (list, i));
416 curcol = TOK_COL (list, i);
418 /* XXX We may have to insert space to prevent an accidental
420 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
421 curcol += TOK_LEN (list, i);
425 /* Scan a string (which may have escape marks), perform macro expansion,
426 and write the result to the token_buffer. */
429 _cpp_expand_to_buffer (pfile, buf, length)
435 enum cpp_ttype token;
440 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
444 /* Copy the buffer, because it might be in an unsafe place - for
445 example, a sequence on the token_buffer, where the pointers will
446 be invalidated if we enlarge the token_buffer. */
447 buf1 = alloca (length);
448 memcpy (buf1, buf, length);
450 /* Set up the input on the input stack. */
451 stop = CPP_BUFFER (pfile);
452 if (cpp_push_buffer (pfile, buf1, length) == NULL)
454 CPP_BUFFER (pfile)->has_escapes = 1;
456 /* Scan the input, create the output. */
459 token = cpp_get_token (pfile);
460 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
465 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
468 cpp_scan_buffer_nooutput (pfile)
471 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
472 enum cpp_ttype token;
473 unsigned int old_written = CPP_WRITTEN (pfile);
474 /* In no-output mode, we can ignore everything but directives. */
477 if (! pfile->only_seen_white)
478 _cpp_skip_rest_of_line (pfile);
479 token = cpp_get_token (pfile);
480 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
483 CPP_SET_WRITTEN (pfile, old_written);
486 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
489 cpp_scan_buffer (pfile, print)
493 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
494 enum cpp_ttype token;
498 token = cpp_get_token (pfile);
499 if (token == CPP_VSPACE || token == CPP_EOF
500 /* XXX Temporary kluge - force flush after #include only */
501 || (token == CPP_DIRECTIVE
502 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
504 cpp_output_tokens (pfile, print);
505 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
511 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
514 cpp_file_buffer (pfile)
519 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
520 if (ip->ihash != NULL)
525 /* Token-buffer helper functions. */
527 /* Expand a token list's string space. It is *vital* that
528 list->tokens_used is correct, to get pointer fix-up right. */
530 expand_name_space (list, len)
534 const U_CHAR *old_namebuf;
536 old_namebuf = list->namebuf;
537 list->name_cap += len;
538 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
540 /* Fix up token text pointers. */
541 if (list->namebuf != old_namebuf)
545 for (i = 0; i < list->tokens_used; i++)
546 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
547 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
551 /* Expand the number of tokens in a list. */
553 _cpp_expand_token_space (list, count)
559 list->tokens_cap += count;
560 n = list->tokens_cap;
561 if (list->flags & LIST_OFFSET)
563 list->tokens = (cpp_token *)
564 xrealloc (list->tokens, n * sizeof (cpp_token));
565 if (list->flags & LIST_OFFSET)
566 list->tokens++; /* Skip the dummy. */
569 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
570 an extra token in front of the token list, as this allows the lexer
571 to always peek at the previous token without worrying about
572 underflowing the list, and some initial space. Otherwise, no
573 token- or name-space is allocated, and there is no dummy token. */
575 _cpp_init_toklist (list, flags)
579 /* We malloc zero bytes because we may want to realloc later, and
580 some old implementations don't like realloc-ing a null pointer. */
581 if (flags == NO_DUMMY_TOKEN)
583 list->tokens_cap = 0;
584 list->tokens = (cpp_token *) malloc (0);
590 /* Initialize token space. Put a dummy token before the start
591 that will fail matches. */
592 list->tokens_cap = 256; /* 4K's worth. */
593 list->tokens = (cpp_token *)
594 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
595 list->tokens[0].type = CPP_EOF;
598 /* Initialize name space. */
599 list->name_cap = 1024;
600 list->flags = LIST_OFFSET;
603 /* Allocate name space. */
604 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
606 _cpp_clear_toklist (list);
609 /* Clear a token list. */
611 _cpp_clear_toklist (list)
614 list->tokens_used = 0;
617 list->flags &= LIST_OFFSET; /* clear all but that one */
620 /* Free a token list. Does not free the list itself, which may be
621 embedded in a larger structure. */
623 _cpp_free_toklist (list)
626 if (list->flags & LIST_OFFSET)
627 free (list->tokens - 1); /* Backup over dummy token. */
630 free (list->namebuf);
633 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
634 COPY is assumed not to be initialized. The comment space is not
637 _cpp_slice_toklist (copy, start, finish)
639 const cpp_token *start, *finish;
645 copy->tokens_cap = n;
646 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
647 memcpy (copy->tokens, start, n * sizeof (cpp_token));
650 for (i = 0; i < n; i++)
651 if (token_spellings[start[i].type].type > SPELL_NONE)
652 bytes += start[i].val.name.len;
654 copy->namebuf = xmalloc (bytes);
656 for (i = 0; i < n; i++)
657 if (token_spellings[start[i].type].type > SPELL_NONE)
659 memcpy (copy->namebuf + bytes,
660 start[i].val.name.text, start[i].val.name.len);
661 copy->tokens[i].val.name.text = copy->namebuf + bytes;
662 bytes += start[i].val.name.len;
665 copy->tokens_cap = n;
666 copy->tokens_used = n;
667 copy->name_used = bytes;
668 copy->name_cap = bytes;
674 /* Shrink a token list down to the minimum size. */
676 _cpp_squeeze_toklist (list)
680 const U_CHAR *old_namebuf;
682 if (list->flags & LIST_OFFSET)
685 memmove (list->tokens, list->tokens + 1,
686 list->tokens_used * sizeof (cpp_token));
687 list->tokens = xrealloc (list->tokens,
688 list->tokens_used * sizeof (cpp_token));
689 list->flags &= ~LIST_OFFSET;
692 list->tokens = xrealloc (list->tokens,
693 list->tokens_used * sizeof (cpp_token));
694 list->tokens_cap = list->tokens_used;
696 old_namebuf = list->namebuf;
697 list->namebuf = xrealloc (list->namebuf, list->name_used);
698 list->name_cap = list->name_used;
700 /* Fix up token text pointers. */
701 delta = list->namebuf - old_namebuf;
706 for (i = 0; i < list->tokens_used; i++)
707 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
708 list->tokens[i].val.name.text += delta;
712 /* Compare two tokens. */
714 _cpp_equiv_tokens (a, b)
715 const cpp_token *a, *b;
717 if (a->type != b->type
718 || a->flags != b->flags
722 if (token_spellings[a->type].type > SPELL_NONE)
724 if (a->val.name.len != b->val.name.len
725 || ustrncmp(a->val.name.text,
733 /* Compare two token lists. */
735 _cpp_equiv_toklists (a, b)
736 const cpp_toklist *a, *b;
740 if (a->tokens_used != b->tokens_used)
743 for (i = 0; i < a->tokens_used; i++)
744 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
749 /* Scan until we encounter a token of type STOP or a newline, and
750 create a token list for it. Does not macro-expand or execute
751 directives. The final token is not included in the list or
752 consumed from the input. Returns the type of the token stopped at. */
755 _cpp_scan_until (pfile, list, stop)
765 _cpp_clear_toklist (list);
766 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
768 written = CPP_WRITTEN (pfile);
773 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
774 type = _cpp_lex_token (pfile);
775 len = CPP_WRITTEN (pfile) - written;
776 CPP_SET_WRITTEN (pfile, written);
777 if (type == CPP_HSPACE)
779 if (CPP_PEDANTIC (pfile))
780 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
784 else if (type == CPP_COMMENT)
785 /* Only happens when processing -traditional macro definitions.
786 Do not give this a token entry, but do not change space_before
790 if (list->tokens_used >= list->tokens_cap)
791 _cpp_expand_token_space (list, 256);
792 if (list->name_used + len >= list->name_cap)
793 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
795 if (type == CPP_MACRO)
798 if (type == CPP_VSPACE || type == stop)
802 TOK_TYPE (list, i) = type;
803 TOK_COL (list, i) = col;
804 TOK_AUX (list, i) = 0;
805 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
807 TOK_LEN (list, i) = len;
808 if (token_spellings[type].type > SPELL_NONE)
810 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
811 TOK_NAME (list, i) = list->namebuf + list->name_used;
812 list->name_used += len;
815 TOK_NAME (list, i) = token_spellings[type].spelling;
820 /* XXX Temporary kluge: put back the newline (or whatever). */
823 /* Don't consider the first token to have white before. */
824 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
828 /* Skip a C-style block comment. We know it's a comment, and point is
829 at the second character of the starter. */
831 skip_block_comment (pfile)
834 unsigned int line, col;
835 const U_CHAR *limit, *cur;
838 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
839 col = CPP_BUF_COL (CPP_BUFFER (pfile));
840 limit = CPP_BUFFER (pfile)->rlimit;
841 cur = CPP_BUFFER (pfile)->cur;
846 if (c == '\n' || c == '\r')
848 /* \r cannot be a macro escape marker here. */
849 if (!ACTIVE_MARK_P (pfile))
850 CPP_BUMP_LINE_CUR (pfile, cur);
854 /* Check for terminator. */
855 if (cur < limit && *cur == '/')
858 /* Warn about comment starter embedded in comment. */
859 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
860 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
861 cur - CPP_BUFFER (pfile)->line_base,
862 "'/*' within comment");
866 cpp_error_with_line (pfile, line, col, "unterminated comment");
869 CPP_BUFFER (pfile)->cur = cur + 1;
872 /* Skip a C++/Chill line comment. We know it's a comment, and point
873 is at the second character of the initiator. */
875 skip_line_comment (pfile)
883 /* We don't have to worry about EOF in here. */
886 /* Don't consider final '\n' to be part of comment. */
892 /* \r cannot be a macro escape marker here. */
893 if (!ACTIVE_MARK_P (pfile))
894 CPP_BUMP_LINE (pfile);
895 if (CPP_OPTION (pfile, warn_comments))
896 cpp_warning (pfile, "backslash-newline within line comment");
901 /* Skip a comment - C, C++, or Chill style. M is the first character
902 of the comment marker. If this really is a comment, skip to its
903 end and return ' '. If this is not a comment, return M (which will
907 skip_comment (pfile, m)
911 if (m == '/' && PEEKC() == '*')
913 skip_block_comment (pfile);
916 else if (m == '/' && PEEKC() == '/')
918 if (CPP_BUFFER (pfile)->system_header_p)
920 /* We silently allow C++ comments in system headers, irrespective
921 of conformance mode, because lots of busted systems do that
922 and trying to clean it up in fixincludes is a nightmare. */
923 skip_line_comment (pfile);
926 else if (CPP_OPTION (pfile, cplusplus_comments))
928 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
930 if (CPP_WTRADITIONAL (pfile))
932 "C++ style comments are not allowed in traditional C");
933 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
935 "C++ style comments are not allowed in ISO C89");
936 if (CPP_WTRADITIONAL (pfile)
937 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
939 "(this will be reported only once per input file)");
940 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
942 skip_line_comment (pfile);
948 else if (m == '-' && PEEKC() == '-'
949 && CPP_OPTION (pfile, chill))
951 skip_line_comment (pfile);
958 /* Identical to skip_comment except that it copies the comment into the
959 token_buffer. This is used if !discard_comments. */
961 copy_comment (pfile, m)
965 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
968 if (skip_comment (pfile, m) == m)
971 limit = CPP_BUFFER (pfile)->cur;
972 CPP_RESERVE (pfile, limit - start + 2);
973 CPP_PUTC_Q (pfile, m);
974 for (; start <= limit; start++)
976 CPP_PUTC_Q (pfile, *start);
982 null_warning (pfile, count)
987 cpp_warning (pfile, "embedded null character ignored");
989 cpp_warning (pfile, "embedded null characters ignored");
992 /* Skip whitespace \-newline and comments. Does not macro-expand. */
995 _cpp_skip_hspace (pfile)
998 unsigned int null_count = 0;
1006 else if (is_hspace(c))
1008 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1009 cpp_pedwarn (pfile, "%s in preprocessing directive",
1010 c == '\f' ? "formfeed" : "vertical tab");
1016 /* \r is a backslash-newline marker if !has_escapes, and
1017 a deletable-whitespace or no-reexpansion marker otherwise. */
1018 if (CPP_BUFFER (pfile)->has_escapes)
1026 CPP_BUMP_LINE (pfile);
1028 else if (c == '/' || c == '-')
1030 c = skip_comment (pfile, c);
1040 null_warning (pfile, null_count);
1043 /* Read and discard the rest of the current line. */
1046 _cpp_skip_rest_of_line (pfile)
1060 if (! CPP_BUFFER (pfile)->has_escapes)
1061 CPP_BUMP_LINE (pfile);
1066 skip_string (pfile, c);
1071 skip_comment (pfile, c);
1076 if (CPP_PEDANTIC (pfile))
1077 cpp_pedwarn (pfile, "%s in preprocessing directive",
1078 c == '\f' ? "formfeed" : "vertical tab");
1085 /* Parse an identifier starting with C. */
1088 _cpp_parse_name (pfile, c)
1100 if (c == '$' && CPP_PEDANTIC (pfile))
1101 cpp_pedwarn (pfile, "`$' in identifier");
1103 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1104 CPP_PUTC_Q (pfile, c);
1112 /* Parse and skip over a string starting with C. A single quoted
1113 string is treated like a double -- some programs (e.g., troff) are
1114 perverse this way. (However, a single quoted string is not allowed
1115 to extend over multiple lines.) */
1117 skip_string (pfile, c)
1121 unsigned int start_line, start_column;
1122 unsigned int null_count = 0;
1124 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1125 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1132 cpp_error_with_line (pfile, start_line, start_column,
1133 "unterminated string or character constant");
1134 if (pfile->multiline_string_line != start_line
1135 && pfile->multiline_string_line != 0)
1136 cpp_error_with_line (pfile,
1137 pfile->multiline_string_line, -1,
1138 "possible real start of unterminated constant");
1139 pfile->multiline_string_line = 0;
1147 CPP_BUMP_LINE (pfile);
1148 /* In Fortran and assembly language, silently terminate
1149 strings of either variety at end of line. This is a
1150 kludge around not knowing where comments are in these
1152 if (CPP_OPTION (pfile, lang_fortran)
1153 || CPP_OPTION (pfile, lang_asm))
1158 /* Character constants may not extend over multiple lines.
1159 In Standard C, neither may strings. We accept multiline
1160 strings as an extension. */
1163 cpp_error_with_line (pfile, start_line, start_column,
1164 "unterminated character constant");
1168 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1169 cpp_pedwarn_with_line (pfile, start_line, start_column,
1170 "string constant runs past end of line");
1171 if (pfile->multiline_string_line == 0)
1172 pfile->multiline_string_line = start_line;
1176 if (CPP_BUFFER (pfile)->has_escapes)
1178 cpp_ice (pfile, "\\r escape inside string constant");
1182 /* Backslash newline is replaced by nothing at all. */
1183 CPP_BUMP_LINE (pfile);
1199 if (null_count == 1)
1200 cpp_warning (pfile, "null character in string or character constant");
1201 else if (null_count > 1)
1202 cpp_warning (pfile, "null characters in string or character constant");
1205 /* Parse a string and copy it to the output. */
1208 parse_string (pfile, c)
1212 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1213 const U_CHAR *limit;
1215 skip_string (pfile, c);
1217 limit = CPP_BUFFER (pfile)->cur;
1218 CPP_RESERVE (pfile, limit - start + 2);
1219 CPP_PUTC_Q (pfile, c);
1220 for (; start < limit; start++)
1222 CPP_PUTC_Q (pfile, *start);
1225 /* Get the next token, and add it to the text in pfile->token_buffer.
1226 Return the kind of token we got. */
1229 _cpp_lex_token (pfile)
1233 enum cpp_ttype token;
1235 if (CPP_BUFFER (pfile) == NULL)
1246 if (PEEKC () == '=')
1250 if (CPP_OPTION (pfile, discard_comments))
1251 c = skip_comment (pfile, c);
1253 c = copy_comment (pfile, c);
1257 /* Comments are equivalent to spaces.
1258 For -traditional, a comment is equivalent to nothing. */
1259 if (!CPP_OPTION (pfile, discard_comments))
1261 else if (CPP_TRADITIONAL (pfile))
1265 CPP_PUTC (pfile, c);
1270 CPP_PUTC (pfile, c);
1277 CPP_PUTC (pfile, c2);
1280 else if (c2 == '%' && PEEKN (1) == ':')
1282 /* Digraph: "%:" == "#". */
1284 CPP_RESERVE (pfile, 2);
1285 CPP_PUTC_Q (pfile, c2);
1286 CPP_PUTC_Q (pfile, GETC ());
1294 parse_string (pfile, c);
1295 return c == '\'' ? CPP_CHAR : CPP_STRING;
1298 if (!CPP_OPTION (pfile, dollars_in_ident))
1304 /* Digraph: ":>" == "]". */
1306 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1314 if (c2 == c || c2 == '=')
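1319 /* Digraphs: "%:" == "#", "%>" == "}". NOTE(review): the branch below that returns CPP_OPEN_BRACE appears to handle "%>", which spells "}" -- CPP_CLOSE_BRACE looks intended; confirm. */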
1324 CPP_RESERVE (pfile, 2);
1325 CPP_PUTC_Q (pfile, c);
1326 CPP_PUTC_Q (pfile, c2);
1332 CPP_RESERVE (pfile, 2);
1333 CPP_PUTC_Q (pfile, c);
1334 CPP_PUTC_Q (pfile, c2);
1335 return CPP_OPEN_BRACE;
1337 /* else fall through */
1343 if (PEEKC () == '=')
1351 if (CPP_OPTION (pfile, chill))
1352 goto comment; /* Chill style comment */
1360 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1362 /* In C++, there's a ->* operator. */
1364 CPP_RESERVE (pfile, 4);
1365 CPP_PUTC_Q (pfile, c);
1366 CPP_PUTC_Q (pfile, GETC ());
1367 CPP_PUTC_Q (pfile, GETC ());
1375 if (pfile->parsing_include_directive)
1379 CPP_PUTC (pfile, c);
1383 if (c == '\n' || c == EOF)
1386 "missing '>' in `#include <FILENAME>'");
1391 if (!CPP_BUFFER (pfile)->has_escapes)
1393 /* Backslash newline is replaced by nothing. */
1394 CPP_ADJUST_WRITTEN (pfile, -1);
1395 CPP_BUMP_LINE (pfile);
1399 /* We might conceivably get \r- or \r<space> in
1400 here. Just delete 'em. */
1402 if (d != '-' && d != ' ')
1403 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1404 CPP_ADJUST_WRITTEN (pfile, -1);
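1410 /* Digraphs: "<%" == "{", "<:" == "[". NOTE(review): the branch below that returns CPP_CLOSE_BRACE appears to handle "<%", which spells "{" -- CPP_OPEN_BRACE looks intended; confirm. */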
1415 CPP_RESERVE (pfile, 2);
1416 CPP_PUTC_Q (pfile, c);
1417 CPP_PUTC_Q (pfile, c2);
1418 return CPP_CLOSE_BRACE;
1422 /* else fall through */
1427 /* GNU C++ supports MIN and MAX operators <? and >?. */
1428 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1431 CPP_RESERVE (pfile, 3);
1432 CPP_PUTC_Q (pfile, c);
1433 CPP_PUTC_Q (pfile, c2);
1434 if (PEEKC () == '=')
1435 CPP_PUTC_Q (pfile, GETC ());
1442 CPP_PUTC (pfile, c);
1447 /* In C++ there's a .* operator. */
1448 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1451 if (c2 == '.' && PEEKN(1) == '.')
1453 CPP_RESERVE (pfile, 3);
1454 CPP_PUTC_Q (pfile, '.');
1455 CPP_PUTC_Q (pfile, '.');
1456 CPP_PUTC_Q (pfile, '.');
1458 return CPP_ELLIPSIS;
1463 CPP_RESERVE (pfile, 2);
1464 CPP_PUTC_Q (pfile, c);
1465 CPP_PUTC_Q (pfile, GETC ());
1470 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1472 CPP_PUTC (pfile, c);
1474 parse_string (pfile, c);
1475 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1479 case '0': case '1': case '2': case '3': case '4':
1480 case '5': case '6': case '7': case '8': case '9':
1485 CPP_RESERVE (pfile, 2);
1486 CPP_PUTC_Q (pfile, c);
1490 if (!is_numchar(c) && c != '.'
1491 && ((c2 != 'e' && c2 != 'E'
1492 && ((c2 != 'p' && c2 != 'P')
1493 || CPP_OPTION (pfile, c89)))
1494 || (c != '+' && c != '-')))
1500 case 'b': case 'c': case 'd': case 'h': case 'o':
1501 case 'B': case 'C': case 'D': case 'H': case 'O':
1502 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1504 CPP_RESERVE (pfile, 2);
1505 CPP_PUTC_Q (pfile, c);
1506 CPP_PUTC_Q (pfile, '\'');
1512 goto chill_number_eof;
1515 CPP_PUTC (pfile, c);
1519 CPP_RESERVE (pfile, 2);
1520 CPP_PUTC_Q (pfile, c);
1533 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1534 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1535 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1536 case 'x': case 'y': case 'z':
1537 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1538 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1539 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1542 _cpp_parse_name (pfile, c);
1545 case ' ': case '\t': case '\v': case '\f': case '\0':
1554 CPP_PUTC (pfile, c);
1556 if (c == EOF || !is_hspace(c))
1561 null_warning (pfile, null_count);
1566 if (CPP_BUFFER (pfile)->has_escapes)
1571 if (pfile->output_escapes)
1572 CPP_PUTS (pfile, "\r-", 2);
1573 _cpp_parse_name (pfile, GETC ());
1578 /* "\r " means a space, but only if necessary to prevent
1579 accidental token concatenation. */
1580 CPP_RESERVE (pfile, 2);
1581 if (pfile->output_escapes)
1582 CPP_PUTC_Q (pfile, '\r');
1583 CPP_PUTC_Q (pfile, c);
1588 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1594 /* Backslash newline is ignored. */
1595 if (!ACTIVE_MARK_P (pfile))
1596 CPP_BUMP_LINE (pfile);
1601 CPP_PUTC (pfile, c);
1604 case '(': token = CPP_OPEN_PAREN; goto char1;
1605 case ')': token = CPP_CLOSE_PAREN; goto char1;
1606 case '{': token = CPP_OPEN_BRACE; goto char1;
1607 case '}': token = CPP_CLOSE_BRACE; goto char1;
1608 case ',': token = CPP_COMMA; goto char1;
1609 case ';': token = CPP_SEMICOLON; goto char1;
1615 CPP_PUTC (pfile, c);
1620 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1621 Caller is expected to have checked no_macro_expand. */
1623 maybe_macroexpand (pfile, written)
1627 U_CHAR *macro = pfile->token_buffer + written;
1628 size_t len = CPP_WRITTEN (pfile) - written;
1629 cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
1631 /* cpp_lookup never returns null. */
1632 if (hp->type == T_VOID)
1634 if (hp->disabled || hp->type == T_IDENTITY)
1636 if (pfile->output_escapes)
1638 /* Insert a no-reexpand marker before IDENT. */
1639 CPP_RESERVE (pfile, 2);
1640 CPP_ADJUST_WRITTEN (pfile, 2);
1641 macro = pfile->token_buffer + written;
1643 memmove (macro + 2, macro, len);
1649 if (hp->type == T_EMPTY)
1651 /* Special case optimization: macro expands to nothing. */
1652 CPP_SET_WRITTEN (pfile, written);
1653 CPP_PUTC_Q (pfile, ' ');
1657 /* If macro wants an arglist, verify that a '(' follows. */
1658 if (hp->type == T_FMACRO)
1660 int macbuf_whitespace = 0;
1663 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1665 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1668 _cpp_skip_hspace (pfile);
1675 if (point != CPP_BUFFER (pfile)->cur)
1676 macbuf_whitespace = 1;
1680 goto not_macro_call;
1681 cpp_pop_buffer (pfile);
1684 CPP_SET_MARK (pfile);
1687 _cpp_skip_hspace (pfile);
1694 CPP_GOTO_MARK (pfile);
1699 if (macbuf_whitespace)
1700 CPP_PUTC (pfile, ' ');
1702 /* K+R treated this as a hard error. */
1703 if (CPP_OPTION (pfile, warn_traditional))
1705 "traditional C rejects function macro %s in non-function context",
1712 /* This is now known to be a macro call.
1713 Expand the macro, reading arguments as needed,
1714 and push the expansion on the input stack. */
1715 _cpp_macroexpand (pfile, hp);
1716 CPP_SET_WRITTEN (pfile, written);
1720 /* Complain about \v or \f in a preprocessing directive (constraint
1721 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1723 pedantic_whitespace (pfile, p, len)
1731 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1732 else if (*p == '\f')
1733 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1741 cpp_get_token (pfile)
1744 enum cpp_ttype token;
1745 long written = CPP_WRITTEN (pfile);
1748 token = _cpp_lex_token (pfile);
1753 pfile->potential_control_macro = 0;
1754 pfile->only_seen_white = 0;
1758 if (pfile->only_seen_white == 0)
1759 pfile->only_seen_white = 1;
1760 CPP_BUMP_LINE (pfile);
1768 pfile->potential_control_macro = 0;
1769 if (!pfile->only_seen_white)
1771 /* XXX shouldn't have to do this - remove the hash or %: from
1772 the token buffer. */
1773 if (CPP_PWRITTEN (pfile)[-1] == '#')
1774 CPP_ADJUST_WRITTEN (pfile, -1);
1776 CPP_ADJUST_WRITTEN (pfile, -2);
1778 if (_cpp_handle_directive (pfile))
1779 return CPP_DIRECTIVE;
1780 pfile->only_seen_white = 0;
1781 CPP_PUTC (pfile, '#');
1785 pfile->potential_control_macro = 0;
1786 pfile->only_seen_white = 0;
1787 if (! pfile->no_macro_expand
1788 && maybe_macroexpand (pfile, written))
1793 if (CPP_BUFFER (pfile) == NULL)
1795 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1797 cpp_pop_buffer (pfile);
1800 cpp_pop_buffer (pfile);
1805 /* Like cpp_get_token, but skip spaces and comments. */
1808 cpp_get_non_space_token (pfile)
1811 int old_written = CPP_WRITTEN (pfile);
1814 enum cpp_ttype token = cpp_get_token (pfile);
1815 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1817 CPP_SET_WRITTEN (pfile, old_written);
1821 /* Like cpp_get_token, except that it does not execute directives,
1822 does not consume vertical space, and discards horizontal space. */
1824 _cpp_get_directive_token (pfile)
1828 enum cpp_ttype token;
1832 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1833 old_written = CPP_WRITTEN (pfile);
1834 token = _cpp_lex_token (pfile);
1841 /* Put it back and return VSPACE. */
1843 CPP_ADJUST_WRITTEN (pfile, -1);
1847 /* The purpose of this rather strange check is to prevent pedantic
1848 warnings for ^L in an #ifdefed out block. */
1849 if (CPP_PEDANTIC (pfile) && ! at_bol)
1850 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1851 CPP_WRITTEN (pfile) - old_written);
1852 CPP_SET_WRITTEN (pfile, old_written);
1857 if (! pfile->no_macro_expand
1858 && maybe_macroexpand (pfile, old_written))
1863 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1865 cpp_pop_buffer (pfile);
1869 /* This can happen for files that don't end with a newline,
1870 and for cpp_define and friends. Pretend they do, so
1871 callers don't have to deal. A warning will be issued by
1872 someone else, if necessary. */
1877 /* Determine the current line and column. Used only by read_and_prescan. */
1879 find_position (start, limit, linep)
1882 unsigned long *linep;
1884 unsigned long line = *linep;
1885 U_CHAR *lbase = start;
1886 while (start < limit)
1888 U_CHAR ch = *start++;
1889 if (ch == '\n' || ch == '\r')
1899 /* The following table is used by _cpp_prescan. If we have
1900 designated initializers, it can be constant data; otherwise, it is
1901 set up at runtime by _cpp_init_input_buffer. */
1903 #if (GCC_VERSION >= 2007)
1904 #define init_chartab() /* nothing */
1905 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1907 #define s(p, v) [p] = v,
1909 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1910 static void init_chartab PARAMS ((void)) { \
1911 unsigned char *x = chartab;
1913 #define s(p, v) x[p] = v;
1916 /* Table of characters that can't be handled in the inner loop.
1917 Also contains the mapping between trigraph third characters and their
1919 #define SPECCASE_CR 1
1920 #define SPECCASE_BACKSLASH 2
1921 #define SPECCASE_QUESTION 3
1924 s('\r', SPECCASE_CR)
1925 s('\\', SPECCASE_BACKSLASH)
1926 s('?', SPECCASE_QUESTION)
1928 s('=', '#') s(')', ']') s('!', '|')
1929 s('(', '[') s('\'', '^') s('>', '}')
1930 s('/', '\\') s('<', '{') s('-', '~')
1937 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1938 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1940 /* Prescan pass over a file already loaded into BUF. This is
1941 translation phases 1 and 2 (C99 5.1.1.2).
1943 Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1944 canonical form (\n). If enabled, convert and/or warn about
1945 trigraphs. Convert backslash-newline to a one-character escape
1946 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1947 token). If there is no newline at the end of the file, add one and
1948 warn. Returns -1 on failure, or the actual length of the data to
1951 This function does a lot of work, and can be a serious performance
1952 bottleneck. It has been tuned heavily; make sure you understand it
1953 before hacking. The common case - no trigraphs, Unix style line
1954 breaks, backslash-newline set off by whitespace, newline at EOF -
1955 has been optimized at the expense of the others. The performance
1956 penalty for DOS style line breaks (\r\n) is about 15%.
1958 Warnings lose particularly heavily since we have to determine the
1959 line number, which involves scanning from the beginning of the file
1960 or from the last warning. The penalty for the absence of a newline
1961 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1963 If your file has more than one kind of end-of-line marker, you
1964 will get messed-up line numbering. */
1967 _cpp_prescan (pfile, fp, len)
1973 const U_CHAR *ibase, *ip, *ilimit;
1976 unsigned int deferred_newlines;
1978 /* Allocate an extra byte in case we must add a trailing \n. */
1979 buf = (U_CHAR *) xmalloc (len + 1);
1980 line_base = op = buf;
1981 ip = ibase = fp->buf;
1982 ilimit = ibase + len;
1984 deferred_newlines = 0;
1990 /* Deal with \-newline, potentially in the middle of a token. */
1991 if (deferred_newlines)
1993 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1995 /* Previous was not white space. Skip to white
1996 space, if we can, before outputting the \r's */
2004 memcpy (op, ip, iq - ip);
2010 while (deferred_newlines)
2011 deferred_newlines--, *op++ = '\r';
2014 /* Copy as much as we can without special treatment. */
2016 while (iq < ilimit && NORMAL (*iq)) iq++;
2017 memcpy (op, ip, iq - ip);
2025 switch (chartab[*ip++])
2027 case SPECCASE_CR: /* \r */
2030 if (ip < ilimit && *ip == '\n')
2036 case SPECCASE_BACKSLASH: /* \ */
2042 deferred_newlines++;
2044 if (*ip == '\r') ip++;
2047 else if (*ip == '\r')
2049 deferred_newlines++;
2051 if (*ip == '\n') ip++;
2059 case SPECCASE_QUESTION: /* ? */
2063 *op++ = '?'; /* Normal non-trigraph case */
2064 if (ip > ilimit - 2 || ip[0] != '?')
2072 if (CPP_OPTION (pfile, warn_trigraphs))
2075 line_base = find_position (line_base, op, &line);
2076 col = op - line_base + 1;
2077 if (CPP_OPTION (pfile, trigraphs))
2078 cpp_warning_with_line (pfile, line, col,
2079 "trigraph ??%c converted to %c", d, t);
2081 cpp_warning_with_line (pfile, line, col,
2082 "trigraph ??%c ignored", d);
2086 if (CPP_OPTION (pfile, trigraphs))
2088 op[-1] = t; /* Overwrite '?' */
2105 #ifdef HAVE_MMAP_FILE
2107 munmap ((caddr_t) fp->buf, len);
2110 free ((PTR) fp->buf);
2115 line_base = find_position (line_base, op, &line);
2116 col = op - line_base + 1;
2117 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2125 /* Allocate pfile->input_buffer, and initialize chartab[]
2126 if it hasn't happened already. */
2129 _cpp_init_input_buffer (pfile)
2135 _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2137 /* Determine the appropriate size for the input buffer. Normal C
2138 source files are smaller than eight K. */
2139 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2140 address arithmetic all the time, and 3 for pushback during buffer
2141 refill, in case there's a potential trigraph or end-of-line
2142 digraph at the end of a block. */
2144 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2145 pfile->input_buffer = tmp;
2146 pfile->input_buffer_len = 8192;
2150 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2151 and extending for LEN characters to the NUL-terminated string
2152 STRING. Typical usage:
2154 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2160 cpp_idcmp (token, len, string)
2161 const U_CHAR *token;
2165 size_t len2 = strlen (string);
2168 if ((r = memcmp (token, string, MIN (len, len2))))
2171 /* The longer of the two strings sorts after the shorter. */
2174 else if (len < len2)
2182 /* Lexing algorithm.
2184 The original lexer in cpplib was made up of two passes: a first pass
2185 that replaced trigraphs and deleted escaped newlines, and a second
2186 pass that tokenized the result of the first pass. Tokenisation was
2187 performed by peeking at the next character in the input stream. For
2188 example, if the input stream contained "!=", the handler for the !
2189 character would peek at the next character, and if it were a '='
2190 would skip over it, and return a "!=" token, otherwise it would
2191 return just the "!" token.
2193 To implement a single-pass lexer, this peeking ahead is unworkable.
2194 An arbitrary number of escaped newlines, and trigraphs (in particular
2195 ??/ which translates to the escape \), could separate the '!' and '='
2196 in the input stream, yet the next token is still a "!=".
2198 Suppose instead that we lex by one logical line at a time, producing
2199 a token list or stack for each logical line, and when seeing the '!'
2200 push a CPP_NOT token on the list. Then if the '!' is part of a
2201 longer token ("!=") we know we must see the remainder of the token by
2202 the time we reach the end of the logical line. Thus we can have the
2203 '=' handler look at the previous token (at the end of the list / top
2204 of the stack) and see if it is a "!" token, and if so, instead of
2205 pushing a "=" token revise the existing token to be a "!=" token.
2207 This works in the presence of escaped newlines, because the '\' would
2208 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2209 newline ('\n' or '\r') handler looks at the token at the top of the
2210 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2211 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2212 the '=' handler would never see any intervening escaped newlines.
2214 To make trigraphs work in this context - trigraphs have the highest
2215 precedence and are converted before anything else - the '?' handler does
2216 lookahead to see if it is a trigraph, and if so skips the trigraph
2217 and pushes the token it represents onto the top of the stack. This
2218 also works in the particular case of a CPP_BACKSLASH trigraph.
2220 To the preprocessor, whitespace is only significant to the point of
2221 knowing whether whitespace precedes a particular token. For example,
2222 the '=' handler needs to know whether there was whitespace between it
2223 and a "!" token on the top of the stack, to make the token conversion
2224 decision correctly. So each token has a PREV_WHITESPACE flag to
2225 indicate this - the standard permits consecutive whitespace to be
2226 regarded as a single space. The compiler front ends are not
2227 interested in whitespace at all; they just require a token stream.
2228 Another place where whitespace is significant to the preprocessor is
2229 a #define statement - if there is whitespace between the macro name
2230 and an initial "(" token the macro is "object-like", otherwise it is
2231 a function-like macro that takes arguments.
2233 However, all is not rosy. Parsing of identifiers, numbers, comments
2234 and strings becomes trickier because of the possibility of raw
2235 trigraphs and escaped newlines in the input stream.
2237 The trigraphs are three consecutive characters beginning with two
2238 question marks. A question mark is not valid as part of a number or
2239 identifier, so parsing of a number or identifier terminates normally
2240 upon reaching it, returning to the mainloop which handles the
2241 trigraph just like it would in any other position. Similarly for the
2242 backslash of a backslash-newline combination. So we just need the
2243 escaped-newline dropper in the mainloop to check if the token on the
2244 top of the stack after dropping the escaped newline is a number or
2245 identifier, and if so to continue the processing it as if nothing had
2248 For strings, we replace trigraphs whenever we reach a quote or
2249 newline, because there might be a backslash trigraph escaping them.
2250 We need to be careful that we start trigraph replacing from where we
2251 left off previously, because it is possible for a first scan to leave
2252 "fake" trigraphs that a second scan would pick up as real (e.g. the
2253 sequence "????/\n=" would find a fake ??= trigraph after removing the
2256 For line comments, on reaching a newline we scan the previous
2257 character(s) to see if it is escaped, and continue if it is. Block
2258 comments ignore everything and just focus on finding the comment
2259 termination mark. The only difficult thing, and it is surprisingly
2260 tricky, is checking if an asterisk precedes the final slash since
2261 they could be separated by escaped newlines. If the preprocessor is
2262 invoked with the output comments option, we don't bother removing
2263 escaped newlines and replacing trigraphs for output.
2265 Finally, numbers can begin with a period, which is pushed initially
2266 as a CPP_DOT token in its own right. The digit handler checks if the
2267 previous token was a CPP_DOT not separated by whitespace, and if so
2268 pops it off the stack and pushes a period into the number's buffer
2269 before calling the number parser.
2273 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2274 U":>", U"<%", U"%>"};
2275 static unsigned char trigraph_map[256];
2278 init_trigraph_map ()
2280 trigraph_map['='] = '#';
2281 trigraph_map['('] = '[';
2282 trigraph_map[')'] = ']';
2283 trigraph_map['/'] = '\\';
2284 trigraph_map['\''] = '^';
2285 trigraph_map['<'] = '{';
2286 trigraph_map['>'] = '}';
2287 trigraph_map['!'] = '|';
2288 trigraph_map['-'] = '~';
2291 /* Call when a trigraph is encountered. It warns if necessary, and
2292 returns true if the trigraph should be honoured. END is the third
2293 character of a trigraph in the input stream. */
2295 trigraph_ok (pfile, end)
2297 const unsigned char *end;
2299 int accept = CPP_OPTION (pfile, trigraphs);
2301 if (CPP_OPTION (pfile, warn_trigraphs))
2303 unsigned int col = end - 1 - pfile->buffer->line_base;
2305 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2306 "trigraph ??%c converted to %c",
2307 (int) *end, (int) trigraph_map[*end]);
2309 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2310 "trigraph ??%c ignored", (int) *end);
2315 /* Scan a string for trigraphs, warning or replacing them inline as
2316 appropriate. When parsing a string, we must call this routine
2317 before processing a newline character (if trigraphs are enabled),
2318 since the newline might be escaped by a preceding backslash
2319 trigraph sequence. Returns a pointer to the end of the name after
2322 static unsigned char*
2323 trigraph_replace (pfile, src, limit)
2326 unsigned char* limit;
2328 unsigned char *dest;
2330 /* Starting with src[1], find two consecutive '?'. The case of no
2331 trigraphs is streamlined. */
2333 for (; src + 1 < limit; src += 2)
2338 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2341 else if (src + 2 == limit || src[1] != '?')
2344 /* Check if it really is a trigraph. */
2345 if (trigraph_map[src[2]] == 0)
2349 goto trigraph_found;
2353 /* Now we have a trigraph, we need to scan the remaining buffer, and
2354 copy-shifting its contents left if replacement is enabled. */
2355 for (; src + 2 < limit; dest++, src++)
2356 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2360 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2361 *dest = trigraph_map[*src];
2364 /* Copy remaining (at most 2) characters. */
2370 /* If CUR is a backslash or the end of a trigraphed backslash, return
2371 a pointer to its beginning, otherwise NULL. We don't read beyond
2372 the buffer start, because there is the start of the comment in the
2374 static const unsigned char *
2375 backslash_start (pfile, cur)
2377 const unsigned char *cur;
2381 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2382 && trigraph_ok (pfile, cur))
2387 /* Skip a C-style block comment. This is probably the trickiest
2388 handler. We find the end of the comment by seeing if an asterisk
2389 is before every '/' we encounter. The nasty complication is that a
2390 previous asterisk may be separated by one or more escaped newlines.
2391 Returns non-zero if comment terminated by EOF, zero otherwise. */
2393 skip_block_comment2 (pfile)
2396 cpp_buffer *buffer = pfile->buffer;
2397 const unsigned char *char_after_star = 0;
2398 register const unsigned char *cur = buffer->cur;
2401 /* Inner loop would think the comment has ended if the first comment
2402 character is a '/'. Avoid this and keep the inner loop clean by
2403 skipping such a character. */
2404 if (cur < buffer->rlimit && cur[0] == '/')
2407 for (; cur < buffer->rlimit; )
2409 unsigned char c = *cur++;
2411 /* People like decorating comments with '*', so check for
2412 '/' instead for efficiency. */
2415 if (cur[-2] == '*' || cur - 1 == char_after_star)
2418 /* Warn about potential nested comments, but not when
2419 the final character inside the comment is a '/'.
2420 Don't bother to get it right across escaped newlines. */
2421 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2422 && cur[0] == '*' && cur[1] != '/')
2425 cpp_warning (pfile, "'/*' within comment");
2428 else if (IS_NEWLINE(c))
2430 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2432 handle_newline (cur, buffer->rlimit, c);
2433 /* Work correctly if there is an asterisk before an
2434 arbitrarily long sequence of escaped newlines. */
2435 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2436 char_after_star = cur;
2438 char_after_star = 0;
2448 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2449 Returns non-zero if a multiline comment. */
2451 skip_line_comment2 (pfile)
2454 cpp_buffer *buffer = pfile->buffer;
2455 register const unsigned char *cur = buffer->cur;
2458 for (; cur < buffer->rlimit; )
2460 unsigned char c = *cur++;
2464 /* Check for a (trigraph?) backslash escaping the newline. */
2465 if (!backslash_start (pfile, cur - 2))
2468 handle_newline (cur, buffer->rlimit, c);
2474 buffer->cur = cur - 1; /* Leave newline for caller. */
2478 /* Skips whitespace, stopping at next non-whitespace character.
2479 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2480 to be assigned the correct column. */
2482 skip_whitespace (pfile, in_directive)
2486 cpp_buffer *buffer = pfile->buffer;
2487 register const unsigned char *cur = buffer->cur;
2488 unsigned short null_count = 0;
2490 for (; cur < buffer->rlimit; )
2492 unsigned char c = *cur++;
2496 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2497 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2498 - col % CPP_OPTION(pfile, tabstop));
2500 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2502 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2506 /* Must be '\f' or '\v' */
2507 else if (in_directive && CPP_PEDANTIC (pfile))
2508 cpp_pedwarn (pfile, "%s in preprocessing directive",
2509 c == '\f' ? "formfeed" : "vertical tab");
2514 buffer->cur = cur - 1;
2516 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2517 : "embedded null character ignored");
2520 /* Parse (append) an identifier. */
2522 parse_name (pfile, list, name)
2527 const unsigned char *name_limit;
2528 unsigned char *namebuf;
2529 cpp_buffer *buffer = pfile->buffer;
2530 register const unsigned char *cur = buffer->cur;
2533 name_limit = list->namebuf + list->name_cap;
2534 namebuf = list->namebuf + list->name_used;
2536 for (; cur < buffer->rlimit && namebuf < name_limit; )
2538 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2544 if (c == '$' && CPP_PEDANTIC (pfile))
2547 cpp_pedwarn (pfile, "'$' character in identifier");
2551 /* Run out of name space? */
2552 if (cur < buffer->rlimit)
2554 list->name_used = namebuf - list->namebuf;
2555 auto_expand_name_space (list);
2561 name->len = namebuf - name->text;
2562 list->name_used = namebuf - list->namebuf;
2565 /* Parse (append) a number. */
2567 #define VALID_SIGN(c, prevc) \
2568 (((c) == '+' || (c) == '-') && \
2569 ((prevc) == 'e' || (prevc) == 'E' \
2570 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2573 parse_number (pfile, list, name)
2578 const unsigned char *name_limit;
2579 unsigned char *namebuf;
2580 cpp_buffer *buffer = pfile->buffer;
2581 register const unsigned char *cur = buffer->cur;
2584 name_limit = list->namebuf + list->name_cap;
2585 namebuf = list->namebuf + list->name_used;
2587 for (; cur < buffer->rlimit && namebuf < name_limit; )
2589 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2591 /* Perhaps we should accept '$' here if we accept it for
2592 identifiers. We know namebuf[-1] is safe, because for c to
2593 be a sign we must have pushed at least one character. */
2594 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2601 /* Run out of name space? */
2602 if (cur < buffer->rlimit)
2604 list->name_used = namebuf - list->namebuf;
2605 auto_expand_name_space (list);
2611 name->len = namebuf - name->text;
2612 list->name_used = namebuf - list->namebuf;
2615 /* Places a string terminated by an unescaped TERMINATOR into a
2616 cpp_name, which should be expandable and thus at the top of the
2617 list's stack. Handles embedded trigraphs, if necessary, and
2620 Can be used for character constants (terminator = '\''), string
2621 constants ('"') and angled headers ('>'). Multi-line strings are
2622 allowed, except for within directives. */
2625 parse_string2 (pfile, list, name, terminator, multiline_ok)
2629 unsigned int terminator;
2632 cpp_buffer *buffer = pfile->buffer;
2633 register const unsigned char *cur = buffer->cur;
2634 const unsigned char *name_limit;
2635 unsigned char *namebuf;
2636 unsigned int null_count = 0;
2637 int trigraphed_len = 0;
2640 name_limit = list->namebuf + list->name_cap;
2641 namebuf = list->namebuf + list->name_used;
2643 for (; cur < buffer->rlimit && namebuf < name_limit; )
2645 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2649 else if (c == terminator || IS_NEWLINE (c))
2651 /* Needed for trigraph_replace and multiline string warning. */
2654 /* Scan for trigraphs before checking if backslash-escaped. */
2655 if (CPP_OPTION (pfile, trigraphs)
2656 || CPP_OPTION (pfile, warn_trigraphs))
2658 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2660 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2661 if (trigraphed_len < 0)
2665 namebuf--; /* Drop the newline / terminator from the name. */
2668 /* Drop a backslash newline, and continue. */
2669 if (namebuf[-1] == '\\')
2671 handle_newline (cur, buffer->rlimit, c);
2678 /* In Fortran and assembly language, silently terminate
2679 strings of either variety at end of line. This is a
2680 kludge around not knowing where comments are in these
2682 if (CPP_OPTION (pfile, lang_fortran)
2683 || CPP_OPTION (pfile, lang_asm))
2686 /* Character constants, headers and asserts may not
2687 extend over multiple lines. In Standard C, neither
2688 may strings. We accept multiline strings as an
2689 extension, but not in directives. */
2693 cur++; /* Move forwards again. */
2695 if (pfile->multiline_string_line == 0)
2697 pfile->multiline_string_line = list->line;
2698 if (CPP_PEDANTIC (pfile))
2699 cpp_pedwarn (pfile, "multi-line string constant");
2703 handle_newline (cur, buffer->rlimit, c);
2707 unsigned char *temp;
2709 /* An odd number of consecutive backslashes represents
2710 an escaped terminator. */
2712 while (temp >= name->text && *temp == '\\')
2715 if ((namebuf - temp) & 1)
2722 /* Run out of name space? */
2723 if (cur < buffer->rlimit)
2725 list->name_used = namebuf - list->namebuf;
2726 auto_expand_name_space (list);
2730 /* We may not have trigraph-replaced the input for this code path,
2731 but as the input is in error by being unterminated we don't
2732 bother. Prevent warnings about no newlines at EOF. */
2733 if (IS_NEWLINE(cur[-1]))
2737 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2739 if (terminator == '\"' && pfile->multiline_string_line != list->line
2740 && pfile->multiline_string_line != 0)
2742 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2743 "possible start of unterminated string literal");
2744 pfile->multiline_string_line = 0;
2749 name->len = namebuf - name->text;
2750 list->name_used = namebuf - list->namebuf;
2753 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2754 : "null character preserved"));
2757 /* The character TYPE helps us distinguish comment types: '*' = C
2758 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2759 the stored comment includes the comment start and any terminator. */
2761 #define COMMENT_START_LEN 2
2763 save_comment (list, token, from, len, type)
2766 const unsigned char *from;
2770 unsigned char *buffer;
2772 len += COMMENT_START_LEN;
2774 if (list->name_used + len > list->name_cap)
2775 expand_name_space (list, len);
2777 INIT_TOKEN_NAME (list, token);
2778 token->type = CPP_COMMENT;
2779 token->val.name.len = len;
2781 buffer = list->namebuf + list->name_used;
2782 list->name_used += len;
2784 /* Copy the comment. */
2795 memcpy (buffer, from, len - COMMENT_START_LEN);
2799 * The tokenizer's main loop. Returns a token list, representing a
2800 * logical line in the input file. On EOF after some tokens have
2801 * been processed, we return immediately. Then in next call, or if
2802 * EOF occurred at the beginning of a logical line, a single CPP_EOF
2803 * token is placed in the list.
2805 * Implementation relies almost entirely on lookback, rather than
2806 * looking forwards. This means that tokenization requires just
2807 * a single pass of the file, even in the presence of trigraphs and
2808 * escaped newlines, providing significant performance benefits.
2809 * Trigraph overhead is negligible if they are disabled, and low
2810 * even when enabled.
2813 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2816 _cpp_lex_line (pfile, list)
2820 cpp_token *cur_token, *token_limit;
2821 cpp_buffer *buffer = pfile->buffer;
2822 register const unsigned char *cur = buffer->cur;
2823 unsigned char flags = 0;
2824 unsigned int first_token = list->tokens_used;
2826 list->line = CPP_BUF_LINE (buffer);
2827 pfile->col_adjust = 0;
2829 token_limit = list->tokens + list->tokens_cap;
2830 cur_token = list->tokens + list->tokens_used;
2832 for (; cur < buffer->rlimit && cur_token < token_limit;)
2834 unsigned char c = *cur++;
2836 /* Optimize whitespace skipping, as most tokens are probably
2837 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2839 if (is_hspace ((unsigned int) c))
2841 /* Step back to get the null warning and tab correction. */
2842 buffer->cur = cur - 1;
2843 skip_whitespace (pfile, IS_DIRECTIVE ());
2846 flags = PREV_WHITESPACE;
2847 if (cur == buffer->rlimit)
2852 /* Initialize current token. Its type is set in the switch. */
2853 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2854 cur_token->flags = flags;
2859 case '0': case '1': case '2': case '3': case '4':
2860 case '5': case '6': case '7': case '8': case '9':
2864 cur--; /* Backup character. */
2865 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2868 INIT_TOKEN_NAME (list, cur_token);
2869 /* Prepend an immediately previous CPP_DOT token. */
2872 if (list->name_cap == list->name_used)
2873 auto_expand_name_space (list);
2875 cur_token->val.name.len = 1;
2876 list->namebuf[list->name_used++] = '.';
2880 cur_token->type = CPP_NUMBER; /* Before parse_number. */
2882 parse_number (pfile, list, &cur_token->val.name);
2890 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2891 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2892 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2893 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2895 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2896 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2897 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2898 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2900 cur--; /* Backup character. */
2901 INIT_TOKEN_NAME (list, cur_token);
2902 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2906 parse_name (pfile, list, &cur_token->val.name);
2909 /* Find handler for newly created / extended directive. */
2910 if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2911 _cpp_check_directive (list, cur_token);
2918 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2919 /* Do we have a wide string? */
2920 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2921 && cur_token[-1].val.name.len == 1
2922 && cur_token[-1].val.name.text[0] == 'L'
2923 && !CPP_TRADITIONAL (pfile))
2925 /* No need for 'L' any more. */
2927 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2931 /* Here c is one of ' " or >. */
2932 INIT_TOKEN_NAME (list, cur_token);
2934 parse_string2 (pfile, list, &cur_token->val.name, c,
2935 c == '"' && !IS_DIRECTIVE());
2941 cur_token->type = CPP_DIV;
2944 if (PREV_TOKEN_TYPE == CPP_DIV)
2946 /* We silently allow C++ comments in system headers,
2947 irrespective of conformance mode, because lots of
2948 broken systems do that and trying to clean it up
2949 in fixincludes is a nightmare. */
2950 if (buffer->system_header_p)
2951 goto do_line_comment;
2952 else if (CPP_OPTION (pfile, cplusplus_comments))
2954 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2955 && ! buffer->warned_cplusplus_comments)
2959 "C++ style comments are not allowed in ISO C89");
2961 "(this will be reported only once per input file)");
2962 buffer->warned_cplusplus_comments = 1;
2968 "comment start split across lines");
2969 if (skip_line_comment2 (pfile))
2970 cpp_error_with_line (pfile, list->line,
2972 "multi-line comment");
2974 /* Back-up to first '-' or '/'. */
2976 if (!CPP_OPTION (pfile, discard_comments)
2977 && (!IS_DIRECTIVE() || list->dirno == 0))
2978 save_comment (list, cur_token++, cur,
2979 buffer->cur - cur, c);
2982 if (!CPP_OPTION (pfile, traditional))
2983 flags = PREV_WHITESPACE;
2992 cur_token->type = CPP_MULT;
2995 if (PREV_TOKEN_TYPE == CPP_DIV)
3000 "comment start '/*' split across lines");
3001 if (skip_block_comment2 (pfile))
3002 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3003 "unterminated comment");
3004 else if (buffer->cur[-2] != '*')
3006 "comment end '*/' split across lines");
3008 /* Back up to opening '/'. */
3010 if (!CPP_OPTION (pfile, discard_comments)
3011 && (!IS_DIRECTIVE() || list->dirno == 0))
3012 save_comment (list, cur_token++, cur,
3013 buffer->cur - cur, c);
3016 if (!CPP_OPTION (pfile, traditional))
3017 flags = PREV_WHITESPACE;
3020 else if (CPP_OPTION (pfile, cplusplus))
3022 /* In C++, there are .* and ->* operators. */
3023 if (PREV_TOKEN_TYPE == CPP_DEREF)
3024 BACKUP_TOKEN (CPP_DEREF_STAR);
3025 else if (PREV_TOKEN_TYPE == CPP_DOT)
3026 BACKUP_TOKEN (CPP_DOT_STAR);
3034 handle_newline (cur, buffer->rlimit, c);
3035 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3037 /* Remove the escaped newline. Then continue to process
3038 any interrupted name or number. */
3043 if (cur_token->type == CPP_NAME)
3045 else if (cur_token->type == CPP_NUMBER)
3046 goto continue_number;
3049 /* Remember whitespace setting. */
3050 flags = cur_token->flags;
3053 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3056 cpp_warning (pfile, "backslash and newline separated by space");
3058 /* Skip vertical space until we have at least one token to return. */
3060 if (cur_token != &list->tokens[first_token])
3062 list->line = CPP_BUF_LINE (buffer);
3066 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3068 if (CPP_OPTION (pfile, chill))
3069 goto do_line_comment;
3070 REVISE_TOKEN (CPP_MINUS_MINUS);
3073 PUSH_TOKEN (CPP_MINUS);
3076 /* The digraph flag checking ensures that ## and %:%:
3077 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3080 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3081 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3082 REVISE_TOKEN (CPP_PASTE);
3084 PUSH_TOKEN (CPP_HASH);
3088 cur_token->type = CPP_COLON;
3091 if (PREV_TOKEN_TYPE == CPP_COLON
3092 && CPP_OPTION (pfile, cplusplus))
3093 BACKUP_TOKEN (CPP_SCOPE);
3094 /* Digraph: "<:" is a '[' */
3095 else if (PREV_TOKEN_TYPE == CPP_LESS)
3096 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3097 /* Digraph: "%:" is a '#' */
3098 else if (PREV_TOKEN_TYPE == CPP_MOD)
3100 (--cur_token)->flags |= DIGRAPH;
3108 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3109 REVISE_TOKEN (CPP_AND_AND);
3111 PUSH_TOKEN (CPP_AND);
3116 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3117 REVISE_TOKEN (CPP_OR_OR);
3119 PUSH_TOKEN (CPP_OR);
3123 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3124 REVISE_TOKEN (CPP_PLUS_PLUS);
3126 PUSH_TOKEN (CPP_PLUS);
3130 /* This relies on equidistance of "?=" and "?" tokens. */
3131 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3132 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3134 PUSH_TOKEN (CPP_EQ);
3138 cur_token->type = CPP_GREATER;
3141 if (PREV_TOKEN_TYPE == CPP_GREATER)
3142 BACKUP_TOKEN (CPP_RSHIFT);
3143 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3144 BACKUP_TOKEN (CPP_DEREF);
3145 /* Digraph: ":>" is a ']' */
3146 else if (PREV_TOKEN_TYPE == CPP_COLON)
3147 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3148 /* Digraph: "%>" is a '}' */
3149 else if (PREV_TOKEN_TYPE == CPP_MOD)
3150 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3156 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3158 REVISE_TOKEN (CPP_LSHIFT);
3161 /* Is this the beginning of a header name? */
3162 if (list->flags & SYNTAX_INCLUDE)
3164 c = '>'; /* Terminator. */
3165 cur_token->type = CPP_HEADER_NAME;
3166 goto do_parse_string;
3168 PUSH_TOKEN (CPP_LESS);
3172 /* Digraph: "<%" is a '{' */
3173 cur_token->type = CPP_MOD;
3174 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3175 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3180 if (cur + 1 < buffer->rlimit && *cur == '?'
3181 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3183 /* Handle trigraph. */
3187 case '(': goto make_open_square;
3188 case ')': goto make_close_square;
3189 case '<': goto make_open_brace;
3190 case '>': goto make_close_brace;
3191 case '=': goto make_hash;
3192 case '!': goto make_or;
3193 case '-': goto make_complement;
3194 case '/': goto make_backslash;
3195 case '\'': goto make_xor;
3198 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3200 /* GNU C++ defines <? and >? operators. */
3201 if (PREV_TOKEN_TYPE == CPP_LESS)
3203 REVISE_TOKEN (CPP_MIN);
3206 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3208 REVISE_TOKEN (CPP_MAX);
3212 PUSH_TOKEN (CPP_QUERY);
3216 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3218 && !(cur_token[-1].flags & PREV_WHITESPACE))
3221 PUSH_TOKEN (CPP_ELLIPSIS);
3224 PUSH_TOKEN (CPP_DOT);
3228 case '~': PUSH_TOKEN (CPP_COMPL); break;
3230 case '^': PUSH_TOKEN (CPP_XOR); break;
3232 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3234 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3236 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3238 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3240 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3241 case '!': PUSH_TOKEN (CPP_NOT); break;
3242 case ',': PUSH_TOKEN (CPP_COMMA); break;
3243 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3244 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3245 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3248 if (CPP_OPTION (pfile, dollars_in_ident))
3253 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3254 PUSH_TOKEN (CPP_OTHER);
3259 /* Run out of token space? */
3260 if (cur_token == token_limit)
3262 list->tokens_used = cur_token - list->tokens;
3263 _cpp_expand_token_space (list, 256);
3267 cur_token->flags = flags;
3268 if (cur_token == &list->tokens[first_token])
3270 /* FIXME: move this warning to callers who care. */
3271 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3272 cpp_warning (pfile, "no newline at end of file");
3273 cur_token++->type = CPP_EOF;
3277 list->tokens[first_token].flags |= BOL;
3279 list->tokens_used = cur_token - list->tokens;
3282 /* Write the spelling of token TOKEN to BUFFER. The buffer must
3283 already contain enough space to hold the token's spelling. If
3284 WHITESPACE is true, and the token was preceded by whitespace,
3285 output a single space before the token proper. Returns a pointer
3286 to the character after the last character written.
   In the statements visible here PFILE is only handed to cpp_ice.
   NOTE(review): this listing appears to omit some lines (braces, some
   case labels, and the statements controlled by several of the `if's);
   the comments below describe only the statements that are visible --
   confirm against the complete source. */
3288 static unsigned char *
3289 spell_token (pfile, token, buffer, whitespace)
3290 cpp_reader *pfile; /* Would be nice to be rid of this... */
3291 const cpp_token *token; /* Token whose spelling is written. */
3292 unsigned char *buffer; /* Destination; advanced as text is written. */
3295 /* Whitespace will not be wanted by handlers of the # and ##
3296 operators calling this function, but will be wanted by the
3297 function that writes out the preprocessed file. */
3298 if (whitespace && token->flags & PREV_WHITESPACE)
/* Dispatch on the `type' field of the token_spellings entry for this
   token's type. */
3301 switch (token_spellings[token->type].type)
3303 case SPELL_OPERATOR:
3305 const unsigned char *spelling;
/* A token flagged DIGRAPH takes its spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH. */
3308 if (token->flags & DIGRAPH)
3309 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
/* Spelling taken from the token_spellings table (presumably the `else'
   arm of the test above -- TODO confirm). */
3311 spelling = token_spellings[token->type].spelling;
/* Step through the spelling up to its terminating NUL. */
3313 while ((c = *spelling++) != '\0')
/* Copy val.name.len bytes of the token's text and advance BUFFER
   past them. */
3319 memcpy (buffer, token->val.name.text, token->val.name.len);
3320 buffer += token->val.name.len;
/* Wide string / wide character tokens are tested first, then string
   tokens (narrow or wide). */
3327 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3330 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
/* Copy the token's text and advance BUFFER past it. */
3333 memcpy (buffer, token->val.name.text, token->val.name.len);
3334 buffer += token->val.name.len;
/* Store token->aux as a single character. */
3340 *buffer++ = token->aux;
/* Report the token through cpp_ice. */
3344 cpp_ice (pfile, "Unspellable token");
3351 /* Temporary function for illustrative purposes. From the calls
   visible here: calls init_trigraph_map, allocates one cpp_toklist
   with xmalloc and passes it to _cpp_init_toklist, then lexes into it
   with _cpp_lex_line. The type of the first token is compared with
   CPP_EOF, and the list is handed to _cpp_handle_directive and
   _cpp_output_list and finally to _cpp_clear_toklist.
   NOTE(review): the loop and branch structure around these calls is
   not visible in this listing -- confirm against the complete source. */
3353 _cpp_lex_file (pfile)
3358 init_trigraph_map ();
/* Storage for a single token list. */
3359 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3360 _cpp_init_toklist (list, DUMMY_TOKEN);
3364 _cpp_lex_line (pfile, list);
/* Test whether the list just lexed starts with an end-of-file token. */
3365 if (list->tokens[0].type == CPP_EOF)
3370 _cpp_handle_directive (pfile, list);
3373 _cpp_output_list (pfile, list);
3374 _cpp_clear_toklist (list);
3378 /* Temporary function for illustrative purposes. */
3380 _cpp_output_list (pfile, list)
3386 for (i = 0; i < list->tokens_used; i++)
3388 CPP_RESERVE (pfile, TOKEN_LEN (&list->tokens[i]));
3389 pfile->limit = spell_token (pfile, &list->tokens[i], pfile->limit, 1);