1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Raw character access on a cpp_buffer.  PEEKBUF and GETBUF yield EOF
   instead of reading at or beyond rlimit; FORWARDBUF advances cur with
   no bounds check.  */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands that apply the macros above to CPP_BUFFER (pfile); they
   rely on a variable named pfile being in scope where they are used.  */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
/* Forward declarations for functions defined or used in this file.
   NOTE(review): several declarations below show only their first line in
   this listing (they end in a comma); keep their continuation lines
   intact when editing the real file.  */
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Ask expand_name_space for 1 + name_cap / 2 additional bytes.  */
60 #define auto_expand_name_space(list) \
61 expand_name_space ((list), 1 + (list)->name_cap / 2)
65 static void expand_comment_space PARAMS ((cpp_toklist *));
66 void init_trigraph_map PARAMS ((void));
67 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
69 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
70 const unsigned char *));
71 static int skip_block_comment2 PARAMS ((cpp_reader *));
72 static int skip_line_comment2 PARAMS ((cpp_reader *));
73 static void skip_whitespace PARAMS ((cpp_reader *, int));
74 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
75 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
76 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
78 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
79 static void save_comment PARAMS ((cpp_toklist *, cpp_token *, unsigned char *,
80 unsigned int, unsigned int));
81 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
83 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
85 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
86 unsigned char *, int));
88 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
91 /* Macros on a cpp_name. */
/* INIT_TOKEN_NAME resets TOKEN's name to zero length, points its text at
   the first unused byte of LIST's name buffer, and sets LIST's
   tokens_used to TOKEN's index plus one.  */
92 #define INIT_TOKEN_NAME(list, token) \
93 do {(token)->val.name.len = 0; \
94 (token)->val.name.text = (list)->namebuf + (list)->name_used; \
95 (list)->tokens_used = token - (list)->tokens + 1; \
98 /* Maybe put these in the ISTABLE eventually. */
99 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
100 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
102 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
103 character, if any, is in buffer. */
/* '\r' + '\n' - c evaluates to the other newline character of the pair.
   NOTE(review): c is expanded without parentheses, and the body uses
   pfile from the scope where the macro is invoked.  */
104 #define handle_newline(cur, limit, c) \
106 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
108 CPP_BUMP_LINE_CUR (pfile, (cur)); \
109 pfile->col_adjust = 0; \
112 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
/* The token macros here read and write a variable named cur_token in the
   invoking scope.  PUSH_TOKEN stores TTYPE and then advances cur_token;
   REVISE_TOKEN overwrites the type of the token before cur_token;
   BACKUP_TOKEN steps cur_token back by one before storing TTYPE, and
   BACKUP_DIGRAPH additionally sets the DIGRAPH flag on that token.  */
113 #define PREV_TOKEN_TYPE (cur_token[-1].type)
115 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
116 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
117 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
118 #define BACKUP_DIGRAPH(ttype) do { \
119 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
121 /* An upper bound on the number of bytes needed to spell a token,
122 including preceding whitespace. */
/* Computed as 5 bytes, plus the stored name length when the token's
   spelling type is greater than SPELL_NONE.  */
123 #define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
124 SPELL_NONE ? token->val.name.len: 0))
128 /* Order here matters. Those beyond SPELL_NONE store their spelling
129 in the token list, and its length in token->val.name.len. */
134 SPELL_CHAR, /* FIXME: revert order of NONE and CHAR after transition. */
/* Initializer builders, presumably invoked by TTYPE_TABLE: each expands
   to a {spell type, spelling} pair and ignores its first argument.  Only
   T casts its string to const U_CHAR *.  */
139 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
140 #define I(e, s) {SPELL_IDENT, s},
141 #define S(e, s) {SPELL_STRING, s},
142 #define C(e, s) {SPELL_CHAR, s},
143 #define N(e, s) {SPELL_NONE, s},
/* Spelling category and fixed spelling per token type: the expansion of
   TTYPE_TABLE followed by a final {0, 0} entry.  */
145 static const struct token_spelling
147 ENUM_BITFIELD(spell_type) type : CHAR_BIT;
148 const U_CHAR *spelling;
149 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
157 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
/* The new size is N plus twice the previous size.  The written offset is
   captured before the xrealloc and re-applied afterwards; presumably the
   write position is kept as a pointer into the buffer, which xrealloc
   may move -- confirm against CPP_SET_WRITTEN.  */
160 _cpp_grow_token_buffer (pfile, n)
164 long old_written = CPP_WRITTEN (pfile);
165 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
166 pfile->token_buffer = (U_CHAR *)
167 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
168 CPP_SET_WRITTEN (pfile, old_written);
171 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
172 If BUFFER != NULL, then use the LENGTH characters in BUFFER
173 as the new input buffer.
174 Return the new buffer, or NULL on failure. */
/* NOTE(review): the visible code uses BUFFER and BUFFER + LENGTH
   unconditionally; no NULL test on BUFFER appears in this listing.  */
177 cpp_push_buffer (pfile, buffer, length)
179 const U_CHAR *buffer;
182 cpp_buffer *buf = CPP_BUFFER (pfile);
/* The depth counter is incremented before the comparison, so reaching
   CPP_STACK_MAX is what triggers the fatal error.  */
184 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
186 cpp_fatal (pfile, "macro or `#include' recursion too deep");
/* xcalloc zero-fills the new buffer, so only the non-zero fields are
   assigned below.  */
190 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
192 new->if_stack = pfile->if_stack;
/* The new buffer reads [buffer, buffer + length) in place; the data is
   not copied here.  */
193 new->buf = new->cur = buffer;
194 new->rlimit = buffer + length;
197 new->line_base = NULL;
199 CPP_BUFFER (pfile) = new;
/* Remove the current buffer from PFILE's buffer stack and return the
   buffer that becomes current afterwards.  */
204 cpp_pop_buffer (pfile)
207 cpp_buffer *buf = CPP_BUFFER (pfile);
/* Popping while a mark is active is reported as an internal error.  */
208 if (ACTIVE_MARK_P (pfile))
209 cpp_ice (pfile, "mark active in cpp_pop_buffer");
213 _cpp_unwind_if_stack (pfile, buf);
/* NOTE(review): buf->buf is freed here and again further down; presumably
   the two calls sit in mutually exclusive file/macro branches whose
   conditions are not shown in this listing -- confirm.  */
215 free ((PTR) buf->buf);
216 if (pfile->system_include_depth)
217 pfile->system_include_depth--;
/* A pending control-macro candidate is recorded on the buffer's ihash
   entry and then cleared.  */
218 if (pfile->potential_control_macro)
220 buf->ihash->control_macro = pfile->potential_control_macro;
221 pfile->potential_control_macro = 0;
223 pfile->input_stack_listing_current = 0;
227 HASHNODE *m = buf->macro;
/* For a buffer with a macro attached, the text is freed only for mapped
   function-like macros and for the special macro types listed.  */
230 if ((m->type == T_FMACRO && buf->mapped)
231 || m->type == T_SPECLINE || m->type == T_FILE
232 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
233 || m->type == T_STDC)
234 free ((PTR) buf->buf);
/* Unlink the buffer, drop the depth counter, and hand back the new top.  */
236 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
238 pfile->buffer_stack_depth--;
239 return CPP_BUFFER (pfile);
242 /* Deal with the annoying semantics of fwrite. */
/* Writes LEN one-byte items from BUF to FP.  The visible error path
   reports through cpp_notice_from_errno, naming the out_fname option;
   the condition that selects that path is not shown in this listing.  */
244 safe_fwrite (pfile, buf, len, fp)
254 count = fwrite (buf, 1, len, fp);
263 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
266 /* Notify the compiler proper that the current line number has jumped,
267 or the current file name has changed. */
/* Emits either a few newlines or a "# LINE "FILE" FLAGS" marker on
   PRINT->outf, then records LINE in PRINT->lineno.  */
270 output_line_command (pfile, print, line)
/* NOTE(review): IP is dereferenced below without a visible NULL check;
   confirm cpp_file_buffer cannot return NULL here.  */
275 cpp_buffer *ip = cpp_file_buffer (pfile);
276 enum { same = 0, enter, leave, rname } change;
/* Marker suffixes laid out in the order of the enum above (presumably
   indexed by CHANGE): " 1" for enter, " 2" for leave, none otherwise.  */
277 static const char * const codes[] = { "", " 1", " 2", "" };
279 if (CPP_OPTION (pfile, no_line_commands))
282 /* Determine whether the current filename has changed, and if so,
283 how. 'nominal_fname' values are unique, so they can be compared
284 by comparing pointers. */
285 if (ip->nominal_fname == print->last_fname)
/* The buffer stack depth is compared with the depth recorded at the last
   marker; the depth and filename are then remembered for the next call.  */
289 if (pfile->buffer_stack_depth == print->last_bsd)
293 if (pfile->buffer_stack_depth > print->last_bsd)
297 print->last_bsd = pfile->buffer_stack_depth;
299 print->last_fname = ip->nominal_fname;
301 /* If the current file has not changed, we can output a few newlines
302 instead if we want to increase the line number by a small amount.
303 We cannot do this if print->lineno is zero, because that means we
304 haven't output any line commands yet. (The very first line
305 command output is a `same_file' command.) */
306 if (change == same && print->lineno != 0
307 && line >= print->lineno && line < print->lineno + 8)
309 while (line > print->lineno)
311 putc ('\n', print->outf);
/* In C++ mode (when NO_IMPLICIT_EXTERN_C is not defined) the marker can
   carry an extra " 4" flag when system_header_p is 2; " 3" marks any
   system header in both forms.  */
317 #ifndef NO_IMPLICIT_EXTERN_C
318 if (CPP_OPTION (pfile, cplusplus))
319 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
321 ip->system_header_p ? " 3" : "",
322 (ip->system_header_p == 2) ? " 4" : "");
325 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
327 ip->system_header_p ? " 3" : "");
328 print->lineno = line;
331 /* Write the contents of the token_buffer to the output stream, and
332 clear the token_buffer. Also handles generating line commands and
333 keeping track of file transitions. */
336 cpp_output_tokens (pfile, print)
/* Only act when something has been written beyond PRINT->written.  */
342 if (CPP_WRITTEN (pfile) - print->written)
344 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
/* NOTE(review): the write starts at the beginning of token_buffer while
   the length excludes PRINT->written bytes; confirm whether the start
   should be offset by PRINT->written when that value is non-zero.  */
346 safe_fwrite (pfile, pfile->token_buffer,
347 CPP_WRITTEN (pfile) - print->written, print->outf);
350 ip = cpp_file_buffer (pfile);
352 output_line_command (pfile, print, CPP_BUF_LINE (ip));
/* Rewind the write position to PRINT->written, discarding what was just
   flushed.  */
354 CPP_SET_WRITTEN (pfile, print->written);
357 /* Helper for cpp_output_list - increases the column number to match
358 what we expect it to be. */
/* Emits TO - FROM columns of padding on PRINT->outf.  */
361 bump_column (print, from, to)
363 unsigned int from, to;
365 unsigned int tabs, spcs;
/* NOTE(review): unsigned subtraction; a TO smaller than FROM would wrap
   to a very large delta.  */
366 unsigned int delta = to - from;
368 /* Only if FROM is 0, advance by tabs. */
/* One tab per 8 columns, with the remainder as spaces; otherwise the
   whole delta is emitted as spaces.  */
370 tabs = delta / 8, spcs = delta % 8;
372 tabs = 0, spcs = delta;
374 while (tabs--) putc ('\t', print->outf);
375 while (spcs--) putc (' ', print->outf);
378 /* Write out the list L onto pfile->token_buffer. This function is
381 1) pfile->token_buffer is not going to continue to exist.
382 2) At the moment, tokens don't carry the information described
383 in cpplib.h; they are all strings.
384 3) The list has to be a complete line, and has to be written starting
385 at the beginning of a line. */
/* NOTE(review): the visible body writes token text to PRINT->outf through
   safe_fwrite rather than into pfile->token_buffer; the header comment
   above looks out of date -- confirm.  */
388 cpp_output_list (pfile, print, list)
391 const cpp_toklist *list;
/* Columns are tracked 1-based.  */
394 unsigned int curcol = 1;
396 /* XXX Probably does not do what is intended. */
397 if (print->lineno != list->line)
398 output_line_command (pfile, print, list->line);
400 for (i = 0; i < list->tokens_used; i++)
/* A CPP_VSPACE token triggers a line command using the line number kept
   in the token's aux field.  */
402 if (TOK_TYPE (list, i) == CPP_VSPACE)
404 output_line_command (pfile, print, list->tokens[i].aux);
408 if (curcol < TOK_COL (list, i))
410 /* Insert space to bring the column to what it should be. */
411 bump_column (print, curcol - 1, TOK_COL (list, i));
412 curcol = TOK_COL (list, i);
414 /* XXX We may have to insert space to prevent an accidental
416 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
417 curcol += TOK_LEN (list, i);
421 /* Scan a string (which may have escape marks), perform macro expansion,
422 and write the result to the token_buffer. */
425 _cpp_expand_to_buffer (pfile, buf, length)
431 enum cpp_ttype token;
436 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
440 /* Copy the buffer, because it might be in an unsafe place - for
441 example, a sequence on the token_buffer, where the pointers will
442 be invalidated if we enlarge the token_buffer. */
/* NOTE(review): the copy lives on the stack via alloca, so stack use
   grows with LENGTH and there is no failure indication.  */
443 buf1 = alloca (length);
444 memcpy (buf1, buf, length);
446 /* Set up the input on the input stack. */
/* STOP remembers the buffer that was current before the push; scanning
   ends when an EOF token is seen with that buffer current again.  */
447 stop = CPP_BUFFER (pfile);
448 if (cpp_push_buffer (pfile, buf1, length) == NULL)
450 CPP_BUFFER (pfile)->has_escapes = 1;
452 /* Scan the input, create the output. */
455 token = cpp_get_token (pfile);
456 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
461 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
464 cpp_scan_buffer_nooutput (pfile)
/* STOP is the buffer beneath the current one; the scan ends when an EOF
   token is returned with STOP as the current buffer.  */
467 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
468 enum cpp_ttype token;
469 unsigned int old_written = CPP_WRITTEN (pfile);
470 /* In no-output mode, we can ignore everything but directives. */
/* Once something other than whitespace has been seen on the line, the
   remainder of the line is skipped.  */
473 if (! pfile->only_seen_white)
474 _cpp_skip_rest_of_line (pfile);
475 token = cpp_get_token (pfile);
476 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
/* Restore the write position saved on entry, dropping anything that was
   produced during the scan.  */
479 CPP_SET_WRITTEN (pfile, old_written);
482 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
485 cpp_scan_buffer (pfile, print)
/* STOP is the buffer beneath the current one; the scan ends when an EOF
   token is returned with STOP as the current buffer.  */
489 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
490 enum cpp_ttype token;
494 token = cpp_get_token (pfile);
/* Output is flushed at each vertical-space or EOF token, and after a
   directive that changed the current nominal file name.  */
495 if (token == CPP_VSPACE || token == CPP_EOF
496 /* XXX Temporary kluge - force flush after #include only */
497 || (token == CPP_DIRECTIVE
498 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
500 cpp_output_tokens (pfile, print);
501 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
507 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
/* Walks from the current buffer through CPP_PREV_BUFFER links and tests
   each for a non-NULL ihash.  What is returned when no buffer matches is
   not visible in this listing.  */
510 cpp_file_buffer (pfile)
515 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
516 if (ip->ihash != NULL)
521 /* Token-buffer helper functions. */
523 /* Expand a token list's string space. It is *vital* that
524 list->tokens_used is correct, to get pointer fix-up right. */
/* Grows the name buffer by LEN bytes and, if xrealloc moved it, rebases
   the text pointer of every used token that stores its spelling there.  */
526 expand_name_space (list, len)
530 const U_CHAR *old_namebuf;
532 old_namebuf = list->namebuf;
533 list->name_cap += len;
534 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
536 /* Fix up token text pointers. */
/* NOTE(review): the fix-up subtracts the old pointer after xrealloc may
   have released it; that pointer difference is not strictly defined by
   the C standard, though it is a common idiom.  */
537 if (list->namebuf != old_namebuf)
541 for (i = 0; i < list->tokens_used; i++)
542 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
543 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
547 /* Expand the number of tokens in a list. */
/* Raises the token capacity by COUNT and reallocates the token array.
   For LIST_OFFSET lists the stored pointer is advanced past the dummy
   token again after the reallocation; the matching adjustment made
   before the xrealloc is not visible in this listing.  */
549 _cpp_expand_token_space (list, count)
555 list->tokens_cap += count;
556 n = list->tokens_cap;
557 if (list->flags & LIST_OFFSET)
559 list->tokens = (cpp_token *)
560 xrealloc (list->tokens, n * sizeof (cpp_token));
561 if (list->flags & LIST_OFFSET)
562 list->tokens++; /* Skip the dummy. */
565 /* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
566 an extra token in front of the token list, as this allows the lexer
567 to always peek at the previous token without worrying about
568 underflowing the list, and some initial space. Otherwise, no
569 token- or name-space is allocated, and there is no dummy token. */
571 _cpp_init_toklist (list, flags)
575 /* We malloc zero bytes because we may want to realloc later, and
576 some old implementations don't like realloc-ing a null pointer. */
577 if (flags == NO_DUMMY_TOKEN)
579 list->tokens_cap = 0;
/* NOTE(review): plain malloc is used here (not xmalloc) and its result
   is not checked in the visible code; malloc (0) may legitimately return
   NULL.  */
580 list->tokens = (cpp_token *) malloc (0);
586 /* Initialize token space. Put a dummy token before the start
587 that will fail matches. */
588 list->tokens_cap = 256; /* 4K's worth. */
/* One slot beyond tokens_cap is allocated; slot 0 is given type
   CPP_EOF.  */
589 list->tokens = (cpp_token *)
590 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
591 list->tokens[0].type = CPP_EOF;
594 /* Initialize name space. */
595 list->name_cap = 1024;
596 list->flags = LIST_OFFSET;
599 /* Allocate name space. */
600 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
602 _cpp_clear_toklist (list);
605 /* Clear a token list. */
/* Resets the used-token count and drops every flag except LIST_OFFSET.
   No memory is released by the statements shown here.  */
607 _cpp_clear_toklist (list)
610 list->tokens_used = 0;
613 list->flags &= LIST_OFFSET; /* clear all but that one */
616 /* Free a token list. Does not free the list itself, which may be
617 embedded in a larger structure. */
/* For LIST_OFFSET lists the stored token pointer is one past the start
   of the allocation, so it is stepped back before being freed.  The name
   buffer is freed as well.  */
619 _cpp_free_toklist (list)
622 if (list->flags & LIST_OFFSET)
623 free (list->tokens - 1); /* Backup over dummy token. */
626 free (list->namebuf);
/* NOTE(review): _cpp_slice_toklist copies N tokens starting at START into
   freshly allocated storage, sums the name lengths of the tokens that
   store their spelling, allocates exactly that many name bytes, and then
   copies each name while re-pointing the copied token at it.  tokens_cap
   is assigned N twice in the visible code; the second assignment looks
   redundant.  The header comment that follows is cut short in this
   listing.  */
629 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
630 COPY is assumed not to be initialized. The comment space is not
633 _cpp_slice_toklist (copy, start, finish)
635 const cpp_token *start, *finish;
641 copy->tokens_cap = n;
642 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
643 memcpy (copy->tokens, start, n * sizeof (cpp_token));
646 for (i = 0; i < n; i++)
647 if (token_spellings[start[i].type].type > SPELL_NONE)
648 bytes += start[i].val.name.len;
650 copy->namebuf = xmalloc (bytes);
652 for (i = 0; i < n; i++)
653 if (token_spellings[start[i].type].type > SPELL_NONE)
655 memcpy (copy->namebuf + bytes,
656 start[i].val.name.text, start[i].val.name.len);
657 copy->tokens[i].val.name.text = copy->namebuf + bytes;
658 bytes += start[i].val.name.len;
661 copy->tokens_cap = n;
662 copy->tokens_used = n;
663 copy->name_used = bytes;
664 copy->name_cap = bytes;
670 /* Shrink a token list down to the minimum size. */
/* Reallocates the token array to exactly tokens_used entries and the
   name buffer to exactly name_used bytes, then rebases token text
   pointers by the distance the name buffer moved.  */
672 _cpp_squeeze_toklist (list)
676 const U_CHAR *old_namebuf;
/* For LIST_OFFSET lists the used tokens are slid down one slot (over the
   dummy) and the flag is cleared.  */
678 if (list->flags & LIST_OFFSET)
681 memmove (list->tokens, list->tokens + 1,
682 list->tokens_used * sizeof (cpp_token));
683 list->tokens = xrealloc (list->tokens,
684 list->tokens_used * sizeof (cpp_token));
685 list->flags &= ~LIST_OFFSET;
688 list->tokens = xrealloc (list->tokens,
689 list->tokens_used * sizeof (cpp_token));
690 list->tokens_cap = list->tokens_used;
692 old_namebuf = list->namebuf;
693 list->namebuf = xrealloc (list->namebuf, list->name_used);
694 list->name_cap = list->name_used;
696 /* Fix up token text pointers. */
/* NOTE(review): DELTA is computed from the pre-realloc pointer, which
   xrealloc may already have released.  */
697 delta = list->namebuf - old_namebuf;
702 for (i = 0; i < list->tokens_used; i++)
703 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
704 list->tokens[i].val.name.text += delta;
708 /* Compare two tokens. */
/* Type and flags are compared first (further conditions of that test
   are not visible in this listing).  For token types that store their
   spelling, the name lengths and then the name text are compared.  */
710 _cpp_equiv_tokens (a, b)
711 const cpp_token *a, *b;
713 if (a->type != b->type
714 || a->flags != b->flags
718 if (token_spellings[a->type].type > SPELL_NONE)
720 if (a->val.name.len != b->val.name.len
721 || ustrncmp(a->val.name.text,
729 /* Compare two token lists. */
/* The lists are first compared by used-token count, then token by token
   with _cpp_equiv_tokens.  */
731 _cpp_equiv_toklists (a, b)
732 const cpp_toklist *a, *b;
736 if (a->tokens_used != b->tokens_used)
739 for (i = 0; i < a->tokens_used; i++)
740 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
745 /* Scan until we encounter a token of type STOP or a newline, and
746 create a token list for it. Does not macro-expand or execute
747 directives. The final token is not included in the list or
748 consumed from the input. Returns the type of the token stopped at. */
/* Each token is lexed into the token_buffer, measured, and the write
   position is rewound at once, so the token text sits just past the
   restored write position when it is copied into LIST's name buffer.  */
751 _cpp_scan_until (pfile, list, stop)
761 _cpp_clear_toklist (list);
762 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
764 written = CPP_WRITTEN (pfile);
/* The column is taken before lexing, as the offset of the read position
   from line_base.  */
769 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
770 type = _cpp_lex_token (pfile);
771 len = CPP_WRITTEN (pfile) - written;
772 CPP_SET_WRITTEN (pfile, written);
773 if (type == CPP_HSPACE)
775 if (CPP_PEDANTIC (pfile))
776 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
780 else if (type == CPP_COMMENT)
781 /* Only happens when processing -traditional macro definitions.
782 Do not give this a token entry, but do not change space_before
786 if (list->tokens_used >= list->tokens_cap)
787 _cpp_expand_token_space (list, 256);
788 if (list->name_used + len >= list->name_cap)
789 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
791 if (type == CPP_MACRO)
794 if (type == CPP_VSPACE || type == stop)
798 TOK_TYPE (list, i) = type;
799 TOK_COL (list, i) = col;
800 TOK_AUX (list, i) = 0;
801 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
803 TOK_LEN (list, i) = len;
804 if (token_spellings[type].type > SPELL_NONE)
806 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
807 TOK_NAME (list, i) = list->namebuf + list->name_used;
808 list->name_used += len;
811 TOK_NAME (list, i) = token_spellings[type].spelling;
816 /* XXX Temporary kluge: put back the newline (or whatever). */
819 /* Don't consider the first token to have white before. */
/* NOTE(review): this writes token 0's flags; whether an empty list is
   excluded beforehand is not visible in this listing.  */
820 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
824 /* Skip a C-style block comment. We know it's a comment, and point is
825 at the second character of the starter. */
/* The starting line and column are captured up front so that an
   unterminated comment is reported at the position where it began.  The
   scan works on local copies of the buffer's cur and rlimit, and cur is
   stored back when the scan finishes.  */
827 skip_block_comment (pfile)
830 unsigned int line, col;
831 const U_CHAR *limit, *cur;
834 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
835 col = CPP_BUF_COL (CPP_BUFFER (pfile));
836 limit = CPP_BUFFER (pfile)->rlimit;
837 cur = CPP_BUFFER (pfile)->cur;
842 if (c == '\n' || c == '\r')
844 /* \r cannot be a macro escape marker here. */
845 if (!ACTIVE_MARK_P (pfile))
846 CPP_BUMP_LINE_CUR (pfile, cur);
850 /* Check for terminator. */
851 if (cur < limit && *cur == '/')
854 /* Warn about comment starter embedded in comment. */
/* cur[-2] is the character before the one just examined.  */
855 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
856 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
857 cur - CPP_BUFFER (pfile)->line_base,
858 "'/*' within comment");
862 cpp_error_with_line (pfile, line, col, "unterminated comment");
865 CPP_BUFFER (pfile)->cur = cur + 1;
868 /* Skip a C++/Chill line comment. We know it's a comment, and point
869 is at the second character of the initiator. */
/* A backslash-newline inside the comment advances the line count (unless
   a mark is active) and can be warned about under warn_comments.  */
871 skip_line_comment (pfile)
879 /* We don't have to worry about EOF in here. */
882 /* Don't consider final '\n' to be part of comment. */
888 /* \r cannot be a macro escape marker here. */
889 if (!ACTIVE_MARK_P (pfile))
890 CPP_BUMP_LINE (pfile);
891 if (CPP_OPTION (pfile, warn_comments))
892 cpp_warning (pfile, "backslash-newline within line comment");
897 /* Skip a comment - C, C++, or Chill style. M is the first character
898 of the comment marker. If this really is a comment, skip to its
899 end and return ' '. If this is not a comment, return M (which will
903 skip_comment (pfile, m)
907 if (m == '/' && PEEKC() == '*')
909 skip_block_comment (pfile);
912 else if (m == '/' && PEEKC() == '/')
914 if (CPP_BUFFER (pfile)->system_header_p)
916 /* We silently allow C++ comments in system headers, irrespective
917 of conformance mode, because lots of busted systems do that
918 and trying to clean it up in fixincludes is a nightmare. */
919 skip_line_comment (pfile);
/* Outside system headers a line comment is recognised only when the
   cplusplus_comments option is set.  The per-buffer flag
   warned_cplusplus_comments limits the diagnostics below to once per
   buffer.  */
922 else if (CPP_OPTION (pfile, cplusplus_comments))
924 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
926 if (CPP_WTRADITIONAL (pfile))
928 "C++ style comments are not allowed in traditional C");
929 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
931 "C++ style comments are not allowed in ISO C89");
932 if (CPP_WTRADITIONAL (pfile)
933 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
935 "(this will be reported only once per input file)");
936 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
938 skip_line_comment (pfile);
/* With the chill option set, "--" also starts a line comment.  */
944 else if (m == '-' && PEEKC() == '-'
945 && CPP_OPTION (pfile, chill))
947 skip_line_comment (pfile);
954 /* Identical to skip_comment except that it copies the comment into the
955 token_buffer. This is used if !discard_comments. */
/* START is captured before skip_comment runs; afterwards the buffer's
   cur marks the end of the comment, and the bytes in between are copied
   after the marker character M.  */
957 copy_comment (pfile, m)
961 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
/* skip_comment returning M means no comment was found.  */
964 if (skip_comment (pfile, m) == m)
967 limit = CPP_BUFFER (pfile)->cur;
968 CPP_RESERVE (pfile, limit - start + 2);
969 CPP_PUTC_Q (pfile, m);
/* NOTE(review): this loop is inclusive of LIMIT (start <= limit), whereas
   the equivalent loop in parse_string stops before LIMIT; confirm the
   extra byte is intended and is covered by the reservation above.  */
970 for (; start <= limit; start++)
972 CPP_PUTC_Q (pfile, *start);
/* Warn that embedded null characters were ignored.  Both a singular and
   a plural message exist; the test on COUNT that chooses between them is
   not visible in this listing.  */
978 null_warning (pfile, count)
983 cpp_warning (pfile, "embedded null character ignored");
985 cpp_warning (pfile, "embedded null characters ignored");
988 /* Skip whitespace \-newline and comments. Does not macro-expand. */
991 _cpp_skip_hspace (pfile)
/* Null characters are counted and reported once at the end through
   null_warning.  */
994 unsigned int null_count = 0;
1002 else if (is_hspace(c))
/* Form feed and vertical tab draw a pedantic warning.  */
1004 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1005 cpp_pedwarn (pfile, "%s in preprocessing directive",
1006 c == '\f' ? "formfeed" : "vertical tab");
1012 /* \r is a backslash-newline marker if !has_escapes, and
1013 a deletable-whitespace or no-reexpansion marker otherwise. */
1014 if (CPP_BUFFER (pfile)->has_escapes)
1022 CPP_BUMP_LINE (pfile);
/* '/' and '-' may begin a comment; skip_comment decides.  */
1024 else if (c == '/' || c == '-')
1026 c = skip_comment (pfile, c);
1036 null_warning (pfile, null_count);
1039 /* Read and discard the rest of the current line. */
/* Strings and comments are stepped over with skip_string and
   skip_comment, so a line end inside them is handled by those helpers.  */
1042 _cpp_skip_rest_of_line (pfile)
/* The line count is bumped here only for buffers without escapes.  */
1056 if (! CPP_BUFFER (pfile)->has_escapes)
1057 CPP_BUMP_LINE (pfile);
1062 skip_string (pfile, c);
1067 skip_comment (pfile, c);
1072 if (CPP_PEDANTIC (pfile))
1073 cpp_pedwarn (pfile, "%s in preprocessing directive",
1074 c == '\f' ? "formfeed" : "vertical tab");
1081 /* Parse an identifier starting with C. */
/* Each accepted character is appended to the token_buffer; two bytes are
   reserved per character so room for a final NUL is always available.  */
1084 _cpp_parse_name (pfile, c)
/* '$' in an identifier draws a pedantic warning.  */
1096 if (c == '$' && CPP_PEDANTIC (pfile))
1097 cpp_pedwarn (pfile, "`$' in identifier");
1099 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1100 CPP_PUTC_Q (pfile, c);
1108 /* Parse and skip over a string starting with C. A single quoted
1109 string is treated like a double -- some programs (e.g., troff) are
1110 perverse this way. (However, a single quoted string is not allowed
1111 to extend over multiple lines.) */
/* The starting line and column are captured so diagnostics point at the
   opening quote.  pfile->multiline_string_line remembers the line where
   a string first ran past end of line; it is reported as the possible
   real start of an unterminated constant and is reset to 0 afterwards.  */
1113 skip_string (pfile, c)
1117 unsigned int start_line, start_column;
1118 unsigned int null_count = 0;
1120 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1121 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1128 cpp_error_with_line (pfile, start_line, start_column,
1129 "unterminated string or character constant");
1130 if (pfile->multiline_string_line != start_line
1131 && pfile->multiline_string_line != 0)
1132 cpp_error_with_line (pfile,
1133 pfile->multiline_string_line, -1,
1134 "possible real start of unterminated constant");
1135 pfile->multiline_string_line = 0;
1143 CPP_BUMP_LINE (pfile);
1144 /* In Fortran and assembly language, silently terminate
1145 strings of either variety at end of line. This is a
1146 kludge around not knowing where comments are in these
1148 if (CPP_OPTION (pfile, lang_fortran)
1149 || CPP_OPTION (pfile, lang_asm))
1154 /* Character constants may not extend over multiple lines.
1155 In Standard C, neither may strings. We accept multiline
1156 strings as an extension. */
1159 cpp_error_with_line (pfile, start_line, start_column,
1160 "unterminated character constant");
/* Only the first line break inside a string is warned about (pedantic
   mode) and recorded.  */
1164 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1165 cpp_pedwarn_with_line (pfile, start_line, start_column,
1166 "string constant runs past end of line");
1167 if (pfile->multiline_string_line == 0)
1168 pfile->multiline_string_line = start_line;
1172 if (CPP_BUFFER (pfile)->has_escapes)
1174 cpp_ice (pfile, "\\r escape inside string constant");
1178 /* Backslash newline is replaced by nothing at all. */
1179 CPP_BUMP_LINE (pfile);
/* Null characters seen inside the constant are reported once, with a
   singular or plural message.  */
1195 if (null_count == 1)
1196 cpp_warning (pfile, "null character in string or character constant");
1197 else if (null_count > 1)
1198 cpp_warning (pfile, "null characters in string or character constant");
1201 /* Parse a string and copy it to the output. */
/* START is captured before skip_string advances the buffer; afterwards
   the bytes in [start, limit) are copied to the token_buffer after the
   opening character C.  */
1204 parse_string (pfile, c)
1208 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1209 const U_CHAR *limit;
1211 skip_string (pfile, c);
1213 limit = CPP_BUFFER (pfile)->cur;
/* Room for the copied bytes plus two: one for C and one to spare.  */
1214 CPP_RESERVE (pfile, limit - start + 2);
1215 CPP_PUTC_Q (pfile, c);
1216 for (; start < limit; start++)
1218 CPP_PUTC_Q (pfile, *start);
1221 /* Get the next token, and add it to the text in pfile->token_buffer.
1222 Return the kind of token we got. */
/* The token's spelling is appended to the token_buffer with CPP_PUTC, or
   with CPP_RESERVE followed by CPP_PUTC_Q when several characters are
   written at once.  */
1225 _cpp_lex_token (pfile)
1229 enum cpp_ttype token;
1231 if (CPP_BUFFER (pfile) == NULL)
1242 if (PEEKC () == '=')
/* Comments are either skipped or copied to the output, depending on the
   discard_comments option.  */
1246 if (CPP_OPTION (pfile, discard_comments))
1247 c = skip_comment (pfile, c);
1249 c = copy_comment (pfile, c);
1253 /* Comments are equivalent to spaces.
1254 For -traditional, a comment is equivalent to nothing. */
1255 if (!CPP_OPTION (pfile, discard_comments))
1257 else if (CPP_TRADITIONAL (pfile))
1261 CPP_PUTC (pfile, c);
1266 CPP_PUTC (pfile, c);
1273 CPP_PUTC (pfile, c2);
1276 else if (c2 == '%' && PEEKN (1) == ':')
1278 /* Digraph: "%:" == "#". */
1280 CPP_RESERVE (pfile, 2);
1281 CPP_PUTC_Q (pfile, c2);
1282 CPP_PUTC_Q (pfile, GETC ());
/* Quoted constants: a single quote yields CPP_CHAR, a double quote
   CPP_STRING.  */
1290 parse_string (pfile, c);
1291 return c == '\'' ? CPP_CHAR : CPP_STRING;
1294 if (!CPP_OPTION (pfile, dollars_in_ident))
1300 /* Digraph: ":>" == "]". */
1302 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1310 if (c2 == c || c2 == '=')
1315 /* Digraphs: "%:" == "#", "%>" == "}". */
/* NOTE(review): the second branch below (apparently the "%>" case)
   returns CPP_OPEN_BRACE, although the comment above maps "%>" to "}".
   The "<%" branch further down returns CPP_CLOSE_BRACE.  The two return
   values look swapped -- confirm against the digraph table in the C
   standard.  */
1320 CPP_RESERVE (pfile, 2);
1321 CPP_PUTC_Q (pfile, c);
1322 CPP_PUTC_Q (pfile, c2);
1328 CPP_RESERVE (pfile, 2);
1329 CPP_PUTC_Q (pfile, c);
1330 CPP_PUTC_Q (pfile, c2);
1331 return CPP_OPEN_BRACE;
1333 /* else fall through */
1339 if (PEEKC () == '=')
1347 if (CPP_OPTION (pfile, chill))
1348 goto comment; /* Chill style comment */
1356 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1358 /* In C++, there's a ->* operator. */
1360 CPP_RESERVE (pfile, 4);
1361 CPP_PUTC_Q (pfile, c);
1362 CPP_PUTC_Q (pfile, GETC ());
1363 CPP_PUTC_Q (pfile, GETC ());
/* While an include directive is being parsed, '<' begins a bracketed
   file name that is copied up to the closing '>'; a newline or EOF
   before that is diagnosed.  */
1371 if (pfile->parsing_include_directive)
1375 CPP_PUTC (pfile, c);
1379 if (c == '\n' || c == EOF)
1382 "missing '>' in `#include <FILENAME>'");
1387 if (!CPP_BUFFER (pfile)->has_escapes)
1389 /* Backslash newline is replaced by nothing. */
1390 CPP_ADJUST_WRITTEN (pfile, -1);
1391 CPP_BUMP_LINE (pfile);
1395 /* We might conceivably get \r- or \r<space> in
1396 here. Just delete 'em. */
1398 if (d != '-' && d != ' ')
1399 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1400 CPP_ADJUST_WRITTEN (pfile, -1);
1406 /* Digraphs: "<%" == "{", "<:" == "[". */
/* NOTE(review): the branch below returns CPP_CLOSE_BRACE although the
   comment above maps "<%" to "{"; see the matching note on the '%'
   digraphs earlier in this function.  */
1411 CPP_RESERVE (pfile, 2);
1412 CPP_PUTC_Q (pfile, c);
1413 CPP_PUTC_Q (pfile, c2);
1414 return CPP_CLOSE_BRACE;
1418 /* else fall through */
1423 /* GNU C++ supports MIN and MAX operators <? and >?. */
1424 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
/* Up to three characters: the operator pair plus an optional '='.  */
1427 CPP_RESERVE (pfile, 3);
1428 CPP_PUTC_Q (pfile, c);
1429 CPP_PUTC_Q (pfile, c2);
1430 if (PEEKC () == '=')
1431 CPP_PUTC_Q (pfile, GETC ());
1438 CPP_PUTC (pfile, c);
1443 /* In C++ there's a .* operator. */
1444 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
/* Three consecutive dots form an ellipsis token.  */
1447 if (c2 == '.' && PEEKN(1) == '.')
1449 CPP_RESERVE (pfile, 3);
1450 CPP_PUTC_Q (pfile, '.');
1451 CPP_PUTC_Q (pfile, '.');
1452 CPP_PUTC_Q (pfile, '.');
1454 return CPP_ELLIPSIS;
1459 CPP_RESERVE (pfile, 2);
1460 CPP_PUTC_Q (pfile, c);
1461 CPP_PUTC_Q (pfile, GETC ());
/* A prefix character directly followed by a quote starts a wide
   character or wide string constant, except in traditional mode.  */
1466 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1468 CPP_PUTC (pfile, c);
1470 parse_string (pfile, c);
1471 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1475 case '0': case '1': case '2': case '3': case '4':
1476 case '5': case '6': case '7': case '8': case '9':
1481 CPP_RESERVE (pfile, 2);
1482 CPP_PUTC_Q (pfile, c);
/* A number ends at a character that is neither a number character nor
   '.', unless it is a sign directly after an exponent letter ('e'/'E',
   or 'p'/'P' when the c89 option is off).  */
1486 if (!is_numchar(c) && c != '.'
1487 && ((c2 != 'e' && c2 != 'E'
1488 && ((c2 != 'p' && c2 != 'P')
1489 || CPP_OPTION (pfile, c89)))
1490 || (c != '+' && c != '-')))
1496 case 'b': case 'c': case 'd': case 'h': case 'o':
1497 case 'B': case 'C': case 'D': case 'H': case 'O':
/* With the chill option, these letters followed by a single quote are
   lexed specially rather than as identifier starts.  */
1498 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1500 CPP_RESERVE (pfile, 2);
1501 CPP_PUTC_Q (pfile, c);
1502 CPP_PUTC_Q (pfile, '\'');
1508 goto chill_number_eof;
1511 CPP_PUTC (pfile, c);
1515 CPP_RESERVE (pfile, 2);
1516 CPP_PUTC_Q (pfile, c);
1529 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1530 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1531 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1532 case 'x': case 'y': case 'z':
1533 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1534 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1535 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1538 _cpp_parse_name (pfile, c);
/* Horizontal whitespace; a null character is handled in the same case
   and reported through null_warning.  */
1541 case ' ': case '\t': case '\v': case '\f': case '\0':
1550 CPP_PUTC (pfile, c);
1552 if (c == EOF || !is_hspace(c))
1557 null_warning (pfile, null_count);
/* In buffers with escapes, '\r' introduces an escape sequence; the
   marker is copied to the output only when output_escapes is set, and an
   unknown escape is an internal error.  */
1562 if (CPP_BUFFER (pfile)->has_escapes)
1567 if (pfile->output_escapes)
1568 CPP_PUTS (pfile, "\r-", 2);
1569 _cpp_parse_name (pfile, GETC ());
1574 /* "\r " means a space, but only if necessary to prevent
1575 accidental token concatenation. */
1576 CPP_RESERVE (pfile, 2);
1577 if (pfile->output_escapes)
1578 CPP_PUTC_Q (pfile, '\r');
1579 CPP_PUTC_Q (pfile, c);
1584 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1590 /* Backslash newline is ignored. */
1591 if (!ACTIVE_MARK_P (pfile))
1592 CPP_BUMP_LINE (pfile);
1597 CPP_PUTC (pfile, c);
/* Single-character punctuators share the char1 exit.  */
1600 case '(': token = CPP_OPEN_PAREN; goto char1;
1601 case ')': token = CPP_CLOSE_PAREN; goto char1;
1602 case '{': token = CPP_OPEN_BRACE; goto char1;
1603 case '}': token = CPP_CLOSE_BRACE; goto char1;
1604 case ',': token = CPP_COMMA; goto char1;
1605 case ';': token = CPP_SEMICOLON; goto char1;
1611 CPP_PUTC (pfile, c);
1616 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1617 Caller is expected to have checked no_macro_expand. */
1619 maybe_macroexpand (pfile, written)
1623 U_CHAR *macro = pfile->token_buffer + written;
1624 size_t len = CPP_WRITTEN (pfile) - written;
1625 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1627 /* _cpp_lookup never returns null. */
1628 if (hp->type == T_VOID)
1630 if (hp->disabled || hp->type == T_IDENTITY)
1632 if (pfile->output_escapes)
1634 /* Insert a no-reexpand marker before IDENT. */
1635 CPP_RESERVE (pfile, 2);
1636 CPP_ADJUST_WRITTEN (pfile, 2);
1637 macro = pfile->token_buffer + written;
1639 memmove (macro + 2, macro, len);
1645 if (hp->type == T_EMPTY)
1647 /* Special case optimization: macro expands to nothing. */
1648 CPP_SET_WRITTEN (pfile, written);
1649 CPP_PUTC_Q (pfile, ' ');
1653 /* If macro wants an arglist, verify that a '(' follows. */
1654 if (hp->type == T_FMACRO)
1656 int macbuf_whitespace = 0;
1659 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1661 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1664 _cpp_skip_hspace (pfile);
1671 if (point != CPP_BUFFER (pfile)->cur)
1672 macbuf_whitespace = 1;
1676 goto not_macro_call;
1677 cpp_pop_buffer (pfile);
1680 CPP_SET_MARK (pfile);
1683 _cpp_skip_hspace (pfile);
1690 CPP_GOTO_MARK (pfile);
1695 if (macbuf_whitespace)
1696 CPP_PUTC (pfile, ' ');
1698 /* K+R treated this as a hard error. */
1699 if (CPP_OPTION (pfile, warn_traditional))
1701 "traditional C rejects function macro %s in non-function context",
1708 /* This is now known to be a macro call.
1709 Expand the macro, reading arguments as needed,
1710 and push the expansion on the input stack. */
1711 _cpp_macroexpand (pfile, hp);
1712 CPP_SET_WRITTEN (pfile, written);
1716 /* Complain about \v or \f in a preprocessing directive (constraint
1717 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1719 pedantic_whitespace (pfile, p, len)
1727 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1728 else if (*p == '\f')
1729 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1737 cpp_get_token (pfile)
1740 enum cpp_ttype token;
1741 long written = CPP_WRITTEN (pfile);
1744 token = _cpp_lex_token (pfile);
1749 pfile->potential_control_macro = 0;
1750 pfile->only_seen_white = 0;
1754 if (pfile->only_seen_white == 0)
1755 pfile->only_seen_white = 1;
1756 CPP_BUMP_LINE (pfile);
1764 pfile->potential_control_macro = 0;
1765 if (!pfile->only_seen_white)
1767 /* XXX shouldn't have to do this - remove the hash or %: from
1768 the token buffer. */
1769 if (CPP_PWRITTEN (pfile)[-1] == '#')
1770 CPP_ADJUST_WRITTEN (pfile, -1);
1772 CPP_ADJUST_WRITTEN (pfile, -2);
1774 if (_cpp_handle_directive (pfile))
1775 return CPP_DIRECTIVE;
1776 pfile->only_seen_white = 0;
1777 CPP_PUTC (pfile, '#');
1781 pfile->potential_control_macro = 0;
1782 pfile->only_seen_white = 0;
1783 if (! pfile->no_macro_expand
1784 && maybe_macroexpand (pfile, written))
1789 if (CPP_BUFFER (pfile) == NULL)
1791 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1793 cpp_pop_buffer (pfile);
1796 cpp_pop_buffer (pfile);
1801 /* Like cpp_get_token, but skip spaces and comments. */
1804 cpp_get_non_space_token (pfile)
1807 int old_written = CPP_WRITTEN (pfile);
1810 enum cpp_ttype token = cpp_get_token (pfile);
1811 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1813 CPP_SET_WRITTEN (pfile, old_written);
1817 /* Like cpp_get_token, except that it does not execute directives,
1818 does not consume vertical space, and discards horizontal space. */
1820 _cpp_get_directive_token (pfile)
1824 enum cpp_ttype token;
1828 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1829 old_written = CPP_WRITTEN (pfile);
1830 token = _cpp_lex_token (pfile);
1837 /* Put it back and return VSPACE. */
1839 CPP_ADJUST_WRITTEN (pfile, -1);
1843 /* The purpose of this rather strange check is to prevent pedantic
1844 warnings for ^L in an #ifdefed out block. */
1845 if (CPP_PEDANTIC (pfile) && ! at_bol)
1846 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1847 CPP_WRITTEN (pfile) - old_written);
1848 CPP_SET_WRITTEN (pfile, old_written);
1853 if (! pfile->no_macro_expand
1854 && maybe_macroexpand (pfile, old_written))
1859 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1861 cpp_pop_buffer (pfile);
1865 /* This can happen for files that don't end with a newline,
1866 and for cpp_define and friends. Pretend they do, so
1867 callers don't have to deal. A warning will be issued by
1868 someone else, if necessary. */
1873 /* Determine the current line and column. Used only by read_and_prescan. */
1875 find_position (start, limit, linep)
1878 unsigned long *linep;
1880 unsigned long line = *linep;
1881 U_CHAR *lbase = start;
1882 while (start < limit)
1884 U_CHAR ch = *start++;
1885 if (ch == '\n' || ch == '\r')
1895 /* The following table is used by _cpp_read_and_prescan. If we have
1896 designated initializers, it can be constant data; otherwise, it is
1897 set up at runtime by _cpp_init_input_buffer. */
1900 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1903 #if (GCC_VERSION >= 2007)
1904 #define init_chartab() /* nothing */
1905 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1907 #define s(p, v) [p] = v,
1909 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1910 static void init_chartab PARAMS ((void)) { \
1911 unsigned char *x = chartab;
1913 #define s(p, v) x[p] = v;
1916 /* Table of characters that can't be handled in the inner loop.
1917 Also contains the mapping between trigraph third characters and their
1919 #define SPECCASE_CR 1 /* chartab[] code stored for '\r' */
1920 #define SPECCASE_BACKSLASH 2 /* chartab[] code stored for '\\' */
1921 #define SPECCASE_QUESTION 3 /* chartab[] code stored for '?'; NORMAL() treats any larger entry as ordinary */
1924 s('\r', SPECCASE_CR)
1925 s('\\', SPECCASE_BACKSLASH)
1926 s('?', SPECCASE_QUESTION)
1928 s('=', '#') s(')', ']') s('!', '|')
1929 s('(', '[') s('\'', '^') s('>', '}')
1930 s('/', '\\') s('<', '{') s('-', '~')
/* NORMAL(c): character C needs no special treatment -- its chartab[] entry
   is either 0 or greater than SPECCASE_QUESTION, i.e. it is not one of the
   three SPECCASE_* codes.  C is evaluated twice, so the argument must have
   no side effects.  */
1937 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
/* NONTRI(c): compares C itself (not chartab[C]) against SPECCASE_QUESTION.
   NOTE(review): presumably applied to a value already fetched from chartab[]
   to tell "no trigraph replacement" apart from a replacement character --
   confirm at the use site.  */
1938 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1940 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1941 much memory to allocate initially; more will be allocated if
1942 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1943 canonical form (\n). If enabled, convert and/or warn about
1944 trigraphs. Convert backslash-newline to a one-character escape
1945 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1946 token). If there is no newline at the end of the file, add one and
1947 warn. Returns -1 on failure, or the actual length of the data to
1950 This function does a lot of work, and can be a serious performance
1951 bottleneck. It has been tuned heavily; make sure you understand it
1952 before hacking. The common case - no trigraphs, Unix style line
1953 breaks, backslash-newline set off by whitespace, newline at EOF -
1954 has been optimized at the expense of the others. The performance
1955 penalty for DOS style line breaks (\r\n) is about 15%.
1957 Warnings lose particularly heavily since we have to determine the
1958 line number, which involves scanning from the beginning of the file
1959 or from the last warning. The penalty for the absence of a newline
1960 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1962 If your file has more than one kind of end-of-line marker, you
1963 will get messed-up line numbering.
1965 So that the cases of the switch statement do not have to concern
1966 themselves with the complications of reading beyond the end of the
1967 buffer, the buffer is guaranteed to have at least 3 characters in
1968 it (or however many are left in the file, if less) on entry to the
1969 switch. This is enough to handle trigraphs and the "\\\n\r" and
1972 The end of the buffer is marked by a '\\', which, being a special
1973 character, guarantees we will exit the fast-scan loops and perform
1977 _cpp_read_and_prescan (pfile, fp, desc, len)
1983 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1984 U_CHAR *ip, *op, *line_base;
1987 unsigned int deferred_newlines;
1992 deferred_newlines = 0;
1996 ibase = pfile->input_buffer + 3;
1998 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
2002 U_CHAR *near_buff_end;
2004 count = read (desc, ibase, pfile->input_buffer_len);
2008 ibase[count] = '\\'; /* Marks end of buffer */
2011 near_buff_end = pfile->input_buffer + count;
2016 size_t delta_line_base;
2020 This could happen if the file is larger than half the
2021 maximum address space of the machine. */
2024 delta_op = op - buf;
2025 delta_line_base = line_base - buf;
2026 buf = (U_CHAR *) xrealloc (buf, len);
2027 op = buf + delta_op;
2028 line_base = buf + delta_line_base;
2035 /* Allow normal processing of the (at most 2) remaining
2036 characters. The end-of-buffer marker is still present
2037 and prevents false matches within the switch. */
2038 near_buff_end = ibase - 1;
2045 /* Deal with \-newline, potentially in the middle of a token. */
2046 if (deferred_newlines)
2048 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2050 /* Previous was not white space. Skip to white
2051 space, if we can, before outputting the \r's */
2053 while (ip[span] != ' '
2056 && NORMAL(ip[span]))
2058 memcpy (op, ip, span);
2061 if (! NORMAL(ip[0]))
2064 while (deferred_newlines)
2065 deferred_newlines--, *op++ = '\r';
2068 /* Copy as much as we can without special treatment. */
2070 while (NORMAL (ip[span])) span++;
2071 memcpy (op, ip, span);
2076 if (ip > near_buff_end) /* Do we have enough chars? */
2078 switch (chartab[*ip++])
2080 case SPECCASE_CR: /* \r */
2089 case SPECCASE_BACKSLASH: /* \ */
2092 deferred_newlines++;
2094 if (*ip == '\r') ip++;
2096 else if (*ip == '\r')
2098 deferred_newlines++;
2100 if (*ip == '\n') ip++;
2106 case SPECCASE_QUESTION: /* ? */
2110 *op++ = '?'; /* Normal non-trigraph case */
2119 if (CPP_OPTION (pfile, warn_trigraphs))
2122 line_base = find_position (line_base, op, &line);
2123 col = op - line_base + 1;
2124 if (CPP_OPTION (pfile, trigraphs))
2125 cpp_warning_with_line (pfile, line, col,
2126 "trigraph ??%c converted to %c", d, t);
2128 cpp_warning_with_line (pfile, line, col,
2129 "trigraph ??%c ignored", d);
2133 if (CPP_OPTION (pfile, trigraphs))
2135 op[-1] = t; /* Overwrite '?' */
2140 goto do_speccase; /* May need buffer refill */
2152 /* Copy previous char plus unprocessed (at most 2) chars
2153 to beginning of buffer, refill it with another
2154 read(), and continue processing */
2155 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2165 line_base = find_position (line_base, op, &line);
2166 col = op - line_base + 1;
2167 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2168 if (offset + 1 > len)
2171 if (offset + 1 > len)
2173 buf = (U_CHAR *) xrealloc (buf, len);
2179 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2183 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2184 (unsigned long)offset);
2189 cpp_error_from_errno (pfile, fp->ihash->name);
2194 /* Allocate pfile->input_buffer, and initialize chartab[]
2195 if it hasn't happened already. */
2198 _cpp_init_input_buffer (pfile)
2204 _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2206 /* Determine the appropriate size for the input buffer. Normal C
2207 source files are smaller than eight K. */
2208 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2209 address arithmetic all the time, and 3 for pushback during buffer
2210 refill, in case there's a potential trigraph or end-of-line
2211 digraph at the end of a block. */
2213 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2214 pfile->input_buffer = tmp;
2215 pfile->input_buffer_len = 8192;
2219 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2220 and extending for LEN characters to the NUL-terminated string
2221 STRING. Typical usage:
2223 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2229 cpp_idcmp (token, len, string)
2230 const U_CHAR *token;
2234 size_t len2 = strlen (string);
2237 if ((r = memcmp (token, string, MIN (len, len2))))
2240 /* The longer of the two strings sorts after the shorter. */
2243 else if (len < len2)
2251 /* Lexing algorithm.
2253 The original lexer in cpplib was made up of two passes: a first pass
2254 that replaced trigraphs and deleted escaped newlines, and a second
2255 pass that tokenized the result of the first pass. Tokenisation was
2256 performed by peeking at the next character in the input stream. For
2257 example, if the input stream contained "!=", the handler for the !
2258 character would peek at the next character, and if it were a '='
2259 would skip over it, and return a "!=" token, otherwise it would
2260 return just the "!" token.
2262 To implement a single-pass lexer, this peeking ahead is unworkable.
2263 An arbitrary number of escaped newlines, and trigraphs (in particular
2264 ??/ which translates to the escape \), could separate the '!' and '='
2265 in the input stream, yet the next token is still a "!=".
2267 Suppose instead that we lex by one logical line at a time, producing
2268 a token list or stack for each logical line, and when seeing the '!'
2269 push a CPP_NOT token on the list. Then if the '!' is part of a
2270 longer token ("!=") we know we must see the remainder of the token by
2271 the time we reach the end of the logical line. Thus we can have the
2272 '=' handler look at the previous token (at the end of the list / top
2273 of the stack) and see if it is a "!" token, and if so, instead of
2274 pushing a "=" token revise the existing token to be a "!=" token.
2276 This works in the presence of escaped newlines, because the '\' would
2277 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2278 newline ('\n' or '\r') handler looks at the token at the top of the
2279 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2280 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2281 the '=' handler would never see any intervening escaped newlines.
2283 To make trigraphs work in this context, as in precedence trigraphs
2284 are highest and converted before anything else, the '?' handler does
2285 lookahead to see if it is a trigraph, and if so skips the trigraph
2286 and pushes the token it represents onto the top of the stack. This
2287 also works in the particular case of a CPP_BACKSLASH trigraph.
2289 To the preprocessor, whitespace is only significant to the point of
2290 knowing whether whitespace precedes a particular token. For example,
2291 the '=' handler needs to know whether there was whitespace between it
2292 and a "!" token on the top of the stack, to make the token conversion
2293 decision correctly. So each token has a PREV_WHITESPACE flag to
2294 indicate this - the standard permits consecutive whitespace to be
2295 regarded as a single space. The compiler front ends are not
2296 interested in whitespace at all; they just require a token stream.
2297 Another place where whitespace is significant to the preprocessor is
2298 a #define statement - if there is whitespace between the macro name
2299 and an initial "(" token the macro is "object-like", otherwise it is
2300 a function-like macro that takes arguments.
2302 However, all is not rosy. Parsing of identifiers, numbers, comments
2303 and strings becomes trickier because of the possibility of raw
2304 trigraphs and escaped newlines in the input stream.
2306 The trigraphs are three consecutive characters beginning with two
2307 question marks. A question mark is not valid as part of a number or
2308 identifier, so parsing of a number or identifier terminates normally
2309 upon reaching it, returning to the mainloop which handles the
2310 trigraph just like it would in any other position. Similarly for the
2311 backslash of a backslash-newline combination. So we just need the
2312 escaped-newline dropper in the mainloop to check if the token on the
2313 top of the stack after dropping the escaped newline is a number or
2314 identifier, and if so to continue the processing it as if nothing had
2317 For strings, we replace trigraphs whenever we reach a quote or
2318 newline, because there might be a backslash trigraph escaping them.
2319 We need to be careful that we start trigraph replacing from where we
2320 left off previously, because it is possible for a first scan to leave
2321 "fake" trigraphs that a second scan would pick up as real (e.g. the
2322 sequence "????/\n=" would find a fake ??= trigraph after removing the
2325 For line comments, on reaching a newline we scan the previous
2326 character(s) to see if it is escaped, and continue if it is. Block
2327 comments ignore everything and just focus on finding the comment
2328 termination mark. The only difficult thing, and it is surprisingly
2329 tricky, is checking if an asterisk precedes the final slash since
2330 they could be separated by escaped newlines. If the preprocessor is
2331 invoked with the output comments option, we don't bother removing
2332 escaped newlines and replacing trigraphs for output.
2334 Finally, numbers can begin with a period, which is pushed initially
2335 as a CPP_DOT token in its own right. The digit handler checks if the
2336 previous token was a CPP_DOT not separated by whitespace, and if so
2337 pops it off the stack and pushes a period into the number's buffer
2338 before calling the number parser.
/* Spellings of the six digraph forms.
   NOTE(review): the order presumably matches an index or enumeration used
   elsewhere -- confirm before reordering.  U is not defined in this part of
   the file; if it is a macro, be aware that C11 and later lex U"..." as a
   single char32_t string literal rather than as U followed by a string --
   confirm against the compilers this must build with.  */
2342 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2343 U":>", U"<%", U"%>"};
/* Indexed by the third character of a "??x" sequence; yields the character
   that trigraph stands for.  Entries are assigned by init_trigraph_map, and
   trigraph_replace treats an entry of 0 as "not a trigraph".  */
2344 static unsigned char trigraph_map[256];
2347 init_trigraph_map ()
2349 trigraph_map['='] = '#';
2350 trigraph_map['('] = '[';
2351 trigraph_map[')'] = ']';
2352 trigraph_map['/'] = '\\';
2353 trigraph_map['\''] = '^';
2354 trigraph_map['<'] = '{';
2355 trigraph_map['>'] = '}';
2356 trigraph_map['!'] = '|';
2357 trigraph_map['-'] = '~';
2360 /* Call when a trigraph is encountered. It warns if necessary, and
2361 returns true if the trigraph should be honoured. END is the third
2362 character of a trigraph in the input stream. */
2364 trigraph_ok (pfile, end)
2366 const unsigned char *end;
2368 int accept = CPP_OPTION (pfile, trigraphs);
2370 if (CPP_OPTION (pfile, warn_trigraphs))
2372 unsigned int col = end - 1 - pfile->buffer->line_base;
2374 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2375 "trigraph ??%c converted to %c",
2376 (int) *end, (int) trigraph_map[*end]);
2378 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2379 "trigraph ??%c ignored", (int) *end);
2384 /* Scan a string for trigraphs, warning or replacing them inline as
2385 appropriate. When parsing a string, we must call this routine
2386 before processing a newline character (if trigraphs are enabled),
2387 since the newline might be escaped by a preceding backslash
2388 trigraph sequence. Returns a pointer to the end of the name after
2391 static unsigned char*
2392 trigraph_replace (pfile, src, limit)
2395 unsigned char* limit;
2397 unsigned char *dest;
2399 /* Starting with src[1], find two consecutive '?'. The case of no
2400 trigraphs is streamlined. */
2402 for (; src + 1 < limit; src += 2)
2407 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2410 else if (src + 2 == limit || src[1] != '?')
2413 /* Check if it really is a trigraph. */
2414 if (trigraph_map[src[2]] == 0)
2418 goto trigraph_found;
2422 /* Now we have a trigraph, we need to scan the remaining buffer, and
2423 copy-shifting its contents left if replacement is enabled. */
2424 for (; src + 2 < limit; dest++, src++)
2425 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2429 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2430 *dest = trigraph_map[*src];
2433 /* Copy remaining (at most 2) characters. */
2439 /* If CUR is a backslash or the end of a trigraphed backslash, return
2440 a pointer to its beginning, otherwise NULL. We don't read beyond
2441 the buffer start, because there is the start of the comment in the
2443 static const unsigned char *
2444 backslash_start (pfile, cur)
2446 const unsigned char *cur;
2450 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2451 && trigraph_ok (pfile, cur))
2456 /* Skip a C-style block comment. This is probably the trickiest
2457 handler. We find the end of the comment by seeing if an asterisk
2458 is before every '/' we encounter. The nasty complication is that a
2459 previous asterisk may be separated by one or more escaped newlines.
2460 Returns non-zero if comment terminated by EOF, zero otherwise. */
2462 skip_block_comment2 (pfile)
2465 cpp_buffer *buffer = pfile->buffer;
2466 const unsigned char *char_after_star = 0;
2467 register const unsigned char *cur = buffer->cur;
2470 /* Inner loop would think the comment has ended if the first comment
2471 character is a '/'. Avoid this and keep the inner loop clean by
2472 skipping such a character. */
2473 if (cur < buffer->rlimit && cur[0] == '/')
2476 for (; cur < buffer->rlimit; )
2478 unsigned char c = *cur++;
2480 /* People like decorating comments with '*', so check for
2481 '/' instead for efficiency. */
2484 if (cur[-2] == '*' || cur - 1 == char_after_star)
2487 /* Warn about potential nested comments, but not when
2488 the final character inside the comment is a '/'.
2489 Don't bother to get it right across escaped newlines. */
2490 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2491 && cur[0] == '*' && cur[1] != '/')
2494 cpp_warning (pfile, "'/*' within comment");
2497 else if (IS_NEWLINE(c))
2499 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2501 handle_newline (cur, buffer->rlimit, c);
2502 /* Work correctly if there is an asterisk before an
2503 arbitrarily long sequence of escaped newlines. */
2504 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2505 char_after_star = cur;
2507 char_after_star = 0;
2517 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2518 Returns non-zero if a multiline comment. */
2520 skip_line_comment2 (pfile)
2523 cpp_buffer *buffer = pfile->buffer;
2524 register const unsigned char *cur = buffer->cur;
2527 for (; cur < buffer->rlimit; )
2529 unsigned char c = *cur++;
2533 /* Check for a (trigraph?) backslash escaping the newline. */
2534 if (!backslash_start (pfile, cur - 2))
2537 handle_newline (cur, buffer->rlimit, c);
2543 buffer->cur = cur - 1; /* Leave newline for caller. */
2547 /* Skips whitespace, stopping at next non-whitespace character.
2548 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2549 to be assigned the correct column. */
2551 skip_whitespace (pfile, in_directive)
2555 cpp_buffer *buffer = pfile->buffer;
2556 register const unsigned char *cur = buffer->cur;
2557 unsigned short null_count = 0;
2559 for (; cur < buffer->rlimit; )
2561 unsigned char c = *cur++;
2565 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2566 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2567 - col % CPP_OPTION(pfile, tabstop));
2569 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2571 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2575 /* Must be '\f' or '\v' */
2576 else if (in_directive && CPP_PEDANTIC (pfile))
2577 cpp_pedwarn (pfile, "%s in preprocessing directive",
2578 c == '\f' ? "formfeed" : "vertical tab");
2583 buffer->cur = cur - 1;
2585 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2586 : "embedded null character ignored");
2589 /* Parse (append) an identifier. */
2591 parse_name (pfile, list, name)
2596 const unsigned char *name_limit;
2597 unsigned char *namebuf;
2598 cpp_buffer *buffer = pfile->buffer;
2599 register const unsigned char *cur = buffer->cur;
2602 name_limit = list->namebuf + list->name_cap;
2603 namebuf = list->namebuf + list->name_used;
2605 for (; cur < buffer->rlimit && namebuf < name_limit; )
2607 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2613 if (c == '$' && CPP_PEDANTIC (pfile))
2616 cpp_pedwarn (pfile, "'$' character in identifier");
2620 /* Run out of name space? */
2621 if (cur < buffer->rlimit)
2623 list->name_used = namebuf - list->namebuf;
2624 auto_expand_name_space (list);
2630 name->len = namebuf - name->text;
2631 list->name_used = namebuf - list->namebuf;
2634 /* Parse (append) a number. */
/* VALID_SIGN(c, prevc): true when C is '+' or '-' and PREVC is an exponent
   letter -- 'e' or 'E' always, 'p' or 'P' only when the c89 option is off.
   The expansion refers to `pfile' by name, so the macro can only be used
   where a variable of that name is in scope.  Both arguments are evaluated
   more than once.  */
2636 #define VALID_SIGN(c, prevc) \
2637 (((c) == '+' || (c) == '-') && \
2638 ((prevc) == 'e' || (prevc) == 'E' \
2639 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2642 parse_number (pfile, list, name)
2647 const unsigned char *name_limit;
2648 unsigned char *namebuf;
2649 cpp_buffer *buffer = pfile->buffer;
2650 register const unsigned char *cur = buffer->cur;
2653 name_limit = list->namebuf + list->name_cap;
2654 namebuf = list->namebuf + list->name_used;
2656 for (; cur < buffer->rlimit && namebuf < name_limit; )
2658 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2660 /* Perhaps we should accept '$' here if we accept it for
2661 identifiers. We know namebuf[-1] is safe, because for c to
2662 be a sign we must have pushed at least one character. */
2663 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2670 /* Run out of name space? */
2671 if (cur < buffer->rlimit)
2673 list->name_used = namebuf - list->namebuf;
2674 auto_expand_name_space (list);
2680 name->len = namebuf - name->text;
2681 list->name_used = namebuf - list->namebuf;
2684 /* Places a string terminated by an unescaped TERMINATOR into a
2685 cpp_name, which should be expandable and thus at the top of the
2686 list's stack. Handles embedded trigraphs, if necessary, and
2689 Can be used for character constants (terminator = '\''), string
2690 constants ('"') and angled headers ('>'). Multi-line strings are
2691 allowed, except for within directives. */
2694 parse_string2 (pfile, list, name, terminator, multiline_ok)
2698 unsigned int terminator;
2701 cpp_buffer *buffer = pfile->buffer;
2702 register const unsigned char *cur = buffer->cur;
2703 const unsigned char *name_limit;
2704 unsigned char *namebuf;
2705 unsigned int null_count = 0;
2706 int trigraphed_len = 0;
2709 name_limit = list->namebuf + list->name_cap;
2710 namebuf = list->namebuf + list->name_used;
2712 for (; cur < buffer->rlimit && namebuf < name_limit; )
2714 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2718 else if (c == terminator || IS_NEWLINE (c))
2720 /* Needed for trigraph_replace and multiline string warning. */
2723 /* Scan for trigraphs before checking if backslash-escaped. */
2724 if (CPP_OPTION (pfile, trigraphs)
2725 || CPP_OPTION (pfile, warn_trigraphs))
2727 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2729 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2730 if (trigraphed_len < 0)
2734 namebuf--; /* Drop the newline / terminator from the name. */
2737 /* Drop a backslash newline, and continue. */
2738 if (namebuf[-1] == '\\')
2740 handle_newline (cur, buffer->rlimit, c);
2747 /* In Fortran and assembly language, silently terminate
2748 strings of either variety at end of line. This is a
2749 kludge around not knowing where comments are in these
2751 if (CPP_OPTION (pfile, lang_fortran)
2752 || CPP_OPTION (pfile, lang_asm))
2755 /* Character constants, headers and asserts may not
2756 extend over multiple lines. In Standard C, neither
2757 may strings. We accept multiline strings as an
2758 extension, but not in directives. */
2762 cur++; /* Move forwards again. */
2764 if (pfile->multiline_string_line == 0)
2766 pfile->multiline_string_line = list->line;
2767 if (CPP_PEDANTIC (pfile))
2768 cpp_pedwarn (pfile, "multi-line string constant");
2772 handle_newline (cur, buffer->rlimit, c);
2776 unsigned char *temp;
2778 /* An odd number of consecutive backslashes represents
2779 an escaped terminator. */
2781 while (temp >= name->text && *temp == '\\')
2784 if ((namebuf - temp) & 1)
2791 /* Run out of name space? */
2792 if (cur < buffer->rlimit)
2794 list->name_used = namebuf - list->namebuf;
2795 auto_expand_name_space (list);
2799 /* We may not have trigraph-replaced the input for this code path,
2800 but as the input is in error by being unterminated we don't
2801 bother. Prevent warnings about no newlines at EOF. */
2802 if (IS_NEWLINE(cur[-1]))
2806 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2808 if (terminator == '\"' && pfile->multiline_string_line != list->line
2809 && pfile->multiline_string_line != 0)
2811 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2812 "possible start of unterminated string literal");
2813 pfile->multiline_string_line = 0;
2818 name->len = namebuf - name->text;
2819 list->name_used = namebuf - list->namebuf;
2822 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2823 : "null character preserved"));
2826 /* The character TYPE helps us distinguish comment types: '*' = C
2827 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2828 the stored comment includes the comment start and any terminator. */
/* Number of characters in a comment opener.  save_comment adds this to the
   length it is given and copies only the remainder from FROM.  */
2830 #define COMMENT_START_LEN 2
2832 save_comment (list, token, from, len, type)
2835 const unsigned char *from;
2839 unsigned char *buffer;
2841 len += COMMENT_START_LEN;
2843 if (list->name_used + len > list->name_cap)
2844 expand_name_space (list, len);
2846 INIT_TOKEN_NAME (list, token);
2847 token->type = CPP_COMMENT;
2848 token->val.name.len = len;
2850 buffer = list->namebuf + list->name_used;
2851 list->name_used += len;
2853 /* Copy the comment. */
2864 memcpy (buffer, from, len - COMMENT_START_LEN);
2868 * The tokenizer's main loop. Returns a token list, representing a
2869 * logical line in the input file, terminated with a CPP_VSPACE
2870 * token. On EOF, a token list containing the single CPP_EOF token
2873 * Implementation relies almost entirely on lookback, rather than
2874 * looking forwards. This means that tokenization requires just
2875 * a single pass of the file, even in the presence of trigraphs and
2876 * escaped newlines, providing significant performance benefits.
2877 * Trigraph overhead is negligible if they are disabled, and low
2878 * even when enabled.
/* True when the first token of the line being lexed is CPP_HASH.  Refers to
   `list' and `first_token' by name, so it is only usable where both are in
   scope, as in _cpp_lex_line.  */
2881 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2884 _cpp_lex_line (pfile, list)
2888 cpp_token *cur_token, *token_limit;
2889 cpp_buffer *buffer = pfile->buffer;
2890 register const unsigned char *cur = buffer->cur;
2891 unsigned char flags = 0;
2892 unsigned int first_token = list->tokens_used;
2894 pfile->col_adjust = 0;
2896 token_limit = list->tokens + list->tokens_cap;
2897 cur_token = list->tokens + list->tokens_used;
2899 for (; cur < buffer->rlimit && cur_token < token_limit;)
2901 unsigned char c = *cur++;
2903 /* Optimize whitespace skipping, as most tokens are probably
2904 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2906 if (is_hspace ((unsigned int) c))
2908 /* Step back to get the null warning and tab correction. */
2909 buffer->cur = cur - 1;
2910 skip_whitespace (pfile, IS_DIRECTIVE ());
2913 flags = PREV_WHITESPACE;
2914 if (cur == buffer->rlimit)
2919 /* Initialize current token. Its type is set in the switch. */
2920 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2921 cur_token->flags = flags;
2926 case '0': case '1': case '2': case '3': case '4':
2927 case '5': case '6': case '7': case '8': case '9':
2931 cur--; /* Backup character. */
2932 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2935 INIT_TOKEN_NAME (list, cur_token);
2936 /* Prepend an immediately previous CPP_DOT token. */
2939 if (list->name_cap == list->name_used)
2940 auto_expand_name_space (list);
2942 cur_token->val.name.len = 1;
2943 list->namebuf[list->name_used++] = '.';
2947 cur_token->type = CPP_NUMBER; /* Before parse_number. */
2949 parse_number (pfile, list, &cur_token->val.name);
2957 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2958 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2959 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2960 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2962 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2963 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2964 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2965 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2967 cur--; /* Backup character. */
2968 INIT_TOKEN_NAME (list, cur_token);
2969 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2973 parse_name (pfile, list, &cur_token->val.name);
2976 /* Find handler for newly created / extended directive. */
2977 if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2978 _cpp_check_directive (list, cur_token);
2985 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2986 /* Do we have a wide string? */
2987 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2988 && cur_token[-1].val.name.len == 1
2989 && cur_token[-1].val.name.text[0] == 'L'
2990 && !CPP_TRADITIONAL (pfile))
2992 /* No need for 'L' any more. */
2994 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2998 /* Here c is one of ' " or >. */
2999 INIT_TOKEN_NAME (list, cur_token);
3001 parse_string2 (pfile, list, &cur_token->val.name, c,
3002 c == '"' && !IS_DIRECTIVE());
3008 cur_token->type = CPP_DIV;
3011 if (PREV_TOKEN_TYPE == CPP_DIV)
3013 /* We silently allow C++ comments in system headers,
3014 irrespective of conformance mode, because lots of
3015 broken systems do that and trying to clean it up
3016 in fixincludes is a nightmare. */
3017 if (buffer->system_header_p)
3018 goto do_line_comment;
3019 else if (CPP_OPTION (pfile, cplusplus_comments))
3021 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
3022 && ! buffer->warned_cplusplus_comments)
3026 "C++ style comments are not allowed in ISO C89");
3028 "(this will be reported only once per input file)");
3029 buffer->warned_cplusplus_comments = 1;
3035 "comment start split across lines");
3036 if (skip_line_comment2 (pfile))
3037 cpp_error_with_line (pfile, list->line,
3039 "multi-line comment");
3041 /* Back-up to first '-' or '/'. */
3043 if (!CPP_OPTION (pfile, discard_comments)
3044 && (!IS_DIRECTIVE() || list->dirno == 0))
3045 save_comment (list, cur_token++, cur,
3046 buffer->cur - cur, c);
3049 if (!CPP_OPTION (pfile, traditional))
3050 flags = PREV_WHITESPACE;
3059 cur_token->type = CPP_MULT;
3062 if (PREV_TOKEN_TYPE == CPP_DIV)
3067 "comment start '/*' split across lines");
3068 if (skip_block_comment2 (pfile))
3069 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3070 "unterminated comment");
3071 else if (buffer->cur[-2] != '*')
3073 "comment end '*/' split across lines");
3075 /* Back up to opening '/'. */
3077 if (!CPP_OPTION (pfile, discard_comments)
3078 && (!IS_DIRECTIVE() || list->dirno == 0))
3079 save_comment (list, cur_token++, cur,
3080 buffer->cur - cur, c);
3083 if (!CPP_OPTION (pfile, traditional))
3084 flags = PREV_WHITESPACE;
3087 else if (CPP_OPTION (pfile, cplusplus))
3089 /* In C++, there are .* and ->* operators. */
3090 if (PREV_TOKEN_TYPE == CPP_DEREF)
3091 BACKUP_TOKEN (CPP_DEREF_STAR);
3092 else if (PREV_TOKEN_TYPE == CPP_DOT)
3093 BACKUP_TOKEN (CPP_DOT_STAR);
3101 handle_newline (cur, buffer->rlimit, c);
3102 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3104 /* Remove the escaped newline. Then continue to process
3105 any interrupted name or number. */
3110 if (cur_token->type == CPP_NAME)
3112 else if (cur_token->type == CPP_NUMBER)
3113 goto continue_number;
3116 /* Remember whitespace setting. */
3117 flags = cur_token->flags;
3120 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3123 cpp_warning (pfile, "backslash and newline separated by space");
3125 PUSH_TOKEN (CPP_VSPACE);
3129 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3131 if (CPP_OPTION (pfile, chill))
3132 goto do_line_comment;
3133 REVISE_TOKEN (CPP_MINUS_MINUS);
3136 PUSH_TOKEN (CPP_MINUS);
3139 /* The digraph flag checking ensures that ## and %:%:
3140 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3143 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3144 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3145 REVISE_TOKEN (CPP_PASTE);
3147 PUSH_TOKEN (CPP_HASH);
3151 cur_token->type = CPP_COLON;
3154 if (PREV_TOKEN_TYPE == CPP_COLON
3155 && CPP_OPTION (pfile, cplusplus))
3156 BACKUP_TOKEN (CPP_SCOPE);
3157 /* Digraph: "<:" is a '[' */
3158 else if (PREV_TOKEN_TYPE == CPP_LESS)
3159 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3160 /* Digraph: "%:" is a '#' */
3161 else if (PREV_TOKEN_TYPE == CPP_MOD)
3163 (--cur_token)->flags |= DIGRAPH;
3171 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3172 REVISE_TOKEN (CPP_AND_AND);
3174 PUSH_TOKEN (CPP_AND);
3179 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3180 REVISE_TOKEN (CPP_OR_OR);
3182 PUSH_TOKEN (CPP_OR);
3186 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3187 REVISE_TOKEN (CPP_PLUS_PLUS);
3189 PUSH_TOKEN (CPP_PLUS);
3193 /* This relies on equidistance of "?=" and "?" tokens. */
3194 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3195 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3197 PUSH_TOKEN (CPP_EQ);
3201 cur_token->type = CPP_GREATER;
3204 if (PREV_TOKEN_TYPE == CPP_GREATER)
3205 BACKUP_TOKEN (CPP_RSHIFT);
3206 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3207 BACKUP_TOKEN (CPP_DEREF);
3208 /* Digraph: ":>" is a ']' */
3209 else if (PREV_TOKEN_TYPE == CPP_COLON)
3210 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3211 /* Digraph: "%>" is a '}' */
3212 else if (PREV_TOKEN_TYPE == CPP_MOD)
3213 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3219 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3221 REVISE_TOKEN (CPP_LSHIFT);
3224 /* Is this the beginning of a header name? */
3225 if (list->flags & SYNTAX_INCLUDE)
3227 c = '>'; /* Terminator. */
3228 cur_token->type = CPP_HEADER_NAME;
3229 goto do_parse_string;
3231 PUSH_TOKEN (CPP_LESS);
3235 /* Digraph: "<%" is a '{' */
3236 cur_token->type = CPP_MOD;
3237 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3238 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3243 if (cur + 1 < buffer->rlimit && *cur == '?'
3244 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3246 /* Handle trigraph. */
3250 case '(': goto make_open_square;
3251 case ')': goto make_close_square;
3252 case '<': goto make_open_brace;
3253 case '>': goto make_close_brace;
3254 case '=': goto make_hash;
3255 case '!': goto make_or;
3256 case '-': goto make_complement;
3257 case '/': goto make_backslash;
3258 case '\'': goto make_xor;
3261 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3263 /* GNU C++ defines <? and >? operators. */
3264 if (PREV_TOKEN_TYPE == CPP_LESS)
3266 REVISE_TOKEN (CPP_MIN);
3269 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3271 REVISE_TOKEN (CPP_MAX);
3275 PUSH_TOKEN (CPP_QUERY);
3279 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3281 && !(cur_token[-1].flags & PREV_WHITESPACE))
3284 PUSH_TOKEN (CPP_ELLIPSIS);
3287 PUSH_TOKEN (CPP_DOT);
3291 case '~': PUSH_TOKEN (CPP_COMPL); break;
3293 case '^': PUSH_TOKEN (CPP_XOR); break;
3295 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3297 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3299 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3301 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3303 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3304 case '!': PUSH_TOKEN (CPP_NOT); break;
3305 case ',': PUSH_TOKEN (CPP_COMMA); break;
3306 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3307 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3308 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3311 if (CPP_OPTION (pfile, dollars_in_ident))
3316 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3317 PUSH_TOKEN (CPP_OTHER);
3322 /* Run out of token space? */
3323 if (cur_token == token_limit)
3325 list->tokens_used = cur_token - list->tokens;
3326 _cpp_expand_token_space (list, 256);
3330 cur_token->type = CPP_EOF;
3331 cur_token->flags = flags;
3333 if (cur_token != &list->tokens[first_token])
3335 /* Next call back will get just a CPP_EOF. */
3337 cpp_warning (pfile, "no newline at end of file");
3338 PUSH_TOKEN (CPP_VSPACE);
3344 list->tokens_used = cur_token - list->tokens;
3346 /* FIXME: take this check out and put it in the caller.
3347 list->directive == 0 indicates an unknown directive (but null
3348 directive is OK). This is the first time we can be sure the
3349 directive is invalid, and thus warn about it, because it might
3350 have been split by escaped newlines. Also, don't complain about
3351 invalid directives in assembly source, we don't know where the
3352 comments are, and # may introduce assembler pseudo-ops. */
3354 if (IS_DIRECTIVE (list) && list->dirno == -1
3355 && list->tokens[1].type != CPP_VSPACE
3356 && !CPP_OPTION (pfile, lang_asm))
3357 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3358 "invalid preprocessing directive");
3361 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3362 already contain enough space to hold the token's spelling. If
3363 WHITESPACE is true, and the token was preceded by whitespace,
3364 output a single space before the token proper. Returns a pointer
3365 to the character after the last character written. */
3367 static unsigned char *
3368 spell_token (pfile, token, buffer, whitespace)
3369 cpp_reader *pfile; /* Would be nice to be rid of this... */
3370 const cpp_token *token;
3371 unsigned char *buffer;
3374 /* Whitespace will not be wanted by handlers of the # and ##
3375 operators calling this function, but will be wanted by the
3376 function that writes out the preprocessed file. */
3377 if (whitespace && token->flags & PREV_WHITESPACE)
/* The spelling category recorded for this token type in
   token_spellings selects how the token is written.  */
3380 switch (token_spellings[token->type].type)
3382 case SPELL_OPERATOR:
3384 const unsigned char *spelling;
/* A token flagged DIGRAPH takes its spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH; any other operator uses the
   spelling stored in token_spellings.  */
3387 if (token->flags & DIGRAPH)
3388 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3390 spelling = token_spellings[token->type].spelling;
/* The spelling is NUL-terminated; the loop stops at the terminator.  */
3392 while ((c = *spelling++) != '\0')
/* Copy the token's stored text verbatim and advance past it.  */
3398 memcpy (buffer, token->val.name.text, token->val.name.len);
3399 buffer += token->val.name.len;
/* Wide and narrow string / character tokens are distinguished by
   type before their stored text is copied.  */
3406 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3409 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3412 memcpy (buffer, token->val.name.text, token->val.name.len);
3413 buffer += token->val.name.len;
/* Here the single character to write is taken from token->aux.  */
3419 *buffer++ = token->aux;
/* Reports an internal error through cpp_ice; this is the one use of
   PFILE visible in this function.  */
3423 cpp_ice (pfile, "Unspellable token");
3430 /* Temporary function for illustrative purposes. */
/* Initialises the trigraph map, allocates one token list, and then
   feeds that list through _cpp_lex_line, _cpp_handle_directive,
   _cpp_output_list and _cpp_clear_toklist in turn.  */
3432 _cpp_lex_file (pfile)
3437 init_trigraph_map ();
/* NOTE(review): no matching release of this xmalloc'ed list is
   visible in this function -- confirm it is freed.  */
3438 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3439 _cpp_init_toklist (list, DUMMY_TOKEN);
3443 _cpp_lex_line (pfile, list);
/* A list whose first token is CPP_EOF presumably marks the end of
   input -- confirm what this branch does.  */
3444 if (list->tokens[0].type == CPP_EOF)
3449 _cpp_handle_directive (pfile, list);
/* The same list is emptied after output, presumably so that it can be
   reused for the next line.  */
3452 _cpp_output_list (pfile, list);
3453 _cpp_clear_toklist (list);
3457 /* Temporary function for illustrative purposes. */
/* Spells the tokens of LIST, starting at tokens[0], into PFILE's
   output at pfile->limit, advancing pfile->limit past each one.  */
3459 _cpp_output_list (pfile, list)
3465 token = &list->tokens[0];
/* CPP_RESERVE is given the token's TOKEN_LEN before it is spelled,
   presumably to guarantee room at pfile->limit.  The final argument 1
   asks spell_token to emit a leading space for tokens flagged
   PREV_WHITESPACE.  */
3468 CPP_RESERVE (pfile, TOKEN_LEN (token));
3469 pfile->limit = spell_token (pfile, token, pfile->limit, 1);
/* The type tested is that of the token just handled, so a CPP_VSPACE
   token is processed before the loop stops.  The loop has no other
   bound: LIST must contain a CPP_VSPACE token.  */
3471 while (token++->type != CPP_VSPACE);