1 /* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7 Single-pass line tokenization by Neil Booth, April 2000
9 This program is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Low-level cursor access on a cpp_buffer.  BUFFER is evaluated more
   than once by PEEKBUF and GETBUF, so it must be free of side effects.

   PEEKBUF yields the character N positions past BUFFER's cursor without
   consuming it, or EOF when no more than N characters remain before
   rlimit.  */
29 #define PEEKBUF(BUFFER, N) \
30 ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* GETBUF yields the character at the cursor and advances the cursor by
   one, or yields EOF and leaves the cursor alone once it has reached
   rlimit.  */
31 #define GETBUF(BUFFER) \
32 ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* FORWARDBUF moves the cursor by N characters; no bounds check is made.  */
33 #define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))
/* Shorthands that apply the macros above to CPP_BUFFER (pfile).  They
   need a variable named pfile in scope at the point of use.  */
35 #define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
36 #define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
37 #define GETC() GETBUF (CPP_BUFFER (pfile))
38 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
40 static void skip_block_comment PARAMS ((cpp_reader *));
41 static void skip_line_comment PARAMS ((cpp_reader *));
42 static int maybe_macroexpand PARAMS ((cpp_reader *, long));
43 static int skip_comment PARAMS ((cpp_reader *, int));
44 static int copy_comment PARAMS ((cpp_reader *, int));
45 static void skip_string PARAMS ((cpp_reader *, int));
46 static void parse_string PARAMS ((cpp_reader *, int));
47 static U_CHAR *find_position PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
48 static void null_warning PARAMS ((cpp_reader *, unsigned int));
50 static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
52 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
54 static void bump_column PARAMS ((cpp_printer *, unsigned int,
56 static void expand_name_space PARAMS ((cpp_toklist *, unsigned int));
57 static void expand_token_space PARAMS ((cpp_toklist *));
58 static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
/* Grow LIST's name buffer by one byte plus half of its current
   name_cap, by forwarding to expand_name_space.  LIST is evaluated
   twice.  */
61 #define auto_expand_name_space(list) \
62 expand_name_space ((list), 1 + (list)->name_cap / 2)
66 static void expand_comment_space PARAMS ((cpp_toklist *));
67 void init_trigraph_map PARAMS ((void));
68 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
70 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
71 const unsigned char *));
72 static int skip_block_comment2 PARAMS ((cpp_reader *));
73 static int skip_line_comment2 PARAMS ((cpp_reader *));
74 static void skip_whitespace PARAMS ((cpp_reader *, int));
75 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
76 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
77 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
79 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
80 static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
81 unsigned int, unsigned int, unsigned int));
82 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
84 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
86 static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
87 unsigned char *, int));
89 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
92 /* Macros on a cpp_name. */
93 #define INIT_NAME(list, name) \
95 (name).text = (list)->namebuf + (list)->name_used;} while (0)
97 #define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
99 /* Maybe put these in the ISTABLE eventually. */
/* IS_HSPACE accepts only space and tab; IS_NEWLINE accepts line feed
   and carriage return.  Both evaluate C twice, so C must be free of
   side effects.  */
100 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
101 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
103 /* Handle LF, CR, CR-LF and LF-CR style newlines. Assumes next
104 character, if any, is in buffer. */
105 #define handle_newline(cur, limit, c) \
107 if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
109 CPP_BUMP_LINE_CUR (pfile, (cur)); \
110 pfile->col_adjust = 0; \
/* Token-cursor helpers.  All of them act on a variable named cur_token
   that must be in scope where the macro is used.

   IMMED_TOKEN is true when the token at cur_token does not have
   PREV_WHITESPACE set in its flags.  PREV_TOKEN_TYPE is the type of the
   token just before cur_token.  */
113 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
114 #define PREV_TOKEN_TYPE (cur_token[-1].type)
/* PUSH_TOKEN stores TTYPE as the type of the token at cur_token and
   then advances cur_token.  REVISE_TOKEN overwrites the type of the
   preceding token.  BACKUP_TOKEN steps cur_token back by one and sets
   the type of the token it now points at.  These three expand to bare
   assignment expressions, so use them only as whole statements.  */
116 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
117 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
118 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
/* BACKUP_DIGRAPH does what BACKUP_TOKEN does and additionally sets the
   DIGRAPH flag on the token cur_token then points at.  */
119 #define BACKUP_DIGRAPH(ttype) do { \
120 BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
122 /* An upper bound on the number of bytes needed to spell a token,
123 including preceding whitespace. */
/* The bound is 5 bytes, plus the stored name length for tokens whose
   spelling type is beyond SPELL_NONE.  TOKEN is a pointer expression;
   it is parenthesized in the expansion so that arguments such as
   `&tok' or `list->tokens + i' bind correctly.  TOKEN is evaluated
   more than once, so it must be free of side effects.  */
124 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
125 SPELL_NONE ? (token)->val.name.len : 0))
129 /* Order here matters. Those beyond SPELL_NONE store their spelling
130 in the token list, and its length in token->val.name.len. */
131 #define SPELL_OPERATOR 0
132 #define SPELL_CHAR 2 /* FIXME: revert order after transition. */
134 #define SPELL_IDENT 3
135 #define SPELL_STRING 4
137 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
138 #define I(e, s) {SPELL_IDENT, s},
139 #define S(e, s) {SPELL_STRING, s},
140 #define C(e, s) {SPELL_CHAR, s},
141 #define N(e, s) {SPELL_NONE, s},
143 static const struct token_spelling
146 const U_CHAR *spelling;
147 } token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
155 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
158 _cpp_grow_token_buffer (pfile, n)
162 long old_written = CPP_WRITTEN (pfile);
163 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
164 pfile->token_buffer = (U_CHAR *)
165 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
166 CPP_SET_WRITTEN (pfile, old_written);
169 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
170 If BUFFER != NULL, then use the LENGTH characters in BUFFER
171 as the new input buffer.
172 Return the new buffer, or NULL on failure. */
175 cpp_push_buffer (pfile, buffer, length)
177 const U_CHAR *buffer;
180 cpp_buffer *buf = CPP_BUFFER (pfile);
182 if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
184 cpp_fatal (pfile, "macro or `#include' recursion too deep");
188 new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
190 new->if_stack = pfile->if_stack;
191 new->buf = new->cur = buffer;
192 new->rlimit = buffer + length;
195 new->line_base = NULL;
197 CPP_BUFFER (pfile) = new;
202 cpp_pop_buffer (pfile)
205 cpp_buffer *buf = CPP_BUFFER (pfile);
206 if (ACTIVE_MARK_P (pfile))
207 cpp_ice (pfile, "mark active in cpp_pop_buffer");
211 _cpp_unwind_if_stack (pfile, buf);
213 free ((PTR) buf->buf);
214 if (pfile->system_include_depth)
215 pfile->system_include_depth--;
216 if (pfile->potential_control_macro)
218 buf->ihash->control_macro = pfile->potential_control_macro;
219 pfile->potential_control_macro = 0;
221 pfile->input_stack_listing_current = 0;
225 HASHNODE *m = buf->macro;
228 if ((m->type == T_FMACRO && buf->mapped)
229 || m->type == T_SPECLINE || m->type == T_FILE
230 || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
231 || m->type == T_STDC)
232 free ((PTR) buf->buf);
234 CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
236 pfile->buffer_stack_depth--;
237 return CPP_BUFFER (pfile);
240 /* Deal with the annoying semantics of fwrite. */
242 safe_fwrite (pfile, buf, len, fp)
252 count = fwrite (buf, 1, len, fp);
261 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
264 /* Notify the compiler proper that the current line number has jumped,
265 or the current file name has changed. */
268 output_line_command (pfile, print, line)
273 cpp_buffer *ip = cpp_file_buffer (pfile);
274 enum { same = 0, enter, leave, rname } change;
275 static const char * const codes[] = { "", " 1", " 2", "" };
277 if (CPP_OPTION (pfile, no_line_commands))
280 /* Determine whether the current filename has changed, and if so,
281 how. 'nominal_fname' values are unique, so they can be compared
282 by comparing pointers. */
283 if (ip->nominal_fname == print->last_fname)
287 if (pfile->buffer_stack_depth == print->last_bsd)
291 if (pfile->buffer_stack_depth > print->last_bsd)
295 print->last_bsd = pfile->buffer_stack_depth;
297 print->last_fname = ip->nominal_fname;
299 /* If the current file has not changed, we can output a few newlines
300 instead if we want to increase the line number by a small amount.
301 We cannot do this if print->lineno is zero, because that means we
302 haven't output any line commands yet. (The very first line
303 command output is a `same_file' command.) */
304 if (change == same && print->lineno != 0
305 && line >= print->lineno && line < print->lineno + 8)
307 while (line > print->lineno)
309 putc ('\n', print->outf);
315 #ifndef NO_IMPLICIT_EXTERN_C
316 if (CPP_OPTION (pfile, cplusplus))
317 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
319 ip->system_header_p ? " 3" : "",
320 (ip->system_header_p == 2) ? " 4" : "");
323 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
325 ip->system_header_p ? " 3" : "");
326 print->lineno = line;
329 /* Write the contents of the token_buffer to the output stream, and
330 clear the token_buffer. Also handles generating line commands and
331 keeping track of file transitions. */
334 cpp_output_tokens (pfile, print)
340 if (CPP_WRITTEN (pfile) - print->written)
342 if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
344 safe_fwrite (pfile, pfile->token_buffer,
345 CPP_WRITTEN (pfile) - print->written, print->outf);
348 ip = cpp_file_buffer (pfile);
350 output_line_command (pfile, print, CPP_BUF_LINE (ip));
352 CPP_SET_WRITTEN (pfile, print->written);
355 /* Helper for cpp_output_list - increases the column number to match
356 what we expect it to be. */
359 bump_column (print, from, to)
361 unsigned int from, to;
363 unsigned int tabs, spcs;
364 unsigned int delta = to - from;
366 /* Only if FROM is 0, advance by tabs. */
368 tabs = delta / 8, spcs = delta % 8;
370 tabs = 0, spcs = delta;
372 while (tabs--) putc ('\t', print->outf);
373 while (spcs--) putc (' ', print->outf);
376 /* Write out the list L onto pfile->token_buffer. This function is
379 1) pfile->token_buffer is not going to continue to exist.
380 2) At the moment, tokens don't carry the information described
381 in cpplib.h; they are all strings.
382 3) The list has to be a complete line, and has to be written starting
383 at the beginning of a line. */
386 cpp_output_list (pfile, print, list)
389 const cpp_toklist *list;
392 unsigned int curcol = 1;
394 /* XXX Probably does not do what is intended. */
395 if (print->lineno != list->line)
396 output_line_command (pfile, print, list->line);
398 for (i = 0; i < list->tokens_used; i++)
400 if (TOK_TYPE (list, i) == CPP_VSPACE)
402 output_line_command (pfile, print, list->tokens[i].aux);
406 if (curcol < TOK_COL (list, i))
408 /* Insert space to bring the column to what it should be. */
409 bump_column (print, curcol - 1, TOK_COL (list, i));
410 curcol = TOK_COL (list, i);
412 /* XXX We may have to insert space to prevent an accidental
414 safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
415 curcol += TOK_LEN (list, i);
419 /* Scan a string (which may have escape marks), perform macro expansion,
420 and write the result to the token_buffer. */
423 _cpp_expand_to_buffer (pfile, buf, length)
429 enum cpp_ttype token;
434 cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
438 /* Copy the buffer, because it might be in an unsafe place - for
439 example, a sequence on the token_buffer, where the pointers will
440 be invalidated if we enlarge the token_buffer. */
441 buf1 = alloca (length);
442 memcpy (buf1, buf, length);
444 /* Set up the input on the input stack. */
445 stop = CPP_BUFFER (pfile);
446 if (cpp_push_buffer (pfile, buf1, length) == NULL)
448 CPP_BUFFER (pfile)->has_escapes = 1;
450 /* Scan the input, create the output. */
453 token = cpp_get_token (pfile);
454 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
459 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
462 cpp_scan_buffer_nooutput (pfile)
465 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
466 enum cpp_ttype token;
467 unsigned int old_written = CPP_WRITTEN (pfile);
468 /* In no-output mode, we can ignore everything but directives. */
471 if (! pfile->only_seen_white)
472 _cpp_skip_rest_of_line (pfile);
473 token = cpp_get_token (pfile);
474 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
477 CPP_SET_WRITTEN (pfile, old_written);
480 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
483 cpp_scan_buffer (pfile, print)
487 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
488 enum cpp_ttype token;
492 token = cpp_get_token (pfile);
493 if (token == CPP_VSPACE || token == CPP_EOF
494 /* XXX Temporary kluge - force flush after #include only */
495 || (token == CPP_DIRECTIVE
496 && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
498 cpp_output_tokens (pfile, print);
499 if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
505 /* Return the topmost cpp_buffer that corresponds to a file (not a macro). */
508 cpp_file_buffer (pfile)
513 for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
514 if (ip->ihash != NULL)
519 /* Token-buffer helper functions. */
521 /* Expand a token list's string space. */
523 expand_name_space (list, len)
527 const U_CHAR *old_namebuf;
529 old_namebuf = list->namebuf;
530 list->name_cap += len;
531 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
533 /* Fix up token text pointers. */
534 if (list->namebuf != old_namebuf)
538 for (i = 0; i < list->tokens_used; i++)
539 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
540 list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
544 /* Expand the number of tokens in a list. */
546 expand_token_space (list)
549 list->tokens_cap *= 2;
550 if (list->flags & LIST_OFFSET)
552 list->tokens = (cpp_token *)
553 xrealloc (list->tokens, (list->tokens_cap + 1) * sizeof (cpp_token));
554 if (list->flags & LIST_OFFSET)
555 list->tokens++; /* Skip the dummy. */
558 /* Initialize a token list. We allocate an extra token in front of
559 the token list, as this allows us to always peek at the previous
560 token without worrying about underflowing the list. */
562 _cpp_init_toklist (list)
565 /* Initialize token space. Put a dummy token before the start
566 that will fail matches. */
567 list->tokens_cap = 256; /* 4K's worth. */
568 list->tokens = (cpp_token *)
569 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
570 list->tokens[0].type = CPP_EOF;
573 /* Initialize name space. */
574 list->name_cap = 1024;
575 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
577 /* Only create a comment space on demand. */
578 list->comments_cap = 0;
581 list->flags = LIST_OFFSET;
582 _cpp_clear_toklist (list);
585 /* Clear a token list. */
587 _cpp_clear_toklist (list)
590 list->tokens_used = 0;
592 list->comments_used = 0;
594 list->flags &= LIST_OFFSET; /* clear all but that one */
597 /* Free a token list. Does not free the list itself, which may be
598 embedded in a larger structure. */
600 _cpp_free_toklist (list)
604 free (list->comments);
605 if (list->flags & LIST_OFFSET)
606 free (list->tokens - 1); /* Backup over dummy token. */
609 free (list->namebuf);
612 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
613 COPY is assumed not to be initialized. The comment space is not
616 _cpp_slice_toklist (copy, start, finish)
618 const cpp_token *start, *finish;
624 copy->tokens_cap = n;
625 copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
626 memcpy (copy->tokens, start, n * sizeof (cpp_token));
629 for (i = 0; i < n; i++)
630 if (token_spellings[start[i].type].type > SPELL_NONE)
631 bytes += start[i].val.name.len;
633 copy->namebuf = xmalloc (bytes);
635 for (i = 0; i < n; i++)
636 if (token_spellings[start[i].type].type > SPELL_NONE)
638 memcpy (copy->namebuf + bytes,
639 start[i].val.name.text, start[i].val.name.len);
640 copy->tokens[i].val.name.text = copy->namebuf + bytes;
641 bytes += start[i].val.name.len;
644 copy->tokens_cap = n;
645 copy->tokens_used = n;
646 copy->name_used = bytes;
647 copy->name_cap = bytes;
649 copy->comments_cap = 0;
650 copy->comments_used = 0;
656 /* Shrink a token list down to the minimum size. */
658 _cpp_squeeze_toklist (list)
662 const U_CHAR *old_namebuf;
664 if (list->flags & LIST_OFFSET)
667 memmove (list->tokens, list->tokens + 1,
668 list->tokens_used * sizeof (cpp_token));
669 list->tokens = xrealloc (list->tokens,
670 list->tokens_used * sizeof (cpp_token));
671 list->flags &= ~LIST_OFFSET;
674 list->tokens = xrealloc (list->tokens,
675 list->tokens_used * sizeof (cpp_token));
676 list->tokens_cap = list->tokens_used;
678 old_namebuf = list->namebuf;
679 list->namebuf = xrealloc (list->namebuf, list->name_used);
680 list->name_cap = list->name_used;
682 /* Fix up token text pointers. */
683 delta = list->namebuf - old_namebuf;
688 for (i = 0; i < list->tokens_used; i++)
689 if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
690 list->tokens[i].val.name.text += delta;
693 if (list->comments_cap)
695 list->comments = xrealloc (list->comments,
696 list->comments_used * sizeof (cpp_token));
697 list->comments_cap = list->comments_used;
701 /* Compare two tokens. */
703 _cpp_equiv_tokens (a, b)
704 const cpp_token *a, *b;
706 if (a->type != b->type
707 || a->flags != b->flags
711 if (token_spellings[a->type].type > SPELL_NONE)
713 if (a->val.name.len != b->val.name.len
714 || ustrncmp(a->val.name.text,
722 /* Compare two token lists. */
724 _cpp_equiv_toklists (a, b)
725 const cpp_toklist *a, *b;
729 if (a->tokens_used != b->tokens_used)
732 for (i = 0; i < a->tokens_used; i++)
733 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
738 /* Scan until we encounter a token of type STOP or a newline, and
739 create a token list for it. Does not macro-expand or execute
740 directives. The final token is not included in the list or
741 consumed from the input. Returns the type of the token stopped at. */
744 _cpp_scan_until (pfile, list, stop)
754 _cpp_clear_toklist (list);
755 list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
757 written = CPP_WRITTEN (pfile);
762 col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
763 type = _cpp_lex_token (pfile);
764 len = CPP_WRITTEN (pfile) - written;
765 CPP_SET_WRITTEN (pfile, written);
766 if (type == CPP_HSPACE)
768 if (CPP_PEDANTIC (pfile))
769 pedantic_whitespace (pfile, pfile->token_buffer + written, len);
773 else if (type == CPP_COMMENT)
774 /* Only happens when processing -traditional macro definitions.
775 Do not give this a token entry, but do not change space_before
779 if (list->tokens_used >= list->tokens_cap)
780 expand_token_space (list);
781 if (list->name_used + len >= list->name_cap)
782 expand_name_space (list, list->name_used + len + 1 - list->name_cap);
784 if (type == CPP_MACRO)
787 if (type == CPP_VSPACE || type == stop)
791 TOK_TYPE (list, i) = type;
792 TOK_COL (list, i) = col;
793 TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
795 TOK_LEN (list, i) = len;
796 if (token_spellings[type].type > SPELL_NONE)
798 memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
799 TOK_NAME (list, i) = list->namebuf + list->name_used;
800 list->name_used += len;
803 TOK_NAME (list, i) = token_spellings[type].spelling;
808 /* XXX Temporary kluge: put back the newline (or whatever). */
811 /* Don't consider the first token to have white before. */
812 TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
816 /* Skip a C-style block comment. We know it's a comment, and point is
817 at the second character of the starter. */
819 skip_block_comment (pfile)
822 unsigned int line, col;
823 const U_CHAR *limit, *cur;
826 line = CPP_BUF_LINE (CPP_BUFFER (pfile));
827 col = CPP_BUF_COL (CPP_BUFFER (pfile));
828 limit = CPP_BUFFER (pfile)->rlimit;
829 cur = CPP_BUFFER (pfile)->cur;
834 if (c == '\n' || c == '\r')
836 /* \r cannot be a macro escape marker here. */
837 if (!ACTIVE_MARK_P (pfile))
838 CPP_BUMP_LINE_CUR (pfile, cur);
842 /* Check for terminator. */
843 if (cur < limit && *cur == '/')
846 /* Warn about comment starter embedded in comment. */
847 if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
848 cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
849 cur - CPP_BUFFER (pfile)->line_base,
850 "'/*' within comment");
854 cpp_error_with_line (pfile, line, col, "unterminated comment");
857 CPP_BUFFER (pfile)->cur = cur + 1;
860 /* Skip a C++/Chill line comment. We know it's a comment, and point
861 is at the second character of the initiator. */
863 skip_line_comment (pfile)
871 /* We don't have to worry about EOF in here. */
874 /* Don't consider final '\n' to be part of comment. */
880 /* \r cannot be a macro escape marker here. */
881 if (!ACTIVE_MARK_P (pfile))
882 CPP_BUMP_LINE (pfile);
883 if (CPP_OPTION (pfile, warn_comments))
884 cpp_warning (pfile, "backslash-newline within line comment");
889 /* Skip a comment - C, C++, or Chill style. M is the first character
890 of the comment marker. If this really is a comment, skip to its
891 end and return ' '. If this is not a comment, return M (which will
895 skip_comment (pfile, m)
899 if (m == '/' && PEEKC() == '*')
901 skip_block_comment (pfile);
904 else if (m == '/' && PEEKC() == '/')
906 if (CPP_BUFFER (pfile)->system_header_p)
908 /* We silently allow C++ comments in system headers, irrespective
909 of conformance mode, because lots of busted systems do that
910 and trying to clean it up in fixincludes is a nightmare. */
911 skip_line_comment (pfile);
914 else if (CPP_OPTION (pfile, cplusplus_comments))
916 if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
918 if (CPP_WTRADITIONAL (pfile))
920 "C++ style comments are not allowed in traditional C");
921 else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
923 "C++ style comments are not allowed in ISO C89");
924 if (CPP_WTRADITIONAL (pfile)
925 || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
927 "(this will be reported only once per input file)");
928 CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
930 skip_line_comment (pfile);
936 else if (m == '-' && PEEKC() == '-'
937 && CPP_OPTION (pfile, chill))
939 skip_line_comment (pfile);
946 /* Identical to skip_comment except that it copies the comment into the
947 token_buffer. This is used if !discard_comments. */
949 copy_comment (pfile, m)
953 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
956 if (skip_comment (pfile, m) == m)
959 limit = CPP_BUFFER (pfile)->cur;
960 CPP_RESERVE (pfile, limit - start + 2);
961 CPP_PUTC_Q (pfile, m);
962 for (; start <= limit; start++)
964 CPP_PUTC_Q (pfile, *start);
970 null_warning (pfile, count)
975 cpp_warning (pfile, "embedded null character ignored");
977 cpp_warning (pfile, "embedded null characters ignored");
980 /* Skip horizontal whitespace, backslash-newline, and comments. Does not macro-expand. */
983 _cpp_skip_hspace (pfile)
986 unsigned int null_count = 0;
994 else if (is_hspace(c))
996 if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
997 cpp_pedwarn (pfile, "%s in preprocessing directive",
998 c == '\f' ? "formfeed" : "vertical tab");
1004 /* \r is a backslash-newline marker if !has_escapes, and
1005 a deletable-whitespace or no-reexpansion marker otherwise. */
1006 if (CPP_BUFFER (pfile)->has_escapes)
1014 CPP_BUMP_LINE (pfile);
1016 else if (c == '/' || c == '-')
1018 c = skip_comment (pfile, c);
1028 null_warning (pfile, null_count);
1031 /* Read and discard the rest of the current line. */
1034 _cpp_skip_rest_of_line (pfile)
1048 if (! CPP_BUFFER (pfile)->has_escapes)
1049 CPP_BUMP_LINE (pfile);
1054 skip_string (pfile, c);
1059 skip_comment (pfile, c);
1064 if (CPP_PEDANTIC (pfile))
1065 cpp_pedwarn (pfile, "%s in preprocessing directive",
1066 c == '\f' ? "formfeed" : "vertical tab");
1073 /* Parse an identifier starting with C. */
1076 _cpp_parse_name (pfile, c)
1088 if (c == '$' && CPP_PEDANTIC (pfile))
1089 cpp_pedwarn (pfile, "`$' in identifier");
1091 CPP_RESERVE(pfile, 2); /* One more for final NUL. */
1092 CPP_PUTC_Q (pfile, c);
1100 /* Parse and skip over a string starting with C. A single quoted
1101 string is treated like a double -- some programs (e.g., troff) are
1102 perverse this way. (However, a single quoted string is not allowed
1103 to extend over multiple lines.) */
1105 skip_string (pfile, c)
1109 unsigned int start_line, start_column;
1110 unsigned int null_count = 0;
1112 start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1113 start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1120 cpp_error_with_line (pfile, start_line, start_column,
1121 "unterminated string or character constant");
1122 if (pfile->multiline_string_line != start_line
1123 && pfile->multiline_string_line != 0)
1124 cpp_error_with_line (pfile,
1125 pfile->multiline_string_line, -1,
1126 "possible real start of unterminated constant");
1127 pfile->multiline_string_line = 0;
1135 CPP_BUMP_LINE (pfile);
1136 /* In Fortran and assembly language, silently terminate
1137 strings of either variety at end of line. This is a
1138 kludge around not knowing where comments are in these
1140 if (CPP_OPTION (pfile, lang_fortran)
1141 || CPP_OPTION (pfile, lang_asm))
1146 /* Character constants may not extend over multiple lines.
1147 In Standard C, neither may strings. We accept multiline
1148 strings as an extension. */
1151 cpp_error_with_line (pfile, start_line, start_column,
1152 "unterminated character constant");
1156 if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1157 cpp_pedwarn_with_line (pfile, start_line, start_column,
1158 "string constant runs past end of line");
1159 if (pfile->multiline_string_line == 0)
1160 pfile->multiline_string_line = start_line;
1164 if (CPP_BUFFER (pfile)->has_escapes)
1166 cpp_ice (pfile, "\\r escape inside string constant");
1170 /* Backslash newline is replaced by nothing at all. */
1171 CPP_BUMP_LINE (pfile);
1187 if (null_count == 1)
1188 cpp_warning (pfile, "null character in string or character constant");
1189 else if (null_count > 1)
1190 cpp_warning (pfile, "null characters in string or character constant");
1193 /* Parse a string and copy it to the output. */
1196 parse_string (pfile, c)
1200 const U_CHAR *start = CPP_BUFFER (pfile)->cur; /* XXX Layering violation */
1201 const U_CHAR *limit;
1203 skip_string (pfile, c);
1205 limit = CPP_BUFFER (pfile)->cur;
1206 CPP_RESERVE (pfile, limit - start + 2);
1207 CPP_PUTC_Q (pfile, c);
1208 for (; start < limit; start++)
1210 CPP_PUTC_Q (pfile, *start);
1213 /* Get the next token, and add it to the text in pfile->token_buffer.
1214 Return the kind of token we got. */
1217 _cpp_lex_token (pfile)
1221 enum cpp_ttype token;
1223 if (CPP_BUFFER (pfile) == NULL)
1234 if (PEEKC () == '=')
1238 if (CPP_OPTION (pfile, discard_comments))
1239 c = skip_comment (pfile, c);
1241 c = copy_comment (pfile, c);
1245 /* Comments are equivalent to spaces.
1246 For -traditional, a comment is equivalent to nothing. */
1247 if (!CPP_OPTION (pfile, discard_comments))
1249 else if (CPP_TRADITIONAL (pfile))
1253 CPP_PUTC (pfile, c);
1258 CPP_PUTC (pfile, c);
1265 CPP_PUTC (pfile, c2);
1268 else if (c2 == '%' && PEEKN (1) == ':')
1270 /* Digraph: "%:" == "#". */
1272 CPP_RESERVE (pfile, 2);
1273 CPP_PUTC_Q (pfile, c2);
1274 CPP_PUTC_Q (pfile, GETC ());
1282 parse_string (pfile, c);
1283 return c == '\'' ? CPP_CHAR : CPP_STRING;
1286 if (!CPP_OPTION (pfile, dollars_in_ident))
1292 /* Digraph: ":>" == "]". */
1294 || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1302 if (c2 == c || c2 == '=')
1307 /* Digraphs: "%:" == "#", "%>" == "}". */
1312 CPP_RESERVE (pfile, 2);
1313 CPP_PUTC_Q (pfile, c);
1314 CPP_PUTC_Q (pfile, c2);
1320 CPP_RESERVE (pfile, 2);
1321 CPP_PUTC_Q (pfile, c);
1322 CPP_PUTC_Q (pfile, c2);
1323 return CPP_OPEN_BRACE;
1325 /* else fall through */
1331 if (PEEKC () == '=')
1339 if (CPP_OPTION (pfile, chill))
1340 goto comment; /* Chill style comment */
1348 if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1350 /* In C++, there's a ->* operator. */
1352 CPP_RESERVE (pfile, 4);
1353 CPP_PUTC_Q (pfile, c);
1354 CPP_PUTC_Q (pfile, GETC ());
1355 CPP_PUTC_Q (pfile, GETC ());
1363 if (pfile->parsing_include_directive)
1367 CPP_PUTC (pfile, c);
1371 if (c == '\n' || c == EOF)
1374 "missing '>' in `#include <FILENAME>'");
1379 if (!CPP_BUFFER (pfile)->has_escapes)
1381 /* Backslash newline is replaced by nothing. */
1382 CPP_ADJUST_WRITTEN (pfile, -1);
1383 CPP_BUMP_LINE (pfile);
1387 /* We might conceivably get \r- or \r<space> in
1388 here. Just delete 'em. */
1390 if (d != '-' && d != ' ')
1391 cpp_ice (pfile, "unrecognized escape \\r%c", d);
1392 CPP_ADJUST_WRITTEN (pfile, -1);
1398 /* Digraphs: "<%" == "{", "<:" == "[". */
1403 CPP_RESERVE (pfile, 2);
1404 CPP_PUTC_Q (pfile, c);
1405 CPP_PUTC_Q (pfile, c2);
1406 return CPP_CLOSE_BRACE;
1410 /* else fall through */
1415 /* GNU C++ supports MIN and MAX operators <? and >?. */
1416 if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1419 CPP_RESERVE (pfile, 3);
1420 CPP_PUTC_Q (pfile, c);
1421 CPP_PUTC_Q (pfile, c2);
1422 if (PEEKC () == '=')
1423 CPP_PUTC_Q (pfile, GETC ());
1430 CPP_PUTC (pfile, c);
1435 /* In C++ there's a .* operator. */
1436 if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1439 if (c2 == '.' && PEEKN(1) == '.')
1441 CPP_RESERVE (pfile, 3);
1442 CPP_PUTC_Q (pfile, '.');
1443 CPP_PUTC_Q (pfile, '.');
1444 CPP_PUTC_Q (pfile, '.');
1446 return CPP_ELLIPSIS;
1451 CPP_RESERVE (pfile, 2);
1452 CPP_PUTC_Q (pfile, c);
1453 CPP_PUTC_Q (pfile, GETC ());
1458 if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1460 CPP_PUTC (pfile, c);
1462 parse_string (pfile, c);
1463 return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1467 case '0': case '1': case '2': case '3': case '4':
1468 case '5': case '6': case '7': case '8': case '9':
1473 CPP_RESERVE (pfile, 2);
1474 CPP_PUTC_Q (pfile, c);
1478 if (!is_numchar(c) && c != '.'
1479 && ((c2 != 'e' && c2 != 'E'
1480 && ((c2 != 'p' && c2 != 'P')
1481 || CPP_OPTION (pfile, c89)))
1482 || (c != '+' && c != '-')))
1488 case 'b': case 'c': case 'd': case 'h': case 'o':
1489 case 'B': case 'C': case 'D': case 'H': case 'O':
1490 if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1492 CPP_RESERVE (pfile, 2);
1493 CPP_PUTC_Q (pfile, c);
1494 CPP_PUTC_Q (pfile, '\'');
1500 goto chill_number_eof;
1503 CPP_PUTC (pfile, c);
1507 CPP_RESERVE (pfile, 2);
1508 CPP_PUTC_Q (pfile, c);
1521 case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1522 case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1523 case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1524 case 'x': case 'y': case 'z':
1525 case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1526 case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1527 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1530 _cpp_parse_name (pfile, c);
1533 case ' ': case '\t': case '\v': case '\f': case '\0':
1542 CPP_PUTC (pfile, c);
1544 if (c == EOF || !is_hspace(c))
1549 null_warning (pfile, null_count);
1554 if (CPP_BUFFER (pfile)->has_escapes)
1559 if (pfile->output_escapes)
1560 CPP_PUTS (pfile, "\r-", 2);
1561 _cpp_parse_name (pfile, GETC ());
1566 /* "\r " means a space, but only if necessary to prevent
1567 accidental token concatenation. */
1568 CPP_RESERVE (pfile, 2);
1569 if (pfile->output_escapes)
1570 CPP_PUTC_Q (pfile, '\r');
1571 CPP_PUTC_Q (pfile, c);
1576 cpp_ice (pfile, "unrecognized escape \\r%c", c);
1582 /* Backslash newline is ignored. */
1583 if (!ACTIVE_MARK_P (pfile))
1584 CPP_BUMP_LINE (pfile);
1589 CPP_PUTC (pfile, c);
1592 case '(': token = CPP_OPEN_PAREN; goto char1;
1593 case ')': token = CPP_CLOSE_PAREN; goto char1;
1594 case '{': token = CPP_OPEN_BRACE; goto char1;
1595 case '}': token = CPP_CLOSE_BRACE; goto char1;
1596 case ',': token = CPP_COMMA; goto char1;
1597 case ';': token = CPP_SEMICOLON; goto char1;
1603 CPP_PUTC (pfile, c);
1608 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1609 Caller is expected to have checked no_macro_expand. */
1611 maybe_macroexpand (pfile, written)
1615 U_CHAR *macro = pfile->token_buffer + written;
1616 size_t len = CPP_WRITTEN (pfile) - written;
1617 HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1619 /* _cpp_lookup never returns null. */
1620 if (hp->type == T_VOID)
1622 if (hp->disabled || hp->type == T_IDENTITY)
1624 if (pfile->output_escapes)
1626 /* Insert a no-reexpand marker before IDENT. */
1627 CPP_RESERVE (pfile, 2);
1628 CPP_ADJUST_WRITTEN (pfile, 2);
1629 macro = pfile->token_buffer + written;
1631 memmove (macro + 2, macro, len);
1637 if (hp->type == T_EMPTY)
1639 /* Special case optimization: macro expands to nothing. */
1640 CPP_SET_WRITTEN (pfile, written);
1641 CPP_PUTC_Q (pfile, ' ');
1645 /* If macro wants an arglist, verify that a '(' follows. */
1646 if (hp->type == T_FMACRO)
1648 int macbuf_whitespace = 0;
1651 while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1653 const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1656 _cpp_skip_hspace (pfile);
1663 if (point != CPP_BUFFER (pfile)->cur)
1664 macbuf_whitespace = 1;
1668 goto not_macro_call;
1669 cpp_pop_buffer (pfile);
1672 CPP_SET_MARK (pfile);
1675 _cpp_skip_hspace (pfile);
1682 CPP_GOTO_MARK (pfile);
1687 if (macbuf_whitespace)
1688 CPP_PUTC (pfile, ' ');
1694 /* This is now known to be a macro call.
1695 Expand the macro, reading arguments as needed,
1696 and push the expansion on the input stack. */
1697 _cpp_macroexpand (pfile, hp);
1698 CPP_SET_WRITTEN (pfile, written);
1702 /* Complain about \v or \f in a preprocessing directive (constraint
1703 violation, C99 6.10 para 5). Caller has checked CPP_PEDANTIC. */
1705 pedantic_whitespace (pfile, p, len)
1713 cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1714 else if (*p == '\f')
1715 cpp_pedwarn (pfile, "form feed in preprocessing directive");
1723 cpp_get_token (pfile)
1726 enum cpp_ttype token;
1727 long written = CPP_WRITTEN (pfile);
1730 token = _cpp_lex_token (pfile);
1735 pfile->potential_control_macro = 0;
1736 pfile->only_seen_white = 0;
1740 if (pfile->only_seen_white == 0)
1741 pfile->only_seen_white = 1;
1742 CPP_BUMP_LINE (pfile);
1750 pfile->potential_control_macro = 0;
1751 if (!pfile->only_seen_white)
1753 /* XXX shouldn't have to do this - remove the hash or %: from
1754 the token buffer. */
1755 if (CPP_PWRITTEN (pfile)[-1] == '#')
1756 CPP_ADJUST_WRITTEN (pfile, -1);
1758 CPP_ADJUST_WRITTEN (pfile, -2);
1760 if (_cpp_handle_directive (pfile))
1761 return CPP_DIRECTIVE;
1762 pfile->only_seen_white = 0;
1763 CPP_PUTC (pfile, '#');
1767 pfile->potential_control_macro = 0;
1768 pfile->only_seen_white = 0;
1769 if (! pfile->no_macro_expand
1770 && maybe_macroexpand (pfile, written))
1775 if (CPP_BUFFER (pfile) == NULL)
1777 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1779 cpp_pop_buffer (pfile);
1782 cpp_pop_buffer (pfile);
1787 /* Like cpp_get_token, but skip spaces and comments. */
1790 cpp_get_non_space_token (pfile)
1793 int old_written = CPP_WRITTEN (pfile);
1796 enum cpp_ttype token = cpp_get_token (pfile);
1797 if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1799 CPP_SET_WRITTEN (pfile, old_written);
1803 /* Like cpp_get_token, except that it does not execute directives,
1804 does not consume vertical space, and discards horizontal space. */
1806 _cpp_get_directive_token (pfile)
1810 enum cpp_ttype token;
1814 at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1815 old_written = CPP_WRITTEN (pfile);
1816 token = _cpp_lex_token (pfile);
1823 /* Put it back and return VSPACE. */
1825 CPP_ADJUST_WRITTEN (pfile, -1);
1829 /* The purpose of this rather strange check is to prevent pedantic
1830 warnings for ^L in an #ifdefed out block. */
1831 if (CPP_PEDANTIC (pfile) && ! at_bol)
1832 pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1833 CPP_WRITTEN (pfile) - old_written);
1834 CPP_SET_WRITTEN (pfile, old_written);
1839 if (! pfile->no_macro_expand
1840 && maybe_macroexpand (pfile, old_written))
1845 if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1847 cpp_pop_buffer (pfile);
1851 /* This can happen for files that don't end with a newline,
1852 and for cpp_define and friends. Pretend they do, so
1853 callers don't have to deal. A warning will be issued by
1854 someone else, if necessary. */
1859 /* Determine the current line and column. Used only by read_and_prescan. */
1861 find_position (start, limit, linep)
1864 unsigned long *linep;
1866 unsigned long line = *linep;
1867 U_CHAR *lbase = start;
1868 while (start < limit)
1870 U_CHAR ch = *start++;
1871 if (ch == '\n' || ch == '\r')
1881 /* The following table is used by _cpp_read_and_prescan. If we have
1882 designated initializers, it can be constant data; otherwise, it is
1883 set up at runtime by _cpp_init_input_buffer. */
1886 #define UCHAR_MAX 255 /* assume 8-bit bytes */
1889 #if (GCC_VERSION >= 2007)
1890 #define init_chartab() /* nothing */
1891 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1893 #define s(p, v) [p] = v,
1895 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1896 static void init_chartab PARAMS ((void)) { \
1897 unsigned char *x = chartab;
1899 #define s(p, v) x[p] = v;
1902 /* Table of characters that can't be handled in the inner loop.
1903 Also contains the mapping between trigraph third characters and their
1905 #define SPECCASE_CR 1
1906 #define SPECCASE_BACKSLASH 2
1907 #define SPECCASE_QUESTION 3
1910 s('\r', SPECCASE_CR)
1911 s('\\', SPECCASE_BACKSLASH)
1912 s('?', SPECCASE_QUESTION)
1914 s('=', '#') s(')', ']') s('!', '|')
1915 s('(', '[') s('\'', '^') s('>', '}')
1916 s('/', '\\') s('<', '{') s('-', '~')
/* NORMAL(c) is true when chartab[c] is zero or greater than
   SPECCASE_QUESTION, i.e. when C is not one of the characters given a
   SPECCASE_* code in the table ('\r', '\\' and '?').  Characters whose
   table entry is a trigraph replacement character also count as normal.  */
1923 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
/* NONTRI(c) is true when C is no greater than SPECCASE_QUESTION.
   NOTE(review): presumably C here is a chartab[] entry rather than a raw
   input character, so this tests "entry is not a trigraph replacement" -
   confirm at the point of use.  */
1924 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1926 /* Read the entire contents of file DESC into buffer BUF. LEN is how
1927 much memory to allocate initially; more will be allocated if
1928 necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1929 canonical form (\n). If enabled, convert and/or warn about
1930 trigraphs. Convert backslash-newline to a one-character escape
1931 (\r) and remove it from "embarrassing" places (i.e. the middle of a
1932 token). If there is no newline at the end of the file, add one and
1933 warn. Returns -1 on failure, or the actual length of the data to
1936 This function does a lot of work, and can be a serious performance
1937 bottleneck. It has been tuned heavily; make sure you understand it
1938 before hacking. The common case - no trigraphs, Unix style line
1939 breaks, backslash-newline set off by whitespace, newline at EOF -
1940 has been optimized at the expense of the others. The performance
1941 penalty for DOS style line breaks (\r\n) is about 15%.
1943 Warnings lose particularly heavily since we have to determine the
1944 line number, which involves scanning from the beginning of the file
1945 or from the last warning. The penalty for the absence of a newline
1946 at the end of reload1.c is about 60%. (reload1.c is 329k.)
1948 If your file has more than one kind of end-of-line marker, you
1949 will get messed-up line numbering.
1951 So that the cases of the switch statement do not have to concern
1952 themselves with the complications of reading beyond the end of the
1953 buffer, the buffer is guaranteed to have at least 3 characters in
1954 it (or however many are left in the file, if less) on entry to the
1955 switch. This is enough to handle trigraphs and the "\\\n\r" and
1958 The end of the buffer is marked by a '\\', which, being a special
1959 character, guarantees we will exit the fast-scan loops and perform
1963 _cpp_read_and_prescan (pfile, fp, desc, len)
1969 U_CHAR *buf = (U_CHAR *) xmalloc (len);
1970 U_CHAR *ip, *op, *line_base;
1973 unsigned int deferred_newlines;
1978 deferred_newlines = 0;
1982 ibase = pfile->input_buffer + 3;
1984 ip[-1] = '\0'; /* Guarantee no match with \n for SPECCASE_CR */
1988 U_CHAR *near_buff_end;
1990 count = read (desc, ibase, pfile->input_buffer_len);
1994 ibase[count] = '\\'; /* Marks end of buffer */
1997 near_buff_end = pfile->input_buffer + count;
2002 size_t delta_line_base;
2006 This could happen if the file is larger than half the
2007 maximum address space of the machine. */
2010 delta_op = op - buf;
2011 delta_line_base = line_base - buf;
2012 buf = (U_CHAR *) xrealloc (buf, len);
2013 op = buf + delta_op;
2014 line_base = buf + delta_line_base;
2021 /* Allow normal processing of the (at most 2) remaining
2022 characters. The end-of-buffer marker is still present
2023 and prevents false matches within the switch. */
2024 near_buff_end = ibase - 1;
2031 /* Deal with \-newline, potentially in the middle of a token. */
2032 if (deferred_newlines)
2034 if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2036 /* Previous was not white space. Skip to white
2037 space, if we can, before outputting the \r's */
2039 while (ip[span] != ' '
2042 && NORMAL(ip[span]))
2044 memcpy (op, ip, span);
2047 if (! NORMAL(ip[0]))
2050 while (deferred_newlines)
2051 deferred_newlines--, *op++ = '\r';
2054 /* Copy as much as we can without special treatment. */
2056 while (NORMAL (ip[span])) span++;
2057 memcpy (op, ip, span);
2062 if (ip > near_buff_end) /* Do we have enough chars? */
2064 switch (chartab[*ip++])
2066 case SPECCASE_CR: /* \r */
2075 case SPECCASE_BACKSLASH: /* \ */
2078 deferred_newlines++;
2080 if (*ip == '\r') ip++;
2082 else if (*ip == '\r')
2084 deferred_newlines++;
2086 if (*ip == '\n') ip++;
2092 case SPECCASE_QUESTION: /* ? */
2096 *op++ = '?'; /* Normal non-trigraph case */
2105 if (CPP_OPTION (pfile, warn_trigraphs))
2108 line_base = find_position (line_base, op, &line);
2109 col = op - line_base + 1;
2110 if (CPP_OPTION (pfile, trigraphs))
2111 cpp_warning_with_line (pfile, line, col,
2112 "trigraph ??%c converted to %c", d, t);
2114 cpp_warning_with_line (pfile, line, col,
2115 "trigraph ??%c ignored", d);
2119 if (CPP_OPTION (pfile, trigraphs))
2121 op[-1] = t; /* Overwrite '?' */
2126 goto do_speccase; /* May need buffer refill */
2138 /* Copy previous char plus unprocessed (at most 2) chars
2139 to beginning of buffer, refill it with another
2140 read(), and continue processing */
2141 memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2151 line_base = find_position (line_base, op, &line);
2152 col = op - line_base + 1;
2153 cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2154 if (offset + 1 > len)
2157 if (offset + 1 > len)
2159 buf = (U_CHAR *) xrealloc (buf, len);
2165 fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2169 cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2170 (unsigned long)offset);
2175 cpp_error_from_errno (pfile, fp->ihash->name);
2180 /* Allocate pfile->input_buffer, and initialize chartab[]
2181 if it hasn't happened already. */
2184 _cpp_init_input_buffer (pfile)
2190 _cpp_init_toklist (&pfile->directbuf);
2192 /* Determine the appropriate size for the input buffer. Normal C
2193 source files are smaller than eight K. */
2194 /* 8Kbytes of buffer proper, 1 to detect running off the end without
2195 address arithmetic all the time, and 3 for pushback during buffer
2196 refill, in case there's a potential trigraph or end-of-line
2197 digraph at the end of a block. */
2199 tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2200 pfile->input_buffer = tmp;
2201 pfile->input_buffer_len = 8192;
2205 Compares, in the manner of strcmp(3), the token beginning at TOKEN
2206 and extending for LEN characters to the NUL-terminated string
2207 STRING. Typical usage:
2209 if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2215 cpp_idcmp (token, len, string)
2216 const U_CHAR *token;
2220 size_t len2 = strlen (string);
2223 if ((r = memcmp (token, string, MIN (len, len2))))
2226 /* The longer of the two strings sorts after the shorter. */
2229 else if (len < len2)
2237 /* Lexing algorithm.
2239 The original lexer in cpplib was made up of two passes: a first pass
2240 that replaced trigraphs and deleted escaped newlines, and a second
2241 pass that tokenized the result of the first pass. Tokenisation was
2242 performed by peeking at the next character in the input stream. For
2243 example, if the input stream contained "!=", the handler for the !
2244 character would peek at the next character, and if it were a '='
2245 would skip over it, and return a "!=" token, otherwise it would
2246 return just the "!" token.
2248 To implement a single-pass lexer, this peeking ahead is unworkable.
2249 An arbitrary number of escaped newlines, and trigraphs (in particular
2250 ??/ which translates to the escape \), could separate the '!' and '='
2251 in the input stream, yet the next token is still a "!=".
2253 Suppose instead that we lex by one logical line at a time, producing
2254 a token list or stack for each logical line, and when seeing the '!'
2255 push a CPP_NOT token on the list. Then if the '!' is part of a
2256 longer token ("!=") we know we must see the remainder of the token by
2257 the time we reach the end of the logical line. Thus we can have the
2258 '=' handler look at the previous token (at the end of the list / top
2259 of the stack) and see if it is a "!" token, and if so, instead of
2260 pushing a "=" token revise the existing token to be a "!=" token.
2262 This works in the presence of escaped newlines, because the '\' would
2263 have been pushed on the top of the stack as a CPP_BACKSLASH. The
2264 newline ('\n' or '\r') handler looks at the token at the top of the
2265 stack to see if it is a CPP_BACKSLASH, and if so discards both.
2266 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
2267 the '=' handler would never see any intervening escaped newlines.
2269 To make trigraphs work in this context, as in precedence trigraphs
2270 are highest and converted before anything else, the '?' handler does
2271 lookahead to see if it is a trigraph, and if so skips the trigraph
2272 and pushes the token it represents onto the top of the stack. This
2273 also works in the particular case of a CPP_BACKSLASH trigraph.
2275 To the preprocessor, whitespace is only significant to the point of
2276 knowing whether whitespace precedes a particular token. For example,
2277 the '=' handler needs to know whether there was whitespace between it
2278 and a "!" token on the top of the stack, to make the token conversion
2279 decision correctly. So each token has a PREV_WHITESPACE flag to
2280 indicate this - the standard permits consecutive whitespace to be
2281 regarded as a single space. The compiler front ends are not
2282 interested in whitespace at all; they just require a token stream.
2283 Another place where whitespace is significant to the preprocessor is
2284 a #define statement - if there is whitespace between the macro name
2285 and an initial "(" token the macro is "object-like", otherwise it is
2286 a function-like macro that takes arguments.
2288 However, all is not rosy. Parsing of identifiers, numbers, comments
2289 and strings becomes trickier because of the possibility of raw
2290 trigraphs and escaped newlines in the input stream.
2292 The trigraphs are three consecutive characters beginning with two
2293 question marks. A question mark is not valid as part of a number or
2294 identifier, so parsing of a number or identifier terminates normally
2295 upon reaching it, returning to the mainloop which handles the
2296 trigraph just like it would in any other position. Similarly for the
2297 backslash of a backslash-newline combination. So we just need the
2298 escaped-newline dropper in the mainloop to check if the token on the
2299 top of the stack after dropping the escaped newline is a number or
2300 identifier, and if so to continue the processing it as if nothing had
2303 For strings, we replace trigraphs whenever we reach a quote or
2304 newline, because there might be a backslash trigraph escaping them.
2305 We need to be careful that we start trigraph replacing from where we
2306 left off previously, because it is possible for a first scan to leave
2307 "fake" trigraphs that a second scan would pick up as real (e.g. the
2308 sequence "????/\n=" would find a fake ??= trigraph after removing the
2311 For line comments, on reaching a newline we scan the previous
2312 character(s) to see if it is escaped, and continue if it is. Block
2313 comments ignore everything and just focus on finding the comment
2314 termination mark. The only difficult thing, and it is surprisingly
2315 tricky, is checking if an asterisk precedes the final slash since
2316 they could be separated by escaped newlines. If the preprocessor is
2317 invoked with the output comments option, we don't bother removing
2318 escaped newlines and replacing trigraphs for output.
2320 Finally, numbers can begin with a period, which is pushed initially
2321 as a CPP_DOT token in its own right. The digit handler checks if the
2322 previous token was a CPP_DOT not separated by whitespace, and if so
2323 pops it off the stack and pushes a period into the number's buffer
2324 before calling the number parser.
/* Source spellings of the six digraphs, in the order
   %:  %:%:  <:  :>  <%  %>.  */
2328 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2329 U":>", U"<%", U"%>"};
/* Indexed by the third character of a trigraph; holds the character
   the trigraph stands for, or zero if that character does not complete
   a trigraph.  Filled in by init_trigraph_map.  */
2330 static unsigned char trigraph_map[256];
/* Make room for more comments in LIST.  When the capacity is zero,
   allocate space for 10 cpp_tokens; otherwise double the capacity.  */
2333 expand_comment_space (list)
2336 if (list->comments_cap == 0)
2338 list->comments_cap = 10;
2339 list->comments = (cpp_token *)
2340 xmalloc (list->comments_cap * sizeof (cpp_token));
/* comments_cap counts cpp_tokens, but xrealloc takes a size in bytes,
   so scale by the element size exactly as the xmalloc above does.
   Passing the bare count would shrink the array and let later stores
   run off its end.  */
2344 list->comments_cap *= 2;
2345 list->comments = (cpp_token *)
2346 xrealloc (list->comments, list->comments_cap * sizeof (cpp_token));
2351 init_trigraph_map ()
2353 trigraph_map['='] = '#';
2354 trigraph_map['('] = '[';
2355 trigraph_map[')'] = ']';
2356 trigraph_map['/'] = '\\';
2357 trigraph_map['\''] = '^';
2358 trigraph_map['<'] = '{';
2359 trigraph_map['>'] = '}';
2360 trigraph_map['!'] = '|';
2361 trigraph_map['-'] = '~';
2364 /* Call when a trigraph is encountered. It warns if necessary, and
2365 returns true if the trigraph should be honoured. END is the third
2366 character of a trigraph in the input stream. */
2368 trigraph_ok (pfile, end)
2370 const unsigned char *end;
2372 int accept = CPP_OPTION (pfile, trigraphs);
2374 if (CPP_OPTION (pfile, warn_trigraphs))
2376 unsigned int col = end - 1 - pfile->buffer->line_base;
2378 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2379 "trigraph ??%c converted to %c",
2380 (int) *end, (int) trigraph_map[*end]);
2382 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2383 "trigraph ??%c ignored", (int) *end);
2388 /* Scan a string for trigraphs, warning or replacing them inline as
2389 appropriate. When parsing a string, we must call this routine
2390 before processing a newline character (if trigraphs are enabled),
2391 since the newline might be escaped by a preceding backslash
2392 trigraph sequence. Returns a pointer to the end of the name after
2395 static unsigned char*
2396 trigraph_replace (pfile, src, limit)
2399 unsigned char* limit;
2401 unsigned char *dest;
2403 /* Starting with src[1], find two consecutive '?'. The case of no
2404 trigraphs is streamlined. */
2406 for (; src + 1 < limit; src += 2)
2411 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
2414 else if (src + 2 == limit || src[1] != '?')
2417 /* Check if it really is a trigraph. */
2418 if (trigraph_map[src[2]] == 0)
2422 goto trigraph_found;
2426 /* Now we have a trigraph, we need to scan the remaining buffer, and
2427 copy-shifting its contents left if replacement is enabled. */
2428 for (; src + 2 < limit; dest++, src++)
2429 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2433 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2434 *dest = trigraph_map[*src];
2437 /* Copy remaining (at most 2) characters. */
2443 /* If CUR is a backslash or the end of a trigraphed backslash, return
2444 a pointer to its beginning, otherwise NULL. We don't read beyond
2445 the buffer start, because there is the start of the comment in the
2447 static const unsigned char *
2448 backslash_start (pfile, cur)
2450 const unsigned char *cur;
2454 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2455 && trigraph_ok (pfile, cur))
2460 /* Skip a C-style block comment. This is probably the trickiest
2461 handler. We find the end of the comment by seeing if an asterisk
2462 is before every '/' we encounter. The nasty complication is that a
2463 previous asterisk may be separated by one or more escaped newlines.
2464 Returns non-zero if comment terminated by EOF, zero otherwise. */
2466 skip_block_comment2 (pfile)
2469 cpp_buffer *buffer = pfile->buffer;
2470 const unsigned char *char_after_star = 0;
2471 register const unsigned char *cur = buffer->cur;
2474 /* Inner loop would think the comment has ended if the first comment
2475 character is a '/'. Avoid this and keep the inner loop clean by
2476 skipping such a character. */
2477 if (cur < buffer->rlimit && cur[0] == '/')
2480 for (; cur < buffer->rlimit; )
2482 unsigned char c = *cur++;
2484 /* People like decorating comments with '*', so check for
2485 '/' instead for efficiency. */
2488 if (cur[-2] == '*' || cur - 1 == char_after_star)
2491 /* Warn about potential nested comments, but not when
2492 the final character inside the comment is a '/'.
2493 Don't bother to get it right across escaped newlines. */
2494 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2495 && cur[0] == '*' && cur[1] != '/')
2498 cpp_warning (pfile, "'/*' within comment");
2501 else if (IS_NEWLINE(c))
2503 const unsigned char* bslash = backslash_start (pfile, cur - 2);
2505 handle_newline (cur, buffer->rlimit, c);
2506 /* Work correctly if there is an asterisk before an
2507 arbitrarily long sequence of escaped newlines. */
2508 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2509 char_after_star = cur;
2511 char_after_star = 0;
2521 /* Skip a C++ or Chill line comment. Handles escaped newlines.
2522 Returns non-zero if a multiline comment. */
2524 skip_line_comment2 (pfile)
2527 cpp_buffer *buffer = pfile->buffer;
2528 register const unsigned char *cur = buffer->cur;
2531 for (; cur < buffer->rlimit; )
2533 unsigned char c = *cur++;
2537 /* Check for a (trigraph?) backslash escaping the newline. */
2538 if (!backslash_start (pfile, cur - 2))
2541 handle_newline (cur, buffer->rlimit, c);
2547 buffer->cur = cur - 1; /* Leave newline for caller. */
2551 /* Skips whitespace, stopping at next non-whitespace character.
2552 Adjusts pfile->col_adjust to account for tabs. This enables tokens
2553 to be assigned the correct column. */
2555 skip_whitespace (pfile, in_directive)
2559 cpp_buffer *buffer = pfile->buffer;
2560 register const unsigned char *cur = buffer->cur;
2561 unsigned short null_count = 0;
2563 for (; cur < buffer->rlimit; )
2565 unsigned char c = *cur++;
2569 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2570 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2571 - col % CPP_OPTION(pfile, tabstop));
2573 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
2575 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
2579 /* Must be '\f' or '\v' */
2580 else if (in_directive && CPP_PEDANTIC (pfile))
2581 cpp_pedwarn (pfile, "%s in preprocessing directive",
2582 c == '\f' ? "formfeed" : "vertical tab");
2587 buffer->cur = cur - 1;
2589 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2590 : "embedded null character ignored");
2593 /* Parse (append) an identifier. */
2595 parse_name (pfile, list, name)
2600 const unsigned char *name_limit;
2601 unsigned char *namebuf;
2602 cpp_buffer *buffer = pfile->buffer;
2603 register const unsigned char *cur = buffer->cur;
2606 name_limit = list->namebuf + list->name_cap;
2607 namebuf = list->namebuf + list->name_used;
2609 for (; cur < buffer->rlimit && namebuf < name_limit; )
2611 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2617 if (c == '$' && CPP_PEDANTIC (pfile))
2620 cpp_pedwarn (pfile, "'$' character in identifier");
2624 /* Run out of name space? */
2625 if (cur < buffer->rlimit)
2627 list->name_used = namebuf - list->namebuf;
2628 auto_expand_name_space (list);
2634 name->len = namebuf - name->text;
2635 list->name_used = namebuf - list->namebuf;
2638 /* Parse (append) a number. */
/* VALID_SIGN (c, prevc) is true when C is '+' or '-' and the previous
   character PREVC is an exponent marker: 'e' or 'E' always, 'p' or 'P'
   only when the c89 option is off.  The expansion refers to `pfile',
   so it can only be used where a variable of that name is in scope.  */
2640 #define VALID_SIGN(c, prevc) \
2641 (((c) == '+' || (c) == '-') && \
2642 ((prevc) == 'e' || (prevc) == 'E' \
2643 || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2646 parse_number (pfile, list, name)
2651 const unsigned char *name_limit;
2652 unsigned char *namebuf;
2653 cpp_buffer *buffer = pfile->buffer;
2654 register const unsigned char *cur = buffer->cur;
2657 name_limit = list->namebuf + list->name_cap;
2658 namebuf = list->namebuf + list->name_used;
2660 for (; cur < buffer->rlimit && namebuf < name_limit; )
2662 unsigned char c = *namebuf = *cur; /* Copy a single char. */
2664 /* Perhaps we should accept '$' here if we accept it for
2665 identifiers. We know namebuf[-1] is safe, because for c to
2666 be a sign we must have pushed at least one character. */
2667 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2674 /* Run out of name space? */
2675 if (cur < buffer->rlimit)
2677 list->name_used = namebuf - list->namebuf;
2678 auto_expand_name_space (list);
2684 name->len = namebuf - name->text;
2685 list->name_used = namebuf - list->namebuf;
2688 /* Places a string terminated by an unescaped TERMINATOR into a
2689 cpp_name, which should be expandable and thus at the top of the
2690 list's stack. Handles embedded trigraphs, if necessary, and
2693 Can be used for character constants (terminator = '\''), string
2694 constants ('"') and angled headers ('>'). Multi-line strings are
2695 allowed, except for within directives. */
2698 parse_string2 (pfile, list, name, terminator)
2702 unsigned int terminator;
2704 cpp_buffer *buffer = pfile->buffer;
2705 register const unsigned char *cur = buffer->cur;
2706 const unsigned char *name_limit;
2707 unsigned char *namebuf;
2708 unsigned int null_count = 0;
2709 int trigraphed_len = 0;
2712 name_limit = list->namebuf + list->name_cap;
2713 namebuf = list->namebuf + list->name_used;
2715 for (; cur < buffer->rlimit && namebuf < name_limit; )
2717 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
2721 else if (c == terminator || IS_NEWLINE (c))
2723 /* Needed for trigraph_replace and multiline string warning. */
2726 /* Scan for trigraphs before checking if backslash-escaped. */
2727 if (CPP_OPTION (pfile, trigraphs)
2728 || CPP_OPTION (pfile, warn_trigraphs))
2730 namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2732 trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2733 if (trigraphed_len < 0)
2737 namebuf--; /* Drop the newline / terminator from the name. */
2740 /* Drop a backslash newline, and continue. */
2741 if (namebuf[-1] == '\\')
2743 handle_newline (cur, buffer->rlimit, c);
2750 /* In Fortran and assembly language, silently terminate
2751 strings of either variety at end of line. This is a
2752 kludge around not knowing where comments are in these
2754 if (CPP_OPTION (pfile, lang_fortran)
2755 || CPP_OPTION (pfile, lang_asm))
2758 /* Character constants, headers and asserts may not
2759 extend over multiple lines. In Standard C, neither
2760 may strings. We accept multiline strings as an
2761 extension, but not in directives. */
2762 if (terminator != '"' || IS_DIRECTIVE (list))
2765 cur++; /* Move forwards again. */
2767 if (pfile->multiline_string_line == 0)
2769 pfile->multiline_string_line = list->line;
2770 if (CPP_PEDANTIC (pfile))
2771 cpp_pedwarn (pfile, "multi-line string constant");
2775 handle_newline (cur, buffer->rlimit, c);
2779 unsigned char *temp;
2781 /* An odd number of consecutive backslashes represents
2782 an escaped terminator. */
2784 while (temp >= name->text && *temp == '\\')
2787 if ((namebuf - temp) & 1)
2794 /* Run out of name space? */
2795 if (cur < buffer->rlimit)
2797 list->name_used = namebuf - list->namebuf;
2798 auto_expand_name_space (list);
2802 /* We may not have trigraph-replaced the input for this code path,
2803 but as the input is in error by being unterminated we don't
2804 bother. Prevent warnings about no newlines at EOF. */
2805 if (IS_NEWLINE(cur[-1]))
2809 cpp_error (pfile, "missing terminating %c character", (int) terminator);
2811 if (terminator == '\"' && pfile->multiline_string_line != list->line
2812 && pfile->multiline_string_line != 0)
2814 cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2815 "possible start of unterminated string literal");
2816 pfile->multiline_string_line = 0;
2821 name->len = namebuf - name->text;
2822 list->name_used = namebuf - list->namebuf;
2825 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2826 : "null character preserved"));
2829 /* The character TYPE helps us distinguish comment types: '*' = C
2830 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
2831 the stored comment includes the comment start and any terminator. */
2833 #define COMMENT_START_LEN 2
2835 save_comment (list, from, len, tok_no, type)
2837 const unsigned char *from;
2839 unsigned int tok_no;
2843 unsigned char *buffer;
2845 len += COMMENT_START_LEN;
2847 if (list->comments_used == list->comments_cap)
2848 expand_comment_space (list);
2850 if (list->name_used + len > list->name_cap)
2851 expand_name_space (list, len);
2853 buffer = list->namebuf + list->name_used;
2855 comment = &list->comments[list->comments_used++];
2856 comment->type = CPP_COMMENT;
2857 comment->aux = tok_no;
2858 comment->val.name.len = len;
2859 comment->val.name.text = buffer;
2872 memcpy (buffer, from, len - COMMENT_START_LEN);
2873 list->name_used += len;
2877 * The tokenizer's main loop. Returns a token list, representing a
2878 * logical line in the input file, terminated with a CPP_VSPACE
2879 * token. On EOF, a token list containing the single CPP_EOF token
2882 * Implementation relies almost entirely on lookback, rather than
2883 * looking forwards. This means that tokenization requires just
2884 * a single pass of the file, even in the presence of trigraphs and
2885 * escaped newlines, providing significant performance benefits.
2886 * Trigraph overhead is negligible if they are disabled, and low
2887 * even when enabled.
/* Tokenize input from PFILE's current buffer into LIST.

   PFILE is the reader; its buffer supplies the characters, from
   buffer->cur up to buffer->rlimit.  LIST receives the tokens:
   writing starts at list->tokens[list->tokens_used], and
   list->tokens_used is updated before the function finishes.

   Multi-character operators are mostly recognized by looking at the
   token pushed just before the current character and revising it,
   rather than by reading ahead in the input.  */
2891 _cpp_lex_line (pfile, list)
2895 cpp_token *cur_token, *token_limit;
2896 cpp_buffer *buffer = pfile->buffer;
2897 register const unsigned char *cur = buffer->cur;
2898 unsigned char flags = 0;
2900 pfile->col_adjust = 0;
/* cur_token is the next free slot in the token array; token_limit is
   one past the array's current capacity.  */
2902 token_limit = list->tokens + list->tokens_cap;
2903 cur_token = list->tokens + list->tokens_used;
/* Keep going while there is both unread input and a free token slot.  */
2905 for (; cur < buffer->rlimit && cur_token < token_limit;)
2907 unsigned char c = *cur++;
2909 /* Optimize whitespace skipping, as most tokens are probably
2910 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
2912 if (is_hspace ((unsigned int) c))
2914 /* Step back to get the null warning and tab correction. */
2915 buffer->cur = cur - 1;
2916 skip_whitespace (pfile, IS_DIRECTIVE (list));
/* The token that follows is recorded as preceded by whitespace.  */
2919 flags = PREV_WHITESPACE;
2920 if (cur == buffer->rlimit)
2925 /* Initialize current token. Its type is set in the switch. */
2926 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2927 cur_token->flags = flags;
/* Digits: a number.  The text is accumulated in the list's name
   buffer and the token is pushed as CPP_NUMBER.  */
2932 case '0': case '1': case '2': case '3': case '4':
2933 case '5': case '6': case '7': case '8': case '9':
2934 cur--; /* Backup character. */
2935 if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2937 /* Prepend an immediately previous CPP_DOT token. */
2939 if (list->name_cap == list->name_used)
2940 auto_expand_name_space (list);
2942 cur_token->val.name.len = 1;
2943 cur_token->val.name.text = list->namebuf + list->name_used;
2944 list->namebuf[list->name_used++] = '.';
2947 INIT_NAME (list, cur_token->val.name);
2951 parse_number (pfile, list, &cur_token->val.name);
2954 PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted. */
/* Letters: a CPP_NAME token whose spelling is read by parse_name.  */
2959 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2960 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2961 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2962 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2964 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2965 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2966 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2967 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2969 cur--; /* Backup character. */
2970 INIT_NAME (list, cur_token->val.name);
2971 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
2975 parse_name (pfile, list, &cur_token->val.name);
2978 /* Find handler for newly created / extended directive. */
2979 if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2980 _cpp_check_directive (list, cur_token);
/* Quote characters: a single quote gives a character constant,
   anything else reaching here gives a string.  */
2987 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2988 /* Do we have a wide string? */
2989 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2990 && cur_token[-1].val.name.len == 1
2991 && cur_token[-1].val.name.text[0] == 'L'
2992 && !CPP_TRADITIONAL (pfile))
2994 /* No need for 'L' any more. */
/* Step back onto the lone 'L' name token and retype it as the wide
   variant, so the literal reuses that slot.  */
2996 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
3000 /* Here c is one of ' " or >. */
3001 INIT_NAME (list, cur_token->val.name);
3003 parse_string2 (pfile, list, &cur_token->val.name, c);
/* A CPP_DIV token.  When the previous token is also CPP_DIV, the two
   may form the start of a line comment, subject to the checks below.  */
3009 cur_token->type = CPP_DIV;
3012 if (PREV_TOKEN_TYPE == CPP_DIV)
3014 /* We silently allow C++ comments in system headers,
3015 irrespective of conformance mode, because lots of
3016 broken systems do that and trying to clean it up
3017 in fixincludes is a nightmare. */
3018 if (buffer->system_header_p)
3019 goto do_line_comment;
3020 else if (CPP_OPTION (pfile, cplusplus_comments))
/* The C89 pedantic diagnostic is issued at most once per buffer; the
   warned_cplusplus_comments flag records that it has been given.  */
3022 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
3023 && ! buffer->warned_cplusplus_comments)
3027 "C++ style comments are not allowed in ISO C89");
3029 "(this will be reported only once per input file)");
3030 buffer->warned_cplusplus_comments = 1;
3036 "comment start split across lines");
3037 if (skip_line_comment2 (pfile))
3038 cpp_error_with_line (pfile, list->line,
3040 "multi-line comment");
/* When comments are kept, store the comment text together with the
   index of the token slot that held the comment opener.  */
3041 if (!CPP_OPTION (pfile, discard_comments))
3042 save_comment (list, cur, buffer->cur - cur,
3043 cur_token - 1 - list->tokens, c);
3046 /* Back-up to first '-' or '/'. */
/* Outside traditional mode, the token after a comment is flagged as
   preceded by whitespace.  */
3048 if (!CPP_OPTION (pfile, traditional))
3049 flags = PREV_WHITESPACE;
/* A CPP_MULT token.  After CPP_DIV it opens a block comment; with the
   cplusplus option it can also complete CPP_DEREF_STAR or
   CPP_DOT_STAR.  */
3057 cur_token->type = CPP_MULT;
3060 if (PREV_TOKEN_TYPE == CPP_DIV)
3065 "comment start '/*' split across lines");
3066 if (skip_block_comment2 (pfile))
3067 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3068 "unterminated comment");
/* If the character two before the new buffer position is not '*',
   the comment terminator was split across lines.  */
3069 else if (buffer->cur[-2] != '*')
3071 "comment end '*/' split across lines");
3072 if (!CPP_OPTION (pfile, discard_comments))
3073 save_comment (list, cur, buffer->cur - cur,
3074 cur_token - 1 - list->tokens, c);
3078 if (!CPP_OPTION (pfile, traditional))
3079 flags = PREV_WHITESPACE;
3082 else if (CPP_OPTION (pfile, cplusplus))
3084 /* In C++, there are .* and ->* operators. */
3085 if (PREV_TOKEN_TYPE == CPP_DEREF)
3086 BACKUP_TOKEN (CPP_DEREF_STAR);
3087 else if (PREV_TOKEN_TYPE == CPP_DOT)
3088 BACKUP_TOKEN (CPP_DOT_STAR);
/* Newline handling.  An immediately preceding backslash token makes
   this an escaped newline; otherwise the line is closed with a
   CPP_VSPACE token.  */
3096 handle_newline (cur, buffer->rlimit, c);
3097 if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3099 /* Remove the escaped newline. Then continue to process
3100 any interrupted name or number. */
3105 if (cur_token->type == CPP_NAME)
3107 else if (cur_token->type == CPP_NUMBER)
3108 goto continue_number;
3111 /* Remember whitespace setting. */
3112 flags = cur_token->flags;
/* A backslash token that is not immediately adjacent to the newline
   only draws a warning.  */
3115 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3118 cpp_warning (pfile, "backslash and newline separated by space");
3120 PUSH_TOKEN (CPP_VSPACE);
/* CPP_MINUS.  Directly after another CPP_MINUS it becomes
   CPP_MINUS_MINUS, unless the chill option is set, in which case the
   pair starts a line comment.  */
3124 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3126 if (CPP_OPTION (pfile, chill))
3127 goto do_line_comment;
3128 REVISE_TOKEN (CPP_MINUS_MINUS);
3131 PUSH_TOKEN (CPP_MINUS);
3134 /* The digraph flag checking ensures that ## and %:%:
3135 are interpreted as CPP_PASTE, but #%: and %:# are not. */
3138 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3139 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3140 REVISE_TOKEN (CPP_PASTE);
3142 PUSH_TOKEN (CPP_HASH);
/* CPP_COLON.  May complete CPP_SCOPE (cplusplus option only) or the
   second half of a digraph.  */
3146 cur_token->type = CPP_COLON;
3149 if (PREV_TOKEN_TYPE == CPP_COLON
3150 && CPP_OPTION (pfile, cplusplus))
3151 BACKUP_TOKEN (CPP_SCOPE);
3152 /* Digraph: "<:" is a '[' */
3153 else if (PREV_TOKEN_TYPE == CPP_LESS)
3154 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3155 /* Digraph: "%:" is a '#' */
3156 else if (PREV_TOKEN_TYPE == CPP_MOD)
3158 (--cur_token)->flags |= DIGRAPH;
/* CPP_AND, or CPP_AND_AND when it directly follows another CPP_AND.  */
3166 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3167 REVISE_TOKEN (CPP_AND_AND);
3169 PUSH_TOKEN (CPP_AND);
/* CPP_OR, or CPP_OR_OR when it directly follows another CPP_OR.  */
3174 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3175 REVISE_TOKEN (CPP_OR_OR);
3177 PUSH_TOKEN (CPP_OR);
/* CPP_PLUS, or CPP_PLUS_PLUS when it directly follows another
   CPP_PLUS.  */
3181 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3182 REVISE_TOKEN (CPP_PLUS_PLUS);
3184 PUSH_TOKEN (CPP_PLUS);
/* CPP_EQ.  Directly after any token type up to CPP_LAST_EQ, the
   previous token is converted to its "?=" form by a fixed offset.  */
3188 /* This relies on equidistance of "?=" and "?" tokens. */
3189 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3190 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3192 PUSH_TOKEN (CPP_EQ);
/* CPP_GREATER.  May complete CPP_RSHIFT, CPP_DEREF, or a closing
   digraph.  */
3196 cur_token->type = CPP_GREATER;
3199 if (PREV_TOKEN_TYPE == CPP_GREATER)
3200 BACKUP_TOKEN (CPP_RSHIFT);
3201 else if (PREV_TOKEN_TYPE == CPP_MINUS)
3202 BACKUP_TOKEN (CPP_DEREF);
3203 /* Digraph: ":>" is a ']' */
3204 else if (PREV_TOKEN_TYPE == CPP_COLON)
3205 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3206 /* Digraph: "%>" is a '}' */
3207 else if (PREV_TOKEN_TYPE == CPP_MOD)
3208 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
/* CPP_LESS.  May complete CPP_LSHIFT; when the list has the
   SYNTAX_INCLUDE flag it instead starts a header name that is parsed
   like a string terminated by '>'.  */
3214 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3216 REVISE_TOKEN (CPP_LSHIFT);
3219 /* Is this the beginning of a header name? */
3220 if (list->flags & SYNTAX_INCLUDE)
3222 c = '>'; /* Terminator. */
3223 cur_token->type = CPP_HEADER_NAME;
3224 goto do_parse_string;
3226 PUSH_TOKEN (CPP_LESS);
3230 /* Digraph: "<%" is a '{' */
3231 cur_token->type = CPP_MOD;
3232 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3233 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
/* Trigraph check: needs at least two more characters in the buffer,
   the next being '?' and the one after it having an entry in
   trigraph_map, and trigraph_ok must accept it.  */
3238 if (cur + 1 < buffer->rlimit && *cur == '?'
3239 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3241 /* Handle trigraph. */
3245 case '(': goto make_open_square;
3246 case ')': goto make_close_square;
3247 case '<': goto make_open_brace;
3248 case '>': goto make_close_brace;
3249 case '=': goto make_hash;
3250 case '!': goto make_or;
3251 case '-': goto make_complement;
3252 case '/': goto make_backslash;
3253 case '\'': goto make_xor;
3256 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3258 /* GNU C++ defines <? and >? operators. */
3259 if (PREV_TOKEN_TYPE == CPP_LESS)
3261 REVISE_TOKEN (CPP_MIN);
3264 else if (PREV_TOKEN_TYPE == CPP_GREATER)
3266 REVISE_TOKEN (CPP_MAX);
3270 PUSH_TOKEN (CPP_QUERY);
/* CPP_DOT.  When the two preceding tokens are both CPP_DOT and the
   nearer one carries no PREV_WHITESPACE flag, CPP_ELLIPSIS is pushed
   instead.  */
3274 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3276 && !(cur_token[-1].flags & PREV_WHITESPACE))
3279 PUSH_TOKEN (CPP_ELLIPSIS);
3282 PUSH_TOKEN (CPP_DOT);
/* Single-character tokens that never combine with a neighbour.  */
3286 case '~': PUSH_TOKEN (CPP_COMPL); break;
3288 case '^': PUSH_TOKEN (CPP_XOR); break;
3290 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3292 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3294 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3296 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3298 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3299 case '!': PUSH_TOKEN (CPP_NOT); break;
3300 case ',': PUSH_TOKEN (CPP_COMMA); break;
3301 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3302 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3303 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
/* NOTE(review): presumably the '$' case, gated on the
   dollars_in_ident option -- confirm against the case label.  */
3306 if (CPP_OPTION (pfile, dollars_in_ident))
3311 cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3312 PUSH_TOKEN (CPP_OTHER);
3317 /* Run out of token space? */
3318 if (cur_token == token_limit)
/* Record the count before growing, so expand_token_space sees how
   many tokens are in use.  */
3320 list->tokens_used = cur_token - list->tokens;
3321 expand_token_space (list);
/* Mark the current slot as CPP_EOF.  If tokens were already collected,
   the input ended without a newline: warn and close the line with
   CPP_VSPACE.  */
3325 cur_token->type = CPP_EOF;
3326 cur_token->flags = flags;
3328 if (cur_token != &list->tokens[0])
3330 /* Next call back will get just a CPP_EOF. */
3332 cpp_warning (pfile, "no newline at end of file");
3333 PUSH_TOKEN (CPP_VSPACE);
/* Publish the number of tokens now held in the list.  */
3339 list->tokens_used = cur_token - list->tokens;
3341 /* FIXME: take this check out and put it in the caller.
3342 list->directive == 0 indicates an unknown directive (but null
3343 directive is OK). This is the first time we can be sure the
3344 directive is invalid, and thus warn about it, because it might
3345 have been split by escaped newlines. Also, don't complain about
3346 invalid directives in assembly source, we don't know where the
3347 comments are, and # may introduce assembler pseudo-ops. */
/* NOTE(review): the comment above speaks of list->directive == 0, but
   the test below uses list->dirno == -1 -- the comment looks stale.  */
3349 if (IS_DIRECTIVE (list) && list->dirno == -1
3350 && list->tokens[1].type != CPP_VSPACE
3351 && !CPP_OPTION (pfile, lang_asm))
3352 cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3353 "invalid preprocessing directive");
3356 /* Write the spelling of a token TOKEN to BUFFER. The buffer must
3357 already contain enough space to hold the token's spelling. If
3358 WHITESPACE is true, and the token was preceded by whitespace,
3359 output a single space before the token proper. Returns a pointer
3360 to the character after the last character written. */
/* Spell TOKEN into BUFFER and return the advanced write pointer.
   PFILE is only used to report an internal error for a token type with
   no known spelling category.  WHITESPACE selects whether a token
   flagged PREV_WHITESPACE gets leading whitespace output.  No bounds
   checking is done here; the visible callers reserve TOKEN_LEN (token)
   bytes first.  */
3362 static unsigned char *
3363 spell_token (pfile, token, buffer, whitespace)
3364 cpp_reader *pfile; /* Would be nice to be rid of this... */
3366 unsigned char *buffer;
3369 /* Whitespace will not be wanted by handlers of the # and ##
3370 operators calling this function, but will be wanted by the
3371 function that writes out the preprocessed file. */
3372 if (whitespace && token->flags & PREV_WHITESPACE)
/* The token_spellings table says how each token type is spelled.  */
3375 switch (token_spellings[token->type].type)
3377 case SPELL_OPERATOR:
3379 const unsigned char *spelling;
/* Tokens flagged DIGRAPH take their spelling from digraph_spellings,
   indexed relative to CPP_FIRST_DIGRAPH; all others use the normal
   table entry.  */
3382 if (token->flags & DIGRAPH)
3383 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3385 spelling = token_spellings[token->type].spelling;
/* Walk the NUL-terminated spelling one character at a time.  */
3387 while ((c = *spelling++) != '\0')
/* Copy the token's stored text verbatim.  */
3393 memcpy (buffer, token->val.name.text, token->val.name.len);
3394 buffer += token->val.name.len;
/* String and character literals: the wide variants are tested first,
   then string versus character kind, before the stored text is
   copied.  */
3401 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3404 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3407 memcpy (buffer, token->val.name.text, token->val.name.len);
3408 buffer += token->val.name.len;
/* The spelling is the single character held in token->aux.  */
3414 *buffer++ = token->aux;
/* No spelling category matched: report an internal error.  */
3418 cpp_ice (pfile, "Unspellable token");
3425 /* Temporary function for illustrative purposes. */
/* Driver that runs the line lexer over PFILE's input.
   It sets up the trigraph map, allocates and initializes one
   cpp_toklist, lexes into it with _cpp_lex_line, and passes the result
   to _cpp_handle_directive and/or _cpp_output_list before clearing the
   list with _cpp_clear_toklist.  A list whose first token is CPP_EOF
   is treated specially.
   NOTE(review): no release of the xmalloc'd list is visible in this
   function -- confirm whether it is freed elsewhere.  */
3427 _cpp_lex_file (pfile)
3432 init_trigraph_map ();
3433 list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3434 _cpp_init_toklist (list);
/* Lex into the list; the result is examined below.  */
3438 _cpp_lex_line (pfile, list);
/* A list that starts with CPP_EOF marks the end of input.  */
3439 if (list->tokens[0].type == CPP_EOF)
3444 _cpp_handle_directive (pfile, list);
3447 _cpp_output_list (pfile, list);
/* Empty the list after it has been handled.  */
3448 _cpp_clear_toklist (list);
3452 /* Temporary function for illustrative purposes. */
/* Write the spelling of the tokens in LIST to PFILE's output area,
   advancing pfile->limit.  Saved comments are written, without leading
   whitespace, ahead of the token whose index they recorded in their
   aux field.  Tokens are written with leading whitespace enabled.
   The token walk is governed by the CPP_VSPACE test on the last line.  */
3454 _cpp_output_list (pfile, list)
3458 cpp_token *token, *comment, *comment_before = 0;
/* comment is only assigned when the list holds comments.  Otherwise
   comment_before stays null, so the comment loop below cannot match
   a token.  */
3460 if (list->comments_used > 0)
3462 comment = &list->comments[0];
3463 comment_before = &list->tokens[comment->aux];
3466 token = &list->tokens[0];
3469 /* Output comments if -C. */
3470 while (token == comment_before)
3472 /* Make space for the comment, and copy it out. */
3473 CPP_RESERVE (pfile, TOKEN_LEN (comment));
3474 pfile->limit = spell_token (pfile, comment, pfile->limit, 0);
3476 /* Stop if no comments left, or no more comments appear
3477 before the current token. */
3479 if (comment == list->comments + list->comments_used)
3481 comment_before = &list->tokens[comment->aux];
/* Reserve room for the token itself, then spell it with leading
   whitespace enabled.  */
3484 CPP_RESERVE (pfile, TOKEN_LEN (token));
3485 pfile->limit = spell_token (pfile, token, pfile->limit, 1);
/* The test reads the current token's type and then advances; a
   CPP_VSPACE token ends the walk.  */
3487 while (token++->type != CPP_VSPACE);