gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "intl.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 #ifdef HAVE_MMAP_FILE
  30 # include <sys/mman.h>
  31 #endif
  32
/* Raw access to a cpp_buffer.
   PEEKBUF yields the character N positions past BUFFER's read pointer,
   or EOF unless more than N characters remain before rlimit.
   GETBUF yields the character at the read pointer and advances it; at
   rlimit it yields EOF and does not advance.
   FORWARDBUF moves the read pointer by N (which may be negative) with
   no bounds check.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to the current buffer of the reader.
   These expand to references to a variable named `pfile', which must
   be in scope at the point of use.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
  42 #define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
  43
/* Forward declarations: comment and string scanning helpers.  */
static void skip_block_comment  PARAMS ((cpp_reader *));
static void skip_line_comment   PARAMS ((cpp_reader *));
static int maybe_macroexpand    PARAMS ((cpp_reader *, long));
static int skip_comment         PARAMS ((cpp_reader *, int));
static int copy_comment         PARAMS ((cpp_reader *, int));
static void skip_string         PARAMS ((cpp_reader *, int));
static void parse_string        PARAMS ((cpp_reader *, int));
static U_CHAR *find_position    PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
static void null_warning        PARAMS ((cpp_reader *, unsigned int));

/* Forward declarations: output and token-list helpers.  */
static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
                                         size_t, FILE *));
static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
                                         unsigned int));
static void bump_column         PARAMS ((cpp_printer *, unsigned int,
                                         unsigned int));
static void expand_name_space   PARAMS ((cpp_toklist *, unsigned int));
static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
                                         unsigned int));

/* Grow LIST's name buffer by one more than half of its current
   capacity.  */
#define auto_expand_name_space(list) \
    expand_name_space ((list), 1 + (list)->name_cap / 2)
  66
#ifdef NEW_LEXER

/* Declarations used only when NEW_LEXER is defined.  */
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
                                                unsigned char *));
static const unsigned char *backslash_start PARAMS ((cpp_reader *,
                                                     const unsigned char *));
static int skip_block_comment2 PARAMS ((cpp_reader *));
static int skip_line_comment2 PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, int));
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
                                  unsigned int, int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
                                  const unsigned char *,
                                  unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));

static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));

static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
                                            unsigned char *, int));

typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
                                          cpp_token *));

/* Macros on a cpp_name.  */
/* INIT_TOKEN_NAME gives TOKEN an empty name whose text pointer is the
   first unused byte of LIST's name buffer, and sets LIST's tokens_used
   so that TOKEN is counted as the last token in use.  */
#define INIT_TOKEN_NAME(list, token) \
  do {(token)->val.name.len = 0; \
      (token)->val.name.text = (list)->namebuf + (list)->name_used; \
      (list)->tokens_used = token - (list)->tokens + 1; \
  } while (0)

/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

/* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
   character, if any, is in buffer.  */
/* C is the newline character just read.  '\r' + '\n' - c is the other
   newline character, so a following partner is consumed as part of the
   same line ending.  The expansion refers to a variable named `pfile'
   that must be in scope.  */
#define handle_newline(cur, limit, c) \
  do {\
  if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
    (cur)++; \
  CPP_BUMP_LINE_CUR (pfile, (cur)); \
  pfile->col_adjust = 0; \
  } while (0)

/* The macros below refer to a variable named `cur_token' that must be
   in scope.  IMMED_TOKEN is true when the current token does not have
   PREV_WHITESPACE set.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* PUSH_TOKEN stores TTYPE in the current token and advances;
   REVISE_TOKEN overwrites the type of the previous token;
   BACKUP_TOKEN steps back one token and overwrites its type;
   BACKUP_DIGRAPH does the same and also sets the DIGRAPH flag.  */
#define PUSH_TOKEN(ttype) cur_token++->type = ttype
#define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
#define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
#define BACKUP_DIGRAPH(ttype) do { \
  BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace.  */
#define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
                               SPELL_NONE ? (token)->val.name.len: 0))

#endif
 131
/* Order here matters.  Those beyond SPELL_NONE store their spelling
   in the token list, and its length in the token->val.name.len.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_NONE,
  SPELL_CHAR,    /* FIXME: revert order of NONE and CHAR after transition. */
  SPELL_IDENT,
  SPELL_STRING
};

/* Helper macros that turn one table entry into a token_spelling
   initializer.  The first argument E is ignored here; S supplies the
   spelling field.  Only T casts its spelling to const U_CHAR *.
   TTYPE_TABLE is defined outside the visible part of this file;
   presumably it invokes these macros once per token type.  */
#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
#define I(e, s) {SPELL_IDENT, s},
#define S(e, s) {SPELL_STRING, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},

/* Table indexed by token type giving the spelling category and the
   fixed spelling, if any.  A {0, 0} entry terminates the table.  */
static const struct token_spelling
{
  ENUM_BITFIELD(spell_type) type : CHAR_BIT;
  const U_CHAR *spelling;
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };

#undef T
#undef I
#undef S
#undef C
#undef N
 160
 161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 162
 163 void
 164 _cpp_grow_token_buffer (pfile, n)
 165      cpp_reader *pfile;
 166      long n;
 167 {
 168   long old_written = CPP_WRITTEN (pfile);
 169   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 170   pfile->token_buffer = (U_CHAR *)
 171     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 172   CPP_SET_WRITTEN (pfile, old_written);
 173 }
 174
 175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
 176    If BUFFER != NULL, then use the LENGTH characters in BUFFER
 177    as the new input buffer.
 178    Return the new buffer, or NULL on failure.  */
 179
 180 cpp_buffer *
 181 cpp_push_buffer (pfile, buffer, length)
 182      cpp_reader *pfile;
 183      const U_CHAR *buffer;
 184      long length;
 185 {
 186   cpp_buffer *buf = CPP_BUFFER (pfile);
 187   cpp_buffer *new;
 188   if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
 189     {
 190       cpp_fatal (pfile, "macro or `#include' recursion too deep");
 191       return NULL;
 192     }
 193
 194   new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
 195
 196   new->buf = new->cur = buffer;
 197   new->rlimit = buffer + length;
 198   new->prev = buf;
 199   new->mark = NULL;
 200   new->line_base = NULL;
 201
 202   CPP_BUFFER (pfile) = new;
 203   return new;
 204 }
 205
 206 cpp_buffer *
 207 cpp_pop_buffer (pfile)
 208      cpp_reader *pfile;
 209 {
 210   cpp_buffer *buf = CPP_BUFFER (pfile);
 211   if (ACTIVE_MARK_P (pfile))
 212     cpp_ice (pfile, "mark active in cpp_pop_buffer");
 213
 214   if (buf->inc)
 215     {
 216       _cpp_unwind_if_stack (pfile, buf);
 217       if (buf->buf)
 218         free ((PTR) buf->buf);
 219       if (pfile->system_include_depth)
 220         pfile->system_include_depth--;
 221       if (pfile->potential_control_macro)
 222         {
 223           if (buf->inc->cmacro != NEVER_REREAD)
 224             buf->inc->cmacro = pfile->potential_control_macro;
 225           pfile->potential_control_macro = 0;
 226         }
 227       pfile->input_stack_listing_current = 0;
 228       /* If the file will not be included again, then close it.  */
 229       if (DO_NOT_REREAD (buf->inc))
 230         {
 231           close (buf->inc->fd);
 232           buf->inc->fd = -1;
 233         }
 234     }
 235   else if (buf->macro)
 236     {
 237       cpp_hashnode *m = buf->macro;
 238
 239       m->disabled = 0;
 240       if ((m->type == T_FMACRO && buf->mapped)
 241           || m->type == T_SPECLINE || m->type == T_FILE
 242           || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
 243           || m->type == T_STDC)
 244         free ((PTR) buf->buf);
 245     }
 246   CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
 247   free (buf);
 248   pfile->buffer_stack_depth--;
 249   return CPP_BUFFER (pfile);
 250 }
 251
 252 /* Deal with the annoying semantics of fwrite.  */
 253 static void
 254 safe_fwrite (pfile, buf, len, fp)
 255      cpp_reader *pfile;
 256      const U_CHAR *buf;
 257      size_t len;
 258      FILE *fp;
 259 {
 260   size_t count;
 261
 262   while (len)
 263     {
 264       count = fwrite (buf, 1, len, fp);
 265       if (count == 0)
 266         goto error;
 267       len -= count;
 268       buf += count;
 269     }
 270   return;
 271
 272  error:
 273   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 274 }
 275
 276 /* Notify the compiler proper that the current line number has jumped,
 277    or the current file name has changed.  */
 278
 279 static void
 280 output_line_command (pfile, print, line)
 281      cpp_reader *pfile;
 282      cpp_printer *print;
 283      unsigned int line;
 284 {
 285   cpp_buffer *ip = cpp_file_buffer (pfile);
 286   enum { same = 0, enter, leave, rname } change;
 287   static const char * const codes[] = { "", " 1", " 2", "" };
 288
 289   if (CPP_OPTION (pfile, no_line_commands))
 290     return;
 291
 292   /* Determine whether the current filename has changed, and if so,
 293      how.  'nominal_fname' values are unique, so they can be compared
 294      by comparing pointers.  */
 295   if (ip->nominal_fname == print->last_fname)
 296     change = same;
 297   else
 298     {
 299       if (pfile->buffer_stack_depth == print->last_bsd)
 300         change = rname;
 301       else
 302         {
 303           if (pfile->buffer_stack_depth > print->last_bsd)
 304             change = enter;
 305           else
 306             change = leave;
 307           print->last_bsd = pfile->buffer_stack_depth;
 308         }
 309       print->last_fname = ip->nominal_fname;
 310     }
 311   /* If the current file has not changed, we can output a few newlines
 312      instead if we want to increase the line number by a small amount.
 313      We cannot do this if print->lineno is zero, because that means we
 314      haven't output any line commands yet.  (The very first line
 315      command output is a `same_file' command.)  */
 316   if (change == same && print->lineno != 0
 317       && line >= print->lineno && line < print->lineno + 8)
 318     {
 319       while (line > print->lineno)
 320         {
 321           putc ('\n', print->outf);
 322           print->lineno++;
 323         }
 324       return;
 325     }
 326
 327 #ifndef NO_IMPLICIT_EXTERN_C
 328   if (CPP_OPTION (pfile, cplusplus))
 329     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 330              codes[change],
 331              ip->inc->sysp ? " 3" : "",
 332              (ip->inc->sysp == 2) ? " 4" : "");
 333   else
 334 #endif
 335     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 336              codes[change],
 337              ip->inc->sysp ? " 3" : "");
 338   print->lineno = line;
 339 }
 340
 341 /* Write the contents of the token_buffer to the output stream, and
 342    clear the token_buffer.  Also handles generating line commands and
 343    keeping track of file transitions.  */
 344
 345 void
 346 cpp_output_tokens (pfile, print)
 347      cpp_reader *pfile;
 348      cpp_printer *print;
 349 {
 350   cpp_buffer *ip;
 351
 352   if (CPP_WRITTEN (pfile) - print->written)
 353     {
 354       if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
 355         print->lineno++;
 356       safe_fwrite (pfile, pfile->token_buffer,
 357                    CPP_WRITTEN (pfile) - print->written, print->outf);
 358     }
 359
 360   ip = cpp_file_buffer (pfile);
 361   if (ip)
 362     output_line_command (pfile, print, CPP_BUF_LINE (ip));
 363
 364   CPP_SET_WRITTEN (pfile, print->written);
 365 }
 366
 367 /* Helper for cpp_output_list - increases the column number to match
 368    what we expect it to be.  */
 369
 370 static void
 371 bump_column (print, from, to)
 372      cpp_printer *print;
 373      unsigned int from, to;
 374 {
 375   unsigned int tabs, spcs;
 376   unsigned int delta = to - from;
 377
 378   /* Only if FROM is 0, advance by tabs.  */
 379   if (from == 0)
 380     tabs = delta / 8, spcs = delta % 8;
 381   else
 382     tabs = 0, spcs = delta;
 383
 384   while (tabs--) putc ('\t', print->outf);
 385   while (spcs--) putc (' ', print->outf);
 386 }
 387
 388 /* Write out the list L onto pfile->token_buffer.  This function is
 389    incomplete:
 390
 391    1) pfile->token_buffer is not going to continue to exist.
 392    2) At the moment, tokens don't carry the information described
 393    in cpplib.h; they are all strings.
 394    3) The list has to be a complete line, and has to be written starting
 395    at the beginning of a line.  */
 396
 397 void
 398 cpp_output_list (pfile, print, list)
 399      cpp_reader *pfile;
 400      cpp_printer *print;
 401      const cpp_toklist *list;
 402 {
 403   unsigned int i;
 404   unsigned int curcol = 1;
 405
 406   /* XXX Probably does not do what is intended.  */
 407   if (print->lineno != list->line)
 408     output_line_command (pfile, print, list->line);
 409
 410   for (i = 0; i < list->tokens_used; i++)
 411     {
 412       if (TOK_TYPE (list, i) == CPP_VSPACE)
 413         {
 414           output_line_command (pfile, print, list->tokens[i].aux);
 415           continue;
 416         }
 417
 418       if (curcol < TOK_COL (list, i))
 419         {
 420           /* Insert space to bring the column to what it should be.  */
 421           bump_column (print, curcol - 1, TOK_COL (list, i));
 422           curcol = TOK_COL (list, i);
 423         }
 424       /* XXX We may have to insert space to prevent an accidental
 425          token paste.  */
 426       safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
 427       curcol += TOK_LEN (list, i);
 428     }
 429 }
 430
 431 /* Scan a string (which may have escape marks), perform macro expansion,
 432    and write the result to the token_buffer.  */
 433
 434 void
 435 _cpp_expand_to_buffer (pfile, buf, length)
 436      cpp_reader *pfile;
 437      const U_CHAR *buf;
 438      int length;
 439 {
 440   cpp_buffer *stop;
 441   enum cpp_ttype token;
 442   U_CHAR *buf1;
 443
 444   if (length < 0)
 445     {
 446       cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
 447       return;
 448     }
 449
 450   /* Copy the buffer, because it might be in an unsafe place - for
 451      example, a sequence on the token_buffer, where the pointers will
 452      be invalidated if we enlarge the token_buffer.  */
 453   buf1 = alloca (length);
 454   memcpy (buf1, buf, length);
 455
 456   /* Set up the input on the input stack.  */
 457   stop = CPP_BUFFER (pfile);
 458   if (cpp_push_buffer (pfile, buf1, length) == NULL)
 459     return;
 460   CPP_BUFFER (pfile)->has_escapes = 1;
 461
 462   /* Scan the input, create the output.  */
 463   for (;;)
 464     {
 465       token = cpp_get_token (pfile);
 466       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 467         break;
 468     }
 469 }
 470
 471 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 472
 473 void
 474 cpp_scan_buffer_nooutput (pfile)
 475      cpp_reader *pfile;
 476 {
 477   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 478   enum cpp_ttype token;
 479   unsigned int old_written = CPP_WRITTEN (pfile);
 480   /* In no-output mode, we can ignore everything but directives.  */
 481   for (;;)
 482     {
 483       if (! pfile->only_seen_white)
 484         _cpp_skip_rest_of_line (pfile);
 485       token = cpp_get_token (pfile);
 486       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 487         break;
 488     }
 489   CPP_SET_WRITTEN (pfile, old_written);
 490 }
 491
 492 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 493
 494 void
 495 cpp_scan_buffer (pfile, print)
 496      cpp_reader *pfile;
 497      cpp_printer *print;
 498 {
 499   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 500   enum cpp_ttype token;
 501
 502   for (;;)
 503     {
 504       token = cpp_get_token (pfile);
 505       if (token == CPP_VSPACE || token == CPP_EOF
 506           /* XXX Temporary kluge - force flush after #include only */
 507           || (token == CPP_DIRECTIVE
 508               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
 509         {
 510           cpp_output_tokens (pfile, print);
 511           if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 512             return;
 513         }
 514     }
 515 }
 516
 517 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).  */
 518
 519 cpp_buffer *
 520 cpp_file_buffer (pfile)
 521      cpp_reader *pfile;
 522 {
 523   cpp_buffer *ip;
 524
 525   for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
 526     if (ip->inc != NULL)
 527       return ip;
 528   return NULL;
 529 }
 530
 531 /* Token-buffer helper functions.  */
 532
 533 /* Expand a token list's string space. It is *vital* that
 534    list->tokens_used is correct, to get pointer fix-up right.  */
 535 static void
 536 expand_name_space (list, len)
 537      cpp_toklist *list;
 538      unsigned int len;
 539 {
 540   const U_CHAR *old_namebuf;
 541
 542   old_namebuf = list->namebuf;
 543   list->name_cap += len;
 544   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 545
 546   /* Fix up token text pointers.  */
 547   if (list->namebuf != old_namebuf)
 548     {
 549       unsigned int i;
 550
 551       for (i = 0; i < list->tokens_used; i++)
 552         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 553           list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
 554     }
 555 }
 556
 557 /* Expand the number of tokens in a list.  */
 558 void
 559 _cpp_expand_token_space (list, count)
 560      cpp_toklist *list;
 561      unsigned int count;
 562 {
 563   unsigned int n;
 564
 565   list->tokens_cap += count;
 566   n = list->tokens_cap;
 567   if (list->flags & LIST_OFFSET)
 568     list->tokens--, n++;
 569   list->tokens = (cpp_token *)
 570     xrealloc (list->tokens, n * sizeof (cpp_token));
 571   if (list->flags & LIST_OFFSET)
 572     list->tokens++;             /* Skip the dummy.  */
 573 }
 574
 575 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 576    an extra token in front of the token list, as this allows the lexer
 577    to always peek at the previous token without worrying about
 578    underflowing the list, and some initial space.  Otherwise, no
 579    token- or name-space is allocated, and there is no dummy token.  */
 580 void
 581 _cpp_init_toklist (list, flags)
 582      cpp_toklist *list;
 583      int flags;
 584 {
 585   /* We malloc zero bytes because we may want to realloc later, and
 586      some old implementations don't like realloc-ing a null pointer.  */
 587   if (flags == NO_DUMMY_TOKEN)
 588     {
 589       list->tokens_cap = 0;
 590       list->tokens = (cpp_token *) malloc (0);
 591       list->name_cap = 0;
 592       list->flags = 0;
 593     }
 594   else
 595     {
 596       /* Initialize token space.  Put a dummy token before the start
 597          that will fail matches.  */
 598       list->tokens_cap = 256;   /* 4K's worth.  */
 599       list->tokens = (cpp_token *)
 600         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 601       list->tokens[0].type = CPP_EOF;
 602       list->tokens++;
 603
 604       /* Initialize name space.  */
 605       list->name_cap = 1024;
 606       list->flags = LIST_OFFSET;
 607     }
 608
 609   /* Allocate name space.  */
 610   list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 611
 612   _cpp_clear_toklist (list);
 613 }
 614
 615 /* Clear a token list.  */
 616 void
 617 _cpp_clear_toklist (list)
 618      cpp_toklist *list;
 619 {
 620   list->tokens_used = 0;
 621   list->name_used = 0;
 622   list->dirno = -1;
 623   list->flags &= LIST_OFFSET;  /* clear all but that one */
 624 }
 625
 626 /* Free a token list.  Does not free the list itself, which may be
 627    embedded in a larger structure.  */
 628 void
 629 _cpp_free_toklist (list)
 630      cpp_toklist *list;
 631 {
 632   if (list->flags & LIST_OFFSET)
 633     free (list->tokens - 1);    /* Backup over dummy token.  */
 634   else
 635     free (list->tokens);
 636   free (list->namebuf);
 637 }
 638
 639 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
 640    COPY is assumed not to be initialized.  The comment space is not
 641    copied.  */
 642 void
 643 _cpp_slice_toklist (copy, start, finish)
 644      cpp_toklist *copy;
 645      const cpp_token *start, *finish;
 646 {
 647   unsigned int i, n;
 648   size_t bytes;
 649
 650   n = finish - start;
 651   copy->tokens_cap = n;
 652   copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
 653   memcpy (copy->tokens, start, n * sizeof (cpp_token));
 654
 655   bytes = 0;
 656   for (i = 0; i < n; i++)
 657     if (token_spellings[start[i].type].type > SPELL_NONE)
 658       bytes += start[i].val.name.len;
 659
 660   copy->namebuf = xmalloc (bytes);
 661   bytes = 0;
 662   for (i = 0; i < n; i++)
 663     if (token_spellings[start[i].type].type > SPELL_NONE)
 664       {
 665         memcpy (copy->namebuf + bytes,
 666                 start[i].val.name.text, start[i].val.name.len);
 667         copy->tokens[i].val.name.text = copy->namebuf + bytes;
 668         bytes += start[i].val.name.len;
 669       }
 670
 671   copy->tokens_cap = n;
 672   copy->tokens_used = n;
 673   copy->name_used = bytes;
 674   copy->name_cap = bytes;
 675
 676   copy->flags = 0;
 677   copy->dirno = -1;
 678 }
 679
 680 /* Shrink a token list down to the minimum size.  */
 681 void
 682 _cpp_squeeze_toklist (list)
 683      cpp_toklist *list;
 684 {
 685   long delta;
 686   const U_CHAR *old_namebuf;
 687
 688   if (list->flags & LIST_OFFSET)
 689     {
 690       list->tokens--;
 691       memmove (list->tokens, list->tokens + 1,
 692                list->tokens_used * sizeof (cpp_token));
 693       list->tokens = xrealloc (list->tokens,
 694                                list->tokens_used * sizeof (cpp_token));
 695       list->flags &= ~LIST_OFFSET;
 696     }
 697   else
 698     list->tokens = xrealloc (list->tokens,
 699                              list->tokens_used * sizeof (cpp_token));
 700   list->tokens_cap = list->tokens_used;
 701
 702   old_namebuf = list->namebuf;
 703   list->namebuf = xrealloc (list->namebuf, list->name_used);
 704   list->name_cap = list->name_used;
 705
 706   /* Fix up token text pointers.  */
 707   delta = list->namebuf - old_namebuf;
 708   if (delta)
 709     {
 710       unsigned int i;
 711
 712       for (i = 0; i < list->tokens_used; i++)
 713         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 714           list->tokens[i].val.name.text += delta;
 715     }
 716 }
 717
 718 /* Compare two tokens.  */
 719 int
 720 _cpp_equiv_tokens (a, b)
 721      const cpp_token *a, *b;
 722 {
 723   if (a->type != b->type
 724       || a->flags != b->flags
 725       || a->aux != b->aux)
 726     return 0;
 727
 728   if (token_spellings[a->type].type > SPELL_NONE)
 729     {
 730       if (a->val.name.len != b->val.name.len
 731           || ustrncmp(a->val.name.text,
 732                       b->val.name.text,
 733                       a->val.name.len))
 734         return 0;
 735     }
 736   return 1;
 737 }
 738
 739 /* Compare two token lists.  */
 740 int
 741 _cpp_equiv_toklists (a, b)
 742      const cpp_toklist *a, *b;
 743 {
 744   unsigned int i;
 745
 746   if (a->tokens_used != b->tokens_used)
 747     return 0;
 748
 749   for (i = 0; i < a->tokens_used; i++)
 750     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 751       return 0;
 752   return 1;
 753 }
 754
 755 /* Scan until we encounter a token of type STOP or a newline, and
 756    create a token list for it.  Does not macro-expand or execute
 757    directives.  The final token is not included in the list or
 758    consumed from the input.  Returns the type of the token stopped at. */
 759
 760 enum cpp_ttype
 761 _cpp_scan_until (pfile, list, stop)
 762      cpp_reader *pfile;
 763      cpp_toklist *list;
 764      enum cpp_ttype stop;
 765 {
 766   int i, col;
 767   long written, len;
 768   enum cpp_ttype type;
 769   int space_before;
 770
 771   _cpp_clear_toklist (list);
 772   list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 773
 774   written = CPP_WRITTEN (pfile);
 775   i = 0;
 776   space_before = 0;
 777   for (;;)
 778     {
 779       col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
 780       type = _cpp_lex_token (pfile);
 781       len = CPP_WRITTEN (pfile) - written;
 782       CPP_SET_WRITTEN (pfile, written);
 783       if (type == CPP_HSPACE)
 784         {
 785           if (CPP_PEDANTIC (pfile))
 786             pedantic_whitespace (pfile, pfile->token_buffer + written, len);
 787           space_before = 1;
 788           continue;
 789         }
 790       else if (type == CPP_COMMENT)
 791         /* Only happens when processing -traditional macro definitions.
 792            Do not give this a token entry, but do not change space_before
 793            either.  */
 794         continue;
 795
 796       if (list->tokens_used >= list->tokens_cap)
 797         _cpp_expand_token_space (list, 256);
 798       if (list->name_used + len >= list->name_cap)
 799         expand_name_space (list, list->name_used + len + 1 - list->name_cap);
 800
 801       if (type == CPP_MACRO)
 802         type = CPP_NAME;
 803
 804       if (type == CPP_VSPACE || type == stop)
 805         break;
 806
 807       list->tokens_used++;
 808       TOK_TYPE  (list, i) = type;
 809       TOK_COL   (list, i) = col;
 810       TOK_AUX   (list, i) = 0;
 811       TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
 812
 813       TOK_LEN (list, i) = len;
 814       if (token_spellings[type].type > SPELL_NONE)
 815         {
 816           memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
 817           TOK_NAME (list, i) = list->namebuf + list->name_used;
 818           list->name_used += len;
 819         }
 820       else
 821         TOK_NAME (list, i) = token_spellings[type].spelling;
 822       i++;
 823       space_before = 0;
 824     }
 825
 826   /* XXX Temporary kluge: put back the newline (or whatever).  */
 827   FORWARD(-1);
 828
 829   /* Don't consider the first token to have white before.  */
 830   TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
 831   return type;
 832 }
 833
 834 /* Skip a C-style block comment.  We know it's a comment, and point is
 835    at the second character of the starter.  */
 836 static void
 837 skip_block_comment (pfile)
 838      cpp_reader *pfile;
 839 {
 840   unsigned int line, col;
 841   const U_CHAR *limit, *cur;
 842
 843   FORWARD(1);
 844   line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 845   col = CPP_BUF_COL (CPP_BUFFER (pfile));
 846   limit = CPP_BUFFER (pfile)->rlimit;
 847   cur = CPP_BUFFER (pfile)->cur;
 848
 849   while (cur < limit)
 850     {
 851       char c = *cur++;
 852       if (c == '\n' || c == '\r')
 853         {
 854           /* \r cannot be a macro escape marker here. */
 855           if (!ACTIVE_MARK_P (pfile))
 856             CPP_BUMP_LINE_CUR (pfile, cur);
 857         }
 858       else if (c == '*')
 859         {
 860           /* Check for teminator.  */
 861           if (cur < limit && *cur == '/')
 862             goto out;
 863
 864           /* Warn about comment starter embedded in comment.  */
 865           if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
 866             cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
 867                                    cur - CPP_BUFFER (pfile)->line_base,
 868                                    "'/*' within comment");
 869         }
 870     }
 871
 872   cpp_error_with_line (pfile, line, col, "unterminated comment");
 873   cur--;
 874  out:
 875   CPP_BUFFER (pfile)->cur = cur + 1;
 876 }
 877
 878 /* Skip a C++/Chill line comment.  We know it's a comment, and point
 879    is at the second character of the initiator.  */
 880 static void
 881 skip_line_comment (pfile)
 882      cpp_reader *pfile;
 883 {
 884   FORWARD(1);
 885   for (;;)
 886     {
 887       int c = GETC ();
 888
 889       /* We don't have to worry about EOF in here.  */
 890       if (c == '\n')
 891         {
 892           /* Don't consider final '\n' to be part of comment.  */
 893           FORWARD(-1);
 894           return;
 895         }
 896       else if (c == '\r')
 897         {
 898           /* \r cannot be a macro escape marker here. */
 899           if (!ACTIVE_MARK_P (pfile))
 900             CPP_BUMP_LINE (pfile);
 901           if (CPP_OPTION (pfile, warn_comments))
 902             cpp_warning (pfile, "backslash-newline within line comment");
 903         }
 904     }
 905 }
 906
 907 /* Skip a comment - C, C++, or Chill style.  M is the first character
 908    of the comment marker.  If this really is a comment, skip to its
 909    end and return ' '.  If this is not a comment, return M (which will
 910    be '/' or '-').  */
 911
 912 static int
 913 skip_comment (pfile, m)
 914      cpp_reader *pfile;
 915      int m;
 916 {
 917   if (m == '/' && PEEKC() == '*')
 918     {
 919       skip_block_comment (pfile);
 920       return ' ';
 921     }
 922   else if (m == '/' && PEEKC() == '/')
 923     {
 924       if (CPP_IN_SYSTEM_HEADER (pfile))
 925         {
 926           /* We silently allow C++ comments in system headers, irrespective
 927              of conformance mode, because lots of busted systems do that
 928              and trying to clean it up in fixincludes is a nightmare.  */
 929           skip_line_comment (pfile);
 930           return ' ';
 931         }
 932       else if (CPP_OPTION (pfile, cplusplus_comments))
 933         {
 934           if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
 935             {
 936               if (CPP_WTRADITIONAL (pfile))
 937                 cpp_pedwarn (pfile,
 938                         "C++ style comments are not allowed in traditional C");
 939               else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
 940                 cpp_pedwarn (pfile,
 941                         "C++ style comments are not allowed in ISO C89");
 942               if (CPP_WTRADITIONAL (pfile)
 943                   || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
 944                 cpp_pedwarn (pfile,
 945                            "(this will be reported only once per input file)");
 946               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
 947             }
 948           skip_line_comment (pfile);
 949           return ' ';
 950         }
 951       else
 952         return m;
 953     }
 954   else if (m == '-' && PEEKC() == '-'
 955            && CPP_OPTION (pfile, chill))
 956     {
 957       skip_line_comment (pfile);
 958       return ' ';
 959     }
 960   else
 961     return m;
 962 }
 963
 964 /* Identical to skip_comment except that it copies the comment into the
 965    token_buffer.  This is used if !discard_comments.  */
 966 static int
 967 copy_comment (pfile, m)
 968      cpp_reader *pfile;
 969      int m;
 970 {
 971   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
 972   const U_CHAR *limit;
 973
 974   if (skip_comment (pfile, m) == m)
 975     return m;
 976
 977   limit = CPP_BUFFER (pfile)->cur;
 978   CPP_RESERVE (pfile, limit - start + 2);
 979   CPP_PUTC_Q (pfile, m);
 980   for (; start <= limit; start++)
 981     if (*start != '\r')
 982       CPP_PUTC_Q (pfile, *start);
 983
 984   return ' ';
 985 }
 986
 987 static void
 988 null_warning (pfile, count)
 989      cpp_reader *pfile;
 990      unsigned int count;
 991 {
 992   if (count == 1)
 993     cpp_warning (pfile, "embedded null character ignored");
 994   else
 995     cpp_warning (pfile, "embedded null characters ignored");
 996 }
 997
 998 /* Skip whitespace \-newline and comments.  Does not macro-expand.  */
 999
1000 void
1001 _cpp_skip_hspace (pfile)
1002      cpp_reader *pfile;
1003 {
1004   unsigned int null_count = 0;
1005   int c;
1006
1007   while (1)
1008     {
1009       c = GETC();
1010       if (c == EOF)
1011         goto out;
1012       else if (is_hspace(c))
1013         {
1014           if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1015             cpp_pedwarn (pfile, "%s in preprocessing directive",
1016                          c == '\f' ? "formfeed" : "vertical tab");
1017           else if (c == '\0')
1018             null_count++;
1019         }
1020       else if (c == '\r')
1021         {
1022           /* \r is a backslash-newline marker if !has_escapes, and
1023              a deletable-whitespace or no-reexpansion marker otherwise. */
1024           if (CPP_BUFFER (pfile)->has_escapes)
1025             {
1026               if (PEEKC() == ' ')
1027                 FORWARD(1);
1028               else
1029                 break;
1030             }
1031           else
1032             CPP_BUMP_LINE (pfile);
1033         }
1034       else if (c == '/' || c == '-')
1035         {
1036           c = skip_comment (pfile, c);
1037           if (c  != ' ')
1038             break;
1039         }
1040       else
1041         break;
1042     }
1043   FORWARD(-1);
1044  out:
1045   if (null_count)
1046     null_warning (pfile, null_count);
1047 }
1048
1049 /* Read and discard the rest of the current line.  */
1050
1051 void
1052 _cpp_skip_rest_of_line (pfile)
1053      cpp_reader *pfile;
1054 {
1055   for (;;)
1056     {
1057       int c = GETC();
1058       switch (c)
1059         {
1060         case '\n':
1061           FORWARD(-1);
1062         case EOF:
1063           return;
1064
1065         case '\r':
1066           if (! CPP_BUFFER (pfile)->has_escapes)
1067             CPP_BUMP_LINE (pfile);
1068           break;
1069
1070         case '\'':
1071         case '\"':
1072           skip_string (pfile, c);
1073           break;
1074
1075         case '/':
1076         case '-':
1077           skip_comment (pfile, c);
1078           break;
1079
1080         case '\f':
1081         case '\v':
1082           if (CPP_PEDANTIC (pfile))
1083             cpp_pedwarn (pfile, "%s in preprocessing directive",
1084                          c == '\f' ? "formfeed" : "vertical tab");
1085           break;
1086
1087         }
1088     }
1089 }
1090
1091 /* Parse an identifier starting with C.  */
1092
1093 void
1094 _cpp_parse_name (pfile, c)
1095      cpp_reader *pfile;
1096      int c;
1097 {
1098   for (;;)
1099   {
1100       if (! is_idchar(c))
1101       {
1102           FORWARD (-1);
1103           break;
1104       }
1105
1106       /* $ is not a legal identifier character in the standard, but is
1107          commonly accepted as an extension.  Don't warn about it in
1108          skipped conditional blocks. */
1109       if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
1110         cpp_pedwarn (pfile, "`$' in identifier");
1111
1112       CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
1113       CPP_PUTC_Q (pfile, c);
1114       c = GETC();
1115       if (c == EOF)
1116         break;
1117   }
1118   return;
1119 }
1120
1121 /* Parse and skip over a string starting with C.  A single quoted
1122    string is treated like a double -- some programs (e.g., troff) are
1123    perverse this way.  (However, a single quoted string is not allowed
1124    to extend over multiple lines.)  */
1125 static void
1126 skip_string (pfile, c)
1127      cpp_reader *pfile;
1128      int c;
1129 {
1130   unsigned int start_line, start_column;
1131   unsigned int null_count = 0;
1132
1133   start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1134   start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1135   while (1)
1136     {
1137       int cc = GETC();
1138       switch (cc)
1139         {
1140         case EOF:
1141           cpp_error_with_line (pfile, start_line, start_column,
1142                                "unterminated string or character constant");
1143           if (pfile->multiline_string_line != start_line
1144               && pfile->multiline_string_line != 0)
1145             cpp_error_with_line (pfile,
1146                                  pfile->multiline_string_line, -1,
1147                          "possible real start of unterminated constant");
1148           pfile->multiline_string_line = 0;
1149           goto out;
1150
1151         case '\0':
1152           null_count++;
1153           break;
1154
1155         case '\n':
1156           CPP_BUMP_LINE (pfile);
1157           /* In Fortran and assembly language, silently terminate
1158              strings of either variety at end of line.  This is a
1159              kludge around not knowing where comments are in these
1160              languages.  */
1161           if (CPP_OPTION (pfile, lang_fortran)
1162               || CPP_OPTION (pfile, lang_asm))
1163             {
1164               FORWARD(-1);
1165               goto out;
1166             }
1167           /* Character constants may not extend over multiple lines.
1168              In Standard C, neither may strings.  We accept multiline
1169              strings as an extension.  */
1170           if (c == '\'')
1171             {
1172               cpp_error_with_line (pfile, start_line, start_column,
1173                                    "unterminated character constant");
1174               FORWARD(-1);
1175               goto out;
1176             }
1177           if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1178             cpp_pedwarn_with_line (pfile, start_line, start_column,
1179                                    "string constant runs past end of line");
1180           if (pfile->multiline_string_line == 0)
1181             pfile->multiline_string_line = start_line;
1182           break;
1183
1184         case '\r':
1185           if (CPP_BUFFER (pfile)->has_escapes)
1186             {
1187               cpp_ice (pfile, "\\r escape inside string constant");
1188               FORWARD(1);
1189             }
1190           else
1191             /* Backslash newline is replaced by nothing at all.  */
1192             CPP_BUMP_LINE (pfile);
1193           break;
1194
1195         case '\\':
1196           FORWARD(1);
1197           break;
1198
1199         case '\"':
1200         case '\'':
1201           if (cc == c)
1202             goto out;
1203           break;
1204         }
1205     }
1206
1207  out:
1208   if (null_count == 1)
1209     cpp_warning (pfile, "null character in string or character constant");
1210   else if (null_count > 1)
1211     cpp_warning (pfile, "null characters in string or character constant");
1212 }
1213
1214 /* Parse a string and copy it to the output.  */
1215
1216 static void
1217 parse_string (pfile, c)
1218      cpp_reader *pfile;
1219      int c;
1220 {
1221   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
1222   const U_CHAR *limit;
1223
1224   skip_string (pfile, c);
1225
1226   limit = CPP_BUFFER (pfile)->cur;
1227   CPP_RESERVE (pfile, limit - start + 2);
1228   CPP_PUTC_Q (pfile, c);
1229   for (; start < limit; start++)
1230     if (*start != '\r')
1231       CPP_PUTC_Q (pfile, *start);
1232 }
1233
1234 /* Get the next token, and add it to the text in pfile->token_buffer.
1235    Return the kind of token we got.  */
1236
1237 enum cpp_ttype
1238 _cpp_lex_token (pfile)
1239      cpp_reader *pfile;
1240 {
1241   register int c, c2;
1242   enum cpp_ttype token;
1243
1244   if (CPP_BUFFER (pfile) == NULL)
1245     return CPP_EOF;
1246
1247  get_next:
1248   c = GETC();
1249   switch (c)
1250     {
1251     case EOF:
1252       return CPP_EOF;
1253
1254     case '/':
1255       if (PEEKC () == '=')
1256         goto op2;
1257
1258     comment:
1259       if (CPP_OPTION (pfile, discard_comments))
1260         c = skip_comment (pfile, c);
1261       else
1262         c = copy_comment (pfile, c);
1263       if (c != ' ')
1264         goto randomchar;
1265
1266       /* Comments are equivalent to spaces.
1267          For -traditional, a comment is equivalent to nothing.  */
1268       if (!CPP_OPTION (pfile, discard_comments))
1269         return CPP_COMMENT;
1270       else if (CPP_TRADITIONAL (pfile))
1271         goto get_next;
1272       else
1273         {
1274           CPP_PUTC (pfile, c);
1275           return CPP_HSPACE;
1276         }
1277
1278     case '#':
1279       CPP_PUTC (pfile, c);
1280
1281     hash:
1282       c2 = PEEKC ();
1283       if (c2 == '#')
1284         {
1285           FORWARD (1);
1286           CPP_PUTC (pfile, c2);
1287           return CPP_PASTE;
1288         }
1289       else if (c2 == '%' && PEEKN (1) == ':')
1290         {
1291           /* Digraph: "%:" == "#".  */
1292           FORWARD (1);
1293           CPP_RESERVE (pfile, 2);
1294           CPP_PUTC_Q (pfile, c2);
1295           CPP_PUTC_Q (pfile, GETC ());
1296           return CPP_PASTE;
1297         }
1298       else
1299         return CPP_HASH;
1300
1301     case '\"':
1302     case '\'':
1303       parse_string (pfile, c);
1304       return c == '\'' ? CPP_CHAR : CPP_STRING;
1305
1306     case '$':
1307       if (!CPP_OPTION (pfile, dollars_in_ident))
1308         goto randomchar;
1309       goto letter;
1310
1311     case ':':
1312       c2 = PEEKC ();
1313       /* Digraph: ":>" == "]".  */
1314       if (c2 == '>'
1315           || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1316         goto op2;
1317       goto randomchar;
1318
1319     case '&':
1320     case '+':
1321     case '|':
1322       c2 = PEEKC ();
1323       if (c2 == c || c2 == '=')
1324         goto op2;
1325       goto randomchar;
1326
1327     case '%':
1328       /* Digraphs: "%:" == "#", "%>" == "}".  */
1329       c2 = PEEKC ();
1330       if (c2 == ':')
1331         {
1332           FORWARD (1);
1333           CPP_RESERVE (pfile, 2);
1334           CPP_PUTC_Q (pfile, c);
1335           CPP_PUTC_Q (pfile, c2);
1336           goto hash;
1337         }
1338       else if (c2 == '>')
1339         {
1340           FORWARD (1);
1341           CPP_RESERVE (pfile, 2);
1342           CPP_PUTC_Q (pfile, c);
1343           CPP_PUTC_Q (pfile, c2);
1344           return CPP_OPEN_BRACE;
1345         }
1346       /* else fall through */
1347
1348     case '*':
1349     case '!':
1350     case '=':
1351     case '^':
1352       if (PEEKC () == '=')
1353         goto op2;
1354       goto randomchar;
1355
1356     case '-':
1357       c2 = PEEKC ();
1358       if (c2 == '-')
1359         {
1360           if (CPP_OPTION (pfile, chill))
1361             goto comment;  /* Chill style comment */
1362           else
1363             goto op2;
1364         }
1365       else if (c2 == '=')
1366         goto op2;
1367       else if (c2 == '>')
1368         {
1369           if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1370             {
1371               /* In C++, there's a ->* operator.  */
1372               token = CPP_OTHER;
1373               CPP_RESERVE (pfile, 4);
1374               CPP_PUTC_Q (pfile, c);
1375               CPP_PUTC_Q (pfile, GETC ());
1376               CPP_PUTC_Q (pfile, GETC ());
1377               return token;
1378             }
1379           goto op2;
1380         }
1381       goto randomchar;
1382
1383     case '<':
1384       if (pfile->parsing_include_directive)
1385         {
1386           for (;;)
1387             {
1388               CPP_PUTC (pfile, c);
1389               if (c == '>')
1390                 break;
1391               c = GETC ();
1392               if (c == '\n' || c == EOF)
1393                 {
1394                   cpp_error (pfile,
1395                              "missing '>' in `#include <FILENAME>'");
1396                   break;
1397                 }
1398               else if (c == '\r')
1399                 {
1400                   if (!CPP_BUFFER (pfile)->has_escapes)
1401                     {
1402                       /* Backslash newline is replaced by nothing. */
1403                       CPP_ADJUST_WRITTEN (pfile, -1);
1404                       CPP_BUMP_LINE (pfile);
1405                     }
1406                   else
1407                     {
1408                       /* We might conceivably get \r- or \r<space> in
1409                          here.  Just delete 'em. */
1410                       int d = GETC();
1411                       if (d != '-' && d != ' ')
1412                         cpp_ice (pfile, "unrecognized escape \\r%c", d);
1413                       CPP_ADJUST_WRITTEN (pfile, -1);
1414                     }
1415                 }
1416             }
1417           return CPP_STRING;
1418         }
1419       /* Digraphs: "<%" == "{", "<:" == "[".  */
1420       c2 = PEEKC ();
1421       if (c2 == '%')
1422         {
1423           FORWARD (1);
1424           CPP_RESERVE (pfile, 2);
1425           CPP_PUTC_Q (pfile, c);
1426           CPP_PUTC_Q (pfile, c2);
1427           return CPP_CLOSE_BRACE;
1428         }
1429       else if (c2 == ':')
1430         goto op2;
1431       /* else fall through */
1432     case '>':
1433       c2 = PEEKC ();
1434       if (c2 == '=')
1435         goto op2;
1436       /* GNU C++ supports MIN and MAX operators <? and >?.  */
1437       if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1438         goto randomchar;
1439       FORWARD(1);
1440       CPP_RESERVE (pfile, 3);
1441       CPP_PUTC_Q (pfile, c);
1442       CPP_PUTC_Q (pfile, c2);
1443       if (PEEKC () == '=')
1444         CPP_PUTC_Q (pfile, GETC ());
1445       return CPP_OTHER;
1446
1447     case '.':
1448       c2 = PEEKC ();
1449       if (ISDIGIT (c2))
1450         {
1451           CPP_PUTC (pfile, c);
1452           c = GETC ();
1453           goto number;
1454         }
1455
1456       /* In C++ there's a .* operator.  */
1457       if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1458         goto op2;
1459
1460       if (c2 == '.' && PEEKN(1) == '.')
1461         {
1462           CPP_RESERVE (pfile, 3);
1463           CPP_PUTC_Q (pfile, '.');
1464           CPP_PUTC_Q (pfile, '.');
1465           CPP_PUTC_Q (pfile, '.');
1466           FORWARD (2);
1467           return CPP_ELLIPSIS;
1468         }
1469       goto randomchar;
1470
1471     op2:
1472       CPP_RESERVE (pfile, 2);
1473       CPP_PUTC_Q (pfile, c);
1474       CPP_PUTC_Q (pfile, GETC ());
1475       return CPP_OTHER;
1476
1477     case 'L':
1478       c2 = PEEKC ();
1479       if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1480         {
1481           CPP_PUTC (pfile, c);
1482           c = GETC ();
1483           parse_string (pfile, c);
1484           return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1485         }
1486       goto letter;
1487
1488     case '0': case '1': case '2': case '3': case '4':
1489     case '5': case '6': case '7': case '8': case '9':
1490     number:
1491     c2  = '.';
1492     for (;;)
1493       {
1494         CPP_RESERVE (pfile, 2);
1495         CPP_PUTC_Q (pfile, c);
1496         c = PEEKC ();
1497         if (c == EOF)
1498           break;
1499         if (!is_numchar(c) && c != '.'
1500             && ((c2 != 'e' && c2 != 'E'
1501                  && ((c2 != 'p' && c2 != 'P')
1502                      || CPP_OPTION (pfile, c89)))
1503                 || (c != '+' && c != '-')))
1504           break;
1505         FORWARD(1);
1506         c2= c;
1507       }
1508     return CPP_NUMBER;
1509     case 'b': case 'c': case 'd': case 'h': case 'o':
1510     case 'B': case 'C': case 'D': case 'H': case 'O':
1511       if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1512         {
1513           CPP_RESERVE (pfile, 2);
1514           CPP_PUTC_Q (pfile, c);
1515           CPP_PUTC_Q (pfile, '\'');
1516           FORWARD(1);
1517           for (;;)
1518             {
1519               c = GETC();
1520               if (c == EOF)
1521                 goto chill_number_eof;
1522               if (!is_numchar(c))
1523                 break;
1524               CPP_PUTC (pfile, c);
1525             }
1526           if (c == '\'')
1527             {
1528               CPP_RESERVE (pfile, 2);
1529               CPP_PUTC_Q (pfile, c);
1530               return CPP_STRING;
1531             }
1532           else
1533             {
1534               FORWARD(-1);
1535             chill_number_eof:
1536               return CPP_NUMBER;
1537             }
1538         }
1539       else
1540         goto letter;
1541     case '_':
1542     case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1543     case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1544     case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1545     case 'x': case 'y': case 'z':
1546     case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1547     case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1548     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1549     case 'Y': case 'Z':
1550     letter:
1551     _cpp_parse_name (pfile, c);
1552     return CPP_MACRO;
1553
1554     case ' ':  case '\t':  case '\v': case '\f': case '\0':
1555       {
1556         int null_count = 0;
1557
1558         for (;;)
1559           {
1560             if (c == '\0')
1561               null_count++;
1562             else
1563               CPP_PUTC (pfile, c);
1564             c = PEEKC ();
1565             if (c == EOF || !is_hspace(c))
1566               break;
1567             FORWARD(1);
1568           }
1569         if (null_count)
1570           null_warning (pfile, null_count);
1571         return CPP_HSPACE;
1572       }
1573
1574     case '\r':
1575       if (CPP_BUFFER (pfile)->has_escapes)
1576         {
1577           c = GETC ();
1578           if (c == '-')
1579             {
1580               if (pfile->output_escapes)
1581                 CPP_PUTS (pfile, "\r-", 2);
1582               _cpp_parse_name (pfile, GETC ());
1583               return CPP_NAME;
1584             }
1585           else if (c == ' ')
1586             {
1587               /* "\r " means a space, but only if necessary to prevent
1588                  accidental token concatenation.  */
1589               CPP_RESERVE (pfile, 2);
1590               if (pfile->output_escapes)
1591                 CPP_PUTC_Q (pfile, '\r');
1592               CPP_PUTC_Q (pfile, c);
1593               return CPP_HSPACE;
1594             }
1595           else
1596             {
1597               cpp_ice (pfile, "unrecognized escape \\r%c", c);
1598               goto get_next;
1599             }
1600         }
1601       else
1602         {
1603           /* Backslash newline is ignored. */
1604           if (!ACTIVE_MARK_P (pfile))
1605             CPP_BUMP_LINE (pfile);
1606           goto get_next;
1607         }
1608
1609     case '\n':
1610       CPP_PUTC (pfile, c);
1611       return CPP_VSPACE;
1612
1613     case '(': token = CPP_OPEN_PAREN;  goto char1;
1614     case ')': token = CPP_CLOSE_PAREN; goto char1;
1615     case '{': token = CPP_OPEN_BRACE;  goto char1;
1616     case '}': token = CPP_CLOSE_BRACE; goto char1;
1617     case ',': token = CPP_COMMA;       goto char1;
1618     case ';': token = CPP_SEMICOLON;   goto char1;
1619
1620     randomchar:
1621     default:
1622       token = CPP_OTHER;
1623     char1:
1624       CPP_PUTC (pfile, c);
1625       return token;
1626     }
1627 }
1628
1629 /* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
1630    Caller is expected to have checked no_macro_expand.  */
1631 static int
1632 maybe_macroexpand (pfile, written)
1633      cpp_reader *pfile;
1634      long written;
1635 {
1636   U_CHAR *macro = pfile->token_buffer + written;
1637   size_t len = CPP_WRITTEN (pfile) - written;
1638   cpp_hashnode *hp = cpp_lookup (pfile, macro, len);
1639
1640   /* cpp_lookup never returns null.  */
1641   if (hp->type == T_VOID)
1642     return 0;
1643   if (hp->disabled || hp->type == T_IDENTITY)
1644     {
1645       if (pfile->output_escapes)
1646         {
1647           /* Insert a no-reexpand marker before IDENT.  */
1648           CPP_RESERVE (pfile, 2);
1649           CPP_ADJUST_WRITTEN (pfile, 2);
1650           macro = pfile->token_buffer + written;
1651
1652           memmove (macro + 2, macro, len);
1653           macro[0] = '\r';
1654           macro[1] = '-';
1655         }
1656       return 0;
1657     }
1658   if (hp->type == T_EMPTY)
1659     {
1660       /* Special case optimization: macro expands to nothing.  */
1661       CPP_SET_WRITTEN (pfile, written);
1662       CPP_PUTC_Q (pfile, ' ');
1663       return 1;
1664     }
1665
1666   /* If macro wants an arglist, verify that a '(' follows.  */
1667   if (hp->type == T_FMACRO)
1668     {
1669       int macbuf_whitespace = 0;
1670       int c;
1671
1672       while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1673         {
1674           const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1675           for (;;)
1676             {
1677               _cpp_skip_hspace (pfile);
1678               c = PEEKC ();
1679               if (c == '\n')
1680                 FORWARD(1);
1681               else
1682                 break;
1683             }
1684           if (point != CPP_BUFFER (pfile)->cur)
1685             macbuf_whitespace = 1;
1686           if (c == '(')
1687             goto is_macro_call;
1688           else if (c != EOF)
1689             goto not_macro_call;
1690           cpp_pop_buffer (pfile);
1691         }
1692
1693       CPP_SET_MARK (pfile);
1694       for (;;)
1695         {
1696           _cpp_skip_hspace (pfile);
1697           c = PEEKC ();
1698           if (c == '\n')
1699             FORWARD(1);
1700           else
1701             break;
1702         }
1703       CPP_GOTO_MARK (pfile);
1704
1705       if (c != '(')
1706         {
1707         not_macro_call:
1708           if (macbuf_whitespace)
1709             CPP_PUTC (pfile, ' ');
1710
1711           /* K+R treated this as a hard error.  */
1712           if (CPP_WTRADITIONAL (pfile))
1713             cpp_warning (pfile,
1714          "function macro %s must be used with arguments in traditional C",
1715                          hp->name);
1716           return 0;
1717         }
1718     }
1719
1720  is_macro_call:
1721   /* This is now known to be a macro call.
1722      Expand the macro, reading arguments as needed,
1723      and push the expansion on the input stack.  */
1724   _cpp_macroexpand (pfile, hp);
1725   CPP_SET_WRITTEN (pfile, written);
1726   return 1;
1727 }
1728
1729 /* Complain about \v or \f in a preprocessing directive (constraint
1730    violation, C99 6.10 para 5).  Caller has checked CPP_PEDANTIC.  */
1731 static void
1732 pedantic_whitespace (pfile, p, len)
1733      cpp_reader *pfile;
1734      U_CHAR *p;
1735      unsigned int len;
1736 {
1737   while (len)
1738     {
1739       if (*p == '\v')
1740         cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1741       else if (*p == '\f')
1742         cpp_pedwarn (pfile, "form feed in preprocessing directive");
1743       p++;
1744       len--;
1745     }
1746 }
1747
1748
1749 enum cpp_ttype
1750 cpp_get_token (pfile)
1751      cpp_reader *pfile;
1752 {
1753   enum cpp_ttype token;
1754   long written = CPP_WRITTEN (pfile);
1755   int macro_buffer;
1756
1757  get_next:
1758   token = _cpp_lex_token (pfile);
1759
1760   switch (token)
1761     {
1762     default:
1763       if (pfile->skipping)
1764         break;
1765       pfile->potential_control_macro = 0;
1766       pfile->only_seen_white = 0;
1767       break;
1768
1769     case CPP_HSPACE:
1770     case CPP_COMMENT:
1771       break;
1772
1773     case CPP_VSPACE:
1774       if (pfile->only_seen_white == 0)
1775         pfile->only_seen_white = 1;
1776       CPP_BUMP_LINE (pfile);
1777       break;
1778
1779     case CPP_HASH:
1780       pfile->potential_control_macro = 0;
1781       if (!pfile->only_seen_white)
1782         break;
1783       /* XXX shouldn't have to do this - remove the hash or %: from
1784          the token buffer.  */
1785       if (CPP_PWRITTEN (pfile)[-1] == '#')
1786         CPP_ADJUST_WRITTEN (pfile, -1);
1787       else
1788         CPP_ADJUST_WRITTEN (pfile, -2);
1789
1790       if (_cpp_handle_directive (pfile))
1791         {
1792           token = CPP_DIRECTIVE;
1793           break;
1794         }
1795       pfile->only_seen_white = 0;
1796       CPP_PUTC (pfile, '#');
1797       break;
1798
1799     case CPP_MACRO:
1800       if (pfile->skipping)
1801         break;
1802       pfile->potential_control_macro = 0;
1803       pfile->only_seen_white = 0;
1804       if (! pfile->no_macro_expand
1805           && maybe_macroexpand (pfile, written))
1806         goto get_next;
1807       token = CPP_NAME;
1808       break;
1809
1810       /* Do not run this case through the 'skipping' logic.  */
1811     case CPP_EOF:
1812       if (CPP_BUFFER (pfile) == NULL)
1813         return CPP_EOF;
1814       macro_buffer = CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile));
1815
1816       cpp_pop_buffer (pfile);
1817       if (macro_buffer)
1818         goto get_next;
1819       return CPP_EOF;
1820     }
1821
1822   if (pfile->skipping)
1823     {
1824       CPP_SET_WRITTEN (pfile, written);
1825       goto get_next;
1826     }
1827   return token;
1828 }
1829
1830 /* Like cpp_get_token, but skip spaces and comments.  */
1831
1832 enum cpp_ttype
1833 cpp_get_non_space_token (pfile)
1834      cpp_reader *pfile;
1835 {
1836   int old_written = CPP_WRITTEN (pfile);
1837   for (;;)
1838     {
1839       enum cpp_ttype token = cpp_get_token (pfile);
1840       if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1841         return token;
1842       CPP_SET_WRITTEN (pfile, old_written);
1843     }
1844 }
1845
1846 /* Like cpp_get_token, except that it does not execute directives,
1847    does not consume vertical space, and discards horizontal space.  */
1848 enum cpp_ttype
1849 _cpp_get_directive_token (pfile)
1850      cpp_reader *pfile;
1851 {
1852   long old_written;
1853   enum cpp_ttype token;
1854   int at_bol;
1855
1856  get_next:
1857   at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1858   old_written = CPP_WRITTEN (pfile);
1859   token = _cpp_lex_token (pfile);
1860   switch (token)
1861     {
1862     default:
1863       return token;
1864
1865     case CPP_VSPACE:
1866       /* Put it back and return VSPACE.  */
1867       FORWARD(-1);
1868       CPP_ADJUST_WRITTEN (pfile, -1);
1869       return CPP_VSPACE;
1870
1871     case CPP_HSPACE:
1872       /* The purpose of this rather strange check is to prevent pedantic
1873          warnings for ^L in an #ifdefed out block.  */
1874       if (CPP_PEDANTIC (pfile) && ! at_bol)
1875         pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1876                              CPP_WRITTEN (pfile) - old_written);
1877       CPP_SET_WRITTEN (pfile, old_written);
1878       goto get_next;
1879       return CPP_HSPACE;
1880
1881     case CPP_MACRO:
1882       if (! pfile->no_macro_expand
1883           && maybe_macroexpand (pfile, old_written))
1884         goto get_next;
1885       return CPP_NAME;
1886
1887     case CPP_EOF:
1888       if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1889         {
1890           cpp_pop_buffer (pfile);
1891           goto get_next;
1892         }
1893       else
1894         /* This can happen for files that don't end with a newline,
1895            and for cpp_define and friends.  Pretend they do, so
1896            callers don't have to deal.  A warning will be issued by
1897            someone else, if necessary.  */
1898         return CPP_VSPACE;
1899     }
1900 }
1901
1902 /* Determine the current line and column.  Used only by read_and_prescan. */
1903 static U_CHAR *
1904 find_position (start, limit, linep)
1905      U_CHAR *start;
1906      U_CHAR *limit;
1907      unsigned long *linep;
1908 {
1909   unsigned long line = *linep;
1910   U_CHAR *lbase = start;
1911   while (start < limit)
1912     {
1913       U_CHAR ch = *start++;
1914       if (ch == '\n' || ch == '\r')
1915         {
1916           line++;
1917           lbase = start;
1918         }
1919     }
1920   *linep = line;
1921   return lbase;
1922 }
1923
 1924 /* The following table is used by _cpp_prescan.  If we have
 1925    designated initializers, it can be constant data; otherwise, it is
 1926    set up at runtime by _cpp_init_input_buffer.
      
      The CHARTAB, END and s macros let the table contents be written
      once, below, and expand either to a designated initializer list
      or to the body of an init_chartab function that stores each
      entry with an assignment.  */
 1927
      /* Designated initializers: constant table, init_chartab is a no-op.  */
 1928 #if (GCC_VERSION >= 2007)
 1929 #define init_chartab()  /* nothing */
 1930 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
 1931 #define END };
 1932 #define s(p, v) [p] = v,
 1933 #else
      /* Otherwise: zero-filled writable table, plus the opening of the
         init_chartab function whose statements are the s() entries.  */
 1934 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
 1935  static void init_chartab PARAMS ((void)) { \
 1936  unsigned char *x = chartab;
 1937 #define END }
 1938 #define s(p, v) x[p] = v;
 1939 #endif
 1940
 1941 /* Table of characters that can't be handled in the inner loop.
 1942    Also contains the mapping between trigraph third characters and their
 1943    replacements.  The SPECCASE_* codes are small integers, below
      every replacement character stored in the table.  */
 1944 #define SPECCASE_CR        1
 1945 #define SPECCASE_BACKSLASH 2
 1946 #define SPECCASE_QUESTION  3
 1947
 1948 CHARTAB
      /* Characters needing special handling.  */
 1949   s('\r', SPECCASE_CR)
 1950   s('\\', SPECCASE_BACKSLASH)
 1951   s('?',  SPECCASE_QUESTION)
 1952
      /* Third character of a trigraph -> the character it stands for.  */
 1953   s('=', '#')   s(')', ']')     s('!', '|')
 1954   s('(', '[')   s('\'', '^')    s('>', '}')
 1955   s('/', '\\')  s('<', '{')     s('-', '~')
 1956 END
 1957
      /* The helper macros are only needed to build the table.  */
 1958 #undef CHARTAB
 1959 #undef END
 1960 #undef s
 1961
      /* NORMAL (c) is true when the table entry for character C is not
         one of the SPECCASE_* codes, i.e. it is zero or a trigraph
         replacement.  NONTRI (c) is true when the value C is zero or a
         SPECCASE_* code rather than a trigraph replacement; presumably
         C is a chartab entry here -- confirm against _cpp_prescan.  */
 1962 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
 1963 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1964
1965 /* Prescan pass over a file already loaded into BUF.  This is
1966    translation phases 1 and 2 (C99 5.1.1.2).
1967
1968    Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1969    canonical form (\n).  If enabled, convert and/or warn about
1970    trigraphs.  Convert backslash-newline to a one-character escape
1971    (\r) and remove it from "embarrassing" places (i.e. the middle of a
1972    token).  If there is no newline at the end of the file, add one and
1973    warn.  Returns -1 on failure, or the actual length of the data to
1974    be scanned.
1975
1976    This function does a lot of work, and can be a serious performance
1977    bottleneck.  It has been tuned heavily; make sure you understand it
1978    before hacking.  The common case - no trigraphs, Unix style line
1979    breaks, backslash-newline set off by whitespace, newline at EOF -
1980    has been optimized at the expense of the others.  The performance
1981    penalty for DOS style line breaks (\r\n) is about 15%.
1982
1983    Warnings lose particularly heavily since we have to determine the
1984    line number, which involves scanning from the beginning of the file
1985    or from the last warning.  The penalty for the absence of a newline
1986    at the end of reload1.c is about 60%.  (reload1.c is 329k.)
1987
1988    If your file has more than one kind of end-of-line marker, you
1989    will get messed-up line numbering.  */
1990
1991 ssize_t
1992 _cpp_prescan (pfile, fp, len)
1993      cpp_reader *pfile;
1994      cpp_buffer *fp;
1995      ssize_t len;
1996 {
1997   U_CHAR *buf, *op;
1998   const U_CHAR *ibase, *ip, *ilimit;
1999   U_CHAR *line_base;
2000   unsigned long line;
2001   unsigned int deferred_newlines;
2002
2003   /* Allocate an extra byte in case we must add a trailing \n.  */
2004   buf = (U_CHAR *) xmalloc (len + 1);
2005   line_base = op = buf;
2006   ip = ibase = fp->buf;
2007   ilimit = ibase + len;
2008   line = 1;
2009   deferred_newlines = 0;
2010
2011   for (;;)
2012     {
2013       const U_CHAR *iq;
2014
2015       /* Deal with \-newline, potentially in the middle of a token. */
2016       if (deferred_newlines)
2017         {
2018           if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
2019             {
2020               /* Previous was not white space.  Skip to white
2021                  space, if we can, before outputting the \r's */
2022               iq = ip;
2023               while (iq < ilimit
2024                      && *iq != ' '
2025                      && *iq != '\t'
2026                      && *iq != '\n'
2027                      && NORMAL(*iq))
2028                 iq++;
2029               memcpy (op, ip, iq - ip);
2030               op += iq - ip;
2031               ip += iq - ip;
2032               if (! NORMAL(*ip))
2033                 goto do_speccase;
2034             }
2035           while (deferred_newlines)
2036             deferred_newlines--, *op++ = '\r';
2037         }
2038
2039       /* Copy as much as we can without special treatment. */
2040       iq = ip;
2041       while (iq < ilimit && NORMAL (*iq)) iq++;
2042       memcpy (op, ip, iq - ip);
2043       op += iq - ip;
2044       ip += iq - ip;
2045
2046     do_speccase:
2047       if (ip >= ilimit)
2048         break;
2049
2050       switch (chartab[*ip++])
2051         {
2052         case SPECCASE_CR:  /* \r */
2053           if (ip[-2] != '\n')
2054             {
2055               if (ip < ilimit && *ip == '\n')
2056                 ip++;
2057               *op++ = '\n';
2058             }
2059           break;
2060
2061         case SPECCASE_BACKSLASH:  /* \ */
2062         backslash:
2063           if (ip < ilimit)
2064             {
2065               if (*ip == '\n')
2066                 {
2067                   deferred_newlines++;
2068                   ip++;
2069                   if (*ip == '\r') ip++;
2070                   break;
2071                 }
2072               else if (*ip == '\r')
2073                 {
2074                   deferred_newlines++;
2075                   ip++;
2076                   if (*ip == '\n') ip++;
2077                   break;
2078                 }
2079             }
2080
2081           *op++ = '\\';
2082           break;
2083
2084         case SPECCASE_QUESTION: /* ? */
2085           {
2086             unsigned int d, t;
2087
2088             *op++ = '?'; /* Normal non-trigraph case */
2089             if (ip > ilimit - 2 || ip[0] != '?')
2090               break;
2091
2092             d = ip[1];
2093             t = chartab[d];
2094             if (NONTRI (t))
2095               break;
2096
2097             if (CPP_OPTION (pfile, warn_trigraphs))
2098               {
2099                 unsigned long col;
2100                 line_base = find_position (line_base, op, &line);
2101                 col = op - line_base + 1;
2102                 if (CPP_OPTION (pfile, trigraphs))
2103                   cpp_warning_with_line (pfile, line, col,
2104                                          "trigraph ??%c converted to %c", d, t);
2105                 else
2106                   cpp_warning_with_line (pfile, line, col,
2107                                          "trigraph ??%c ignored", d);
2108               }
2109
2110             ip += 2;
2111             if (CPP_OPTION (pfile, trigraphs))
2112               {
2113                 op[-1] = t;         /* Overwrite '?' */
2114                 if (t == '\\')
2115                   {
2116                     op--;
2117                     goto backslash;
2118                   }
2119               }
2120             else
2121               {
2122                 *op++ = '?';
2123                 *op++ = d;
2124               }
2125           }
2126           break;
2127         }
2128     }
2129
2130 #ifdef HAVE_MMAP_FILE
2131   if (fp->mapped)
2132     munmap ((caddr_t) fp->buf, len);
2133   else
2134 #endif
2135     free ((PTR) fp->buf);
2136
2137   if (op[-1] != '\n')
2138     {
2139       unsigned long col;
2140       line_base = find_position (line_base, op, &line);
2141       col = op - line_base + 1;
2142       cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2143       *op++ = '\n';
2144     }
2145
2146   fp->buf = buf;
2147   return op - buf;
2148 }
2149
2150 /* Allocate pfile->input_buffer, and initialize chartab[]
2151    if it hasn't happened already.  */
2152
2153 void
2154 _cpp_init_input_buffer (pfile)
2155      cpp_reader *pfile;
2156 {
2157   U_CHAR *tmp;
2158
2159   init_chartab ();
2160   _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2161
2162   /* Determine the appropriate size for the input buffer.  Normal C
2163      source files are smaller than eight K.  */
2164   /* 8Kbytes of buffer proper, 1 to detect running off the end without
2165      address arithmetic all the time, and 3 for pushback during buffer
2166      refill, in case there's a potential trigraph or end-of-line
2167      digraph at the end of a block. */
2168
2169   tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2170   pfile->input_buffer = tmp;
2171   pfile->input_buffer_len = 8192;
2172 }
2173
2174 /* Utility routine:
2175    Compares, in the manner of strcmp(3), the token beginning at TOKEN
2176    and extending for LEN characters to the NUL-terminated string
2177    STRING.  Typical usage:
2178
2179    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2180                  "inline"))
2181      { ... }
2182  */
2183
2184 int
2185 cpp_idcmp (token, len, string)
2186      const U_CHAR *token;
2187      size_t len;
2188      const char *string;
2189 {
2190   size_t len2 = strlen (string);
2191   int r;
2192
2193   if ((r = memcmp (token, string, MIN (len, len2))))
2194     return r;
2195
2196   /* The longer of the two strings sorts after the shorter.  */
2197   if (len == len2)
2198     return 0;
2199   else if (len < len2)
2200     return -1;
2201   else
2202     return 1;
2203 }
2204
2205 #ifdef NEW_LEXER
2206
2207 /* Lexing algorithm.
2208
2209  The original lexer in cpplib was made up of two passes: a first pass
2210  that replaced trigraphs and deleted escaped newlines, and a second
2211  pass that tokenized the result of the first pass.  Tokenisation was
2212  performed by peeking at the next character in the input stream.  For
2213  example, if the input stream contained "!=", the handler for the !
2214  character would peek at the next character, and if it were a '='
2215  would skip over it, and return a "!=" token, otherwise it would
2216  return just the "!" token.
2217
2218  To implement a single-pass lexer, this peeking ahead is unworkable.
2219  An arbitrary number of escaped newlines, and trigraphs (in particular
2220  ??/ which translates to the escape \), could separate the '!' and '='
2221  in the input stream, yet the next token is still a "!=".
2222
2223  Suppose instead that we lex by one logical line at a time, producing
2224  a token list or stack for each logical line, and when seeing the '!'
2225  push a CPP_NOT token on the list.  Then if the '!' is part of a
2226  longer token ("!=") we know we must see the remainder of the token by
2227  the time we reach the end of the logical line.  Thus we can have the
2228  '=' handler look at the previous token (at the end of the list / top
2229  of the stack) and see if it is a "!" token, and if so, instead of
2230  pushing a "=" token revise the existing token to be a "!=" token.
2231
2232  This works in the presence of escaped newlines, because the '\' would
2233  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
2234  newline ('\n' or '\r') handler looks at the token at the top of the
2235  stack to see if it is a CPP_BACKSLASH, and if so discards both.
2236  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
2237  the '=' handler would never see any intervening escaped newlines.
2238
2239  To make trigraphs work in this context, as in precedence trigraphs
2240  are highest and converted before anything else, the '?' handler does
2241  lookahead to see if it is a trigraph, and if so skips the trigraph
2242  and pushes the token it represents onto the top of the stack.  This
2243  also works in the particular case of a CPP_BACKSLASH trigraph.
2244
2245  To the preprocessor, whitespace is only significant to the point of
2246  knowing whether whitespace precedes a particular token.  For example,
2247  the '=' handler needs to know whether there was whitespace between it
2248  and a "!" token on the top of the stack, to make the token conversion
2249  decision correctly.  So each token has a PREV_WHITESPACE flag to
2250  indicate this - the standard permits consecutive whitespace to be
2251  regarded as a single space.  The compiler front ends are not
2252  interested in whitespace at all; they just require a token stream.
2253  Another place where whitespace is significant to the preprocessor is
2254  a #define statement - if there is whitespace between the macro name
2255  and an initial "(" token the macro is "object-like", otherwise it is
2256  a function-like macro that takes arguments.
2257
2258  However, all is not rosy.  Parsing of identifiers, numbers, comments
2259  and strings becomes trickier because of the possibility of raw
2260  trigraphs and escaped newlines in the input stream.
2261
2262  The trigraphs are three consecutive characters beginning with two
2263  question marks.  A question mark is not valid as part of a number or
2264  identifier, so parsing of a number or identifier terminates normally
2265  upon reaching it, returning to the mainloop which handles the
2266  trigraph just like it would in any other position.  Similarly for the
2267  backslash of a backslash-newline combination.  So we just need the
2268  escaped-newline dropper in the mainloop to check if the token on the
2269  top of the stack after dropping the escaped newline is a number or
2270  identifier, and if so to continue the processing it as if nothing had
2271  happened.
2272
2273  For strings, we replace trigraphs whenever we reach a quote or
2274  newline, because there might be a backslash trigraph escaping them.
2275  We need to be careful that we start trigraph replacing from where we
2276  left off previously, because it is possible for a first scan to leave
2277  "fake" trigraphs that a second scan would pick up as real (e.g. the
2278  sequence "????/\n=" would find a fake ??= trigraph after removing the
2279  escaped newline.)
2280
2281  For line comments, on reaching a newline we scan the previous
2282  character(s) to see if it escaped, and continue if it is.  Block
2283  comments ignore everything and just focus on finding the comment
2284  termination mark.  The only difficult thing, and it is surprisingly
2285  tricky, is checking if an asterisk precedes the final slash since
2286  they could be separated by escaped newlines.  If the preprocessor is
2287  invoked with the output comments option, we don't bother removing
2288  escaped newlines and replacing trigraphs for output.
2289
2290  Finally, numbers can begin with a period, which is pushed initially
2291  as a CPP_DOT token in its own right.  The digit handler checks if the
2292  previous token was a CPP_DOT not separated by whitespace, and if so
2293  pops it off the stack and pushes a period into the number's buffer
2294  before calling the number parser.
2295
2296 */
2297
/* The six digraph spellings, one entry per digraph.  */
static const unsigned char *digraph_spellings [] =
{
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};
/* Lookup table indexed by the third character of a "??x" sequence.
   A zero entry means the sequence is not a trigraph; any other entry
   is the character the trigraph stands for.  Filled in by
   init_trigraph_map.  */
static unsigned char trigraph_map[256];

/* Store the nine trigraph replacements in trigraph_map.  */
void
init_trigraph_map ()
{
  /* Each row is { third character of the trigraph, replacement }.  */
  static const unsigned char pairs[][2] =
    {
      { '=', '#' },  { '(', '[' },  { ')', ']' },
      { '/', '\\' }, { '\'', '^' }, { '<', '{' },
      { '>', '}' },  { '!', '|' },  { '-', '~' }
    };
  unsigned int i;

  for (i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    trigraph_map[pairs[i][0]] = pairs[i][1];
}
2315
2316 /* Call when a trigraph is encountered.  It warns if necessary, and
2317    returns true if the trigraph should be honoured.  END is the third
2318    character of a trigraph in the input stream.  */
2319 static int
2320 trigraph_ok (pfile, end)
2321      cpp_reader *pfile;
2322      const unsigned char *end;
2323 {
2324   int accept = CPP_OPTION (pfile, trigraphs);
2325
2326   if (CPP_OPTION (pfile, warn_trigraphs))
2327     {
2328       unsigned int col = end - 1 - pfile->buffer->line_base;
2329       if (accept)
2330         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2331                                "trigraph ??%c converted to %c",
2332                                (int) *end, (int) trigraph_map[*end]);
2333       else
2334         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2335                                "trigraph ??%c ignored", (int) *end);
2336     }
2337   return accept;
2338 }
2339
2340 /* Scan a string for trigraphs, warning or replacing them inline as
2341    appropriate.  When parsing a string, we must call this routine
2342    before processing a newline character (if trigraphs are enabled),
2343    since the newline might be escaped by a preceding backslash
2344    trigraph sequence.  Returns a pointer to the end of the name after
2345    replacement.  */
2346
2347 static unsigned char*
2348 trigraph_replace (pfile, src, limit)
2349      cpp_reader *pfile;
2350      unsigned char *src;
2351      unsigned char* limit;
2352 {
2353   unsigned char *dest;
2354
2355   /* Starting with src[1], find two consecutive '?'.  The case of no
2356      trigraphs is streamlined.  */
2357
2358   for (; src + 1 < limit; src += 2)
2359     {
2360       if (src[0] != '?')
2361         continue;
2362
2363       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
2364       if (src[-1] == '?')
2365         src--;
2366       else if (src + 2 == limit || src[1] != '?')
2367         continue;
2368
2369       /* Check if it really is a trigraph.  */
2370       if (trigraph_map[src[2]] == 0)
2371         continue;
2372
2373       dest = src;
2374       goto trigraph_found;
2375     }
2376   return limit;
2377
2378   /* Now we have a trigraph, we need to scan the remaining buffer, and
2379      copy-shifting its contents left if replacement is enabled.  */
2380   for (; src + 2 < limit; dest++, src++)
2381     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2382       {
2383       trigraph_found:
2384         src += 2;
2385         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2386           *dest = trigraph_map[*src];
2387       }
2388
2389   /* Copy remaining (at most 2) characters.  */
2390   while (src < limit)
2391     *dest++ = *src++;
2392   return dest;
2393 }
2394
2395 /* If CUR is a backslash or the end of a trigraphed backslash, return
2396    a pointer to its beginning, otherwise NULL.  We don't read beyond
2397    the buffer start, because there is the start of the comment in the
2398    buffer.  */
2399 static const unsigned char *
2400 backslash_start (pfile, cur)
2401      cpp_reader *pfile;
2402      const unsigned char *cur;
2403 {
2404   if (cur[0] == '\\')
2405     return cur;
2406   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2407       && trigraph_ok (pfile, cur))
2408     return cur - 2;
2409   return 0;
2410 }
2411
2412 /* Skip a C-style block comment.  This is probably the trickiest
2413    handler.  We find the end of the comment by seeing if an asterisk
2414    is before every '/' we encounter.  The nasty complication is that a
2415    previous asterisk may be separated by one or more escaped newlines.
2416    Returns non-zero if comment terminated by EOF, zero otherwise.  */
2417 static int
2418 skip_block_comment2 (pfile)
2419      cpp_reader *pfile;
2420 {
2421   cpp_buffer *buffer = pfile->buffer;
2422   const unsigned char *char_after_star = 0;
2423   register const unsigned char *cur = buffer->cur;
2424   int seen_eof = 0;
2425
2426   /* Inner loop would think the comment has ended if the first comment
2427      character is a '/'.  Avoid this and keep the inner loop clean by
2428      skipping such a character.  */
2429   if (cur < buffer->rlimit && cur[0] == '/')
2430     cur++;
2431
2432   for (; cur < buffer->rlimit; )
2433     {
2434       unsigned char c = *cur++;
2435
2436       /* People like decorating comments with '*', so check for
2437          '/' instead for efficiency.  */
2438       if (c == '/')
2439         {
2440           if (cur[-2] == '*' || cur - 1 == char_after_star)
2441             goto out;
2442
2443           /* Warn about potential nested comments, but not when
2444              the final character inside the comment is a '/'.
2445              Don't bother to get it right across escaped newlines.  */
2446           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2447               && cur[0] == '*' && cur[1] != '/')
2448             {
2449               buffer->cur = cur;
2450               cpp_warning (pfile, "'/*' within comment");
2451             }
2452         }
2453       else if (IS_NEWLINE(c))
2454         {
2455           const unsigned char* bslash = backslash_start (pfile, cur - 2);
2456
2457           handle_newline (cur, buffer->rlimit, c);
2458           /* Work correctly if there is an asterisk before an
2459              arbirtrarily long sequence of escaped newlines.  */
2460           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2461             char_after_star = cur;
2462           else
2463             char_after_star = 0;
2464         }
2465     }
2466   seen_eof = 1;
2467
2468  out:
2469   buffer->cur = cur;
2470   return seen_eof;
2471 }
2472
2473 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
2474    Returns non-zero if a multiline comment.  */
2475 static int
2476 skip_line_comment2 (pfile)
2477      cpp_reader *pfile;
2478 {
2479   cpp_buffer *buffer = pfile->buffer;
2480   register const unsigned char *cur = buffer->cur;
2481   int multiline = 0;
2482
2483   for (; cur < buffer->rlimit; )
2484     {
2485       unsigned char c = *cur++;
2486
2487       if (IS_NEWLINE (c))
2488         {
2489           /* Check for a (trigaph?) backslash escaping the newline.  */
2490           if (!backslash_start (pfile, cur - 2))
2491             goto out;
2492           multiline = 1;
2493           handle_newline (cur, buffer->rlimit, c);
2494         }
2495     }
2496   cur++;
2497
2498  out:
2499   buffer->cur = cur - 1;        /* Leave newline for caller.  */
2500   return multiline;
2501 }
2502
2503 /* Skips whitespace, stopping at next non-whitespace character.
2504    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
2505    to be assigned the correct column.  */
2506 static void
2507 skip_whitespace (pfile, in_directive)
2508      cpp_reader *pfile;
2509      int in_directive;
2510 {
2511   cpp_buffer *buffer = pfile->buffer;
2512   register const unsigned char *cur = buffer->cur;
2513   unsigned short null_count = 0;
2514
2515   for (; cur < buffer->rlimit; )
2516     {
2517       unsigned char c = *cur++;
2518
2519       if (c == '\t')
2520         {
2521           unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2522           pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2523                                 - col % CPP_OPTION(pfile, tabstop));
2524         }
2525       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
2526         continue;
2527       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
2528         goto out;
2529       if (c == '\0')
2530         null_count++;
2531       /* Mut be '\f' or '\v' */
2532       else if (in_directive && CPP_PEDANTIC (pfile))
2533         cpp_pedwarn (pfile, "%s in preprocessing directive",
2534                      c == '\f' ? "formfeed" : "vertical tab");
2535     }
2536   cur++;
2537
2538  out:
2539   buffer->cur = cur - 1;
2540   if (null_count)
2541     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2542                  : "embedded null character ignored");
2543 }
2544
2545 /* Parse (append) an identifier.  */
2546 static void
2547 parse_name (pfile, list, name)
2548      cpp_reader *pfile;
2549      cpp_toklist *list;
2550      cpp_name *name;
2551 {
2552   const unsigned char *name_limit;
2553   unsigned char *namebuf;
2554   cpp_buffer *buffer = pfile->buffer;
2555   register const unsigned char *cur = buffer->cur;
2556
2557  expanded:
2558   name_limit = list->namebuf + list->name_cap;
2559   namebuf = list->namebuf + list->name_used;
2560
2561   for (; cur < buffer->rlimit && namebuf < name_limit; )
2562     {
2563       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2564
2565       if (! is_idchar(c))
2566         goto out;
2567       namebuf++;
2568       cur++;
2569       if (c == '$' && CPP_PEDANTIC (pfile))
2570         {
2571           buffer->cur = cur;
2572           cpp_pedwarn (pfile, "'$' character in identifier");
2573         }
2574     }
2575
2576   /* Run out of name space?  */
2577   if (cur < buffer->rlimit)
2578     {
2579       list->name_used = namebuf - list->namebuf;
2580       auto_expand_name_space (list);
2581       goto expanded;
2582     }
2583
2584  out:
2585   buffer->cur = cur;
2586   name->len = namebuf - name->text;
2587   list->name_used = namebuf - list->namebuf;
2588 }
2589
2590 /* Parse (append) a number.  */
2591
2592 #define VALID_SIGN(c, prevc) \
2593   (((c) == '+' || (c) == '-') && \
2594    ((prevc) == 'e' || (prevc) == 'E' \
2595     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2596
2597 static void
2598 parse_number (pfile, list, name)
2599      cpp_reader *pfile;
2600      cpp_toklist *list;
2601      cpp_name *name;
2602 {
2603   const unsigned char *name_limit;
2604   unsigned char *namebuf;
2605   cpp_buffer *buffer = pfile->buffer;
2606   register const unsigned char *cur = buffer->cur;
2607
2608  expanded:
2609   name_limit = list->namebuf + list->name_cap;
2610   namebuf = list->namebuf + list->name_used;
2611
2612   for (; cur < buffer->rlimit && namebuf < name_limit; )
2613     {
2614       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2615
2616       /* Perhaps we should accept '$' here if we accept it for
2617          identifiers.  We know namebuf[-1] is safe, because for c to
2618          be a sign we must have pushed at least one character.  */
2619       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2620         goto out;
2621
2622       namebuf++;
2623       cur++;
2624     }
2625
2626   /* Run out of name space?  */
2627   if (cur < buffer->rlimit)
2628     {
2629       list->name_used = namebuf - list->namebuf;
2630       auto_expand_name_space (list);
2631       goto expanded;
2632     }
2633
2634  out:
2635   buffer->cur = cur;
2636   name->len = namebuf - name->text;
2637   list->name_used = namebuf - list->namebuf;
2638 }
2639
2640 /* Places a string terminated by an unescaped TERMINATOR into a
2641    cpp_name, which should be expandable and thus at the top of the
2642    list's stack.  Handles embedded trigraphs, if necessary, and
2643    escaped newlines.
2644
2645    Can be used for character constants (terminator = '\''), string
2646    constants ('"') and angled headers ('>').  Multi-line strings are
2647    allowed, except for within directives.  */
2648
2649 static void
2650 parse_string2 (pfile, list, name, terminator, multiline_ok)
2651      cpp_reader *pfile;
2652      cpp_toklist *list;
2653      cpp_name *name;
2654      unsigned int terminator;
2655      int multiline_ok;
2656 {
2657   cpp_buffer *buffer = pfile->buffer;
2658   register const unsigned char *cur = buffer->cur;
2659   const unsigned char *name_limit;
2660   unsigned char *namebuf;
2661   unsigned int null_count = 0;
2662   int trigraphed_len = 0;
2663
2664  expanded:
2665   name_limit = list->namebuf + list->name_cap;
2666   namebuf = list->namebuf + list->name_used;
2667
2668   for (; cur < buffer->rlimit && namebuf < name_limit; )
2669     {
2670       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
2671
2672       if (c == '\0')
2673         null_count++;
2674       else if (c == terminator || IS_NEWLINE (c))
2675         {
2676           /* Needed for trigraph_replace and multiline string warning.  */
2677           buffer->cur = cur;
2678
2679           /* Scan for trigraphs before checking if backslash-escaped.  */
2680           if (CPP_OPTION (pfile, trigraphs)
2681               || CPP_OPTION (pfile, warn_trigraphs))
2682             {
2683               namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2684                                             namebuf);
2685               trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2686               if (trigraphed_len < 0)
2687                 trigraphed_len = 0;
2688             }
2689
2690           namebuf--;     /* Drop the newline / terminator from the name.  */
2691           if (IS_NEWLINE (c))
2692             {
2693               /* Drop a backslash newline, and continue. */
2694               if (namebuf[-1] == '\\')
2695                 {
2696                   handle_newline (cur, buffer->rlimit, c);
2697                   namebuf--;
2698                   continue;
2699                 }
2700
2701               cur--;
2702
2703               /* In Fortran and assembly language, silently terminate
2704                  strings of either variety at end of line.  This is a
2705                  kludge around not knowing where comments are in these
2706                  languages.  */
2707               if (CPP_OPTION (pfile, lang_fortran)
2708                   || CPP_OPTION (pfile, lang_asm))
2709                 goto out;
2710
2711               /* Character constants, headers and asserts may not
2712                  extend over multiple lines.  In Standard C, neither
2713                  may strings.  We accept multiline strings as an
2714                  extension, but not in directives.  */
2715               if (!multiline_ok)
2716                 goto unterminated;
2717
2718               cur++;  /* Move forwards again.  */
2719
2720               if (pfile->multiline_string_line == 0)
2721                 {
2722                   pfile->multiline_string_line = list->line;
2723                   if (CPP_PEDANTIC (pfile))
2724                     cpp_pedwarn (pfile, "multi-line string constant");
2725                 }
2726
2727               *namebuf++ = '\n';
2728               handle_newline (cur, buffer->rlimit, c);
2729             }
2730           else
2731             {
2732               unsigned char *temp;
2733
2734               /* An odd number of consecutive backslashes represents
2735                  an escaped terminator.  */
2736               temp = namebuf - 1;
2737               while (temp >= name->text && *temp == '\\')
2738                 temp--;
2739
2740               if ((namebuf - temp) & 1)
2741                 goto out;
2742               namebuf++;
2743             }
2744         }
2745     }
2746
2747   /* Run out of name space?  */
2748   if (cur < buffer->rlimit)
2749     {
2750       list->name_used = namebuf - list->namebuf;
2751       auto_expand_name_space (list);
2752       goto expanded;
2753     }
2754
2755   /* We may not have trigraph-replaced the input for this code path,
2756      but as the input is in error by being unterminated we don't
2757      bother.  Prevent warnings about no newlines at EOF.  */
2758   if (IS_NEWLINE(cur[-1]))
2759     cur--;
2760
2761  unterminated:
2762   cpp_error (pfile, "missing terminating %c character", (int) terminator);
2763
2764   if (terminator == '\"' && pfile->multiline_string_line != list->line
2765       && pfile->multiline_string_line != 0)
2766     {
2767       cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2768                            "possible start of unterminated string literal");
2769       pfile->multiline_string_line = 0;
2770     }
2771
2772  out:
2773   buffer->cur = cur;
2774   name->len = namebuf - name->text;
2775   list->name_used = namebuf - list->namebuf;
2776
2777   if (null_count > 0)
2778     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2779                          : "null character preserved"));
2780 }
2781
2782 /* The character TYPE helps us distinguish comment types: '*' = C
2783    style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
2784    the stored comment includes the comment start and any terminator.  */
2785
2786 #define COMMENT_START_LEN 2
2787 static void
2788 save_comment (list, token, from, len, type)
2789      cpp_toklist *list;
2790      cpp_token *token;
2791      const unsigned char *from;
2792      unsigned int len;
2793      unsigned int type;
2794 {
2795   unsigned char *buffer;
2796
2797   len += COMMENT_START_LEN;
2798
2799   if (list->name_used + len > list->name_cap)
2800     expand_name_space (list, len);
2801
2802   INIT_TOKEN_NAME (list, token);
2803   token->type = CPP_COMMENT;
2804   token->val.name.len = len;
2805
2806   buffer = list->namebuf + list->name_used;
2807   list->name_used += len;
2808
2809   /* Copy the comment.  */
2810   if (type == '*')
2811     {
2812       *buffer++ = '/';
2813       *buffer++ = '*';
2814     }
2815   else
2816     {
2817       *buffer++ = type;
2818       *buffer++ = type;
2819     }
2820   memcpy (buffer, from, len - COMMENT_START_LEN);
2821 }
2822
2823 /*
2824  *  The tokenizer's main loop.  Appends to LIST the tokens of the next
2825  *  logical line of PFILE's current buffer, from slot list->tokens_used
2826  *  onwards, and flags the first of them BOL.  On EOF after some tokens
2827  *  have been processed, we return immediately.  Then in the next call,
2828  *  or if EOF occurred at the beginning of a logical line, a single
2829  *  CPP_EOF token is placed in the list.
2830  *
2831  *  Implementation relies almost entirely on lookback, rather than
2832  *  looking forwards.  This means that tokenization requires just a
2833  *  single pass of the file, even in the presence of trigraphs and
2834  *  escaped newlines, providing significant performance benefits.
2835  *  Trigraph overhead is low when enabled, negligible when disabled.
2836  */
2837 /* Nonzero if the first token of the line being lexed is CPP_HASH.  */
2838 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2839
2840 void
2841 _cpp_lex_line (pfile, list)
2842      cpp_reader *pfile;
2843      cpp_toklist *list;
2844 {
2845   cpp_token *cur_token, *token_limit;
2846   cpp_buffer *buffer = pfile->buffer;
2847   register const unsigned char *cur = buffer->cur;
2848   unsigned char flags = 0;  /* Flags for the next token.  */
2849   unsigned int first_token = list->tokens_used;  /* First slot we fill.  */
2850
2851   list->line = CPP_BUF_LINE (buffer);
2852   pfile->col_adjust = 0;
2853  expanded:  /* Re-entered once the token array has been enlarged.  */
2854   token_limit = list->tokens + list->tokens_cap;
2855   cur_token = list->tokens + list->tokens_used;
2856
2857   for (; cur < buffer->rlimit && cur_token < token_limit;)
2858     {
2859       unsigned char c = *cur++;
2860
2861       /* Optimize whitespace skipping, as most tokens are probably
2862          separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
2863
2864       if (is_hspace ((unsigned int) c))
2865         {
2866           /* Step back to get the null warning and tab correction.  */
2867           buffer->cur = cur - 1;
2868           skip_whitespace (pfile, IS_DIRECTIVE ());
2869           cur = buffer->cur;
2870
2871           flags = PREV_WHITESPACE;
2872           if (cur == buffer->rlimit)
2873             break;
2874           c = *cur++;
2875         }
2876
2877       /* Initialize current token.  Its type is set in the switch.  */
2878       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2879       cur_token->flags = flags;
2880       flags = 0;
2881       /* Operators are assembled by looking back at the previous token.  */
2882       switch (c)
2883         {
2884         case '0': case '1': case '2': case '3': case '4':
2885         case '5': case '6': case '7': case '8': case '9':
2886           {
2887             int prev_dot;
2888
2889             cur--;              /* Backup character.  */
2890             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2891             if (prev_dot)
2892               cur_token--;
2893             INIT_TOKEN_NAME (list, cur_token);
2894             /* Prepend an immediately previous CPP_DOT token.  */
2895             if (prev_dot)
2896               {
2897                 if (list->name_cap == list->name_used)
2898                   auto_expand_name_space (list);
2899
2900                 cur_token->val.name.len = 1;
2901                 list->namebuf[list->name_used++] = '.';
2902               }
2903
2904           continue_number:  /* Also resumes after an escaped newline.  */
2905             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
2906             buffer->cur = cur;
2907             parse_number (pfile, list, &cur_token->val.name);
2908             cur = buffer->cur;
2909             cur_token++;
2910           }
2911           break;
2912
2913         letter:  /* Entered for '$' when dollars are allowed in identifiers.  */
2914         case '_':
2915         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2916         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2917         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2918         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2919         case 'y': case 'z':
2920         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2921         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2922         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2923         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2924         case 'Y': case 'Z':
2925           cur--;                     /* Backup character.  */
2926           INIT_TOKEN_NAME (list, cur_token);
2927           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
2928
2929         continue_name:  /* Also resumes after an escaped newline.  */
2930           buffer->cur = cur;
2931           parse_name (pfile, list, &cur_token->val.name);
2932           cur = buffer->cur;
2933
2934           /* Find handler for newly created / extended directive.  */
2935           if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2936             _cpp_check_directive (list, cur_token);
2937           cur_token++;
2938           break;
2939
2940         case '\'':
2941           /* Fall through.  */
2942         case '\"':
2943           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2944           /* Do we have a wide string?  */
2945           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2946               && cur_token[-1].val.name.len == 1
2947               && cur_token[-1].val.name.text[0] == 'L'
2948               && !CPP_TRADITIONAL (pfile))
2949             {
2950               /* No need for 'L' any more.  */
2951               list->name_used--;
2952               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2953             }
2954
2955         do_parse_string:
2956           /* Here c is one of ' " or >.  */
2957           INIT_TOKEN_NAME (list, cur_token);
2958           buffer->cur = cur;
2959           parse_string2 (pfile, list, &cur_token->val.name, c,
2960                          c == '"' && !IS_DIRECTIVE());
2961           cur = buffer->cur;
2962           cur_token++;
2963           break;
2964
2965         case '/':
2966           cur_token->type = CPP_DIV;
2967           if (IMMED_TOKEN ())
2968             {
2969               if (PREV_TOKEN_TYPE == CPP_DIV)
2970                 {
2971                   /* We silently allow C++ comments in system headers,
2972                      irrespective of conformance mode, because lots of
2973                      broken systems do that and trying to clean it up
2974                      in fixincludes is a nightmare.  */
2975                   if (CPP_IN_SYSTEM_HEADER (pfile))
2976                     goto do_line_comment;
2977                   else if (CPP_OPTION (pfile, cplusplus_comments))
2978                     {
2979                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2980                           && ! buffer->warned_cplusplus_comments)
2981                         {
2982                           buffer->cur = cur;
2983                           cpp_pedwarn (pfile,
2984                              "C++ style comments are not allowed in ISO C89");
2985                           cpp_pedwarn (pfile,
2986                           "(this will be reported only once per input file)");
2987                           buffer->warned_cplusplus_comments = 1;
2988                         }
2989                     do_line_comment:  /* Chill "--" comments enter here too.  */
2990                       buffer->cur = cur;
2991                       if (cur[-2] != c)
2992                         cpp_warning (pfile,
2993                                      "comment start split across lines");
2994                       if (skip_line_comment2 (pfile))
2995                         cpp_error_with_line (pfile, list->line,
2996                                              cur_token[-1].col,
2997                                              "multi-line comment");
2998
2999                       /* Back-up to first '-' or '/'.  */
3000                       cur_token--;
3001                       if (!CPP_OPTION (pfile, discard_comments)
3002                           && (!IS_DIRECTIVE() || list->dirno == 0))
3003                         save_comment (list, cur_token++, cur,
3004                                       buffer->cur - cur, c);
3005                       cur = buffer->cur;
3006                       /* Unless traditional, the comment acts as whitespace.  */
3007                       if (!CPP_OPTION (pfile, traditional))
3008                         flags = PREV_WHITESPACE;
3009                       break;
3010                     }
3011                 }
3012             }
3013           cur_token++;
3014           break;
3015
3016         case '*':
3017           cur_token->type = CPP_MULT;
3018           if (IMMED_TOKEN ())
3019             {
3020               if (PREV_TOKEN_TYPE == CPP_DIV)
3021                 {
3022                   buffer->cur = cur;
3023                   if (cur[-2] != '/')
3024                     cpp_warning (pfile,
3025                                  "comment start '/*' split across lines");
3026                   if (skip_block_comment2 (pfile))
3027                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3028                                          "unterminated comment");
3029                   else if (buffer->cur[-2] != '*')
3030                     cpp_warning (pfile,
3031                                  "comment end '*/' split across lines");
3032
3033                   /* Back up to opening '/'.  */
3034                   cur_token--;
3035                   if (!CPP_OPTION (pfile, discard_comments)
3036                       && (!IS_DIRECTIVE() || list->dirno == 0))
3037                     save_comment (list, cur_token++, cur,
3038                                   buffer->cur - cur, c);
3039                   cur = buffer->cur;
3040                   /* Unless traditional, the comment acts as whitespace.  */
3041                   if (!CPP_OPTION (pfile, traditional))
3042                     flags = PREV_WHITESPACE;
3043                   break;
3044                 }
3045               else if (CPP_OPTION (pfile, cplusplus))
3046                 {
3047                   /* In C++, there are .* and ->* operators.  */
3048                   if (PREV_TOKEN_TYPE == CPP_DEREF)
3049                     BACKUP_TOKEN (CPP_DEREF_STAR);
3050                   else if (PREV_TOKEN_TYPE == CPP_DOT)
3051                     BACKUP_TOKEN (CPP_DOT_STAR);
3052                 }
3053             }
3054           cur_token++;
3055           break;
3056
3057         case '\n':
3058         case '\r':
3059           handle_newline (cur, buffer->rlimit, c);
3060           if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3061             {
3062               /* Remove the escaped newline.  Then continue to process
3063                  any interrupted name or number.  */
3064               cur_token--;
3065               if (IMMED_TOKEN ())
3066                 {
3067                   cur_token--;
3068                   if (cur_token->type == CPP_NAME)
3069                     goto continue_name;
3070                   else if (cur_token->type == CPP_NUMBER)
3071                     goto continue_number;
3072                   cur_token++;
3073                 }
3074               /* Remember whitespace setting.  */
3075               flags = cur_token->flags;
3076               break;
3077             }
3078           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3079             {
3080               buffer->cur = cur;
3081               cpp_warning (pfile, "backslash and newline separated by space");
3082             }
3083           /* Skip vertical space until we have at least one token to
3084              return.  */
3085           if (cur_token != &list->tokens[first_token])
3086             goto out;
3087           list->line = CPP_BUF_LINE (buffer);
3088           break;
3089
3090         case '-':
3091           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3092             {
3093               if (CPP_OPTION (pfile, chill))
3094                 goto do_line_comment;
3095               REVISE_TOKEN (CPP_MINUS_MINUS);
3096             }
3097           else
3098             PUSH_TOKEN (CPP_MINUS);
3099           break;
3100
3101           /* The digraph flag checking ensures that ## and %:%:
3102              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
3103         make_hash:
3104         case '#':
3105           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3106               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3107             REVISE_TOKEN (CPP_PASTE);
3108           else
3109             PUSH_TOKEN (CPP_HASH);
3110           break;
3111
3112         case ':':
3113           cur_token->type = CPP_COLON;
3114           if (IMMED_TOKEN ())
3115             {
3116               if (PREV_TOKEN_TYPE == CPP_COLON
3117                   && CPP_OPTION (pfile, cplusplus))
3118                 BACKUP_TOKEN (CPP_SCOPE);
3119               /* Digraph: "<:" is a '['  */
3120               else if (PREV_TOKEN_TYPE == CPP_LESS)
3121                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3122               /* Digraph: "%:" is a '#'  */
3123               else if (PREV_TOKEN_TYPE == CPP_MOD)
3124                 {
3125                   (--cur_token)->flags |= DIGRAPH;
3126                   goto make_hash;
3127                 }
3128             }
3129           cur_token++;
3130           break;
3131
3132         case '&':
3133           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3134             REVISE_TOKEN (CPP_AND_AND);
3135           else
3136             PUSH_TOKEN (CPP_AND);
3137           break;
3138
3139         make_or:
3140         case '|':
3141           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3142             REVISE_TOKEN (CPP_OR_OR);
3143           else
3144             PUSH_TOKEN (CPP_OR);
3145           break;
3146
3147         case '+':
3148           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3149             REVISE_TOKEN (CPP_PLUS_PLUS);
3150           else
3151             PUSH_TOKEN (CPP_PLUS);
3152           break;
3153
3154         case '=':
3155             /* This relies on equidistance of "?=" and "?" tokens.  */
3156           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3157             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3158           else
3159             PUSH_TOKEN (CPP_EQ);
3160           break;
3161
3162         case '>':
3163           cur_token->type = CPP_GREATER;
3164           if (IMMED_TOKEN ())
3165             {
3166               if (PREV_TOKEN_TYPE == CPP_GREATER)
3167                 BACKUP_TOKEN (CPP_RSHIFT);
3168               else if (PREV_TOKEN_TYPE == CPP_MINUS)
3169                 BACKUP_TOKEN (CPP_DEREF);
3170               /* Digraph: ":>" is a ']'  */
3171               else if (PREV_TOKEN_TYPE == CPP_COLON)
3172                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3173               /* Digraph: "%>" is a '}'  */
3174               else if (PREV_TOKEN_TYPE == CPP_MOD)
3175                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3176             }
3177           cur_token++;
3178           break;
3179
3180         case '<':
3181           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3182             {
3183               REVISE_TOKEN (CPP_LSHIFT);
3184               break;
3185             }
3186           /* Is this the beginning of a header name?  */
3187           if (list->flags & SYNTAX_INCLUDE)
3188             {
3189               c = '>';  /* Terminator.  */
3190               cur_token->type = CPP_HEADER_NAME;
3191               goto do_parse_string;
3192             }
3193           PUSH_TOKEN (CPP_LESS);
3194           break;
3195
3196         case '%':
3197           /* Digraph: "<%" is a '{'  */
3198           cur_token->type = CPP_MOD;
3199           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3200             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3201           cur_token++;
3202           break;
3203
3204         case '?':
3205           if (cur + 1 < buffer->rlimit && *cur == '?'
3206               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3207             {
3208               /* Handle trigraph.  */
3209               cur++;
3210               switch (*cur++)
3211                 {
3212                 case '(': goto make_open_square;
3213                 case ')': goto make_close_square;
3214                 case '<': goto make_open_brace;
3215                 case '>': goto make_close_brace;
3216                 case '=': goto make_hash;
3217                 case '!': goto make_or;
3218                 case '-': goto make_complement;
3219                 case '/': goto make_backslash;
3220                 case '\'': goto make_xor;
3221                 }
3222             }
3223           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3224             {
3225               /* GNU C++ defines <? and >? operators.  */
3226               if (PREV_TOKEN_TYPE == CPP_LESS)
3227                 {
3228                   REVISE_TOKEN (CPP_MIN);
3229                   break;
3230                 }
3231               else if (PREV_TOKEN_TYPE == CPP_GREATER)
3232                 {
3233                   REVISE_TOKEN (CPP_MAX);
3234                   break;
3235                 }
3236             }
3237           PUSH_TOKEN (CPP_QUERY);
3238           break;
3239
3240         case '.':
3241           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3242               && IMMED_TOKEN ()
3243               && !(cur_token[-1].flags & PREV_WHITESPACE))
3244             {
3245               cur_token -= 2;  /* Back up over the two earlier dots.  */
3246               PUSH_TOKEN (CPP_ELLIPSIS);
3247             }
3248           else
3249             PUSH_TOKEN (CPP_DOT);
3250           break;
3251         /* The make_* labels below are reached from the trigraph switch.  */
3252         make_complement:
3253         case '~': PUSH_TOKEN (CPP_COMPL); break;
3254         make_xor:
3255         case '^': PUSH_TOKEN (CPP_XOR); break;
3256         make_open_brace:
3257         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3258         make_close_brace:
3259         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3260         make_open_square:
3261         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3262         make_close_square:
3263         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3264         make_backslash:
3265         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3266         case '!': PUSH_TOKEN (CPP_NOT); break;
3267         case ',': PUSH_TOKEN (CPP_COMMA); break;
3268         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3269         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3270         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3271
3272         case '$':
3273           if (CPP_OPTION (pfile, dollars_in_ident))
3274             goto letter;
3275           /* Fall through */
3276         default:
3277           cur_token->aux = c;
3278           cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3279           PUSH_TOKEN (CPP_OTHER);
3280           break;
3281         }
3282     }
3283
3284   /* Run out of token space?  */
3285   if (cur_token == token_limit)
3286     {
3287       list->tokens_used = cur_token - list->tokens;
3288       _cpp_expand_token_space (list, 256);
3289       goto expanded;
3290     }
3291   /* Token space remains, so the input buffer ran out.  */
3292   cur_token->flags = flags;
3293   if (cur_token == &list->tokens[first_token])
3294     {
3295       /* FIXME: move this warning to callers who care.  */
3296       if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3297         cpp_warning (pfile, "no newline at end of file");
3298       cur_token++->type = CPP_EOF;
3299     }
3300
3301  out:  /* Common exit: record what was lexed in LIST and BUFFER.  */
3302   list->tokens[first_token].flags |= BOL;
3303   buffer->cur = cur;
3304   list->tokens_used = cur_token - list->tokens;
3305 }
3306
3307 /* Append the spelling of TOKEN at BUFFER and return the position just
3308    past the last byte stored.  The caller must already have made sure
3309    the buffer is large enough.  A non-zero WHITESPACE asks for a single
3310    leading space when the token is flagged PREV_WHITESPACE.  PFILE is
3311    used only to report an unspellable token.  */
3312
3313 static unsigned char *
3314 spell_token (pfile, token, buffer, whitespace)
3315      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
3316      const cpp_token *token;
3317      unsigned char *buffer;
3318      int whitespace;
3319 {
3320   /* The # and ## operator handlers do not want the separating space;
3321      the code that writes out the preprocessed file does.  */
3322   if (whitespace && (token->flags & PREV_WHITESPACE) != 0)
3323     *buffer++ = ' ';
3324
3325   switch (token_spellings[token->type].type)
3326     {
3327     case SPELL_NONE:
3328       cpp_ice (pfile, "Unspellable token");
3329       break;
3330
3331     case SPELL_CHAR:
3332       *buffer++ = token->aux;
3333       break;
3334
3335     case SPELL_IDENT:
3336       memcpy (buffer, token->val.name.text, token->val.name.len);
3337       buffer += token->val.name.len;
3338       break;
3339
3340     case SPELL_OPERATOR:
3341       {
3342         const unsigned char *src;
3343
3344         /* Digraph spellings come from a table of their own.  */
3345         if (token->flags & DIGRAPH)
3346           src = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3347         else
3348           src = token_spellings[token->type].spelling;
3349
3350         /* Copy up to, but not including, the terminating NUL.  */
3351         for (; *src != '\0'; src++)
3352           *buffer++ = *src;
3353       }
3354       break;
3355
3356     case SPELL_STRING:
3357       {
3358         /* Strings use double quotes, anything else single quotes.  */
3359         const unsigned char quote
3360           = (token->type == CPP_STRING || token->type == CPP_WSTRING)
3361             ? '"' : '\'';
3362
3363         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3364           *buffer++ = 'L';
3365         *buffer++ = quote;
3366         memcpy (buffer, token->val.name.text, token->val.name.len);
3367         buffer += token->val.name.len;
3368         *buffer++ = quote;
3369       }
3370       break;
3371     }
3372
3373   return buffer;
3374 }
3375
3376 /* Temporary function for illustrative purposes.  Lexes the input one
3377    logical line at a time and writes each back out, up to CPP_EOF.  */
3378 void
3379 _cpp_lex_file (pfile)
3380      cpp_reader *pfile;
3381 {
3382   cpp_toklist *list;
3383
3384   init_trigraph_map ();
3385   list = (cpp_toklist *) xmalloc (sizeof (*list));
3386   _cpp_init_toklist (list, DUMMY_TOKEN);
3387
3388   /* NOTE(review): LIST is never released on the way out.  */
3389   _cpp_lex_line (pfile, list);
3390   while (list->tokens[0].type != CPP_EOF)
3391     {
3392 #if 0
3393       if (list->dirno)
3394         _cpp_handle_directive (pfile, list);
3395       else
3396 #endif
3397         _cpp_output_list (pfile, list);
3398       _cpp_clear_toklist (list);
3399       _cpp_lex_line (pfile, list);
3400     }
3401 }
3402
3403 /* Temporary, illustrative: spell every token of LIST at pfile->limit.  */
3404 static void
3405 _cpp_output_list (pfile, list)
3406      cpp_reader *pfile;
3407      cpp_toklist *list;
3408 {
3409   cpp_token *token;
3410
3411   for (token = list->tokens; token < list->tokens + list->tokens_used; token++)
3412     {
3413       CPP_RESERVE (pfile, TOKEN_LEN (token));
3414       pfile->limit = spell_token (pfile, token, pfile->limit, 1);
3415     }
3416 }
3417
3418 #endif