gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "intl.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
  29 #ifdef HAVE_MMAP_FILE
  30 # include <sys/mman.h>
  31 #endif
  32
/* Raw character access to a cpp_buffer.  CUR is the read pointer and
   RLIMIT bounds the readable region (reads happen only while
   CUR < RLIMIT).  */

/* Yield the character N positions past CUR without consuming it, or
   EOF when no more than N characters remain.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
/* Yield the next character and advance CUR; at the end of the buffer
   yield EOF and leave CUR alone.  */
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
/* Move CUR by N characters.  No bounds check is made; this file also
   uses it with N == -1 to step backwards.  */
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to CPP_BUFFER (pfile).  A variable
   named `pfile' must be in scope wherever these are used.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
  43
/* Forward declarations of file-local functions: comment and string
   scanning helpers.  */
static void skip_block_comment  PARAMS ((cpp_reader *));
static void skip_line_comment   PARAMS ((cpp_reader *));
static int maybe_macroexpand    PARAMS ((cpp_reader *, long));
static int skip_comment         PARAMS ((cpp_reader *, int));
static int copy_comment         PARAMS ((cpp_reader *, int));
static void skip_string         PARAMS ((cpp_reader *, int));
static void parse_string        PARAMS ((cpp_reader *, int));
static U_CHAR *find_position    PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
static void null_warning        PARAMS ((cpp_reader *, unsigned int));

/* Forward declarations of file-local functions: output and token
   list helpers.  */
static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
                                         size_t, FILE *));
static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
                                         unsigned int));
static void bump_column         PARAMS ((cpp_printer *, unsigned int,
                                         unsigned int));
static void expand_name_space   PARAMS ((cpp_toklist *, unsigned int));
static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
                                         unsigned int));

/* Grow LIST's name buffer by one byte plus half of its current
   capacity.  */
#define auto_expand_name_space(list) \
    expand_name_space ((list), 1 + (list)->name_cap / 2)
  66
  67 #ifdef NEW_LEXER
  68
  69 void init_trigraph_map PARAMS ((void));
  70 static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
  71                                                 unsigned char *));
  72 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
  73                                                      const unsigned char *));
  74 static int skip_block_comment2 PARAMS ((cpp_reader *));
  75 static int skip_line_comment2 PARAMS ((cpp_reader *));
  76 static void skip_whitespace PARAMS ((cpp_reader *, int));
  77 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  78 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  79 static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
  80                                   unsigned int, int));
  81 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
  82 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
  83                                   const unsigned char *,
  84                                   unsigned int, unsigned int));
  85 void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));
  86
  87 static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));
  88
  89 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
  90                                             unsigned char *, int));
  91
  92 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
  93                                           cpp_token *));
  94
  95 /* Macros on a cpp_name.  */
  96 #define INIT_TOKEN_NAME(list, token) \
  97   do {(token)->val.name.len = 0; \
  98       (token)->val.name.text = (list)->namebuf + (list)->name_used; \
  99       (list)->tokens_used = token - (list)->tokens + 1; \
 100   } while (0)
 101
 102 /* Maybe put these in the ISTABLE eventually.  */
 103 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
 104 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
 105
 106 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 107    character, if any, is in buffer.  */
 108 #define handle_newline(cur, limit, c) \
 109   do {\
 110   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 111     (cur)++; \
 112   CPP_BUMP_LINE_CUR (pfile, (cur)); \
 113   pfile->col_adjust = 0; \
 114   } while (0)
 115
 116 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
 117 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 118
 119 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
 120 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
 121 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
 122 #define BACKUP_DIGRAPH(ttype) do { \
 123   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 124
 125 /* An upper bound on the number of bytes needed to spell a token,
 126    including preceding whitespace.  */
 127 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
 128                                SPELL_NONE ? (token)->val.name.len: 0))
 129
 130 #endif
 131
/* Order here matters.  Those beyond SPELL_NONE store their spelling
   in the token list, and its length in the token->val.name.len.
   Code in this file tests `> SPELL_NONE' to decide whether a token
   owns text in the list's name buffer.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_NONE,
  SPELL_CHAR,    /* FIXME: revert order of NONE and CHAR after transition. */
  SPELL_IDENT,
  SPELL_STRING
};
 142
/* Build the spelling table from TTYPE_TABLE.  Each one-letter macro
   turns a table entry into an initializer that pairs a spell_type
   with the spelling S; the E argument is not used here.  */
#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
#define I(e, s) {SPELL_IDENT, s},
#define S(e, s) {SPELL_STRING, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},

/* Indexed by token type.  The table has N_TTYPES + 1 slots; the
   initializer ends with an extra {0, 0} entry after the expansion of
   TTYPE_TABLE.  */
static const struct token_spelling
{
  ENUM_BITFIELD(spell_type) type : CHAR_BIT;
  const U_CHAR *spelling;
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };

/* The helper macros are only needed for the initializer above.  */
#undef T
#undef I
#undef S
#undef C
#undef N
 160
 161 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 162
 163 void
 164 _cpp_grow_token_buffer (pfile, n)
 165      cpp_reader *pfile;
 166      long n;
 167 {
 168   long old_written = CPP_WRITTEN (pfile);
 169   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 170   pfile->token_buffer = (U_CHAR *)
 171     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 172   CPP_SET_WRITTEN (pfile, old_written);
 173 }
 174
 175 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
 176    If BUFFER != NULL, then use the LENGTH characters in BUFFER
 177    as the new input buffer.
 178    Return the new buffer, or NULL on failure.  */
 179
 180 cpp_buffer *
 181 cpp_push_buffer (pfile, buffer, length)
 182      cpp_reader *pfile;
 183      const U_CHAR *buffer;
 184      long length;
 185 {
 186   cpp_buffer *buf = CPP_BUFFER (pfile);
 187   cpp_buffer *new;
 188   if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
 189     {
 190       cpp_fatal (pfile, "macro or `#include' recursion too deep");
 191       return NULL;
 192     }
 193
 194   new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
 195
 196   new->if_stack = pfile->if_stack;
 197   new->buf = new->cur = buffer;
 198   new->rlimit = buffer + length;
 199   new->prev = buf;
 200   new->mark = NULL;
 201   new->line_base = NULL;
 202
 203   CPP_BUFFER (pfile) = new;
 204   return new;
 205 }
 206
 207 cpp_buffer *
 208 cpp_pop_buffer (pfile)
 209      cpp_reader *pfile;
 210 {
 211   cpp_buffer *buf = CPP_BUFFER (pfile);
 212   if (ACTIVE_MARK_P (pfile))
 213     cpp_ice (pfile, "mark active in cpp_pop_buffer");
 214
 215   if (buf->ihash)
 216     {
 217       _cpp_unwind_if_stack (pfile, buf);
 218       if (buf->buf)
 219         free ((PTR) buf->buf);
 220       if (pfile->system_include_depth)
 221         pfile->system_include_depth--;
 222       if (pfile->potential_control_macro)
 223         {
 224           buf->ihash->control_macro = pfile->potential_control_macro;
 225           pfile->potential_control_macro = 0;
 226         }
 227       pfile->input_stack_listing_current = 0;
 228     }
 229   else if (buf->macro)
 230     {
 231       cpp_hashnode *m = buf->macro;
 232
 233       m->disabled = 0;
 234       if ((m->type == T_FMACRO && buf->mapped)
 235           || m->type == T_SPECLINE || m->type == T_FILE
 236           || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
 237           || m->type == T_STDC)
 238         free ((PTR) buf->buf);
 239     }
 240   CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
 241   free (buf);
 242   pfile->buffer_stack_depth--;
 243   return CPP_BUFFER (pfile);
 244 }
 245
 246 /* Deal with the annoying semantics of fwrite.  */
 247 static void
 248 safe_fwrite (pfile, buf, len, fp)
 249      cpp_reader *pfile;
 250      const U_CHAR *buf;
 251      size_t len;
 252      FILE *fp;
 253 {
 254   size_t count;
 255
 256   while (len)
 257     {
 258       count = fwrite (buf, 1, len, fp);
 259       if (count == 0)
 260         goto error;
 261       len -= count;
 262       buf += count;
 263     }
 264   return;
 265
 266  error:
 267   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 268 }
 269
 270 /* Notify the compiler proper that the current line number has jumped,
 271    or the current file name has changed.  */
 272
 273 static void
 274 output_line_command (pfile, print, line)
 275      cpp_reader *pfile;
 276      cpp_printer *print;
 277      unsigned int line;
 278 {
 279   cpp_buffer *ip = cpp_file_buffer (pfile);
 280   enum { same = 0, enter, leave, rname } change;
 281   static const char * const codes[] = { "", " 1", " 2", "" };
 282
 283   if (CPP_OPTION (pfile, no_line_commands))
 284     return;
 285
 286   /* Determine whether the current filename has changed, and if so,
 287      how.  'nominal_fname' values are unique, so they can be compared
 288      by comparing pointers.  */
 289   if (ip->nominal_fname == print->last_fname)
 290     change = same;
 291   else
 292     {
 293       if (pfile->buffer_stack_depth == print->last_bsd)
 294         change = rname;
 295       else
 296         {
 297           if (pfile->buffer_stack_depth > print->last_bsd)
 298             change = enter;
 299           else
 300             change = leave;
 301           print->last_bsd = pfile->buffer_stack_depth;
 302         }
 303       print->last_fname = ip->nominal_fname;
 304     }
 305   /* If the current file has not changed, we can output a few newlines
 306      instead if we want to increase the line number by a small amount.
 307      We cannot do this if print->lineno is zero, because that means we
 308      haven't output any line commands yet.  (The very first line
 309      command output is a `same_file' command.)  */
 310   if (change == same && print->lineno != 0
 311       && line >= print->lineno && line < print->lineno + 8)
 312     {
 313       while (line > print->lineno)
 314         {
 315           putc ('\n', print->outf);
 316           print->lineno++;
 317         }
 318       return;
 319     }
 320
 321 #ifndef NO_IMPLICIT_EXTERN_C
 322   if (CPP_OPTION (pfile, cplusplus))
 323     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 324              codes[change],
 325              ip->system_header_p ? " 3" : "",
 326              (ip->system_header_p == 2) ? " 4" : "");
 327   else
 328 #endif
 329     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 330              codes[change],
 331              ip->system_header_p ? " 3" : "");
 332   print->lineno = line;
 333 }
 334
 335 /* Write the contents of the token_buffer to the output stream, and
 336    clear the token_buffer.  Also handles generating line commands and
 337    keeping track of file transitions.  */
 338
 339 void
 340 cpp_output_tokens (pfile, print)
 341      cpp_reader *pfile;
 342      cpp_printer *print;
 343 {
 344   cpp_buffer *ip;
 345
 346   if (CPP_WRITTEN (pfile) - print->written)
 347     {
 348       if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
 349         print->lineno++;
 350       safe_fwrite (pfile, pfile->token_buffer,
 351                    CPP_WRITTEN (pfile) - print->written, print->outf);
 352     }
 353
 354   ip = cpp_file_buffer (pfile);
 355   if (ip)
 356     output_line_command (pfile, print, CPP_BUF_LINE (ip));
 357
 358   CPP_SET_WRITTEN (pfile, print->written);
 359 }
 360
 361 /* Helper for cpp_output_list - increases the column number to match
 362    what we expect it to be.  */
 363
 364 static void
 365 bump_column (print, from, to)
 366      cpp_printer *print;
 367      unsigned int from, to;
 368 {
 369   unsigned int tabs, spcs;
 370   unsigned int delta = to - from;
 371
 372   /* Only if FROM is 0, advance by tabs.  */
 373   if (from == 0)
 374     tabs = delta / 8, spcs = delta % 8;
 375   else
 376     tabs = 0, spcs = delta;
 377
 378   while (tabs--) putc ('\t', print->outf);
 379   while (spcs--) putc (' ', print->outf);
 380 }
 381
 382 /* Write out the list L onto pfile->token_buffer.  This function is
 383    incomplete:
 384
 385    1) pfile->token_buffer is not going to continue to exist.
 386    2) At the moment, tokens don't carry the information described
 387    in cpplib.h; they are all strings.
 388    3) The list has to be a complete line, and has to be written starting
 389    at the beginning of a line.  */
 390
 391 void
 392 cpp_output_list (pfile, print, list)
 393      cpp_reader *pfile;
 394      cpp_printer *print;
 395      const cpp_toklist *list;
 396 {
 397   unsigned int i;
 398   unsigned int curcol = 1;
 399
 400   /* XXX Probably does not do what is intended.  */
 401   if (print->lineno != list->line)
 402     output_line_command (pfile, print, list->line);
 403
 404   for (i = 0; i < list->tokens_used; i++)
 405     {
 406       if (TOK_TYPE (list, i) == CPP_VSPACE)
 407         {
 408           output_line_command (pfile, print, list->tokens[i].aux);
 409           continue;
 410         }
 411
 412       if (curcol < TOK_COL (list, i))
 413         {
 414           /* Insert space to bring the column to what it should be.  */
 415           bump_column (print, curcol - 1, TOK_COL (list, i));
 416           curcol = TOK_COL (list, i);
 417         }
 418       /* XXX We may have to insert space to prevent an accidental
 419          token paste.  */
 420       safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
 421       curcol += TOK_LEN (list, i);
 422     }
 423 }
 424
 425 /* Scan a string (which may have escape marks), perform macro expansion,
 426    and write the result to the token_buffer.  */
 427
 428 void
 429 _cpp_expand_to_buffer (pfile, buf, length)
 430      cpp_reader *pfile;
 431      const U_CHAR *buf;
 432      int length;
 433 {
 434   cpp_buffer *stop;
 435   enum cpp_ttype token;
 436   U_CHAR *buf1;
 437
 438   if (length < 0)
 439     {
 440       cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
 441       return;
 442     }
 443
 444   /* Copy the buffer, because it might be in an unsafe place - for
 445      example, a sequence on the token_buffer, where the pointers will
 446      be invalidated if we enlarge the token_buffer.  */
 447   buf1 = alloca (length);
 448   memcpy (buf1, buf, length);
 449
 450   /* Set up the input on the input stack.  */
 451   stop = CPP_BUFFER (pfile);
 452   if (cpp_push_buffer (pfile, buf1, length) == NULL)
 453     return;
 454   CPP_BUFFER (pfile)->has_escapes = 1;
 455
 456   /* Scan the input, create the output.  */
 457   for (;;)
 458     {
 459       token = cpp_get_token (pfile);
 460       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 461         break;
 462     }
 463 }
 464
 465 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 466
 467 void
 468 cpp_scan_buffer_nooutput (pfile)
 469      cpp_reader *pfile;
 470 {
 471   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 472   enum cpp_ttype token;
 473   unsigned int old_written = CPP_WRITTEN (pfile);
 474   /* In no-output mode, we can ignore everything but directives.  */
 475   for (;;)
 476     {
 477       if (! pfile->only_seen_white)
 478         _cpp_skip_rest_of_line (pfile);
 479       token = cpp_get_token (pfile);
 480       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 481         break;
 482     }
 483   CPP_SET_WRITTEN (pfile, old_written);
 484 }
 485
 486 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 487
 488 void
 489 cpp_scan_buffer (pfile, print)
 490      cpp_reader *pfile;
 491      cpp_printer *print;
 492 {
 493   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 494   enum cpp_ttype token;
 495
 496   for (;;)
 497     {
 498       token = cpp_get_token (pfile);
 499       if (token == CPP_VSPACE || token == CPP_EOF
 500           /* XXX Temporary kluge - force flush after #include only */
 501           || (token == CPP_DIRECTIVE
 502               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
 503         {
 504           cpp_output_tokens (pfile, print);
 505           if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 506             return;
 507         }
 508     }
 509 }
 510
 511 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).  */
 512
 513 cpp_buffer *
 514 cpp_file_buffer (pfile)
 515      cpp_reader *pfile;
 516 {
 517   cpp_buffer *ip;
 518
 519   for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
 520     if (ip->ihash != NULL)
 521       return ip;
 522   return NULL;
 523 }
 524
 525 /* Token-buffer helper functions.  */
 526
 527 /* Expand a token list's string space. It is *vital* that
 528    list->tokens_used is correct, to get pointer fix-up right.  */
 529 static void
 530 expand_name_space (list, len)
 531      cpp_toklist *list;
 532      unsigned int len;
 533 {
 534   const U_CHAR *old_namebuf;
 535
 536   old_namebuf = list->namebuf;
 537   list->name_cap += len;
 538   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 539
 540   /* Fix up token text pointers.  */
 541   if (list->namebuf != old_namebuf)
 542     {
 543       unsigned int i;
 544
 545       for (i = 0; i < list->tokens_used; i++)
 546         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 547           list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
 548     }
 549 }
 550
 551 /* Expand the number of tokens in a list.  */
 552 void
 553 _cpp_expand_token_space (list, count)
 554      cpp_toklist *list;
 555      unsigned int count;
 556 {
 557   unsigned int n;
 558
 559   list->tokens_cap += count;
 560   n = list->tokens_cap;
 561   if (list->flags & LIST_OFFSET)
 562     list->tokens--, n++;
 563   list->tokens = (cpp_token *)
 564     xrealloc (list->tokens, n * sizeof (cpp_token));
 565   if (list->flags & LIST_OFFSET)
 566     list->tokens++;             /* Skip the dummy.  */
 567 }
 568
 569 /* Initialize a token list.  If flags is DUMMY_TOKEN, we allocate
 570    an extra token in front of the token list, as this allows the lexer
 571    to always peek at the previous token without worrying about
 572    underflowing the list, and some initial space.  Otherwise, no
 573    token- or name-space is allocated, and there is no dummy token.  */
 574 void
 575 _cpp_init_toklist (list, flags)
 576      cpp_toklist *list;
 577      int flags;
 578 {
 579   /* We malloc zero bytes because we may want to realloc later, and
 580      some old implementations don't like realloc-ing a null pointer.  */
 581   if (flags == NO_DUMMY_TOKEN)
 582     {
 583       list->tokens_cap = 0;
 584       list->tokens = (cpp_token *) malloc (0);
 585       list->name_cap = 0;
 586       list->flags = 0;
 587     }
 588   else
 589     {
 590       /* Initialize token space.  Put a dummy token before the start
 591          that will fail matches.  */
 592       list->tokens_cap = 256;   /* 4K's worth.  */
 593       list->tokens = (cpp_token *)
 594         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 595       list->tokens[0].type = CPP_EOF;
 596       list->tokens++;
 597
 598       /* Initialize name space.  */
 599       list->name_cap = 1024;
 600       list->flags = LIST_OFFSET;
 601     }
 602
 603   /* Allocate name space.  */
 604   list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 605
 606   _cpp_clear_toklist (list);
 607 }
 608
 609 /* Clear a token list.  */
 610 void
 611 _cpp_clear_toklist (list)
 612      cpp_toklist *list;
 613 {
 614   list->tokens_used = 0;
 615   list->name_used = 0;
 616   list->dirno = -1;
 617   list->flags &= LIST_OFFSET;  /* clear all but that one */
 618 }
 619
 620 /* Free a token list.  Does not free the list itself, which may be
 621    embedded in a larger structure.  */
 622 void
 623 _cpp_free_toklist (list)
 624      cpp_toklist *list;
 625 {
 626   if (list->flags & LIST_OFFSET)
 627     free (list->tokens - 1);    /* Backup over dummy token.  */
 628   else
 629     free (list->tokens);
 630   free (list->namebuf);
 631 }
 632
 633 /* Slice a token list: copy the sublist [START, FINISH) into COPY.
 634    COPY is assumed not to be initialized.  The comment space is not
 635    copied.  */
 636 void
 637 _cpp_slice_toklist (copy, start, finish)
 638      cpp_toklist *copy;
 639      const cpp_token *start, *finish;
 640 {
 641   unsigned int i, n;
 642   size_t bytes;
 643
 644   n = finish - start;
 645   copy->tokens_cap = n;
 646   copy->tokens = (cpp_token *) xmalloc (n * sizeof (cpp_token));
 647   memcpy (copy->tokens, start, n * sizeof (cpp_token));
 648
 649   bytes = 0;
 650   for (i = 0; i < n; i++)
 651     if (token_spellings[start[i].type].type > SPELL_NONE)
 652       bytes += start[i].val.name.len;
 653
 654   copy->namebuf = xmalloc (bytes);
 655   bytes = 0;
 656   for (i = 0; i < n; i++)
 657     if (token_spellings[start[i].type].type > SPELL_NONE)
 658       {
 659         memcpy (copy->namebuf + bytes,
 660                 start[i].val.name.text, start[i].val.name.len);
 661         copy->tokens[i].val.name.text = copy->namebuf + bytes;
 662         bytes += start[i].val.name.len;
 663       }
 664
 665   copy->tokens_cap = n;
 666   copy->tokens_used = n;
 667   copy->name_used = bytes;
 668   copy->name_cap = bytes;
 669
 670   copy->flags = 0;
 671   copy->dirno = -1;
 672 }
 673
 674 /* Shrink a token list down to the minimum size.  */
 675 void
 676 _cpp_squeeze_toklist (list)
 677      cpp_toklist *list;
 678 {
 679   long delta;
 680   const U_CHAR *old_namebuf;
 681
 682   if (list->flags & LIST_OFFSET)
 683     {
 684       list->tokens--;
 685       memmove (list->tokens, list->tokens + 1,
 686                list->tokens_used * sizeof (cpp_token));
 687       list->tokens = xrealloc (list->tokens,
 688                                list->tokens_used * sizeof (cpp_token));
 689       list->flags &= ~LIST_OFFSET;
 690     }
 691   else
 692     list->tokens = xrealloc (list->tokens,
 693                              list->tokens_used * sizeof (cpp_token));
 694   list->tokens_cap = list->tokens_used;
 695
 696   old_namebuf = list->namebuf;
 697   list->namebuf = xrealloc (list->namebuf, list->name_used);
 698   list->name_cap = list->name_used;
 699
 700   /* Fix up token text pointers.  */
 701   delta = list->namebuf - old_namebuf;
 702   if (delta)
 703     {
 704       unsigned int i;
 705
 706       for (i = 0; i < list->tokens_used; i++)
 707         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 708           list->tokens[i].val.name.text += delta;
 709     }
 710 }
 711
 712 /* Compare two tokens.  */
 713 int
 714 _cpp_equiv_tokens (a, b)
 715      const cpp_token *a, *b;
 716 {
 717   if (a->type != b->type
 718       || a->flags != b->flags
 719       || a->aux != b->aux)
 720     return 0;
 721
 722   if (token_spellings[a->type].type > SPELL_NONE)
 723     {
 724       if (a->val.name.len != b->val.name.len
 725           || ustrncmp(a->val.name.text,
 726                       b->val.name.text,
 727                       a->val.name.len))
 728         return 0;
 729     }
 730   return 1;
 731 }
 732
 733 /* Compare two token lists.  */
 734 int
 735 _cpp_equiv_toklists (a, b)
 736      const cpp_toklist *a, *b;
 737 {
 738   unsigned int i;
 739
 740   if (a->tokens_used != b->tokens_used)
 741     return 0;
 742
 743   for (i = 0; i < a->tokens_used; i++)
 744     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 745       return 0;
 746   return 1;
 747 }
 748
 749 /* Scan until we encounter a token of type STOP or a newline, and
 750    create a token list for it.  Does not macro-expand or execute
 751    directives.  The final token is not included in the list or
 752    consumed from the input.  Returns the type of the token stopped at. */
 753
 754 enum cpp_ttype
 755 _cpp_scan_until (pfile, list, stop)
 756      cpp_reader *pfile;
 757      cpp_toklist *list;
 758      enum cpp_ttype stop;
 759 {
 760   int i, col;
 761   long written, len;
 762   enum cpp_ttype type;
 763   int space_before;
 764
 765   _cpp_clear_toklist (list);
 766   list->line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 767
 768   written = CPP_WRITTEN (pfile);
 769   i = 0;
 770   space_before = 0;
 771   for (;;)
 772     {
 773       col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
 774       type = _cpp_lex_token (pfile);
 775       len = CPP_WRITTEN (pfile) - written;
 776       CPP_SET_WRITTEN (pfile, written);
 777       if (type == CPP_HSPACE)
 778         {
 779           if (CPP_PEDANTIC (pfile))
 780             pedantic_whitespace (pfile, pfile->token_buffer + written, len);
 781           space_before = 1;
 782           continue;
 783         }
 784       else if (type == CPP_COMMENT)
 785         /* Only happens when processing -traditional macro definitions.
 786            Do not give this a token entry, but do not change space_before
 787            either.  */
 788         continue;
 789
 790       if (list->tokens_used >= list->tokens_cap)
 791         _cpp_expand_token_space (list, 256);
 792       if (list->name_used + len >= list->name_cap)
 793         expand_name_space (list, list->name_used + len + 1 - list->name_cap);
 794
 795       if (type == CPP_MACRO)
 796         type = CPP_NAME;
 797
 798       if (type == CPP_VSPACE || type == stop)
 799         break;
 800
 801       list->tokens_used++;
 802       TOK_TYPE  (list, i) = type;
 803       TOK_COL   (list, i) = col;
 804       TOK_AUX   (list, i) = 0;
 805       TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;
 806
 807       TOK_LEN (list, i) = len;
 808       if (token_spellings[type].type > SPELL_NONE)
 809         {
 810           memcpy (list->namebuf + list->name_used, CPP_PWRITTEN (pfile), len);
 811           TOK_NAME (list, i) = list->namebuf + list->name_used;
 812           list->name_used += len;
 813         }
 814       else
 815         TOK_NAME (list, i) = token_spellings[type].spelling;
 816       i++;
 817       space_before = 0;
 818     }
 819
 820   /* XXX Temporary kluge: put back the newline (or whatever).  */
 821   FORWARD(-1);
 822
 823   /* Don't consider the first token to have white before.  */
 824   TOK_FLAGS (list, 0) &= ~PREV_WHITESPACE;
 825   return type;
 826 }
 827
 828 /* Skip a C-style block comment.  We know it's a comment, and point is
 829    at the second character of the starter.  */
 830 static void
 831 skip_block_comment (pfile)
 832      cpp_reader *pfile;
 833 {
 834   unsigned int line, col;
 835   const U_CHAR *limit, *cur;
 836
 837   FORWARD(1);
 838   line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 839   col = CPP_BUF_COL (CPP_BUFFER (pfile));
 840   limit = CPP_BUFFER (pfile)->rlimit;
 841   cur = CPP_BUFFER (pfile)->cur;
 842
 843   while (cur < limit)
 844     {
 845       char c = *cur++;
 846       if (c == '\n' || c == '\r')
 847         {
 848           /* \r cannot be a macro escape marker here. */
 849           if (!ACTIVE_MARK_P (pfile))
 850             CPP_BUMP_LINE_CUR (pfile, cur);
 851         }
 852       else if (c == '*')
 853         {
 854           /* Check for teminator.  */
 855           if (cur < limit && *cur == '/')
 856             goto out;
 857
 858           /* Warn about comment starter embedded in comment.  */
 859           if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
 860             cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
 861                                    cur - CPP_BUFFER (pfile)->line_base,
 862                                    "'/*' within comment");
 863         }
 864     }
 865
 866   cpp_error_with_line (pfile, line, col, "unterminated comment");
 867   cur--;
 868  out:
 869   CPP_BUFFER (pfile)->cur = cur + 1;
 870 }
 871
 872 /* Skip a C++/Chill line comment.  We know it's a comment, and point
 873    is at the second character of the initiator.  */
 874 static void
 875 skip_line_comment (pfile)
 876      cpp_reader *pfile;
 877 {
 878   FORWARD(1);
 879   for (;;)
 880     {
 881       int c = GETC ();
 882
 883       /* We don't have to worry about EOF in here.  */
 884       if (c == '\n')
 885         {
 886           /* Don't consider final '\n' to be part of comment.  */
 887           FORWARD(-1);
 888           return;
 889         }
 890       else if (c == '\r')
 891         {
 892           /* \r cannot be a macro escape marker here. */
 893           if (!ACTIVE_MARK_P (pfile))
 894             CPP_BUMP_LINE (pfile);
 895           if (CPP_OPTION (pfile, warn_comments))
 896             cpp_warning (pfile, "backslash-newline within line comment");
 897         }
 898     }
 899 }
 900
 901 /* Skip a comment - C, C++, or Chill style.  M is the first character
 902    of the comment marker.  If this really is a comment, skip to its
 903    end and return ' '.  If this is not a comment, return M (which will
 904    be '/' or '-').  */
 905
 906 static int
 907 skip_comment (pfile, m)
 908      cpp_reader *pfile;
 909      int m;
 910 {
 911   if (m == '/' && PEEKC() == '*')
 912     {
 913       skip_block_comment (pfile);
 914       return ' ';
 915     }
 916   else if (m == '/' && PEEKC() == '/')
 917     {
 918       if (CPP_BUFFER (pfile)->system_header_p)
 919         {
 920           /* We silently allow C++ comments in system headers, irrespective
 921              of conformance mode, because lots of busted systems do that
 922              and trying to clean it up in fixincludes is a nightmare.  */
 923           skip_line_comment (pfile);
 924           return ' ';
 925         }
 926       else if (CPP_OPTION (pfile, cplusplus_comments))
 927         {
 928           if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
 929             {
 930               if (CPP_WTRADITIONAL (pfile))
 931                 cpp_pedwarn (pfile,
 932                         "C++ style comments are not allowed in traditional C");
 933               else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
 934                 cpp_pedwarn (pfile,
 935                         "C++ style comments are not allowed in ISO C89");
 936               if (CPP_WTRADITIONAL (pfile)
 937                   || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
 938                 cpp_pedwarn (pfile,
 939                            "(this will be reported only once per input file)");
 940               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
 941             }
 942           skip_line_comment (pfile);
 943           return ' ';
 944         }
 945       else
 946         return m;
 947     }
 948   else if (m == '-' && PEEKC() == '-'
 949            && CPP_OPTION (pfile, chill))
 950     {
 951       skip_line_comment (pfile);
 952       return ' ';
 953     }
 954   else
 955     return m;
 956 }
 957
 958 /* Identical to skip_comment except that it copies the comment into the
 959    token_buffer.  This is used if !discard_comments.  */
 960 static int
 961 copy_comment (pfile, m)
 962      cpp_reader *pfile;
 963      int m;
 964 {
 965   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
 966   const U_CHAR *limit;
 967
 968   if (skip_comment (pfile, m) == m)
 969     return m;
 970
 971   limit = CPP_BUFFER (pfile)->cur;
 972   CPP_RESERVE (pfile, limit - start + 2);
 973   CPP_PUTC_Q (pfile, m);
 974   for (; start <= limit; start++)
 975     if (*start != '\r')
 976       CPP_PUTC_Q (pfile, *start);
 977
 978   return ' ';
 979 }
 980
 981 static void
 982 null_warning (pfile, count)
 983      cpp_reader *pfile;
 984      unsigned int count;
 985 {
 986   if (count == 1)
 987     cpp_warning (pfile, "embedded null character ignored");
 988   else
 989     cpp_warning (pfile, "embedded null characters ignored");
 990 }
 991
 992 /* Skip whitespace \-newline and comments.  Does not macro-expand.  */
 993
 994 void
 995 _cpp_skip_hspace (pfile)
 996      cpp_reader *pfile;
 997 {
 998   unsigned int null_count = 0;
 999   int c;
1000
1001   while (1)
1002     {
1003       c = GETC();
1004       if (c == EOF)
1005         goto out;
1006       else if (is_hspace(c))
1007         {
1008           if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
1009             cpp_pedwarn (pfile, "%s in preprocessing directive",
1010                          c == '\f' ? "formfeed" : "vertical tab");
1011           else if (c == '\0')
1012             null_count++;
1013         }
1014       else if (c == '\r')
1015         {
1016           /* \r is a backslash-newline marker if !has_escapes, and
1017              a deletable-whitespace or no-reexpansion marker otherwise. */
1018           if (CPP_BUFFER (pfile)->has_escapes)
1019             {
1020               if (PEEKC() == ' ')
1021                 FORWARD(1);
1022               else
1023                 break;
1024             }
1025           else
1026             CPP_BUMP_LINE (pfile);
1027         }
1028       else if (c == '/' || c == '-')
1029         {
1030           c = skip_comment (pfile, c);
1031           if (c  != ' ')
1032             break;
1033         }
1034       else
1035         break;
1036     }
1037   FORWARD(-1);
1038  out:
1039   if (null_count)
1040     null_warning (pfile, null_count);
1041 }
1042
1043 /* Read and discard the rest of the current line.  */
1044
1045 void
1046 _cpp_skip_rest_of_line (pfile)
1047      cpp_reader *pfile;
1048 {
1049   for (;;)
1050     {
1051       int c = GETC();
1052       switch (c)
1053         {
1054         case '\n':
1055           FORWARD(-1);
1056         case EOF:
1057           return;
1058
1059         case '\r':
1060           if (! CPP_BUFFER (pfile)->has_escapes)
1061             CPP_BUMP_LINE (pfile);
1062           break;
1063
1064         case '\'':
1065         case '\"':
1066           skip_string (pfile, c);
1067           break;
1068
1069         case '/':
1070         case '-':
1071           skip_comment (pfile, c);
1072           break;
1073
1074         case '\f':
1075         case '\v':
1076           if (CPP_PEDANTIC (pfile))
1077             cpp_pedwarn (pfile, "%s in preprocessing directive",
1078                          c == '\f' ? "formfeed" : "vertical tab");
1079           break;
1080
1081         }
1082     }
1083 }
1084
1085 /* Parse an identifier starting with C.  */
1086
1087 void
1088 _cpp_parse_name (pfile, c)
1089      cpp_reader *pfile;
1090      int c;
1091 {
1092   for (;;)
1093   {
1094       if (! is_idchar(c))
1095       {
1096           FORWARD (-1);
1097           break;
1098       }
1099
1100       if (c == '$' && CPP_PEDANTIC (pfile))
1101         cpp_pedwarn (pfile, "`$' in identifier");
1102
1103       CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
1104       CPP_PUTC_Q (pfile, c);
1105       c = GETC();
1106       if (c == EOF)
1107         break;
1108   }
1109   return;
1110 }
1111
1112 /* Parse and skip over a string starting with C.  A single quoted
1113    string is treated like a double -- some programs (e.g., troff) are
1114    perverse this way.  (However, a single quoted string is not allowed
1115    to extend over multiple lines.)  */
1116 static void
1117 skip_string (pfile, c)
1118      cpp_reader *pfile;
1119      int c;
1120 {
1121   unsigned int start_line, start_column;
1122   unsigned int null_count = 0;
1123
1124   start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
1125   start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
1126   while (1)
1127     {
1128       int cc = GETC();
1129       switch (cc)
1130         {
1131         case EOF:
1132           cpp_error_with_line (pfile, start_line, start_column,
1133                                "unterminated string or character constant");
1134           if (pfile->multiline_string_line != start_line
1135               && pfile->multiline_string_line != 0)
1136             cpp_error_with_line (pfile,
1137                                  pfile->multiline_string_line, -1,
1138                          "possible real start of unterminated constant");
1139           pfile->multiline_string_line = 0;
1140           goto out;
1141
1142         case '\0':
1143           null_count++;
1144           break;
1145
1146         case '\n':
1147           CPP_BUMP_LINE (pfile);
1148           /* In Fortran and assembly language, silently terminate
1149              strings of either variety at end of line.  This is a
1150              kludge around not knowing where comments are in these
1151              languages.  */
1152           if (CPP_OPTION (pfile, lang_fortran)
1153               || CPP_OPTION (pfile, lang_asm))
1154             {
1155               FORWARD(-1);
1156               goto out;
1157             }
1158           /* Character constants may not extend over multiple lines.
1159              In Standard C, neither may strings.  We accept multiline
1160              strings as an extension.  */
1161           if (c == '\'')
1162             {
1163               cpp_error_with_line (pfile, start_line, start_column,
1164                                    "unterminated character constant");
1165               FORWARD(-1);
1166               goto out;
1167             }
1168           if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
1169             cpp_pedwarn_with_line (pfile, start_line, start_column,
1170                                    "string constant runs past end of line");
1171           if (pfile->multiline_string_line == 0)
1172             pfile->multiline_string_line = start_line;
1173           break;
1174
1175         case '\r':
1176           if (CPP_BUFFER (pfile)->has_escapes)
1177             {
1178               cpp_ice (pfile, "\\r escape inside string constant");
1179               FORWARD(1);
1180             }
1181           else
1182             /* Backslash newline is replaced by nothing at all.  */
1183             CPP_BUMP_LINE (pfile);
1184           break;
1185
1186         case '\\':
1187           FORWARD(1);
1188           break;
1189
1190         case '\"':
1191         case '\'':
1192           if (cc == c)
1193             goto out;
1194           break;
1195         }
1196     }
1197
1198  out:
1199   if (null_count == 1)
1200     cpp_warning (pfile, "null character in string or character constant");
1201   else if (null_count > 1)
1202     cpp_warning (pfile, "null characters in string or character constant");
1203 }
1204
1205 /* Parse a string and copy it to the output.  */
1206
1207 static void
1208 parse_string (pfile, c)
1209      cpp_reader *pfile;
1210      int c;
1211 {
1212   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
1213   const U_CHAR *limit;
1214
1215   skip_string (pfile, c);
1216
1217   limit = CPP_BUFFER (pfile)->cur;
1218   CPP_RESERVE (pfile, limit - start + 2);
1219   CPP_PUTC_Q (pfile, c);
1220   for (; start < limit; start++)
1221     if (*start != '\r')
1222       CPP_PUTC_Q (pfile, *start);
1223 }
1224
1225 /* Get the next token, and add it to the text in pfile->token_buffer.
1226    Return the kind of token we got.  */
1227
1228 enum cpp_ttype
1229 _cpp_lex_token (pfile)
1230      cpp_reader *pfile;
1231 {
1232   register int c, c2;
1233   enum cpp_ttype token;
1234
1235   if (CPP_BUFFER (pfile) == NULL)
1236     return CPP_EOF;
1237
1238  get_next:
1239   c = GETC();
1240   switch (c)
1241     {
1242     case EOF:
1243       return CPP_EOF;
1244
1245     case '/':
1246       if (PEEKC () == '=')
1247         goto op2;
1248
1249     comment:
1250       if (CPP_OPTION (pfile, discard_comments))
1251         c = skip_comment (pfile, c);
1252       else
1253         c = copy_comment (pfile, c);
1254       if (c != ' ')
1255         goto randomchar;
1256
1257       /* Comments are equivalent to spaces.
1258          For -traditional, a comment is equivalent to nothing.  */
1259       if (!CPP_OPTION (pfile, discard_comments))
1260         return CPP_COMMENT;
1261       else if (CPP_TRADITIONAL (pfile))
1262         goto get_next;
1263       else
1264         {
1265           CPP_PUTC (pfile, c);
1266           return CPP_HSPACE;
1267         }
1268
1269     case '#':
1270       CPP_PUTC (pfile, c);
1271
1272     hash:
1273       c2 = PEEKC ();
1274       if (c2 == '#')
1275         {
1276           FORWARD (1);
1277           CPP_PUTC (pfile, c2);
1278           return CPP_PASTE;
1279         }
1280       else if (c2 == '%' && PEEKN (1) == ':')
1281         {
1282           /* Digraph: "%:" == "#".  */
1283           FORWARD (1);
1284           CPP_RESERVE (pfile, 2);
1285           CPP_PUTC_Q (pfile, c2);
1286           CPP_PUTC_Q (pfile, GETC ());
1287           return CPP_PASTE;
1288         }
1289       else
1290         return CPP_HASH;
1291
1292     case '\"':
1293     case '\'':
1294       parse_string (pfile, c);
1295       return c == '\'' ? CPP_CHAR : CPP_STRING;
1296
1297     case '$':
1298       if (!CPP_OPTION (pfile, dollars_in_ident))
1299         goto randomchar;
1300       goto letter;
1301
1302     case ':':
1303       c2 = PEEKC ();
1304       /* Digraph: ":>" == "]".  */
1305       if (c2 == '>'
1306           || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1307         goto op2;
1308       goto randomchar;
1309
1310     case '&':
1311     case '+':
1312     case '|':
1313       c2 = PEEKC ();
1314       if (c2 == c || c2 == '=')
1315         goto op2;
1316       goto randomchar;
1317
1318     case '%':
1319       /* Digraphs: "%:" == "#", "%>" == "}".  */
1320       c2 = PEEKC ();
1321       if (c2 == ':')
1322         {
1323           FORWARD (1);
1324           CPP_RESERVE (pfile, 2);
1325           CPP_PUTC_Q (pfile, c);
1326           CPP_PUTC_Q (pfile, c2);
1327           goto hash;
1328         }
1329       else if (c2 == '>')
1330         {
1331           FORWARD (1);
1332           CPP_RESERVE (pfile, 2);
1333           CPP_PUTC_Q (pfile, c);
1334           CPP_PUTC_Q (pfile, c2);
1335           return CPP_OPEN_BRACE;
1336         }
1337       /* else fall through */
1338
1339     case '*':
1340     case '!':
1341     case '=':
1342     case '^':
1343       if (PEEKC () == '=')
1344         goto op2;
1345       goto randomchar;
1346
1347     case '-':
1348       c2 = PEEKC ();
1349       if (c2 == '-')
1350         {
1351           if (CPP_OPTION (pfile, chill))
1352             goto comment;  /* Chill style comment */
1353           else
1354             goto op2;
1355         }
1356       else if (c2 == '=')
1357         goto op2;
1358       else if (c2 == '>')
1359         {
1360           if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1361             {
1362               /* In C++, there's a ->* operator.  */
1363               token = CPP_OTHER;
1364               CPP_RESERVE (pfile, 4);
1365               CPP_PUTC_Q (pfile, c);
1366               CPP_PUTC_Q (pfile, GETC ());
1367               CPP_PUTC_Q (pfile, GETC ());
1368               return token;
1369             }
1370           goto op2;
1371         }
1372       goto randomchar;
1373
1374     case '<':
1375       if (pfile->parsing_include_directive)
1376         {
1377           for (;;)
1378             {
1379               CPP_PUTC (pfile, c);
1380               if (c == '>')
1381                 break;
1382               c = GETC ();
1383               if (c == '\n' || c == EOF)
1384                 {
1385                   cpp_error (pfile,
1386                              "missing '>' in `#include <FILENAME>'");
1387                   break;
1388                 }
1389               else if (c == '\r')
1390                 {
1391                   if (!CPP_BUFFER (pfile)->has_escapes)
1392                     {
1393                       /* Backslash newline is replaced by nothing. */
1394                       CPP_ADJUST_WRITTEN (pfile, -1);
1395                       CPP_BUMP_LINE (pfile);
1396                     }
1397                   else
1398                     {
1399                       /* We might conceivably get \r- or \r<space> in
1400                          here.  Just delete 'em. */
1401                       int d = GETC();
1402                       if (d != '-' && d != ' ')
1403                         cpp_ice (pfile, "unrecognized escape \\r%c", d);
1404                       CPP_ADJUST_WRITTEN (pfile, -1);
1405                     }
1406                 }
1407             }
1408           return CPP_STRING;
1409         }
1410       /* Digraphs: "<%" == "{", "<:" == "[".  */
1411       c2 = PEEKC ();
1412       if (c2 == '%')
1413         {
1414           FORWARD (1);
1415           CPP_RESERVE (pfile, 2);
1416           CPP_PUTC_Q (pfile, c);
1417           CPP_PUTC_Q (pfile, c2);
1418           return CPP_CLOSE_BRACE;
1419         }
1420       else if (c2 == ':')
1421         goto op2;
1422       /* else fall through */
1423     case '>':
1424       c2 = PEEKC ();
1425       if (c2 == '=')
1426         goto op2;
1427       /* GNU C++ supports MIN and MAX operators <? and >?.  */
1428       if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1429         goto randomchar;
1430       FORWARD(1);
1431       CPP_RESERVE (pfile, 3);
1432       CPP_PUTC_Q (pfile, c);
1433       CPP_PUTC_Q (pfile, c2);
1434       if (PEEKC () == '=')
1435         CPP_PUTC_Q (pfile, GETC ());
1436       return CPP_OTHER;
1437
1438     case '.':
1439       c2 = PEEKC ();
1440       if (ISDIGIT (c2))
1441         {
1442           CPP_PUTC (pfile, c);
1443           c = GETC ();
1444           goto number;
1445         }
1446
1447       /* In C++ there's a .* operator.  */
1448       if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1449         goto op2;
1450
1451       if (c2 == '.' && PEEKN(1) == '.')
1452         {
1453           CPP_RESERVE (pfile, 3);
1454           CPP_PUTC_Q (pfile, '.');
1455           CPP_PUTC_Q (pfile, '.');
1456           CPP_PUTC_Q (pfile, '.');
1457           FORWARD (2);
1458           return CPP_ELLIPSIS;
1459         }
1460       goto randomchar;
1461
1462     op2:
1463       CPP_RESERVE (pfile, 2);
1464       CPP_PUTC_Q (pfile, c);
1465       CPP_PUTC_Q (pfile, GETC ());
1466       return CPP_OTHER;
1467
1468     case 'L':
1469       c2 = PEEKC ();
1470       if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1471         {
1472           CPP_PUTC (pfile, c);
1473           c = GETC ();
1474           parse_string (pfile, c);
1475           return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1476         }
1477       goto letter;
1478
1479     case '0': case '1': case '2': case '3': case '4':
1480     case '5': case '6': case '7': case '8': case '9':
1481     number:
1482     c2  = '.';
1483     for (;;)
1484       {
1485         CPP_RESERVE (pfile, 2);
1486         CPP_PUTC_Q (pfile, c);
1487         c = PEEKC ();
1488         if (c == EOF)
1489           break;
1490         if (!is_numchar(c) && c != '.'
1491             && ((c2 != 'e' && c2 != 'E'
1492                  && ((c2 != 'p' && c2 != 'P')
1493                      || CPP_OPTION (pfile, c89)))
1494                 || (c != '+' && c != '-')))
1495           break;
1496         FORWARD(1);
1497         c2= c;
1498       }
1499     return CPP_NUMBER;
1500     case 'b': case 'c': case 'd': case 'h': case 'o':
1501     case 'B': case 'C': case 'D': case 'H': case 'O':
1502       if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1503         {
1504           CPP_RESERVE (pfile, 2);
1505           CPP_PUTC_Q (pfile, c);
1506           CPP_PUTC_Q (pfile, '\'');
1507           FORWARD(1);
1508           for (;;)
1509             {
1510               c = GETC();
1511               if (c == EOF)
1512                 goto chill_number_eof;
1513               if (!is_numchar(c))
1514                 break;
1515               CPP_PUTC (pfile, c);
1516             }
1517           if (c == '\'')
1518             {
1519               CPP_RESERVE (pfile, 2);
1520               CPP_PUTC_Q (pfile, c);
1521               return CPP_STRING;
1522             }
1523           else
1524             {
1525               FORWARD(-1);
1526             chill_number_eof:
1527               return CPP_NUMBER;
1528             }
1529         }
1530       else
1531         goto letter;
1532     case '_':
1533     case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1534     case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1535     case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1536     case 'x': case 'y': case 'z':
1537     case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1538     case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1539     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1540     case 'Y': case 'Z':
1541     letter:
1542     _cpp_parse_name (pfile, c);
1543     return CPP_MACRO;
1544
1545     case ' ':  case '\t':  case '\v': case '\f': case '\0':
1546       {
1547         int null_count = 0;
1548
1549         for (;;)
1550           {
1551             if (c == '\0')
1552               null_count++;
1553             else
1554               CPP_PUTC (pfile, c);
1555             c = PEEKC ();
1556             if (c == EOF || !is_hspace(c))
1557               break;
1558             FORWARD(1);
1559           }
1560         if (null_count)
1561           null_warning (pfile, null_count);
1562         return CPP_HSPACE;
1563       }
1564
1565     case '\r':
1566       if (CPP_BUFFER (pfile)->has_escapes)
1567         {
1568           c = GETC ();
1569           if (c == '-')
1570             {
1571               if (pfile->output_escapes)
1572                 CPP_PUTS (pfile, "\r-", 2);
1573               _cpp_parse_name (pfile, GETC ());
1574               return CPP_NAME;
1575             }
1576           else if (c == ' ')
1577             {
1578               /* "\r " means a space, but only if necessary to prevent
1579                  accidental token concatenation.  */
1580               CPP_RESERVE (pfile, 2);
1581               if (pfile->output_escapes)
1582                 CPP_PUTC_Q (pfile, '\r');
1583               CPP_PUTC_Q (pfile, c);
1584               return CPP_HSPACE;
1585             }
1586           else
1587             {
1588               cpp_ice (pfile, "unrecognized escape \\r%c", c);
1589               goto get_next;
1590             }
1591         }
1592       else
1593         {
1594           /* Backslash newline is ignored. */
1595           if (!ACTIVE_MARK_P (pfile))
1596             CPP_BUMP_LINE (pfile);
1597           goto get_next;
1598         }
1599
1600     case '\n':
1601       CPP_PUTC (pfile, c);
1602       return CPP_VSPACE;
1603
1604     case '(': token = CPP_OPEN_PAREN;  goto char1;
1605     case ')': token = CPP_CLOSE_PAREN; goto char1;
1606     case '{': token = CPP_OPEN_BRACE;  goto char1;
1607     case '}': token = CPP_CLOSE_BRACE; goto char1;
1608     case ',': token = CPP_COMMA;       goto char1;
1609     case ';': token = CPP_SEMICOLON;   goto char1;
1610
1611     randomchar:
1612     default:
1613       token = CPP_OTHER;
1614     char1:
1615       CPP_PUTC (pfile, c);
1616       return token;
1617     }
1618 }
1619
/* Check for and expand a macro, which is from WRITTEN to CPP_WRITTEN (pfile).
   Caller is expected to have checked no_macro_expand.

   Returns 1 if the identifier was consumed as a macro: either its
   expansion was started via _cpp_macroexpand, or it was an empty
   macro and has been replaced by a single space.  In both cases the
   identifier's text is removed from the token buffer and the caller
   should go on to the next token.  Returns 0 if the identifier is to
   be left in the token buffer as an ordinary name.  */
static int
maybe_macroexpand (pfile, written)
     cpp_reader *pfile;
     long written;
{
  U_CHAR *macro = pfile->token_buffer + written;
  size_t len = CPP_WRITTEN (pfile) - written;
  cpp_hashnode *hp = cpp_lookup (pfile, macro, len);

  /* cpp_lookup never returns null.  */
  /* NOTE(review): T_VOID presumably marks a name with no macro
     definition -- confirm against cpphash.h.  */
  if (hp->type == T_VOID)
    return 0;
  if (hp->disabled || hp->type == T_IDENTITY)
    {
      if (pfile->output_escapes)
        {
          /* Insert a no-reexpand marker before IDENT.  */
          CPP_RESERVE (pfile, 2);
          CPP_ADJUST_WRITTEN (pfile, 2);
          /* Recompute MACRO from token_buffer now that space has
             been reserved, then shift the name up by two bytes to
             make room for the marker.  */
          macro = pfile->token_buffer + written;

          memmove (macro + 2, macro, len);
          macro[0] = '\r';
          macro[1] = '-';
        }
      return 0;
    }
  if (hp->type == T_EMPTY)
    {
      /* Special case optimization: macro expands to nothing.  */
      /* The name is replaced by one space; no buffer is pushed.  */
      CPP_SET_WRITTEN (pfile, written);
      CPP_PUTC_Q (pfile, ' ');
      return 1;
    }

  /* If macro wants an arglist, verify that a '(' follows.  */
  if (hp->type == T_FMACRO)
    {
      int macbuf_whitespace = 0;
      int c;

      /* While the current buffer is a macro buffer, look past
         whitespace and newlines in it for the '('.  A buffer that
         runs out first is popped and the search continues in the
         buffer beneath it.  */
      while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
        {
          const U_CHAR *point = CPP_BUFFER (pfile)->cur;
          for (;;)
            {
              _cpp_skip_hspace (pfile);
              c = PEEKC ();
              if (c == '\n')
                FORWARD(1);
              else
                break;
            }
          /* Remember whether any whitespace was consumed, so that a
             space can be written if this turns out not to be a call.  */
          if (point != CPP_BUFFER (pfile)->cur)
            macbuf_whitespace = 1;
          if (c == '(')
            goto is_macro_call;
          else if (c != EOF)
            goto not_macro_call;
          cpp_pop_buffer (pfile);
        }

      /* Now in a non-macro buffer.  Look ahead for the '(' in the
         same way, but between CPP_SET_MARK and CPP_GOTO_MARK, which
         presumably return point to where it was -- confirm against
         cpplib.h.  */
      CPP_SET_MARK (pfile);
      for (;;)
        {
          _cpp_skip_hspace (pfile);
          c = PEEKC ();
          if (c == '\n')
            FORWARD(1);
          else
            break;
        }
      CPP_GOTO_MARK (pfile);

      if (c != '(')
        {
        not_macro_call:
          /* Whitespace skipped inside macro buffers is not restored,
             so stand in for it with a single space.  */
          if (macbuf_whitespace)
            CPP_PUTC (pfile, ' ');

          /* K+R treated this as a hard error.  */
          if (CPP_OPTION (pfile, warn_traditional))
            cpp_warning (pfile,
         "traditional C rejects function macro %s in non-function context",
                         hp->name);
          return 0;
        }
    }

 is_macro_call:
  /* This is now known to be a macro call.
     Expand the macro, reading arguments as needed,
     and push the expansion on the input stack.  */
  _cpp_macroexpand (pfile, hp);
  /* Drop the macro's name from the token buffer.  */
  CPP_SET_WRITTEN (pfile, written);
  return 1;
}
1719
1720 /* Complain about \v or \f in a preprocessing directive (constraint
1721    violation, C99 6.10 para 5).  Caller has checked CPP_PEDANTIC.  */
1722 static void
1723 pedantic_whitespace (pfile, p, len)
1724      cpp_reader *pfile;
1725      U_CHAR *p;
1726      unsigned int len;
1727 {
1728   while (len)
1729     {
1730       if (*p == '\v')
1731         cpp_pedwarn (pfile, "vertical tab in preprocessing directive");
1732       else if (*p == '\f')
1733         cpp_pedwarn (pfile, "form feed in preprocessing directive");
1734       p++;
1735       len--;
1736     }
1737 }
1738
1739
1740 enum cpp_ttype
1741 cpp_get_token (pfile)
1742      cpp_reader *pfile;
1743 {
1744   enum cpp_ttype token;
1745   long written = CPP_WRITTEN (pfile);
1746
1747  get_next:
1748   token = _cpp_lex_token (pfile);
1749
1750   switch (token)
1751     {
1752     default:
1753       pfile->potential_control_macro = 0;
1754       pfile->only_seen_white = 0;
1755       return token;
1756
1757     case CPP_VSPACE:
1758       if (pfile->only_seen_white == 0)
1759         pfile->only_seen_white = 1;
1760       CPP_BUMP_LINE (pfile);
1761       return token;
1762
1763     case CPP_HSPACE:
1764     case CPP_COMMENT:
1765       return token;
1766
1767     case CPP_HASH:
1768       pfile->potential_control_macro = 0;
1769       if (!pfile->only_seen_white)
1770         return CPP_HASH;
1771       /* XXX shouldn't have to do this - remove the hash or %: from
1772          the token buffer.  */
1773       if (CPP_PWRITTEN (pfile)[-1] == '#')
1774         CPP_ADJUST_WRITTEN (pfile, -1);
1775       else
1776         CPP_ADJUST_WRITTEN (pfile, -2);
1777
1778       if (_cpp_handle_directive (pfile))
1779         return CPP_DIRECTIVE;
1780       pfile->only_seen_white = 0;
1781       CPP_PUTC (pfile, '#');
1782       return CPP_HASH;
1783
1784     case CPP_MACRO:
1785       pfile->potential_control_macro = 0;
1786       pfile->only_seen_white = 0;
1787       if (! pfile->no_macro_expand
1788           && maybe_macroexpand (pfile, written))
1789         goto get_next;
1790       return CPP_NAME;
1791
1792     case CPP_EOF:
1793       if (CPP_BUFFER (pfile) == NULL)
1794         return CPP_EOF;
1795       if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1796         {
1797           cpp_pop_buffer (pfile);
1798           goto get_next;
1799         }
1800       cpp_pop_buffer (pfile);
1801       return CPP_EOF;
1802     }
1803 }
1804
1805 /* Like cpp_get_token, but skip spaces and comments.  */
1806
1807 enum cpp_ttype
1808 cpp_get_non_space_token (pfile)
1809      cpp_reader *pfile;
1810 {
1811   int old_written = CPP_WRITTEN (pfile);
1812   for (;;)
1813     {
1814       enum cpp_ttype token = cpp_get_token (pfile);
1815       if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1816         return token;
1817       CPP_SET_WRITTEN (pfile, old_written);
1818     }
1819 }
1820
1821 /* Like cpp_get_token, except that it does not execute directives,
1822    does not consume vertical space, and discards horizontal space.  */
1823 enum cpp_ttype
1824 _cpp_get_directive_token (pfile)
1825      cpp_reader *pfile;
1826 {
1827   long old_written;
1828   enum cpp_ttype token;
1829   int at_bol;
1830
1831  get_next:
1832   at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1833   old_written = CPP_WRITTEN (pfile);
1834   token = _cpp_lex_token (pfile);
1835   switch (token)
1836     {
1837     default:
1838       return token;
1839
1840     case CPP_VSPACE:
1841       /* Put it back and return VSPACE.  */
1842       FORWARD(-1);
1843       CPP_ADJUST_WRITTEN (pfile, -1);
1844       return CPP_VSPACE;
1845
1846     case CPP_HSPACE:
1847       /* The purpose of this rather strange check is to prevent pedantic
1848          warnings for ^L in an #ifdefed out block.  */
1849       if (CPP_PEDANTIC (pfile) && ! at_bol)
1850         pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1851                              CPP_WRITTEN (pfile) - old_written);
1852       CPP_SET_WRITTEN (pfile, old_written);
1853       goto get_next;
1854       return CPP_HSPACE;
1855
1856     case CPP_MACRO:
1857       if (! pfile->no_macro_expand
1858           && maybe_macroexpand (pfile, old_written))
1859         goto get_next;
1860       return CPP_NAME;
1861
1862     case CPP_EOF:
1863       if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1864         {
1865           cpp_pop_buffer (pfile);
1866           goto get_next;
1867         }
1868       else
1869         /* This can happen for files that don't end with a newline,
1870            and for cpp_define and friends.  Pretend they do, so
1871            callers don't have to deal.  A warning will be issued by
1872            someone else, if necessary.  */
1873         return CPP_VSPACE;
1874     }
1875 }
1876
1877 /* Determine the current line and column.  Used only by read_and_prescan. */
1878 static U_CHAR *
1879 find_position (start, limit, linep)
1880      U_CHAR *start;
1881      U_CHAR *limit;
1882      unsigned long *linep;
1883 {
1884   unsigned long line = *linep;
1885   U_CHAR *lbase = start;
1886   while (start < limit)
1887     {
1888       U_CHAR ch = *start++;
1889       if (ch == '\n' || ch == '\r')
1890         {
1891           line++;
1892           lbase = start;
1893         }
1894     }
1895   *linep = line;
1896   return lbase;
1897 }
1898
1899 /* The following table is used by _cpp_prescan.  If we have
1900    designated initializers, it can be constant data; otherwise, it is
1901    set up at runtime by _cpp_init_input_buffer.  */
1902
/* CHARTAB, END and s(p, v) are temporary scaffolding for defining
   chartab; they are #undef'd again after the table.  With
   GCC_VERSION >= 2007 the table is a const array built with
   designated initializers and init_chartab() expands to nothing.
   Otherwise CHARTAB defines a zero-filled array and opens the body of
   a function init_chartab, in which each s(p, v) is an assignment
   into the table.  */
#if (GCC_VERSION >= 2007)
#define init_chartab()  /* nothing */
#define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
#define END };
#define s(p, v) [p] = v,
#else
#define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
 static void init_chartab PARAMS ((void)) { \
 unsigned char *x = chartab;
#define END }
#define s(p, v) x[p] = v;
#endif

/* Table of characters that can't be handled in the inner loop.
   Also contains the mapping between trigraph third characters and their
   replacements.  */
/* The three special-case codes are small integers, below every
   trigraph replacement character stored in the table, so one
   comparison against SPECCASE_QUESTION tells the two kinds of entry
   apart.  */
#define SPECCASE_CR        1
#define SPECCASE_BACKSLASH 2
#define SPECCASE_QUESTION  3

CHARTAB
  s('\r', SPECCASE_CR)
  s('\\', SPECCASE_BACKSLASH)
  s('?',  SPECCASE_QUESTION)

  /* Third character of a trigraph -> the character it stands for.  */
  s('=', '#')   s(')', ']')     s('!', '|')
  s('(', '[')   s('\'', '^')    s('>', '}')
  s('/', '\\')  s('<', '{')     s('-', '~')
END

#undef CHARTAB
#undef END
#undef s

/* NORMAL(c) is true when character C has no special-case code: its
   entry is either empty or a trigraph replacement.  That is, C is
   not '\r', '\\' or '?'.  */
#define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
/* NONTRI(c) is true when C is at most SPECCASE_QUESTION.
   NOTE(review): presumably C here is a value fetched from chartab,
   so this means "not a trigraph replacement" -- confirm at the point
   of use.  */
#define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1939
1940 /* Prescan pass over a file already loaded into BUF.  This is
1941    translation phases 1 and 2 (C99 5.1.1.2).
1942
1943    Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1944    canonical form (\n).  If enabled, convert and/or warn about
1945    trigraphs.  Convert backslash-newline to a one-character escape
1946    (\r) and remove it from "embarrassing" places (i.e. the middle of a
1947    token).  If there is no newline at the end of the file, add one and
1948    warn.  Returns -1 on failure, or the actual length of the data to
1949    be scanned.
1950
1951    This function does a lot of work, and can be a serious performance
1952    bottleneck.  It has been tuned heavily; make sure you understand it
1953    before hacking.  The common case - no trigraphs, Unix style line
1954    breaks, backslash-newline set off by whitespace, newline at EOF -
1955    has been optimized at the expense of the others.  The performance
1956    penalty for DOS style line breaks (\r\n) is about 15%.
1957
1958    Warnings lose particularly heavily since we have to determine the
1959    line number, which involves scanning from the beginning of the file
1960    or from the last warning.  The penalty for the absence of a newline
1961    at the end of reload1.c is about 60%.  (reload1.c is 329k.)
1962
1963    If your file has more than one kind of end-of-line marker, you
1964    will get messed-up line numbering.  */
1965
1966 ssize_t
1967 _cpp_prescan (pfile, fp, len)
1968      cpp_reader *pfile;
1969      cpp_buffer *fp;
1970      ssize_t len;
1971 {
1972   U_CHAR *buf, *op;
1973   const U_CHAR *ibase, *ip, *ilimit;
1974   U_CHAR *line_base;
1975   unsigned long line;
1976   unsigned int deferred_newlines;
1977
1978   /* Allocate an extra byte in case we must add a trailing \n.  */
1979   buf = (U_CHAR *) xmalloc (len + 1);
1980   line_base = op = buf;
1981   ip = ibase = fp->buf;
1982   ilimit = ibase + len;
1983   line = 1;
1984   deferred_newlines = 0;
1985
1986   for (;;)
1987     {
1988       const U_CHAR *iq;
1989
1990       /* Deal with \-newline, potentially in the middle of a token. */
1991       if (deferred_newlines)
1992         {
1993           if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1994             {
1995               /* Previous was not white space.  Skip to white
1996                  space, if we can, before outputting the \r's */
1997               iq = ip;
1998               while (iq < ilimit
1999                      && *iq != ' '
2000                      && *iq != '\t'
2001                      && *iq != '\n'
2002                      && NORMAL(*iq))
2003                 iq++;
2004               memcpy (op, ip, iq - ip);
2005               op += iq - ip;
2006               ip += iq - ip;
2007               if (! NORMAL(*ip))
2008                 goto do_speccase;
2009             }
2010           while (deferred_newlines)
2011             deferred_newlines--, *op++ = '\r';
2012         }
2013
2014       /* Copy as much as we can without special treatment. */
2015       iq = ip;
2016       while (iq < ilimit && NORMAL (*iq)) iq++;
2017       memcpy (op, ip, iq - ip);
2018       op += iq - ip;
2019       ip += iq - ip;
2020
2021     do_speccase:
2022       if (ip >= ilimit)
2023         break;
2024
2025       switch (chartab[*ip++])
2026         {
2027         case SPECCASE_CR:  /* \r */
2028           if (ip[-2] != '\n')
2029             {
2030               if (ip < ilimit && *ip == '\n')
2031                 ip++;
2032               *op++ = '\n';
2033             }
2034           break;
2035
2036         case SPECCASE_BACKSLASH:  /* \ */
2037         backslash:
2038           if (ip < ilimit)
2039             {
2040               if (*ip == '\n')
2041                 {
2042                   deferred_newlines++;
2043                   ip++;
2044                   if (*ip == '\r') ip++;
2045                   break;
2046                 }
2047               else if (*ip == '\r')
2048                 {
2049                   deferred_newlines++;
2050                   ip++;
2051                   if (*ip == '\n') ip++;
2052                   break;
2053                 }
2054             }
2055
2056           *op++ = '\\';
2057           break;
2058
2059         case SPECCASE_QUESTION: /* ? */
2060           {
2061             unsigned int d, t;
2062
2063             *op++ = '?'; /* Normal non-trigraph case */
2064             if (ip > ilimit - 2 || ip[0] != '?')
2065               break;
2066
2067             d = ip[1];
2068             t = chartab[d];
2069             if (NONTRI (t))
2070               break;
2071
2072             if (CPP_OPTION (pfile, warn_trigraphs))
2073               {
2074                 unsigned long col;
2075                 line_base = find_position (line_base, op, &line);
2076                 col = op - line_base + 1;
2077                 if (CPP_OPTION (pfile, trigraphs))
2078                   cpp_warning_with_line (pfile, line, col,
2079                                          "trigraph ??%c converted to %c", d, t);
2080                 else
2081                   cpp_warning_with_line (pfile, line, col,
2082                                          "trigraph ??%c ignored", d);
2083               }
2084
2085             ip += 2;
2086             if (CPP_OPTION (pfile, trigraphs))
2087               {
2088                 op[-1] = t;         /* Overwrite '?' */
2089                 if (t == '\\')
2090                   {
2091                     op--;
2092                     goto backslash;
2093                   }
2094               }
2095             else
2096               {
2097                 *op++ = '?';
2098                 *op++ = d;
2099               }
2100           }
2101           break;
2102         }
2103     }
2104
2105 #ifdef HAVE_MMAP_FILE
2106   if (fp->mapped)
2107     munmap ((caddr_t) fp->buf, len);
2108   else
2109 #endif
2110     free ((PTR) fp->buf);
2111
2112   if (op[-1] != '\n')
2113     {
2114       unsigned long col;
2115       line_base = find_position (line_base, op, &line);
2116       col = op - line_base + 1;
2117       cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2118       *op++ = '\n';
2119     }
2120
2121   fp->buf = buf;
2122   return op - buf;
2123 }
2124
2125 /* Allocate pfile->input_buffer, and initialize chartab[]
2126    if it hasn't happened already.  */
2127
2128 void
2129 _cpp_init_input_buffer (pfile)
2130      cpp_reader *pfile;
2131 {
2132   U_CHAR *tmp;
2133
2134   init_chartab ();
2135   _cpp_init_toklist (&pfile->directbuf, NO_DUMMY_TOKEN);
2136
2137   /* Determine the appropriate size for the input buffer.  Normal C
2138      source files are smaller than eight K.  */
2139   /* 8Kbytes of buffer proper, 1 to detect running off the end without
2140      address arithmetic all the time, and 3 for pushback during buffer
2141      refill, in case there's a potential trigraph or end-of-line
2142      digraph at the end of a block. */
2143
2144   tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2145   pfile->input_buffer = tmp;
2146   pfile->input_buffer_len = 8192;
2147 }
2148
2149 /* Utility routine:
2150    Compares, in the manner of strcmp(3), the token beginning at TOKEN
2151    and extending for LEN characters to the NUL-terminated string
2152    STRING.  Typical usage:
2153
2154    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2155                  "inline"))
2156      { ... }
2157  */
2158
2159 int
2160 cpp_idcmp (token, len, string)
2161      const U_CHAR *token;
2162      size_t len;
2163      const char *string;
2164 {
2165   size_t len2 = strlen (string);
2166   int r;
2167
2168   if ((r = memcmp (token, string, MIN (len, len2))))
2169     return r;
2170
2171   /* The longer of the two strings sorts after the shorter.  */
2172   if (len == len2)
2173     return 0;
2174   else if (len < len2)
2175     return -1;
2176   else
2177     return 1;
2178 }
2179
2180 #ifdef NEW_LEXER
2181
2182 /* Lexing algorithm.
2183
2184  The original lexer in cpplib was made up of two passes: a first pass
2185  that replaced trigraphs and deleted escaped newlines, and a second
2186  pass that tokenized the result of the first pass.  Tokenisation was
2187  performed by peeking at the next character in the input stream.  For
2188  example, if the input stream contained "!=", the handler for the !
2189  character would peek at the next character, and if it were a '='
2190  would skip over it, and return a "!=" token, otherwise it would
2191  return just the "!" token.
2192
2193  To implement a single-pass lexer, this peeking ahead is unworkable.
2194  An arbitrary number of escaped newlines, and trigraphs (in particular
2195  ??/ which translates to the escape \), could separate the '!' and '='
2196  in the input stream, yet the next token is still a "!=".
2197
2198  Suppose instead that we lex by one logical line at a time, producing
2199  a token list or stack for each logical line, and when seeing the '!'
2200  push a CPP_NOT token on the list.  Then if the '!' is part of a
2201  longer token ("!=") we know we must see the remainder of the token by
2202  the time we reach the end of the logical line.  Thus we can have the
2203  '=' handler look at the previous token (at the end of the list / top
2204  of the stack) and see if it is a "!" token, and if so, instead of
2205  pushing a "=" token revise the existing token to be a "!=" token.
2206
2207  This works in the presence of escaped newlines, because the '\' would
2208  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
2209  newline ('\n' or '\r') handler looks at the token at the top of the
2210  stack to see if it is a CPP_BACKSLASH, and if so discards both.
2211  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
2212  the '=' handler would never see any intervening escaped newlines.
2213
2214  To make trigraphs work in this context, as in precedence trigraphs
2215  are highest and converted before anything else, the '?' handler does
2216  lookahead to see if it is a trigraph, and if so skips the trigraph
2217  and pushes the token it represents onto the top of the stack.  This
2218  also works in the particular case of a CPP_BACKSLASH trigraph.
2219
2220  To the preprocessor, whitespace is only significant to the point of
2221  knowing whether whitespace precedes a particular token.  For example,
2222  the '=' handler needs to know whether there was whitespace between it
2223  and a "!" token on the top of the stack, to make the token conversion
2224  decision correctly.  So each token has a PREV_WHITESPACE flag to
2225  indicate this - the standard permits consecutive whitespace to be
2226  regarded as a single space.  The compiler front ends are not
2227  interested in whitespace at all; they just require a token stream.
2228  Another place where whitespace is significant to the preprocessor is
2229  a #define statement - if there is whitespace between the macro name
2230  and an initial "(" token the macro is "object-like", otherwise it is
2231  a function-like macro that takes arguments.
2232
2233  However, all is not rosy.  Parsing of identifiers, numbers, comments
2234  and strings becomes trickier because of the possibility of raw
2235  trigraphs and escaped newlines in the input stream.
2236
2237  The trigraphs are three consecutive characters beginning with two
2238  question marks.  A question mark is not valid as part of a number or
2239  identifier, so parsing of a number or identifier terminates normally
2240  upon reaching it, returning to the mainloop which handles the
2241  trigraph just like it would in any other position.  Similarly for the
2242  backslash of a backslash-newline combination.  So we just need the
2243  escaped-newline dropper in the mainloop to check if the token on the
2244  top of the stack after dropping the escaped newline is a number or
2245  identifier, and if so to continue the processing it as if nothing had
2246  happened.
2247
2248  For strings, we replace trigraphs whenever we reach a quote or
2249  newline, because there might be a backslash trigraph escaping them.
2250  We need to be careful that we start trigraph replacing from where we
2251  left off previously, because it is possible for a first scan to leave
2252  "fake" trigraphs that a second scan would pick up as real (e.g. the
2253  sequence "????/\n=" would find a fake ??= trigraph after removing the
2254  escaped newline.)
2255
2256  For line comments, on reaching a newline we scan the previous
2257  character(s) to see if it escaped, and continue if it is.  Block
2258  comments ignore everything and just focus on finding the comment
2259  termination mark.  The only difficult thing, and it is surprisingly
2260  tricky, is checking if an asterisk precedes the final slash since
2261  they could be separated by escaped newlines.  If the preprocessor is
2262  invoked with the output comments option, we don't bother removing
2263  escaped newlines and replacing trigraphs for output.
2264
2265  Finally, numbers can begin with a period, which is pushed initially
2266  as a CPP_DOT token in its own right.  The digit handler checks if the
2267  previous token was a CPP_DOT not separated by whitespace, and if so
2268  pops it off the stack and pushes a period into the number's buffer
2269  before calling the number parser.
2270
2271 */
2272
/* Source spellings of the digraphs.  NOTE(review): the order presumably
   matches an index kept elsewhere in the lexer - confirm before
   reordering.  */
2273 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
2274                                                     U":>", U"<%", U"%>"};
/* Indexed by the third character of a candidate trigraph; holds the
   replacement character, or zero if "??" followed by that character is
   not a trigraph.  Filled in by init_trigraph_map.  */
2275 static unsigned char trigraph_map[256];
2276
2277 void
2278 init_trigraph_map ()
2279 {
2280   trigraph_map['='] = '#';
2281   trigraph_map['('] = '[';
2282   trigraph_map[')'] = ']';
2283   trigraph_map['/'] = '\\';
2284   trigraph_map['\''] = '^';
2285   trigraph_map['<'] = '{';
2286   trigraph_map['>'] = '}';
2287   trigraph_map['!'] = '|';
2288   trigraph_map['-'] = '~';
2289 }
2290
2291 /* Call when a trigraph is encountered.  It warns if necessary, and
2292    returns true if the trigraph should be honoured.  END is the third
2293    character of a trigraph in the input stream.  */
2294 static int
2295 trigraph_ok (pfile, end)
2296      cpp_reader *pfile;
2297      const unsigned char *end;
2298 {
2299   int accept = CPP_OPTION (pfile, trigraphs);
2300
2301   if (CPP_OPTION (pfile, warn_trigraphs))
2302     {
2303       unsigned int col = end - 1 - pfile->buffer->line_base;
2304       if (accept)
2305         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2306                                "trigraph ??%c converted to %c",
2307                                (int) *end, (int) trigraph_map[*end]);
2308       else
2309         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2310                                "trigraph ??%c ignored", (int) *end);
2311     }
2312   return accept;
2313 }
2314
2315 /* Scan a string for trigraphs, warning or replacing them inline as
2316    appropriate.  When parsing a string, we must call this routine
2317    before processing a newline character (if trigraphs are enabled),
2318    since the newline might be escaped by a preceding backslash
2319    trigraph sequence.  Returns a pointer to the end of the name after
2320    replacement.  */
2321
2322 static unsigned char*
2323 trigraph_replace (pfile, src, limit)
2324      cpp_reader *pfile;
2325      unsigned char *src;
2326      unsigned char* limit;
2327 {
2328   unsigned char *dest;
2329
2330   /* Starting with src[1], find two consecutive '?'.  The case of no
2331      trigraphs is streamlined.  */
2332
2333   for (; src + 1 < limit; src += 2)
2334     {
2335       if (src[0] != '?')
2336         continue;
2337
2338       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
2339       if (src[-1] == '?')
2340         src--;
2341       else if (src + 2 == limit || src[1] != '?')
2342         continue;
2343
2344       /* Check if it really is a trigraph.  */
2345       if (trigraph_map[src[2]] == 0)
2346         continue;
2347
2348       dest = src;
2349       goto trigraph_found;
2350     }
2351   return limit;
2352
2353   /* Now we have a trigraph, we need to scan the remaining buffer, and
2354      copy-shifting its contents left if replacement is enabled.  */
2355   for (; src + 2 < limit; dest++, src++)
2356     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2357       {
2358       trigraph_found:
2359         src += 2;
2360         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2361           *dest = trigraph_map[*src];
2362       }
2363
2364   /* Copy remaining (at most 2) characters.  */
2365   while (src < limit)
2366     *dest++ = *src++;
2367   return dest;
2368 }
2369
2370 /* If CUR is a backslash or the end of a trigraphed backslash, return
2371    a pointer to its beginning, otherwise NULL.  We don't read beyond
2372    the buffer start, because there is the start of the comment in the
2373    buffer.  */
2374 static const unsigned char *
2375 backslash_start (pfile, cur)
2376      cpp_reader *pfile;
2377      const unsigned char *cur;
2378 {
2379   if (cur[0] == '\\')
2380     return cur;
2381   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2382       && trigraph_ok (pfile, cur))
2383     return cur - 2;
2384   return 0;
2385 }
2386
2387 /* Skip a C-style block comment.  This is probably the trickiest
2388    handler.  We find the end of the comment by seeing if an asterisk
2389    is before every '/' we encounter.  The nasty complication is that a
2390    previous asterisk may be separated by one or more escaped newlines.
2391    Returns non-zero if comment terminated by EOF, zero otherwise.
        Scanning starts at pfile->buffer->cur.  On return that pointer has
        been advanced to just past the closing '/', or to the end of the
        buffer if the comment was not terminated.  */
2392 static int
2393 skip_block_comment2 (pfile)
2394      cpp_reader *pfile;
2395 {
2396   cpp_buffer *buffer = pfile->buffer;
       /* When non-null, the position right after a run of escaped newlines
          that immediately followed a '*'.  A '/' found exactly there ends
          the comment.  */
2397   const unsigned char *char_after_star = 0;
2398   register const unsigned char *cur = buffer->cur;
2399   int seen_eof = 0;
2400
2401   /* Inner loop would think the comment has ended if the first comment
2402      character is a '/'.  Avoid this and keep the inner loop clean by
2403      skipping such a character.  */
2404   if (cur < buffer->rlimit && cur[0] == '/')
2405     cur++;
2406
2407   for (; cur < buffer->rlimit; )
2408     {
2409       unsigned char c = *cur++;
2410
2411       /* People like decorating comments with '*', so check for
2412          '/' instead for efficiency.  */
2413       if (c == '/')
2414         {
               /* cur[-2] is the character before the '/'.  */
2415           if (cur[-2] == '*' || cur - 1 == char_after_star)
2416             goto out;
2417
2418           /* Warn about potential nested comments, but not when
2419              the final character inside the comment is a '/'.
2420              Don't bother to get it right across escaped newlines.  */
2421           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2422               && cur[0] == '*' && cur[1] != '/')
2423             {
                   /* Publish the position first; presumably cpp_warning uses
                      it to report the location - confirm.  */
2424               buffer->cur = cur;
2425               cpp_warning (pfile, "'/*' within comment");
2426             }
2427         }
2428       else if (IS_NEWLINE(c))
2429         {
               /* Must be computed before handle_newline, which is handed
                  cur and may move it.  cur - 2 is the character before the
                  newline.  */
2430           const unsigned char* bslash = backslash_start (pfile, cur - 2);
2431
               /* NOTE(review): handle_newline is defined elsewhere;
                  presumably it steps over the second character of a
                  two-character line ending and updates line bookkeeping -
                  confirm.  */
2432           handle_newline (cur, buffer->rlimit, c);
2433           /* Work correctly if there is an asterisk before an
2434              arbitrarily long sequence of escaped newlines.  */
2435           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2436             char_after_star = cur;
2437           else
2438             char_after_star = 0;
2439         }
2440     }
       /* Ran off the end of the buffer without finding the terminator.  */
2441   seen_eof = 1;
2442
2443  out:
2444   buffer->cur = cur;
2445   return seen_eof;
2446 }
2447
2448 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
2449    Returns non-zero if a multiline comment.  */
2450 static int
2451 skip_line_comment2 (pfile)
2452      cpp_reader *pfile;
2453 {
2454   cpp_buffer *buffer = pfile->buffer;
2455   register const unsigned char *cur = buffer->cur;
2456   int multiline = 0;
2457
2458   for (; cur < buffer->rlimit; )
2459     {
2460       unsigned char c = *cur++;
2461
2462       if (IS_NEWLINE (c))
2463         {
2464           /* Check for a (trigaph?) backslash escaping the newline.  */
2465           if (!backslash_start (pfile, cur - 2))
2466             goto out;
2467           multiline = 1;
2468           handle_newline (cur, buffer->rlimit, c);
2469         }
2470     }
2471   cur++;
2472
2473  out:
2474   buffer->cur = cur - 1;        /* Leave newline for caller.  */
2475   return multiline;
2476 }
2477
2478 /* Skips whitespace, stopping at next non-whitespace character.
2479    Adjusts pfile->col_adjust to account for tabs.  This enables tokens
2480    to be assigned the correct column.  */
2481 static void
2482 skip_whitespace (pfile, in_directive)
2483      cpp_reader *pfile;
2484      int in_directive;
2485 {
2486   cpp_buffer *buffer = pfile->buffer;
2487   register const unsigned char *cur = buffer->cur;
2488   unsigned short null_count = 0;
2489
2490   for (; cur < buffer->rlimit; )
2491     {
2492       unsigned char c = *cur++;
2493
2494       if (c == '\t')
2495         {
2496           unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
2497           pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
2498                                 - col % CPP_OPTION(pfile, tabstop));
2499         }
2500       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
2501         continue;
2502       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
2503         goto out;
2504       if (c == '\0')
2505         null_count++;
2506       /* Mut be '\f' or '\v' */
2507       else if (in_directive && CPP_PEDANTIC (pfile))
2508         cpp_pedwarn (pfile, "%s in preprocessing directive",
2509                      c == '\f' ? "formfeed" : "vertical tab");
2510     }
2511   cur++;
2512
2513  out:
2514   buffer->cur = cur - 1;
2515   if (null_count)
2516     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2517                  : "embedded null character ignored");
2518 }
2519
2520 /* Parse (append) an identifier.  */
2521 static void
2522 parse_name (pfile, list, name)
2523      cpp_reader *pfile;
2524      cpp_toklist *list;
2525      cpp_name *name;
2526 {
2527   const unsigned char *name_limit;
2528   unsigned char *namebuf;
2529   cpp_buffer *buffer = pfile->buffer;
2530   register const unsigned char *cur = buffer->cur;
2531
2532  expanded:
2533   name_limit = list->namebuf + list->name_cap;
2534   namebuf = list->namebuf + list->name_used;
2535
2536   for (; cur < buffer->rlimit && namebuf < name_limit; )
2537     {
2538       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2539
2540       if (! is_idchar(c))
2541         goto out;
2542       namebuf++;
2543       cur++;
2544       if (c == '$' && CPP_PEDANTIC (pfile))
2545         {
2546           buffer->cur = cur;
2547           cpp_pedwarn (pfile, "'$' character in identifier");
2548         }
2549     }
2550
2551   /* Run out of name space?  */
2552   if (cur < buffer->rlimit)
2553     {
2554       list->name_used = namebuf - list->namebuf;
2555       auto_expand_name_space (list);
2556       goto expanded;
2557     }
2558
2559  out:
2560   buffer->cur = cur;
2561   name->len = namebuf - name->text;
2562   list->name_used = namebuf - list->namebuf;
2563 }
2564
2565 /* Parse (append) a number.  */
2566
2567 #define VALID_SIGN(c, prevc) \
2568   (((c) == '+' || (c) == '-') && \
2569    ((prevc) == 'e' || (prevc) == 'E' \
2570     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2571
2572 static void
2573 parse_number (pfile, list, name)
2574      cpp_reader *pfile;
2575      cpp_toklist *list;
2576      cpp_name *name;
2577 {
2578   const unsigned char *name_limit;
2579   unsigned char *namebuf;
2580   cpp_buffer *buffer = pfile->buffer;
2581   register const unsigned char *cur = buffer->cur;
2582
2583  expanded:
2584   name_limit = list->namebuf + list->name_cap;
2585   namebuf = list->namebuf + list->name_used;
2586
2587   for (; cur < buffer->rlimit && namebuf < name_limit; )
2588     {
2589       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2590
2591       /* Perhaps we should accept '$' here if we accept it for
2592          identifiers.  We know namebuf[-1] is safe, because for c to
2593          be a sign we must have pushed at least one character.  */
2594       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2595         goto out;
2596
2597       namebuf++;
2598       cur++;
2599     }
2600
2601   /* Run out of name space?  */
2602   if (cur < buffer->rlimit)
2603     {
2604       list->name_used = namebuf - list->namebuf;
2605       auto_expand_name_space (list);
2606       goto expanded;
2607     }
2608
2609  out:
2610   buffer->cur = cur;
2611   name->len = namebuf - name->text;
2612   list->name_used = namebuf - list->namebuf;
2613 }
2614
2615 /* Places a string terminated by an unescaped TERMINATOR into a
2616    cpp_name, which should be expandable and thus at the top of the
2617    list's stack.  Handles embedded trigraphs, if necessary, and
2618    escaped newlines.
2619
2620    Can be used for character constants (terminator = '\''), string
2621    constants ('"') and angled headers ('>').  Multi-line strings are
2622    allowed, except for within directives.
        Characters are read from pfile->buffer->cur and appended to LIST's
        name buffer.  The terminator itself is consumed from the input but
        not stored in NAME.  If MULTILINE_OK is zero, or the end of the
        buffer is reached first, a "missing terminating" error is reported.
        On return the buffer position, NAME->len and LIST->name_used are all
        updated.  */
2623
2624 static void
2625 parse_string2 (pfile, list, name, terminator, multiline_ok)
2626      cpp_reader *pfile;
2627      cpp_toklist *list;
2628      cpp_name *name;
2629      unsigned int terminator;
2630      int multiline_ok;
2631 {
2632   cpp_buffer *buffer = pfile->buffer;
2633   register const unsigned char *cur = buffer->cur;
2634   const unsigned char *name_limit;
2635   unsigned char *namebuf;
2636   unsigned int null_count = 0;
       /* Offset into NAME->text from which the next trigraph scan starts.  */
2637   int trigraphed_len = 0;
2638
       /* Re-entered after the name buffer has been enlarged.  */
2639  expanded:
2640   name_limit = list->namebuf + list->name_cap;
2641   namebuf = list->namebuf + list->name_used;
2642
2643   for (; cur < buffer->rlimit && namebuf < name_limit; )
2644     {
2645       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
2646
2647       if (c == '\0')
2648         null_count++;
2649       else if (c == terminator || IS_NEWLINE (c))
2650         {
2651           /* Needed for trigraph_replace and multiline string warning.  */
2652           buffer->cur = cur;
2653
2654           /* Scan for trigraphs before checking if backslash-escaped.  */
2655           if (CPP_OPTION (pfile, trigraphs)
2656               || CPP_OPTION (pfile, warn_trigraphs))
2657             {
2658               namebuf = trigraph_replace (pfile, name->text + trigraphed_len,
2659                                             namebuf);
                   /* NOTE(review): the old trigraphed_len is subtracted
                      here, so after the second scan this holds a delta
                      rather than an offset from name->text.  The intent
                      looks like "end of text minus two" - confirm.  */
2660               trigraphed_len = namebuf - 2 - (name->text + trigraphed_len);
2661               if (trigraphed_len < 0)
2662                 trigraphed_len = 0;
2663             }
2664
2665           namebuf--;     /* Drop the newline / terminator from the name.  */
2666           if (IS_NEWLINE (c))
2667             {
2668               /* Drop a backslash newline, and continue. */
2669               if (namebuf[-1] == '\\')
2670                 {
2671                   handle_newline (cur, buffer->rlimit, c);
2672                   namebuf--;
2673                   continue;
2674                 }
2675
                   /* Step back onto the newline, so that the exits below
                      leave it unconsumed.  */
2676               cur--;
2677
2678               /* In Fortran and assembly language, silently terminate
2679                  strings of either variety at end of line.  This is a
2680                  kludge around not knowing where comments are in these
2681                  languages.  */
2682               if (CPP_OPTION (pfile, lang_fortran)
2683                   || CPP_OPTION (pfile, lang_asm))
2684                 goto out;
2685
2686               /* Character constants, headers and asserts may not
2687                  extend over multiple lines.  In Standard C, neither
2688                  may strings.  We accept multiline strings as an
2689                  extension, but not in directives.  */
2690               if (!multiline_ok)
2691                 goto unterminated;
2692
2693               cur++;  /* Move forwards again.  */
2694
                   /* Remember where the first multi-line string began, for
                      the diagnostic at `unterminated'.  */
2695               if (pfile->multiline_string_line == 0)
2696                 {
2697                   pfile->multiline_string_line = list->line;
2698                   if (CPP_PEDANTIC (pfile))
2699                     cpp_pedwarn (pfile, "multi-line string constant");
2700                 }
2701
                   /* Whatever the line ending was, store a single '\n'.  */
2702               *namebuf++ = '\n';
2703               handle_newline (cur, buffer->rlimit, c);
2704             }
2705           else
2706             {
2707               unsigned char *temp;
2708
2709               /* An odd number of consecutive backslashes represents
2710                  an escaped terminator.  */
2711               temp = namebuf - 1;
2712               while (temp >= name->text && *temp == '\\')
2713                 temp--;
2714
                   /* namebuf - temp is one more than the number of
                      backslashes, so an odd value means an even count: the
                      terminator is real and the string ends here.  */
2715               if ((namebuf - temp) & 1)
2716                 goto out;
                   /* Escaped terminator: put it back into the name.  */
2717               namebuf++;
2718             }
2719         }
2720     }
2721
2722   /* Run out of name space?  */
2723   if (cur < buffer->rlimit)
2724     {
2725       list->name_used = namebuf - list->namebuf;
2726       auto_expand_name_space (list);
2727       goto expanded;
2728     }
2729
2730   /* We may not have trigraph-replaced the input for this code path,
2731      but as the input is in error by being unterminated we don't
2732      bother.  Prevent warnings about no newlines at EOF.  */
2733   if (IS_NEWLINE(cur[-1]))
2734     cur--;
2735
2736  unterminated:
2737   cpp_error (pfile, "missing terminating %c character", (int) terminator);
2738
       /* If a multi-line string was opened on some other line, point at
          it as the likely culprit, then forget it.  */
2739   if (terminator == '\"' && pfile->multiline_string_line != list->line
2740       && pfile->multiline_string_line != 0)
2741     {
2742       cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2743                            "possible start of unterminated string literal");
2744       pfile->multiline_string_line = 0;
2745     }
2746
       /* Common exit: publish the input position and the name's extent.  */
2747  out:
2748   buffer->cur = cur;
2749   name->len = namebuf - name->text;
2750   list->name_used = namebuf - list->namebuf;
2751
2752   if (null_count > 0)
2753     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2754                          : "null character preserved"));
2755 }
2756
2757 /* The character TYPE helps us distinguish comment types: '*' = C
2758    style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
2759    the stored comment includes the comment start and any terminator.  */
2760
2761 #define COMMENT_START_LEN 2
2762 static void
2763 save_comment (list, token, from, len, type)
2764      cpp_toklist *list;
2765      cpp_token *token;
2766      const unsigned char *from;
2767      unsigned int len;
2768      unsigned int type;
2769 {
2770   unsigned char *buffer;
2771
2772   len += COMMENT_START_LEN;
2773
2774   if (list->name_used + len > list->name_cap)
2775     expand_name_space (list, len);
2776
2777   INIT_TOKEN_NAME (list, token);
2778   token->type = CPP_COMMENT;
2779   token->val.name.len = len;
2780
2781   buffer = list->namebuf + list->name_used;
2782   list->name_used += len;
2783
2784   /* Copy the comment.  */
2785   if (type == '*')
2786     {
2787       *buffer++ = '/';
2788       *buffer++ = '*';
2789     }
2790   else
2791     {
2792       *buffer++ = type;
2793       *buffer++ = type;
2794     }
2795   memcpy (buffer, from, len - COMMENT_START_LEN);
2796 }
2797
2798 /*
2799  *  The tokenizer's main loop.  Returns a token list, representing a
2800  *  logical line in the input file.  On EOF after some tokens have
2801  *  been processed, we return immediately.  Then in next call, or if
2802  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
2803  *  token is placed in the list.
2804  *
2805  *  Implementation relies almost entirely on lookback, rather than
2806  *  looking forwards.  This means that tokenization requires just
2807  *  a single pass of the file, even in the presence of trigraphs and
2808  *  escaped newlines, providing significant performance benefits.
2809  *  Trigraph overhead is negligible if they are disabled, and low
2810  *  even when enabled.
2811  */
2812
/* True if the token at index first_token of list->tokens is a '#'
   (CPP_HASH).  Expands in terms of the locals `list' and `first_token',
   so it is only usable where both are in scope.  */
2813 #define IS_DIRECTIVE() (list->tokens[first_token].type == CPP_HASH)
2814
2815 void
2816 _cpp_lex_line (pfile, list)
2817      cpp_reader *pfile;
2818      cpp_toklist *list;
2819 {
2820   cpp_token *cur_token, *token_limit;
2821   cpp_buffer *buffer = pfile->buffer;
2822   register const unsigned char *cur = buffer->cur;
2823   unsigned char flags = 0;
2824   unsigned int first_token = list->tokens_used;
2825
2826   list->line = CPP_BUF_LINE (buffer);
2827   pfile->col_adjust = 0;
2828  expanded:
2829   token_limit = list->tokens + list->tokens_cap;
2830   cur_token = list->tokens + list->tokens_used;
2831
2832   for (; cur < buffer->rlimit && cur_token < token_limit;)
2833     {
2834       unsigned char c = *cur++;
2835
2836       /* Optimize whitespace skipping, as most tokens are probably
2837          separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
2838
2839       if (is_hspace ((unsigned int) c))
2840         {
2841           /* Step back to get the null warning and tab correction.  */
2842           buffer->cur = cur - 1;
2843           skip_whitespace (pfile, IS_DIRECTIVE ());
2844           cur = buffer->cur;
2845
2846           flags = PREV_WHITESPACE;
2847           if (cur == buffer->rlimit)
2848             break;
2849           c = *cur++;
2850         }
2851
2852       /* Initialize current token.  Its type is set in the switch.  */
2853       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
2854       cur_token->flags = flags;
2855       flags = 0;
2856
2857       switch (c)
2858         {
2859         case '0': case '1': case '2': case '3': case '4':
2860         case '5': case '6': case '7': case '8': case '9':
2861           {
2862             int prev_dot;
2863
2864             cur--;              /* Backup character.  */
2865             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
2866             if (prev_dot)
2867               cur_token--;
2868             INIT_TOKEN_NAME (list, cur_token);
2869             /* Prepend an immediately previous CPP_DOT token.  */
2870             if (prev_dot)
2871               {
2872                 if (list->name_cap == list->name_used)
2873                   auto_expand_name_space (list);
2874
2875                 cur_token->val.name.len = 1;
2876                 list->namebuf[list->name_used++] = '.';
2877               }
2878
2879           continue_number:
2880             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
2881             buffer->cur = cur;
2882             parse_number (pfile, list, &cur_token->val.name);
2883             cur = buffer->cur;
2884             cur_token++;
2885           }
2886           break;
2887
2888         letter:
2889         case '_':
2890         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2891         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2892         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2893         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2894         case 'y': case 'z':
2895         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2896         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2897         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2898         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2899         case 'Y': case 'Z':
2900           cur--;                     /* Backup character.  */
2901           INIT_TOKEN_NAME (list, cur_token);
2902           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
2903
2904         continue_name:
2905           buffer->cur = cur;
2906           parse_name (pfile, list, &cur_token->val.name);
2907           cur = buffer->cur;
2908
2909           /* Find handler for newly created / extended directive.  */
2910           if (IS_DIRECTIVE () && cur_token == &list->tokens[first_token + 1])
2911             _cpp_check_directive (list, cur_token);
2912           cur_token++;
2913           break;
2914
2915         case '\'':
2916           /* Fall through.  */
2917         case '\"':
2918           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2919           /* Do we have a wide string?  */
2920           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2921               && cur_token[-1].val.name.len == 1
2922               && cur_token[-1].val.name.text[0] == 'L'
2923               && !CPP_TRADITIONAL (pfile))
2924             {
2925               /* No need for 'L' any more.  */
2926               list->name_used--;
2927               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2928             }
2929
2930         do_parse_string:
2931           /* Here c is one of ' " or >.  */
2932           INIT_TOKEN_NAME (list, cur_token);
2933           buffer->cur = cur;
2934           parse_string2 (pfile, list, &cur_token->val.name, c,
2935                          c == '"' && !IS_DIRECTIVE());
2936           cur = buffer->cur;
2937           cur_token++;
2938           break;
2939
2940         case '/':
2941           cur_token->type = CPP_DIV;
2942           if (IMMED_TOKEN ())
2943             {
2944               if (PREV_TOKEN_TYPE == CPP_DIV)
2945                 {
2946                   /* We silently allow C++ comments in system headers,
2947                      irrespective of conformance mode, because lots of
2948                      broken systems do that and trying to clean it up
2949                      in fixincludes is a nightmare.  */
2950                   if (buffer->system_header_p)
2951                     goto do_line_comment;
2952                   else if (CPP_OPTION (pfile, cplusplus_comments))
2953                     {
2954                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2955                           && ! buffer->warned_cplusplus_comments)
2956                         {
2957                           buffer->cur = cur;
2958                           cpp_pedwarn (pfile,
2959                              "C++ style comments are not allowed in ISO C89");
2960                           cpp_pedwarn (pfile,
2961                           "(this will be reported only once per input file)");
2962                           buffer->warned_cplusplus_comments = 1;
2963                         }
2964                     do_line_comment:
2965                       buffer->cur = cur;
2966                       if (cur[-2] != c)
2967                         cpp_warning (pfile,
2968                                      "comment start split across lines");
2969                       if (skip_line_comment2 (pfile))
2970                         cpp_error_with_line (pfile, list->line,
2971                                              cur_token[-1].col,
2972                                              "multi-line comment");
2973
2974                       /* Back-up to first '-' or '/'.  */
2975                       cur_token--;
2976                       if (!CPP_OPTION (pfile, discard_comments)
2977                           && (!IS_DIRECTIVE() || list->dirno == 0))
2978                         save_comment (list, cur_token++, cur,
2979                                       buffer->cur - cur, c);
2980                       cur = buffer->cur;
2981
2982                       if (!CPP_OPTION (pfile, traditional))
2983                         flags = PREV_WHITESPACE;
2984                       break;
2985                     }
2986                 }
2987             }
2988           cur_token++;
2989           break;
2990
2991         case '*':
2992           cur_token->type = CPP_MULT;
2993           if (IMMED_TOKEN ())
2994             {
2995               if (PREV_TOKEN_TYPE == CPP_DIV)
2996                 {
2997                   buffer->cur = cur;
2998                   if (cur[-2] != '/')
2999                     cpp_warning (pfile,
3000                                  "comment start '/*' split across lines");
3001                   if (skip_block_comment2 (pfile))
3002                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3003                                          "unterminated comment");
3004                   else if (buffer->cur[-2] != '*')
3005                     cpp_warning (pfile,
3006                                  "comment end '*/' split across lines");
3007
3008                   /* Back up to opening '/'.  */
3009                   cur_token--;
3010                   if (!CPP_OPTION (pfile, discard_comments)
3011                       && (!IS_DIRECTIVE() || list->dirno == 0))
3012                     save_comment (list, cur_token++, cur,
3013                                   buffer->cur - cur, c);
3014                   cur = buffer->cur;
3015
3016                   if (!CPP_OPTION (pfile, traditional))
3017                     flags = PREV_WHITESPACE;
3018                   break;
3019                 }
3020               else if (CPP_OPTION (pfile, cplusplus))
3021                 {
3022                   /* In C++, there are .* and ->* operators.  */
3023                   if (PREV_TOKEN_TYPE == CPP_DEREF)
3024                     BACKUP_TOKEN (CPP_DEREF_STAR);
3025                   else if (PREV_TOKEN_TYPE == CPP_DOT)
3026                     BACKUP_TOKEN (CPP_DOT_STAR);
3027                 }
3028             }
3029           cur_token++;
3030           break;
3031
3032         case '\n':
3033         case '\r':
3034           handle_newline (cur, buffer->rlimit, c);
3035           if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3036             {
3037               /* Remove the escaped newline.  Then continue to process
3038                  any interrupted name or number.  */
3039               cur_token--;
3040               if (IMMED_TOKEN ())
3041                 {
3042                   cur_token--;
3043                   if (cur_token->type == CPP_NAME)
3044                     goto continue_name;
3045                   else if (cur_token->type == CPP_NUMBER)
3046                     goto continue_number;
3047                   cur_token++;
3048                 }
3049               /* Remember whitespace setting.  */
3050               flags = cur_token->flags;
3051               break;
3052             }
3053           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3054             {
3055               buffer->cur = cur;
3056               cpp_warning (pfile, "backslash and newline separated by space");
3057             }
3058           /* Skip vertical space until we have at least one token to
3059              return.  */
3060           if (cur_token != &list->tokens[first_token])
3061             goto out;
3062           list->line = CPP_BUF_LINE (buffer);
3063           break;
3064
3065         case '-':
3066           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3067             {
3068               if (CPP_OPTION (pfile, chill))
3069                 goto do_line_comment;
3070               REVISE_TOKEN (CPP_MINUS_MINUS);
3071             }
3072           else
3073             PUSH_TOKEN (CPP_MINUS);
3074           break;
3075
3076           /* The digraph flag checking ensures that ## and %:%:
3077              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
3078         make_hash:
3079         case '#':
3080           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3081               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3082             REVISE_TOKEN (CPP_PASTE);
3083           else
3084             PUSH_TOKEN (CPP_HASH);
3085           break;
3086
3087         case ':':
3088           cur_token->type = CPP_COLON;
3089           if (IMMED_TOKEN ())
3090             {
3091               if (PREV_TOKEN_TYPE == CPP_COLON
3092                   && CPP_OPTION (pfile, cplusplus))
3093                 BACKUP_TOKEN (CPP_SCOPE);
3094               /* Digraph: "<:" is a '['  */
3095               else if (PREV_TOKEN_TYPE == CPP_LESS)
3096                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3097               /* Digraph: "%:" is a '#'  */
3098               else if (PREV_TOKEN_TYPE == CPP_MOD)
3099                 {
3100                   (--cur_token)->flags |= DIGRAPH;
3101                   goto make_hash;
3102                 }
3103             }
3104           cur_token++;
3105           break;
3106
3107         case '&':
3108           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3109             REVISE_TOKEN (CPP_AND_AND);
3110           else
3111             PUSH_TOKEN (CPP_AND);
3112           break;
3113
3114         make_or:
3115         case '|':
3116           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3117             REVISE_TOKEN (CPP_OR_OR);
3118           else
3119             PUSH_TOKEN (CPP_OR);
3120           break;
3121
3122         case '+':
3123           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3124             REVISE_TOKEN (CPP_PLUS_PLUS);
3125           else
3126             PUSH_TOKEN (CPP_PLUS);
3127           break;
3128
3129         case '=':
3130             /* This relies on equidistance of "?=" and "?" tokens.  */
3131           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3132             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3133           else
3134             PUSH_TOKEN (CPP_EQ);
3135           break;
3136
3137         case '>':
3138           cur_token->type = CPP_GREATER;
3139           if (IMMED_TOKEN ())
3140             {
3141               if (PREV_TOKEN_TYPE == CPP_GREATER)
3142                 BACKUP_TOKEN (CPP_RSHIFT);
3143               else if (PREV_TOKEN_TYPE == CPP_MINUS)
3144                 BACKUP_TOKEN (CPP_DEREF);
3145               /* Digraph: ":>" is a ']'  */
3146               else if (PREV_TOKEN_TYPE == CPP_COLON)
3147                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3148               /* Digraph: "%>" is a '}'  */
3149               else if (PREV_TOKEN_TYPE == CPP_MOD)
3150                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3151             }
3152           cur_token++;
3153           break;
3154
3155         case '<':
3156           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3157             {
3158               REVISE_TOKEN (CPP_LSHIFT);
3159               break;
3160             }
3161           /* Is this the beginning of a header name?  */
3162           if (list->flags & SYNTAX_INCLUDE)
3163             {
3164               c = '>';  /* Terminator.  */
3165               cur_token->type = CPP_HEADER_NAME;
3166               goto do_parse_string;
3167             }
3168           PUSH_TOKEN (CPP_LESS);
3169           break;
3170
3171         case '%':
3172           /* Digraph: "<%" is a '{'  */
3173           cur_token->type = CPP_MOD;
3174           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3175             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3176           cur_token++;
3177           break;
3178
3179         case '?':
3180           if (cur + 1 < buffer->rlimit && *cur == '?'
3181               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3182             {
3183               /* Handle trigraph.  */
3184               cur++;
3185               switch (*cur++)
3186                 {
3187                 case '(': goto make_open_square;
3188                 case ')': goto make_close_square;
3189                 case '<': goto make_open_brace;
3190                 case '>': goto make_close_brace;
3191                 case '=': goto make_hash;
3192                 case '!': goto make_or;
3193                 case '-': goto make_complement;
3194                 case '/': goto make_backslash;
3195                 case '\'': goto make_xor;
3196                 }
3197             }
3198           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3199             {
3200               /* GNU C++ defines <? and >? operators.  */
3201               if (PREV_TOKEN_TYPE == CPP_LESS)
3202                 {
3203                   REVISE_TOKEN (CPP_MIN);
3204                   break;
3205                 }
3206               else if (PREV_TOKEN_TYPE == CPP_GREATER)
3207                 {
3208                   REVISE_TOKEN (CPP_MAX);
3209                   break;
3210                 }
3211             }
3212           PUSH_TOKEN (CPP_QUERY);
3213           break;
3214
3215         case '.':
3216           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3217               && IMMED_TOKEN ()
3218               && !(cur_token[-1].flags & PREV_WHITESPACE))
3219             {
3220               cur_token -= 2;
3221               PUSH_TOKEN (CPP_ELLIPSIS);
3222             }
3223           else
3224             PUSH_TOKEN (CPP_DOT);
3225           break;
3226
3227         make_complement:
3228         case '~': PUSH_TOKEN (CPP_COMPL); break;
3229         make_xor:
3230         case '^': PUSH_TOKEN (CPP_XOR); break;
3231         make_open_brace:
3232         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3233         make_close_brace:
3234         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3235         make_open_square:
3236         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3237         make_close_square:
3238         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3239         make_backslash:
3240         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3241         case '!': PUSH_TOKEN (CPP_NOT); break;
3242         case ',': PUSH_TOKEN (CPP_COMMA); break;
3243         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3244         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
3245         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3246
3247         case '$':
3248           if (CPP_OPTION (pfile, dollars_in_ident))
3249             goto letter;
3250           /* Fall through */
3251         default:
3252           cur_token->aux = c;
3253           cur_token->val.name.len = 0; /* FIXME: needed for transition only */
3254           PUSH_TOKEN (CPP_OTHER);
3255           break;
3256         }
3257     }
3258
3259   /* Run out of token space?  */
3260   if (cur_token == token_limit)
3261     {
3262       list->tokens_used = cur_token - list->tokens;
3263       _cpp_expand_token_space (list, 256);
3264       goto expanded;
3265     }
3266
3267   cur_token->flags = flags;
3268   if (cur_token == &list->tokens[first_token])
3269     {
3270       /* FIXME: move this warning to callers who care.  */
3271       if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
3272         cpp_warning (pfile, "no newline at end of file");
3273       cur_token++->type = CPP_EOF;
3274     }
3275
3276  out:
3277   list->tokens[first_token].flags |= BOL;
3278   buffer->cur = cur;
3279   list->tokens_used = cur_token - list->tokens;
3280 }
3281
3282 /* Append the spelling of TOKEN at BUFFER and return the address one
3283    past the last byte stored.  The caller must already have made
3284    BUFFER large enough for the spelling.  A single space is emitted
3285    first when WHITESPACE is non-zero and TOKEN has PREV_WHITESPACE
3286    set.  */
3287
3288 static unsigned char *
3289 spell_token (pfile, token, buffer, whitespace)
3290      cpp_reader *pfile;         /* Only used to report an ICE.  */
3291      const cpp_token *token;
3292      unsigned char *buffer;
3293      int whitespace;
3294 {
3295   unsigned char *out = buffer;
3296
3297   /* Handlers of the # and ## operators call this without wanting
3298      the space; the writer of the preprocessed file does want it.  */
3299   if (whitespace != 0 && (token->flags & PREV_WHITESPACE) != 0)
3300     *out++ = ' ';
3301
3302   switch (token_spellings[token->type].type)
3303     {
3304     case SPELL_NONE:
3305       cpp_ice (pfile, "Unspellable token");
3306       break;
3307
3308     case SPELL_CHAR:
3309       *out++ = token->aux;
3310       break;
3311
3312     case SPELL_IDENT:
3313       memcpy (out, token->val.name.text, token->val.name.len);
3314       out += token->val.name.len;
3315       break;
3316
3317     case SPELL_STRING:
3318       {
3319         /* Strings get double quotes, everything else single ones.  */
3320         const int is_string = (token->type == CPP_STRING
3321                                || token->type == CPP_WSTRING);
3322         const unsigned char quote = is_string ? '"' : '\'';
3323
3324         /* Wide strings and wide characters get an 'L' prefix.  */
3325         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3326           *out++ = 'L';
3327         *out++ = quote;
3328         memcpy (out, token->val.name.text, token->val.name.len);
3329         out += token->val.name.len;
3330         *out++ = quote;
3331       }
3332       break;
3333
3334     case SPELL_OPERATOR:
3335       {
3336         const unsigned char *text;
3337         /* Tokens flagged DIGRAPH use the digraph spelling table.  */
3338         if ((token->flags & DIGRAPH) == 0)
3339           text = token_spellings[token->type].spelling;
3340         else
3341           text = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3342         for (; *text != '\0'; text++)
3343           *out++ = *text;
3344       }
3345       break;
3346     }
3347
3348   return out;
3349 }
3350
3351 /* Temporary driver, for illustration only.  Lexes PFILE's input one
3352    logical line at a time and passes each line to _cpp_output_list,
3353    until a line whose first token is CPP_EOF.  The token list made
3354    here is not released on return.  */
3355 void
3356 _cpp_lex_file (pfile)
3357      cpp_reader *pfile;
3358 {
3359   cpp_toklist *line;
3360
3361   init_trigraph_map ();
3362   line = (cpp_toklist *) xmalloc (sizeof (*line));
3363   _cpp_init_toklist (line, DUMMY_TOKEN);
3364   _cpp_lex_line (pfile, line);
3365   while (line->tokens[0].type != CPP_EOF)
3366     {
3367 #if 0
3368       if (line->dirno)
3369         _cpp_handle_directive (pfile, line);
3370       else
3371 #endif
3372         _cpp_output_list (pfile, line);
3373       _cpp_clear_toklist (line);
3374       _cpp_lex_line (pfile, line);
3375     }
3376 }
3377
3378 /* Temporary helper, for illustration only: spell LIST's tokens.  */
3379 static void
3380 _cpp_output_list (pfile, list)
3381      cpp_reader *pfile;
3382      cpp_toklist *list;
3383 {
3384   cpp_token *tok = list->tokens;
3385
3386   for (; tok < list->tokens + list->tokens_used; tok++)
3387     {
3388       CPP_RESERVE (pfile, TOKEN_LEN (tok));
3389       pfile->limit = spell_token (pfile, tok, pfile->limit, 1);
3390     }
3391 }
3392
3393 #endif