gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 #include "config.h"
  24 #include "system.h"
  25 #include "intl.h"
  26 #include "cpplib.h"
  27 #include "cpphash.h"
  28
/* Raw character access on a cpp_buffer.

   PEEKBUF yields the character N positions past BUFFER->cur without
   consuming it, or EOF when no more than N characters remain before
   BUFFER->rlimit.  GETBUF yields the character at BUFFER->cur and
   steps past it, or yields EOF (without moving) once cur has reached
   rlimit.  FORWARDBUF adds N to BUFFER->cur; N may be negative and no
   bounds check is made.  BUFFER is evaluated more than once.  */
#define PEEKBUF(BUFFER, N) \
  ((BUFFER)->rlimit - (BUFFER)->cur > (N) ? (BUFFER)->cur[N] : EOF)
#define GETBUF(BUFFER) \
  ((BUFFER)->cur < (BUFFER)->rlimit ? *(BUFFER)->cur++ : EOF)
#define FORWARDBUF(BUFFER, N) ((BUFFER)->cur += (N))

/* The same operations applied to CPP_BUFFER (pfile).  A variable named
   `pfile' must be in scope wherever these are used.  */
#define PEEKN(N) PEEKBUF (CPP_BUFFER (pfile), N)
#define FORWARD(N) FORWARDBUF (CPP_BUFFER (pfile), (N))
#define GETC() GETBUF (CPP_BUFFER (pfile))
#define PEEKC() PEEKBUF (CPP_BUFFER (pfile), 0)
  39
/* Forward declarations for the file-local lexing helpers: comment and
   string skipping, and diagnostics.  */
static void skip_block_comment  PARAMS ((cpp_reader *));
static void skip_line_comment   PARAMS ((cpp_reader *));
static int maybe_macroexpand    PARAMS ((cpp_reader *, long));
static int skip_comment         PARAMS ((cpp_reader *, int));
static int copy_comment         PARAMS ((cpp_reader *, int));
static void skip_string         PARAMS ((cpp_reader *, int));
static void parse_string        PARAMS ((cpp_reader *, int));
static U_CHAR *find_position    PARAMS ((U_CHAR *, U_CHAR *, unsigned long *));
static void null_warning        PARAMS ((cpp_reader *, unsigned int));

/* Forward declarations for the output helpers and the token-list
   storage helpers.  */
static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
                                         size_t, FILE *));
static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
                                         unsigned int));
static void bump_column         PARAMS ((cpp_printer *, unsigned int,
                                         unsigned int));
static void expand_name_space   PARAMS ((cpp_toklist *, unsigned int));
static void expand_token_space  PARAMS ((cpp_toklist *));
static void init_token_list     PARAMS ((cpp_reader *, cpp_toklist *, int));
static void pedantic_whitespace PARAMS ((cpp_reader *, U_CHAR *,
                                         unsigned int));
  61
/* Grow LIST's name buffer by half of its current capacity
   (expand_name_space adds its second argument to name_cap).  */
#define auto_expand_name_space(list) \
    expand_name_space ((list), (list)->name_cap / 2)
  64
  65 #ifdef NEW_LEXER
  66
/* Forward declarations for the NEW_LEXER (single-pass line tokenizer)
   routines.  */
static void expand_comment_space PARAMS ((cpp_toklist *));
void init_trigraph_map PARAMS ((void));
static unsigned char* trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
                                                unsigned char *));
static const unsigned char *backslash_start PARAMS ((cpp_reader *,
                                                     const unsigned char *));
static int skip_block_comment2 PARAMS ((cpp_reader *));
static int skip_line_comment2 PARAMS ((cpp_reader *));
static void skip_whitespace PARAMS ((cpp_reader *, int));
static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
static void parse_string2 PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *,
                                  unsigned int));
static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
static void save_comment PARAMS ((cpp_toklist *, const unsigned char *,
                                  unsigned int, unsigned int, unsigned int));
void _cpp_lex_line PARAMS ((cpp_reader *, cpp_toklist *));

static void _cpp_output_list PARAMS ((cpp_reader *, cpp_toklist *));

static unsigned char * spell_token PARAMS ((cpp_reader *, cpp_token *,
                                            cpp_toklist *, unsigned char *,
                                            int));

/* Signature shared by token spelling callbacks.  */
typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
                                          cpp_token *));

/* Macros on a cpp_name.  INIT_NAME makes NAME an empty name whose text
   starts at the current end of LIST's name buffer.  */
#define INIT_NAME(list, name) \
  do {(name).len = 0; (name).offset = (list)->name_used;} while (0)

/* True if the first token of LIST is CPP_HASH.  */
#define IS_DIRECTIVE(list) (TOK_TYPE (list, 0) == CPP_HASH)
/* Offset of CUR from the start of the current line.  Requires a
   variable named `buffer' to be in scope.  */
#define COLUMN(cur) ((cur) - buffer->line_base)

/* Maybe put these in the ISTABLE eventually.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
 104
 105 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 106    character, if any, is in buffer.  */
 107 #define handle_newline(cur, limit, c) \
 108   do {\
 109   if ((cur) < (limit) && *(cur) == '\r' + '\n' - c) \
 110     (cur)++; \
 111   CPP_BUMP_LINE_CUR (pfile, (cur)); \
 112   } while (0)
 113
/* Both of these require a token pointer named `cur_token' in scope.
   IMMED_TOKEN is true when the token at cur_token does not have the
   PREV_WHITESPACE flag set; PREV_TOKEN_TYPE is the type of the token
   just before cur_token.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITESPACE))
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Order here matters.  Those beyond SPELL_NONE store their spelling
   in the token list, and its length in the token->val.name.len.  */
#define SPELL_OPERATOR 0
#define SPELL_CHAR     1
#define SPELL_NONE     2
#define SPELL_IDENT    3
#define SPELL_STRING   4

/* Each of these turns one TTYPE_TABLE entry into an initializer for a
   struct token_spelling: the spelling class followed by S.  The first
   argument is unused here.  */
#define T(e, s) {SPELL_OPERATOR, s},
#define I(e, s) {SPELL_IDENT, s},
#define S(e, s) {SPELL_STRING, s},
#define C(e, s) {SPELL_CHAR, s},
#define N(e, s) {SPELL_NONE, s},

/* Table built from TTYPE_TABLE via the macros above, with a {0, 0}
   entry appended after the last table entry.  */
static const struct token_spelling
{
  unsigned char type;
  PTR  speller;
} token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };

#undef T
#undef I
#undef S
#undef C
#undef N
 142
 143 #define PUSH_TOKEN(ttype) cur_token++->type = ttype
 144 #define REVISE_TOKEN(ttype) cur_token[-1].type = ttype
 145 #define BACKUP_TOKEN(ttype) (--cur_token)->type = ttype
 146 #define BACKUP_DIGRAPH(ttype) do { \
 147   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 148
 149 /* An upper bound on the number of bytes needed to spell a token,
 150    including preceding whitespace.  */
 151 #define TOKEN_LEN(token) (5 + (token_spellings[token->type].type > \
 152                                SPELL_NONE ? token->val.name.len: 0))
 153
 154 #endif
 155
 156 /* Re-allocates PFILE->token_buffer so it will hold at least N more chars.  */
 157
 158 void
 159 _cpp_grow_token_buffer (pfile, n)
 160      cpp_reader *pfile;
 161      long n;
 162 {
 163   long old_written = CPP_WRITTEN (pfile);
 164   pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
 165   pfile->token_buffer = (U_CHAR *)
 166     xrealloc(pfile->token_buffer, pfile->token_buffer_size);
 167   CPP_SET_WRITTEN (pfile, old_written);
 168 }
 169
 170 /* Allocate a new cpp_buffer for PFILE, and push it on the input buffer stack.
 171    If BUFFER != NULL, then use the LENGTH characters in BUFFER
 172    as the new input buffer.
 173    Return the new buffer, or NULL on failure.  */
 174
 175 cpp_buffer *
 176 cpp_push_buffer (pfile, buffer, length)
 177      cpp_reader *pfile;
 178      const U_CHAR *buffer;
 179      long length;
 180 {
 181   cpp_buffer *buf = CPP_BUFFER (pfile);
 182   cpp_buffer *new;
 183   if (++pfile->buffer_stack_depth == CPP_STACK_MAX)
 184     {
 185       cpp_fatal (pfile, "macro or `#include' recursion too deep");
 186       return NULL;
 187     }
 188
 189   new = (cpp_buffer *) xcalloc (1, sizeof (cpp_buffer));
 190
 191   new->if_stack = pfile->if_stack;
 192   new->buf = new->cur = buffer;
 193   new->rlimit = buffer + length;
 194   new->prev = buf;
 195   new->mark = NULL;
 196   new->line_base = NULL;
 197
 198   CPP_BUFFER (pfile) = new;
 199   return new;
 200 }
 201
 202 cpp_buffer *
 203 cpp_pop_buffer (pfile)
 204      cpp_reader *pfile;
 205 {
 206   cpp_buffer *buf = CPP_BUFFER (pfile);
 207   if (ACTIVE_MARK_P (pfile))
 208     cpp_ice (pfile, "mark active in cpp_pop_buffer");
 209
 210   if (buf->ihash)
 211     {
 212       _cpp_unwind_if_stack (pfile, buf);
 213       if (buf->buf)
 214         free ((PTR) buf->buf);
 215       if (pfile->system_include_depth)
 216         pfile->system_include_depth--;
 217       if (pfile->potential_control_macro)
 218         {
 219           buf->ihash->control_macro = pfile->potential_control_macro;
 220           pfile->potential_control_macro = 0;
 221         }
 222       pfile->input_stack_listing_current = 0;
 223     }
 224   else if (buf->macro)
 225     {
 226       HASHNODE *m = buf->macro;
 227
 228       m->disabled = 0;
 229       if ((m->type == T_FMACRO && buf->mapped)
 230           || m->type == T_SPECLINE || m->type == T_FILE
 231           || m->type == T_BASE_FILE || m->type == T_INCLUDE_LEVEL
 232           || m->type == T_STDC)
 233         free ((PTR) buf->buf);
 234     }
 235   CPP_BUFFER (pfile) = CPP_PREV_BUFFER (buf);
 236   free (buf);
 237   pfile->buffer_stack_depth--;
 238   return CPP_BUFFER (pfile);
 239 }
 240
 241 /* Deal with the annoying semantics of fwrite.  */
 242 static void
 243 safe_fwrite (pfile, buf, len, fp)
 244      cpp_reader *pfile;
 245      const U_CHAR *buf;
 246      size_t len;
 247      FILE *fp;
 248 {
 249   size_t count;
 250
 251   while (len)
 252     {
 253       count = fwrite (buf, 1, len, fp);
 254       if (count == 0)
 255         goto error;
 256       len -= count;
 257       buf += count;
 258     }
 259   return;
 260
 261  error:
 262   cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 263 }
 264
 265 /* Notify the compiler proper that the current line number has jumped,
 266    or the current file name has changed.  */
 267
 268 static void
 269 output_line_command (pfile, print, line)
 270      cpp_reader *pfile;
 271      cpp_printer *print;
 272      unsigned int line;
 273 {
 274   cpp_buffer *ip = cpp_file_buffer (pfile);
 275   enum { same = 0, enter, leave, rname } change;
 276   static const char * const codes[] = { "", " 1", " 2", "" };
 277
 278   if (CPP_OPTION (pfile, no_line_commands))
 279     return;
 280
 281   /* Determine whether the current filename has changed, and if so,
 282      how.  'nominal_fname' values are unique, so they can be compared
 283      by comparing pointers.  */
 284   if (ip->nominal_fname == print->last_fname)
 285     change = same;
 286   else
 287     {
 288       if (pfile->buffer_stack_depth == print->last_bsd)
 289         change = rname;
 290       else
 291         {
 292           if (pfile->buffer_stack_depth > print->last_bsd)
 293             change = enter;
 294           else
 295             change = leave;
 296           print->last_bsd = pfile->buffer_stack_depth;
 297         }
 298       print->last_fname = ip->nominal_fname;
 299     }
 300   /* If the current file has not changed, we can output a few newlines
 301      instead if we want to increase the line number by a small amount.
 302      We cannot do this if print->lineno is zero, because that means we
 303      haven't output any line commands yet.  (The very first line
 304      command output is a `same_file' command.)  */
 305   if (change == same && print->lineno != 0
 306       && line >= print->lineno && line < print->lineno + 8)
 307     {
 308       while (line > print->lineno)
 309         {
 310           putc ('\n', print->outf);
 311           print->lineno++;
 312         }
 313       return;
 314     }
 315
 316 #ifndef NO_IMPLICIT_EXTERN_C
 317   if (CPP_OPTION (pfile, cplusplus))
 318     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 319              codes[change],
 320              ip->system_header_p ? " 3" : "",
 321              (ip->system_header_p == 2) ? " 4" : "");
 322   else
 323 #endif
 324     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 325              codes[change],
 326              ip->system_header_p ? " 3" : "");
 327   print->lineno = line;
 328 }
 329
 330 /* Write the contents of the token_buffer to the output stream, and
 331    clear the token_buffer.  Also handles generating line commands and
 332    keeping track of file transitions.  */
 333
 334 void
 335 cpp_output_tokens (pfile, print)
 336      cpp_reader *pfile;
 337      cpp_printer *print;
 338 {
 339   cpp_buffer *ip;
 340
 341   if (CPP_WRITTEN (pfile) - print->written)
 342     {
 343       if (CPP_PWRITTEN (pfile)[-1] == '\n' && print->lineno)
 344         print->lineno++;
 345       safe_fwrite (pfile, pfile->token_buffer,
 346                    CPP_WRITTEN (pfile) - print->written, print->outf);
 347     }
 348
 349   ip = cpp_file_buffer (pfile);
 350   if (ip)
 351     output_line_command (pfile, print, CPP_BUF_LINE (ip));
 352
 353   CPP_SET_WRITTEN (pfile, print->written);
 354 }
 355
 356 /* Helper for cpp_output_list - increases the column number to match
 357    what we expect it to be.  */
 358
 359 static void
 360 bump_column (print, from, to)
 361      cpp_printer *print;
 362      unsigned int from, to;
 363 {
 364   unsigned int tabs, spcs;
 365   unsigned int delta = to - from;
 366
 367   /* Only if FROM is 0, advance by tabs.  */
 368   if (from == 0)
 369     tabs = delta / 8, spcs = delta % 8;
 370   else
 371     tabs = 0, spcs = delta;
 372
 373   while (tabs--) putc ('\t', print->outf);
 374   while (spcs--) putc (' ', print->outf);
 375 }
 376
 377 /* Write out the list L onto pfile->token_buffer.  This function is
 378    incomplete:
 379
 380    1) pfile->token_buffer is not going to continue to exist.
 381    2) At the moment, tokens don't carry the information described
 382    in cpplib.h; they are all strings.
 383    3) The list has to be a complete line, and has to be written starting
 384    at the beginning of a line.  */
 385
 386 void
 387 cpp_output_list (pfile, print, list)
 388      cpp_reader *pfile;
 389      cpp_printer *print;
 390      const cpp_toklist *list;
 391 {
 392   unsigned int i;
 393   unsigned int curcol = 1;
 394
 395   /* XXX Probably does not do what is intended.  */
 396   if (print->lineno != list->line)
 397     output_line_command (pfile, print, list->line);
 398
 399   for (i = 0; i < list->tokens_used; i++)
 400     {
 401       if (TOK_TYPE (list, i) == CPP_VSPACE)
 402         {
 403           output_line_command (pfile, print, list->tokens[i].aux);
 404           continue;
 405         }
 406
 407       if (curcol < TOK_COL (list, i))
 408         {
 409           /* Insert space to bring the column to what it should be.  */
 410           bump_column (print, curcol - 1, TOK_COL (list, i));
 411           curcol = TOK_COL (list, i);
 412         }
 413       /* XXX We may have to insert space to prevent an accidental
 414          token paste.  */
 415       safe_fwrite (pfile, TOK_NAME (list, i), TOK_LEN (list, i), print->outf);
 416       curcol += TOK_LEN (list, i);
 417     }
 418 }
 419
 420 /* Scan a string (which may have escape marks), perform macro expansion,
 421    and write the result to the token_buffer.  */
 422
 423 void
 424 _cpp_expand_to_buffer (pfile, buf, length)
 425      cpp_reader *pfile;
 426      const U_CHAR *buf;
 427      int length;
 428 {
 429   cpp_buffer *stop;
 430   enum cpp_ttype token;
 431   U_CHAR *buf1;
 432
 433   if (length < 0)
 434     {
 435       cpp_ice (pfile, "length < 0 in cpp_expand_to_buffer");
 436       return;
 437     }
 438
 439   /* Copy the buffer, because it might be in an unsafe place - for
 440      example, a sequence on the token_buffer, where the pointers will
 441      be invalidated if we enlarge the token_buffer.  */
 442   buf1 = alloca (length);
 443   memcpy (buf1, buf, length);
 444
 445   /* Set up the input on the input stack.  */
 446   stop = CPP_BUFFER (pfile);
 447   if (cpp_push_buffer (pfile, buf1, length) == NULL)
 448     return;
 449   CPP_BUFFER (pfile)->has_escapes = 1;
 450
 451   /* Scan the input, create the output.  */
 452   for (;;)
 453     {
 454       token = cpp_get_token (pfile);
 455       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 456         break;
 457     }
 458 }
 459
 460 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output.  */
 461
 462 void
 463 cpp_scan_buffer_nooutput (pfile)
 464      cpp_reader *pfile;
 465 {
 466   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 467   enum cpp_ttype token;
 468   unsigned int old_written = CPP_WRITTEN (pfile);
 469   /* In no-output mode, we can ignore everything but directives.  */
 470   for (;;)
 471     {
 472       if (! pfile->only_seen_white)
 473         _cpp_skip_rest_of_line (pfile);
 474       token = cpp_get_token (pfile);
 475       if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 476         break;
 477     }
 478   CPP_SET_WRITTEN (pfile, old_written);
 479 }
 480
 481 /* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT.  */
 482
 483 void
 484 cpp_scan_buffer (pfile, print)
 485      cpp_reader *pfile;
 486      cpp_printer *print;
 487 {
 488   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 489   enum cpp_ttype token;
 490
 491   for (;;)
 492     {
 493       token = cpp_get_token (pfile);
 494       if (token == CPP_EOF || token == CPP_VSPACE
 495           /* XXX Temporary kluge - force flush after #include only */
 496           || (token == CPP_DIRECTIVE
 497               && CPP_BUFFER (pfile)->nominal_fname != print->last_fname))
 498         {
 499           cpp_output_tokens (pfile, print);
 500           if (token == CPP_EOF && CPP_BUFFER (pfile) == stop)
 501             return;
 502         }
 503     }
 504 }
 505
 506 /* Return the topmost cpp_buffer that corresponds to a file (not a macro).  */
 507
 508 cpp_buffer *
 509 cpp_file_buffer (pfile)
 510      cpp_reader *pfile;
 511 {
 512   cpp_buffer *ip;
 513
 514   for (ip = CPP_BUFFER (pfile); ip; ip = CPP_PREV_BUFFER (ip))
 515     if (ip->ihash != NULL)
 516       return ip;
 517   return NULL;
 518 }
 519
 520 /* Token-buffer helper functions.  */
 521
 522 /* Expand a token list's string space.  */
 523 static void
 524 expand_name_space (list, len)
 525      cpp_toklist *list;
 526      unsigned int len;
 527 {
 528   list->name_cap += len;
 529   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 530 }
 531
 532 /* Expand the number of tokens in a list.  */
 533 static void
 534 expand_token_space (list)
 535      cpp_toklist *list;
 536 {
 537   list->tokens_cap *= 2;
 538   list->tokens = (cpp_token *)
 539     xrealloc (list->tokens - 1, (list->tokens_cap + 1) * sizeof (cpp_token));
 540   list->tokens++;               /* Skip the dummy.  */
 541 }
 542
 543 /* Initialize a token list.  We allocate an extra token in front of
 544    the token list, as this allows us to always peek at the previous
 545    token without worrying about underflowing the list.  */
 546 static void
 547 init_token_list (pfile, list, recycle)
 548      cpp_reader *pfile;
 549      cpp_toklist *list;
 550      int recycle;
 551 {
 552   /* Recycling a used list saves 3 free-malloc pairs.  */
 553   if (!recycle)
 554     {
 555       /* Initialize token space.  Put a dummy token before the start
 556          that will fail matches.  */
 557       list->tokens_cap = 256;   /* 4K's worth.  */
 558       list->tokens = (cpp_token *)
 559         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 560       list->tokens[0].type = CPP_EOF;
 561       list->tokens++;
 562
 563       /* Initialize name space.  */
 564       list->name_cap = 1024;
 565       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 566
 567       /* Only create a comment space on demand.  */
 568       list->comments_cap = 0;
 569       list->comments = 0;
 570     }
 571
 572   list->tokens_used = 0;
 573   list->name_used = 0;
 574   list->comments_used = 0;
 575   if (pfile->buffer)
 576     list->line = pfile->buffer->lineno;
 577   list->dir_handler = 0;
 578   list->dir_flags = 0;
 579 }
 580
/* Scan an entire line and create a token list for it.  Does not
   macro-expand or execute directives.

   LIST is reset with init_token_list in recycle mode, so it must
   already own token and name storage.  Each token is lexed by
   _cpp_lex_token; the text it adds to PFILE's token_buffer is copied
   into LIST's name buffer and then dropped from the token_buffer again.
   Horizontal whitespace is not stored as a token; it only sets
   PREV_WHITESPACE on the next token.  The list ends with a CPP_VSPACE
   token, which has no name and whose aux field is set to the buffer's
   line number plus one.  */

void
_cpp_scan_line (pfile, list)
     cpp_reader *pfile;
     cpp_toklist *list;
{
  int i, col;
  long written, len;
  enum cpp_ttype type;
  int space_before;

  init_token_list (pfile, list, 1);

  /* Remember how much of the token_buffer was in use on entry; every
     token's text is measured from, and then rewound to, this point.  */
  written = CPP_WRITTEN (pfile);
  i = 0;
  space_before = 0;
  for (;;)
    {
      /* Column is sampled before lexing, so it is where the token (or
         the whitespace in front of it) starts.  */
      col = CPP_BUFFER (pfile)->cur - CPP_BUFFER (pfile)->line_base;
      type = _cpp_lex_token (pfile);
      len = CPP_WRITTEN (pfile) - written;
      CPP_SET_WRITTEN (pfile, written);
      if (type == CPP_HSPACE)
        {
          if (CPP_PEDANTIC (pfile))
            pedantic_whitespace (pfile, pfile->token_buffer + written, len);
          space_before = 1;
          continue;
        }
      else if (type == CPP_COMMENT)
        /* Only happens when processing -traditional macro definitions.
           Do not give this a token entry, but do not change space_before
           either.  */
        continue;

      /* Make room for one more token and for LEN more name bytes.  */
      if (list->tokens_used >= list->tokens_cap)
        expand_token_space (list);
      if (list->name_used + len >= list->name_cap)
        expand_name_space (list, list->name_used + len + 1 - list->name_cap);

      /* Macro names are recorded as plain names.  */
      if (type == CPP_MACRO)
        type = CPP_NAME;

      list->tokens_used++;
      TOK_TYPE  (list, i) = type;
      TOK_COL   (list, i) = col;
      TOK_FLAGS (list, i) = space_before ? PREV_WHITESPACE : 0;

      /* The end-of-line token is counted but gets no name.  */
      if (type == CPP_VSPACE)
        break;

      /* The offset is stored before TOK_NAME is used.
         NOTE(review): TOK_NAME presumably locates the text via the
         stored offset, and CPP_PWRITTEN presumably points at the
         rewound position where the token text begins - confirm against
         the macro definitions.  */
      TOK_LEN (list, i) = len;
      TOK_OFFSET (list, i) = list->name_used;
      memcpy (TOK_NAME (list, i), CPP_PWRITTEN (pfile), len);
      list->name_used += len;
      i++;
      space_before = 0;
    }
  TOK_AUX (list, i) = CPP_BUFFER (pfile)->lineno + 1;

  /* XXX Temporary kluge: put back the newline.  */
  FORWARD(-1);
}
 646
 647
 648 /* Skip a C-style block comment.  We know it's a comment, and point is
 649    at the second character of the starter.  */
 650 static void
 651 skip_block_comment (pfile)
 652      cpp_reader *pfile;
 653 {
 654   unsigned int line, col;
 655   const U_CHAR *limit, *cur;
 656
 657   FORWARD(1);
 658   line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 659   col = CPP_BUF_COL (CPP_BUFFER (pfile));
 660   limit = CPP_BUFFER (pfile)->rlimit;
 661   cur = CPP_BUFFER (pfile)->cur;
 662
 663   while (cur < limit)
 664     {
 665       char c = *cur++;
 666       if (c == '\n' || c == '\r')
 667         {
 668           /* \r cannot be a macro escape marker here. */
 669           if (!ACTIVE_MARK_P (pfile))
 670             CPP_BUMP_LINE_CUR (pfile, cur);
 671         }
 672       else if (c == '*')
 673         {
 674           /* Check for teminator.  */
 675           if (cur < limit && *cur == '/')
 676             goto out;
 677
 678           /* Warn about comment starter embedded in comment.  */
 679           if (cur[-2] == '/' && CPP_OPTION (pfile, warn_comments))
 680             cpp_warning_with_line (pfile, CPP_BUFFER (pfile)->lineno,
 681                                    cur - CPP_BUFFER (pfile)->line_base,
 682                                    "'/*' within comment");
 683         }
 684     }
 685
 686   cpp_error_with_line (pfile, line, col, "unterminated comment");
 687   cur--;
 688  out:
 689   CPP_BUFFER (pfile)->cur = cur + 1;
 690 }
 691
 692 /* Skip a C++/Chill line comment.  We know it's a comment, and point
 693    is at the second character of the initiator.  */
 694 static void
 695 skip_line_comment (pfile)
 696      cpp_reader *pfile;
 697 {
 698   FORWARD(1);
 699   for (;;)
 700     {
 701       int c = GETC ();
 702
 703       /* We don't have to worry about EOF in here.  */
 704       if (c == '\n')
 705         {
 706           /* Don't consider final '\n' to be part of comment.  */
 707           FORWARD(-1);
 708           return;
 709         }
 710       else if (c == '\r')
 711         {
 712           /* \r cannot be a macro escape marker here. */
 713           if (!ACTIVE_MARK_P (pfile))
 714             CPP_BUMP_LINE (pfile);
 715           if (CPP_OPTION (pfile, warn_comments))
 716             cpp_warning (pfile, "backslash-newline within line comment");
 717         }
 718     }
 719 }
 720
 721 /* Skip a comment - C, C++, or Chill style.  M is the first character
 722    of the comment marker.  If this really is a comment, skip to its
 723    end and return ' '.  If this is not a comment, return M (which will
 724    be '/' or '-').  */
 725
 726 static int
 727 skip_comment (pfile, m)
 728      cpp_reader *pfile;
 729      int m;
 730 {
 731   if (m == '/' && PEEKC() == '*')
 732     {
 733       skip_block_comment (pfile);
 734       return ' ';
 735     }
 736   else if (m == '/' && PEEKC() == '/')
 737     {
 738       if (CPP_BUFFER (pfile)->system_header_p)
 739         {
 740           /* We silently allow C++ comments in system headers, irrespective
 741              of conformance mode, because lots of busted systems do that
 742              and trying to clean it up in fixincludes is a nightmare.  */
 743           skip_line_comment (pfile);
 744           return ' ';
 745         }
 746       else if (CPP_OPTION (pfile, cplusplus_comments))
 747         {
 748           if (! CPP_BUFFER (pfile)->warned_cplusplus_comments)
 749             {
 750               if (CPP_WTRADITIONAL (pfile))
 751                 cpp_pedwarn (pfile,
 752                         "C++ style comments are not allowed in traditional C");
 753               else if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile))
 754                 cpp_pedwarn (pfile,
 755                         "C++ style comments are not allowed in ISO C89");
 756               if (CPP_WTRADITIONAL (pfile)
 757                   || (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)))
 758                 cpp_pedwarn (pfile,
 759                            "(this will be reported only once per input file)");
 760               CPP_BUFFER (pfile)->warned_cplusplus_comments = 1;
 761             }
 762           skip_line_comment (pfile);
 763           return ' ';
 764         }
 765       else
 766         return m;
 767     }
 768   else if (m == '-' && PEEKC() == '-'
 769            && CPP_OPTION (pfile, chill))
 770     {
 771       skip_line_comment (pfile);
 772       return ' ';
 773     }
 774   else
 775     return m;
 776 }
 777
 778 /* Identical to skip_comment except that it copies the comment into the
 779    token_buffer.  This is used if !discard_comments.  */
 780 static int
 781 copy_comment (pfile, m)
 782      cpp_reader *pfile;
 783      int m;
 784 {
 785   const U_CHAR *start = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
 786   const U_CHAR *limit;
 787
 788   if (skip_comment (pfile, m) == m)
 789     return m;
 790
 791   limit = CPP_BUFFER (pfile)->cur;
 792   CPP_RESERVE (pfile, limit - start + 2);
 793   CPP_PUTC_Q (pfile, m);
 794   for (; start <= limit; start++)
 795     if (*start != '\r')
 796       CPP_PUTC_Q (pfile, *start);
 797
 798   return ' ';
 799 }
 800
 801 static void
 802 null_warning (pfile, count)
 803      cpp_reader *pfile;
 804      unsigned int count;
 805 {
 806   if (count == 1)
 807     cpp_warning (pfile, "embedded null character ignored");
 808   else
 809     cpp_warning (pfile, "embedded null characters ignored");
 810 }
 811
 812 /* Skip whitespace \-newline and comments.  Does not macro-expand.  */
 813
 814 void
 815 _cpp_skip_hspace (pfile)
 816      cpp_reader *pfile;
 817 {
 818   unsigned int null_count = 0;
 819   int c;
 820
 821   while (1)
 822     {
 823       c = GETC();
 824       if (c == EOF)
 825         goto out;
 826       else if (is_hspace(c))
 827         {
 828           if ((c == '\f' || c == '\v') && CPP_PEDANTIC (pfile))
 829             cpp_pedwarn (pfile, "%s in preprocessing directive",
 830                          c == '\f' ? "formfeed" : "vertical tab");
 831           else if (c == '\0')
 832             null_count++;
 833         }
 834       else if (c == '\r')
 835         {
 836           /* \r is a backslash-newline marker if !has_escapes, and
 837              a deletable-whitespace or no-reexpansion marker otherwise. */
 838           if (CPP_BUFFER (pfile)->has_escapes)
 839             {
 840               if (PEEKC() == ' ')
 841                 FORWARD(1);
 842               else
 843                 break;
 844             }
 845           else
 846             CPP_BUMP_LINE (pfile);
 847         }
 848       else if (c == '/' || c == '-')
 849         {
 850           c = skip_comment (pfile, c);
 851           if (c  != ' ')
 852             break;
 853         }
 854       else
 855         break;
 856     }
 857   FORWARD(-1);
 858  out:
 859   if (null_count)
 860     null_warning (pfile, null_count);
 861 }
 862
 863 /* Read and discard the rest of the current line.  */
 864
 865 void
 866 _cpp_skip_rest_of_line (pfile)
 867      cpp_reader *pfile;
 868 {
 869   for (;;)
 870     {
 871       int c = GETC();
 872       switch (c)
 873         {
 874         case '\n':
 875           FORWARD(-1);
 876         case EOF:
 877           return;
 878
 879         case '\r':
 880           if (! CPP_BUFFER (pfile)->has_escapes)
 881             CPP_BUMP_LINE (pfile);
 882           break;
 883
 884         case '\'':
 885         case '\"':
 886           skip_string (pfile, c);
 887           break;
 888
 889         case '/':
 890         case '-':
 891           skip_comment (pfile, c);
 892           break;
 893
 894         case '\f':
 895         case '\v':
 896           if (CPP_PEDANTIC (pfile))
 897             cpp_pedwarn (pfile, "%s in preprocessing directive",
 898                          c == '\f' ? "formfeed" : "vertical tab");
 899           break;
 900
 901         }
 902     }
 903 }
 904
 905 /* Parse an identifier starting with C.  */
 906
 907 void
 908 _cpp_parse_name (pfile, c)
 909      cpp_reader *pfile;
 910      int c;
 911 {
 912   for (;;)
 913   {
 914       if (! is_idchar(c))
 915       {
 916           FORWARD (-1);
 917           break;
 918       }
 919
 920       if (c == '$' && CPP_PEDANTIC (pfile))
 921         cpp_pedwarn (pfile, "`$' in identifier");
 922
 923       CPP_RESERVE(pfile, 2); /* One more for final NUL.  */
 924       CPP_PUTC_Q (pfile, c);
 925       c = GETC();
 926       if (c == EOF)
 927         break;
 928   }
 929   return;
 930 }
 931
 932 /* Skip over a string or character constant whose opening quote C has
 933    already been read; the same quote closes it.  Both kinds are scanned
 934    alike -- some programs (e.g., troff) are perverse this way -- except
 935    that a single quoted constant may not extend over multiple lines.  */
 936 static void
 937 skip_string (pfile, c)
 938      cpp_reader *pfile;
 939      int c;
 940 {
 941   unsigned int start_line, start_column;
 942   unsigned int null_count = 0;
 943   /* Where the constant began, for the diagnostics below.  */
 944   start_line = CPP_BUF_LINE (CPP_BUFFER (pfile));
 945   start_column = CPP_BUF_COL (CPP_BUFFER (pfile));
 946   while (1)
 947     {
 948       int cc = GETC();
 949       switch (cc)
 950         {
 951         case EOF:
 952           cpp_error_with_line (pfile, start_line, start_column,
 953                                "unterminated string or character constant");
 954           if (pfile->multiline_string_line != start_line
 955               && pfile->multiline_string_line != 0)
 956             cpp_error_with_line (pfile,
 957                                  pfile->multiline_string_line, -1,
 958                          "possible real start of unterminated constant");
 959           pfile->multiline_string_line = 0;
 960           goto out;
 961           /* NULs are only counted here; the warning is issued at the end.  */
 962         case '\0':
 963           null_count++;
 964           break;
 965           /* NOTE(review): two exits below un-read the newline after CPP_BUMP_LINE ran; confirm it is not counted twice.  */
 966         case '\n':
 967           CPP_BUMP_LINE (pfile);
 968           /* In Fortran and assembly language, silently terminate
 969              strings of either variety at end of line.  This is a
 970              kludge around not knowing where comments are in these
 971              languages.  */
 972           if (CPP_OPTION (pfile, lang_fortran)
 973               || CPP_OPTION (pfile, lang_asm))
 974             {
 975               FORWARD(-1);
 976               goto out;
 977             }
 978           /* Character constants may not extend over multiple lines.
 979              In Standard C, neither may strings.  We accept multiline
 980              strings as an extension.  */
 981           if (c == '\'')
 982             {
 983               cpp_error_with_line (pfile, start_line, start_column,
 984                                    "unterminated character constant");
 985               FORWARD(-1);
 986               goto out;
 987             }
 988           if (CPP_PEDANTIC (pfile) && pfile->multiline_string_line == 0)
 989             cpp_pedwarn_with_line (pfile, start_line, start_column,
 990                                    "string constant runs past end of line");
 991           if (pfile->multiline_string_line == 0)
 992             pfile->multiline_string_line = start_line;  /* Only the first multi-line string is remembered.  */
 993           break;
 994
 995         case '\r':
 996           if (CPP_BUFFER (pfile)->has_escapes)
 997             {
 998               cpp_ice (pfile, "\\r escape inside string constant");
 999               FORWARD(1);
1000             }
1001           else
1002             /* Backslash newline is replaced by nothing at all.  */
1003             CPP_BUMP_LINE (pfile);
1004           break;
1005
1006         case '\\':
1007           FORWARD(1);  /* Skip the escaped character, whatever it is.  */
1008           break;
1009           /* Only the kind of quote that opened the constant closes it.  */
1010         case '\"':
1011         case '\'':
1012           if (cc == c)
1013             goto out;
1014           break;
1015         }
1016     }
1017   /* Every exit comes through here so that counted NULs are reported once.  */
1018  out:
1019   if (null_count == 1)
1020     cpp_warning (pfile, "null character in string or character constant");
1021   else if (null_count > 1)
1022     cpp_warning (pfile, "null characters in string or character constant");
1023 }
1024
1025 /* Copy the string or character constant opened by quote C (already read)
1026    to the token buffer, leaving out any '\r' bytes found inside it.  */
1027 static void
1028 parse_string (pfile, c)
1029      cpp_reader *pfile;
1030      int c;
1031 {
1032   const U_CHAR *src = CPP_BUFFER (pfile)->cur;  /* XXX Layering violation */
1033   const U_CHAR *end;
1034   skip_string (pfile, c);  /* Moves the buffer cursor past the constant.  */
1035   end = CPP_BUFFER (pfile)->cur;
1036   CPP_RESERVE (pfile, end - src + 2);
1037   CPP_PUTC_Q (pfile, c);
1038   for (; src < end; src++)
1039     {
1040       if (*src != '\r')
1041         CPP_PUTC_Q (pfile, *src);
1042     }
1043 }
1044
1045 /* Read an assertion into the token buffer, converting to
1046    canonical form: `#predicate(a n swe r)'  The next non-whitespace
1047    character to read should be the first letter of the predicate.
1048    Returns 0 for syntax error, 1 for bare predicate, 2 for predicate
1049    with answer (see callers for why). In case of 0, an error has been
1050    printed.  A newline that cuts the answer short is left unread.  */
1051 int
1052 _cpp_parse_assertion (pfile)
1053      cpp_reader *pfile;
1054 {
1055   int c, dropwhite;
1056   _cpp_skip_hspace (pfile);
1057   c = PEEKC();
1058   if (c == '\n')
1059     {
1060       cpp_error (pfile, "assertion without predicate");
1061       return 0;
1062     }
1063   else if (! is_idstart(c))
1064     {
1065       cpp_error (pfile, "assertion predicate is not an identifier");
1066       return 0;
1067     }
1068   CPP_PUTC(pfile, '#');
1069   FORWARD(1);
1070   _cpp_parse_name (pfile, c);
1071
1072   c = PEEKC();
1073   if (c != '(')
1074     {
1075       if (is_hspace(c) || c == '\r')
1076         _cpp_skip_hspace (pfile);
1077       c = PEEKC();
1078     }
1079   if (c != '(')
1080     return 1;
1081
1082   CPP_PUTC(pfile, '(');
1083   FORWARD(1);
1084   dropwhite = 1;  /* Swallow white space right after the '('.  */
1085   while ((c = GETC()) != ')')
1086     {
1087       /* End of line or input is tested first, so is_space never sees it.  */
1088       if (c == '\n' || c == EOF)
1089         {
1090           if (c == '\n') FORWARD(-1);
1091           cpp_error (pfile, "un-terminated assertion answer");
1092           return 0;
1093         }
1094       else if (is_space(c))
1095         {
1096           if (! dropwhite)
1097             {
1098               CPP_PUTC(pfile, ' ');
1099               dropwhite = 1;
1100             }
1101         }
1102       else if (c == '\r')
1103         CPP_BUMP_LINE (pfile);  /* \r cannot be a macro escape here.  */
1104       else
1105         {
1106           CPP_PUTC (pfile, c);
1107           dropwhite = 0;
1108         }
1109     }
1110   /* Close the answer, writing the ')' over a trailing space if there is one.  */
1111   if (pfile->limit[-1] == ' ')
1112     pfile->limit[-1] = ')';
1113   else if (pfile->limit[-1] == '(')
1114     {
1115       cpp_error (pfile, "empty token sequence in assertion");
1116       return 0;
1117     }
1118   else
1119     CPP_PUTC (pfile, ')');
1120
1121   return 2;
1122 }
1123
1124 /* Lex one token from the current buffer and append its spelling to
1125    pfile->token_buffer.  Returns the kind of token read: CPP_EOF when there
1126    is no buffer or it is exhausted, CPP_MACRO for an ordinary identifier.  */
1127 enum cpp_ttype
1128 _cpp_lex_token (pfile)
1129      cpp_reader *pfile;
1130 {
1131   register int c, c2;
1132   enum cpp_ttype token;
1133
1134   if (CPP_BUFFER (pfile) == NULL)
1135     return CPP_EOF;
1136   /* Fetch a character and dispatch on it; re-entered when nothing was produced.  */
1137  get_next:
1138   c = GETC();
1139   switch (c)
1140     {
1141     case EOF:
1142       return CPP_EOF;
1143
1144     case '/':
1145       if (PEEKC () == '=')
1146         goto op2;
1147       /* Also reached from '-' when "--" starts a Chill comment.  */
1148     comment:
1149       if (CPP_OPTION (pfile, discard_comments))
1150         c = skip_comment (pfile, c);
1151       else
1152         c = copy_comment (pfile, c);
1153       if (c != ' ')
1154         goto randomchar;
1155
1156       /* Comments are equivalent to spaces.
1157          For -traditional, a comment is equivalent to nothing.  */
1158       if (!CPP_OPTION (pfile, discard_comments))
1159         return CPP_COMMENT;
1160       else if (CPP_TRADITIONAL (pfile))
1161         {
1162           if (pfile->parsing_define_directive)
1163             return CPP_COMMENT;
1164           goto get_next;
1165         }
1166       else
1167         {
1168           CPP_PUTC (pfile, c);
1169           return CPP_HSPACE;
1170         }
1171
1172     case '#':
1173       CPP_PUTC (pfile, c);
1174       /* Also reached from '%' once the two bytes of "%:" have been written.  */
1175     hash:
1176       if (pfile->parsing_if_directive)
1177         {
1178           CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));  /* Drop "#" or "%:".  */
1179           if (_cpp_parse_assertion (pfile))
1180             return CPP_ASSERTION;
1181           return CPP_OTHER;
1182         }
1183
1184       if (pfile->parsing_define_directive)
1185         {
1186           c2 = PEEKC ();
1187           if (c2 == '#')
1188             {
1189               FORWARD (1);
1190               CPP_PUTC (pfile, c2);
1191             }
1192           else if (c2 == '%' && PEEKN (1) == ':')
1193             {
1194               /* Digraph: "%:" == "#".  */
1195               FORWARD (1);
1196               CPP_RESERVE (pfile, 2);
1197               CPP_PUTC_Q (pfile, c2);
1198               CPP_PUTC_Q (pfile, GETC ());
1199             }
1200           else
1201             return CPP_HASH;
1202
1203           return CPP_PASTE;
1204         }
1205
1206       if (!pfile->only_seen_white)
1207         return CPP_OTHER;
1208
1209       /* Remove the "#" or "%:" from the token buffer.  */
1210       CPP_ADJUST_WRITTEN (pfile, (c == '#' ? -1 : -2));
1211       return CPP_DIRECTIVE;
1212
1213     case '\"':
1214     case '\'':
1215       parse_string (pfile, c);
1216       return c == '\'' ? CPP_CHAR : CPP_STRING;
1217
1218     case '$':
1219       if (!CPP_OPTION (pfile, dollars_in_ident))
1220         goto randomchar;
1221       goto letter;
1222
1223     case ':':
1224       c2 = PEEKC ();
1225       /* Digraph: ":>" == "]".  */
1226       if (c2 == '>'
1227           || (c2 == ':' && CPP_OPTION (pfile, cplusplus)))
1228         goto op2;
1229       goto randomchar;
1230
1231     case '&':
1232     case '+':
1233     case '|':
1234       c2 = PEEKC ();
1235       if (c2 == c || c2 == '=')
1236         goto op2;
1237       goto randomchar;
1238
1239     case '%':
1240       /* Digraphs: "%:" == "#", "%>" == "}".  */
1241       c2 = PEEKC ();
1242       if (c2 == ':')
1243         {
1244           FORWARD (1);
1245           CPP_RESERVE (pfile, 2);
1246           CPP_PUTC_Q (pfile, c);
1247           CPP_PUTC_Q (pfile, c2);
1248           goto hash;
1249         }
1250       else if (c2 == '>')
1251         {
1252           FORWARD (1);
1253           CPP_RESERVE (pfile, 2);
1254           CPP_PUTC_Q (pfile, c);
1255           CPP_PUTC_Q (pfile, c2);
1256           return CPP_CLOSE_BRACE;  /* "%>" spells "}".  */
1257         }
1258       /* else fall through */
1259
1260     case '*':
1261     case '!':
1262     case '=':
1263     case '^':
1264       if (PEEKC () == '=')
1265         goto op2;
1266       goto randomchar;
1267
1268     case '-':
1269       c2 = PEEKC ();
1270       if (c2 == '-')
1271         {
1272           if (CPP_OPTION (pfile, chill))
1273             goto comment;  /* Chill style comment */
1274           else
1275             goto op2;
1276         }
1277       else if (c2 == '=')
1278         goto op2;
1279       else if (c2 == '>')
1280         {
1281           if (CPP_OPTION (pfile, cplusplus) && PEEKN (1) == '*')
1282             {
1283               /* In C++, there's a ->* operator.  */
1284               token = CPP_OTHER;
1285               CPP_RESERVE (pfile, 4);
1286               CPP_PUTC_Q (pfile, c);
1287               CPP_PUTC_Q (pfile, GETC ());
1288               CPP_PUTC_Q (pfile, GETC ());
1289               return token;
1290             }
1291           goto op2;
1292         }
1293       goto randomchar;
1294
1295     case '<':
1296       if (pfile->parsing_include_directive)
1297         {
1298           for (;;)
1299             {
1300               CPP_PUTC (pfile, c);
1301               if (c == '>')
1302                 break;
1303               c = GETC ();
1304               if (c == '\n' || c == EOF)
1305                 {
1306                   cpp_error (pfile,
1307                              "missing '>' in `#include <FILENAME>'");
1308                   break;
1309                 }
1310               else if (c == '\r')
1311                 {
1312                   if (!CPP_BUFFER (pfile)->has_escapes)
1313                     {
1314                       /* Backslash newline is replaced by nothing. */
1315                       CPP_ADJUST_WRITTEN (pfile, -1);
1316                       CPP_BUMP_LINE (pfile);
1317                     }
1318                   else
1319                     {
1320                       /* We might conceivably get \r- or \r<space> in
1321                          here.  Just delete 'em. */
1322                       int d = GETC();
1323                       if (d != '-' && d != ' ')
1324                         cpp_ice (pfile, "unrecognized escape \\r%c", d);
1325                       CPP_ADJUST_WRITTEN (pfile, -1);
1326                     }
1327                 }
1328             }
1329           return CPP_STRING;
1330         }
1331       /* Digraphs: "<%" == "{", "<:" == "[".  */
1332       c2 = PEEKC ();
1333       if (c2 == '%')
1334         {
1335           FORWARD (1);
1336           CPP_RESERVE (pfile, 2);
1337           CPP_PUTC_Q (pfile, c);
1338           CPP_PUTC_Q (pfile, c2);
1339           return CPP_OPEN_BRACE;  /* "<%" spells "{".  */
1340         }
1341       else if (c2 == ':')
1342         goto op2;
1343       /* else fall through */
1344     case '>':
1345       c2 = PEEKC ();
1346       if (c2 == '=')
1347         goto op2;
1348       /* GNU C++ supports MIN and MAX operators <? and >?.  */
1349       if (c2 != c && (!CPP_OPTION (pfile, cplusplus) || c2 != '?'))
1350         goto randomchar;
1351       FORWARD(1);
1352       CPP_RESERVE (pfile, 3);
1353       CPP_PUTC_Q (pfile, c);
1354       CPP_PUTC_Q (pfile, c2);
1355       if (PEEKC () == '=')
1356         CPP_PUTC_Q (pfile, GETC ());
1357       return CPP_OTHER;
1358
1359     case '.':
1360       c2 = PEEKC ();
1361       if (ISDIGIT (c2))
1362         {
1363           CPP_PUTC (pfile, c);
1364           c = GETC ();
1365           goto number;
1366         }
1367
1368       /* In C++ there's a .* operator.  */
1369       if (CPP_OPTION (pfile, cplusplus) && c2 == '*')
1370         goto op2;
1371
1372       if (c2 == '.' && PEEKN(1) == '.')
1373         {
1374           CPP_RESERVE (pfile, 3);
1375           CPP_PUTC_Q (pfile, '.');
1376           CPP_PUTC_Q (pfile, '.');
1377           CPP_PUTC_Q (pfile, '.');
1378           FORWARD (2);
1379           return CPP_ELLIPSIS;
1380         }
1381       goto randomchar;
1382       /* Two-character operator: C followed by the next character.  */
1383     op2:
1384       CPP_RESERVE (pfile, 2);
1385       CPP_PUTC_Q (pfile, c);
1386       CPP_PUTC_Q (pfile, GETC ());
1387       return CPP_OTHER;
1388
1389     case 'L':
1390       c2 = PEEKC ();
1391       if ((c2 == '\'' || c2 == '\"') && !CPP_TRADITIONAL (pfile))
1392         {
1393           CPP_PUTC (pfile, c);
1394           c = GETC ();
1395           parse_string (pfile, c);
1396           return c == '\'' ? CPP_WCHAR : CPP_WSTRING;
1397         }
1398       goto letter;
1399
1400     case '0': case '1': case '2': case '3': case '4':
1401     case '5': case '6': case '7': case '8': case '9':
1402     number:
1403     c2  = '.';  /* C2 holds the previous character, to spot exponent signs.  */
1404     for (;;)
1405       {
1406         CPP_RESERVE (pfile, 2);
1407         CPP_PUTC_Q (pfile, c);
1408         c = PEEKC ();
1409         if (c == EOF)
1410           break;
1411         if (!is_numchar(c) && c != '.'
1412             && ((c2 != 'e' && c2 != 'E'
1413                  && ((c2 != 'p' && c2 != 'P')
1414                      || CPP_OPTION (pfile, c89)))
1415                 || (c != '+' && c != '-')))
1416           break;
1417         FORWARD(1);
1418         c2= c;
1419       }
1420     return CPP_NUMBER;
1421     case 'b': case 'c': case 'd': case 'h': case 'o':
1422     case 'B': case 'C': case 'D': case 'H': case 'O':
1423       if (CPP_OPTION (pfile, chill) && PEEKC () == '\'')
1424         {
1425           CPP_RESERVE (pfile, 2);
1426           CPP_PUTC_Q (pfile, c);
1427           CPP_PUTC_Q (pfile, '\'');
1428           FORWARD(1);
1429           for (;;)
1430             {
1431               c = GETC();
1432               if (c == EOF)
1433                 goto chill_number_eof;
1434               if (!is_numchar(c))
1435                 break;
1436               CPP_PUTC (pfile, c);
1437             }
1438           if (c == '\'')
1439             {
1440               CPP_RESERVE (pfile, 2);
1441               CPP_PUTC_Q (pfile, c);
1442               return CPP_STRING;
1443             }
1444           else
1445             {
1446               FORWARD(-1);
1447             chill_number_eof:
1448               return CPP_NUMBER;
1449             }
1450         }
1451       else
1452         goto letter;
1453     case '_':
1454     case 'a': case 'e': case 'f': case 'g': case 'i': case 'j':
1455     case 'k': case 'l': case 'm': case 'n': case 'p': case 'q':
1456     case 'r': case 's': case 't': case 'u': case 'v': case 'w':
1457     case 'x': case 'y': case 'z':
1458     case 'A': case 'E': case 'F': case 'G': case 'I': case 'J':
1459     case 'K': case 'M': case 'N': case 'P': case 'Q': case 'R':
1460     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1461     case 'Y': case 'Z':
1462     letter:
1463     _cpp_parse_name (pfile, c);
1464     return CPP_MACRO;  /* The callers decide whether the name gets expanded.  */
1465
1466     case ' ':  case '\t':  case '\v': case '\f': case '\0':
1467       {
1468         int null_count = 0;
1469
1470         for (;;)
1471           {
1472             if (c == '\0')
1473               null_count++;
1474             else
1475               CPP_PUTC (pfile, c);
1476             c = PEEKC ();
1477             if (c == EOF || !is_hspace(c))
1478               break;
1479             FORWARD(1);
1480           }
1481         if (null_count)
1482           null_warning (pfile, null_count);
1483         return CPP_HSPACE;
1484       }
1485
1486     case '\r':
1487       if (CPP_BUFFER (pfile)->has_escapes)
1488         {
1489           c = GETC ();
1490           if (c == '-')
1491             {
1492               if (pfile->output_escapes)
1493                 CPP_PUTS (pfile, "\r-", 2);
1494               _cpp_parse_name (pfile, GETC ());
1495               return CPP_NAME;
1496             }
1497           else if (c == ' ')
1498             {
1499               /* "\r " means a space, but only if necessary to prevent
1500                  accidental token concatenation.  */
1501               CPP_RESERVE (pfile, 2);
1502               if (pfile->output_escapes)
1503                 CPP_PUTC_Q (pfile, '\r');
1504               CPP_PUTC_Q (pfile, c);
1505               return CPP_HSPACE;
1506             }
1507           else
1508             {
1509               cpp_ice (pfile, "unrecognized escape \\r%c", c);
1510               goto get_next;
1511             }
1512         }
1513       else
1514         {
1515           /* Backslash newline is ignored. */
1516           if (!ACTIVE_MARK_P (pfile))
1517             CPP_BUMP_LINE (pfile);
1518           goto get_next;
1519         }
1520
1521     case '\n':
1522       CPP_PUTC (pfile, c);
1523       return CPP_VSPACE;
1524
1525     case '(': token = CPP_OPEN_PAREN;  goto char1;
1526     case ')': token = CPP_CLOSE_PAREN; goto char1;
1527     case '{': token = CPP_OPEN_BRACE;  goto char1;
1528     case '}': token = CPP_CLOSE_BRACE; goto char1;
1529     case ',': token = CPP_COMMA;       goto char1;
1530     case ';': token = CPP_SEMICOLON;   goto char1;
1531
1532     randomchar:
1533     default:
1534       token = CPP_OTHER;
1535     char1:
1536       CPP_PUTC (pfile, c);
1537       return token;
1538     }
1539 }
1540
1541 /* Try to expand the name held in the token buffer from offset WRITTEN to CPP_WRITTEN (pfile).
1542    Returns 1 if the caller should lex again, else 0.  Caller is expected to have checked no_macro_expand.  */
1543 static int
1544 maybe_macroexpand (pfile, written)
1545      cpp_reader *pfile;
1546      long written;
1547 {
1548   U_CHAR *macro = pfile->token_buffer + written;
1549   size_t len = CPP_WRITTEN (pfile) - written;
1550   HASHNODE *hp = _cpp_lookup (pfile, macro, len);
1551
1552   /* _cpp_lookup never returns null.  */
1553   if (hp->type == T_VOID)
1554     return 0;
1555   if (hp->disabled || hp->type == T_IDENTITY)
1556     {
1557       if (pfile->output_escapes)
1558         {
1559           /* Insert a no-reexpand marker before IDENT.  */
1560           CPP_RESERVE (pfile, 2);
1561           CPP_ADJUST_WRITTEN (pfile, 2);
1562           macro = pfile->token_buffer + written;  /* Recomputed after CPP_RESERVE -- presumably the buffer can move.  */
1563           /* Slide the name up two bytes and put the \r- marker in front of it.  */
1564           memmove (macro + 2, macro, len);
1565           macro[0] = '\r';
1566           macro[1] = '-';
1567         }
1568       return 0;
1569     }
1570   if (hp->type == T_EMPTY)
1571     {
1572       /* Special case optimization: macro expands to nothing.  */
1573       CPP_SET_WRITTEN (pfile, written);
1574       CPP_PUTC_Q (pfile, ' ');  /* The name is replaced by a single space.  */
1575       return 1;
1576     }
1577
1578   /* If macro wants an arglist, verify that a '(' follows.  */
1579   if (hp->type == T_FMACRO)
1580     {
1581       int macbuf_whitespace = 0;
1582       int c;
1583       /* While in a macro buffer, look for the '(' there, popping each buffer that runs out first.  */
1584       while (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1585         {
1586           const U_CHAR *point = CPP_BUFFER (pfile)->cur;
1587           for (;;)
1588             {
1589               _cpp_skip_hspace (pfile);
1590               c = PEEKC ();
1591               if (c == '\n')
1592                 FORWARD(1);
1593               else
1594                 break;
1595             }
1596           if (point != CPP_BUFFER (pfile)->cur)  /* Something was skipped in this buffer.  */
1597             macbuf_whitespace = 1;
1598           if (c == '(')
1599             goto is_macro_call;
1600           else if (c != EOF)
1601             goto not_macro_call;
1602           cpp_pop_buffer (pfile);
1603         }
1604       /* No longer in a macro buffer: peek past white space and newlines between a mark and a return to it.  */
1605       CPP_SET_MARK (pfile);
1606       for (;;)
1607         {
1608           _cpp_skip_hspace (pfile);
1609           c = PEEKC ();
1610           if (c == '\n')
1611             FORWARD(1);
1612           else
1613             break;
1614         }
1615       CPP_GOTO_MARK (pfile);  /* Presumably restores the position marked above -- confirm.  */
1616
1617       if (c != '(')
1618         {
1619         not_macro_call:
1620           if (macbuf_whitespace)
1621             CPP_PUTC (pfile, ' ');
1622           return 0;
1623         }
1624     }
1625
1626  is_macro_call:
1627   /* This is now known to be a macro call.
1628      Expand the macro, reading arguments as needed,
1629      and push the expansion on the input stack.  */
1630   _cpp_macroexpand (pfile, hp);
1631   CPP_SET_WRITTEN (pfile, written);  /* Discard the macro name from the token buffer.  */
1632   return 1;
1633 }
1634
1635 /* Issue a pedantic warning for each vertical tab or form feed among the LEN
1636    bytes at P (constraint violation, C99 6.10 para 5).  Caller has checked CPP_PEDANTIC.  */
1637 static void
1638 pedantic_whitespace (pfile, p, len)
1639      cpp_reader *pfile;
1640      U_CHAR *p;
1641      unsigned int len;
1642 {
1643   U_CHAR *end = p + len;
1644
1645   for (; p != end; p++)
1646     switch (*p)
1647       {
1648       case '\v': cpp_pedwarn (pfile, "vertical tab in preprocessing directive"); break;
1649       case '\f': cpp_pedwarn (pfile, "form feed in preprocessing directive"); break;
1650       default: break;
1651       }
1652 }
1653 /* Return the next token of the input.  Directives are executed and macro
1654    invocations expanded on the way; white space and comments are returned.  */
1655 enum cpp_ttype
1656 cpp_get_token (pfile)
1657      cpp_reader *pfile;
1658 {
1659   const long start = CPP_WRITTEN (pfile);
1660
1661   for (;;)
1662     {
1663       enum cpp_ttype kind = _cpp_lex_token (pfile);
1664
1665       /* Horizontal space and comments leave the line state untouched.  */
1666       if (kind == CPP_HSPACE || kind == CPP_COMMENT)
1667         return kind;
1668
1669       if (kind == CPP_VSPACE)
1670         {
1671           if (pfile->only_seen_white == 0)
1672             pfile->only_seen_white = 1;
1673           CPP_BUMP_LINE (pfile);
1674           return kind;
1675         }
1676
1677       if (kind == CPP_EOF)
1678         {
1679           if (CPP_BUFFER (pfile) == NULL)
1680             return CPP_EOF;
1681           if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1682             {
1683               /* A macro buffer ran out: pop it and keep lexing.  */
1684               cpp_pop_buffer (pfile);
1685               continue;
1686             }
1687           cpp_pop_buffer (pfile);
1688           return CPP_EOF;
1689         }
1690
1691       /* Every remaining kind of token clears potential_control_macro.  */
1692       pfile->potential_control_macro = 0;
1693       if (kind == CPP_DIRECTIVE)
1694         {
1695           if (_cpp_handle_directive (pfile))
1696             return CPP_DIRECTIVE;
1697           /* Not taken as a directive: give the '#' back as ordinary text.  */
1698           pfile->only_seen_white = 0;
1699           CPP_PUTC (pfile, '#');
1700           return CPP_OTHER;
1701         }
1702
1703       pfile->only_seen_white = 0;
1704       if (kind != CPP_MACRO)
1705         return kind;
1706       if (pfile->no_macro_expand || ! maybe_macroexpand (pfile, start))
1707         return CPP_NAME;
1708     }
1709 }
1710
1711 /* Like cpp_get_token, but skip spaces (horizontal and vertical) and
1712    comments, rewinding the token buffer over each token skipped.  */
1713 enum cpp_ttype
1714 cpp_get_non_space_token (pfile)
1715      cpp_reader *pfile;
1716 {
1717   long old_written = CPP_WRITTEN (pfile);  /* long, as the other token getters keep it.  */
1718   for (;;)
1719     {
1720       enum cpp_ttype token = cpp_get_token (pfile);
1721       if (token != CPP_COMMENT && token != CPP_HSPACE && token != CPP_VSPACE)
1722         return token;
1723       CPP_SET_WRITTEN (pfile, old_written);
1724     }
1725 }
1726
1727 /* Like cpp_get_token, except that it does not execute directives, does not consume
1728    vertical space, and discards horizontal space.  End of a non-macro buffer reads as CPP_VSPACE.  */
1729 enum cpp_ttype
1730 _cpp_get_directive_token (pfile)
1731      cpp_reader *pfile;
1732 {
1733   long old_written;
1734   enum cpp_ttype token;
1735   int at_bol;
1736
1737  get_next:
1738   at_bol = (CPP_BUFFER (pfile)->cur == CPP_BUFFER (pfile)->line_base);
1739   old_written = CPP_WRITTEN (pfile);
1740   token = _cpp_lex_token (pfile);
1741   switch (token)
1742     {
1743     default:
1744       return token;
1745
1746     case CPP_VSPACE:
1747       /* Put it back and return VSPACE.  */
1748       FORWARD(-1);
1749       CPP_ADJUST_WRITTEN (pfile, -1);
1750       return CPP_VSPACE;
1751
1752     case CPP_HSPACE:
1753       /* The purpose of this rather strange check is to prevent pedantic
1754          warnings for ^L in an #ifdefed out block.  */
1755       if (CPP_PEDANTIC (pfile) && ! at_bol)
1756         pedantic_whitespace (pfile, pfile->token_buffer + old_written,
1757                              CPP_WRITTEN (pfile) - old_written);
1758       /* Horizontal space is never returned: drop it and lex again.  */
1759       CPP_SET_WRITTEN (pfile, old_written);
1760       goto get_next;
1761
1762     case CPP_DIRECTIVE:
1763       /* Don't execute the directive, but don't smash it to OTHER either.  */
1764       CPP_PUTC (pfile, '#');
1765       return CPP_DIRECTIVE;
1766
1767     case CPP_MACRO:
1768       if (! pfile->no_macro_expand
1769           && maybe_macroexpand (pfile, old_written))
1770         goto get_next;
1771       return CPP_NAME;
1772
1773     case CPP_EOF:
1774       if (CPP_IS_MACRO_BUFFER (CPP_BUFFER (pfile)))
1775         {
1776           cpp_pop_buffer (pfile);
1777           goto get_next;
1778         }
1779       else
1780         /* This can happen for files that don't end with a newline,
1781            and for cpp_define and friends.  Pretend they do, so
1782            callers don't have to deal.  A warning will be issued by
1783            someone else, if necessary.  */
1784         return CPP_VSPACE;
1785     }
1786 }
1787
1788 /* Add to *LINEP the number of '\n' and '\r' bytes in [START, LIMIT) and return
1789    a pointer just past the last of them, or START itself if there is none.  */
1790 static U_CHAR *
1791 find_position (start, limit, linep)
1792      U_CHAR *start;
1793      U_CHAR *limit;
1794      unsigned long *linep;
1795 {
1796   unsigned long seen = 0;
1797   U_CHAR *line_start = start;
1798   U_CHAR *p;
1799
1800   for (p = start; p < limit; p++)
1801     if (*p == '\n' || *p == '\r')
1802       {
1803         seen++;
1804         line_start = p + 1;
1805       }
1806   *linep += seen;
1807   return line_start;
1808 }
1809
1810 /* The following table is used by _cpp_read_and_prescan.  If we have
1811    designated initializers, it can be constant data; otherwise, it is
1812    set up at runtime by _cpp_init_input_buffer.  */
1813
1814 #ifndef UCHAR_MAX
1815 #define UCHAR_MAX 255   /* assume 8-bit bytes */
1816 #endif
1817 /* CHARTAB ... END wrap the s() entries below: an initializer list in the first case, the body of init_chartab in the second.  */
1818 #if (GCC_VERSION >= 2007)
1819 #define init_chartab()  /* nothing */
1820 #define CHARTAB __extension__ static const U_CHAR chartab[UCHAR_MAX + 1] = {
1821 #define END };
1822 #define s(p, v) [p] = v,
1823 #else
1824 #define CHARTAB static U_CHAR chartab[UCHAR_MAX + 1] = { 0 }; \
1825  static void init_chartab PARAMS ((void)) { \
1826  unsigned char *x = chartab;
1827 #define END }
1828 #define s(p, v) x[p] = v;
1829 #endif
1830
1831 /* Table of characters that can't be handled in the inner loop.
1832    Also contains the mapping between trigraph third characters and their
1833    replacements.  The SPECCASE codes are all smaller than any replacement.  */
1834 #define SPECCASE_CR        1
1835 #define SPECCASE_BACKSLASH 2
1836 #define SPECCASE_QUESTION  3
1837
1838 CHARTAB
1839   s('\r', SPECCASE_CR)
1840   s('\\', SPECCASE_BACKSLASH)
1841   s('?',  SPECCASE_QUESTION)
1842   /* Third character of each trigraph, mapped to the character it stands for.  */
1843   s('=', '#')   s(')', ']')     s('!', '|')
1844   s('(', '[')   s('\'', '^')    s('>', '}')
1845   s('/', '\\')  s('<', '{')     s('-', '~')
1846 END
1847
1848 #undef CHARTAB
1849 #undef END
1850 #undef s
1851 /* NORMAL: character C has no SPECCASE entry.  NONTRI: value C is at most SPECCASE_QUESTION (presumably a chartab entry).  */
1852 #define NORMAL(c) ((chartab[c]) == 0 || (chartab[c]) > SPECCASE_QUESTION)
1853 #define NONTRI(c) ((c) <= SPECCASE_QUESTION)
1854
1855 /* Read the entire contents of file DESC into buffer BUF.  LEN is how
1856    much memory to allocate initially; more will be allocated if
1857    necessary.  Convert end-of-line markers (\n, \r, \r\n, \n\r) to
1858    canonical form (\n).  If enabled, convert and/or warn about
1859    trigraphs.  Convert backslash-newline to a one-character escape
1860    (\r) and remove it from "embarrassing" places (i.e. the middle of a
1861    token).  If there is no newline at the end of the file, add one and
1862    warn.  Returns -1 on failure, or the actual length of the data to
1863    be scanned.
1864
1865    This function does a lot of work, and can be a serious performance
1866    bottleneck.  It has been tuned heavily; make sure you understand it
1867    before hacking.  The common case - no trigraphs, Unix style line
1868    breaks, backslash-newline set off by whitespace, newline at EOF -
1869    has been optimized at the expense of the others.  The performance
1870    penalty for DOS style line breaks (\r\n) is about 15%.
1871
1872    Warnings lose particularly heavily since we have to determine the
1873    line number, which involves scanning from the beginning of the file
1874    or from the last warning.  The penalty for the absence of a newline
1875    at the end of reload1.c is about 60%.  (reload1.c is 329k.)
1876
1877    If your file has more than one kind of end-of-line marker, you
1878    will get messed-up line numbering.
1879
1880    So that the cases of the switch statement do not have to concern
1881    themselves with the complications of reading beyond the end of the
1882    buffer, the buffer is guaranteed to have at least 3 characters in
1883    it (or however many are left in the file, if less) on entry to the
1884    switch.  This is enough to handle trigraphs and the "\\\n\r" and
1885    "\\\r\n" cases.
1886
1887    The end of the buffer is marked by a '\\', which, being a special
1888    character, guarantees we will exit the fast-scan loops and perform
1889    a refill. */
1890
1891 long
1892 _cpp_read_and_prescan (pfile, fp, desc, len)
1893      cpp_reader *pfile;
1894      cpp_buffer *fp;
1895      int desc;
1896      size_t len;
1897 {
1898   U_CHAR *buf = (U_CHAR *) xmalloc (len);
1899   U_CHAR *ip, *op, *line_base;
1900   U_CHAR *ibase;
1901   unsigned long line;
1902   unsigned int deferred_newlines;
1903   size_t offset;
1904   int count = 0;
1905
1906   offset = 0;
1907   deferred_newlines = 0;
1908   op = buf;
1909   line_base = buf;
1910   line = 1;
1911   ibase = pfile->input_buffer + 3;
1912   ip = ibase;
1913   ip[-1] = '\0';  /* Guarantee no match with \n for SPECCASE_CR */
1914
1915   for (;;)
1916     {
1917       U_CHAR *near_buff_end;
1918
1919       count = read (desc, ibase, pfile->input_buffer_len);
1920       if (count < 0)
1921         goto error;
1922
1923       ibase[count] = '\\';  /* Marks end of buffer */
1924       if (count)
1925         {
1926           near_buff_end = pfile->input_buffer + count;
1927           offset += count;
1928           if (offset > len)
1929             {
1930               size_t delta_op;
1931               size_t delta_line_base;
1932               len = offset * 2;
1933               if (offset > len)
1934                 /* len overflowed.
1935                    This could happen if the file is larger than half the
1936                    maximum address space of the machine. */
1937                 goto too_big;
1938
1939               delta_op = op - buf;
1940               delta_line_base = line_base - buf;
1941               buf = (U_CHAR *) xrealloc (buf, len);
1942               op = buf + delta_op;
1943               line_base = buf + delta_line_base;
1944             }
1945         }
1946       else
1947         {
1948           if (ip == ibase)
1949             break;
1950           /* Allow normal processing of the (at most 2) remaining
1951              characters.  The end-of-buffer marker is still present
1952              and prevents false matches within the switch. */
1953           near_buff_end = ibase - 1;
1954         }
1955
1956       for (;;)
1957         {
1958           unsigned int span;
1959
1960           /* Deal with \-newline, potentially in the middle of a token. */
1961           if (deferred_newlines)
1962             {
1963               if (op != buf && ! is_space (op[-1]) && op[-1] != '\r')
1964                 {
1965                   /* Previous was not white space.  Skip to white
1966                      space, if we can, before outputting the \r's */
1967                   span = 0;
1968                   while (ip[span] != ' '
1969                          && ip[span] != '\t'
1970                          && ip[span] != '\n'
1971                          && NORMAL(ip[span]))
1972                     span++;
1973                   memcpy (op, ip, span);
1974                   op += span;
1975                   ip += span;
1976                   if (! NORMAL(ip[0]))
1977                     goto do_speccase;
1978                 }
1979               while (deferred_newlines)
1980                 deferred_newlines--, *op++ = '\r';
1981             }
1982
1983           /* Copy as much as we can without special treatment. */
1984           span = 0;
1985           while (NORMAL (ip[span])) span++;
1986           memcpy (op, ip, span);
1987           op += span;
1988           ip += span;
1989
1990         do_speccase:
1991           if (ip > near_buff_end) /* Do we have enough chars? */
1992             break;
1993           switch (chartab[*ip++])
1994             {
1995             case SPECCASE_CR:  /* \r */
1996               if (ip[-2] != '\n')
1997                 {
1998                   if (*ip == '\n')
1999                     ip++;
2000                   *op++ = '\n';
2001                 }
2002               break;
2003
2004             case SPECCASE_BACKSLASH:  /* \ */
2005               if (*ip == '\n')
2006                 {
2007                   deferred_newlines++;
2008                   ip++;
2009                   if (*ip == '\r') ip++;
2010                 }
2011               else if (*ip == '\r')
2012                 {
2013                   deferred_newlines++;
2014                   ip++;
2015                   if (*ip == '\n') ip++;
2016                 }
2017               else
2018                 *op++ = '\\';
2019               break;
2020
2021             case SPECCASE_QUESTION: /* ? */
2022               {
2023                 unsigned int d, t;
2024
2025                 *op++ = '?'; /* Normal non-trigraph case */
2026                 if (ip[0] != '?')
2027                   break;
2028
2029                 d = ip[1];
2030                 t = chartab[d];
2031                 if (NONTRI (t))
2032                   break;
2033
2034                 if (CPP_OPTION (pfile, warn_trigraphs))
2035                   {
2036                     unsigned long col;
2037                     line_base = find_position (line_base, op, &line);
2038                     col = op - line_base + 1;
2039                     if (CPP_OPTION (pfile, trigraphs))
2040                       cpp_warning_with_line (pfile, line, col,
2041                                              "trigraph ??%c converted to %c", d, t);
2042                     else
2043                       cpp_warning_with_line (pfile, line, col,
2044                                              "trigraph ??%c ignored", d);
2045                   }
2046
2047                 ip += 2;
2048                 if (CPP_OPTION (pfile, trigraphs))
2049                   {
2050                     op[-1] = t;     /* Overwrite '?' */
2051                     if (t == '\\')
2052                       {
2053                         op--;
2054                         *--ip = '\\';
2055                         goto do_speccase; /* May need buffer refill */
2056                       }
2057                   }
2058                 else
2059                   {
2060                     *op++ = '?';
2061                     *op++ = d;
2062                   }
2063               }
2064               break;
2065             }
2066         }
2067       /* Copy previous char plus unprocessed (at most 2) chars
2068          to beginning of buffer, refill it with another
2069          read(), and continue processing */
2070       memmove (ip - count - 1, ip - 1, 4 - (ip - near_buff_end));
2071       ip -= count;
2072     }
2073
2074   if (offset == 0)
2075     return 0;
2076
2077   if (op[-1] != '\n')
2078     {
2079       unsigned long col;
2080       line_base = find_position (line_base, op, &line);
2081       col = op - line_base + 1;
2082       cpp_warning_with_line (pfile, line, col, "no newline at end of file");
2083       if (offset + 1 > len)
2084         {
2085           len += 1;
2086           if (offset + 1 > len)
2087             goto too_big;
2088           buf = (U_CHAR *) xrealloc (buf, len);
2089           op = buf + offset;
2090         }
2091       *op++ = '\n';
2092     }
2093
2094   fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf));
2095   return op - buf;
2096
2097  too_big:
2098   cpp_notice (pfile, "%s is too large (>%lu bytes)", fp->ihash->name,
2099               (unsigned long)offset);
2100   free (buf);
2101   return -1;
2102
2103  error:
2104   cpp_error_from_errno (pfile, fp->ihash->name);
2105   free (buf);
2106   return -1;
2107 }
2108
2109 /* Allocate pfile->input_buffer, and initialize chartab[]
2110    if it hasn't happened already.  */
2111
2112 void
2113 _cpp_init_input_buffer (pfile)
2114      cpp_reader *pfile;
2115 {
2116   U_CHAR *tmp;
2117
2118   init_chartab ();
2119   init_token_list (pfile, &pfile->directbuf, 0);
2120
2121   /* Determine the appropriate size for the input buffer.  Normal C
2122      source files are smaller than eight K.  */
2123   /* 8Kbytes of buffer proper, 1 to detect running off the end without
2124      address arithmetic all the time, and 3 for pushback during buffer
2125      refill, in case there's a potential trigraph or end-of-line
2126      digraph at the end of a block. */
2127
2128   tmp = (U_CHAR *) xmalloc (8192 + 1 + 3);
2129   pfile->input_buffer = tmp;
2130   pfile->input_buffer_len = 8192;
2131 }
2132
2133 /* Utility routine:
2134    Compares, in the manner of strcmp(3), the token beginning at TOKEN
2135    and extending for LEN characters to the NUL-terminated string
2136    STRING.  Typical usage:
2137
2138    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
2139                  "inline"))
2140      { ... }
2141  */
2142
2143 int
2144 cpp_idcmp (token, len, string)
2145      const U_CHAR *token;
2146      size_t len;
2147      const char *string;
2148 {
2149   size_t len2 = strlen (string);
2150   int r;
2151
2152   if ((r = memcmp (token, string, MIN (len, len2))))
2153     return r;
2154
2155   /* The longer of the two strings sorts after the shorter.  */
2156   if (len == len2)
2157     return 0;
2158   else if (len < len2)
2159     return -1;
2160   else
2161     return 1;
2162 }
2163
2164 #ifdef NEW_LEXER
2165
2166 /* Lexing algorithm.
2167
2168  The original lexer in cpplib was made up of two passes: a first pass
2169  that replaced trigraphs and deleted escaped newlines, and a second
2170  pass that tokenized the result of the first pass.  Tokenisation was
2171  performed by peeking at the next character in the input stream.  For
2172  example, if the input stream contained "!=", the handler for the !
2173  character would peek at the next character, and if it were a '='
2174  would skip over it, and return a "!=" token, otherwise it would
2175  return just the "!" token.
2176
2177  To implement a single-pass lexer, this peeking ahead is unworkable.
2178  An arbitrary number of escaped newlines, and trigraphs (in particular
2179  ??/ which translates to the escape \), could separate the '!' and '='
2180  in the input stream, yet the next token is still a "!=".
2181
2182  Suppose instead that we lex by one logical line at a time, producing
2183  a token list or stack for each logical line, and when seeing the '!'
2184  push a CPP_NOT token on the list.  Then if the '!' is part of a
2185  longer token ("!=") we know we must see the remainder of the token by
2186  the time we reach the end of the logical line.  Thus we can have the
2187  '=' handler look at the previous token (at the end of the list / top
2188  of the stack) and see if it is a "!" token, and if so, instead of
2189  pushing a "=" token revise the existing token to be a "!=" token.
2190
2191  This works in the presence of escaped newlines, because the '\' would
2192  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
2193  newline ('\n' or '\r') handler looks at the token at the top of the
2194  stack to see if it is a CPP_BACKSLASH, and if so discards both.
2195  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
2196  the '=' handler would never see any intervening escaped newlines.
2197
2198  To make trigraphs work in this context, as in precedence trigraphs
2199  are highest and converted before anything else, the '?' handler does
2200  lookahead to see if it is a trigraph, and if so skips the trigraph
2201  and pushes the token it represents onto the top of the stack.  This
2202  also works in the particular case of a CPP_BACKSLASH trigraph.
2203
2204  To the preprocessor, whitespace is only significant to the point of
2205  knowing whether whitespace precedes a particular token.  For example,
2206  the '=' handler needs to know whether there was whitespace between it
2207  and a "!" token on the top of the stack, to make the token conversion
2208  decision correctly.  So each token has a PREV_WHITESPACE flag to
2209  indicate this - the standard permits consecutive whitespace to be
2210  regarded as a single space.  The compiler front ends are not
2211  interested in whitespace at all; they just require a token stream.
2212  Another place where whitespace is significant to the preprocessor is
2213  a #define statement - if there is whitespace between the macro name
2214  and an initial "(" token the macro is "object-like", otherwise it is
2215  a function-like macro that takes arguments.
2216
2217  However, all is not rosy.  Parsing of identifiers, numbers, comments
2218  and strings becomes trickier because of the possibility of raw
2219  trigraphs and escaped newlines in the input stream.
2220
2221  The trigraphs are three consecutive characters beginning with two
2222  question marks.  A question mark is not valid as part of a number or
2223  identifier, so parsing of a number or identifier terminates normally
2224  upon reaching it, returning to the mainloop which handles the
2225  trigraph just like it would in any other position.  Similarly for the
2226  backslash of a backslash-newline combination.  So we just need the
2227  escaped-newline dropper in the mainloop to check if the token on the
2228  top of the stack after dropping the escaped newline is a number or
2229  identifier, and if so to continue processing it as if nothing had
2230  happened.
2231
2232  For strings, we replace trigraphs whenever we reach a quote or
2233  newline, because there might be a backslash trigraph escaping them.
2234  We need to be careful that we start trigraph replacing from where we
2235  left off previously, because it is possible for a first scan to leave
2236  "fake" trigraphs that a second scan would pick up as real (e.g. the
2237  sequence "????/\n=" would find a fake ??= trigraph after removing the
2238  escaped newline.)
2239
2240  For line comments, on reaching a newline we scan the previous
2241  character(s) to see if it escaped, and continue if it is.  Block
2242  comments ignore everything and just focus on finding the comment
2243  termination mark.  The only difficult thing, and it is surprisingly
2244  tricky, is checking if an asterisk precedes the final slash since
2245  they could be separated by escaped newlines.  If the preprocessor is
2246  invoked with the output comments option, we don't bother removing
2247  escaped newlines and replacing trigraphs for output.
2248
2249  Finally, numbers can begin with a period, which is pushed initially
2250  as a CPP_DOT token in its own right.  The digit handler checks if the
2251  previous token was a CPP_DOT not separated by whitespace, and if so
2252  pops it off the stack and pushes a period into the number's buffer
2253  before calling the number parser.
2254
2255 */
2256
/* Source spellings of the digraphs.  NOTE(review): the order is
   presumably significant to whatever indexes this table; the users
   are not visible here, so do not reorder without checking them.  */
static const unsigned char *digraph_spellings [] = {
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};

/* Indexed by the third character of a "??x" sequence: the character
   the trigraph stands for, or zero if "??x" is not a trigraph.
   Filled in by init_trigraph_map ().  */
static unsigned char trigraph_map[256];
2260
2261 static void
2262 expand_comment_space (list)
2263      cpp_toklist *list;
2264 {
2265   if (list->comments_cap == 0)
2266     {
2267       list->comments_cap = 10;
2268       list->comments = (cpp_token *)
2269         xmalloc (list->comments_cap * sizeof (cpp_token));
2270     }
2271   else
2272     {
2273       list->comments_cap *= 2;
2274       list->comments = (cpp_token *)
2275         xrealloc (list->comments, list->comments_cap);
2276     }
2277 }
2278
2279 void
2280 cpp_free_token_list (list)
2281      cpp_toklist *list;
2282 {
2283   if (list->comments)
2284     free (list->comments);
2285   free (list->tokens - 1);      /* Backup over dummy token.  */
2286   free (list->namebuf);
2287   free (list);
2288 }
2289
2290 void
2291 init_trigraph_map ()
2292 {
2293   trigraph_map['='] = '#';
2294   trigraph_map['('] = '[';
2295   trigraph_map[')'] = ']';
2296   trigraph_map['/'] = '\\';
2297   trigraph_map['\''] = '^';
2298   trigraph_map['<'] = '{';
2299   trigraph_map['>'] = '}';
2300   trigraph_map['!'] = '|';
2301   trigraph_map['-'] = '~';
2302 }
2303
2304 /* Call when a trigraph is encountered.  It warns if necessary, and
2305    returns true if the trigraph should be honoured.  END is the third
2306    character of a trigraph in the input stream.  */
2307 static int
2308 trigraph_ok (pfile, end)
2309      cpp_reader *pfile;
2310      const unsigned char *end;
2311 {
2312   int accept = CPP_OPTION (pfile, trigraphs);
2313
2314   if (CPP_OPTION (pfile, warn_trigraphs))
2315     {
2316       unsigned int col = end - 1 - pfile->buffer->line_base;
2317       if (accept)
2318         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2319                                "trigraph ??%c converted to %c",
2320                                (int) *end, (int) trigraph_map[*end]);
2321       else
2322         cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
2323                                "trigraph ??%c ignored", (int) *end);
2324     }
2325   return accept;
2326 }
2327
2328 /* Scan a string for trigraphs, warning or replacing them inline as
2329    appropriate.  When parsing a string, we must call this routine
2330    before processing a newline character (if trigraphs are enabled),
2331    since the newline might be escaped by a preceding backslash
2332    trigraph sequence.  Returns a pointer to the end of the name after
2333    replacement.  */
2334
2335 static unsigned char*
2336 trigraph_replace (pfile, src, limit)
2337      cpp_reader *pfile;
2338      unsigned char *src;
2339      unsigned char* limit;
2340 {
2341   unsigned char *dest;
2342
2343   /* Starting with src[1], find two consecutive '?'.  The case of no
2344      trigraphs is streamlined.  */
2345
2346   for (; src + 1 < limit; src += 2)
2347     {
2348       if (src[0] != '?')
2349         continue;
2350
2351       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
2352       if (src[-1] == '?')
2353         src--;
2354       else if (src + 2 == limit || src[1] != '?')
2355         continue;
2356
2357       /* Check if it really is a trigraph.  */
2358       if (trigraph_map[src[2]] == 0)
2359         continue;
2360
2361       dest = src;
2362       goto trigraph_found;
2363     }
2364   return limit;
2365
2366   /* Now we have a trigraph, we need to scan the remaining buffer, and
2367      copy-shifting its contents left if replacement is enabled.  */
2368   for (; src + 2 < limit; dest++, src++)
2369     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
2370       {
2371       trigraph_found:
2372         src += 2;
2373         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
2374           *dest = trigraph_map[*src];
2375       }
2376
2377   /* Copy remaining (at most 2) characters.  */
2378   while (src < limit)
2379     *dest++ = *src++;
2380   return dest;
2381 }
2382
2383 /* If CUR is a backslash or the end of a trigraphed backslash, return
2384    a pointer to its beginning, otherwise NULL.  We don't read beyond
2385    the buffer start, because there is the start of the comment in the
2386    buffer.  */
2387 static const unsigned char *
2388 backslash_start (pfile, cur)
2389      cpp_reader *pfile;
2390      const unsigned char *cur;
2391 {
2392   if (cur[0] == '\\')
2393     return cur;
2394   if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
2395       && trigraph_ok (pfile, cur))
2396     return cur - 2;
2397   return 0;
2398 }
2399
2400 /* Skip a C-style block comment.  This is probably the trickiest
2401    handler.  We find the end of the comment by seeing if an asterisk
2402    is before every '/' we encounter.  The nasty complication is that a
2403    previous asterisk may be separated by one or more escaped newlines.
2404    Returns non-zero if comment terminated by EOF, zero otherwise.  */
2405 static int
2406 skip_block_comment2 (pfile)
2407      cpp_reader *pfile;
2408 {
2409   cpp_buffer *buffer = pfile->buffer;
2410   const unsigned char *char_after_star = 0;
2411   register const unsigned char *cur = buffer->cur;
2412   int seen_eof = 0;
2413
2414   /* Inner loop would think the comment has ended if the first comment
2415      character is a '/'.  Avoid this and keep the inner loop clean by
2416      skipping such a character.  */
2417   if (cur < buffer->rlimit && cur[0] == '/')
2418     cur++;
2419
2420   for (; cur < buffer->rlimit; )
2421     {
2422       unsigned char c = *cur++;
2423
2424       /* People like decorating comments with '*', so check for
2425          '/' instead for efficiency.  */
2426       if (c == '/')
2427         {
2428           if (cur[-2] == '*' || cur - 1 == char_after_star)
2429             goto out;
2430
2431           /* Warn about potential nested comments, but not when
2432              the final character inside the comment is a '/'.
2433              Don't bother to get it right across escaped newlines.  */
2434           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
2435               && cur[0] == '*' && cur[1] != '/')
2436             {
2437               buffer->cur = cur;
2438               cpp_warning (pfile, "'/*' within comment");
2439             }
2440         }
2441       else if (IS_NEWLINE(c))
2442         {
2443           const unsigned char* bslash = backslash_start (pfile, cur - 2);
2444
2445           handle_newline (cur, buffer->rlimit, c);
2446           /* Work correctly if there is an asterisk before an
2447              arbirtrarily long sequence of escaped newlines.  */
2448           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
2449             char_after_star = cur;
2450           else
2451             char_after_star = 0;
2452         }
2453     }
2454   seen_eof = 1;
2455
2456  out:
2457   buffer->cur = cur;
2458   return seen_eof;
2459 }
2460
2461 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
2462    Returns non-zero if a multiline comment.  */
2463 static int
2464 skip_line_comment2 (pfile)
2465      cpp_reader *pfile;
2466 {
2467   cpp_buffer *buffer = pfile->buffer;
2468   register const unsigned char *cur = buffer->cur;
2469   int multiline = 0;
2470
2471   for (; cur < buffer->rlimit; )
2472     {
2473       unsigned char c = *cur++;
2474
2475       if (IS_NEWLINE (c))
2476         {
2477           /* Check for a (trigaph?) backslash escaping the newline.  */
2478           if (!backslash_start (pfile, cur - 2))
2479             goto out;
2480           multiline = 1;
2481           handle_newline (cur, buffer->rlimit, c);
2482         }
2483     }
2484   cur++;
2485
2486  out:
2487   buffer->cur = cur - 1;        /* Leave newline for caller.  */
2488   return multiline;
2489 }
2490
2491 /* Skips whitespace, stopping at next non-whitespace character.  */
2492 static void
2493 skip_whitespace (pfile, in_directive)
2494      cpp_reader *pfile;
2495      int in_directive;
2496 {
2497   cpp_buffer *buffer = pfile->buffer;
2498   register const unsigned char *cur = buffer->cur;
2499   unsigned short null_count = 0;
2500
2501   for (; cur < buffer->rlimit; )
2502     {
2503       unsigned char c = *cur++;
2504
2505       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
2506         continue;
2507       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
2508         goto out;
2509       if (c == '\0')
2510         null_count++;
2511       /* Mut be '\f' or '\v' */
2512       else if (in_directive && CPP_PEDANTIC (pfile))
2513         cpp_pedwarn (pfile, "%s in preprocessing directive",
2514                      c == '\f' ? "formfeed" : "vertical tab");
2515     }
2516   cur++;
2517
2518  out:
2519   buffer->cur = cur - 1;
2520   if (null_count)
2521     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
2522                  : "embedded null character ignored");
2523 }
2524
2525 /* Parse (append) an identifier.  */
2526 static void
2527 parse_name (pfile, list, name)
2528      cpp_reader *pfile;
2529      cpp_toklist *list;
2530      cpp_name *name;
2531 {
2532   const unsigned char *name_limit;
2533   unsigned char *namebuf;
2534   cpp_buffer *buffer = pfile->buffer;
2535   register const unsigned char *cur = buffer->cur;
2536
2537  expanded:
2538   name_limit = list->namebuf + list->name_cap;
2539   namebuf = list->namebuf + list->name_used;
2540
2541   for (; cur < buffer->rlimit && namebuf < name_limit; )
2542     {
2543       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2544
2545       if (! is_idchar(c))
2546         goto out;
2547       namebuf++;
2548       cur++;
2549       if (c == '$' && CPP_PEDANTIC (pfile))
2550         {
2551           buffer->cur = cur;
2552           cpp_pedwarn (pfile, "'$' character in identifier");
2553         }
2554     }
2555
2556   /* Run out of name space?  */
2557   if (cur < buffer->rlimit)
2558     {
2559       list->name_used = namebuf - list->namebuf;
2560       auto_expand_name_space (list);
2561       goto expanded;
2562     }
2563
2564  out:
2565   buffer->cur = cur;
2566   name->len = namebuf - (list->namebuf + name->offset);
2567   list->name_used = namebuf - list->namebuf;
2568 }
2569
2570 /* Parse (append) a number.  */
2571
2572 #define VALID_SIGN(c, prevc) \
2573   (((c) == '+' || (c) == '-') && \
2574    ((prevc) == 'e' || (prevc) == 'E' \
2575     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
2576
2577 static void
2578 parse_number (pfile, list, name)
2579      cpp_reader *pfile;
2580      cpp_toklist *list;
2581      cpp_name *name;
2582 {
2583   const unsigned char *name_limit;
2584   unsigned char *namebuf;
2585   cpp_buffer *buffer = pfile->buffer;
2586   register const unsigned char *cur = buffer->cur;
2587
2588  expanded:
2589   name_limit = list->namebuf + list->name_cap;
2590   namebuf = list->namebuf + list->name_used;
2591
2592   for (; cur < buffer->rlimit && namebuf < name_limit; )
2593     {
2594       unsigned char c = *namebuf = *cur; /* Copy a single char.  */
2595
2596       /* Perhaps we should accept '$' here if we accept it for
2597          identifiers.  We know namebuf[-1] is safe, because for c to
2598          be a sign we must have pushed at least one character.  */
2599       if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
2600         goto out;
2601
2602       namebuf++;
2603       cur++;
2604     }
2605
2606   /* Run out of name space?  */
2607   if (cur < buffer->rlimit)
2608     {
2609       list->name_used = namebuf - list->namebuf;
2610       auto_expand_name_space (list);
2611       goto expanded;
2612     }
2613
2614  out:
2615   buffer->cur = cur;
2616   name->len = namebuf - (list->namebuf + name->offset);
2617   list->name_used = namebuf - list->namebuf;
2618 }
2619
2620 /* Places a string terminated by an unescaped TERMINATOR into a
2621    cpp_name, which should be expandable and thus at the top of the
2622    list's stack.  Handles embedded trigraphs, if necessary, and
2623    escaped newlines.
2624
2625    Can be used for character constants (terminator = '\''), string
2626    constants ('"'), angled headers ('>') and assertions (')').  */
2627
2628 static void
2629 parse_string2 (pfile, list, name, terminator)
2630      cpp_reader *pfile;
2631      cpp_toklist *list;
2632      cpp_name *name;
2633      unsigned int terminator;
2634 {
2635   cpp_buffer *buffer = pfile->buffer;
2636   register const unsigned char *cur = buffer->cur;
2637   const unsigned char *name_limit;
2638   unsigned char *namebuf;
2639   unsigned int null_count = 0;
2640   int trigraphed_len = 0;
2641
2642  expanded:
2643   name_limit = list->namebuf + list->name_cap;
2644   namebuf = list->namebuf + list->name_used;
2645
2646   for (; cur < buffer->rlimit && namebuf < name_limit; )
2647     {
2648       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
2649
2650       if (c == '\0')
2651         null_count++;
2652       else if (c == terminator || IS_NEWLINE (c))
2653         {
2654           unsigned char* name_start = list->namebuf + name->offset;
2655
2656           /* Needed for trigraph_replace and multiline string warning.  */
2657           buffer->cur = cur;
2658
2659           /* Scan for trigraphs before checking if backslash-escaped.  */
2660           if (CPP_OPTION (pfile, trigraphs)
2661               || CPP_OPTION (pfile, warn_trigraphs))
2662             {
2663               namebuf = trigraph_replace (pfile, name_start + trigraphed_len,
2664                                             namebuf);
2665               trigraphed_len = namebuf - 2 - (name_start + trigraphed_len);
2666               if (trigraphed_len < 0)
2667                 trigraphed_len = 0;
2668             }
2669
2670           namebuf--;     /* Drop the newline / terminator from the name.  */
2671           if (IS_NEWLINE (c))
2672             {
2673               /* Drop a backslash newline, and continue. */
2674               if (namebuf[-1] == '\\')
2675                 {
2676                   handle_newline (cur, buffer->rlimit, c);
2677                   namebuf--;
2678                   continue;
2679                 }
2680
2681               cur--;
2682
2683               /* In Fortran and assembly language, silently terminate
2684                  strings of either variety at end of line.  This is a
2685                  kludge around not knowing where comments are in these
2686                  languages.  */
2687               if (CPP_OPTION (pfile, lang_fortran)
2688                   || CPP_OPTION (pfile, lang_asm))
2689                 goto out;
2690
2691               /* Character constants, headers and asserts may not
2692                  extend over multiple lines.  In Standard C, neither
2693                  may strings.  We accept multiline strings as an
2694                  extension, but not in directives.  */
2695               if (terminator != '"' || IS_DIRECTIVE (list))
2696                 goto unterminated;
2697
2698               cur++;  /* Move forwards again.  */
2699
2700               if (pfile->multiline_string_line == 0)
2701                 {
2702                   pfile->multiline_string_line = list->line;
2703                   if (CPP_PEDANTIC (pfile))
2704                     cpp_pedwarn (pfile, "multi-line string constant");
2705                 }
2706
2707               *namebuf++ = '\n';
2708               handle_newline (cur, buffer->rlimit, c);
2709             }
2710           else
2711             {
2712               unsigned char *temp;
2713
2714               /* An odd number of consecutive backslashes represents
2715                  an escaped terminator.  */
2716               temp = namebuf - 1;
2717               while (temp >= name_start && *temp == '\\')
2718                 temp--;
2719
2720               if ((namebuf - temp) & 1)
2721                 goto out;
2722               namebuf++;
2723             }
2724         }
2725     }
2726
2727   /* Run out of name space?  */
2728   if (cur < buffer->rlimit)
2729     {
2730       list->name_used = namebuf - list->namebuf;
2731       auto_expand_name_space (list);
2732       goto expanded;
2733     }
2734
2735   /* We may not have trigraph-replaced the input for this code path,
2736      but as the input is in error by being unterminated we don't
2737      bother.  Prevent warnings about no newlines at EOF.  */
2738   if (IS_NEWLINE(cur[-1]))
2739     cur--;
2740
2741  unterminated:
2742   cpp_error (pfile, "missing terminating %c character", (int) terminator);
2743
2744   if (terminator == '\"' && pfile->multiline_string_line != list->line
2745       && pfile->multiline_string_line != 0)
2746     {
2747       cpp_error_with_line (pfile, pfile->multiline_string_line, -1,
2748                            "possible start of unterminated string literal");
2749       pfile->multiline_string_line = 0;
2750     }
2751
2752  out:
2753   buffer->cur = cur;
2754   name->len = namebuf - (list->namebuf + name->offset);
2755   list->name_used = namebuf - list->namebuf;
2756
2757   if (null_count > 0)
2758     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
2759                          : "null character preserved"));
2760 }
2761
2762 /* The character TYPE helps us distinguish comment types: '*' = C
2763    style, '-' = Chill-style and '/' = C++ style.  For code simplicity,
2764    the stored comment includes the comment start and any terminator.  */
2765
2766 #define COMMENT_START_LEN 2
2767 static void
2768 save_comment (list, from, len, tok_no, type)
2769      cpp_toklist *list;
2770      const unsigned char *from;
2771      unsigned int len;
2772      unsigned int tok_no;
2773      unsigned int type;
2774 {
2775   cpp_token *comment;
2776   unsigned char *buffer;
2777
2778   len += COMMENT_START_LEN;
2779
2780   if (list->comments_used == list->comments_cap)
2781     expand_comment_space (list);
2782
2783   if (list->name_used + len > list->name_cap)
2784     expand_name_space (list, len);
2785
2786   comment = &list->comments[list->comments_used++];
2787   comment->type = CPP_COMMENT;
2788   comment->aux = tok_no;
2789   comment->val.name.len = len;
2790   comment->val.name.offset = list->name_used;
2791
2792   buffer = list->namebuf + list->name_used;
2793   if (type == '*')
2794     {
2795       *buffer++ = '/';
2796       *buffer++ = '*';
2797     }
2798   else
2799     {
2800       *buffer++ = type;
2801       *buffer++ = type;
2802     }
2803
2804   memcpy (buffer, from, len - COMMENT_START_LEN);
2805   list->name_used += len;
2806 }
2807
2808 /*
2809  *  The tokenizer's main loop.  Lexes from PFILE's current buffer into
2810  *  LIST, producing a token list that represents a logical line in the
2811  *  input file, terminated with a CPP_VSPACE token.  On EOF, the first
2812  *  token of LIST is set to CPP_EOF.  Nothing is returned; the result
2813  *  is left in LIST, and the buffer's read position is advanced.
      *
2814  *  Implementation relies almost entirely on lookback, rather than
2815  *  looking forwards.  This means that tokenization requires just
2816  *  a single pass of the file, even in the presence of trigraphs and
2817  *  escaped newlines, providing significant performance benefits.
2818  *  Trigraph overhead is negligible if they are disabled, and low
2819  *  even when enabled.
      *
      *  In practice each character is first given a token of its own, and
      *  a later character that completes a longer operator, a comment
      *  start or an escaped newline goes back and alters or drops the
      *  token(s) already stored.  The token-manipulation macros used for
      *  this (PUSH_TOKEN, REVISE_TOKEN, BACKUP_TOKEN, BACKUP_DIGRAPH,
      *  IMMED_TOKEN, PREV_TOKEN_TYPE) are defined outside this function.
      *
      *  Other effects visible here: comments are skipped and, unless the
      *  discard_comments option is set, recorded with save_comment; the
      *  token array is grown and lexing resumed when it fills up; input
      *  that ends without a newline after at least one token gets a
      *  warning and a CPP_VSPACE token; and a directive line whose
      *  dir_handler is still 0 is reported as an invalid preprocessing
      *  directive, unless its second token is CPP_VSPACE or the lang_asm
      *  option is set.
2820  */
2821
2822 void
2823 _cpp_lex_line (pfile, list)
2824      cpp_reader *pfile;
2825      cpp_toklist *list;
2826 {
2827   cpp_token *cur_token, *token_limit;  /* Next token slot; end of the array.  */
2828   cpp_buffer *buffer = pfile->buffer;
       /* CUR is a local copy of the read position.  It is copied back to
          buffer->cur before the helper calls below and again at exit.  */
2829   register const unsigned char *cur = buffer->cur;
2830   unsigned char flags = 0;  /* Flags for the next token to be started.  */
2831   /* Entry point, and re-entry point once the token array has been grown.  */
2832  expanded:
2833   token_limit = list->tokens + list->tokens_cap;
2834   cur_token = list->tokens + list->tokens_used;
2835
2836   for (; cur < buffer->rlimit && cur_token < token_limit;)
2837     {
2838       unsigned char c = *cur++;
2839
2840       /* Optimize whitespace skipping, in particular the case of a
2841          single whitespace character, as every other token is probably
2842          whitespace. (' ' '\t' '\v' '\f' '\0').  */
2843       if (is_hspace ((unsigned int) c))
2844         {
               /* A lone whitespace character is consumed right here; a null
                  character or a run of whitespace goes to skip_whitespace.  */
2845           if (c == '\0' || (cur < buffer->rlimit && is_hspace (*cur)))
2846             {
2847               buffer->cur = cur - (c == '\0');  /* Get the null warning.  */
2848               skip_whitespace (pfile, IS_DIRECTIVE (list));
2849               cur = buffer->cur;
2850             }
2851           flags = PREV_WHITESPACE;
2852           if (cur == buffer->rlimit)  /* Whitespace ran to the end of the buffer.  */
2853             break;
2854           c = *cur++;
2855         }
2856
2857       /* Initialize current token.  Its type is set in the switch.  */
2858       cur_token->col = COLUMN (cur);
2859       cur_token->flags = flags;
2860       flags = 0;
2861
2862       switch (c)
2863         {
2864         case '0': case '1': case '2': case '3': case '4':
2865         case '5': case '6': case '7': case '8': case '9':
2866           /* Prepend an immediately previous CPP_DOT token.  */
2867           if (PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ())
2868             {
2869               cur_token--;
2870               if (list->name_cap == list->name_used)
2871                 auto_expand_name_space (list);
2872
2873               cur_token->val.name.len = 1;
2874               cur_token->val.name.offset = list->name_used;
2875               list->namebuf[list->name_used++] = '.';
2876             }
2877           else
2878             INIT_NAME (list, cur_token->val.name);
2879           cur--;                /* Backup character.  */
2880           /* Also entered from the newline case, to resume a number that
                  was interrupted by an escaped newline.  */
2881         continue_number:
2882           buffer->cur = cur;
2883           parse_number (pfile, list, &cur_token->val.name);
2884           cur = buffer->cur;
2885
2886           PUSH_TOKEN (CPP_NUMBER); /* Number not yet interpreted.  */
2887           break;
2888           /* The '$' case jumps to this label when dollars_in_ident is set.  */
2889         letter:
2890         case '_':
2891         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2892         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2893         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
2894         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
2895         case 'y': case 'z':
2896         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
2897         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
2898         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
2899         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
2900         case 'Y': case 'Z':
2901           INIT_NAME (list, cur_token->val.name);
2902           cur--;                     /* Backup character.  */
2903           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
2904           /* Also entered from the newline case, to resume a name that
                  was interrupted by an escaped newline.  */
2905         continue_name:
2906           buffer->cur = cur;
2907           parse_name (pfile, list, &cur_token->val.name);
2908           cur = buffer->cur;
2909
2910           /* Find handler for newly created / extended directive.  */
2911           if (IS_DIRECTIVE (list) && cur_token == &list->tokens[1])
2912             _cpp_check_directive (list, cur_token);
2913           cur_token++;
2914           break;
2915
2916         case '\'':
2917           /* Fall through.  */
2918         case '\"':
2919           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
2920           /* Do we have a wide string?  */
               /* NOTE(review): this reads cur_token[-1] even for the first
                  token of the list; presumably the token array keeps a valid
                  entry in front of tokens[0] -- confirm.  */
2921           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
2922               && cur_token[-1].val.name.len == 1
2923               && *(list->namebuf + cur_token[-1].val.name.offset) == 'L'
2924               && !CPP_TRADITIONAL (pfile))
2925             {
2926               /* No need for 'L' any more.  */
2927               list->name_used--;
2928               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
2929             }
2930           /* Shared with the '<' and '(' cases, which set C to the
                  terminator and the token type before jumping here.  */
2931         do_parse_string:
2932           /* Here c is one of ' " > or ).  */
2933           INIT_NAME (list, cur_token->val.name);
2934           buffer->cur = cur;
2935           parse_string2 (pfile, list, &cur_token->val.name, c);
2936           cur = buffer->cur;
2937           cur_token++;
2938           break;
2939
2940         case '/':
               /* Tentatively a division operator.  A '/' directly after
                  another '/' may instead start a line comment.  */
2941           cur_token->type = CPP_DIV;
2942           if (IMMED_TOKEN ())
2943             {
2944               if (PREV_TOKEN_TYPE == CPP_DIV)
2945                 {
2946                   /* We silently allow C++ comments in system headers,
2947                      irrespective of conformance mode, because lots of
2948                      broken systems do that and trying to clean it up
2949                      in fixincludes is a nightmare.  */
2950                   if (buffer->system_header_p)
2951                     goto do_line_comment;
2952                   else if (CPP_OPTION (pfile, cplusplus_comments))
2953                     {
2954                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
2955                           && ! buffer->warned_cplusplus_comments)
2956                         {
2957                           buffer->cur = cur;
2958                           cpp_pedwarn (pfile,
2959                              "C++ style comments are not allowed in ISO C89");
2960                           cpp_pedwarn (pfile,
2961                           "(this will be reported only once per input file)");
2962                           buffer->warned_cplusplus_comments = 1;
2963                         }
                           /* Also reached from the '-' case for Chill comments,
                              which is why the tests below use C, not '/'.  */
2964                     do_line_comment:
2965                       buffer->cur = cur;
                           /* The character two back is not the first half of
                              the comment start, so something came between.  */
2966                       if (cur[-2] != c)
2967                         cpp_warning (pfile,
2968                                      "comment start split across lines");
2969                       if (skip_line_comment2 (pfile))
2970                         cpp_error_with_line (pfile, list->line,
2971                                              cur_token[-1].col,
2972                                              "multi-line comment");
                           /* The comment is filed under the index of the token
                              holding the first '/' or '-'.  That slot is freed
                              below, so the next token lexed will occupy it.  */
2973                       if (!CPP_OPTION (pfile, discard_comments))
2974                         save_comment (list, cur, buffer->cur - cur,
2975                                       cur_token - 1 - list->tokens, c);
2976                       cur = buffer->cur;
2977
2978                       /* Back-up to first '-' or '/'.  */
                           /* Two steps back here plus the cur_token++ at the
                              end of this case leave cur_token on that token,
                              ready to be overwritten.  */
2979                       cur_token -= 2;
                           /* Outside traditional mode the comment counts as
                              whitespace in front of the next token.  */
2980                       if (!CPP_OPTION (pfile, traditional))
2981                         flags = PREV_WHITESPACE;
2982                     }
2983                 }
2984             }
2985           cur_token++;
2986           break;
2987
2988         case '*':
2989           cur_token->type = CPP_MULT;
2990           if (IMMED_TOKEN ())
2991             {
2992               if (PREV_TOKEN_TYPE == CPP_DIV)
2993                 {
                       /* A '*' directly after '/' starts a block comment.  */
2994                   buffer->cur = cur;
2995                   if (cur[-2] != '/')
2996                     cpp_warning (pfile,
2997                                  "comment start '/*' split across lines");
2998                   if (skip_block_comment2 (pfile))
2999                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
3000                                          "unterminated comment");
3001                   else if (buffer->cur[-2] != '*')
3002                     cpp_warning (pfile,
3003                                  "comment end '*/' split across lines");
3004                   if (!CPP_OPTION (pfile, discard_comments))
3005                     save_comment (list, cur, buffer->cur - cur,
3006                                  cur_token - 1 - list->tokens, c);
3007                   cur = buffer->cur;
3008                   /* As for line comments: drop the '/' token and treat the
                          comment as whitespace unless in traditional mode.  */
3009                   cur_token -= 2;
3010                   if (!CPP_OPTION (pfile, traditional))
3011                     flags = PREV_WHITESPACE;
3012                 }
3013               else if (CPP_OPTION (pfile, cplusplus))
3014                 {
3015                   /* In C++, there are .* and ->* operators.  */
3016                   if (PREV_TOKEN_TYPE == CPP_DEREF)
3017                     BACKUP_TOKEN (CPP_DEREF_STAR);
3018                   else if (PREV_TOKEN_TYPE == CPP_DOT)
3019                     BACKUP_TOKEN (CPP_DOT_STAR);
3020                 }
3021             }
3022           cur_token++;
3023           break;
3024
3025         case '\n':
3026         case '\r':
               /* handle_newline is defined outside this function; presumably
                  it steps CUR over the second character of a two-character
                  line ending -- TODO confirm.  */
3027           handle_newline (cur, buffer->rlimit, c);
3028           if (PREV_TOKEN_TYPE == CPP_BACKSLASH && IMMED_TOKEN ())
3029             {
3030               /* Remove the escaped newline.  Then continue to process
3031                  any interrupted name or number.  */
3032               cur_token--;
3033               if (IMMED_TOKEN ())
3034                 {
3035                   cur_token--;
3036                   if (cur_token->type == CPP_NAME)
3037                     goto continue_name;
3038                   else if (cur_token->type == CPP_NUMBER)
3039                     goto continue_number;
3040                   cur_token++;
3041                 }
3042               /* Remember whitespace setting.  */
3043               flags = cur_token->flags;
3044               break;
3045             }
               /* A backslash was the previous token but something separated
                  it from this newline: warn, and end the line as usual.  */
3046           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
3047             {
3048               buffer->cur = cur;
3049               cpp_warning (pfile, "backslash and newline separated by space");
3050             }
               /* An unescaped newline ends the logical line.  */
3051           PUSH_TOKEN (CPP_VSPACE);
3052           goto out;
3053
3054         case '-':
3055           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
3056             {
                   /* With the chill option two adjacent '-' start a line
                      comment, handled by the code in the '/' case.  */
3057               if (CPP_OPTION (pfile, chill))
3058                 goto do_line_comment;
3059               REVISE_TOKEN (CPP_MINUS_MINUS);
3060             }
3061           else
3062             PUSH_TOKEN (CPP_MINUS);
3063           break;
3064
3065           /* The digraph flag checking ensures that ## and %:%:
3066              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
3067         make_hash:
3068         case '#':
3069           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
3070               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
3071             REVISE_TOKEN (CPP_PASTE);
3072           else
3073             PUSH_TOKEN (CPP_HASH);
3074           break;
3075
3076         case ':':
3077           cur_token->type = CPP_COLON;
3078           if (IMMED_TOKEN ())
3079             {
3080               if (PREV_TOKEN_TYPE == CPP_COLON
3081                   && CPP_OPTION (pfile, cplusplus))
3082                 BACKUP_TOKEN (CPP_SCOPE);
3083               /* Digraph: "<:" is a '['  */
3084               else if (PREV_TOKEN_TYPE == CPP_LESS)
3085                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
3086               /* Digraph: "%:" is a '#'  */
3087               else if (PREV_TOKEN_TYPE == CPP_MOD)
3088                 {
                       /* Step back onto the '%' token, flag it as a digraph
                          and continue exactly as for '#'.  */
3089                   (--cur_token)->flags |= DIGRAPH;
3090                   goto make_hash;
3091                 }
3092             }
3093           cur_token++;
3094           break;
3095
3096         case '&':
3097           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
3098             REVISE_TOKEN (CPP_AND_AND);
3099           else
3100             PUSH_TOKEN (CPP_AND);
3101           break;
3102           /* Also the target of the "??!" trigraph.  */
3103         make_or:
3104         case '|':
3105           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
3106             REVISE_TOKEN (CPP_OR_OR);
3107           else
3108             PUSH_TOKEN (CPP_OR);
3109           break;
3110
3111         case '+':
3112           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
3113             REVISE_TOKEN (CPP_PLUS_PLUS);
3114           else
3115             PUSH_TOKEN (CPP_PLUS);
3116           break;
3117
3118         case '=':
3119             /* This relies on equidistance of "?=" and "?" tokens.  */
3120           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
3121             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
3122           else
3123             PUSH_TOKEN (CPP_EQ);
3124           break;
3125
3126         case '>':
3127           cur_token->type = CPP_GREATER;
3128           if (IMMED_TOKEN ())
3129             {
3130               if (PREV_TOKEN_TYPE == CPP_GREATER)
3131                 BACKUP_TOKEN (CPP_RSHIFT);
3132               else if (PREV_TOKEN_TYPE == CPP_MINUS)
3133                 BACKUP_TOKEN (CPP_DEREF);
3134               /* Digraph: ":>" is a ']'  */
3135               else if (PREV_TOKEN_TYPE == CPP_COLON)
3136                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
3137               /* Digraph: "%>" is a '}'  */
3138               else if (PREV_TOKEN_TYPE == CPP_MOD)
3139                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
3140             }
3141           cur_token++;
3142           break;
3143
3144         case '<':
3145           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3146             {
3147               REVISE_TOKEN (CPP_LSHIFT);
3148               break;
3149             }
3150           /* Is this the beginning of a header name?  */
3151           if (list->dir_flags & SYNTAX_INCLUDE)
3152             {
3153               c = '>';  /* Terminator.  */
3154               cur_token->type = CPP_HEADER_NAME;
3155               goto do_parse_string;
3156             }
3157           PUSH_TOKEN (CPP_LESS);
3158           break;
3159
3160         case '%':
3161           /* Digraph: "<%" is a '{'  */
3162           cur_token->type = CPP_MOD;
3163           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
3164             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
3165           cur_token++;
3166           break;
3167
3168         case '(':
3169           /* Is this the beginning of an assertion string?  */
3170           if (list->dir_flags & SYNTAX_ASSERT)
3171             {
3172               c = ')';  /* Terminator.  */
3173               cur_token->type = CPP_ASSERTION;
3174               goto do_parse_string;
3175             }
3176           PUSH_TOKEN (CPP_OPEN_PAREN);
3177           break;
3178
3179         case '?':
               /* A trigraph needs two more characters in the buffer: a second
                  '?' and a character that has a trigraph_map entry.  This is
                  the one place where the lexer looks ahead.  */
3180           if (cur + 1 < buffer->rlimit && *cur == '?'
3181               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
3182             {
3183               /* Handle trigraph.  */
3184               cur++;
                   /* NOTE(review): there is no default case; presumably these
                      nine characters are exactly those with a trigraph_map
                      entry -- confirm against init_trigraph_map.  */
3185               switch (*cur++)
3186                 {
3187                 case '(': goto make_open_square;
3188                 case ')': goto make_close_square;
3189                 case '<': goto make_open_brace;
3190                 case '>': goto make_close_brace;
3191                 case '=': goto make_hash;
3192                 case '!': goto make_or;
3193                 case '-': goto make_complement;
3194                 case '/': goto make_backslash;
3195                 case '\'': goto make_xor;
3196                 }
3197             }
3198           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
3199             {
3200               /* GNU C++ defines <? and >? operators.  */
3201               if (PREV_TOKEN_TYPE == CPP_LESS)
3202                 {
3203                   REVISE_TOKEN (CPP_MIN);
3204                   break;
3205                 }
3206               else if (PREV_TOKEN_TYPE == CPP_GREATER)
3207                 {
3208                   REVISE_TOKEN (CPP_MAX);
3209                   break;
3210                 }
3211             }
3212           PUSH_TOKEN (CPP_QUERY);
3213           break;
3214
3215         case '.':
               /* Three adjacent dots make an ellipsis.  NOTE(review): this
                  reads cur_token[-2], which lies in front of tokens[0] when
                  only one token precedes; presumably the array keeps valid
                  entries there -- confirm.  */
3216           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
3217               && IMMED_TOKEN ()
3218               && !(cur_token[-1].flags & PREV_WHITESPACE))
3219             {
3220               cur_token -= 2;
3221               PUSH_TOKEN (CPP_ELLIPSIS);
3222             }
3223           else
3224             PUSH_TOKEN (CPP_DOT);
3225           break;
3226           /* The make_* labels below are the targets of the trigraph
                  switch in the '?' case.  */
3227         make_complement:
3228         case '~': PUSH_TOKEN (CPP_COMPL); break;
3229         make_xor:
3230         case '^': PUSH_TOKEN (CPP_XOR); break;
3231         make_open_brace:
3232         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
3233         make_close_brace:
3234         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
3235         make_open_square:
3236         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
3237         make_close_square:
3238         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
3239         make_backslash:
3240         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
3241         case '!': PUSH_TOKEN (CPP_NOT); break;
3242         case ',': PUSH_TOKEN (CPP_COMMA); break;
3243         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
3244         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
3245
3246         case '$':
3247           if (CPP_OPTION (pfile, dollars_in_ident))
3248             goto letter;
3249           /* Fall through */
3250         default:
               /* Any other character is kept as a CPP_OTHER token, with the
                  character itself stored in aux.  */
3251           cur_token->aux = c;
3252           PUSH_TOKEN (CPP_OTHER);
3253           break;
3254         }
3255     }
3256
3257   /* Run out of token space?  */
3258   if (cur_token == token_limit)
3259     {
3260       list->tokens_used = cur_token - list->tokens;
3261       expand_token_space (list);
3262       goto expanded;
3263     }
3264   /* Otherwise the buffer ran out before a line terminator was seen.  */
3265   cur_token->type = CPP_EOF;
3266   cur_token->flags = flags;
3267   /* Tokens were lexed on this unterminated line: terminate it here.  */
3268   if (cur_token != &list->tokens[0])
3269     {
3270       /* Next call back will get just a CPP_EOF.  */
3271       buffer->cur = cur;
3272       cpp_warning (pfile, "no newline at end of file");
           /* PUSH_TOKEN presumably stores the type in *cur_token, replacing
              the CPP_EOF set above -- confirm against the macro.  */
3273       PUSH_TOKEN (CPP_VSPACE);
3274     }
3275   /* Common exit: publish the read position and the token count.  */
3276  out:
3277   buffer->cur = cur;
3278
3279   list->tokens_used = cur_token - list->tokens;
3280
3281   /* FIXME:  take this check out and put it in the caller.
3282      list->directive == 0 indicates an unknown directive (but null
3283      directive is OK).  This is the first time we can be sure the
3284      directive is invalid, and thus warn about it, because it might
3285      have been split by escaped newlines.  Also, don't complain about
3286      invalid directives in assembly source, we don't know where the
3287      comments are, and # may introduce assembler pseudo-ops.  */
3288
3289   if (IS_DIRECTIVE (list) && list->dir_handler == 0
3290       && list->tokens[1].type != CPP_VSPACE
3291       && !CPP_OPTION (pfile, lang_asm))
3292     cpp_error_with_line (pfile, list->line, list->tokens[1].col,
3293                          "invalid preprocessing directive");
3294 }
3295
3296 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
3297    already contain enough space to hold the token's spelling.  If
3298    WHITESPACE is true, and the token was preceded by whitespace,
3299    output a single space before the token proper.  Returns a pointer
3300    to the character after the last character written.

        LIST supplies the name buffer that identifier-like and string-like
        tokens refer to by offset and length.  PFILE is used only to report
        a token that has no spelling.  No terminating null is written.  */
3301
3302 static unsigned char *
3303 spell_token (pfile, token, list, buffer, whitespace)
3304      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
3305      cpp_token *token;
3306      cpp_toklist *list;         /* FIXME: get rid of this...  */
3307      unsigned char *buffer;
3308      int whitespace;
3309 {
3310   /* Whitespace will not be wanted by handlers of the # and ##
3311      operators calling this function, but will be wanted by the
3312      function that writes out the preprocessed file.  */
3313   if (whitespace && token->flags & PREV_WHITESPACE)
3314     *buffer++ = ' ';
3315   /* The spelling method is looked up by token type.  */
3316   switch (token_spellings[token->type].type)
3317     {
3318     case SPELL_OPERATOR:
3319       {
3320         const unsigned char *spelling;
3321         unsigned char c;
3322         /* A token flagged DIGRAPH is spelled from the digraph table.  */
3323         if (token->flags & DIGRAPH)
3324           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
3325         else
3326           spelling = token_spellings[token->type].speller;
3327         /* Copy the null-terminated spelling, without its terminator.  */
3328         while ((c = *spelling++) != '\0')
3329           *buffer++ = c;
3330       }
3331       break;
3332
3333     case SPELL_IDENT:
           /* The spelling is the token's text in the name buffer, verbatim.  */
3334       memcpy (buffer, list->namebuf + token->val.name.offset,
3335               token->val.name.len);
3336       buffer += token->val.name.len;
3337       break;
3338
3339     case SPELL_STRING:
3340       {
3341         unsigned char c;
3342         /* Wide strings and wide characters get an 'L' prefix.  */
3343         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
3344           *buffer++ = 'L';
             /* Pick the delimiter: a double quote for the two string types,
                a single quote for every other type spelled this way.  */
3345         c = '\'';
3346         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
3347           c = '"';
             /* Write the delimiter, the stored text, and the delimiter again.  */
3348         *buffer++ = c;
3349         memcpy (buffer, list->namebuf + token->val.name.offset,
3350                 token->val.name.len);
3351         buffer += token->val.name.len;
3352         *buffer++ = c;
3353       }
3354       break;
3355
3356     case SPELL_CHAR:
           /* The character to write is kept in the token's aux field.  */
3357       *buffer++ = token->aux;
3358       break;
3359
3360     case SPELL_NONE:
           /* Nothing is written for such a token beyond any leading space.  */
3361       cpp_ice (pfile, "Unspellable token");
3362       break;
3363     }
3364
3365   return buffer;
3366 }
3367
3368 /* Temporary function for illustrative purposes.

        Lexes PFILE one logical line at a time with _cpp_lex_line, stopping
        when a line's first token is CPP_EOF.  If a directive handler was
        recorded for a line, that handler is called; any other line is
        written out with _cpp_output_list.

        NOTE(review): nothing visible here frees the token list(s) obtained
        from xmalloc -- confirm whether that is acceptable for this
        temporary code.  */
3369 void
3370 _cpp_lex_file (pfile)
3371      cpp_reader* pfile;
3372 {
3373   int recycle;  /* Passed to init_token_list: 0 for a new list, 1 afterwards.  */
3374   cpp_toklist* list;
3375
3376   init_trigraph_map ();
3377   list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3378
3379   for (recycle = 0; ;)
3380     {
3381       init_token_list (pfile, list, recycle);
3382       recycle = 1;
3383
3384       _cpp_lex_line (pfile, list);
3385       if (list->tokens[0].type == CPP_EOF)
3386         break;
3387
3388       if (list->dir_handler)
3389         {
               /* A nonzero return makes us continue with a newly allocated
                  list, presumably because the handler has kept the old one
                  -- TODO confirm against the handlers.  */
3390           if (list->dir_handler (pfile))
3391             {
3392               list = (cpp_toklist *) xmalloc (sizeof (cpp_toklist));
3393               recycle = 0;
3394             }
3395         }
3396       else
3397         _cpp_output_list (pfile, list);
3398     }
3399 }
3400
3401 /* Temporary function for illustrative purposes.

        Spells every token of LIST, up to and including the first
        CPP_VSPACE token, at PFILE->limit, advancing PFILE->limit past what
        was written.  Each saved comment of LIST is spelled immediately
        before the token whose index is stored in the comment's aux field.
        CPP_RESERVE and TOKEN_LEN are defined outside this function;
        presumably they make room at PFILE->limit for a token's spelling.  */
3402 static void
3403 _cpp_output_list (pfile, list)
3404      cpp_reader *pfile;
3405      cpp_toklist *list;
3406 {
       /* COMMENT is the next comment to write and COMMENT_BEFORE the token it
          precedes.  With no comments COMMENT stays uninitialized, but it is
          never read then because COMMENT_BEFORE remains 0.  */
3407   cpp_token *token, *comment, *comment_before = 0;
3408
3409   if (list->comments_used > 0)
3410     {
3411       comment = &list->comments[0];
3412       comment_before = &list->tokens[comment->aux];
3413     }
3414
3415   token = &list->tokens[0];
3416   do
3417     {
3418       /* Output comments if -C.  */
           /* A loop, since several comments may precede the same token.  */
3419       while (token == comment_before)
3420         {
3421           /* Make space for the comment, and copy it out.  */
3422           CPP_RESERVE (pfile, TOKEN_LEN (comment));
3423           pfile->limit = spell_token (pfile, comment, list, pfile->limit, 0);
3424
3425           /* Stop if no comments left, or no more comments appear
3426              before the current token.  */
3427           comment++;
3428           if (comment == list->comments + list->comments_used)
3429             break;
3430           comment_before = &list->tokens[comment->aux];
3431         }
3432       /* Now the token itself, with a leading space if it had whitespace.  */
3433       CPP_RESERVE (pfile, TOKEN_LEN (token));
3434       pfile->limit = spell_token (pfile, token, list, pfile->limit, 1);
3435     }
       /* The test looks at the token just written, then moves to the next.  */
3436   while (token++->type != CPP_VSPACE);
3437 }
3438
3439 #endif