gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Fix ISTABLE to flag the parts we want for IS_HSPACE and IS_NEWLINE.
  28 o Get use of digraphs in sync with the standard reqd on the command line.
  29 o -dM and with _cpp_dump_list: too many \n output.
  30 o Put a printer object in cpp_reader?
  31 o Check line numbers assigned to all errors.
  32 o Replace strncmp with memcmp almost everywhere.
  33 o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
  34 o Get rid of cpp_get_directive_token.
  35 o Convert do_ functions to return void.  Kaveh thinks it's OK; and said he'll
  36   give it a run when we've got some code.
  37 o _cpp_parse_expr updated to new lexer.
  38 o Distinguish integers, floats, and 'other' pp-numbers.
  39 o Store ints and char constants as binary values.
  40 o New command-line assertion syntax.
  41 o Merge hash table text pointer and token list text pointer for identifiers.
  42 o Have _cpp_parse_expr use all the information the new lexer provides.
  43 o Work towards functions in cpperror.c taking a message level parameter.
  44   If we do this, merge the common code of do_warning and do_error.
  45 o Comment all functions, and describe macro expansion algorithm.
  46 o Move as much out of header files as possible.
  47 o Remove single quote pairs `', and some '', from diagnostics.
  48 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  49
  50 */
  51
  52 #include "config.h"
  53 #include "system.h"
  54 #include "intl.h"
  55 #include "cpplib.h"
  56 #include "cpphash.h"
  57 #include "symcat.h"
  58
     /* Grow LIST's name buffer by one more than half of its current
        capacity.  */
  59 #define auto_expand_name_space(list) \
  60     _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
  61 static void safe_fwrite         PARAMS ((cpp_reader *, const U_CHAR *,
  62                                          size_t, FILE *));
  63 static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
  64                                          unsigned int));
  65 static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
  66                                          unsigned int));
  67
  68 static void process_directive   PARAMS ((cpp_reader *, const cpp_token *));
  69 static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
  70                                                 unsigned char *));
  71 static const unsigned char *backslash_start PARAMS ((cpp_reader *,
  72                                                      const unsigned char *));
  73 static int skip_block_comment PARAMS ((cpp_reader *));
  74 static int skip_line_comment PARAMS ((cpp_reader *));
  75 static void skip_whitespace PARAMS ((cpp_reader *, int));
  76 static void parse_name PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  77 static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_name *));
  78 static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
  79                                   unsigned int));
  80 static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
  81 static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
  82                                   const unsigned char *,
  83                                   unsigned int, unsigned int));
  84 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
  85 static int lex_next PARAMS ((cpp_reader *, int));
  86 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
  87                                       const cpp_token *));
  88
  89 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
  90 static void expand_context_stack PARAMS ((cpp_reader *));
  91 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
  92                                             unsigned char *));
  93 static void output_token PARAMS ((cpp_reader *, const cpp_token *,
  94                                   const cpp_token *));
  95 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
  96                                           cpp_token *));
  97 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
  98                                             unsigned int));
  99 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 100 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 101                                                 const cpp_token *));
 102 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 103 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 104                                                        const cpp_token *));
 105 static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
 106                                          const cpp_token *, int *));
 107 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 108 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 109 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 110 static void release_temp_tokens         PARAMS ((cpp_reader *));
 111 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 112 static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
 113
     /* Begin a new, empty name for TOKEN: its length is set to zero and
        its text starts at the first unused byte of LIST's name buffer.  */
 114 #define INIT_TOKEN_NAME(list, token) \
 115   do {(token)->val.name.len = 0; \
 116       (token)->val.name.text = (list)->namebuf + (list)->name_used; \
 117   } while (0)
 118
     /* Nonzero if C is a '+' or '-' that follows an exponent letter
        PREVC: 'e' or 'E' always, 'p' or 'P' only when the c89 option is
        off.  Expects a variable named PFILE to be in scope where the
        macro is used.  */
 119 #define VALID_SIGN(c, prevc) \
 120   (((c) == '+' || (c) == '-') && \
 121    ((prevc) == 'e' || (prevc) == 'E' \
 122     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 123
 124 /* Character tests used by the lexer: horizontal space is a blank or
        a tab, and a newline is either LF or CR.  Maybe put these in the
        ISTABLE eventually.  */
 125 #define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')
 126 #define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')
 127
 128 /* Handle LF, CR, CR-LF and LF-CR style newlines.  Assumes next
 129    character, if any, is in buffer.  C is the newline character just
        consumed and CUR points just past it; if the character at CUR is
        the other member of the CR/LF pair it is swallowed as well, so the
        pair counts as a single newline.  The line number, line base and
        column adjustment are then updated.  Expects a variable named
        PFILE to be in scope where the macro is used.  CUR is evaluated
        more than once and is modified, so it must be a plain lvalue.  */
 130
 131 #define handle_newline(cur, limit, c) \
 132  do { \
 133   if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
 134     (cur)++; \
 135   pfile->buffer->lineno++; \
 136   pfile->buffer->line_base = (cur); \
 137   pfile->col_adjust = 0; \
 138  } while (0)
 139
     /* Shorthands for working on the token being lexed.  All of them use
        a variable named cur_token, which must be in scope where they are
        used.
          IMMED_TOKEN     nonzero if *cur_token lacks the PREV_WHITE flag.
          PREV_TOKEN_TYPE the type of the token just before cur_token.
          PUSH_TOKEN      set the type of *cur_token, then advance.
          REVISE_TOKEN    overwrite the type of the previous token.
          BACKUP_TOKEN    step back one token and set its type.
          BACKUP_DIGRAPH  as BACKUP_TOKEN, and flag the token DIGRAPH.
        The expansions and their arguments are parenthesized so that they
        behave as single expressions whatever is passed in.  */
 140 #define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
 141 #define PREV_TOKEN_TYPE (cur_token[-1].type)
 142
 143 #define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
 144 #define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
 145 #define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
 146 #define BACKUP_DIGRAPH(ttype) do { \
 147   BACKUP_TOKEN(ttype); cur_token->flags |= DIGRAPH;} while (0)
 148
 149 /* An upper bound on the number of bytes needed to spell a token,
 150    including preceding whitespace.  The length of the token's name is
        counted only for token types whose spelling class ranks above
        SPELL_NONE; every other token gets just the fixed allowance of 5.
        NOTE(review): that allowance presumably covers the longest fixed
        spelling plus a leading space -- confirm against TTYPE_TABLE.  */
 151 #define TOKEN_LEN(token) (5 + (token_spellings[(token)->type].type > \
 152                                SPELL_NONE ? (token)->val.name.len: 0))
 153
     /* The spelling table, indexed by token type.  It is built by
        expanding TTYPE_TABLE with T, I, S, C and N temporarily defined so
        that each row yields its spelling class and its second argument.
        The array has N_TTYPES + 1 slots; the trailing {0, 0} initializer
        follows the rows produced by TTYPE_TABLE.  */
 154 #define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
 155 #define I(e, s) {SPELL_IDENT, s},
 156 #define S(e, s) {SPELL_STRING, s},
 157 #define C(e, s) {SPELL_CHAR, s},
 158 #define N(e, s) {SPELL_NONE, s},
 159
 160 const struct token_spelling
 161 token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
 162
 163 #undef T
 164 #undef I
 165 #undef S
 166 #undef C
 167 #undef N
 168
 169 /* For debugging: the internal names of the tokens.  TTYPE_TABLE is
        expanded once more, this time with every row reduced to its first
        argument passed through STRINGX (not defined in this file;
        presumably it turns the enumerator name into a string).  */
 170 #define T(e, s) STRINGX(e),
 171 #define I(e, s) STRINGX(e),
 172 #define S(e, s) STRINGX(e),
 173 #define C(e, s) STRINGX(e),
 174 #define N(e, s) STRINGX(e),
 175
 176 const char * const token_names[N_TTYPES] = { TTYPE_TABLE };
 177
 178 #undef T
 179 #undef I
 180 #undef S
 181 #undef C
 182 #undef N
 183
 184 /* The following table is used by trigraph_ok/trigraph_replace.  If we
 185    have designated initializers, it can be constant data; otherwise,
 186    it is set up at runtime by _cpp_init_input_buffer.  The table is
        indexed by the third character of a trigraph and holds the
        character that trigraph stands for; every other entry is zero,
        which is how the users of the table recognise a non-trigraph.  */
 187
 188 #if (GCC_VERSION >= 2007)
 189 #define init_trigraph_map()  /* nothing */
 190 #define TRIGRAPH_MAP \
 191 __extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
 192 #define END };
 193 #define s(p, v) [p] = v,
 194 #else /* No designated initializers: init_trigraph_map fills the table.  */
 195 #define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
 196  static void init_trigraph_map PARAMS ((void)) { \
 197  unsigned char *x = trigraph_map;
 198 #define END }
 199 #define s(p, v) x[p] = v;
 200 #endif /* GCC_VERSION >= 2007 */
 201
     /* Each s (P, V) entry below maps P, the third character of the
        trigraph ??P, to V, the character the trigraph stands for.  */
 202 TRIGRAPH_MAP
 203   s('=', '#')   s(')', ']')     s('!', '|')
 204   s('(', '[')   s('\'', '^')    s('>', '}')
 205   s('/', '\\')  s('<', '{')     s('-', '~')
 206 END
 207
 208 #undef TRIGRAPH_MAP
 209 #undef END
 210 #undef s
 211
 212 /* Enlarge PFILE->token_buffer so that at least N more chars will fit.  */
 213
 214 void
 215 _cpp_grow_token_buffer (pfile, n)
 216      cpp_reader *pfile;
 217      long n;
 218 {
 219   long saved_written = CPP_WRITTEN (pfile);
 220   pfile->token_buffer_size = 2 * pfile->token_buffer_size + n;
 221   pfile->token_buffer
 222     = (U_CHAR *) xrealloc (pfile->token_buffer, pfile->token_buffer_size);
 223   CPP_SET_WRITTEN (pfile, saved_written);
 224 }
 225
 226 /* Write LEN bytes from BUF to FP, coping with fwrite's short writes.  */
 227 static void
 228 safe_fwrite (pfile, buf, len, fp)
 229      cpp_reader *pfile;
 230      const U_CHAR *buf;
 231      size_t len;
 232      FILE *fp;
 233 {
 234   while (len > 0)
 235     {
 236       /* fwrite may transfer fewer than LEN bytes; go round again.  */
 237       size_t done = fwrite (buf, 1, len, fp);
 238
 239       if (done == 0)
 240         {
 241           /* No progress at all: report the error and give up.  */
 242           cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
 243           return;
 244         }
 245       buf += done;
 246       len -= done;
 247     }
 248 }
 249
 250 /* Notify the compiler proper that the current line number has jumped,
 251    or the current file name has changed.  LINE is the line number to
        announce; a LINE of zero means there is nothing to do.  PRINT
        holds the output stream together with what was last announced
        (file name, include depth and line number), and is updated here.
        Any pending end-of-line is written first, even when line commands
        themselves are switched off.  */
 252
 253 static void
 254 output_line_command (pfile, print, line)
 255      cpp_reader *pfile;
 256      cpp_printer *print;
 257      unsigned int line;
 258 {
 259   cpp_buffer *ip = CPP_BUFFER (pfile);
 260   enum { same = 0, enter, leave, rname } change;
       /* Text appended to the line command for each value of `change',
          in the order of the enum above.  */
 261   static const char * const codes[] = { "", " 1", " 2", "" };
 262
 263   if (line == 0)
 264     return;
 265
 266   /* End the previous line of text.  */
 267   if (pfile->need_newline)
 268     putc ('\n', print->outf);
 269   pfile->need_newline = 0;
 270
 271   if (CPP_OPTION (pfile, no_line_commands))
 272     return;
 273
 274   /* If ip is null, we've been called from cpp_finish, and they just
 275      needed the final flush and trailing newline.  */
 276   if (!ip)
 277     return;
 278
       /* Same include depth as last time means at most a rename; a
          different depth means a file was entered or left.  */
 279   if (pfile->include_depth == print->last_id)
 280     {
 281       /* Determine whether the current filename has changed, and if so,
 282          how.  'nominal_fname' values are unique, so they can be compared
 283          by comparing pointers.  */
 284       if (ip->nominal_fname == print->last_fname)
 285         change = same;
 286       else
 287         change = rname;
 288     }
 289   else
 290     {
 291       if (pfile->include_depth > print->last_id)
 292         change = enter;
 293       else
 294         change = leave;
 295       print->last_id = pfile->include_depth;
 296     }
 297   print->last_fname = ip->nominal_fname;
 298
 299   /* If the current file has not changed, we can output a few newlines
 300      instead if we want to increase the line number by a small amount.
 301      We cannot do this if print->lineno is zero, because that means we
 302      haven't output any line commands yet.  (The very first line
 303      command output is a `same_file' command.)  "Small" here means
          fewer than 8 lines ahead of print->lineno.  */
 304   if (change == same && print->lineno > 0
 305       && line >= print->lineno && line < print->lineno + 8)
 306     {
 307       while (line > print->lineno)
 308         {
 309           putc ('\n', print->outf);
 310           print->lineno++;
 311         }
 312       return;
 313     }
 314
       /* Emit the line command proper.  " 3" is appended when the
          include's sysp is nonzero; for C++, unless NO_IMPLICIT_EXTERN_C
          is defined, " 4" is appended as well when sysp is 2.
          NOTE(review): presumably 3 marks a system header and 4 one that
          needs an implicit extern "C" -- confirm against the cpp
          manual.  The file name is printed as is, without escaping.  */
 315 #ifndef NO_IMPLICIT_EXTERN_C
 316   if (CPP_OPTION (pfile, cplusplus))
 317     fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
 318              codes[change],
 319              ip->inc->sysp ? " 3" : "",
 320              (ip->inc->sysp == 2) ? " 4" : "");
 321   else
 322 #endif
 323     fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
 324              codes[change],
 325              ip->inc->sysp ? " 3" : "");
 326   print->lineno = line;
 327 }
 328
 329 /* Write the part of the token_buffer that lies beyond PRINT->written
 330    to the output stream, and rewind the buffer to that mark.  Also
 331    handles generating line commands and keeping track of file
        transitions, by handing LINE on to output_line_command.  */
 332
 333 void
 334 cpp_output_tokens (pfile, print, line)
 335      cpp_reader *pfile;
 336      cpp_printer *print;
 337      unsigned int line;
 338 {
 339   if (CPP_WRITTEN (pfile) - print->written)
 340     {
           /* The pending text starts PRINT->written bytes into the
              buffer, not at the very beginning of it; whatever precedes
              that mark is kept by the CPP_SET_WRITTEN below and must not
              be written out in its place.  */
 341       safe_fwrite (pfile, pfile->token_buffer + print->written,
 342                    CPP_WRITTEN (pfile) - print->written, print->outf);
 343       pfile->need_newline = 1;
           /* Count the line just written, but only once line commands
              have started: lineno stays zero until the first one.  */
 344       if (print->lineno)
 345         print->lineno++;
 346
 347       CPP_SET_WRITTEN (pfile, print->written);
 348     }
 349   output_line_command (pfile, print, line);
 350 }
 351
 352 /* Scan until CPP_BUFFER (PFILE) is exhausted, discarding the output.
 353    With no output wanted, everything but directives can be ignored.  */
 354 void
 355 cpp_scan_buffer_nooutput (pfile)
 356      cpp_reader *pfile;
 357 {
 358   unsigned int saved_written = CPP_WRITTEN (pfile);
 359   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 360   int done = 0;
 361
 362   while (!done)
 363     {
 364       const cpp_token *tok = cpp_get_token (pfile);
 365       if (tok->type == CPP_EOF)
 366         {
 367           cpp_pop_buffer (pfile);
 368           done = (CPP_BUFFER (pfile) == stop);
 369         }
 370       if (!done)
 371         _cpp_skip_rest_of_line (pfile);
 372     }
 373   CPP_SET_WRITTEN (pfile, saved_written);
 374 }
 375
 376 /* Scan until CPP_BUFFER (PFILE) is exhausted, writing output to PRINT.
 377    When an inner buffer ends, output resumes in the buffer beneath it.  */
 378 void
 379 cpp_scan_buffer (pfile, print)
 380      cpp_reader *pfile;
 381      cpp_printer *print;
 382 {
 383   cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
 384   const cpp_token *last = 0;
 385
 386   while (1)
 387     {
 388       const cpp_token *tok = cpp_get_token (pfile);
 389       if (tok->type != CPP_EOF)
 390         {
               /* A token flagged BOL begins a new line of output.  */
 391           if (tok->flags & BOL)
 392             {
 393               cpp_output_tokens (pfile, print, pfile->token_list.line);
 394               last = 0;
 395             }
 396           output_token (pfile, tok, last);
 397           last = tok;
 398         }
 399       else
 400         {
               /* End of a buffer: pop it, and stop once we are back at
                  the buffer that was beneath the one we started in.  */
 401           cpp_pop_buffer (pfile);
 402           if (CPP_BUFFER (pfile) == stop)
 403             return;
 404           cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
 405           last = 0;
 406         }
 407     }
 408 }
 409
 410 /* Helper routine used by parse_include, which can't see spell_token.
 411    Reinterpret the current line as an h-char-sequence (< ... >); we are
 412    looking at the first token after the <.  Returns a temporary
        CPP_HEADER_NAME token whose text is a freshly allocated copy (LEN
        bytes, not NUL-terminated) of the spelling of every token up to
        the closing >, with a single space wherever a token was preceded
        by whitespace.  If the input ends before a > is seen, an error is
        issued and what was collected so far is returned.  */
 413 const cpp_token *
 414 _cpp_glue_header_name (pfile)
 415      cpp_reader *pfile;
 416 {
 417   unsigned int written = CPP_WRITTEN (pfile);
 418   const cpp_token *t;
 419   cpp_token *hdr;
 420   U_CHAR *buf;
 421   size_t len;
 422
       /* Spell the tokens one after another into the token buffer.  */
 423   for (;;)
 424     {
 425       t = cpp_get_token (pfile);
 426       if (t->type == CPP_GREATER || t->type == CPP_EOF)
 427         break;
 428
 429       CPP_RESERVE (pfile, TOKEN_LEN (t));
 430       if (t->flags & PREV_WHITE)
 431         CPP_PUTC_Q (pfile, ' ');
 432       pfile->limit = spell_token (pfile, t, pfile->limit);
 433     }
 434
 435   if (t->type == CPP_EOF)
 436     cpp_error (pfile, "missing terminating > character");
 437
       /* Copy the spelling out of the token buffer, then rewind the
          buffer to where it was on entry.  The cast matches the other
          allocations in this file.  */
 438   len = CPP_WRITTEN (pfile) - written;
 439   buf = (U_CHAR *) xmalloc (len);
 440   memcpy (buf, pfile->token_buffer + written, len);
 441   CPP_SET_WRITTEN (pfile, written);
 442
 443   hdr = get_temp_token (pfile);
 444   hdr->type = CPP_HEADER_NAME;
 445   hdr->flags = 0;
 446   hdr->val.name.text = buf;
 447   hdr->val.name.len = len;
 448   return hdr;
 449 }
 450
 451 /* Token-buffer helper functions.  */
 452
 453 /* Expand a token list's string space. It is *vital* that
 454    list->tokens_used is correct, to get pointer fix-up right.  LEN is
        the number of bytes by which the capacity of LIST's name buffer
        grows.  */
 455 void
 456 _cpp_expand_name_space (list, len)
 457      cpp_toklist *list;
 458      unsigned int len;
 459 {
 460   const U_CHAR *old_namebuf;
 461
 462   old_namebuf = list->namebuf;
 463   list->name_cap += len;
 464   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 465
 466   /* Fix up token text pointers.  If the buffer moved, the text
          pointer of each of the first tokens_used tokens whose spelling
          class ranks above SPELL_NONE is shifted by the distance the
          buffer moved.
          NOTE(review): this compares and subtracts the old pointer after
          the block it pointed to has been handed to xrealloc, which ISO
          C does not define; it relies on a flat address space.  */
 467   if (list->namebuf != old_namebuf)
 468     {
 469       unsigned int i;
 470
 471       for (i = 0; i < list->tokens_used; i++)
 472         if (token_spellings[list->tokens[i].type].type > SPELL_NONE)
 473           list->tokens[i].val.name.text += (list->namebuf - old_namebuf);
 474     }
 475 }
 476
 477 /* Make sure LIST has room for LEN more name characters, growing the
 478    name space by exactly the shortfall when it does not.  */
 479 void
 480 _cpp_reserve_name_space (list, len)
 481      cpp_toklist *list;
 482      unsigned int len;
 483 {
 484   unsigned int avail = list->name_cap - list->name_used;
 485
 486   if (len > avail)
 487     _cpp_expand_name_space (list, len - avail);
 488 }
 489
 490 /* Enlarge LIST's token array so that it can hold COUNT more tokens.  */
 491 void
 492 _cpp_expand_token_space (list, count)
 493      cpp_toklist *list;
 494      unsigned int count;
 495 {
 496   /* The dummy token, when present, sits just in front of list->tokens
 497      and belongs to the same allocation.  */
 498   int dummy = (list->flags & LIST_OFFSET) != 0;
 499   cpp_token *base = dummy ? list->tokens - 1 : list->tokens;
 500   unsigned int slots;
 501
 502   list->tokens_cap += count;
 503   slots = list->tokens_cap + dummy;
 504   base = (cpp_token *) xrealloc (base, slots * sizeof (cpp_token));
 505   list->tokens = dummy ? base + 1 : base;
 506 }
 507
 508 /* Set up a token list.  With FLAGS equal to NO_DUMMY_TOKEN the list
 509    starts out with no token space and no name space at all.  Any other
 510    value allocates some initial space of both kinds, plus one extra
 511    token in front of the list, so that the lexer can always peek at
 512    the previous token without running off the start of the array.  */
 513 void
 514 _cpp_init_toklist (list, flags)
 515      cpp_toklist *list;
 516      int flags;
 517 {
 518   if (flags != NO_DUMMY_TOKEN)
 519     {
 520       /* Token space: 256 tokens behind a dummy one, which is given
 521          type CPP_EOF so that tests against it fail to match.  */
 522       list->tokens_cap = 256;
 523       list->tokens = (cpp_token *)
 524         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
 525       list->tokens->type = CPP_EOF;
 526       list->tokens += 1;
 527
 528       /* Name space.  */
 529       list->name_cap = 1024;
 530       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 531       list->flags = LIST_OFFSET;
 532     }
 533   else
 534     {
 535       list->tokens = 0;
 536       list->tokens_cap = 0;
 537       list->namebuf = 0;
 538       list->name_cap = 0;
 539       list->flags = 0;
 540     }
 541
 542   _cpp_clear_toklist (list);
 543 }
 544
 545 /* Empty LIST; its storage and its LIST_OFFSET flag are kept.  */
 546 void
 547 _cpp_clear_toklist (list)
 548      cpp_toklist *list;
 549 {
 550   list->flags = list->flags & LIST_OFFSET;
 551   list->params_len = 0;
 552   list->paramc = 0;
 553   list->directive = 0;
 554   list->name_used = 0;
 555   list->tokens_used = 0;
 556 }
 557
 558 /* Release the storage owned by LIST.  LIST itself is left alone, as
 559    it may be embedded in a larger structure.  */
 560 void
 561 _cpp_free_toklist (list)
 562      const cpp_toklist *list;
 563 {
 564   cpp_token *base = list->tokens;
 565   if (list->flags & LIST_OFFSET)
 566     base--;                     /* Allocation starts at the dummy token.  */
 567   free (base);
 568   free (list->namebuf);
 569 }
 570
 571 /* Return nonzero if tokens A and B agree in type, flags and value.  */
 572 int
 573 _cpp_equiv_tokens (a, b)
 574      const cpp_token *a, *b;
 575 {
 576   if (a->type != b->type || a->flags != b->flags)
 577     return 0;
 578   /* What else must agree depends on how this kind of token is spelt.  */
 579   switch (token_spellings[a->type].type)
 580     {
 581     case SPELL_IDENT:
 582     case SPELL_STRING:
 583       if (a->val.name.len != b->val.name.len)
 584         return 0;
 585       return !memcmp (a->val.name.text, b->val.name.text, a->val.name.len);
 586     case SPELL_CHAR:
 587     case SPELL_NONE:
 588       return a->val.aux == b->val.aux;  /* arg_no or character.  */
 589     default:                    /* SPELL_OPERATOR and anything else.  */
 590       break;
 591     }
 592   return 1;
 593 }
 594
 595 /* Return nonzero if token lists A and B are equivalent.  */
 596 int
 597 _cpp_equiv_toklists (a, b)
 598      const cpp_toklist *a, *b;
 599 {
 600   unsigned int n = 0;
 601
 602   /* Cheap whole-list checks first, then token by token.  */
 603   if (a->flags != b->flags || a->paramc != b->paramc
 604       || a->tokens_used != b->tokens_used)
 605     return 0;
 606   while (n < a->tokens_used
 607          && _cpp_equiv_tokens (&a->tokens[n], &b->tokens[n]))
 608     n++;
 609
 610   return n == a->tokens_used;
 611 }
 612
 613 /* Utility routine:
 614    Compares the token beginning at TOKEN and extending for LEN
 615    characters with the NUL-terminated string STRING, and returns a
 616    result with the same meaning as that of strcmp(3).  Typical usage:
 617
 618    if (! cpp_idcmp (pfile->token_buffer + here, CPP_WRITTEN (pfile) - here,
 619                  "inline"))
 620      { ... }
 621  */
 622
 623 int
 624 cpp_idcmp (token, len, string)
 625      const U_CHAR *token;
 626      size_t len;
 627      const char *string;
 628 {
 629   size_t string_len = strlen (string);
 630   size_t common = len < string_len ? len : string_len;
 631   int diff = memcmp (token, string, common);
 632
 633   if (diff != 0)
 634     return diff;
 635
 636   /* Equal over the common prefix, so the shorter one sorts first.  */
 637   if (len < string_len)
 638     return -1;
 639   if (len > string_len)
 640     return 1;
 641   return 0;
 642 }
 643
 644 /* Lexing algorithm.
 645
 646  The original lexer in cpplib was made up of two passes: a first pass
 647  that replaced trigraphs and deleted escaped newlines, and a second
 648  pass that tokenized the result of the first pass.  Tokenisation was
 649  performed by peeking at the next character in the input stream.  For
 650  example, if the input stream contained "!=", the handler for the !
 651  character would peek at the next character, and if it were a '='
 652  would skip over it, and return a "!=" token, otherwise it would
 653  return just the "!" token.
 654
 655  To implement a single-pass lexer, this peeking ahead is unworkable.
 656  An arbitrary number of escaped newlines, and trigraphs (in particular
 657  ??/ which translates to the escape \), could separate the '!' and '='
 658  in the input stream, yet the next token is still a "!=".
 659
 660  Suppose instead that we lex by one logical line at a time, producing
 661  a token list or stack for each logical line, and when seeing the '!'
 662  push a CPP_NOT token on the list.  Then if the '!' is part of a
 663  longer token ("!=") we know we must see the remainder of the token by
 664  the time we reach the end of the logical line.  Thus we can have the
 665  '=' handler look at the previous token (at the end of the list / top
 666  of the stack) and see if it is a "!" token, and if so, instead of
 667  pushing a "=" token revise the existing token to be a "!=" token.
 668
 669  This works in the presence of escaped newlines, because the '\' would
 670  have been pushed on the top of the stack as a CPP_BACKSLASH.  The
 671  newline ('\n' or '\r') handler looks at the token at the top of the
 672  stack to see if it is a CPP_BACKSLASH, and if so discards both.
 673  Otherwise it pushes the newline (CPP_VSPACE) token as normal.  Hence
 674  the '=' handler would never see any intervening escaped newlines.
 675
 676  To make trigraphs work in this context, as in precedence trigraphs
 677  are highest and converted before anything else, the '?' handler does
 678  lookahead to see if it is a trigraph, and if so skips the trigraph
 679  and pushes the token it represents onto the top of the stack.  This
 680  also works in the particular case of a CPP_BACKSLASH trigraph.
 681
 682  To the preprocessor, whitespace is only significant to the point of
 683  knowing whether whitespace precedes a particular token.  For example,
 684  the '=' handler needs to know whether there was whitespace between it
 685  and a "!" token on the top of the stack, to make the token conversion
 686  decision correctly.  So each token has a PREV_WHITE flag to
 687  indicate this - the standard permits consecutive whitespace to be
 688  regarded as a single space.  The compiler front ends are not
 689  interested in whitespace at all; they just require a token stream.
 690  Another place where whitespace is significant to the preprocessor is
 691  a #define statement - if there is whitespace between the macro name
 692  and an initial "(" token the macro is "object-like", otherwise it is
 693  a function-like macro that takes arguments.
 694
 695  However, all is not rosy.  Parsing of identifiers, numbers, comments
 696  and strings becomes trickier because of the possibility of raw
 697  trigraphs and escaped newlines in the input stream.
 698
 699  The trigraphs are three consecutive characters beginning with two
 700  question marks.  A question mark is not valid as part of a number or
 701  identifier, so parsing of a number or identifier terminates normally
 702  upon reaching it, returning to the mainloop which handles the
 703  trigraph just like it would in any other position.  Similarly for the
 704  backslash of a backslash-newline combination.  So we just need the
 705  escaped-newline dropper in the mainloop to check if the token on the
 706  top of the stack after dropping the escaped newline is a number or
 707  identifier, and if so to continue the processing it as if nothing had
 708  happened.
 709
 710  For strings, we replace trigraphs whenever we reach a quote or
 711  newline, because there might be a backslash trigraph escaping them.
 712  We need to be careful that we start trigraph replacing from where we
 713  left off previously, because it is possible for a first scan to leave
 714  "fake" trigraphs that a second scan would pick up as real (e.g. the
 715  sequence "????/\n=" would find a fake ??= trigraph after removing the
 716  escaped newline.)
 717
 718  For line comments, on reaching a newline we scan the previous
 719  character(s) to see if it escaped, and continue if it is.  Block
 720  comments ignore everything and just focus on finding the comment
 721  termination mark.  The only difficult thing, and it is surprisingly
 722  tricky, is checking if an asterisk precedes the final slash since
 723  they could be separated by escaped newlines.  If the preprocessor is
 724  invoked with the output comments option, we don't bother removing
 725  escaped newlines and replacing trigraphs for output.
 726
 727  Finally, numbers can begin with a period, which is pushed initially
 728  as a CPP_DOT token in its own right.  The digit handler checks if the
 729  previous token was a CPP_DOT not separated by whitespace, and if so
 730  pops it off the stack and pushes a period into the number's buffer
 731  before calling the number parser.
 732
 733 */
 734
     /* Spellings of the digraphs.  NOTE(review): the order presumably
        matches the way the spelling code indexes this table; its users
        are not in this part of the file -- confirm before reordering.  */
 735 static const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
 736                                                     U":>", U"<%", U"%>"};
 737
 738 /* Called for each trigraph met; END points at its third character in
 739    the input stream.  Issues the trigraph warning if that is enabled,
 740    and returns true if the trigraph should be honoured.  */
 741 static int
 742 trigraph_ok (pfile, end)
 743      cpp_reader *pfile;
 744      const unsigned char *end;
 745 {
 746   int accept = CPP_OPTION (pfile, trigraphs);
 747   unsigned int col;
 748
 749   if (!CPP_OPTION (pfile, warn_trigraphs))
 750     return accept;
 751   col = end - 1 - pfile->buffer->line_base;
 752   if (!accept)
 753     cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 754                            "trigraph ??%c ignored", (int) *end);
 755   else
 756     cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
 757                            "trigraph ??%c converted to %c",
 758                            (int) *end, (int) trigraph_map[*end]);
 759   return accept;
 760 }
 761
 762 /* Scan a string for trigraphs, warning or replacing them inline as
 763    appropriate.  When parsing a string, we must call this routine
 764    before processing a newline character (if trigraphs are enabled),
 765    since the newline might be escaped by a preceding backslash
 766    trigraph sequence.  Returns a pointer to the end of the name after
 767    replacement.  The text between SRC and LIMIT is rewritten in place:
        each trigraph that trigraph_ok accepts shrinks to the single
        character it stands for, while one that is rejected is only
        warned about and is copied through unchanged.  If no trigraph is
        found at all the text is untouched and LIMIT is returned.  */
 768
 769 static unsigned char *
 770 trigraph_replace (pfile, src, limit)
 771      cpp_reader *pfile;
 772      unsigned char *src;
 773      unsigned char *limit;
 774 {
 775   unsigned char *dest;
 776
 777   /* Starting with src[1], find two consecutive '?'.  The case of no
 778      trigraphs is streamlined.  */
 779
 780   for (src++; src + 1 < limit; src += 2)
 781     {
 782       if (src[0] != '?')
 783         continue;
 784
 785       /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s.  */
 786       if (src[-1] == '?')
 787         src--;
 788       else if (src + 2 == limit || src[1] != '?')
 789         continue;
 790
 791       /* Check if it really is a trigraph.  */
 792       if (trigraph_map[src[2]] == 0)
 793         continue;
 794
 795       dest = src;
 796       goto trigraph_found;
 797     }
 798   return limit;
 799
 800   /* Now we have a trigraph, we need to scan the remaining buffer, and
 801      copy-shifting its contents left if replacement is enabled.  */
 802   for (; src + 2 < limit; dest++, src++)
 803     if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
 804       {
 805       trigraph_found:
 806         src += 2;
             /* SRC is now at the third character; the warning position is
                worked out on the assumption that LIMIT corresponds to
                pfile->buffer->cur.  */
 807         if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
 808           *dest = trigraph_map[*src];
             else
               {
                 /* Not honoured: keep all three characters rather than
                    collapsing them to one.  *DEST already holds the
                    first '?', and DEST never runs ahead of SRC, so
                    these stores cannot clobber unread input.  */
                 *++dest = '?';
                 *++dest = *src;
               }
 809       }
 810
 811   /* Copy remaining (at most 2) characters.  */
 812   while (src < limit)
 813     *dest++ = *src++;
 814   return dest;
 815 }
 816
 817 /* If CUR points at a backslash, or at the final '/' of a "??/"
 818    trigraph that trigraph_ok accepts, return a pointer to where that
 819    backslash begins; otherwise return NULL.  Looking back two places
 820    is safe, as the start of the comment precedes CUR in the buffer.  */
 821 static const unsigned char *
 822 backslash_start (pfile, cur)
 823      cpp_reader *pfile;
 824      const unsigned char *cur;
 825 {
 826   if (*cur == '\\')
 827     return cur;
 828   if (*cur != '/' || cur[-1] != '?' || cur[-2] != '?')
 829     return 0;
 830   /* A spelt-out ??/ counts only if trigraphs are being honoured.  */
 831   return trigraph_ok (pfile, cur) ? cur - 2 : 0;
 832 }
 833
 834 /* Skip a C-style block comment, starting at buffer->cur.  The end is
 835    found by checking whether an asterisk precedes each '/' we meet; the
 836    nasty complication is that the asterisk may be separated from the
 837    '/' by one or more escaped newlines.  Leaves buffer->cur after the
 838    comment.  Returns non-zero if the comment ran into EOF, else zero.  */
 839 static int
 840 skip_block_comment (pfile)
 841      cpp_reader *pfile;
 842 {
 843   cpp_buffer *buffer = pfile->buffer;
 844   const unsigned char *char_after_star = 0;   /* See the newline case.  */
 845   register const unsigned char *cur = buffer->cur;
 846   int seen_eof = 0;
 847
 848   /* Inner loop would think the comment has ended if the first comment
 849      character is a '/'.  Avoid this and keep the inner loop clean by
 850      skipping such a character.  */
 851   if (cur < buffer->rlimit && cur[0] == '/')
 852     cur++;
 853
 854   for (; cur < buffer->rlimit; )
 855     {
 856       unsigned char c = *cur++;
 857
 858       /* People like decorating comments with '*', so check for
 859          '/' instead for efficiency.  */
 860       if (c == '/')
 861         {
 862           if (cur[-2] == '*' || cur - 1 == char_after_star)
 863             goto out;           /* Comment closed; cur is just past '/'.  */
 864
 865           /* Warn about potential nested comments, but not when
 866              the final character inside the comment is a '/'.
 867              Don't bother to get it right across escaped newlines.  */
 868           if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
 869               && cur[0] == '*' && cur[1] != '/')
 870             {
 871               buffer->cur = cur;  /* Presumably for the warning's location.  */
 872               cpp_warning (pfile, "'/*' within comment");
 873             }
 874         }
 875       else if (IS_NEWLINE(c))
 876         {
 877           const unsigned char* bslash = backslash_start (pfile, cur - 2);
 878
 879           handle_newline (cur, buffer->rlimit, c);
 880           /* Work correctly if there is an asterisk before an
 881              arbitrarily long sequence of escaped newlines: remember
 882              where the text resumes after such a sequence.  */
 882           if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
 883             char_after_star = cur;
 884           else
 885             char_after_star = 0;
 886         }
 887     }
 888   seen_eof = 1;                 /* Hit rlimit before the comment closed.  */
 889
 890  out:
 891   buffer->cur = cur;
 892   return seen_eof;
 893 }
 894
 895 /* Skip a C++ or Chill line comment.  Handles escaped newlines.
 896    Returns non-zero if the comment continued over more than one line.  */
 897 static int
 898 skip_line_comment (pfile)
 899      cpp_reader *pfile;
 900 {
 901   cpp_buffer *buffer = pfile->buffer;
 902   register const unsigned char *cur = buffer->cur;
 903   int multiline = 0;
 904
 905   while (cur < buffer->rlimit)
 906     {
 907       unsigned char c = *cur++;
 908
 909       if (!IS_NEWLINE (c))
 910         continue;
 911       /* Without a (possibly trigraphed) backslash before it, this
 912          newline ends the comment; step back to leave it for the caller.  */
 913       if (backslash_start (pfile, cur - 2) == 0)
 914         {
 915           cur--;
 916           break;
 917         }
 918       multiline = 1;
 919       handle_newline (cur, buffer->rlimit, c);
 920     }
 921   buffer->cur = cur;
 922   return multiline;
 923 }
 924
 925 /* Skip whitespace from buffer->cur, stopping at the first newline or
 926    non-whitespace character.  Adjusts pfile->col_adjust for tabs so
 927    tokens get the correct column.  Warns about embedded nulls.  */
 928 static void
 929 skip_whitespace (pfile, in_directive)
 930      cpp_reader *pfile;
 931      int in_directive;   /* Non-zero enables the pedantic '\f'/'\v' warning.  */
 932 {
 933   cpp_buffer *buffer = pfile->buffer;
 934   register const unsigned char *cur = buffer->cur;
 935   unsigned int null_count = 0;  /* As in parse_string; a short could wrap.  */
 936
 937   for (; cur < buffer->rlimit; )
 938     {
 939       unsigned char c = *cur++;
 940
 941       if (c == '\t')
 942         {
 943           unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
 944           pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
 945                                 - col % CPP_OPTION(pfile, tabstop));
 946         }
 947       if (IS_HSPACE(c))         /* FIXME: Fix ISTABLE.  */
 948         continue;
 949       if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines.  */
 950         goto out;
 951       if (c == '\0')
 952         null_count++;
 953       /* Must be '\f' or '\v' */
 954       else if (in_directive && CPP_PEDANTIC (pfile))
 955         cpp_pedwarn (pfile, "%s in preprocessing directive",
 956                      c == '\f' ? "formfeed" : "vertical tab");
 957     }
 958   cur++;                        /* Hit rlimit; cancels the step back below.  */
 959
 960  out:
 961   buffer->cur = cur - 1;        /* Leave the stopping character unread.  */
 962   if (null_count)
 963     cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
 964                  : "embedded null character ignored");
 965 }
 966
 967 /* Parse (append) an identifier: copy identifier characters from the
 968    input onto the end of LIST's name space, growing it as needed.  */
 969 static void
 970 parse_name (pfile, list, name)
 971      cpp_reader *pfile;
 972      cpp_toklist *list;
 973      cpp_name *name;
 974 {
 975   cpp_buffer *buffer = pfile->buffer;
 976   register const unsigned char *cur = buffer->cur;
 977   unsigned char *namebuf;
 978
 979   for (;;)
 980     {
 981       const unsigned char *name_limit = list->namebuf + list->name_cap;
 982
 983       namebuf = list->namebuf + list->name_used;
 984       while (cur < buffer->rlimit && namebuf < name_limit)
 985         {
 986           unsigned char c = *cur;
 987
 988           *namebuf = c;         /* Tentative; kept only if we advance.  */
 989           if (! is_idchar(c))
 990             goto out;
 991           namebuf++;
 992           cur++;
 993           /* $ is not a legal identifier character in the standard, but
 994              is commonly accepted as an extension.  Don't warn about it
 995              in skipped conditional blocks. */
 996           if (c == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 997             {
 998               buffer->cur = cur;
 999               cpp_pedwarn (pfile, "'$' character in identifier");
1000             }
1001         }
1002       /* End of input?  If not, we ran out of name space: get more.  */
1003       if (cur >= buffer->rlimit)
1004         break;
1005       list->name_used = namebuf - list->namebuf;
1006       auto_expand_name_space (list);
1007     }
1008
1009  out:
1010   buffer->cur = cur;
1011   name->len = namebuf - name->text;
1012   list->name_used = namebuf - list->namebuf;
1013 }
1014
1015 /* Parse (append) a number: copy the characters of the number from the
1016    input onto the end of LIST's name space, growing it as needed.  */
1017 static void
1018 parse_number (pfile, list, name)
1019      cpp_reader *pfile;
1020      cpp_toklist *list;
1021      cpp_name *name;
1022 {
1023   cpp_buffer *buffer = pfile->buffer;
1024   register const unsigned char *cur = buffer->cur;
1025   unsigned char *namebuf;
1026
1027   for (;;)
1028     {
1029       const unsigned char *name_limit = list->namebuf + list->name_cap;
1030
1031       namebuf = list->namebuf + list->name_used;
1032       while (cur < buffer->rlimit && namebuf < name_limit)
1033         {
1034           unsigned char c = *cur;
1035
1036           *namebuf = c;         /* Tentative; kept only if we advance.  */
1037           /* Perhaps we should accept '$' here if we accept it for
1038              identifiers.  namebuf[-1] is safe to read: for c to be a
1039              sign at least one character must already be pushed.  */
1040           if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1041             goto out;
1042           namebuf++;
1043           cur++;
1044         }
1045
1046       /* End of input?  If not, we ran out of name space: get more.  */
1047       if (cur >= buffer->rlimit)
1048         break;
1049       list->name_used = namebuf - list->namebuf;
1050       auto_expand_name_space (list);
1051     }
1052
1053  out:
1054   buffer->cur = cur;
1055   name->len = namebuf - name->text;
1056   list->name_used = namebuf - list->namebuf;
1057 }
1058
1059 /* Places a string terminated by an unescaped TERMINATOR into TOKEN's
1060    cpp_name, which should be expandable and thus at the top of the
1061    list's stack.  Handles embedded trigraphs, if necessary, and
1062    escaped newlines.
1063
1064    Can be used for character constants (terminator = '\''), string
1065    constants ('"') and angled headers ('>').  Multi-line strings are
1066    accepted as an extension, with a pedwarn if pedantic.  */
1067
1068 static void
1069 parse_string (pfile, list, token, terminator)
1070      cpp_reader *pfile;
1071      cpp_toklist *list;
1072      cpp_token *token;
1073      unsigned int terminator;
1074 {
1075   cpp_buffer *buffer = pfile->buffer;
1076   cpp_name *name = &token->val.name;
1077   register const unsigned char *cur = buffer->cur;
1078   const unsigned char *name_limit;
1079   unsigned char *namebuf;
1080   unsigned int null_count = 0;
1081   unsigned int trigraphed = list->name_used;  /* Next trigraph scan start.  */
1082
1083  expanded:
1084   name_limit = list->namebuf + list->name_cap;
1085   namebuf = list->namebuf + list->name_used;
1086
1087   for (; cur < buffer->rlimit && namebuf < name_limit; )
1088     {
1089       unsigned int c = *namebuf++ = *cur++; /* Copy a single char.  */
1090
1091       if (c == '\0')
1092         null_count++;
1093       else if (c == terminator || IS_NEWLINE (c))
1094         {
1095           /* Needed for trigraph_replace and multiline string warning.  */
1096           buffer->cur = cur;
1097
1098           /* Scan for trigraphs before checking if backslash-escaped.  */
1099           if ((CPP_OPTION (pfile, trigraphs)
1100                || CPP_OPTION (pfile, warn_trigraphs))
1101               && namebuf - (list->namebuf + trigraphed) >= 3)
1102             {
1103               namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1104                                           namebuf);
1105               /* The test above guarantees trigraphed will be positive.  */
1106               trigraphed = namebuf - list->namebuf - 2;
1107             }
1108
1109           namebuf--;     /* Drop the newline / terminator from the name.  */
1110           if (IS_NEWLINE (c))
1111             {
1112               /* Drop a backslash-newline; never look before this name.  */
1113               if (namebuf > name->text && namebuf[-1] == '\\')
1114                 {
1115                   handle_newline (cur, buffer->rlimit, c);
1116                   namebuf--;
1117                   continue;
1118                 }
1119
1120               cur--;
1121
1122               /* In Fortran and assembly language, silently terminate
1123                  strings of either variety at end of line.  This is a
1124                  kludge around not knowing where comments are in these
1125                  languages.  */
1126               if (CPP_OPTION (pfile, lang_fortran)
1127                   || CPP_OPTION (pfile, lang_asm))
1128                 goto out;
1129
1130               /* Character constants, headers and asserts may not
1131                  extend over multiple lines.  In Standard C, neither
1132                  may strings.  We accept multiline strings as an
1133                  extension.  (Even in directives - otherwise, glibc's
1134                  longlong.h breaks.)  */
1135               if (terminator != '"')
1136                 goto unterminated;
1137
1138               cur++;  /* Move forwards again.  */
1139
1140               if (pfile->multiline_string_line == 0)
1141                 {
1142                   pfile->multiline_string_line = token->line;
1143                   pfile->multiline_string_column = token->col;
1144                   if (CPP_PEDANTIC (pfile))
1145                     cpp_pedwarn (pfile, "multi-line string constant");
1146                 }
1147
1148               *namebuf++ = '\n';
1149               handle_newline (cur, buffer->rlimit, c);
1150             }
1151           else
1152             {
1153               unsigned char *temp;
1154
1155               /* An odd number of consecutive backslashes represents
1156                  an escaped terminator.  */
1157               temp = namebuf - 1;
1158               while (temp >= name->text && *temp == '\\')
1159                 temp--;
1160
1161               if ((namebuf - temp) & 1)
1162                 goto out;
1163               namebuf++;        /* Escaped: keep the terminator after all.  */
1164             }
1165         }
1166     }
1167
1168   /* Run out of name space?  */
1169   if (cur < buffer->rlimit)
1170     {
1171       list->name_used = namebuf - list->namebuf;
1172       auto_expand_name_space (list);
1173       goto expanded;
1174     }
1175
1176   /* We may not have trigraph-replaced the input for this code path,
1177      but as the input is in error by being unterminated we don't
1178      bother.  Prevent warnings about no newlines at EOF.  */
1179   if (IS_NEWLINE(cur[-1]))
1180     cur--;
1181
1182  unterminated:
1183   cpp_error (pfile, "missing terminating %c character", (int) terminator);
1184
1185   if (terminator == '\"' && pfile->multiline_string_line != list->line
1186       && pfile->multiline_string_line != 0)
1187     {
1188       cpp_error_with_line (pfile, pfile->multiline_string_line,
1189                            pfile->multiline_string_column,
1190                            "possible start of unterminated string literal");
1191       pfile->multiline_string_line = 0;
1192     }
1193
1194  out:
1195   buffer->cur = cur;
1196   name->len = namebuf - name->text;
1197   list->name_used = namebuf - list->namebuf;
1198
1199   if (null_count > 0)
1200     cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1201                          : "null character preserved"));
1202 }
1203
1204 /* Store a comment as a CPP_COMMENT token.  TYPE distinguishes the
1205    comment types: '*' = C style, '-' = Chill-style and '/' = C++
1206    style.  FROM and LEN give the text following the two-character
1207    comment start; the start is regenerated here, so the stored
1208    comment includes the comment start and any terminator.  */
1209
1210 #define COMMENT_START_LEN 2
1211 static void
1212 save_comment (list, token, from, len, type)
1213      cpp_toklist *list;
1214      cpp_token *token;
1215      const unsigned char *from;
1216      unsigned int len;
1217      unsigned int type;
1218 {
1219   const unsigned int total = len + COMMENT_START_LEN;
1220   unsigned char *dest;
1221
1222   /* Make room first; TOTAL counts the regenerated comment start as
1223      well as the LEN characters at FROM.  */
1224   if (list->name_used + total > list->name_cap)
1225     _cpp_expand_name_space (list, total);
1226
1227   /* Fill in the token.  */
1228   INIT_TOKEN_NAME (list, token);
1229   token->type = CPP_COMMENT;
1230   token->val.name.len = total;
1231
1232   /* Claim the space at the end of the name buffer.  */
1233   dest = list->namebuf + list->name_used;
1234   list->name_used += total;
1235
1236   /* Regenerate the comment start: slash-star for C comments, TYPE
1237      doubled for the others.  */
1238   dest[0] = (type == '*') ? '/' : type;
1239   dest[1] = type;
1240
1241   /* The rest is copied verbatim.  */
1242   memcpy (dest + COMMENT_START_LEN, from, len);
1243 }
1244
1245 /*
1246  *  The tokenizer's main loop.  Returns a token list, representing a
1247  *  logical line in the input file.  On EOF after some tokens have
1248  *  been processed, we return immediately.  Then in next call, or if
1249  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1250  *  token is placed in the list.
1251  *
1252  *  Implementation relies almost entirely on lookback, rather than
1253  *  looking forwards.  This means that tokenization requires just
1254  *  a single pass of the file, even in the presence of trigraphs and
1255  *  escaped newlines, providing significant performance benefits.
1256  *  Trigraph overhead is negligible if they are disabled, and low
1257  *  even when enabled.
1258  */
1259
1260 #define IS_DIRECTIVE() (list->directive != 0)
1261 #define MIGHT_BE_DIRECTIVE() \
1262 (cur_token == &list->tokens[first_token + 1] && cur_token[-1].type == CPP_HASH)
1263
1264 static void
1265 lex_line (pfile, list)
1266      cpp_reader *pfile;
1267      cpp_toklist *list;
1268 {
1269   cpp_token *cur_token, *token_limit, *first;
1270   cpp_buffer *buffer = pfile->buffer;
1271   const unsigned char *cur = buffer->cur;
1272   unsigned char flags = 0;
1273   unsigned int first_token = list->tokens_used;
1274
1275   if (!(list->flags & LIST_OFFSET))
1276     (abort) ();
1277
1278   list->file = buffer->nominal_fname;
1279   list->line = CPP_BUF_LINE (buffer);
1280   pfile->col_adjust = 0;
1281   pfile->in_lex_line = 1;
1282   if (cur == buffer->buf)
1283     list->flags |= BEG_OF_FILE;
1284
1285  expanded:
1286   token_limit = list->tokens + list->tokens_cap;
1287   cur_token = list->tokens + list->tokens_used;
1288
1289   for (; cur < buffer->rlimit && cur_token < token_limit;)
1290     {
1291       unsigned char c;
1292
1293       /* Optimize whitespace skipping, as most tokens are probably
1294          separated by whitespace. (' ' '\t' '\v' '\f' '\0').  */
1295       c = *cur++;
1296       if (is_hspace (c))
1297         {
1298           /* Step back to get the null warning and tab correction.  */
1299           buffer->cur = cur - 1;
1300           skip_whitespace (pfile, IS_DIRECTIVE ());
1301           cur = buffer->cur;
1302
1303           flags = PREV_WHITE;
1304           if (cur == buffer->rlimit)
1305             break;
1306           c = *cur++;
1307         }
1308
1309       /* Initialize current token.  CPP_EOF will not be fixed up by
1310          expand_name_space.  */
1311       list->tokens_used = cur_token - list->tokens + 1;
1312       cur_token->type = CPP_EOF;
1313       cur_token->col = CPP_BUF_COLUMN (buffer, cur);
1314       cur_token->line = CPP_BUF_LINE (buffer);
1315       cur_token->flags = flags;
1316       flags = 0;
1317
1318       switch (c)
1319         {
1320         case '0': case '1': case '2': case '3': case '4':
1321         case '5': case '6': case '7': case '8': case '9':
1322           {
1323             int prev_dot;
1324
1325             cur--;              /* Backup character.  */
1326             prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1327             if (prev_dot)
1328               cur_token--;
1329             INIT_TOKEN_NAME (list, cur_token);
1330             /* Prepend an immediately previous CPP_DOT token.  */
1331             if (prev_dot)
1332               {
1333                 if (list->name_cap == list->name_used)
1334                   auto_expand_name_space (list);
1335
1336                 cur_token->val.name.len = 1;
1337                 list->namebuf[list->name_used++] = '.';
1338               }
1339
1340           continue_number:
1341             cur_token->type = CPP_NUMBER; /* Before parse_number.  */
1342             buffer->cur = cur;
1343             parse_number (pfile, list, &cur_token->val.name);
1344             cur = buffer->cur;
1345           }
1346           /* Check for # 123 form of #line.  */
1347           if (MIGHT_BE_DIRECTIVE ())
1348             list->directive = _cpp_check_linemarker (pfile, cur_token,
1349                                                      !(cur_token[-1].flags
1350                                                        & PREV_WHITE));
1351           cur_token++;
1352           break;
1353
1354         letter:
1355         case '_':
1356         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1357         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1358         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1359         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1360         case 'y': case 'z':
1361         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1362         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1363         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1364         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1365         case 'Y': case 'Z':
1366           cur--;                     /* Backup character.  */
1367           INIT_TOKEN_NAME (list, cur_token);
1368           cur_token->type = CPP_NAME; /* Identifier, macro etc.  */
1369
1370         continue_name:
1371           buffer->cur = cur;
1372           parse_name (pfile, list, &cur_token->val.name);
1373           cur = buffer->cur;
1374
1375           if (MIGHT_BE_DIRECTIVE ())
1376             list->directive = _cpp_check_directive (pfile, cur_token,
1377                                                     !(list->tokens[0].flags
1378                                                       & PREV_WHITE));
1379           cur_token++;
1380           break;
1381
1382         case '\'':
1383           /* Character constants are not recognized when processing Fortran,
1384              or if -traditional.  */
1385           if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1386             goto other;
1387
1388           /* Fall through.  */
1389         case '\"':
1390           /* Traditionally, escaped strings are not strings.  */
1391           if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1392               && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1393             goto other;
1394
1395           cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1396           /* Do we have a wide string?  */
1397           if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
1398               && cur_token[-1].val.name.len == 1
1399               && cur_token[-1].val.name.text[0] == 'L'
1400               && !CPP_TRADITIONAL (pfile))
1401             {
1402               /* No need for 'L' any more.  */
1403               list->name_used--;
1404               (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1405             }
1406
1407         do_parse_string:
1408           /* Here c is one of ' " or >.  */
1409           INIT_TOKEN_NAME (list, cur_token);
1410           buffer->cur = cur;
1411           parse_string (pfile, list, cur_token, c);
1412           cur = buffer->cur;
1413           cur_token++;
1414           break;
1415
1416         case '/':
1417           cur_token->type = CPP_DIV;
1418           if (IMMED_TOKEN ())
1419             {
1420               if (PREV_TOKEN_TYPE == CPP_DIV)
1421                 {
1422                   /* We silently allow C++ comments in system headers,
1423                      irrespective of conformance mode, because lots of
1424                      broken systems do that and trying to clean it up
1425                      in fixincludes is a nightmare.  */
1426                   if (CPP_IN_SYSTEM_HEADER (pfile))
1427                     goto do_line_comment;
1428                   else if (CPP_OPTION (pfile, cplusplus_comments))
1429                     {
1430                       if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1431                           && ! buffer->warned_cplusplus_comments)
1432                         {
1433                           buffer->cur = cur;
1434                           cpp_pedwarn (pfile,
1435                              "C++ style comments are not allowed in ISO C89");
1436                           cpp_pedwarn (pfile,
1437                           "(this will be reported only once per input file)");
1438                           buffer->warned_cplusplus_comments = 1;
1439                         }
1440                     do_line_comment:
1441                       buffer->cur = cur;
1442 #if 0 /* Leave until new lexer in place.  */
1443                       if (cur[-2] != c)
1444                         cpp_warning (pfile,
1445                                      "comment start split across lines");
1446 #endif
1447                       if (skip_line_comment (pfile))
1448                         cpp_warning (pfile, "multi-line comment");
1449
1450                       /* Back-up to first '-' or '/'.  */
1451                       cur_token--;
1452                       if (!CPP_OPTION (pfile, discard_comments)
1453                           && (!IS_DIRECTIVE()
1454                               || (list->directive->flags & COMMENTS)))
1455                         save_comment (list, cur_token++, cur,
1456                                       buffer->cur - cur, c);
1457                       else if (!CPP_OPTION (pfile, traditional))
1458                         flags = PREV_WHITE;
1459
1460                       cur = buffer->cur;
1461                       break;
1462                     }
1463                 }
1464             }
1465           cur_token++;
1466           break;
1467
1468         case '*':
1469           cur_token->type = CPP_MULT;
1470           if (IMMED_TOKEN ())
1471             {
1472               if (PREV_TOKEN_TYPE == CPP_DIV)
1473                 {
1474                   buffer->cur = cur;
1475 #if 0 /* Leave until new lexer in place.  */
1476                   if (cur[-2] != '/')
1477                     cpp_warning (pfile,
1478                                  "comment start '/*' split across lines");
1479 #endif
1480                   if (skip_block_comment (pfile))
1481                     cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1482                                          "unterminated comment");
1483 #if 0 /* Leave until new lexer in place.  */
1484                   else if (buffer->cur[-2] != '*')
1485                     cpp_warning (pfile,
1486                                  "comment end '*/' split across lines");
1487 #endif
1488                   /* Back up to opening '/'.  */
1489                   cur_token--;
1490                   if (!CPP_OPTION (pfile, discard_comments)
1491                       && (!IS_DIRECTIVE()
1492                           || (list->directive->flags & COMMENTS)))
1493                     save_comment (list, cur_token++, cur,
1494                                   buffer->cur - cur, c);
1495                   else if (!CPP_OPTION (pfile, traditional))
1496                     flags = PREV_WHITE;
1497
1498                   cur = buffer->cur;
1499                   break;
1500                 }
1501               else if (CPP_OPTION (pfile, cplusplus))
1502                 {
1503                   /* In C++, there are .* and ->* operators.  */
1504                   if (PREV_TOKEN_TYPE == CPP_DEREF)
1505                     BACKUP_TOKEN (CPP_DEREF_STAR);
1506                   else if (PREV_TOKEN_TYPE == CPP_DOT)
1507                     BACKUP_TOKEN (CPP_DOT_STAR);
1508                 }
1509             }
1510           cur_token++;
1511           break;
1512
1513         case '\n':
1514         case '\r':
1515           handle_newline (cur, buffer->rlimit, c);
1516           if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1517             {
1518               if (IMMED_TOKEN ())
1519                 {
1520                   /* Remove the escaped newline.  Then continue to process
1521                      any interrupted name or number.  */
1522                   cur_token--;
1523                   /* Backslash-newline may not be immediately followed by
1524                      EOF (C99 5.1.1.2).  */
1525                   if (cur >= buffer->rlimit)
1526                     {
1527                       cpp_pedwarn (pfile, "backslash-newline at end of file");
1528                       break;
1529                     }
1530                   if (IMMED_TOKEN ())
1531                     {
1532                       cur_token--;
1533                       if (cur_token->type == CPP_NAME)
1534                         goto continue_name;
1535                       else if (cur_token->type == CPP_NUMBER)
1536                         goto continue_number;
1537                       cur_token++;
1538                     }
1539                   /* Remember whitespace setting.  */
1540                   flags = cur_token->flags;
1541                   break;
1542                 }
1543               else
1544                 {
1545                   buffer->cur = cur;
1546                   cpp_warning (pfile,
1547                                "backslash and newline separated by space");
1548                 }
1549             }
1550           else if (MIGHT_BE_DIRECTIVE ())
1551             {
1552               /* "Null directive." C99 6.10.7: A preprocessing
1553                  directive of the form # <new-line> has no effect.
1554
1555                  But it is still a directive, and therefore disappears
1556                  from the output. */
1557               cur_token--;
1558               if (cur_token->flags & PREV_WHITE)
1559                 {
1560                   if (CPP_WTRADITIONAL (pfile))
1561                     cpp_warning (pfile,
1562                                  "K+R C ignores #\\n with the # indented");
1563                   if (CPP_TRADITIONAL (pfile))
1564                     cur_token++;
1565                 }
1566             }
1567
1568           /* Skip vertical space until we have at least one token to
1569              return.  */
1570           if (cur_token != &list->tokens[first_token])
1571             goto out;
1572           list->line = CPP_BUF_LINE (buffer);
1573           break;
1574
1575         case '-':
1576           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1577             {
1578               if (CPP_OPTION (pfile, chill))
1579                 goto do_line_comment;
1580               REVISE_TOKEN (CPP_MINUS_MINUS);
1581             }
1582           else
1583             PUSH_TOKEN (CPP_MINUS);
1584           break;
1585
1586         make_hash:
1587         case '#':
1588           /* The digraph flag checking ensures that ## and %:%:
1589              are interpreted as CPP_PASTE, but #%: and %:# are not.  */
1590           if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1591               && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1592             REVISE_TOKEN (CPP_PASTE);
1593           else
1594             PUSH_TOKEN (CPP_HASH);
1595           break;
1596
1597         case ':':
1598           cur_token->type = CPP_COLON;
1599           if (IMMED_TOKEN ())
1600             {
1601               if (PREV_TOKEN_TYPE == CPP_COLON
1602                   && CPP_OPTION (pfile, cplusplus))
1603                 BACKUP_TOKEN (CPP_SCOPE);
1604               /* Digraph: "<:" is a '['  */
1605               else if (PREV_TOKEN_TYPE == CPP_LESS)
1606                 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1607               /* Digraph: "%:" is a '#'  */
1608               else if (PREV_TOKEN_TYPE == CPP_MOD)
1609                 {
1610                   (--cur_token)->flags |= DIGRAPH;
1611                   goto make_hash;
1612                 }
1613             }
1614           cur_token++;
1615           break;
1616
1617         case '&':
1618           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1619             REVISE_TOKEN (CPP_AND_AND);
1620           else
1621             PUSH_TOKEN (CPP_AND);
1622           break;
1623
1624         make_or:
1625         case '|':
1626           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1627             REVISE_TOKEN (CPP_OR_OR);
1628           else
1629             PUSH_TOKEN (CPP_OR);
1630           break;
1631
1632         case '+':
1633           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1634             REVISE_TOKEN (CPP_PLUS_PLUS);
1635           else
1636             PUSH_TOKEN (CPP_PLUS);
1637           break;
1638
1639         case '=':
1640             /* This relies on equidistance of "?=" and "?" tokens.  */
1641           if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1642             REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1643           else
1644             PUSH_TOKEN (CPP_EQ);
1645           break;
1646
1647         case '>':
1648           cur_token->type = CPP_GREATER;
1649           if (IMMED_TOKEN ())
1650             {
1651               if (PREV_TOKEN_TYPE == CPP_GREATER)
1652                 BACKUP_TOKEN (CPP_RSHIFT);
1653               else if (PREV_TOKEN_TYPE == CPP_MINUS)
1654                 BACKUP_TOKEN (CPP_DEREF);
1655               /* Digraph: ":>" is a ']'  */
1656               else if (PREV_TOKEN_TYPE == CPP_COLON)
1657                 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1658               /* Digraph: "%>" is a '}'  */
1659               else if (PREV_TOKEN_TYPE == CPP_MOD)
1660                 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1661             }
1662           cur_token++;
1663           break;
1664
1665         case '<':
1666           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1667             {
1668               REVISE_TOKEN (CPP_LSHIFT);
1669               break;
1670             }
1671           /* Is this the beginning of a header name?  */
1672           if (IS_DIRECTIVE () && (list->directive->flags & INCL))
1673             {
1674               c = '>';  /* Terminator.  */
1675               cur_token->type = CPP_HEADER_NAME;
1676               goto do_parse_string;
1677             }
1678           PUSH_TOKEN (CPP_LESS);
1679           break;
1680
1681         case '%':
1682           /* Digraph: "<%" is a '{'  */
1683           cur_token->type = CPP_MOD;
1684           if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1685             BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1686           cur_token++;
1687           break;
1688
1689         case '?':
1690           if (cur + 1 < buffer->rlimit && *cur == '?'
1691               && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1692             {
1693               /* Handle trigraph.  */
1694               cur++;
1695               switch (*cur++)
1696                 {
1697                 case '(': goto make_open_square;
1698                 case ')': goto make_close_square;
1699                 case '<': goto make_open_brace;
1700                 case '>': goto make_close_brace;
1701                 case '=': goto make_hash;
1702                 case '!': goto make_or;
1703                 case '-': goto make_complement;
1704                 case '/': goto make_backslash;
1705                 case '\'': goto make_xor;
1706                 }
1707             }
1708           if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1709             {
1710               /* GNU C++ defines <? and >? operators.  */
1711               if (PREV_TOKEN_TYPE == CPP_LESS)
1712                 {
1713                   REVISE_TOKEN (CPP_MIN);
1714                   break;
1715                 }
1716               else if (PREV_TOKEN_TYPE == CPP_GREATER)
1717                 {
1718                   REVISE_TOKEN (CPP_MAX);
1719                   break;
1720                 }
1721             }
1722           PUSH_TOKEN (CPP_QUERY);
1723           break;
1724
1725         case '.':
1726           if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1727               && IMMED_TOKEN ()
1728               && !(cur_token[-1].flags & PREV_WHITE))
1729             {
1730               cur_token -= 2;
1731               PUSH_TOKEN (CPP_ELLIPSIS);
1732             }
1733           else
1734             PUSH_TOKEN (CPP_DOT);
1735           break;
1736
1737         make_complement:
1738         case '~': PUSH_TOKEN (CPP_COMPL); break;
1739         make_xor:
1740         case '^': PUSH_TOKEN (CPP_XOR); break;
1741         make_open_brace:
1742         case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1743         make_close_brace:
1744         case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1745         make_open_square:
1746         case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1747         make_close_square:
1748         case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1749         make_backslash:
1750         case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1751         case '!': PUSH_TOKEN (CPP_NOT); break;
1752         case ',': PUSH_TOKEN (CPP_COMMA); break;
1753         case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
1754         case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
1755         case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
1756
1757         case '$':
1758           if (CPP_OPTION (pfile, dollars_in_ident))
1759             goto letter;
1760           /* Fall through */
1761         other:
1762         default:
1763           cur_token->val.aux = c;
1764           PUSH_TOKEN (CPP_OTHER);
1765           break;
1766         }
1767     }
1768
1769   /* Run out of token space?  */
1770   if (cur_token == token_limit)
1771     {
1772       list->tokens_used = cur_token - list->tokens;
1773       _cpp_expand_token_space (list, 256);
1774       goto expanded;
1775     }
1776
1777   cur_token->flags = flags;
1778   if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
1779     {
1780       if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
1781         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1782                                CPP_BUF_COLUMN (buffer, cur),
1783                                "no newline at end of file");
1784       cur_token++->type = CPP_EOF;
1785     }
1786
1787  out:
1788   /* All tokens are allocated, so the memory location is fixed.  */
1789   first = &list->tokens[first_token];
1790
1791   /* Don't complain about the null directive, nor directives in
1792      assembly source: we don't know where the comments are, and # may
1793      introduce assembler pseudo-ops.  Don't complain about invalid
1794      directives in skipped conditional groups (6.10 p4).  */
1795   if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1796       && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1797     {
1798       if (first[1].type == CPP_NAME)
1799         cpp_error (pfile, "invalid preprocessing directive #%.*s",
1800                    (int) first[1].val.name.len, first[1].val.name.text);
1801       else
1802         cpp_error (pfile, "invalid preprocessing directive");
1803     }
1804
1805   /* Put EOF at end of directives.  This covers "directives do not
1806      extend beyond the end of the line (description 6.10 part 2)".  */
1807   if (IS_DIRECTIVE () || !pfile->done_initializing)
1808     {
1809       pfile->first_directive_token = first;
1810       cur_token++->type = CPP_EOF;
1811     }
1812
1813   if (first_token == 0 || IS_DIRECTIVE ())
1814     /* Set beginning of line flag.  */
1815     first->flags |= BOL;
1816   else
1817     /* 6.10.3.10: Within the sequence of preprocessing tokens making
1818        up the invocation of a function-like macro, new line is
1819        considered a normal white-space character.  */
1820     first->flags |= PREV_WHITE;
1821
1822   buffer->cur = cur;
1823   list->tokens_used = cur_token - list->tokens;
1824   pfile->in_lex_line = 0;
1825 }
1826
1827 /* Write the spelling of a token TOKEN, with any appropriate
1828    whitespace before it, to the token_buffer.  PREV is the previous
1829    token, which is used to determine if we need to shove in an extra
1830    space in order to avoid accidental token paste.  */
1831 static void
1832 output_token (pfile, token, prev)
1833      cpp_reader *pfile;
1834      const cpp_token *token, *prev;
1835 {
1836   int dummy;
1837
1838   if (token->col && (token->flags & BOL))
1839     {
1840       /* Supply enough whitespace to put this token in its original
1841          column.  Don't bother trying to reconstruct tabs; we can't
1842          get it right in general, and nothing ought to care.  (Yes,
1843          some things do care; the fault lies with them.)  */
1844       unsigned char *buffer;
1845       unsigned int spaces = token->col - 1;
1846
1847       CPP_RESERVE (pfile, token->col);
1848       buffer = pfile->limit;
1849
1850       while (spaces--)
1851         *buffer++ = ' ';
1852       pfile->limit = buffer;
1853     }
1854   else if (token->flags & PREV_WHITE)
1855     CPP_PUTC (pfile, ' ');
1856   /* Check for and prevent accidental token pasting, in ANSI mode.  */
1857
1858   else if (!CPP_TRADITIONAL (pfile) && prev)
1859     {
1860       if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1861         CPP_PUTC (pfile, ' ');
1862       /* can_paste catches most of the accidental paste cases, but not all.
1863          Consider a + ++b - if there is not a space between the + and ++, it
1864          will be misparsed as a++ + b.  */
1865       else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1866                || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1867         CPP_PUTC (pfile, ' ');
1868     }
1869
1870   CPP_RESERVE (pfile, TOKEN_LEN (token));
1871   pfile->limit = spell_token (pfile, token, pfile->limit);
1872 }
1873
1874 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1875    already contain the enough space to hold the token's spelling.  If
1876    WHITESPACE is true, and the token was preceded by whitespace,
1877    output a single space before the token proper.  Returns a pointer
1878    to the character after the last character written.  */
1879
1880 static unsigned char *
1881 spell_token (pfile, token, buffer)
1882      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1883      const cpp_token *token;
1884      unsigned char *buffer;
1885 {
1886   switch (token_spellings[token->type].type)
1887     {
1888     case SPELL_OPERATOR:
1889       {
1890         const unsigned char *spelling;
1891         unsigned char c;
1892
1893         if (token->flags & DIGRAPH)
1894           spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1895         else
1896           spelling = token_spellings[token->type].spelling;
1897
1898         while ((c = *spelling++) != '\0')
1899           *buffer++ = c;
1900       }
1901       break;
1902
1903     case SPELL_IDENT:
1904       memcpy (buffer, token->val.name.text, token->val.name.len);
1905       buffer += token->val.name.len;
1906       break;
1907
1908     case SPELL_STRING:
1909       {
1910         unsigned char c;
1911
1912         if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1913           *buffer++ = 'L';
1914         c = '\'';
1915         if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1916           c = '"';
1917         *buffer++ = c;
1918         memcpy (buffer, token->val.name.text, token->val.name.len);
1919         buffer += token->val.name.len;
1920         *buffer++ = c;
1921       }
1922       break;
1923
1924     case SPELL_CHAR:
1925       *buffer++ = token->val.aux;
1926       break;
1927
1928     case SPELL_NONE:
1929       cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
1930       break;
1931     }
1932
1933   return buffer;
1934 }
1935
/* Macro expansion algorithm.  TODO.  */

/* Constant template tokens.  Each positional initializer is all
   zeros apart from its CPP_PLACEMARKER / CPP_EOF entry.  parse_arg
   and parse_args below hand out copies of placemarker_token (via
   duplicate_token) rather than the object itself, so that a copy's
   flags and position can be changed.  */
static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0, {0}};
static const cpp_token eof_token = {0, 0, CPP_EOF, 0, {0}};

/* Non-zero if context C has its CONTEXT_ARG flag set.  */
#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
/* Address of the entry of PFILE's context array selected by
   PFILE's cur_context index.  */
#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)

/* Flags for cpp_context.  */
#define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
#define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
#define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
#define CONTEXT_ARG     (1 << 3) /* If an argument context.  */

/* Overwrite D's flags with just the PREV_WHITE, BOL and PASTE_LEFT
   bits of S's flags; every other bit of D's flags is cleared.  If
   BOL ends up set, D also takes S's column and line.  D and S are
   pointers to tokens and are evaluated more than once.  */
#define ASSIGN_FLAGS_AND_POS(d, s) \
  do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
      if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)

/* Replace only the PREV_WHITE and BOL bits of D's flags with F,
   leaving D's other flag bits alone.  If F includes BOL, D also
   takes S's column and line.  F is evaluated twice.  */
/* f is flags, just consisting of PREV_WHITE | BOL.  */
#define MODIFY_FLAGS_AND_POS(d, s, f) \
  do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
      if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
  } while (0)
1960
/* One entry of the reader's context array (see CURRENT_CONTEXT).  An
   entry describes either a macro's token list or a macro argument;
   the CONTEXT_ARG bit in FLAGS says which member of U is live.  */
typedef struct cpp_context cpp_context;
struct cpp_context
{
  union
  {
    const cpp_toklist *list;    /* Used for macro contexts only.  */
    const cpp_token **arg;      /* Used for arg contexts only.  */
  } u;

  /* Pushed token to be returned by next call to cpp_get_token.  */
  const cpp_token *pushed_token;

  struct macro_args *args;      /* 0 for arguments and object-like macros.  */
  unsigned short posn;          /* Current posn, index into u.  */
  unsigned short count;         /* No. of tokens in u.  */
  /* is_macro_disabled stops its walk when this is zero, and for a
     CONTEXT_RAW context uses it as an index into the context array
     to jump to.  */
  unsigned short level;
  unsigned char flags;          /* Bitmask of the CONTEXT_* values.  */
};
1979
/* The tokens collected for the arguments of one function-like macro
   invocation; filled in by parse_args / parse_arg / save_token and
   released by free_macro_args.  */
typedef struct macro_args macro_args;
struct macro_args
{
  /* ends[i] is the running total of saved tokens once argument i has
     been parsed (written by parse_args).  */
  unsigned int *ends;
  /* The saved tokens of all arguments, back to back.  parse_arg
     stores a null entry whenever the incoming tokens switch between
     raw and non-raw.  Grown by save_token.  */
  const cpp_token **tokens;
  unsigned int capacity;        /* Number of slots allocated in TOKENS.  */
  unsigned int used;            /* Number of slots of TOKENS in use.  */
  unsigned short level;         /* NOTE(review): not referenced in this
                                   part of the file; meaning unconfirmed.  */
};
1989
/* Forward declarations of the file-local helpers used by the macro
   expansion code.  PARAMS wraps each parameter list, as in the rest
   of this file.  */
static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
                                           macro_args *, unsigned int *));
static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
static void save_token PARAMS ((macro_args *, const cpp_token *));
static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
                                                  const cpp_token *));
static int do_pop_context PARAMS ((cpp_reader *));
static const cpp_token *pop_context PARAMS ((cpp_reader *));
static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
                                                    cpp_hashnode *,
                                                    const cpp_token *));
static void free_macro_args PARAMS ((macro_args *));
2003
2004 /* Free the storage allocated for macro arguments.  */
2005 static void
2006 free_macro_args (args)
2007      macro_args *args;
2008 {
2009   if (args->tokens)
2010     free (args->tokens);
2011   free (args->ends);
2012   free (args);
2013 }
2014
2015 /* Determines if a macro has been already used (and is therefore
2016    disabled).  */
2017 static int
2018 is_macro_disabled (pfile, expansion, token)
2019      cpp_reader *pfile;
2020      const cpp_toklist *expansion;
2021      const cpp_token *token;
2022 {
2023   cpp_context *context = CURRENT_CONTEXT (pfile);
2024
2025   /* Arguments on either side of ## are inserted in place without
2026      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
2027      occurs during a later rescan pass.  The effect is that we expand
2028      iff we would as part of the macro's expansion list, so we should
2029      drop to the macro's context.  */
2030   if (IS_ARG_CONTEXT (context))
2031     {
2032       if (token->flags & PASTED)
2033         context--;
2034       else if (!(context->flags & CONTEXT_RAW))
2035         return 1;
2036       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2037         context--;
2038     }
2039
2040   /* Have we already used this macro?  */
2041   while (context->level > 0)
2042     {
2043       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2044         return 1;
2045       /* Raw argument tokens are judged based on the token list they
2046          came from.  */
2047       if (context->flags & CONTEXT_RAW)
2048         context = pfile->contexts + context->level;
2049       else
2050         context--;
2051     }
2052
2053   /* Function-like macros may be disabled if the '(' is not in the
2054      current context.  We check this without disrupting the context
2055      stack.  */
2056   if (expansion->paramc >= 0)
2057     {
2058       const cpp_token *next;
2059       unsigned int prev_nme;
2060
2061       context = CURRENT_CONTEXT (pfile);
2062       /* Drop down any contexts we're at the end of: the '(' may
2063          appear in lower macro expansions, or in the rest of the file.  */
2064       while (context->posn == context->count && context > pfile->contexts)
2065         {
2066           context--;
2067           /* If we matched, we are disabled, as we appear in the
2068              expansion of each macro we meet.  */
2069           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2070             return 1;
2071         }
2072
2073       prev_nme = pfile->no_expand_level;
2074       pfile->no_expand_level = context - pfile->contexts;
2075       next = cpp_get_token (pfile);
2076       restore_macro_expansion (pfile, prev_nme);
2077       if (next->type != CPP_OPEN_PAREN)
2078         {
2079           _cpp_push_token (pfile, next);
2080           if (CPP_OPTION (pfile, warn_traditional))
2081             cpp_warning (pfile,
2082          "function macro %.*s must be used with arguments in traditional C",
2083                          (int) token->val.name.len, token->val.name.text);
2084           return 1;
2085         }
2086     }
2087
2088   return 0;
2089 }
2090
2091 /* Add a token to the set of tokens forming the arguments to the macro
2092    being parsed in parse_args.  */
2093 static void
2094 save_token (args, token)
2095      macro_args *args;
2096      const cpp_token *token;
2097 {
2098   if (args->used == args->capacity)
2099     {
2100       args->capacity += args->capacity + 100;
2101       args->tokens = (const cpp_token **)
2102         xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2103     }
2104   args->tokens[args->used++] = token;
2105 }
2106
2107 /* Take and save raw tokens until we finish one argument.  Empty
2108    arguments are saved as a single CPP_PLACEMARKER token.  */
2109 static const cpp_token *
2110 parse_arg (pfile, var_args, paren_context, args, pcount)
2111      cpp_reader *pfile;
2112      int var_args;
2113      unsigned int paren_context;
2114      macro_args *args;
2115      unsigned int *pcount;
2116 {
2117   const cpp_token *token;
2118   unsigned int paren = 0, count = 0;
2119   int raw, was_raw = 1;
2120
2121   for (count = 0;; count++)
2122     {
2123       token = cpp_get_token (pfile);
2124
2125       switch (token->type)
2126         {
2127         default:
2128           break;
2129
2130         case CPP_OPEN_PAREN:
2131           paren++;
2132           break;
2133
2134         case CPP_CLOSE_PAREN:
2135           if (paren-- != 0)
2136             break;
2137           goto out;
2138
2139         case CPP_COMMA:
2140           /* Commas are not terminators within parantheses or var_args.  */
2141           if (paren || var_args)
2142             break;
2143           goto out;
2144
2145         case CPP_EOF:           /* Error reported by caller.  */
2146           goto out;
2147         }
2148
2149       raw = pfile->cur_context <= paren_context;
2150       if (raw != was_raw)
2151         {
2152           was_raw = raw;
2153           save_token (args, 0);
2154           count++;
2155         }
2156       save_token (args, token);
2157     }
2158
2159  out:
2160   if (count == 0)
2161     {
2162       /* Duplicate the placemarker.  Then we can set its flags and
2163          position and safely be using more than one.  */
2164       save_token (args, duplicate_token (pfile, &placemarker_token));
2165       count++;
2166     }
2167
2168   *pcount = count;
2169   return token;
2170 }
2171
/* True if the argument that starts at index O of argument list A is
   empty: its first entry is a PLACEMARKER token, or its first entry
   is a null pointer and the entry after that is a PLACEMARKER.  A
   and O are evaluated more than once.  */

#define empty_argument(A, O)                                    \
  ((A)->tokens[(O)]                                             \
   ? (A)->tokens[(O)]->type == CPP_PLACEMARKER                  \
   : (A)->tokens[(O) + 1]->type == CPP_PLACEMARKER)
2179
2180 /* Parse the arguments making up a macro invocation.  Nested arguments
2181    are automatically macro expanded, but immediate macros are not
2182    expanded; this enables e.g. operator # to work correctly.  Returns
2183    non-zero on error.  */
2184 static int
2185 parse_args (pfile, hp, args)
2186      cpp_reader *pfile;
2187      cpp_hashnode *hp;
2188      macro_args *args;
2189 {
2190   const cpp_token *token;
2191   const cpp_toklist *macro;
2192   unsigned int total = 0;
2193   unsigned int paren_context = pfile->cur_context;
2194   int argc = 0;
2195
2196   macro = hp->value.expansion;
2197   do
2198     {
2199       unsigned int count;
2200
2201       token = parse_arg (pfile, (argc + 1 == macro->paramc
2202                                  && (macro->flags & VAR_ARGS)),
2203                          paren_context, args, &count);
2204       if (argc < macro->paramc)
2205         {
2206           total += count;
2207           args->ends[argc] = total;
2208         }
2209       argc++;
2210     }
2211   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2212
2213   if (token->type == CPP_EOF)
2214     {
2215       cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2216                  hp->length, hp->name);
2217       return 1;
2218     }
2219   else if (argc < macro->paramc)
2220     {
2221       /* A rest argument is allowed to not appear in the invocation at all.
2222          e.g. #define debug(format, args...) ...
2223          debug("string");
2224          This is exactly the same as if the rest argument had received no
2225          tokens - debug("string",);  */
2226
2227       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2228         {
2229           /* Duplicate the placemarker.  Then we can set its flags and
2230              position and safely be using more than one.  */
2231           save_token (args, duplicate_token (pfile, &placemarker_token));
2232           args->ends[argc] = total + 1;
2233           return 0;
2234         }
2235       else
2236         {
2237           cpp_error (pfile,
2238                      "insufficient arguments in invocation of macro \"%.*s\"",
2239                      hp->length, hp->name);
2240           return 1;
2241         }
2242     }
2243   /* An empty argument to an empty function-like macro is fine.  */
2244   else if (argc > macro->paramc
2245            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2246     {
2247       cpp_error (pfile,
2248                  "too many arguments in invocation of macro \"%.*s\"",
2249                  hp->length, hp->name);
2250       return 1;
2251     }
2252
2253   return 0;
2254 }
2255
2256 /* Adds backslashes before all backslashes and double quotes appearing
2257    in strings.  Non-printable characters are converted to octal.  */
2258 static U_CHAR *
2259 quote_string (dest, src, len)
2260      U_CHAR *dest;
2261      const U_CHAR *src;
2262      unsigned int len;
2263 {
2264   while (len--)
2265     {
2266       U_CHAR c = *src++;
2267
2268       if (c == '\\' || c == '"')
2269         {
2270           *dest++ = '\\';
2271           *dest++ = c;
2272         }
2273       else
2274         {
2275           if (ISPRINT (c))
2276             *dest++ = c;
2277           else
2278             {
2279               sprintf ((char *) dest, "\\%03o", c);
2280               dest += 4;
2281             }
2282         }
2283     }
2284
2285   return dest;
2286 }
2287
2288 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2289    CPP_STRING token containing TEXT in quoted form.  */
2290 static cpp_token *
2291 make_string_token (token, text, len)
2292      cpp_token *token;
2293      const U_CHAR *text;
2294      unsigned int len;
2295 {
2296   U_CHAR *buf;
2297
2298   buf = (U_CHAR *) xmalloc (len * 4);
2299   token->type = CPP_STRING;
2300   token->flags = 0;
2301   token->val.name.text = buf;
2302   token->val.name.len = quote_string (buf, text, len) - buf;
2303   return token;
2304 }
2305
2306 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2307    evaluating to NUMBER.  */
2308 static cpp_token *
2309 alloc_number_token (pfile, number)
2310      cpp_reader *pfile;
2311      int number;
2312 {
2313   cpp_token *result;
2314   char *buf;
2315
2316   result = get_temp_token (pfile);
2317   buf = xmalloc (20);
2318   sprintf (buf, "%d", number);
2319
2320   result->type = CPP_NUMBER;
2321   result->flags = 0;
2322   result->val.name.text = (U_CHAR *) buf;
2323   result->val.name.len = strlen (buf);
2324   return result;
2325 }
2326
2327 /* Returns a temporary token from the temporary token store of PFILE.  */
2328 static cpp_token *
2329 get_temp_token (pfile)
2330      cpp_reader *pfile;
2331 {
2332   if (pfile->temp_used == pfile->temp_alloced)
2333     {
2334       if (pfile->temp_used == pfile->temp_cap)
2335         {
2336           pfile->temp_cap += pfile->temp_cap + 20;
2337           pfile->temp_tokens = (cpp_token **) xrealloc
2338             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2339         }
2340       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2341         (sizeof (cpp_token));
2342     }
2343
2344   return pfile->temp_tokens[pfile->temp_used++];
2345 }
2346
2347 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2348 static void
2349 release_temp_tokens (pfile)
2350      cpp_reader *pfile;
2351 {
2352   while (pfile->temp_used)
2353     {
2354       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2355
2356       if (token_spellings[token->type].type > SPELL_NONE)
2357         {
2358           free ((char *) token->val.name.text);
2359           token->val.name.text = 0;
2360         }
2361     }
2362 }
2363
2364 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2365 void
2366 _cpp_free_temp_tokens (pfile)
2367      cpp_reader *pfile;
2368 {
2369   if (pfile->temp_tokens)
2370     {
2371       /* It is possible, though unlikely (looking for '(' of a funlike
2372          macro into EOF), that we haven't released the tokens yet.  */
2373       release_temp_tokens (pfile);
2374       while (pfile->temp_alloced)
2375         free (pfile->temp_tokens[--pfile->temp_alloced]);
2376       free (pfile->temp_tokens);
2377     }
2378
2379   if (pfile->date)
2380     {
2381       free ((char *) pfile->date->val.name.text);
2382       free (pfile->date);
2383       free ((char *) pfile->time->val.name.text);
2384       free (pfile->time);
2385     }
2386 }
2387
2388 /* Copy TOKEN into a temporary token from PFILE's store.  */
2389 static cpp_token *
2390 duplicate_token (pfile, token)
2391      cpp_reader *pfile;
2392      const cpp_token *token;
2393 {
2394   cpp_token *result = get_temp_token (pfile);
2395
2396   *result = *token;
2397   if (token_spellings[token->type].type > SPELL_NONE)
2398     {
2399       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.name.len);
2400       memcpy (buff, token->val.name.text, token->val.name.len);
2401       result->val.name.text = buff;
2402     }
2403   return result;
2404 }
2405
2406 /* Determine whether two tokens can be pasted together, and if so,
2407    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2408    be pasted, or the appropriate type for the merged token if they
2409    can.  */
2410 static enum cpp_ttype
2411 can_paste (pfile, token1, token2, digraph)
2412      cpp_reader * pfile;
2413      const cpp_token *token1, *token2;
2414      int* digraph;
2415 {
2416   enum cpp_ttype a = token1->type, b = token2->type;
2417   int cxx = CPP_OPTION (pfile, cplusplus);
2418
2419   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2420     return a + (CPP_EQ_EQ - CPP_EQ);
2421
2422   switch (a)
2423     {
2424     case CPP_GREATER:
2425       if (b == a) return CPP_RSHIFT;
2426       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2427       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2428       break;
2429     case CPP_LESS:
2430       if (b == a) return CPP_LSHIFT;
2431       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2432       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2433       if (b == CPP_COLON)
2434         {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2435       if (b == CPP_MOD)
2436         {*digraph = 1; return CPP_OPEN_BRACE;}  /* <% digraph */
2437       break;
2438
2439     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2440     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2441     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2442
2443     case CPP_MINUS:
2444       if (b == a)               return CPP_MINUS_MINUS;
2445       if (b == CPP_GREATER)     return CPP_DEREF;
2446       break;
2447     case CPP_COLON:
2448       if (b == a && cxx)        return CPP_SCOPE;
2449       if (b == CPP_GREATER)
2450         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2451       break;
2452
2453     case CPP_MOD:
2454       if (b == CPP_GREATER)
2455         {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2456       if (b == CPP_COLON)
2457         {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2458       break;
2459     case CPP_DEREF:
2460       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2461       break;
2462     case CPP_DOT:
2463       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2464       if (b == CPP_NUMBER)      return CPP_NUMBER;
2465       break;
2466
2467     case CPP_HASH:
2468       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2469         /* %:%: digraph */
2470         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2471       break;
2472
2473     case CPP_NAME:
2474       if (b == CPP_NAME)        return CPP_NAME;
2475       if (b == CPP_NUMBER
2476           && is_numstart(token2->val.name.text[0]))     return CPP_NAME;
2477       if (b == CPP_CHAR
2478           && token1->val.name.len == 1
2479           && token1->val.name.text[0] == 'L')   return CPP_WCHAR;
2480       if (b == CPP_STRING
2481           && token1->val.name.len == 1
2482           && token1->val.name.text[0] == 'L')   return CPP_WSTRING;
2483       break;
2484
2485     case CPP_NUMBER:
2486       if (b == CPP_NUMBER)      return CPP_NUMBER;
2487       if (b == CPP_NAME)        return CPP_NUMBER;
2488       if (b == CPP_DOT)         return CPP_NUMBER;
2489       /* Numbers cannot have length zero, so this is safe.  */
2490       if ((b == CPP_PLUS || b == CPP_MINUS)
2491           && VALID_SIGN ('+', token1->val.name.text[token1->val.name.len - 1]))
2492         return CPP_NUMBER;
2493       break;
2494
2495     default:
2496       break;
2497     }
2498
2499   return CPP_EOF;
2500 }
2501
2502 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2503 static const cpp_token *
2504 maybe_paste_with_next (pfile, token)
2505      cpp_reader *pfile;
2506      const cpp_token *token;
2507 {
2508   cpp_token *pasted;
2509   const cpp_token *second;
2510   cpp_context *context = CURRENT_CONTEXT (pfile);
2511
2512   /* Is this token on the LHS of ## ? */
2513   if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2514       && !(token->flags & PASTE_LEFT))
2515     return token;
2516
2517   /* Prevent recursion, and possibly pushing back more than one token.  */
2518   if (pfile->paste_level)
2519     return token;
2520
2521   /* Suppress macro expansion for next token, but don't conflict with
2522      the other method of suppression.  If it is an argument, macro
2523      expansion within the argument will still occur.  */
2524   pfile->paste_level = pfile->cur_context;
2525   second = cpp_get_token (pfile);
2526   pfile->paste_level = 0;
2527
2528   /* Ignore placemarker argument tokens.  */
2529   if (token->type == CPP_PLACEMARKER)
2530      pasted = duplicate_token (pfile, second);
2531   else if (second->type == CPP_PLACEMARKER)
2532     {
2533       /* GCC has special extended semantics for a ## b where b is a
2534          varargs parameter: a disappears if b consists of no tokens.
2535          This extension is deprecated.  */
2536       if (token->flags & GNU_VARARGS)
2537         {
2538           cpp_warning (pfile, "deprecated GNU ## extension used");
2539           pasted = duplicate_token (pfile, second);
2540         }
2541       else
2542         pasted = duplicate_token (pfile, token);
2543     }
2544   else
2545     {
2546       int digraph = 0;
2547       enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2548
2549       if (type == CPP_EOF)
2550         {
2551           if (CPP_OPTION (pfile, warn_paste))
2552             cpp_warning (pfile,
2553                          "pasting would not give a valid preprocessing token");
2554           _cpp_push_token (pfile, second);
2555           return token;
2556         }
2557
2558       if (type == CPP_NAME || type == CPP_NUMBER)
2559         {
2560           /* Join spellings.  */
2561           U_CHAR *buff, *buff2;
2562
2563           pasted = get_temp_token (pfile);
2564           buff = (U_CHAR *) xmalloc (TOKEN_LEN (token) + TOKEN_LEN (second));
2565           buff2 = spell_token (pfile, token, buff);
2566           buff2 = spell_token (pfile, second, buff2);
2567
2568           pasted->val.name.text = buff;
2569           pasted->val.name.len = buff2 - buff;
2570         }
2571       else if (type == CPP_WCHAR || type == CPP_WSTRING)
2572         pasted = duplicate_token (pfile, second);
2573       else
2574         {
2575           pasted = get_temp_token (pfile);
2576           pasted->val.integer = 0;
2577         }
2578
2579       pasted->type = type;
2580       pasted->flags = digraph ? DIGRAPH: 0;
2581     }
2582
2583   /* The pasted token gets the whitespace flags and position of the
2584      first token, the PASTE_LEFT flag of the second token, plus the
2585      PASTED flag to indicate it is the result of a paste.  However, we
2586      want to preserve the DIGRAPH flag.  */
2587   pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2588   pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2589                     | (second->flags & PASTE_LEFT) | PASTED);
2590   pasted->col = token->col;
2591   pasted->line = token->line;
2592
2593   return maybe_paste_with_next (pfile, pasted);
2594 }
2595
2596 /* Convert a token sequence to a single string token according to the
2597    rules of the ISO C #-operator.  */
2598 #define INIT_SIZE 200
2599 static cpp_token *
2600 stringify_arg (pfile, token)
2601      cpp_reader *pfile;
2602      const cpp_token *token;
2603 {
2604   cpp_token *result;
2605   unsigned char *main_buf;
2606   unsigned int prev_value, backslash_count = 0;
2607   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2608
2609   prev_value  = prevent_macro_expansion (pfile);
2610   main_buf = (unsigned char *) xmalloc (buf_cap);
2611
2612   result = get_temp_token (pfile);
2613   ASSIGN_FLAGS_AND_POS (result, token);
2614
2615   for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2616     {
2617       int escape;
2618       unsigned char *buf;
2619       unsigned int len = TOKEN_LEN (token);
2620
2621       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2622                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2623       if (escape)
2624         len *= 4 + 1;
2625
2626       if (buf_used + len > buf_cap)
2627         {
2628           buf_cap = buf_used + len + INIT_SIZE;
2629           main_buf = xrealloc (main_buf, buf_cap);
2630         }
2631
2632       if (whitespace && (token->flags & PREV_WHITE))
2633         main_buf[buf_used++] = ' ';
2634
2635       if (escape)
2636         buf = (unsigned char *) xmalloc (len);
2637       else
2638         buf = main_buf + buf_used;
2639
2640       len = spell_token (pfile, token, buf) - buf;
2641       if (escape)
2642         {
2643           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2644           free (buf);
2645         }
2646       else
2647         buf_used += len;
2648
2649       whitespace = 1;
2650       if (token->type == CPP_BACKSLASH)
2651         backslash_count++;
2652       else
2653         backslash_count = 0;
2654     }
2655
2656   /* Ignore the final \ of invalid string literals.  */
2657   if (backslash_count & 1)
2658     {
2659       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2660       buf_used--;
2661     }
2662
2663   result->type = CPP_STRING;
2664   result->val.name.text = main_buf;
2665   result->val.name.len = buf_used;
2666   restore_macro_expansion (pfile, prev_value);
2667   return result;
2668 }
2669
2670 /* Allocate more room on the context stack of PFILE.  */
2671 static void
2672 expand_context_stack (pfile)
2673      cpp_reader *pfile;
2674 {
2675   pfile->context_cap += pfile->context_cap + 20;
2676   pfile->contexts = (cpp_context *)
2677     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2678 }
2679
2680 /* Push the context of macro NODE onto the context stack.  TOKEN is
2681    the CPP_NAME token invoking the macro.  */
2682 static const cpp_token *
2683 push_macro_context (pfile, node, token)
2684      cpp_reader *pfile;
2685      cpp_hashnode *node;
2686      const cpp_token *token;
2687 {
2688   unsigned char orig_flags;
2689   macro_args *args;
2690   cpp_context *context;
2691
2692   if (pfile->cur_context > CPP_STACK_MAX)
2693     {
2694       cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2695       return token;
2696     }
2697
2698   /* Token's flags may change when parsing args containing a nested
2699      invocation of this macro.  */
2700   orig_flags = token->flags & (PREV_WHITE | BOL);
2701   args = 0;
2702   if (node->value.expansion->paramc >= 0)
2703     {
2704       unsigned int error, prev_nme;
2705
2706       /* Allocate room for the argument contexts, and parse them.  */
2707       args  = (macro_args *) xmalloc (sizeof (macro_args));
2708       args->ends = (unsigned int *)
2709         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2710       args->tokens = 0;
2711       args->capacity = 0;
2712       args->used = 0;
2713       args->level = pfile->cur_context;
2714
2715       prev_nme = prevent_macro_expansion (pfile);
2716       pfile->args = args;
2717       error = parse_args (pfile, node, args);
2718       pfile->args = 0;
2719       restore_macro_expansion (pfile, prev_nme);
2720       if (error)
2721         {
2722           free_macro_args (args);
2723           return token;
2724         }
2725     }
2726
2727   /* Now push its context.  */
2728   pfile->cur_context++;
2729   if (pfile->cur_context == pfile->context_cap)
2730     expand_context_stack (pfile);
2731
2732   context = CURRENT_CONTEXT (pfile);
2733   context->u.list = node->value.expansion;
2734   context->args = args;
2735   context->posn = 0;
2736   context->count = context->u.list->tokens_used;
2737   context->level = pfile->cur_context;
2738   context->flags = 0;
2739   context->pushed_token = 0;
2740
2741   /* Set the flags of the first token.  We know there must
2742      be one, empty macros are a single placemarker token.  */
2743   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2744
2745   return cpp_get_token (pfile);
2746 }
2747
2748 /* Push an argument to the current macro onto the context stack.
2749    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2750 static const cpp_token *
2751 push_arg_context (pfile, token)
2752      cpp_reader *pfile;
2753      const cpp_token *token;
2754 {
2755   cpp_context *context;
2756   macro_args *args;
2757
2758   pfile->cur_context++;
2759   if (pfile->cur_context == pfile->context_cap)
2760       expand_context_stack (pfile);
2761
2762   context = CURRENT_CONTEXT (pfile);
2763   args = context[-1].args;
2764
2765   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2766   context->u.arg = args->tokens + context->count;
2767   context->count = args->ends[token->val.aux] - context->count;
2768   context->args = 0;
2769   context->posn = 0;
2770   context->level = args->level;
2771   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2772   context->pushed_token = 0;
2773
2774   /* Set the flags of the first token.  There is one.  */
2775   {
2776     const cpp_token *first = context->u.arg[0];
2777     if (!first)
2778       first = context->u.arg[1];
2779
2780     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2781                           token->flags & (PREV_WHITE | BOL));
2782   }
2783
2784   if (token->flags & STRINGIFY_ARG)
2785     return stringify_arg (pfile, token);
2786
2787   if (token->flags & PASTE_LEFT)
2788     context->flags |= CONTEXT_PASTEL;
2789   if (pfile->paste_level)
2790     context->flags |= CONTEXT_PASTER;
2791
2792   return get_raw_token (pfile);
2793 }
2794
2795 /* "Unget" a token.  It is effectively inserted in the token queue and
2796    will be returned by the next call to get_raw_token.  */
2797 void
2798 _cpp_push_token (pfile, token)
2799      cpp_reader *pfile;
2800      const cpp_token *token;
2801 {
2802   cpp_context *context = CURRENT_CONTEXT (pfile);
2803   if (context->pushed_token)
2804     cpp_ice (pfile, "two tokens pushed in a row");
2805   if (token->type != CPP_EOF)
2806     context->pushed_token = token;
2807   /* Don't push back a directive's CPP_EOF, step back instead.  */
2808   else if (pfile->cur_context == 0)
2809     pfile->contexts[0].posn--;
2810 }
2811
2812 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
2813    introducing the directive.  */
2814 static void
2815 process_directive (pfile, token)
2816      cpp_reader *pfile;
2817      const cpp_token *token;
2818 {
2819   const struct directive *d = pfile->token_list.directive;
2820   int prev_nme = 0;
2821
2822   /* Skip over the directive name.  */
2823   if (token[1].type == CPP_NAME)
2824     _cpp_get_raw_token (pfile);
2825   else if (token[1].type != CPP_NUMBER)
2826     cpp_ice (pfile, "directive begins with %s?!",
2827              token_names[token[1].type]);
2828
2829   /* Flush pending tokens at this point, in case the directive produces
2830      output.  XXX Directive output won't be visible to a direct caller of
2831      cpp_get_token.  */
2832   if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2833     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2834
2835   if (! (d->flags & EXPAND))
2836     prev_nme = prevent_macro_expansion (pfile);
2837   (void) (*d->handler) (pfile);
2838   if (! (d->flags & EXPAND))
2839     restore_macro_expansion (pfile, prev_nme);
2840   _cpp_skip_rest_of_line (pfile);
2841 }
2842
2843 /* The external interface to return the next token.  All macro
2844    expansion and directive processing is handled internally, the
2845    caller only ever sees the output after preprocessing.  */
2846 const cpp_token *
2847 cpp_get_token (pfile)
2848      cpp_reader *pfile;
2849 {
2850   const cpp_token *token;
2851   cpp_hashnode *node;
2852
2853   /* Loop till we hit a non-directive, non-skipped, non-placemarker token.  */
2854   for (;;)
2855     {
2856       token = get_raw_token (pfile);
2857       if (token->flags & BOL && token->type == CPP_HASH
2858           && pfile->token_list.directive)
2859         {
2860           process_directive (pfile, token);
2861           continue;
2862         }
2863
2864       /* Short circuit EOF. */
2865       if (token->type == CPP_EOF)
2866         return token;
2867
2868       if (pfile->skipping && ! pfile->token_list.directive)
2869         {
2870           _cpp_skip_rest_of_line (pfile);
2871           continue;
2872         }
2873       break;
2874     }
2875
2876   /* If there's a potential control macro and we get here, then that
2877      #ifndef didn't cover the entire file and its argument shouldn't
2878      be taken as a control macro.  */
2879   pfile->potential_control_macro = 0;
2880
2881   token = maybe_paste_with_next (pfile, token);
2882
2883   if (token->type != CPP_NAME)
2884     return token;
2885
2886   /* Is macro expansion disabled in general?  */
2887   if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2888     return token;
2889
2890   node = cpp_lookup (pfile, token->val.name.text, token->val.name.len);
2891   if (node->type == T_VOID)
2892     return token;
2893
2894   if (node->type == T_MACRO)
2895     {
2896       if (is_macro_disabled (pfile, node->value.expansion, token))
2897         return token;
2898
2899       return push_macro_context (pfile, node, token);
2900     }
2901   else
2902     return special_symbol (pfile, node, token);
2903 }
2904
2905 /* Returns the next raw token, i.e. without performing macro
2906    expansion.  Argument contexts are automatically entered.  */
2907 static const cpp_token *
2908 get_raw_token (pfile)
2909      cpp_reader *pfile;
2910 {
2911   const cpp_token *result;
2912   cpp_context *context = CURRENT_CONTEXT (pfile);
2913
2914   if (context->pushed_token)
2915     {
2916       result = context->pushed_token;
2917       context->pushed_token = 0;
2918     }
2919   else if (context->posn == context->count)
2920     result = pop_context (pfile);
2921   else
2922     {
2923       if (IS_ARG_CONTEXT (context))
2924         {
2925           result = context->u.arg[context->posn++];
2926           if (result == 0)
2927             {
2928               context->flags ^= CONTEXT_RAW;
2929               result = context->u.arg[context->posn++];
2930             }
2931           return result;        /* Cannot be a CPP_MACRO_ARG */
2932         }
2933       result = &context->u.list->tokens[context->posn++];
2934     }
2935
2936   if (result->type == CPP_MACRO_ARG)
2937     result = push_arg_context (pfile, result);
2938   return result;
2939 }
2940
2941 /* Internal interface to get the token without macro expanding.  */
2942 const cpp_token *
2943 _cpp_get_raw_token (pfile)
2944      cpp_reader *pfile;
2945 {
2946   int prev_nme = prevent_macro_expansion (pfile);
2947   const cpp_token *result = cpp_get_token (pfile);
2948   restore_macro_expansion (pfile, prev_nme);
2949   return result;
2950 }
2951
2952 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
2953    list should be overwritten, or zero if we need to append
2954    (typically, if we are within the arguments to a macro, or looking
2955    for the '(' to start a function-like macro invocation).  */
2956 static int
2957 lex_next (pfile, clear)
2958      cpp_reader *pfile;
2959      int clear;
2960 {
2961   cpp_toklist *list = &pfile->token_list;
2962   const cpp_token *old_list = list->tokens;
2963   unsigned int old_used = list->tokens_used;
2964
2965   if (clear)
2966     {
2967       /* Release all temporary tokens.  */
2968       _cpp_clear_toklist (list);
2969       pfile->contexts[0].posn = 0;
2970       if (pfile->temp_used)
2971         release_temp_tokens (pfile);
2972     }
2973   else
2974     {
2975       /* If we are currently processing a directive, do not advance.
2976          (6.10 paragraph 2: A new-line character ends the directive
2977          even if it occurs within what would otherwise be an
2978          invocation of a function-like macro.)  */
2979       if (list->directive)
2980         return 1;
2981     }
2982
2983   lex_line (pfile, list);
2984   pfile->contexts[0].count = list->tokens_used;
2985
2986   if (!clear && pfile->args)
2987     {
2988       /* Fix up argument token pointers.  */
2989       if (old_list != list->tokens)
2990         {
2991           unsigned int i;
2992
2993           for (i = 0; i < pfile->args->used; i++)
2994             {
2995               const cpp_token *token = pfile->args->tokens[i];
2996               if (token >= old_list && token < old_list + old_used)
2997                 pfile->args->tokens[i] = (const cpp_token *)
2998                 ((char *) token + ((char *) list->tokens - (char *) old_list));
2999             }
3000         }
3001
3002       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3003          tokens within the list of arguments that would otherwise act as
3004          preprocessing directives, the behavior is undefined.
3005
3006          This implementation will report a hard error and treat the
3007          'sequence of preprocessing tokens' as part of the macro argument,
3008          not a directive.
3009
3010          Note if pfile->args == 0, we're OK since we're only inside a
3011          macro argument after a '('.  */
3012       if (list->directive)
3013         {
3014           cpp_error_with_line (pfile, list->tokens[old_used].line,
3015                                list->tokens[old_used].col,
3016                                "#%s may not be used inside a macro argument",
3017                                list->directive->name);
3018           /* Don't treat as a directive: clear list->directive,
3019              prune the final EOF from the list.  */
3020           list->directive = 0;
3021           list->tokens_used--;
3022           pfile->contexts[0].count--;
3023         }
3024     }
3025
3026   return 0;
3027 }
3028
3029 /* Pops a context of the context stack.  If we're at the bottom, lexes
3030    the next logical line.  Returns 1 if we're at the end of the
3031    argument list to the # operator, or if it is illegal to "overflow"
3032    into the rest of the file (e.g. 6.10.3.1.1).  */
3033 static int
3034 do_pop_context (pfile)
3035      cpp_reader *pfile;
3036 {
3037   cpp_context *context;
3038
3039   if (pfile->cur_context == 0)
3040     return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3041
3042   /* Argument contexts, when parsing args or handling # operator
3043      return CPP_EOF at the end.  */
3044   context = CURRENT_CONTEXT (pfile);
3045   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3046     return 1;
3047
3048   /* Free resources when leaving macro contexts.  */
3049   if (context->args)
3050     free_macro_args (context->args);
3051
3052   if (pfile->cur_context == pfile->no_expand_level)
3053     pfile->no_expand_level--;
3054   pfile->cur_context--;
3055
3056   return 0;
3057 }
3058
3059 /* Move down the context stack, and return the next raw token.  */
3060 static const cpp_token *
3061 pop_context (pfile)
3062      cpp_reader *pfile;
3063 {
3064   if (do_pop_context (pfile))
3065     return &eof_token;
3066   return get_raw_token (pfile);
3067 }
3068
3069 /* Turn off macro expansion at the current context level.  */
3070 static unsigned int
3071 prevent_macro_expansion (pfile)
3072      cpp_reader *pfile;
3073 {
3074   unsigned int prev_value = pfile->no_expand_level;
3075   pfile->no_expand_level = pfile->cur_context;
3076   return prev_value;
3077 }
3078
3079 /* Restore macro expansion to its previous state.  */
3080 static void
3081 restore_macro_expansion (pfile, prev_value)
3082      cpp_reader *pfile;
3083      unsigned int prev_value;
3084 {
3085   pfile->no_expand_level = prev_value;
3086 }
3087
3088 /* Used by cpperror.c to obtain the correct line and column to report
3089    in a diagnostic.  */
3090 unsigned int
3091 _cpp_get_line (pfile, pcol)
3092      cpp_reader *pfile;
3093      unsigned int *pcol;
3094 {
3095   unsigned int index;
3096   const cpp_token *cur_token;
3097
3098   if (pfile->in_lex_line)
3099     index = pfile->token_list.tokens_used;
3100   else
3101     index = pfile->contexts[0].posn;
3102
3103   cur_token = &pfile->token_list.tokens[index - 1];
3104   if (pcol)
3105     *pcol = cur_token->col;
3106   return cur_token->line;
3107 }
3108
3109 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
3110 static const char * const monthnames[] =
3111 {
3112   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3113   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3114 };
3115
3116 /* Handle builtin macros like __FILE__.  */
3117 static const cpp_token *
3118 special_symbol (pfile, node, token)
3119      cpp_reader *pfile;
3120      cpp_hashnode *node;
3121      const cpp_token *token;
3122 {
3123   cpp_token *result;
3124   cpp_buffer *ip;
3125
3126   switch (node->type)
3127     {
3128     case T_FILE:
3129     case T_BASE_FILE:
3130       {
3131         const char *file;
3132
3133         ip = CPP_BUFFER (pfile);
3134         if (ip == 0)
3135           file = "";
3136         else
3137           {
3138             if (node->type == T_BASE_FILE)
3139               while (CPP_PREV_BUFFER (ip) != NULL)
3140                 ip = CPP_PREV_BUFFER (ip);
3141
3142             file = ip->nominal_fname;
3143           }
3144         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3145                                     strlen (file));
3146       }
3147       break;
3148
3149     case T_INCLUDE_LEVEL:
3150       {
3151         int true_indepth = 0;
3152
3153         /* Do not count the primary source file in the include level.  */
3154         ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3155         while (ip)
3156           {
3157             true_indepth++;
3158             ip = CPP_PREV_BUFFER (ip);
3159           }
3160         result = alloc_number_token (pfile, true_indepth);
3161       }
3162       break;
3163
3164     case T_SPECLINE:
3165       /* If __LINE__ is embedded in a macro, it must expand to the
3166          line of the macro's invocation, not its definition.
3167          Otherwise things like assert() will not work properly.  */
3168       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3169       break;
3170
3171     case T_STDC:
3172       {
3173         int stdc = 1;
3174
3175 #ifdef STDC_0_IN_SYSTEM_HEADERS
3176         if (CPP_IN_SYSTEM_HEADER (pfile)
3177             && !cpp_defined (pfile, DSC("__STRICT_ANSI__")))
3178           stdc = 0;
3179 #endif
3180         result = alloc_number_token (pfile, stdc);
3181       }
3182       break;
3183
3184     case T_DATE:
3185     case T_TIME:
3186       if (pfile->date == 0)
3187         {
3188           /* Allocate __DATE__ and __TIME__ from permanent storage,
3189              and save them in pfile so we don't have to do this again.
3190              We don't generate these strings at init time because
3191              time() and localtime() are very slow on some systems.  */
3192           time_t tt = time (NULL);
3193           struct tm *tb = localtime (&tt);
3194
3195           pfile->date = make_string_token
3196             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3197           pfile->time = make_string_token
3198             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3199
3200           sprintf ((char *) pfile->date->val.name.text, "%s %2d %4d",
3201                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3202           sprintf ((char *) pfile->time->val.name.text, "%02d:%02d:%02d",
3203                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3204         }
3205       result = node->type == T_DATE ? pfile->date: pfile->time;
3206       break;
3207
3208     case T_POISON:
3209       cpp_error (pfile, "attempt to use poisoned \"%s\".", node->name);
3210       return token;
3211
3212     default:
3213       cpp_ice (pfile, "invalid special hash type");
3214       return token;
3215     }
3216
3217   ASSIGN_FLAGS_AND_POS (result, token);
3218   return result;
3219 }
3220 #undef DSC
3221
3222 /* Dump the original user's spelling of argument index ARG_NO to the
3223    macro whose expansion is LIST.  */
3224 static void
3225 dump_param_spelling (pfile, list, arg_no)
3226      cpp_reader *pfile;
3227      const cpp_toklist *list;
3228      unsigned int arg_no;
3229 {
3230   const U_CHAR *param = list->namebuf;
3231
3232   while (arg_no--)
3233     param += ustrlen (param) + 1;
3234   CPP_PUTS (pfile, param, ustrlen (param));
3235 }
3236
3237 /* Dump a token list to the output.  */
3238 void
3239 _cpp_dump_list (pfile, list, token, flush)
3240      cpp_reader *pfile;
3241      const cpp_toklist *list;
3242      const cpp_token *token;
3243      int flush;
3244 {
3245   const cpp_token *limit = list->tokens + list->tokens_used;
3246   const cpp_token *prev = 0;
3247
3248   /* Avoid the CPP_EOF.  */
3249   if (list->directive)
3250     limit--;
3251
3252   while (token < limit)
3253     {
3254       if (token->type == CPP_MACRO_ARG)
3255         {
3256           if (token->flags & PREV_WHITE)
3257             CPP_PUTC (pfile, ' ');
3258           if (token->flags & STRINGIFY_ARG)
3259             CPP_PUTC (pfile, '#');
3260           dump_param_spelling (pfile, list, token->val.aux);
3261         }
3262       else
3263         output_token (pfile, token, prev);
3264       if (token->flags & PASTE_LEFT)
3265         CPP_PUTS (pfile, " ##", 3);
3266       prev = token;
3267       token++;
3268     }
3269
3270   if (flush && pfile->printer)
3271     cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3272 }
3273
3274 /* Stub function during conversion, mainly for cppexp.c's benefit.  */
3275 enum cpp_ttype
3276 _cpp_get_directive_token (pfile)
3277      cpp_reader *pfile;
3278 {
3279   const cpp_token *tok;
3280
3281   if (pfile->no_macro_expand)
3282     tok = _cpp_get_raw_token (pfile);
3283   else
3284     tok = cpp_get_token (pfile);
3285
3286   if (tok->type == CPP_EOF)
3287     return CPP_VSPACE;  /* backward compat; and don't try to spell EOF */
3288
3289   CPP_RESERVE (pfile, TOKEN_LEN (tok));
3290   pfile->limit = spell_token (pfile, tok, pfile->limit);
3291   return tok->type;
3292 }
3293
3294 /* Allocate pfile->input_buffer, and initialize trigraph_map[]
3295    if it hasn't happened already.  */
3296
3297 void
3298 _cpp_init_input_buffer (pfile)
3299      cpp_reader *pfile;
3300 {
3301   init_trigraph_map ();
3302   pfile->context_cap = 20;
3303   pfile->contexts = (cpp_context *)
3304     xmalloc (pfile->context_cap * sizeof (cpp_context));
3305   pfile->cur_context = 0;
3306   pfile->contexts[0].u.list = &pfile->token_list;
3307
3308   pfile->contexts[0].posn = 0;
3309   pfile->contexts[0].count = 0;
3310   pfile->no_expand_level = UINT_MAX;
3311
3312   _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3313 }
3314
3315 /* Moves to the end of the directive line, popping contexts as
3316    necessary.  */
3317 void
3318 _cpp_skip_rest_of_line (pfile)
3319      cpp_reader *pfile;
3320 {
3321   /* Get to base context.  Clear parsing args and each contexts flags,
3322      since these can cause pop_context to return without popping.  */
3323   pfile->no_expand_level = UINT_MAX;
3324   while (pfile->cur_context != 0)
3325     {
3326       pfile->contexts[pfile->cur_context].flags = 0;
3327       do_pop_context (pfile);
3328     }
3329
3330   pfile->contexts[pfile->cur_context].count = 0;
3331   pfile->contexts[pfile->cur_context].posn = 0;
3332   pfile->token_list.directive = 0;
3333 }
3334
3335 /* Directive handler wrapper used by the command line option
3336    processor.  */
3337 void
3338 _cpp_run_directive (pfile, dir, buf, count)
3339      cpp_reader *pfile;
3340      const struct directive *dir;
3341      const char *buf;
3342      size_t count;
3343 {
3344   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3345     {
3346       unsigned int prev_lvl = 0;
3347       /* scan the line now, else prevent_macro_expansion won't work */
3348       do_pop_context (pfile);
3349       if (! (dir->flags & EXPAND))
3350         prev_lvl = prevent_macro_expansion (pfile);
3351
3352       (void) (*dir->handler) (pfile);
3353
3354       if (! (dir->flags & EXPAND))
3355         restore_macro_expansion (pfile, prev_lvl);
3356
3357       _cpp_skip_rest_of_line (pfile);
3358       cpp_pop_buffer (pfile);
3359     }
3360 }