gcc/cpplex.c

   1 /* CPP Library - lexical analysis.
   2    Copyright (C) 2000 Free Software Foundation, Inc.
   3    Contributed by Per Bothner, 1994-95.
   4    Based on CCCP program by Paul Rubin, June 1986
   5    Adapted to ANSI C, Richard Stallman, Jan 1987
   6    Broken out to separate file, Zack Weinberg, Mar 2000
   7    Single-pass line tokenization by Neil Booth, April 2000
   8
   9 This program is free software; you can redistribute it and/or modify it
  10 under the terms of the GNU General Public License as published by the
  11 Free Software Foundation; either version 2, or (at your option) any
  12 later version.
  13
  14 This program is distributed in the hope that it will be useful,
  15 but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 GNU General Public License for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with this program; if not, write to the Free Software
  21 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  22
  23 /*
  24
  25 Cleanups to do:-
  26
  27 o Distinguish integers, floats, and 'other' pp-numbers.
  28 o Store ints and char constants as binary values.
  29 o New command-line assertion syntax.
  30 o Comment all functions, and describe macro expansion algorithm.
  31 o Move as much out of header files as possible.
  32 o Remove single quote pairs `', and some '', from diagnostics.
  33 o Correct pastability test for CPP_NAME and CPP_NUMBER.
  34
  35 */
  36
  37 #include "config.h"
  38 #include "system.h"
  39 #include "intl.h"
  40 #include "cpplib.h"
  41 #include "cpphash.h"
  42 #include "symcat.h"
  43 /* Digraph spellings, then the constant placemarker and EOF tokens.  */
  44 const unsigned char *_cpp_digraph_spellings [] = {U"%:", U"%:%:", U"<:",
  45                                                   U":>", U"<%", U"%>"};
  46 static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER,
  47                                             0 UNION_INIT_ZERO};
  48 static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
  49
  50 /* Flags for cpp_context.  */
  51 #define CONTEXT_PASTEL  (1 << 0) /* An argument context on LHS of ##.  */
  52 #define CONTEXT_PASTER  (1 << 1) /* An argument context on RHS of ##.  */
  53 #define CONTEXT_RAW     (1 << 2) /* If argument tokens already expanded.  */
  54 #define CONTEXT_ARG     (1 << 3) /* If an argument context.  */
  55 /* A source of tokens: either a macro's token list or one argument.  */
  56 typedef struct cpp_context cpp_context;
  57 struct cpp_context
  58 {
  59   union
  60   {
  61     const cpp_toklist *list;    /* Used for macro contexts only.  */
  62     const cpp_token **arg;      /* Used for arg contexts only.  */
  63   } u;
  64
  65   /* Pushed token to be returned by next call to get_raw_token.  */
  66   const cpp_token *pushed_token;
  67
  68   struct macro_args *args;      /* The arguments for a function-like
  69                                    macro.  NULL otherwise.  */
  70   unsigned short posn;          /* Current posn, index into u.  */
  71   unsigned short count;         /* No. of tokens in u.  */
  72   unsigned short level;         /* NOTE(review): meaning not visible here.  */
  73   unsigned char flags;          /* CONTEXT_* bits defined above.  */
  74 };
  75 /* Arguments of a function-like macro; pointed to by cpp_context.args.  */
  76 typedef struct macro_args macro_args;
  77 struct macro_args
  78 {
  79   unsigned int *ends;           /* NOTE(review): presumably per-argument end indices into TOKENS; confirm.  */
  80   const cpp_token **tokens;     /* NOTE(review): presumably the saved argument tokens; confirm.  */
  81   unsigned int capacity;        /* NOTE(review): presumably allocated slots in TOKENS; confirm.  */
  82   unsigned int used;            /* NOTE(review): presumably slots of TOKENS in use; confirm.  */
  83   unsigned short level;         /* NOTE(review): meaning not visible here.  */
  84 };
  85 /* Forward declarations of this file's static functions.  */
  86 static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
  87 static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
  88                                            macro_args *, unsigned int *));
  89 static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
  90 static void save_token PARAMS ((macro_args *, const cpp_token *));
  91 static int pop_context PARAMS ((cpp_reader *));
  92 static int push_macro_context PARAMS ((cpp_reader *, const cpp_token *));
  93 static void push_arg_context PARAMS ((cpp_reader *, const cpp_token *));
  94 static void free_macro_args PARAMS ((macro_args *));
  95 /* Character-level input: newlines, escaped newlines and trigraphs.  */
  96 static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
  97 static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
  98 static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
  99 /* Lexing helpers: comments, whitespace, identifiers, numbers, strings.  */
 100 static int skip_block_comment PARAMS ((cpp_reader *));
 101 static int skip_line_comment PARAMS ((cpp_reader *));
 102 static void adjust_column PARAMS ((cpp_reader *));
 103 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
 104 static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
 105 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t));
 106 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
 107 static void unterminated PARAMS ((cpp_reader *, unsigned int, int));
 108 static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
 109 static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
 110 static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
 111 static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
 112 static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
 113 static void lex_token PARAMS ((cpp_reader *, cpp_token *));
 114 static int lex_next PARAMS ((cpp_reader *, int));
 115
 116 static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
 117                                       const cpp_token *));
 118
 119 static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
 120 static void expand_context_stack PARAMS ((cpp_reader *));
 121 static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
 122                                             unsigned char *));
 123 typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
 124                                           cpp_token *));
 125 static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
 126                                             unsigned int));
 127 static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
 128 static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
 129                                                 const cpp_token *));
 130 static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
 131 static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
 132                                                        const cpp_token *));
 133 static unsigned int prevent_macro_expansion     PARAMS ((cpp_reader *));
 134 static void restore_macro_expansion     PARAMS ((cpp_reader *, unsigned int));
 135 static cpp_token *get_temp_token        PARAMS ((cpp_reader *));
 136 static void release_temp_tokens         PARAMS ((cpp_reader *));
 137 static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
 138 /* Nonzero if C is '+' or '-' and PREVC is e/E, or p/P when the c89 option is off.  Uses the local pfile.  */
 139 #define VALID_SIGN(c, prevc) \
 140   (((c) == '+' || (c) == '-') && \
 141    ((prevc) == 'e' || (prevc) == 'E' \
 142     || (((prevc) == 'p' || (prevc) == 'P') && !CPP_OPTION (pfile, c89))))
 143
 144 /* An upper bound on the number of bytes needed to spell a token,
 145    including preceding whitespace.  */
 146 static inline size_t TOKEN_LEN PARAMS ((const cpp_token *));
 147 static inline size_t
 148 TOKEN_LEN (token)
 149      const cpp_token *token;
 150 {
 151   size_t len;
 152
 153   switch (TOKEN_SPELL (token))
 154     {
 155     default:            len = 0;                        break;
 156     case SPELL_STRING:  len = token->val.str.len;       break;
 157     case SPELL_IDENT:   len = token->val.node->length;  break;
 158     }
 159   return len + 5;  /* NOTE(review): 5 presumably covers whitespace plus any fixed spelling; confirm.  */
 160 }
 161 /* Context helpers.  ON_REST_ARG reads tokens[posn - 1], so posn must be nonzero when it is used.  */
 162 #define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
 163 #define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
 164 #define ON_REST_ARG(c) \
 165  (((c)->u.list->flags & VAR_ARGS) \
 166   && (c)->u.list->tokens[(c)->posn - 1].val.aux \
 167       == (unsigned int) ((c)->u.list->paramc - 1))
 168 /* Set D's flags to S's PREV_WHITE, BOL and PASTE_LEFT bits; copy col and line too if BOL is set.  */
 169 #define ASSIGN_FLAGS_AND_POS(d, s) \
 170   do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
 171       if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 172   } while (0)
 173
 174 /* Replace D's PREV_WHITE and BOL bits with F (flags consisting only of those bits); if F has BOL, copy S's position.  */
 175 #define MODIFY_FLAGS_AND_POS(d, s, f) \
 176   do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
 177       if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
 178   } while (0)
 179 /* OP and TK expand each TTYPE_TABLE entry into one initializer of the table below.  */
 180 #define OP(e, s) { SPELL_OPERATOR, U s           },
 181 #define TK(e, s) { s,              U STRINGX (e) },
 182
 183 const struct token_spelling
 184 _cpp_token_spellings [N_TTYPES] = {TTYPE_TABLE };
 185
 186 #undef OP
 187 #undef TK
 188
 189 /* Helper routine used by parse_include, which can't see spell_token.
 190    Reinterpret the current line as an h-char-sequence (< ... >); we are
 191    looking at the first token after the <.  Returns a CPP_HEADER_NAME token.  */
 192 const cpp_token *
 193 _cpp_glue_header_name (pfile)
 194      cpp_reader *pfile;
 195 {
 196   const cpp_token *t;
 197   cpp_token *hdr;
 198   U_CHAR *buf, *p;
 199   size_t len, avail;
 200
 201   avail = 40;  /* Initial buffer size; grown on demand below.  */
 202   len = 0;
 203   buf = xmalloc (avail);
 204
 205   for (;;)
 206     {
 207       t = _cpp_get_token (pfile);
 208       if (t->type == CPP_GREATER || t->type == CPP_EOF)  /* Closing > or end of input.  */
 209         break;
 210
 211       if (len + TOKEN_LEN (t) > avail)  /* TOKEN_LEN is an upper bound.  */
 212         {
 213           avail = len + TOKEN_LEN (t) + 40;
 214           buf = xrealloc (buf, avail);
 215         }
 216
 217       if (t->flags & PREV_WHITE)  /* Any preceding whitespace becomes one space.  */
 218         buf[len++] = ' ';
 219
 220       p = spell_token (pfile, t, buf + len);
 221       len = (size_t) (p - buf);  /* p known >= buf */
 222     }
 223
 224   if (t->type == CPP_EOF)
 225     cpp_error (pfile, "missing terminating > character");
 226
 227   buf = xrealloc (buf, len);  /* Trim to LEN bytes; no NUL terminator is stored.  */
 228
 229   hdr = get_temp_token (pfile);
 230   hdr->type = CPP_HEADER_NAME;
 231   hdr->flags = 0;
 232   hdr->val.str.text = buf;  /* NOTE(review): who frees BUF is not visible here.  */
 233   hdr->val.str.len = len;
 234   return hdr;
 235 }
 236
 237 /* Token-buffer helper functions.  */
 238
 239 /* Expand a token list's string space by LEN bytes. It is *vital* that
 240    list->tokens_used is correct, to get pointer fix-up right.  */
 241 void
 242 _cpp_expand_name_space (list, len)
 243      cpp_toklist *list;
 244      unsigned int len;
 245 {
 246   const U_CHAR *old_namebuf;
 247
 248   old_namebuf = list->namebuf;
 249   list->name_cap += len;  /* LEN is the increment, not the new capacity.  */
 250   list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
 251
 252   /* Fix up token text pointers.  */
 253   if (list->namebuf != old_namebuf)  /* The buffer moved.  */
 254     {
 255       unsigned int i;
 256
 257       for (i = 0; i < list->tokens_used; i++)
 258         if (TOKEN_SPELL (&list->tokens[i]) == SPELL_STRING)  /* Only these tokens are adjusted.  */
 259           list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
 260     }
 261 }
 262
 263 /* If there is not enough room for LEN more characters, expand the
 264    list by just enough to have room for LEN characters.  */
 265 void
 266 _cpp_reserve_name_space (list, len)
 267      cpp_toklist *list;
 268      unsigned int len;
 269 {
 270   unsigned int room = list->name_cap - list->name_used;  /* Free bytes left in namebuf.  */
 271
 272   if (room < len)
 273     _cpp_expand_name_space (list, len - room);  /* Grow by the shortfall only.  */
 274 }
 275
 276 /* Expand the number of tokens in a list: grow its capacity by COUNT.  */
 277 void
 278 _cpp_expand_token_space (list, count)
 279      cpp_toklist *list;
 280      unsigned int count;
 281 {
 282   list->tokens_cap += count;
 283   list->tokens = (cpp_token *)
 284     xrealloc (list->tokens, list->tokens_cap * sizeof (cpp_token));  /* NOTE(review): no "+ 1" here, unlike _cpp_init_toklist; confirm intended.  */
 285 }
 286
 287 /* Initialize a token list.  If EMPTY is false, some token and name
 288    space is provided.  */
 289 void
 290 _cpp_init_toklist (list, empty)
 291      cpp_toklist *list;
 292      int empty;
 293 {
 294   if (empty)
 295     {
 296       list->tokens_cap = 0;  /* No storage yet; both buffers are null.  */
 297       list->tokens = 0;
 298       list->name_cap = 0;
 299       list->namebuf = 0;
 300     }
 301   else
 302     {
 303       /* Initialize token space.  */
 304       list->tokens_cap = 256;   /* 4K's worth.  */
 305       list->tokens = (cpp_token *)
 306         xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));  /* One token more than tokens_cap.  */
 307
 308       /* Initialize name space.  */
 309       list->name_cap = 1024;
 310       list->namebuf = (unsigned char *) xmalloc (list->name_cap);
 311     }
 312
 313   _cpp_clear_toklist (list);  /* Reset the usage counters and flags.  */
 314 }
 315
 316 /* Clear a token list: reset counters and flags; the buffers are left alone.  */
 317 void
 318 _cpp_clear_toklist (list)
 319      cpp_toklist *list;
 320 {
 321   list->tokens_used = 0;
 322   list->name_used = 0;
 323   list->directive = 0;
 324   list->paramc = 0;
 325   list->params_len = 0;
 326   list->flags = 0;
 327 }
 328
 329 /* Free a token list.  Does not free the list itself, which may be
 330    embedded in a larger structure.  Frees the token and name buffers.  */
 331 void
 332 _cpp_free_toklist (list)
 333      const cpp_toklist *list;
 334 {
 335   free (list->tokens);
 336   free (list->namebuf);
 337 }
 338
 339 /* Compare two tokens.  Returns 1 if type, flags and value all match, else 0.  */
 340 int
 341 _cpp_equiv_tokens (a, b)
 342      const cpp_token *a, *b;
 343 {
 344   if (a->type == b->type && a->flags == b->flags)
 345     switch (TOKEN_SPELL (a))
 346       {
 347       default:                  /* Keep compiler happy.  */
 348       case SPELL_OPERATOR:
 349         return 1;  /* Type and flags already matched; nothing else is compared.  */
 350       case SPELL_CHAR:
 351       case SPELL_NONE:
 352         return a->val.aux == b->val.aux; /* arg_no or character.  */
 353       case SPELL_IDENT:
 354         return a->val.node == b->val.node;  /* Same hash node.  */
 355       case SPELL_STRING:
 356         return (a->val.str.len == b->val.str.len
 357                 && !memcmp (a->val.str.text, b->val.str.text,
 358                             a->val.str.len));
 359       }
 360
 361   return 0;  /* Type or flags differ.  */
 362 }
 363
 364 /* Compare two token lists.  Returns 1 if equivalent, 0 otherwise.  */
 365 int
 366 _cpp_equiv_toklists (a, b)
 367      const cpp_toklist *a, *b;
 368 {
 369   unsigned int i;
 370
 371   if (a->tokens_used != b->tokens_used  /* Cheap checks first.  */
 372       || a->flags != b->flags
 373       || a->paramc != b->paramc)
 374     return 0;
 375
 376   for (i = 0; i < a->tokens_used; i++)  /* Then token by token.  */
 377     if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
 378       return 0;
 379   return 1;
 380 }
 381
 382 /* Utility routine:
 383
 384    Compares the token TOKEN to the NUL-terminated string STRING.
 385    Returns 1 for equal, 0 for unequal or if TOKEN is not a CPP_NAME.  */
 386
 387 int
 388 cpp_ideq (token, string)
 389      const cpp_token *token;
 390      const char *string;
 391 {
 392   if (token->type != CPP_NAME)
 393     return 0;
 394
 395   return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
 396 }
 397
 398 /* Call when meeting a newline.  Returns the character after the newline
 399    (or carriage-return newline combination), or EOF.  Increments lineno.  */
 400 static cppchar_t
 401 handle_newline (buffer, newline_char)
 402      cpp_buffer *buffer;
 403      cppchar_t newline_char;
 404 {
 405   cppchar_t next = EOF;
 406
 407   buffer->col_adjust = 0;  /* A new line starts: reset the column adjustment.  */
 408   buffer->lineno++;
 409   buffer->line_base = buffer->cur;
 410
 411   /* Handle CR-LF and LF-CR combinations, get the next character.  */
 412   if (buffer->cur < buffer->rlimit)
 413     {
 414       next = *buffer->cur++;
 415       if (next + newline_char == '\r' + '\n')
 416         {
 417           buffer->line_base = buffer->cur;  /* The line starts after the pair.  */
 418           if (buffer->cur < buffer->rlimit)
 419             next = *buffer->cur++;
 420           else
 421             next = EOF;
 422         }
 423     }
 424
 425   buffer->read_ahead = next;  /* The result is also left in read_ahead.  */
 426   return next;
 427 }
 428
 429 /* Subroutine of skip_escaped_newlines; called when a trigraph is
 430    encountered.  It warns if necessary, and returns true if the
 431    trigraph should be honoured.  FROM_CHAR is the third character of a
 432    trigraph, and presumed to be the previous character for position
 433    reporting.  */
 434 static int
 435 trigraph_ok (pfile, from_char)
 436      cpp_reader *pfile;
 437      cppchar_t from_char;
 438 {
 439   int accept = CPP_OPTION (pfile, trigraphs);  /* Honoured only if the trigraphs option is set.  */
 440
 441   /* Don't warn about trigraphs in comments.  */
 442   if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
 443     {
 444       cpp_buffer *buffer = pfile->buffer;
 445       if (accept)
 446         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 447                                "trigraph ??%c converted to %c",
 448                                (int) from_char,
 449                                (int) _cpp_trigraph_map[from_char]);
 450       else
 451         cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
 452                                "trigraph ??%c ignored", (int) from_char);
 453     }
 454
 455   return accept;  /* The warning does not affect the result.  */
 456 }
 457
 458 /* Set result's type to T and clear read_ahead.  Assumes local variables buffer and result.  */
 459 #define ACCEPT_CHAR(t) \
 460   do { result->type = t; buffer->read_ahead = EOF; } while (0)
 461
 462 /* When we move to multibyte character sets, add to these something
 463    that saves and restores the state of the multibyte conversion
 464    library.  This probably involves saving and restoring a "cookie".
 465    In the case of glibc it is an 8-byte structure, so is not a high
 466    overhead operation.  In any case, it's out of the fast path.  */
 467 #define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)  /* Uses locals buffer and saved_cur.  */
 468 #define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)  /* Rewinds to the last SAVE_STATE.  */
 469
 470 /* Skips any escaped newlines introduced by NEXT, which is either a
 471    '?' or a '\\'.  Returns the next character, which will also have
 472    been placed in buffer->read_ahead.  */
 473 static cppchar_t
 474 skip_escaped_newlines (buffer, next)
 475      cpp_buffer *buffer;
 476      cppchar_t next;
 477 {
 478   cppchar_t next1;
 479   const unsigned char *saved_cur;
 480   int space;
 481
 482   do
 483     {
 484       if (buffer->cur == buffer->rlimit)  /* Nothing follows NEXT.  */
 485         break;
 486
 487       SAVE_STATE ();
 488       if (next == '?')
 489         {
 490           next1 = *buffer->cur++;
 491           if (next1 != '?' || buffer->cur == buffer->rlimit)  /* No second '?', or no third character.  */
 492             {
 493               RESTORE_STATE ();
 494               break;
 495             }
 496
 497           next1 = *buffer->cur++;
 498           if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))  /* Not a trigraph, or not honoured.  */
 499             {
 500               RESTORE_STATE ();
 501               break;
 502             }
 503
 504           /* We have a full trigraph here.  */
 505           next = _cpp_trigraph_map[next1];
 506           if (next != '\\' || buffer->cur == buffer->rlimit)  /* Return the mapped character as is.  */
 507             break;
 508           SAVE_STATE ();
 509         }
 510
 511       /* We have a backslash, and room for at least one more character.  */
 512       space = 0;
 513       do
 514         {
 515           next1 = *buffer->cur++;
 516           if (!is_nvspace (next1))
 517             break;
 518           space = 1;  /* Saw horizontal space after the backslash.  */
 519         }
 520       while (buffer->cur < buffer->rlimit);
 521
 522       if (!is_vspace (next1))  /* No newline follows: rewind, NEXT is returned unchanged.  */
 523         {
 524           RESTORE_STATE ();
 525           break;
 526         }
 527
 528       if (space)
 529         cpp_warning (buffer->pfile,
 530                      "backslash and newline separated by space");
 531
 532       next = handle_newline (buffer, next1);
 533       if (next == EOF)
 534         cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
 535     }
 536   while (next == '\\' || next == '?');  /* The new character may start another escape.  */
 537
 538   buffer->read_ahead = next;
 539   return next;
 540 }
 541
 542 /* Obtain the next character, after trigraph conversion and skipping
 543    an arbitrary string of escaped newlines.  The common case of no
 544    trigraphs or escaped newlines falls through quickly.  */
 545 static cppchar_t
 546 get_effective_char (buffer)
 547      cpp_buffer *buffer;
 548 {
 549   cppchar_t next = EOF;  /* Returned if the buffer is exhausted.  */
 550
 551   if (buffer->cur < buffer->rlimit)
 552     {
 553       next = *buffer->cur++;
 554
 555       /* '?' can introduce trigraphs (and therefore backslash); '\\'
 556          can introduce escaped newlines, which we want to skip, or
 557          UCNs, which, depending upon lexer state, we will handle in
 558          the future.  */
 559       if (next == '?' || next == '\\')
 560         next = skip_escaped_newlines (buffer, next);
 561     }
 562
 563   buffer->read_ahead = next;  /* The result is also left in read_ahead.  */
 564   return next;
 565 }
 566
 567 /* Skip a C-style block comment.  We find the end of the comment by
 568    seeing if an asterisk is before every '/' we encounter.  Returns
 569    non-zero if comment terminated by EOF, zero otherwise.  */
 570 static int
 571 skip_block_comment (pfile)
 572      cpp_reader *pfile;
 573 {
 574   cpp_buffer *buffer = pfile->buffer;
 575   cppchar_t c = EOF, prevc;  /* PREVC is always assigned before it is read.  */
 576
 577   pfile->state.lexing_comment = 1;
 578   while (buffer->cur != buffer->rlimit)
 579     {
 580       prevc = c, c = *buffer->cur++;
 581
 582     next_char:
 583       /* FIXME: For speed, create a new character class of characters
 584          of no interest inside block comments.  */
 585       if (c == '?' || c == '\\')
 586         c = skip_escaped_newlines (buffer, c);
 587
 588       /* People like decorating comments with '*', so check for '/'
 589          instead for efficiency.  */
 590       if (c == '/')
 591         {
 592           if (prevc == '*')
 593             break;
 594
 595           /* Warn about potential nested comments, but not if the '/'
 596              comes immediately before the true comment delimiter.
 597              Don't bother to get it right across escaped newlines.  */
 598           if (CPP_OPTION (pfile, warn_comments)
 599               && buffer->cur != buffer->rlimit)
 600             {
 601               prevc = c, c = *buffer->cur++;
 602               if (c == '*' && buffer->cur != buffer->rlimit)
 603                 {
 604                   prevc = c, c = *buffer->cur++;
 605                   if (c != '/')
 606                     cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
 607                                            CPP_BUF_COL (buffer),
 608                                            "\"/*\" within comment");
 609                 }
 610               goto next_char;  /* C has not been examined yet.  */
 611             }
 612         }
 613       else if (is_vspace (c))
 614         {
 615           prevc = c, c = handle_newline (buffer, c);
 616           goto next_char;  /* handle_newline already fetched the next character.  */
 617         }
 618       else if (c == '\t')
 619         adjust_column (pfile);
 620     }
 621
 622   pfile->state.lexing_comment = 0;
 623   buffer->read_ahead = EOF;
 624   return c != '/' || prevc != '*';  /* Zero only if the last two characters were '*' then '/'.  */
 625 }
 626
 627 /* Skip a C++ line comment.  Handles escaped newlines.  Returns
 628    non-zero if a multiline comment.  The following new line, if any,
 629    is left in buffer->read_ahead.  */
 630 static int
 631 skip_line_comment (pfile)
 632      cpp_reader *pfile;
 633 {
 634   cpp_buffer *buffer = pfile->buffer;
 635   unsigned int orig_lineno = buffer->lineno;  /* To detect a change of line below.  */
 636   cppchar_t c;
 637
 638   pfile->state.lexing_comment = 1;
 639   do
 640     {
 641       c = EOF;
 642       if (buffer->cur == buffer->rlimit)  /* Comment ended by end of buffer.  */
 643         break;
 644
 645       c = *buffer->cur++;
 646       if (c == '?' || c == '\\')
 647         c = skip_escaped_newlines (buffer, c);
 648     }
 649   while (!is_vspace (c));
 650
 651   pfile->state.lexing_comment = 0;
 652   buffer->read_ahead = c;       /* Leave any newline for caller.  */
 653   return orig_lineno != buffer->lineno;  /* The line number only changes via an escaped newline.  */
 654 }
 655
 656 /* pfile->buffer->cur is one beyond the \t character.  Update
 657    col_adjust so we track the column correctly, using the tabstop option.  */
 658 static void
 659 adjust_column (pfile)
 660      cpp_reader *pfile;
 661 {
 662   cpp_buffer *buffer = pfile->buffer;
 663   unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column.  */
 664
 665   /* Round it up to multiple of the tabstop, but subtract 1 since the
 666      tab itself occupies a character position.  */
 667   buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
 668                          - col % CPP_OPTION (pfile, tabstop)) - 1;
 669 }
 670
 671 /* Skips whitespace, saving the next non-whitespace character in read_ahead.
 672    Adjusts pfile->buffer->col_adjust to account for tabs.  Without this,
 673    tokens might be assigned an incorrect column.  */
 674 static void
 675 skip_whitespace (pfile, c)
 676      cpp_reader *pfile;
 677      cppchar_t c;
 678 {
 679   cpp_buffer *buffer = pfile->buffer;
 680   unsigned int warned = 0;  /* Warn about null characters once per call.  */
 681
 682   do
 683     {
 684       /* Horizontal space always OK.  */
 685       if (c == ' ')
 686         ;
 687       else if (c == '\t')
 688         adjust_column (pfile);
 689       /* Just \f \v or \0 left.  */
 690       else if (c == '\0')
 691         {
 692           if (!warned)
 693             {
 694               cpp_warning (pfile, "null character(s) ignored");
 695               warned = 1;
 696             }
 697         }
 698       else if (IN_DIRECTIVE (pfile) && CPP_PEDANTIC (pfile))
 699         cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
 700                                CPP_BUF_COL (buffer),
 701                                "%s in preprocessing directive",
 702                                c == '\f' ? "form feed" : "vertical tab");
 703
 704       c = EOF;  /* Stored in read_ahead if the buffer ends here.  */
 705       if (buffer->cur == buffer->rlimit)
 706         break;
 707       c = *buffer->cur++;
 708     }
 709   /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
 710   while (is_nvspace (c));
 711
 712   /* Remember the next character.  */
 713   buffer->read_ahead = c;
 714 }
 715
 716 /* Parse an identifier, skipping embedded backslash-newlines.
 717    Calculate the hash value of the token while parsing, for improved
 718    performance.  The hashing algorithm *must* match cpp_lookup().  */
 719
 720 static cpp_hashnode *
 721 parse_identifier (pfile, c)
 722      cpp_reader *pfile;
 723      cppchar_t c;
 724 {
 725   cpp_buffer *buffer = pfile->buffer;
 726   unsigned int r = 0, saw_dollar = 0;
 727   unsigned int orig_used = pfile->token_list.name_used;  /* Where this name starts in namebuf.  */
 728
 729   do
 730     {
 731       do
 732         {
 733           if (pfile->token_list.name_used == pfile->token_list.name_cap)
 734             _cpp_expand_name_space (&pfile->token_list,
 735                                     pfile->token_list.name_used + 256);  /* Grows by the current size plus 256.  */
 736           pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
 737           r = HASHSTEP (r, c);  /* Running hash of the spelling.  */
 738
 739           if (c == '$')
 740             saw_dollar++;
 741
 742           c = EOF;
 743           if (buffer->cur == buffer->rlimit)
 744             break;
 745
 746           c = *buffer->cur++;
 747         }
 748       while (is_idchar (c));
 749
 750       /* Potential escaped newline?  */
 751       if (c != '?' && c != '\\')
 752         break;
 753       c = skip_escaped_newlines (buffer, c);  /* The identifier may continue after it.  */
 754     }
 755   while (is_idchar (c));
 756
 757   /* $ is not an identifier character in the standard, but is commonly
 758      accepted as an extension.  Don't warn about it in skipped
 759      conditional blocks.  */
 760   if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
 761     cpp_pedwarn (pfile, "'$' character(s) in identifier");
 762
 763   /* Remember the next character.  */
 764   buffer->read_ahead = c;
 765   return _cpp_lookup_with_hash (pfile, &pfile->token_list.namebuf[orig_used],
 766                                 pfile->token_list.name_used - orig_used, r);
 767 }
 768
 769 /* Parse a number, skipping embedded backslash-newlines.  The spelling is stored in NUMBER.  */
 770 static void
 771 parse_number (pfile, number, c)
 772      cpp_reader *pfile;
 773      cpp_string *number;
 774      cppchar_t c;
 775 {
 776   cppchar_t prevc;
 777   cpp_buffer *buffer = pfile->buffer;
 778   unsigned int orig_used = pfile->token_list.name_used;  /* Where this number starts in namebuf.  */
 779
 780   /* Reserve space for a leading period.  */
 781   if (pfile->state.seen_dot)
 782     pfile->token_list.name_used++;  /* No capacity check here; see the >= test below.  */
 783
 784   do
 785     {
 786       do
 787         {
 788           if (pfile->token_list.name_used >= pfile->token_list.name_cap)  /* >= since the reserved slot may exceed name_cap.  */
 789             _cpp_expand_name_space (&pfile->token_list,
 790                                     pfile->token_list.name_used + 256);
 791           pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
 792
 793           prevc = c;  /* VALID_SIGN needs the previous character.  */
 794           c = EOF;
 795           if (buffer->cur == buffer->rlimit)
 796             break;
 797
 798           c = *buffer->cur++;
 799         }
 800       while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
 801
 802       /* Potential escaped newline?  */
 803       if (c != '?' && c != '\\')
 804         break;
 805       c = skip_escaped_newlines (buffer, c);
 806     }
 807   while (is_numchar (c) || c == '.' || VALID_SIGN (c, prevc));
 808
 809   /* Put any leading period in place, now we have the room.  */
 810   if (pfile->state.seen_dot)
 811     pfile->token_list.namebuf[orig_used] = '.';
 812
 813   /* Remember the next character.  */
 814   buffer->read_ahead = c;
 815
 816   number->text = &pfile->token_list.namebuf[orig_used];
 817   number->len = pfile->token_list.name_used - orig_used;
 818 }
 819
 820 /* Subroutine of parse_string.  Emits error for unterminated strings.  TERM is the missing terminator.  */
 821 static void
 822 unterminated (pfile, line, term)
 823      cpp_reader *pfile;
 824      unsigned int line;
 825      int term;
 826 {
 827   cpp_error (pfile, "missing terminating %c character", term);
 828
 829   if (term == '\"' && pfile->mls_line && pfile->mls_line != line)  /* A multi-line string began on another line.  */
 830     {
 831       cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_column,
 832                            "possible start of unterminated string literal");
 833       pfile->mls_line = 0;  /* Clear the record once reported.  */
 834     }
 835 }
 836
 837 /* Parses a string, character constant, or angle-bracketed header file
 838    name.  Handles embedded trigraphs and escaped newlines.
 839
 840    Multi-line strings are allowed, but they are deprecated within
 841    directives.  The text, without the terminator, is stored in TOKEN.  */
 842 static void
 843 parse_string (pfile, token, terminator)
 844      cpp_reader *pfile;
 845      cpp_token *token;
 846      cppchar_t terminator;
 847 {
 848   cpp_buffer *buffer = pfile->buffer;
 849   unsigned int orig_used = pfile->token_list.name_used;  /* Where this literal starts in namebuf.  */
 850   cppchar_t c;
 851   unsigned int nulls = 0;  /* Warn about null characters once per literal.  */
 852
 853   for (;;)
 854     {
 855       if (buffer->cur == buffer->rlimit)
 856         {
 857           c = EOF;
 858           unterminated (pfile, token->line, terminator);
 859           break;
 860         }
 861       c = *buffer->cur++;
 862
 863     have_char:
 864       /* Handle trigraphs, escaped newlines etc.  */
 865       if (c == '?' || c == '\\')
 866         c = skip_escaped_newlines (buffer, c);
 867
 868       if (c == terminator)
 869         {
 870           unsigned int u = pfile->token_list.name_used;
 871
 872           /* An odd number of consecutive backslashes represents an
 873              escaped terminator.  */
 874           while (u > orig_used && pfile->token_list.namebuf[u - 1] == '\\')
 875             u--;
 876
 877           if ((pfile->token_list.name_used - u) % 2 == 0)  /* Even count: the terminator is not escaped.  */
 878             {
 879               c = EOF;  /* Terminator consumed; nothing is left in read_ahead.  */
 880               break;
 881             }
 882         }
 883       else if (is_vspace (c))
 884         {
 885           /* In assembly language, silently terminate string and
 886              character literals at end of line.  This is a kludge
 887              around not knowing where comments are.  */
 888           if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
 889             break;
 890
 891           /* Character constants and header names may not extend over
 892              multiple lines.  In Standard C, neither may strings.
 893              Unfortunately, we accept multiline strings as an
 894              extension.  (Deprecatedly even in directives - otherwise,
 895              glibc's longlong.h breaks.)  */
 896           if (terminator != '"')
 897             {
 898               unterminated (pfile, token->line, terminator);
 899               break;
 900             }
 901
 902           if (pfile->mls_line == 0)  /* Record only the first multi-line string.  */
 903             {
 904               pfile->mls_line = token->line;
 905               pfile->mls_column = token->col;
 906               if (CPP_PEDANTIC (pfile))
 907                 cpp_pedwarn (pfile, "multi-line string constant");
 908             }
 909
 910           handle_newline (buffer, c);  /* Stores to read_ahead.  */
 911           c = '\n';  /* Any newline sequence is stored as a single '\n'.  */
 912         }
 913       else if (c == '\0')
 914         {
 915           if (nulls++ == 0)
 916             cpp_warning (pfile, "null character(s) preserved in literal");
 917         }
 918
 919       if (pfile->token_list.name_used == pfile->token_list.name_cap)
 920         _cpp_expand_name_space (&pfile->token_list,
 921                                 pfile->token_list.name_used + 256);
 922
 923       pfile->token_list.namebuf[pfile->token_list.name_used++] = c;
 924       /* If we had a new line, the next character is in read_ahead.  */
 925       if (c != '\n')
 926         continue;
 927       c = buffer->read_ahead;
 928       if (c != EOF)
 929         goto have_char;  /* Otherwise the loop top reports the unterminated literal.  */
 930     }
 931
 932   buffer->read_ahead = c;
 933
 934   token->val.str.text = &pfile->token_list.namebuf[orig_used];
 935   token->val.str.len = pfile->token_list.name_used - orig_used;
 936 }
 937
 938 /* For output routine simplicity, the stored comment includes the
 939    comment start and any terminator.  */
 940 static void
 941 save_comment (pfile, token, from)
 942      cpp_reader *pfile;
 943      cpp_token *token;
 944      const unsigned char *from;
 945 {
 946   unsigned char *buffer;
 947   unsigned int len;
 948   cpp_toklist *list = &pfile->token_list;
 949
 950   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
 951   _cpp_reserve_name_space (list, len);
 952   buffer = list->namebuf + list->name_used;
 953   list->name_used += len;
 954
 955   token->type = CPP_COMMENT;
 956   token->val.str.len = len;
 957   token->val.str.text = buffer;
 958
 959   buffer[0] = '/';
 960   memcpy (buffer + 1, from, len - 1);
 961 }
 962
 963 /* Subroutine of lex_token to handle '%'.  A little tricky, since we
 964    want to avoid stepping back when lexing %:%X.  */
 965 static void
 966 lex_percent (buffer, result)
 967      cpp_buffer *buffer;
 968      cpp_token *result;
 969 {
 970   cppchar_t c;
 971
 972   result->type = CPP_MOD;
 973   /* Parsing %:%X could leave an extra character.  */
 974   if (buffer->extra_char == EOF)
 975     c = get_effective_char (buffer);
 976   else
 977     {
 978       c = buffer->read_ahead = buffer->extra_char;
 979       buffer->extra_char = EOF;
 980     }
 981
 982   if (c == '=')
 983     ACCEPT_CHAR (CPP_MOD_EQ);
 984   else if (CPP_OPTION (buffer->pfile, digraphs))
 985     {
 986       if (c == ':')
 987         {
 988           result->flags |= DIGRAPH;
 989           ACCEPT_CHAR (CPP_HASH);
 990           if (get_effective_char (buffer) == '%')
 991             {
 992               buffer->extra_char = get_effective_char (buffer);
 993               if (buffer->extra_char == ':')
 994                 {
 995                   buffer->extra_char = EOF;
 996                   ACCEPT_CHAR (CPP_PASTE);
 997                 }
 998               else
 999                 /* We'll catch the extra_char when we're called back.  */
1000                 buffer->read_ahead = '%';
1001             }
1002         }
1003       else if (c == '>')
1004         {
1005           result->flags |= DIGRAPH;
1006           ACCEPT_CHAR (CPP_CLOSE_BRACE);
1007         }
1008     }
1009 }
1010
1011 /* Subroutine of lex_token to handle '.'.  This is tricky, since we
1012    want to avoid stepping back when lexing '...' or '.123'.  In the
1013    latter case we should also set a flag for parse_number.  */
1014 static void
1015 lex_dot (pfile, result)
1016      cpp_reader *pfile;
1017      cpp_token *result;
1018 {
1019   cpp_buffer *buffer = pfile->buffer;
1020   cppchar_t c;
1021
1022   /* Parsing ..X could leave an extra character.  */
1023   if (buffer->extra_char == EOF)
1024     c = get_effective_char (buffer);
1025   else
1026     {
1027       c = buffer->read_ahead = buffer->extra_char;
1028       buffer->extra_char = EOF;
1029     }
1030
1031   /* All known character sets have 0...9 contiguous.  */
1032   if (c >= '0' && c <= '9')
1033     {
1034       result->type = CPP_NUMBER;
1035       buffer->pfile->state.seen_dot = 1;
1036       parse_number (pfile, &result->val.str, c);
1037       buffer->pfile->state.seen_dot = 0;
1038     }
1039   else
1040     {
1041       result->type = CPP_DOT;
1042       if (c == '.')
1043         {
1044           buffer->extra_char = get_effective_char (buffer);
1045           if (buffer->extra_char == '.')
1046             {
1047               buffer->extra_char = EOF;
1048               ACCEPT_CHAR (CPP_ELLIPSIS);
1049             }
1050           else
1051             /* We'll catch the extra_char when we're called back.  */
1052             buffer->read_ahead = '.';
1053         }
1054       else if (c == '*' && CPP_OPTION (pfile, cplusplus))
1055         ACCEPT_CHAR (CPP_DOT_STAR);
1056     }
1057 }
1058
1059 static void
1060 lex_token (pfile, result)
1061      cpp_reader *pfile;
1062      cpp_token *result;
1063 {
1064   cppchar_t c;
1065   cpp_buffer *buffer = pfile->buffer;
1066   const unsigned char *comment_start;
1067
1068   result->flags = 0;
1069  next_char:
1070   result->line = CPP_BUF_LINE (buffer);
1071  next_char2:
1072   result->col = CPP_BUF_COLUMN (buffer, buffer->cur);
1073
1074   c = buffer->read_ahead;
1075   if (c == EOF && buffer->cur < buffer->rlimit)
1076     {
1077       c = *buffer->cur++;
1078       result->col++;
1079     }
1080
1081  do_switch:
1082   buffer->read_ahead = EOF;
1083   switch (c)
1084     {
1085     case EOF:
1086       /* Non-empty files should end in a newline.  Testing
1087          skip_newlines ensures we only emit the warning once.  */
1088       if (buffer->cur != buffer->line_base && buffer->cur != buffer->buf
1089           && pfile->state.skip_newlines)
1090         cpp_pedwarn_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer),
1091                                "no newline at end of file");
1092       result->type = CPP_EOF;
1093       break;
1094
1095     case ' ': case '\t': case '\f': case '\v': case '\0':
1096       skip_whitespace (pfile, c);
1097       result->flags |= PREV_WHITE;
1098       goto next_char2;
1099
1100     case '\n': case '\r':
1101       result->type = CPP_EOF;
1102       handle_newline (buffer, c);
1103       /* Handling here will change significantly when moving to
1104          token-at-a-time.  */
1105       if (pfile->state.skip_newlines)
1106         {
1107           result->flags &= ~PREV_WHITE; /* Clear any whitespace flag.   */
1108           goto next_char;
1109         }
1110       break;
1111
1112     case '?':
1113     case '\\':
1114       /* These could start an escaped newline, or '?' a trigraph.  Let
1115          skip_escaped_newlines do all the work.  */
1116       {
1117         unsigned int lineno = buffer->lineno;
1118
1119         c = skip_escaped_newlines (buffer, c);
1120         if (lineno != buffer->lineno)
1121           /* We had at least one escaped newline of some sort, and the
1122              next character is in buffer->read_ahead.  Update the
1123              token's line and column.  */
1124             goto next_char;
1125
1126         /* We are either the original '?' or '\\', or a trigraph.  */
1127         result->type = CPP_QUERY;
1128         buffer->read_ahead = EOF;
1129         if (c == '\\')
1130           result->type = CPP_BACKSLASH;
1131         else if (c != '?')
1132           goto do_switch;
1133       }
1134       break;
1135
1136     case '0': case '1': case '2': case '3': case '4':
1137     case '5': case '6': case '7': case '8': case '9':
1138       result->type = CPP_NUMBER;
1139       parse_number (pfile, &result->val.str, c);
1140       break;
1141
1142     case '$':
1143       if (!CPP_OPTION (pfile, dollars_in_ident))
1144         goto random_char;
1145       /* Fall through... */
1146
1147     case '_':
1148     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1149     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1150     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1151     case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1152     case 'y': case 'z':
1153     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1154     case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1155     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1156     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1157     case 'Y': case 'Z':
1158       result->type = CPP_NAME;
1159       result->val.node = parse_identifier (pfile, c);
1160
1161       /* 'L' may introduce wide characters or strings.  */
1162       if (result->val.node == pfile->spec_nodes->n_L)
1163         {
1164           c = buffer->read_ahead; /* For make_string.  */
1165           if (c == '\'' || c == '"')
1166             {
1167               ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
1168               goto make_string;
1169             }
1170         }
1171       /* Convert named operators to their proper types.  */
1172       else if (result->val.node->type == T_OPERATOR)
1173         {
1174           result->flags |= NAMED_OP;
1175           result->type = result->val.node->value.code;
1176         }
1177       break;
1178
1179     case '\'':
1180     case '"':
1181       result->type = c == '"' ? CPP_STRING: CPP_CHAR;
1182     make_string:
1183       parse_string (pfile, result, c);
1184       break;
1185
1186     case '/':
1187       /* A potential block or line comment.  */
1188       comment_start = buffer->cur;
1189       result->type = CPP_DIV;
1190       c = get_effective_char (buffer);
1191       if (c == '=')
1192         ACCEPT_CHAR (CPP_DIV_EQ);
1193       if (c != '/' && c != '*')
1194         break;
1195
1196       if (c == '*')
1197         {
1198           if (skip_block_comment (pfile))
1199             cpp_error_with_line (pfile, result->line, result->col,
1200                                  "unterminated comment");
1201         }
1202       else
1203         {
1204           if (!CPP_OPTION (pfile, cplusplus_comments)
1205               && !CPP_IN_SYSTEM_HEADER (pfile))
1206             break;
1207
1208           /* We silently allow C++ comments in system headers,
1209              irrespective of conformance mode, because lots of
1210              broken systems do that and trying to clean it up in
1211              fixincludes is a nightmare.  */
1212           if (CPP_OPTION (pfile, cplusplus_comments)
1213               || CPP_IN_SYSTEM_HEADER (pfile))
1214             {
1215               if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1216                   && ! buffer->warned_cplusplus_comments)
1217                 {
1218                   cpp_pedwarn (pfile,
1219                        "C++ style comments are not allowed in ISO C89");
1220                   cpp_pedwarn (pfile,
1221                        "(this will be reported only once per input file)");
1222                   buffer->warned_cplusplus_comments = 1;
1223                 }
1224               comment_start = buffer->cur;
1225
1226               /* Skip_line_comment updates buffer->read_ahead.  */
1227               if (skip_line_comment (pfile))
1228                 cpp_warning_with_line (pfile, result->line, result->col,
1229                                        "multi-line comment");
1230               cpp_pedwarn (pfile,
1231                            "C++ style comments are not allowed in ISO C89");
1232               cpp_pedwarn (pfile,
1233                            "(this will be reported only once per input file)");
1234               buffer->warned_cplusplus_comments = 1;
1235             }
1236
1237           if (skip_line_comment (pfile))
1238             cpp_warning_with_line (pfile, result->line, result->col,
1239                                    "multi-line comment");
1240         }
1241
1242       /* Skipping the comment has updated buffer->read_ahead.  */
1243       if (!pfile->state.save_comments)
1244         {
1245           result->flags |= PREV_WHITE;
1246           goto next_char;
1247         }
1248
1249       /* Save the comment as a token in its own right.  */
1250       save_comment (pfile, result, comment_start);
1251       break;
1252
1253     case '<':
1254       if (pfile->state.angled_headers)
1255         {
1256           result->type = CPP_HEADER_NAME;
1257           c = '>';              /* terminator.  */
1258           goto make_string;
1259         }
1260
1261       result->type = CPP_LESS;
1262       c = get_effective_char (buffer);
1263       if (c == '=')
1264         ACCEPT_CHAR (CPP_LESS_EQ);
1265       else if (c == '<')
1266         {
1267           ACCEPT_CHAR (CPP_LSHIFT);
1268           if (get_effective_char (buffer) == '=')
1269             ACCEPT_CHAR (CPP_LSHIFT_EQ);
1270         }
1271       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1272         {
1273           ACCEPT_CHAR (CPP_MIN);
1274           if (get_effective_char (buffer) == '=')
1275             ACCEPT_CHAR (CPP_MIN_EQ);
1276         }
1277       else if (c == ':' && CPP_OPTION (pfile, digraphs))
1278         {
1279           ACCEPT_CHAR (CPP_OPEN_SQUARE);
1280           result->flags |= DIGRAPH;
1281         }
1282       else if (c == '%' && CPP_OPTION (pfile, digraphs))
1283         {
1284           ACCEPT_CHAR (CPP_OPEN_BRACE);
1285           result->flags |= DIGRAPH;
1286         }
1287       break;
1288
1289     case '>':
1290       result->type = CPP_GREATER;
1291       c = get_effective_char (buffer);
1292       if (c == '=')
1293         ACCEPT_CHAR (CPP_GREATER_EQ);
1294       else if (c == '>')
1295         {
1296           ACCEPT_CHAR (CPP_RSHIFT);
1297           if (get_effective_char (buffer) == '=')
1298             ACCEPT_CHAR (CPP_RSHIFT_EQ);
1299         }
1300       else if (c == '?' && CPP_OPTION (pfile, cplusplus))
1301         {
1302           ACCEPT_CHAR (CPP_MAX);
1303           if (get_effective_char (buffer) == '=')
1304             ACCEPT_CHAR (CPP_MAX_EQ);
1305         }
1306       break;
1307
1308     case '%':
1309       lex_percent (buffer, result);
1310       break;
1311
1312     case '.':
1313       lex_dot (pfile, result);
1314       break;
1315
1316     case '+':
1317       result->type = CPP_PLUS;
1318       c = get_effective_char (buffer);
1319       if (c == '=')
1320         ACCEPT_CHAR (CPP_PLUS_EQ);
1321       else if (c == '+')
1322         ACCEPT_CHAR (CPP_PLUS_PLUS);
1323       break;
1324
1325     case '-':
1326       result->type = CPP_MINUS;
1327       c = get_effective_char (buffer);
1328       if (c == '>')
1329         {
1330           ACCEPT_CHAR (CPP_DEREF);
1331           if (CPP_OPTION (pfile, cplusplus)
1332               && get_effective_char (buffer) == '*')
1333             ACCEPT_CHAR (CPP_DEREF_STAR);
1334         }
1335       else if (c == '=')
1336         ACCEPT_CHAR (CPP_MINUS_EQ);
1337       else if (c == '-')
1338         ACCEPT_CHAR (CPP_MINUS_MINUS);
1339       break;
1340
1341     case '*':
1342       result->type = CPP_MULT;
1343       if (get_effective_char (buffer) == '=')
1344         ACCEPT_CHAR (CPP_MULT_EQ);
1345       break;
1346
1347     case '=':
1348       result->type = CPP_EQ;
1349       if (get_effective_char (buffer) == '=')
1350         ACCEPT_CHAR (CPP_EQ_EQ);
1351       break;
1352
1353     case '!':
1354       result->type = CPP_NOT;
1355       if (get_effective_char (buffer) == '=')
1356         ACCEPT_CHAR (CPP_NOT_EQ);
1357       break;
1358
1359     case '&':
1360       result->type = CPP_AND;
1361       c = get_effective_char (buffer);
1362       if (c == '=')
1363         ACCEPT_CHAR (CPP_AND_EQ);
1364       else if (c == '&')
1365         ACCEPT_CHAR (CPP_AND_AND);
1366       break;
1367
1368     case '#':
1369       result->type = CPP_HASH;
1370       if (get_effective_char (buffer) == '#')
1371         ACCEPT_CHAR (CPP_PASTE);
1372       break;
1373
1374     case '|':
1375       result->type = CPP_OR;
1376       c = get_effective_char (buffer);
1377       if (c == '=')
1378         ACCEPT_CHAR (CPP_OR_EQ);
1379       else if (c == '|')
1380         ACCEPT_CHAR (CPP_OR_OR);
1381       break;
1382
1383     case '^':
1384       result->type = CPP_XOR;
1385       if (get_effective_char (buffer) == '=')
1386         ACCEPT_CHAR (CPP_XOR_EQ);
1387       break;
1388
1389     case ':':
1390       result->type = CPP_COLON;
1391       c = get_effective_char (buffer);
1392       if (c == ':' && CPP_OPTION (pfile, cplusplus))
1393         ACCEPT_CHAR (CPP_SCOPE);
1394       else if (c == '>' && CPP_OPTION (pfile, digraphs))
1395         {
1396           result->flags |= DIGRAPH;
1397           ACCEPT_CHAR (CPP_CLOSE_SQUARE);
1398         }
1399       break;
1400
1401     case '~': result->type = CPP_COMPL; break;
1402     case ',': result->type = CPP_COMMA; break;
1403     case '(': result->type = CPP_OPEN_PAREN; break;
1404     case ')': result->type = CPP_CLOSE_PAREN; break;
1405     case '[': result->type = CPP_OPEN_SQUARE; break;
1406     case ']': result->type = CPP_CLOSE_SQUARE; break;
1407     case '{': result->type = CPP_OPEN_BRACE; break;
1408     case '}': result->type = CPP_CLOSE_BRACE; break;
1409     case ';': result->type = CPP_SEMICOLON; break;
1410
1411     case '@':
1412       if (CPP_OPTION (pfile, objc))
1413         {
1414           /* In Objective C, '@' may begin keywords or strings, like
1415              @keyword or @"string".  It would be nice to call
1416              get_effective_char here and test the result.  However, we
1417              would then need to pass 2 characters to parse_identifier,
1418              making it ugly and slowing down its main loop.  Instead,
1419              we assume we have an identifier, and recover if not.  */
1420           result->type = CPP_NAME;
1421           result->val.node = parse_identifier (pfile, c);
1422           if (result->val.node->length != 1)
1423             break;
1424
1425           /* OK, so it wasn't an identifier.  Maybe a string?  */
1426           if (buffer->read_ahead == '"')
1427             {
1428               c = '"';
1429               ACCEPT_CHAR (CPP_OSTRING);
1430               goto make_string;
1431             }
1432         }
1433       goto random_char;
1434
1435     random_char:
1436     default:
1437       result->type = CPP_OTHER;
1438       result->val.aux = c;
1439       break;
1440     }
1441 }
1442
1443 /*
1444  *  The tokenizer's main loop.  Returns a token list, representing a
1445  *  logical line in the input file.  On EOF after some tokens have
1446  *  been processed, we return immediately.  Then in next call, or if
1447  *  EOF occurred at the beginning of a logical line, a single CPP_EOF
1448  *  token is placed in the list.
1449  */
1450
1451 static void
1452 lex_line (pfile, list)
1453      cpp_reader *pfile;
1454      cpp_toklist *list;
1455 {
1456   unsigned int first_token;
1457   cpp_token *cur_token, *first;
1458   cpp_buffer *buffer = pfile->buffer;
1459
1460   pfile->state.in_lex_line = 1;
1461   if (pfile->buffer->cur == pfile->buffer->buf)
1462     list->flags |= BEG_OF_FILE;
1463
1464  retry:
1465   pfile->state.in_directive = 0;
1466   pfile->state.angled_headers = 0;
1467   pfile->state.skip_newlines = 1;
1468   pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
1469   first_token = list->tokens_used;
1470   list->file = buffer->nominal_fname;
1471
1472   do
1473     {
1474       if (list->tokens_used >= list->tokens_cap)
1475         _cpp_expand_token_space (list, 256);
1476
1477       cur_token = list->tokens + list->tokens_used;
1478       lex_token (pfile, cur_token);
1479
1480       if (pfile->state.skip_newlines)
1481         {
1482           pfile->state.skip_newlines = 0;
1483           list->line = buffer->lineno;
1484           if (cur_token->type == CPP_HASH)
1485             {
1486               pfile->state.in_directive = 1;
1487               pfile->state.save_comments = 0;
1488               pfile->state.indented = cur_token->flags & PREV_WHITE;
1489             }
1490           /* 6.10.3.10: Within the sequence of preprocessing tokens
1491              making up the invocation of a function-like macro, new
1492              line is considered a normal white-space character.  */
1493           else if (first_token != 0)
1494             cur_token->flags |= PREV_WHITE;
1495         }
1496       else if (IN_DIRECTIVE (pfile) && list->tokens_used == first_token + 1)
1497         {
1498           if (cur_token->type == CPP_NUMBER)
1499             list->directive = _cpp_check_linemarker (pfile, cur_token);
1500           else
1501             list->directive = _cpp_check_directive (pfile, cur_token);
1502         }
1503
1504       /* _cpp_get_line assumes list->tokens_used refers to the current
1505          token being lexed.  So do this after _cpp_check_directive to
1506          get the warnings therein correct.  */
1507       list->tokens_used++;
1508     }
1509   while (cur_token->type != CPP_EOF);
1510
1511   /* All tokens are allocated, so the memory location is fixed.  */
1512   first = &list->tokens[first_token];
1513   first->flags |= BOL;
1514   pfile->first_directive_token = first;
1515
1516   /* Don't complain about the null directive, nor directives in
1517      assembly source: we don't know where the comments are, and # may
1518      introduce assembler pseudo-ops.  Don't complain about invalid
1519      directives in skipped conditional groups (6.10 p4).  */
1520   if (IN_DIRECTIVE (pfile) && !KNOWN_DIRECTIVE (list) && !pfile->skipping
1521       && !CPP_OPTION (pfile, lang_asm))
1522     {
1523       if (cur_token > first + 1)
1524         {
1525           if (first[1].type == CPP_NAME)
1526             cpp_error_with_line (pfile, first->line, first->col,
1527                                  "invalid preprocessing directive #%s",
1528                                  first[1].val.node->name);
1529           else
1530             cpp_error_with_line (pfile, first->line, first->col,
1531                                  "invalid preprocessing directive");
1532         }
1533
1534       /* Discard this line to prevent further errors from cc1.  */
1535       _cpp_clear_toklist (list);
1536       goto retry;
1537     }
1538
1539   /* Drop the EOF unless really at EOF or in a directive.  */
1540   if (cur_token != first && !KNOWN_DIRECTIVE (list)
1541       && pfile->done_initializing)
1542     list->tokens_used--;
1543
1544   pfile->state.in_lex_line = 0;
1545 }
1546
1547 /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
1548    already contain the enough space to hold the token's spelling.
1549    Returns a pointer to the character after the last character
1550    written.  */
1551
1552 static unsigned char *
1553 spell_token (pfile, token, buffer)
1554      cpp_reader *pfile;         /* Would be nice to be rid of this...  */
1555      const cpp_token *token;
1556      unsigned char *buffer;
1557 {
1558   switch (TOKEN_SPELL (token))
1559     {
1560     case SPELL_OPERATOR:
1561       {
1562         const unsigned char *spelling;
1563         unsigned char c;
1564
1565         if (token->flags & DIGRAPH)
1566           spelling = _cpp_digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1567         else if (token->flags & NAMED_OP)
1568           goto spell_ident;
1569         else
1570           spelling = TOKEN_NAME (token);
1571
1572         while ((c = *spelling++) != '\0')
1573           *buffer++ = c;
1574       }
1575       break;
1576
1577     case SPELL_IDENT:
1578       spell_ident:
1579       memcpy (buffer, token->val.node->name, token->val.node->length);
1580       buffer += token->val.node->length;
1581       break;
1582
1583     case SPELL_STRING:
1584       {
1585         int left, right, tag;
1586         switch (token->type)
1587           {
1588           case CPP_STRING:      left = '"';  right = '"';  tag = '\0'; break;
1589           case CPP_WSTRING:     left = '"';  right = '"';  tag = 'L';  break;
1590           case CPP_OSTRING:     left = '"';  right = '"';  tag = '@';  break;
1591           case CPP_CHAR:        left = '\''; right = '\''; tag = '\0'; break;
1592           case CPP_WCHAR:       left = '\''; right = '\''; tag = 'L';  break;
1593           case CPP_HEADER_NAME: left = '<';  right = '>';  tag = '\0'; break;
1594           default:              left = '\0'; right = '\0'; tag = '\0'; break;
1595           }
1596         if (tag) *buffer++ = tag;
1597         if (left) *buffer++ = left;
1598         memcpy (buffer, token->val.str.text, token->val.str.len);
1599         buffer += token->val.str.len;
1600         if (right) *buffer++ = right;
1601       }
1602       break;
1603
1604     case SPELL_CHAR:
1605       *buffer++ = token->val.aux;
1606       break;
1607
1608     case SPELL_NONE:
1609       cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
1610       break;
1611     }
1612
1613   return buffer;
1614 }
1615
1616 /* Macro expansion algorithm.
1617
1618 Macro expansion is implemented by a single-pass algorithm; there are
1619 no rescan passes involved.  cpp_get_token expands just enough to be
1620 able to return a token to the caller, a consequence is that when it
1621 returns the preprocessor can be in a state of mid-expansion.  The
1622 algorithm does not work by fully expanding a macro invocation into
1623 some kind of token list, and then returning them one by one.
1624
1625 Our expansion state is recorded in a context stack.  We start out with
1626 a single context on the stack, let's call it base context.  This
1627 consists of the token list returned by lex_line that forms the next
1628 logical line in the source file.
1629
1630 The current level in the context stack is stored in the cur_context
1631 member of the cpp_reader structure.  The context it references keeps,
1632 amongst other things, a count of how many tokens form that context and
1633 our position within those tokens.
1634
1635 Fundamentally, calling cpp_get_token will return the next token from
1636 the current context.  If we're at the end of the current context, that
1637 context is popped from the stack first, unless it is the base context,
1638 in which case the next logical line is lexed from the source file.
1639
1640 However, before returning the token, if it is a CPP_NAME token
1641 _cpp_get_token checks to see if it is a macro and if it is enabled.
1642 Each time it encounters a macro name, it calls push_macro_context.
1643 This function checks that the macro should be expanded (with
1644 is_macro_enabled), and if so pushes a new macro context on the stack
1645 which becomes the current context.  It then loops back to read the
1646 first token of the macro context.
1647
1648 A macro context basically consists of the token list representing the
1649 macro's replacement list, which was saved in the hash table by
1650 save_macro_expansion when its #define statement was parsed.  If the
1651 macro is function-like, it also contains the tokens that form the
1652 arguments to the macro.  I say more about macro arguments below, but
1653 for now just saying that each argument is a set of pointers to tokens
1654 is enough.
1655
1656 When taking tokens from a macro context, we may get a CPP_MACRO_ARG
1657 token.  This represents an argument passed to the macro, with the
1658 argument number stored in the token's AUX field.  The argument should
1659 be substituted, this is achieved by pushing an "argument context".  An
argument context just refers to the tokens forming the argument,
1661 which are obtained directly from the macro context.  The STRINGIFY
1662 flag on a CPP_MACRO_ARG token indicates that the argument should be
1663 stringified.
1664
1665 Here's a few simple rules the context stack obeys:-
1666
1667   1) The lex_line token list is always context zero.
1668
1669   2) Context 1, if it exists, must be a macro context.
1670
1671   3) An argument context can only appear above a macro context.
1672
1673   4) A macro context can appear above the base context, another macro
1674   context, or an argument context.
1675
1676   5) These imply that the minimal level of an argument context is 2.
1677
1678 The only tricky thing left is ensuring that macros are enabled and
1679 disabled correctly.  The algorithm controls macro expansion by the
1680 level of the context a token is taken from in the context stack.  If a
1681 token is taken from a level equal to no_expand_level (a member of
1682 struct cpp_reader), no expansion is performed.
1683
1684 When popping a context off the stack, if no_expand_level equals the
1685 level of the popped context, it is reduced by one to match the new
1686 context level, so that expansion is still disabled.  It does not
1687 increase if a context is pushed, though.  It starts out life as
1688 UINT_MAX, which has the effect that initially macro expansion is
1689 enabled.  I explain how this mechanism works below.
1690
1691 The standard requires:-
1692
1693   1) Arguments to be fully expanded before substitution.
1694
1695   2) Stringified arguments to not be expanded, nor the tokens
1696   immediately surrounding a ## operator.
1697
1698   3) Continual rescanning until there are no more macros left to
1699   replace.
1700
1701   4) Once a macro has been expanded in stage 1) or 3), it cannot be
1702   expanded again during later rescans.  This prevents infinite
1703   recursion.
1704
1705 The first thing to observe is that stage 3) is mostly redundant.
1706 Since a macro is disabled once it has been expanded, how can a rescan
1707 find an unexpanded macro name?  There are only two cases where this is
1708 possible:-
1709
1710   a) If the macro name results from a token paste operation.
1711
1712   b) If the macro in question is a function-like macro that hasn't
1713   already been expanded because previously there was not the required
1714   '(' token immediately following it.  This is only possible when an
1715   argument is substituted, and after substitution the last token of
1716   the argument can bind with a parenthesis appearing in the tokens
1717   following the substitution.  Note that if the '(' appears within the
1718   argument, the ')' must too, as expanding macro arguments cannot
1719   "suck in" tokens outside the argument.
1720
1721 So we tackle this as follows.  When parsing the macro invocation for
1722 arguments, we record the tokens forming each argument as a list of
1723 pointers to those tokens.  We do not expand any tokens that are "raw",
1724 i.e. directly from the macro invocation, but other tokens that come
1725 from (nested) argument substitution are fully expanded.
1726
1727 This is achieved by setting the no_expand_level to that of the macro
1728 invocation.  A CPP_MACRO_ARG token never appears in the list of tokens
1729 forming an argument, because parse_args (indirectly) calls
1730 get_raw_token which automatically pushes argument contexts and traces
1731 into them.  Since these contexts are at a higher level than the
1732 no_expand_level, they get fully macro expanded.
1733
1734 "Raw" and non-raw tokens are separated in arguments by null pointers,
1735 with the policy that the initial state of an argument is raw.  If the
1736 first token is not raw, it should be preceded by a null pointer.  When
1737 tracing through the tokens of an argument context, each time
1738 get_raw_token encounters a null pointer, it toggles the flag
1739 CONTEXT_RAW.
1740
1741 This flag, when set, indicates to is_macro_disabled that we are
1742 reading raw tokens which should be macro-expanded.  Similarly, if
1743 clear, is_macro_disabled suppresses re-expansion.
1744
1745 It's probably time for an example.
1746
1747 #define hash #
1748 #define str(x) #x
1749 #define xstr(y) str(y hash)
1750 str(hash)                       // "hash"
1751 xstr(hash)                      // "# hash"
1752
1753 In the invocation of str, parse_args turns off macro expansion and so
1754 parses the argument as <hash>.  This is the only token (pointer)
1755 passed as the argument to str.  Since <hash> is raw there is no need
1756 for an initial null pointer.  stringify_arg is called from
1757 get_raw_token when tracing through the expansion of str, since the
1758 argument has the STRINGIFY flag set.  stringify_arg turns off
1759 macro expansion by setting the no_expand_level to that of the argument
1760 context.  Thus it gets the token <hash> and stringifies it to "hash"
1761 correctly.
1762
1763 Similarly xstr is passed <hash>.  However, when parse_args is parsing
1764 the invocation of str() in xstr's expansion, get_raw_token encounters
1765 a CPP_MACRO_ARG token for y.  Transparently to parse_args, it pushes
1766 an argument context, and enters the tokens of the argument,
1767 i.e. <hash>.  This is at a higher context level than parse_args
1768 disabled, and so is_macro_disabled permits expansion of it and a macro
1769 context is pushed on top of the argument context.  This contains the
1770 <#> token, and the end result is that <hash> is macro expanded.
1771 However, after popping off the argument context, the <hash> of xstr's
1772 expansion does not get macro expanded because we're back at the
1773 no_expand_level.  The end result is that the argument passed to str is
1774 <NULL> <#> <NULL> <hash>.  Note the nulls - policy is we start off
1775 raw, <#> is not raw, but then <hash> is.
1776
1777 */
1778
1779
1780 /* Free the storage allocated for macro arguments.  */
1781 static void
1782 free_macro_args (args)
1783      macro_args *args;
1784 {
1785   if (args->tokens)
1786     free ((PTR) args->tokens);
1787   free (args->ends);
1788   free (args);
1789 }
1790
1791 /* Determines if a macro has been already used (and is therefore
1792    disabled).  */
1793 static int
1794 is_macro_disabled (pfile, expansion, token)
1795      cpp_reader *pfile;
1796      const cpp_toklist *expansion;
1797      const cpp_token *token;
1798 {
1799   cpp_context *context = CURRENT_CONTEXT (pfile);
1800
1801   /* Arguments on either side of ## are inserted in place without
1802      macro expansion (6.10.3.3.2).  Conceptually, any macro expansion
1803      occurs during a later rescan pass.  The effect is that we expand
1804      iff we would as part of the macro's expansion list, so we should
1805      drop to the macro's context.  */
1806   if (IS_ARG_CONTEXT (context))
1807     {
1808       if (token->flags & PASTED)
1809         context--;
1810       else if (!(context->flags & CONTEXT_RAW))
1811         return 1;
1812       else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
1813         context--;
1814     }
1815
1816   /* Have we already used this macro?  */
1817   while (context->level > 0)
1818     {
1819       if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
1820         return 1;
1821       /* Raw argument tokens are judged based on the token list they
1822          came from.  */
1823       if (context->flags & CONTEXT_RAW)
1824         context = pfile->contexts + context->level;
1825       else
1826         context--;
1827     }
1828
1829   /* Function-like macros may be disabled if the '(' is not in the
1830      current context.  We check this without disrupting the context
1831      stack.  */
1832   if (expansion->paramc >= 0)
1833     {
1834       const cpp_token *next;
1835       unsigned int prev_nme;
1836
1837       context = CURRENT_CONTEXT (pfile);
1838       /* Drop down any contexts we're at the end of: the '(' may
1839          appear in lower macro expansions, or in the rest of the file.  */
1840       while (context->posn == context->count && context > pfile->contexts)
1841         {
1842           context--;
1843           /* If we matched, we are disabled, as we appear in the
1844              expansion of each macro we meet.  */
1845           if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
1846             return 1;
1847         }
1848
1849       prev_nme = pfile->no_expand_level;
1850       pfile->no_expand_level = context - pfile->contexts;
1851       next = _cpp_get_token (pfile);
1852       restore_macro_expansion (pfile, prev_nme);
1853       if (next->type != CPP_OPEN_PAREN)
1854         {
1855           _cpp_push_token (pfile, next);
1856           if (CPP_WTRADITIONAL (pfile))
1857             cpp_warning (pfile,
1858          "function macro %s must be used with arguments in traditional C",
1859                          token->val.node->name);
1860           return 1;
1861         }
1862     }
1863
1864   return 0;
1865 }
1866
1867 /* Add a token to the set of tokens forming the arguments to the macro
1868    being parsed in parse_args.  */
1869 static void
1870 save_token (args, token)
1871      macro_args *args;
1872      const cpp_token *token;
1873 {
1874   if (args->used == args->capacity)
1875     {
1876       args->capacity += args->capacity + 100;
1877       args->tokens = (const cpp_token **)
1878         xrealloc ((PTR) args->tokens,
1879                   args->capacity * sizeof (const cpp_token *));
1880     }
1881   args->tokens[args->used++] = token;
1882 }
1883
1884 /* Take and save raw tokens until we finish one argument.  Empty
1885    arguments are saved as a single CPP_PLACEMARKER token.  */
1886 static const cpp_token *
1887 parse_arg (pfile, var_args, paren_context, args, pcount)
1888      cpp_reader *pfile;
1889      int var_args;
1890      unsigned int paren_context;
1891      macro_args *args;
1892      unsigned int *pcount;
1893 {
1894   const cpp_token *token;
1895   unsigned int paren = 0, count = 0;
1896   int raw, was_raw = 1;
1897
1898   for (count = 0;; count++)
1899     {
1900       token = _cpp_get_token (pfile);
1901
1902       switch (token->type)
1903         {
1904         default:
1905           break;
1906
1907         case CPP_OPEN_PAREN:
1908           paren++;
1909           break;
1910
1911         case CPP_CLOSE_PAREN:
1912           if (paren-- != 0)
1913             break;
1914           goto out;
1915
1916         case CPP_COMMA:
1917           /* Commas are not terminators within parantheses or var_args.  */
1918           if (paren || var_args)
1919             break;
1920           goto out;
1921
1922         case CPP_EOF:           /* Error reported by caller.  */
1923           goto out;
1924         }
1925
1926       raw = pfile->cur_context <= paren_context;
1927       if (raw != was_raw)
1928         {
1929           was_raw = raw;
1930           save_token (args, 0);
1931           count++;
1932         }
1933       save_token (args, token);
1934     }
1935
1936  out:
1937   if (count == 0)
1938     {
1939       /* Duplicate the placemarker.  Then we can set its flags and
1940          position and safely be using more than one.  */
1941       save_token (args, duplicate_token (pfile, &placemarker_token));
1942       count++;
1943     }
1944
1945   *pcount = count;
1946   return token;
1947 }
1948
/* Non-zero if the argument that begins at index O of A's token array
   is empty.  An empty argument is either a lone CPP_PLACEMARKER, or a
   null pointer immediately followed by a CPP_PLACEMARKER.  */

#define empty_argument(A, O) \
 (((A)->tokens[O] ? (A)->tokens[O] : (A)->tokens[(O)+1])->type \
  == CPP_PLACEMARKER)
1956
1957 /* Parse the arguments making up a macro invocation.  Nested arguments
1958    are automatically macro expanded, but immediate macros are not
1959    expanded; this enables e.g. operator # to work correctly.  Returns
1960    non-zero on error.  */
1961 static int
1962 parse_args (pfile, hp, args)
1963      cpp_reader *pfile;
1964      cpp_hashnode *hp;
1965      macro_args *args;
1966 {
1967   const cpp_token *token;
1968   const cpp_toklist *macro;
1969   unsigned int total = 0;
1970   unsigned int paren_context = pfile->cur_context;
1971   int argc = 0;
1972
1973   macro = hp->value.expansion;
1974   do
1975     {
1976       unsigned int count;
1977
1978       token = parse_arg (pfile, (argc + 1 == macro->paramc
1979                                  && (macro->flags & VAR_ARGS)),
1980                          paren_context, args, &count);
1981       if (argc < macro->paramc)
1982         {
1983           total += count;
1984           args->ends[argc] = total;
1985         }
1986       argc++;
1987     }
1988   while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
1989
1990   if (token->type == CPP_EOF)
1991     {
1992       cpp_error(pfile, "unterminated argument list for macro \"%s\"", hp->name);
1993       return 1;
1994     }
1995   else if (argc < macro->paramc)
1996     {
1997       /* A rest argument is allowed to not appear in the invocation at all.
1998          e.g. #define debug(format, args...) ...
1999          debug("string");
2000          This is exactly the same as if the rest argument had received no
2001          tokens - debug("string",);  This extension is deprecated.  */
2002
2003       if (argc + 1 == macro->paramc && (macro->flags & VAR_ARGS))
2004         {
2005           /* Duplicate the placemarker.  Then we can set its flags and
2006              position and safely be using more than one.  */
2007           cpp_token *pm = duplicate_token (pfile, &placemarker_token);
2008           pm->flags = VOID_REST;
2009           save_token (args, pm);
2010           args->ends[argc] = total + 1;
2011
2012           if (CPP_OPTION (pfile, c99) && CPP_PEDANTIC (pfile))
2013             cpp_pedwarn (pfile, "ISO C99 requires rest arguments to be used");
2014
2015           return 0;
2016         }
2017       else
2018         {
2019           cpp_error (pfile, "not enough arguments for macro \"%s\"", hp->name);
2020           return 1;
2021         }
2022     }
2023   /* An empty argument to an empty function-like macro is fine.  */
2024   else if (argc > macro->paramc
2025            && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2026     {
2027       cpp_error (pfile, "too many arguments for macro \"%s\"", hp->name);
2028       return 1;
2029     }
2030
2031   return 0;
2032 }
2033
2034 /* Adds backslashes before all backslashes and double quotes appearing
2035    in strings.  Non-printable characters are converted to octal.  */
2036 static U_CHAR *
2037 quote_string (dest, src, len)
2038      U_CHAR *dest;
2039      const U_CHAR *src;
2040      unsigned int len;
2041 {
2042   while (len--)
2043     {
2044       U_CHAR c = *src++;
2045
2046       if (c == '\\' || c == '"')
2047         {
2048           *dest++ = '\\';
2049           *dest++ = c;
2050         }
2051       else
2052         {
2053           if (ISPRINT (c))
2054             *dest++ = c;
2055           else
2056             {
2057               sprintf ((char *) dest, "\\%03o", c);
2058               dest += 4;
2059             }
2060         }
2061     }
2062
2063   return dest;
2064 }
2065
2066 /* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2067    CPP_STRING token containing TEXT in quoted form.  */
2068 static cpp_token *
2069 make_string_token (token, text, len)
2070      cpp_token *token;
2071      const U_CHAR *text;
2072      unsigned int len;
2073 {
2074   U_CHAR *buf;
2075
2076   buf = (U_CHAR *) xmalloc (len * 4);
2077   token->type = CPP_STRING;
2078   token->flags = 0;
2079   token->val.str.text = buf;
2080   token->val.str.len = quote_string (buf, text, len) - buf;
2081   return token;
2082 }
2083
2084 /* Allocates and converts a temporary token to a CPP_NUMBER token,
2085    evaluating to NUMBER.  */
2086 static cpp_token *
2087 alloc_number_token (pfile, number)
2088      cpp_reader *pfile;
2089      int number;
2090 {
2091   cpp_token *result;
2092   char *buf;
2093
2094   result = get_temp_token (pfile);
2095   buf = xmalloc (20);
2096   sprintf (buf, "%d", number);
2097
2098   result->type = CPP_NUMBER;
2099   result->flags = 0;
2100   result->val.str.text = (U_CHAR *) buf;
2101   result->val.str.len = strlen (buf);
2102   return result;
2103 }
2104
2105 /* Returns a temporary token from the temporary token store of PFILE.  */
2106 static cpp_token *
2107 get_temp_token (pfile)
2108      cpp_reader *pfile;
2109 {
2110   if (pfile->temp_used == pfile->temp_alloced)
2111     {
2112       if (pfile->temp_used == pfile->temp_cap)
2113         {
2114           pfile->temp_cap += pfile->temp_cap + 20;
2115           pfile->temp_tokens = (cpp_token **) xrealloc
2116             (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2117         }
2118       pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2119         (sizeof (cpp_token));
2120     }
2121
2122   return pfile->temp_tokens[pfile->temp_used++];
2123 }
2124
2125 /* Release (not free) for re-use the temporary tokens of PFILE.  */
2126 static void
2127 release_temp_tokens (pfile)
2128      cpp_reader *pfile;
2129 {
2130   while (pfile->temp_used)
2131     {
2132       cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2133
2134       if (TOKEN_SPELL (token) == SPELL_STRING)
2135         {
2136           free ((char *) token->val.str.text);
2137           token->val.str.text = 0;
2138         }
2139     }
2140 }
2141
2142 /* Free all of PFILE's dynamically-allocated temporary tokens.  */
2143 void
2144 _cpp_free_temp_tokens (pfile)
2145      cpp_reader *pfile;
2146 {
2147   if (pfile->temp_tokens)
2148     {
2149       /* It is possible, though unlikely (looking for '(' of a funlike
2150          macro into EOF), that we haven't released the tokens yet.  */
2151       release_temp_tokens (pfile);
2152       while (pfile->temp_alloced)
2153         free (pfile->temp_tokens[--pfile->temp_alloced]);
2154       free (pfile->temp_tokens);
2155     }
2156
2157   if (pfile->date)
2158     {
2159       free ((char *) pfile->date->val.str.text);
2160       free (pfile->date);
2161       free ((char *) pfile->time->val.str.text);
2162       free (pfile->time);
2163     }
2164 }
2165
2166 /* Copy TOKEN into a temporary token from PFILE's store.  */
2167 static cpp_token *
2168 duplicate_token (pfile, token)
2169      cpp_reader *pfile;
2170      const cpp_token *token;
2171 {
2172   cpp_token *result = get_temp_token (pfile);
2173
2174   *result = *token;
2175   if (TOKEN_SPELL (token) == SPELL_STRING)
2176     {
2177       U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2178       memcpy (buff, token->val.str.text, token->val.str.len);
2179       result->val.str.text = buff;
2180     }
2181   return result;
2182 }
2183
2184 /* Determine whether two tokens can be pasted together, and if so,
2185    what the resulting token is.  Returns CPP_EOF if the tokens cannot
2186    be pasted, or the appropriate type for the merged token if they
2187    can.  */
2188 enum cpp_ttype
2189 _cpp_can_paste (pfile, token1, token2, digraph)
2190      cpp_reader * pfile;
2191      const cpp_token *token1, *token2;
2192      int* digraph;
2193 {
2194   enum cpp_ttype a = token1->type, b = token2->type;
2195   int cxx = CPP_OPTION (pfile, cplusplus);
2196
2197   /* Treat named operators as if they were ordinary NAMEs.  */
2198   if (token1->flags & NAMED_OP)
2199     a = CPP_NAME;
2200   if (token2->flags & NAMED_OP)
2201     b = CPP_NAME;
2202
2203   if (a <= CPP_LAST_EQ && b == CPP_EQ)
2204     return a + (CPP_EQ_EQ - CPP_EQ);
2205
2206   switch (a)
2207     {
2208     case CPP_GREATER:
2209       if (b == a) return CPP_RSHIFT;
2210       if (b == CPP_QUERY && cxx)        return CPP_MAX;
2211       if (b == CPP_GREATER_EQ)  return CPP_RSHIFT_EQ;
2212       break;
2213     case CPP_LESS:
2214       if (b == a) return CPP_LSHIFT;
2215       if (b == CPP_QUERY && cxx)        return CPP_MIN;
2216       if (b == CPP_LESS_EQ)     return CPP_LSHIFT_EQ;
2217       if (CPP_OPTION (pfile, digraphs))
2218         {
2219           if (b == CPP_COLON)
2220             {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2221           if (b == CPP_MOD)
2222             {*digraph = 1; return CPP_OPEN_BRACE;}      /* <% digraph */
2223         }
2224       break;
2225
2226     case CPP_PLUS: if (b == a)  return CPP_PLUS_PLUS; break;
2227     case CPP_AND:  if (b == a)  return CPP_AND_AND; break;
2228     case CPP_OR:   if (b == a)  return CPP_OR_OR;   break;
2229
2230     case CPP_MINUS:
2231       if (b == a)               return CPP_MINUS_MINUS;
2232       if (b == CPP_GREATER)     return CPP_DEREF;
2233       break;
2234     case CPP_COLON:
2235       if (b == a && cxx)        return CPP_SCOPE;
2236       if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
2237         {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2238       break;
2239
2240     case CPP_MOD:
2241       if (CPP_OPTION (pfile, digraphs))
2242         {
2243           if (b == CPP_GREATER)
2244             {*digraph = 1; return CPP_CLOSE_BRACE;}  /* %> digraph */
2245           if (b == CPP_COLON)
2246             {*digraph = 1; return CPP_HASH;}         /* %: digraph */
2247         }
2248       break;
2249     case CPP_DEREF:
2250       if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2251       break;
2252     case CPP_DOT:
2253       if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2254       if (b == CPP_NUMBER)      return CPP_NUMBER;
2255       break;
2256
2257     case CPP_HASH:
2258       if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2259         /* %:%: digraph */
2260         {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2261       break;
2262
2263     case CPP_NAME:
2264       if (b == CPP_NAME)        return CPP_NAME;
2265       if (b == CPP_NUMBER
2266           && is_numstart(token2->val.str.text[0]))       return CPP_NAME;
2267       if (b == CPP_CHAR
2268           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
2269       if (b == CPP_STRING
2270           && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
2271       break;
2272
2273     case CPP_NUMBER:
2274       if (b == CPP_NUMBER)      return CPP_NUMBER;
2275       if (b == CPP_NAME)        return CPP_NUMBER;
2276       if (b == CPP_DOT)         return CPP_NUMBER;
2277       /* Numbers cannot have length zero, so this is safe.  */
2278       if ((b == CPP_PLUS || b == CPP_MINUS)
2279           && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
2280         return CPP_NUMBER;
2281       break;
2282
2283     case CPP_OTHER:
2284       if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
2285         {
2286           if (b == CPP_NAME)    return CPP_NAME;
2287           if (b == CPP_STRING)  return CPP_OSTRING;
2288         }
2289
2290     default:
2291       break;
2292     }
2293
2294   return CPP_EOF;
2295 }
2296
2297 /* Check if TOKEN is to be ##-pasted with the token after it.  */
2298 static const cpp_token *
2299 maybe_paste_with_next (pfile, token)
2300      cpp_reader *pfile;
2301      const cpp_token *token;
2302 {
2303   cpp_token *pasted;
2304   const cpp_token *second;
2305   cpp_context *context = CURRENT_CONTEXT (pfile);
2306
2307   /* Is this token on the LHS of ## ? */
2308
2309   while ((token->flags & PASTE_LEFT)
2310          || ((context->flags & CONTEXT_PASTEL)
2311              && context->posn == context->count))
2312     {
2313       /* Suppress macro expansion for next token, but don't conflict
2314          with the other method of suppression.  If it is an argument,
2315          macro expansion within the argument will still occur.  */
2316       pfile->paste_level = pfile->cur_context;
2317       second = _cpp_get_token (pfile);
2318       pfile->paste_level = 0;
2319
2320       /* Ignore placemarker argument tokens (cannot be from an empty
2321          macro since macros are not expanded).  */
2322       if (token->type == CPP_PLACEMARKER)
2323         pasted = duplicate_token (pfile, second);
2324       else if (second->type == CPP_PLACEMARKER)
2325         {
2326           /* GCC has special extended semantics for , ## b where b is
2327              a varargs parameter: the comma disappears if b was given
2328              no actual arguments (not merely if b is an empty
2329              argument).  */
2330           if (token->type == CPP_COMMA && second->flags & VOID_REST)
2331             pasted = duplicate_token (pfile, second);
2332           else
2333             pasted = duplicate_token (pfile, token);
2334         }
2335       else
2336         {
2337           int digraph = 0;
2338           enum cpp_ttype type = _cpp_can_paste (pfile, token, second, &digraph);
2339
2340           if (type == CPP_EOF)
2341             {
2342               if (CPP_OPTION (pfile, warn_paste))
2343                 {
2344                   /* Do not complain about , ## <whatever> if
2345                      <whatever> came from a variable argument, because
2346                      the author probably intended the ## to trigger
2347                      the special extended semantics (see above).  */
2348                   if (token->type == CPP_COMMA
2349                       && IS_ARG_CONTEXT (CURRENT_CONTEXT (pfile))
2350                       && ON_REST_ARG (CURRENT_CONTEXT (pfile) - 1))
2351                     /* no warning */;
2352                   else
2353                     cpp_warning (pfile,
2354                         "pasting would not give a valid preprocessing token");
2355                 }
2356               _cpp_push_token (pfile, second);
2357               /* A short term hack to safely clear the PASTE_LEFT flag.  */
2358               pasted = duplicate_token (pfile, token);
2359               pasted->flags &= ~PASTE_LEFT;
2360               return pasted;
2361             }
2362
2363           if (type == CPP_NAME || type == CPP_NUMBER)
2364             {
2365               /* Join spellings.  */
2366               U_CHAR *buf, *end;
2367
2368               pasted = get_temp_token (pfile);
2369               buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2370               end = spell_token (pfile, token, buf);
2371               end = spell_token (pfile, second, end);
2372               *end = '\0';
2373
2374               if (type == CPP_NAME)
2375                 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2376               else
2377                 {
2378                   pasted->val.str.text = uxstrdup (buf);
2379                   pasted->val.str.len = end - buf;
2380                 }
2381             }
2382           else if (type == CPP_WCHAR || type == CPP_WSTRING
2383                    || type == CPP_OSTRING)
2384             pasted = duplicate_token (pfile, second);
2385           else
2386             {
2387               pasted = get_temp_token (pfile);
2388               pasted->val.integer = 0;
2389             }
2390
2391           pasted->type = type;
2392           pasted->flags = digraph ? DIGRAPH : 0;
2393
2394           if (type == CPP_NAME && pasted->val.node->type == T_OPERATOR)
2395             {
2396               pasted->type = pasted->val.node->value.code;
2397               pasted->flags |= NAMED_OP;
2398             }
2399         }
2400
2401       /* The pasted token gets the whitespace flags and position of the
2402          first token, the PASTE_LEFT flag of the second token, plus the
2403          PASTED flag to indicate it is the result of a paste.  However, we
2404          want to preserve the DIGRAPH flag.  */
2405       pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2406       pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2407                         | (second->flags & PASTE_LEFT) | PASTED);
2408       pasted->col = token->col;
2409       pasted->line = token->line;
2410
2411       /* See if there is another token to be pasted onto the one we just
2412          constructed.  */
2413       token = pasted;
2414       context = CURRENT_CONTEXT (pfile);
2415       /* and loop */
2416     }
2417   return token;
2418 }
2419
2420 /* Convert a token sequence to a single string token according to the
2421    rules of the ISO C #-operator.  */
2422 #define INIT_SIZE 200
2423 static cpp_token *
2424 stringify_arg (pfile, token)
2425      cpp_reader *pfile;
2426      const cpp_token *token;
2427 {
2428   cpp_token *result;
2429   unsigned char *main_buf;
2430   unsigned int prev_value, backslash_count = 0;
2431   unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2432
2433   push_arg_context (pfile, token);
2434   prev_value  = prevent_macro_expansion (pfile);
2435   main_buf = (unsigned char *) xmalloc (buf_cap);
2436
2437   result = get_temp_token (pfile);
2438   ASSIGN_FLAGS_AND_POS (result, token);
2439
2440   for (; (token = _cpp_get_token (pfile))->type != CPP_EOF; )
2441     {
2442       int escape;
2443       unsigned char *buf;
2444       unsigned int len = TOKEN_LEN (token);
2445
2446       if (token->type == CPP_PLACEMARKER)
2447         continue;
2448
2449       escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2450                 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2451       if (escape)
2452         len *= 4 + 1;
2453
2454       if (buf_used + len > buf_cap)
2455         {
2456           buf_cap = buf_used + len + INIT_SIZE;
2457           main_buf = xrealloc (main_buf, buf_cap);
2458         }
2459
2460       if (whitespace && (token->flags & PREV_WHITE))
2461         main_buf[buf_used++] = ' ';
2462
2463       if (escape)
2464         buf = (unsigned char *) xmalloc (len);
2465       else
2466         buf = main_buf + buf_used;
2467
2468       len = spell_token (pfile, token, buf) - buf;
2469       if (escape)
2470         {
2471           buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2472           free (buf);
2473         }
2474       else
2475         buf_used += len;
2476
2477       whitespace = 1;
2478       if (token->type == CPP_BACKSLASH)
2479         backslash_count++;
2480       else
2481         backslash_count = 0;
2482     }
2483
2484   /* Ignore the final \ of invalid string literals.  */
2485   if (backslash_count & 1)
2486     {
2487       cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2488       buf_used--;
2489     }
2490
2491   result->type = CPP_STRING;
2492   result->val.str.text = main_buf;
2493   result->val.str.len = buf_used;
2494   restore_macro_expansion (pfile, prev_value);
2495   return result;
2496 }
2497
2498 /* Allocate more room on the context stack of PFILE.  */
2499 static void
2500 expand_context_stack (pfile)
2501      cpp_reader *pfile;
2502 {
2503   pfile->context_cap += pfile->context_cap + 20;
2504   pfile->contexts = (cpp_context *)
2505     xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2506 }
2507
2508 /* Push the context of macro NODE onto the context stack.  TOKEN is
2509    the CPP_NAME token invoking the macro.  */
2510 static int
2511 push_macro_context (pfile, token)
2512      cpp_reader *pfile;
2513      const cpp_token *token;
2514 {
2515   unsigned char orig_flags;
2516   macro_args *args;
2517   cpp_context *context;
2518   cpp_hashnode *node = token->val.node;
2519
2520   /* Token's flags may change when parsing args containing a nested
2521      invocation of this macro.  */
2522   orig_flags = token->flags & (PREV_WHITE | BOL);
2523   args = 0;
2524   if (node->value.expansion->paramc >= 0)
2525     {
2526       unsigned int error, prev_nme;
2527
2528       /* Allocate room for the argument contexts, and parse them.  */
2529       args  = (macro_args *) xmalloc (sizeof (macro_args));
2530       args->ends = (unsigned int *)
2531         xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2532       args->tokens = 0;
2533       args->capacity = 0;
2534       args->used = 0;
2535
2536       prev_nme = prevent_macro_expansion (pfile);
2537       pfile->args = args;
2538       error = parse_args (pfile, node, args);
2539       pfile->args = 0;
2540       restore_macro_expansion (pfile, prev_nme);
2541       if (error)
2542         {
2543           free_macro_args (args);
2544           return 1;
2545         }
2546       /* Set the level after the call to parse_args.  */
2547       args->level = pfile->cur_context;
2548     }
2549
2550   /* Now push its context.  */
2551   pfile->cur_context++;
2552   if (pfile->cur_context == pfile->context_cap)
2553     expand_context_stack (pfile);
2554
2555   context = CURRENT_CONTEXT (pfile);
2556   context->u.list = node->value.expansion;
2557   context->args = args;
2558   context->posn = 0;
2559   context->count = context->u.list->tokens_used;
2560   context->level = pfile->cur_context;
2561   context->flags = 0;
2562   context->pushed_token = 0;
2563
2564   /* Set the flags of the first token.  We know there must
2565      be one, empty macros are a single placemarker token.  */
2566   MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2567
2568   return 0;
2569 }
2570
2571 /* Push an argument to the current macro onto the context stack.
2572    TOKEN is the MACRO_ARG token representing the argument expansion.  */
2573 static void
2574 push_arg_context (pfile, token)
2575      cpp_reader *pfile;
2576      const cpp_token *token;
2577 {
2578   cpp_context *context;
2579   macro_args *args;
2580
2581   pfile->cur_context++;
2582   if (pfile->cur_context == pfile->context_cap)
2583       expand_context_stack (pfile);
2584
2585   context = CURRENT_CONTEXT (pfile);
2586   args = context[-1].args;
2587
2588   context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2589   context->u.arg = args->tokens + context->count;
2590   context->count = args->ends[token->val.aux] - context->count;
2591   context->args = 0;
2592   context->posn = 0;
2593   context->level = args->level;
2594   context->flags = CONTEXT_ARG | CONTEXT_RAW;
2595   context->pushed_token = 0;
2596
2597   /* Set the flags of the first token.  There is one.  */
2598   {
2599     const cpp_token *first = context->u.arg[0];
2600     if (!first)
2601       first = context->u.arg[1];
2602
2603     MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2604                           token->flags & (PREV_WHITE | BOL));
2605   }
2606
2607   if (token->flags & PASTE_LEFT)
2608     context->flags |= CONTEXT_PASTEL;
2609   if (pfile->paste_level)
2610     context->flags |= CONTEXT_PASTER;
2611 }
2612
2613 /* "Unget" a token.  It is effectively inserted in the token queue and
2614    will be returned by the next call to get_raw_token.  */
2615 void
2616 _cpp_push_token (pfile, token)
2617      cpp_reader *pfile;
2618      const cpp_token *token;
2619 {
2620   cpp_context *context = CURRENT_CONTEXT (pfile);
2621
2622   if (context->posn > 0)
2623     {
2624       const cpp_token *prev;
2625       if (IS_ARG_CONTEXT (context))
2626         prev = context->u.arg[context->posn - 1];
2627       else
2628         prev = &context->u.list->tokens[context->posn - 1];
2629
2630       if (prev == token)
2631         {
2632           context->posn--;
2633           return;
2634         }
2635     }
2636
2637   if (context->pushed_token)
2638     cpp_ice (pfile, "two tokens pushed in a row");
2639   if (token->type != CPP_EOF)
2640     context->pushed_token = token;
2641   /* Don't push back a directive's CPP_EOF, step back instead.  */
2642   else if (pfile->cur_context == 0)
2643     pfile->contexts[0].posn--;
2644 }
2645
2646 /* Handle a preprocessing directive.  TOKEN is the CPP_HASH token
2647    introducing the directive.  */
2648 void
2649 _cpp_process_directive (pfile, token)
2650      cpp_reader *pfile;
2651      const cpp_token *token;
2652 {
2653   const struct directive *d = pfile->token_list.directive;
2654   int prev_nme = 0;
2655
2656   /* Skip over the directive name.  */
2657   if (token[1].type == CPP_NAME)
2658     _cpp_get_raw_token (pfile);
2659   else if (token[1].type != CPP_NUMBER)
2660     cpp_ice (pfile, "directive begins with %s?!", TOKEN_NAME (token));
2661
2662   if (! (d->flags & EXPAND))
2663     prev_nme = prevent_macro_expansion (pfile);
2664   (void) (*d->handler) (pfile);
2665   if (! (d->flags & EXPAND))
2666     restore_macro_expansion (pfile, prev_nme);
2667   _cpp_skip_rest_of_line (pfile);
2668 }
2669
2670 /* The external interface to return the next token.  All macro
2671    expansion and directive processing is handled internally, the
2672    caller only ever sees the output after preprocessing.  */
2673 const cpp_token *
2674 cpp_get_token (pfile)
2675      cpp_reader *pfile;
2676 {
2677   const cpp_token *token;
2678   /* Loop till we hit a non-directive, non-placemarker token.  */
2679   for (;;)
2680     {
2681       token = _cpp_get_token (pfile);
2682
2683       if (token->type == CPP_PLACEMARKER)
2684         continue;
2685
2686       if (token->type == CPP_HASH && token->flags & BOL
2687           && pfile->token_list.directive)
2688         {
2689           _cpp_process_directive (pfile, token);
2690           continue;
2691         }
2692
2693       return token;
2694     }
2695 }
2696
2697 /* The internal interface to return the next token.  There are two
2698    differences between the internal and external interfaces: the
2699    internal interface may return a PLACEMARKER token, and it does not
2700    process directives.  */
2701 const cpp_token *
2702 _cpp_get_token (pfile)
2703      cpp_reader *pfile;
2704 {
2705   const cpp_token *token, *old_token;
2706   cpp_hashnode *node;
2707
2708   /* Loop until we hit a non-macro token.  */
2709   for (;;)
2710     {
2711       token = get_raw_token (pfile);
2712
2713       /* Short circuit EOF. */
2714       if (token->type == CPP_EOF)
2715         return token;
2716
2717       /* If we are skipping... */
2718       if (pfile->skipping)
2719         {
2720           /* we still have to process directives,  */
2721           if (pfile->token_list.directive)
2722             return token;
2723
2724           /* but everything else is ignored.  */
2725           _cpp_skip_rest_of_line (pfile);
2726           continue;
2727         }
2728
2729       /* If there's a potential control macro and we get here, then that
2730          #ifndef didn't cover the entire file and its argument shouldn't
2731          be taken as a control macro.  */
2732       pfile->potential_control_macro = 0;
2733
2734       /* If we are rescanning preprocessed input, no macro expansion or
2735          token pasting may occur.  */
2736       if (CPP_OPTION (pfile, preprocessed))
2737         return token;
2738
2739       old_token = token;
2740
2741       /* See if there's a token to paste with this one.  */
2742       if (!pfile->paste_level)
2743         token = maybe_paste_with_next (pfile, token);
2744
2745       /* If it isn't a macro, return it now.  */
2746       if (token->type != CPP_NAME || token->val.node->type == T_VOID)
2747         return token;
2748
2749       /* Is macro expansion disabled in general, or are we in the
2750          middle of a token paste, or was this token just pasted?
2751          (Note we don't check token->flags & PASTED, because that
2752          counts tokens that were pasted at some point in the past,
2753          we're only interested in tokens that were pasted by this call
2754          to maybe_paste_with_next.)  */
2755       if (pfile->no_expand_level == pfile->cur_context
2756           || pfile->paste_level
2757           || (token != old_token
2758               && pfile->no_expand_level + 1 == pfile->cur_context))
2759         return token;
2760
2761       node = token->val.node;
2762       if (node->type != T_MACRO)
2763         return special_symbol (pfile, node, token);
2764
2765       if (is_macro_disabled (pfile, node->value.expansion, token))
2766         return token;
2767
2768       if (push_macro_context (pfile, token))
2769         return token;
2770       /* else loop */
2771     }
2772 }
2773
2774 /* Returns the next raw token, i.e. without performing macro
2775    expansion.  Argument contexts are automatically entered.  */
2776 static const cpp_token *
2777 get_raw_token (pfile)
2778      cpp_reader *pfile;
2779 {
2780   const cpp_token *result;
2781   cpp_context *context;
2782
2783   for (;;)
2784     {
2785       context = CURRENT_CONTEXT (pfile);
2786       if (context->pushed_token)
2787         {
2788           result = context->pushed_token;
2789           context->pushed_token = 0;
2790           return result;        /* Cannot be a CPP_MACRO_ARG */
2791         }
2792       else if (context->posn == context->count)
2793         {
2794           if (pop_context (pfile))
2795             return &eof_token;
2796           continue;
2797         }
2798       else if (IS_ARG_CONTEXT (context))
2799         {
2800           result = context->u.arg[context->posn++];
2801           if (result == 0)
2802             {
2803               context->flags ^= CONTEXT_RAW;
2804               result = context->u.arg[context->posn++];
2805             }
2806           return result;        /* Cannot be a CPP_MACRO_ARG */
2807         }
2808
2809       result = &context->u.list->tokens[context->posn++];
2810
2811       if (result->type != CPP_MACRO_ARG)
2812         return result;
2813
2814       if (result->flags & STRINGIFY_ARG)
2815         return stringify_arg (pfile, result);
2816
2817       push_arg_context (pfile, result);
2818     }
2819 }
2820
2821 /* Internal interface to get the token without macro expanding.  */
2822 const cpp_token *
2823 _cpp_get_raw_token (pfile)
2824      cpp_reader *pfile;
2825 {
2826   int prev_nme = prevent_macro_expansion (pfile);
2827   const cpp_token *result = _cpp_get_token (pfile);
2828   restore_macro_expansion (pfile, prev_nme);
2829   return result;
2830 }
2831
2832 /* A thin wrapper to lex_line.  CLEAR is non-zero if the current token
2833    list should be overwritten, or zero if we need to append
2834    (typically, if we are within the arguments to a macro, or looking
2835    for the '(' to start a function-like macro invocation).  */
2836 static int
2837 lex_next (pfile, clear)
2838      cpp_reader *pfile;
2839      int clear;
2840 {
2841   cpp_toklist *list = &pfile->token_list;
2842   const cpp_token *old_list = list->tokens;
2843   unsigned int old_used = list->tokens_used;
2844
2845   if (clear)
2846     {
2847       /* Release all temporary tokens.  */
2848       _cpp_clear_toklist (list);
2849       pfile->contexts[0].posn = 0;
2850       if (pfile->temp_used)
2851         release_temp_tokens (pfile);
2852     }
2853   lex_line (pfile, list);
2854   pfile->contexts[0].count = list->tokens_used;
2855
2856   if (!clear && pfile->args)
2857     {
2858       /* Fix up argument token pointers.  */
2859       if (old_list != list->tokens)
2860         {
2861           unsigned int i;
2862
2863           for (i = 0; i < pfile->args->used; i++)
2864             {
2865               const cpp_token *token = pfile->args->tokens[i];
2866               if (token >= old_list && token < old_list + old_used)
2867                 pfile->args->tokens[i] = (const cpp_token *)
2868                 ((char *) token + ((char *) list->tokens - (char *) old_list));
2869             }
2870         }
2871
2872       /* 6.10.3 paragraph 11: If there are sequences of preprocessing
2873          tokens within the list of arguments that would otherwise act as
2874          preprocessing directives, the behavior is undefined.
2875
2876          This implementation will report a hard error and treat the
2877          'sequence of preprocessing tokens' as part of the macro argument,
2878          not a directive.
2879
2880          Note if pfile->args == 0, we're OK since we're only inside a
2881          macro argument after a '('.  */
2882       if (list->directive)
2883         {
2884           cpp_error_with_line (pfile, list->tokens[old_used].line,
2885                                list->tokens[old_used].col,
2886                                "#%s may not be used inside a macro argument",
2887                                list->directive->name);
2888           return 1;
2889         }
2890     }
2891
2892   return 0;
2893 }
2894
2895 /* Pops a context off the context stack.  If we're at the bottom, lexes
2896    the next logical line.  Returns EOF if we're at the end of the
2897    argument list to the # operator, or we should not "overflow"
2898    into the rest of the file (e.g. 6.10.3.1.1).  */
2899 static int
2900 pop_context (pfile)
2901      cpp_reader *pfile;
2902 {
2903   cpp_context *context;
2904
2905   if (pfile->cur_context == 0)
2906     {
2907       /* If we are currently processing a directive, do not advance.  6.10
2908          paragraph 2: A new-line character ends the directive even if it
2909          occurs within what would otherwise be an invocation of a
2910          function-like macro.  */
2911       if (pfile->token_list.directive)
2912         return 1;
2913
2914       return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
2915     }
2916
2917   /* Argument contexts, when parsing args or handling # operator
2918      return CPP_EOF at the end.  */
2919   context = CURRENT_CONTEXT (pfile);
2920   if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
2921     return 1;
2922
2923   /* Free resources when leaving macro contexts.  */
2924   if (context->args)
2925     free_macro_args (context->args);
2926
2927   if (pfile->cur_context == pfile->no_expand_level)
2928     pfile->no_expand_level--;
2929   pfile->cur_context--;
2930
2931   return 0;
2932 }
2933
2934 /* Turn off macro expansion at the current context level.  */
2935 static unsigned int
2936 prevent_macro_expansion (pfile)
2937      cpp_reader *pfile;
2938 {
2939   unsigned int prev_value = pfile->no_expand_level;
2940   pfile->no_expand_level = pfile->cur_context;
2941   return prev_value;
2942 }
2943
2944 /* Restore macro expansion to its previous state.  */
2945 static void
2946 restore_macro_expansion (pfile, prev_value)
2947      cpp_reader *pfile;
2948      unsigned int prev_value;
2949 {
2950   pfile->no_expand_level = prev_value;
2951 }
2952
2953 /* Used by cpperror.c to obtain the correct line and column to report
2954    in a diagnostic.  */
2955 unsigned int
2956 _cpp_get_line (pfile, pcol)
2957      cpp_reader *pfile;
2958      unsigned int *pcol;
2959 {
2960   unsigned int index;
2961   const cpp_token *cur_token;
2962
2963   if (pfile->state.in_lex_line)
2964     index = pfile->token_list.tokens_used;
2965   else
2966     {
2967       index = pfile->contexts[0].posn;
2968
2969       if (index == 0)
2970         {
2971           if (pcol)
2972             *pcol = 0;
2973           return 0;
2974         }
2975       index--;
2976     }
2977
2978   cur_token = &pfile->token_list.tokens[index];
2979   if (pcol)
2980     *pcol = cur_token->col;
2981   return cur_token->line;
2982 }
2983
2984 #define DSC(str) (const U_CHAR *)str, sizeof str - 1
2985 static const char * const monthnames[] =
2986 {
2987   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
2988   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
2989 };
2990
2991 /* Handle builtin macros like __FILE__.  */
2992 static const cpp_token *
2993 special_symbol (pfile, node, token)
2994      cpp_reader *pfile;
2995      cpp_hashnode *node;
2996      const cpp_token *token;
2997 {
2998   cpp_token *result;
2999   cpp_buffer *ip;
3000
3001   switch (node->type)
3002     {
3003     case T_FILE:
3004     case T_BASE_FILE:
3005       {
3006         const char *file;
3007
3008         ip = CPP_BUFFER (pfile);
3009         if (ip == 0)
3010           file = "";
3011         else
3012           {
3013             if (node->type == T_BASE_FILE)
3014               while (CPP_PREV_BUFFER (ip) != NULL)
3015                 ip = CPP_PREV_BUFFER (ip);
3016
3017             file = ip->nominal_fname;
3018           }
3019         result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3020                                     strlen (file));
3021       }
3022       break;
3023
3024     case T_INCLUDE_LEVEL:
3025       /* pfile->include_depth counts the primary source as level 1,
3026          but historically __INCLUDE_DEPTH__ has called the primary
3027          source level 0.  */
3028       result = alloc_number_token (pfile, pfile->include_depth - 1);
3029       break;
3030
3031     case T_SPECLINE:
3032       /* If __LINE__ is embedded in a macro, it must expand to the
3033          line of the macro's invocation, not its definition.
3034          Otherwise things like assert() will not work properly.  */
3035       result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3036       break;
3037
3038     case T_STDC:
3039       {
3040         int stdc = 1;
3041
3042 #ifdef STDC_0_IN_SYSTEM_HEADERS
3043         if (CPP_IN_SYSTEM_HEADER (pfile)
3044             && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
3045           stdc = 0;
3046 #endif
3047         result = alloc_number_token (pfile, stdc);
3048       }
3049       break;
3050
3051     case T_DATE:
3052     case T_TIME:
3053       if (pfile->date == 0)
3054         {
3055           /* Allocate __DATE__ and __TIME__ from permanent storage,
3056              and save them in pfile so we don't have to do this again.
3057              We don't generate these strings at init time because
3058              time() and localtime() are very slow on some systems.  */
3059           time_t tt = time (NULL);
3060           struct tm *tb = localtime (&tt);
3061
3062           pfile->date = make_string_token
3063             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3064           pfile->time = make_string_token
3065             ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3066
3067           sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
3068                    monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
3069           sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
3070                    tb->tm_hour, tb->tm_min, tb->tm_sec);
3071         }
3072       result = node->type == T_DATE ? pfile->date: pfile->time;
3073       break;
3074
3075     case T_POISON:
3076       cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
3077       return token;
3078
3079     default:
3080       cpp_ice (pfile, "invalid special hash type");
3081       return token;
3082     }
3083
3084   ASSIGN_FLAGS_AND_POS (result, token);
3085   return result;
3086 }
3087 #undef DSC
3088
3089 /* Allocate pfile->input_buffer, and initialize _cpp_trigraph_map[]
3090    if it hasn't happened already.  */
3091
3092 void
3093 _cpp_init_input_buffer (pfile)
3094      cpp_reader *pfile;
3095 {
3096   cpp_context *base;
3097
3098   _cpp_init_toklist (&pfile->token_list, 0);
3099   pfile->no_expand_level = UINT_MAX;
3100   pfile->context_cap = 20;
3101   pfile->cur_context = 0;
3102
3103   pfile->contexts = (cpp_context *)
3104     xmalloc (pfile->context_cap * sizeof (cpp_context));
3105
3106   /* Clear the base context.  */
3107   base = &pfile->contexts[0];
3108   base->u.list = &pfile->token_list;
3109   base->posn = 0;
3110   base->count = 0;
3111   base->args = 0;
3112   base->level = 0;
3113   base->flags = 0;
3114   base->pushed_token = 0;
3115 }
3116
3117 /* Moves to the end of the directive line, popping contexts as
3118    necessary.  */
3119 void
3120 _cpp_skip_rest_of_line (pfile)
3121      cpp_reader *pfile;
3122 {
3123   /* Discard all stacked contexts.  */
3124   int i;
3125   for (i = pfile->cur_context; i > 0; i--)
3126     if (pfile->contexts[i].args)
3127       free_macro_args (pfile->contexts[i].args);
3128
3129   if (pfile->no_expand_level <= pfile->cur_context)
3130     pfile->no_expand_level = 0;
3131   pfile->cur_context = 0;
3132
3133   /* Clear the base context, and clear the directive pointer so that
3134      get_raw_token will advance to the next line.  */
3135   pfile->contexts[0].count = 0;
3136   pfile->contexts[0].posn = 0;
3137   pfile->token_list.directive = 0;
3138 }
3139
3140 /* Directive handler wrapper used by the command line option
3141    processor.  */
3142 void
3143 _cpp_run_directive (pfile, dir, buf, count, name)
3144      cpp_reader *pfile;
3145      const struct directive *dir;
3146      const char *buf;
3147      size_t count;
3148      const char *name;
3149 {
3150   if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3151     {
3152       unsigned int prev_lvl = 0;
3153
3154       if (name)
3155         CPP_BUFFER (pfile)->nominal_fname = name;
3156       else
3157         CPP_BUFFER (pfile)->nominal_fname = _("<command line>");
3158       CPP_BUFFER (pfile)->lineno = (unsigned int)-1;
3159
3160       /* Scan the line now, else prevent_macro_expansion won't work.  */
3161       lex_next (pfile, 1);
3162       if (! (dir->flags & EXPAND))
3163         prev_lvl = prevent_macro_expansion (pfile);
3164
3165       (void) (*dir->handler) (pfile);
3166
3167       if (! (dir->flags & EXPAND))
3168         restore_macro_expansion (pfile, prev_lvl);
3169
3170       _cpp_skip_rest_of_line (pfile);
3171       cpp_pop_buffer (pfile);
3172     }
3173 }