gcc/cpptrad.c

   1 /* CPP Library - traditional lexical analysis and macro expansion.
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3    Contributed by Neil Booth, May 2002
   4
   5 This program is free software; you can redistribute it and/or modify it
   6 under the terms of the GNU General Public License as published by the
   7 Free Software Foundation; either version 2, or (at your option) any
   8 later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU General Public License for more details.
  14
  15 You should have received a copy of the GNU General Public License
  16 along with this program; if not, write to the Free Software
  17 Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  18
  19 #include "config.h"
  20 #include "system.h"
  21 #include "cpplib.h"
  22 #include "cpphash.h"
  23
  24 /* Lexing TODO: Handle -C, maybe -CC, and space in escaped newlines.
  25    Stop cpplex.c from recognizing comments and directives during its
  26    lexing pass.  Get rid of line_base usage - seems pointless?  Do we
  27    get escaped newline at EOF correct?  */
  28
  29 static const uchar *handle_newline PARAMS ((cpp_reader *, const uchar *));
  30 static const uchar *skip_escaped_newlines PARAMS ((cpp_reader *,
  31                                                    const uchar *));
  32 static const uchar *skip_whitespace PARAMS ((cpp_reader *, const uchar *));
  33 static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *));
  34 static const uchar *skip_comment PARAMS ((cpp_reader *, const uchar *));
  35 static void scan_out_logical_line PARAMS ((cpp_reader *pfile));
  36 static void check_output_buffer PARAMS ((cpp_reader *, size_t));
  37 static void restore_buff PARAMS ((cpp_reader *));
  38 static void push_replacement_text PARAMS ((cpp_reader *, cpp_hashnode *));
  39
  40 /* Ensures we have N bytes' space in the output buffer, and
  41    reallocates it if not.  */
  42 static void
  43 check_output_buffer (pfile, n)
  44      cpp_reader *pfile;
  45      size_t n;
  46 {
  47   if (n > (size_t) (pfile->trad_out_limit - pfile->trad_out_cur))
  48     {
  49       size_t size = pfile->trad_out_cur - pfile->trad_out_base;
  50       size_t new_size = (size + n) * 3 / 2;
  51
  52       pfile->trad_out_base
  53         = (uchar *) xrealloc (pfile->trad_out_base, new_size);
  54       pfile->trad_out_limit = pfile->trad_out_base + new_size;
  55       pfile->trad_out_cur = pfile->trad_out_base + size;
  56     }
  57 }
  58
  59 /* To be called whenever a newline character is encountered in the
  60    input file, at CUR.  Handles DOS, MAC and Unix ends of line, and
  61    returns the character after the newline sequence.  */
  62 static const uchar *
  63 handle_newline (pfile, cur)
  64      cpp_reader *pfile;
  65      const uchar *cur;
  66 {
  67   pfile->line++;
  68   if (cur[0] + cur[1] == '\r' + '\n')
  69     cur++;
  70   pfile->buffer->line_base = cur + 1;
  71   return cur + 1;
  72 }
  73
  74 /* CUR points to any character in the buffer, not necessarily a
  75    backslash.  Advances CUR until all escaped newlines are skipped,
  76    and returns the new position.  */
  77 static const uchar *
  78 skip_escaped_newlines (pfile, cur)
  79      cpp_reader *pfile;
  80      const uchar *cur;
  81 {
  82   while (*cur == '\\' && is_vspace (cur[1]))
  83     cur = handle_newline (pfile, cur + 1);
  84
  85   return cur;
  86 }
  87
  88 /* CUR points to the character after the asterisk introducing a
  89    comment.  Returns the position after the comment.  */
  90 static const uchar *
  91 skip_comment (pfile, cur)
  92      cpp_reader *pfile;
  93      const uchar *cur;
  94 {
  95   unsigned int from_line = pfile->line;
  96   unsigned int c = 0, prevc = 0;
  97   const uchar *limit = RLIMIT (pfile->context);
  98
  99   while (cur < limit)
 100     {
 101       prevc = c;
 102       c = *cur++;
 103
 104       if (c == '/')
 105         {
 106           if (prevc == '*')
 107             break;
 108           if (*cur == '*' && cur[1] != '/'
 109               && CPP_OPTION (pfile, warn_comments))
 110             cpp_error_with_line (pfile, DL_WARNING, pfile->line, 0,
 111                                  "\"/*\" within comment");
 112         }
 113       else if (is_vspace (c))
 114         cur = handle_newline (pfile, cur - 1);
 115     }
 116
 117   if (c != '/' || prevc != '*')
 118     cpp_error_with_line (pfile, DL_ERROR, from_line, 0,
 119                          "unterminated comment");
 120
 121   return cur;
 122 }
 123
 124 /* Skip any horizontal whitespace and comments beginning at CUR,
 125    returning the following character.  */
 126 static const uchar *
 127 skip_whitespace (pfile, cur)
 128      cpp_reader *pfile;
 129      const uchar *cur;
 130 {
 131   const uchar *tmp;
 132
 133   for (;;)
 134     {
 135       while (is_nvspace (*cur) && *cur != 0)
 136         cur++;
 137
 138       if (*cur == '\0' && cur != RLIMIT (pfile->context))
 139         continue;
 140
 141       if (*cur == '\\')
 142         {
 143           tmp = cur;
 144           cur = skip_escaped_newlines (pfile, cur);
 145           if (tmp != cur)
 146             continue;
 147         }
 148
 149       if (*cur == '/')
 150         {
 151           tmp = skip_escaped_newlines (pfile, cur + 1);
 152           if (*tmp == '*')
 153             {
 154               cur = skip_comment (pfile, tmp + 1);
 155               continue;
 156             }
 157         }
 158
 159       break;
 160     }
 161
 162   return cur;
 163 }
 164
 165 /* Lexes and outputs an identifier starting at CUR, which is assumed
 166    to point to a valid first character of an identifier.  Returns
 167    the hashnode, and updates trad_out_cur.  */
 168 static cpp_hashnode *
 169 lex_identifier (pfile, cur)
 170      cpp_reader *pfile;
 171      const uchar *cur;
 172 {
 173   size_t len;
 174   uchar *out = pfile->trad_out_cur;
 175   cpp_hashnode *result;
 176
 177   do
 178     {
 179       do
 180         *out++ = *cur++;
 181       while (ISIDNUM (*cur));
 182       cur = skip_escaped_newlines (pfile, cur);
 183     }
 184   while (ISIDNUM (*cur));
 185
 186   CUR (pfile->context) = cur;
 187   len = out - pfile->trad_out_cur;
 188   result = (cpp_hashnode *) ht_lookup (pfile->hash_table, pfile->trad_out_cur,
 189                                        len, HT_ALLOC);
 190   pfile->trad_out_cur = out;
 191   return result;
 192 }
 193
 194 /* Reads an identifier, returning its hashnode.  If the next token is
 195    not an identifier, returns NULL.  */
 196 cpp_hashnode *
 197 _cpp_lex_identifier_trad (pfile)
 198      cpp_reader *pfile;
 199 {
 200   const uchar *cur = skip_whitespace (pfile, CUR (pfile->context));
 201
 202   if (!ISIDST (*cur))
 203     {
 204       CUR (pfile->context) = cur;
 205       return NULL;
 206     }
 207
 208   return lex_identifier (pfile, cur);
 209 }
 210
 211 /* Overlays the true file buffer temporarily with text of length LEN
 212    starting at START.  The true buffer is restored upon calling
 213    restore_buff().  */
 214 void
 215 _cpp_overlay_buffer (pfile, start, len)
 216      cpp_reader *pfile;
 217      const uchar *start;
 218      size_t len;
 219 {
 220   cpp_buffer *buffer = pfile->buffer;
 221
 222   buffer->saved_cur = buffer->cur;
 223   buffer->saved_rlimit = buffer->rlimit;
 224   buffer->saved_line_base = buffer->line_base;
 225
 226   buffer->cur = start;
 227   buffer->line_base = start;
 228   buffer->rlimit = start + len;
 229 }
 230
 231 /* Restores a buffer overlaid by _cpp_overlay_buffer().  */
 232 static void
 233 restore_buff (pfile)
 234      cpp_reader *pfile;
 235 {
 236   cpp_buffer *buffer = pfile->buffer;
 237
 238   buffer->cur = buffer->saved_cur;
 239   buffer->rlimit = buffer->saved_rlimit;
 240   buffer->line_base = buffer->saved_line_base;
 241 }
 242
 243 /* Reads a logical line into the output buffer.  Returns TRUE if there
 244    is more text left in the buffer.  */
 245 bool
 246 _cpp_read_logical_line_trad (pfile)
 247      cpp_reader *pfile;
 248 {
 249   cpp_buffer *buffer;
 250   unsigned int first_line;
 251
 252   restore_buff (pfile);
 253
 254   first_line = pfile->line = pfile->trad_line;
 255
 256   buffer = pfile->buffer;
 257   if (buffer->cur == buffer->rlimit)
 258     {
 259       bool stop = true;
 260
 261       /* Don't pop the last buffer.  */
 262       if (buffer->prev)
 263         {
 264           stop = buffer->return_at_eof;
 265           _cpp_pop_buffer (pfile);
 266         }
 267
 268       if (stop)
 269         return false;
 270     }
 271
 272   CUR (pfile->context) = buffer->cur;
 273   RLIMIT (pfile->context) = buffer->rlimit;
 274   pfile->trad_out_cur = pfile->trad_out_base;
 275   scan_out_logical_line (pfile);
 276   buffer->cur = CUR (pfile->context);
 277
 278   pfile->trad_line = pfile->line;
 279   pfile->line = first_line;
 280   _cpp_overlay_buffer (pfile, pfile->trad_out_base,
 281                        pfile->trad_out_cur - pfile->trad_out_base);
 282   return true;
 283 }
 284
 285 /* Copies the next logical line in the current buffer to the output
 286    buffer.  The output is guaranteed to terminate with a NUL
 287    character.  */
 288 static void
 289 scan_out_logical_line (pfile)
 290      cpp_reader *pfile;
 291 {
 292   cpp_context *context;
 293   const uchar *cur;
 294   unsigned int c, quote = 0;
 295   uchar *out;
 296
 297  new_context:
 298   context = pfile->context;
 299   cur = CUR (context);
 300   check_output_buffer (pfile, RLIMIT (context) - cur);
 301   out = pfile->trad_out_cur;
 302
 303   for (;;)
 304     {
 305       c = *cur++;
 306       *out++ = c;
 307
 308       /* There are only a few entities we need to catch: comments,
 309          identifiers, newlines, escaped newlines, # and '\0'.  */
 310       switch (c)
 311         {
 312         case '\0':
 313           if (cur - 1 != RLIMIT (context))
 314             break;
 315
 316           /* If this is a macro's expansion, pop it.  */
 317           if (context->prev)
 318             {
 319               pfile->trad_out_cur = out - 1;
 320               _cpp_pop_context (pfile);
 321               goto new_context;
 322             }
 323
 324           /* Premature end of file.  Fake a new line.  */
 325           cur--;
 326           if (!pfile->buffer->from_stage3)
 327             cpp_error (pfile, DL_PEDWARN, "no newline at end of file");
 328           pfile->line++;
 329           goto finish_output;
 330
 331         case '\r': case '\n':
 332           cur = handle_newline (pfile, cur - 1);
 333           out[-1] = '\0';
 334         finish_output:
 335           CUR (context) = cur;
 336           pfile->trad_out_cur = out - 1;
 337           return;
 338
 339         case '"':
 340         case '\'':
 341           if (c == quote)
 342             quote = 0;
 343           else if (!quote)
 344             quote = c;
 345           break;
 346
 347         case '\\':
 348           if (is_vspace (*cur))
 349             out--, cur = skip_escaped_newlines (pfile, cur - 1);
 350           else
 351             {
 352               /* Skip escaped quotes here, it's easier than above, but
 353                  take care to first skip escaped newlines.  */
 354               cur = skip_escaped_newlines (pfile, cur);
 355               if (*cur == '\\' || *cur == '"' || *cur == '\'')
 356                 *out++ = *cur++;
 357             }
 358           break;
 359
 360         case '/':
 361           /* Traditional CPP does not recognize comments within
 362              literals.  */
 363           if (!quote)
 364             {
 365               cur = skip_escaped_newlines (pfile, cur);
 366               if (*cur == '*')
 367                 out--, cur = skip_comment (pfile, cur + 1);
 368             }
 369           break;
 370
 371         case '_':
 372         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 373         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 374         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 375         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 376         case 'y': case 'z':
 377         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 378         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 379         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 380         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 381         case 'Y': case 'Z':
 382           {
 383             cpp_hashnode *node;
 384
 385             pfile->trad_out_cur = --out;
 386             node = lex_identifier (pfile, cur - 1);
 387             if (node->type == NT_MACRO)
 388               {
 389                 /* Remove the macro name from the output.  */
 390                 pfile->trad_out_cur = out;
 391                 push_replacement_text (pfile, node);
 392                 goto new_context;
 393               }
 394             out = pfile->trad_out_cur;
 395             cur = CUR (context);
 396           }
 397           break;
 398
 399         default:
 400           break;
 401         }
 402     }
 403 }
 404
 405 /* Push a context holding the replacement text of the macro NODE on
 406    the context stack.  Doesn't yet handle special built-ins or
 407    function-like macros.  */
 408 static void
 409 push_replacement_text (pfile, node)
 410      cpp_reader *pfile;
 411      cpp_hashnode *node;
 412 {
 413   cpp_macro *macro = node->value.macro;
 414
 415   _cpp_push_text_context (pfile, node,
 416                           macro->exp.text,
 417                           macro->exp.text + macro->count);
 418 }
 419
 420 /* Analyze and save the replacement text of a macro.  */
 421 bool
 422 _cpp_create_trad_definition (pfile, macro)
 423      cpp_reader *pfile;
 424      cpp_macro *macro;
 425 {
 426   const uchar *cur, *limit;
 427   uchar *exp;
 428   size_t len;
 429
 430   /* Skip leading whitespace now.  */
 431   CUR (pfile->context) = skip_whitespace (pfile, CUR (pfile->context));
 432
 433   pfile->trad_out_cur = pfile->trad_out_base;
 434   scan_out_logical_line (pfile);
 435
 436   /* Skip trailing white space.  */
 437   cur = pfile->trad_out_base;
 438   limit = pfile->trad_out_cur;
 439   while (limit > cur && is_space (limit[-1]))
 440     limit--;
 441
 442   len = (size_t) (limit - cur);
 443   exp = _cpp_unaligned_alloc (pfile, len + 1);
 444   memcpy (exp, cur, len);
 445   exp[len] = '\0';
 446
 447   macro->exp.text = exp;
 448   /* Include NUL.  */
 449   macro->count = len;
 450
 451   return true;
 452 }
 453
 454 /* Prepare to be able to scan the current buffer.  */
 455 void
 456 _cpp_set_trad_context (pfile)
 457      cpp_reader *pfile;
 458 {
 459   cpp_buffer *buffer = pfile->buffer;
 460   cpp_context *context = pfile->context;
 461
 462   if (pfile->context->prev)
 463     abort ();
 464
 465   pfile->trad_out_cur = pfile->trad_out_base;
 466   CUR (context) = buffer->cur;
 467   RLIMIT (context) = buffer->rlimit;
 468   check_output_buffer (pfile, RLIMIT (context) - CUR (context));
 469 }