X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fcpplex.c;h=d03096cdc638c8e4e2d580304cc40347106f9516;hb=e2585cd835ae2dfa42f12b9ff4bbb56d339a83c9;hp=e8d7b7e56d6544920d9a75e008613b6f1b03bf78;hpb=805e22b2051e9c6a75377ea6599654d7415da483;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/cpplex.c b/gcc/cpplex.c index e8d7b7e56d6..d03096cdc63 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -1,10 +1,9 @@ /* CPP Library - lexical analysis. - Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 Broken out to separate file, Zack Weinberg, Mar 2000 - Single-pass line tokenization by Neil Booth, April 2000 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -22,25 +21,14 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include "config.h" #include "system.h" -#include "coretypes.h" -#include "tm.h" #include "cpplib.h" #include "cpphash.h" -#ifdef MULTIBYTE_CHARS -#include "mbchar.h" -#include -#endif - -/* Tokens with SPELL_STRING store their spelling in the token list, - and it's length in the token->val.name.len. */ enum spell_type { SPELL_OPERATOR = 0, - SPELL_CHAR, SPELL_IDENT, - SPELL_NUMBER, - SPELL_STRING, + SPELL_LITERAL, SPELL_NONE }; @@ -54,49 +42,37 @@ static const unsigned char *const digraph_spellings[] = { U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; #define OP(e, s) { SPELL_OPERATOR, U s }, -#define TK(e, s) { s, U STRINGX (e) }, +#define TK(e, s) { s, U #e }, static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; #undef OP #undef TK #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) #define TOKEN_NAME(token) (token_spellings[(token)->type].name) -#define BACKUP() do {buffer->cur = buffer->backup_to;} while (0) - -static void handle_newline PARAMS ((cpp_reader *)); -static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *)); -static cppchar_t get_effective_char PARAMS ((cpp_reader *)); - -static int skip_block_comment PARAMS ((cpp_reader *)); -static int skip_line_comment PARAMS ((cpp_reader *)); -static void adjust_column PARAMS ((cpp_reader *)); -static int skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); -static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *)); -static uchar *parse_slow PARAMS ((cpp_reader *, const uchar *, int, - unsigned int *)); -static void parse_number PARAMS ((cpp_reader *, cpp_string *, int)); -static int unescaped_terminator_p PARAMS ((cpp_reader *, const uchar *)); -static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t)); -static bool trigraph_p PARAMS ((cpp_reader *)); -static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, - cppchar_t)); -static bool continue_after_nul PARAMS ((cpp_reader *)); -static int name_p PARAMS ((cpp_reader *, const cpp_string *)); -static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, - const unsigned char *, cppchar_t *)); -static tokenrun *next_tokenrun PARAMS ((tokenrun *)); - -static unsigned int hex_digit_value PARAMS ((unsigned int)); -static _cpp_buff *new_buff PARAMS ((size_t)); + +static void add_line_note (cpp_buffer *, const uchar *, unsigned int); +static int skip_line_comment (cpp_reader *); +static void skip_whitespace (cpp_reader *, cppchar_t); +static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *); +static void lex_number (cpp_reader *, cpp_string *); +static bool forms_identifier_p (cpp_reader *, int); +static void lex_string (cpp_reader *, cpp_token *, const uchar *); +static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); +static void create_literal (cpp_reader *, cpp_token *, const uchar *, + unsigned int, enum cpp_ttype); +static bool warn_in_comment (cpp_reader *, _cpp_line_note *); +static int name_p (cpp_reader *, const cpp_string *); +static tokenrun *next_tokenrun (tokenrun *); + +static _cpp_buff *new_buff (size_t); + /* Utility routine: Compares, the token TOKEN to the NUL-terminated string STRING. TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ int -cpp_ideq (token, string) - const cpp_token *token; - const char *string; +cpp_ideq (const cpp_token *token, const char *string) { if (token->type != CPP_NAME) return 0; @@ -104,302 +80,327 @@ cpp_ideq (token, string) return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); } -/* Call when meeting a newline, assumed to be in buffer->cur[-1]. - Returns with buffer->cur pointing to the character immediately - following the newline (combination). */ +/* Record a note TYPE at byte POS into the current cleaned logical + line. */ static void -handle_newline (pfile) - cpp_reader *pfile; +add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) { - cpp_buffer *buffer = pfile->buffer; - - /* Handle CR-LF and LF-CR. Most other implementations (e.g. java) - only accept CR-LF; maybe we should fall back to that behavior? */ - if (buffer->cur[-1] + buffer->cur[0] == '\r' + '\n') - buffer->cur++; - - buffer->line_base = buffer->cur; - buffer->col_adjust = 0; - pfile->line++; -} - -/* Subroutine of skip_escaped_newlines; called when a 3-character - sequence beginning with "??" is encountered. buffer->cur points to - the second '?'. - - Warn if necessary, and returns true if the sequence forms a - trigraph and the trigraph should be honored. */ -static bool -trigraph_p (pfile) - cpp_reader *pfile; -{ - cpp_buffer *buffer = pfile->buffer; - cppchar_t from_char = buffer->cur[1]; - bool accept; - - if (!_cpp_trigraph_map[from_char]) - return false; - - accept = CPP_OPTION (pfile, trigraphs); - - /* Don't warn about trigraphs in comments. */ - if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment) + if (buffer->notes_used == buffer->notes_cap) { - if (accept) - cpp_error_with_line (pfile, DL_WARNING, - pfile->line, CPP_BUF_COL (buffer) - 1, - "trigraph ??%c converted to %c", - (int) from_char, - (int) _cpp_trigraph_map[from_char]); - else if (buffer->cur != buffer->last_Wtrigraphs) - { - buffer->last_Wtrigraphs = buffer->cur; - cpp_error_with_line (pfile, DL_WARNING, - pfile->line, CPP_BUF_COL (buffer) - 1, - "trigraph ??%c ignored", (int) from_char); - } + buffer->notes_cap = buffer->notes_cap * 2 + 200; + buffer->notes = xrealloc (buffer->notes, + buffer->notes_cap * sizeof (_cpp_line_note)); } - return accept; + buffer->notes[buffer->notes_used].pos = pos; + buffer->notes[buffer->notes_used].type = type; + buffer->notes_used++; } -/* Skips any escaped newlines introduced by '?' or a '\\', assumed to - lie in buffer->cur[-1]. Returns the next byte, which will be in - buffer->cur[-1]. This routine performs preprocessing stages 1 and - 2 of the ISO C standard. */ -static cppchar_t -skip_escaped_newlines (pfile) - cpp_reader *pfile; +/* Returns with a logical line that contains no escaped newlines or + trigraphs. This is a time-critical inner loop. */ +void +_cpp_clean_line (cpp_reader *pfile) { - cpp_buffer *buffer = pfile->buffer; - cppchar_t next = buffer->cur[-1]; + cpp_buffer *buffer; + const uchar *s; + uchar c, *d, *p; + + buffer = pfile->buffer; + buffer->cur_note = buffer->notes_used = 0; + buffer->cur = buffer->line_base = buffer->next_line; + buffer->need_line = false; + s = buffer->next_line - 1; - /* Only do this if we apply stages 1 and 2. */ if (!buffer->from_stage3) { - const unsigned char *saved_cur; - cppchar_t next1; - - do + /* Short circuit for the common case of an un-escaped line with + no trigraphs. The primary win here is by not writing any + data back to memory until we have to. */ + for (;;) { - if (next == '?') + c = *++s; + if (c == '\n' || c == '\r') { - if (buffer->cur[0] != '?' || !trigraph_p (pfile)) - break; - - /* Translate the trigraph. */ - next = _cpp_trigraph_map[buffer->cur[1]]; - buffer->cur += 2; - if (next != '\\') - break; + d = (uchar *) s; + + if (s == buffer->rlimit) + goto done; + + /* DOS line ending? */ + if (c == '\r' && s[1] == '\n') + s++; + + if (s == buffer->rlimit) + goto done; + + /* check for escaped newline */ + p = d; + while (p != buffer->next_line && is_nvspace (p[-1])) + p--; + if (p == buffer->next_line || p[-1] != '\\') + goto done; + + /* Have an escaped newline; process it and proceed to + the slow path. */ + add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); + d = p - 2; + buffer->next_line = p - 1; + break; + } + if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) + { + /* Have a trigraph. We may or may not have to convert + it. Add a line note regardless, for -Wtrigraphs. */ + add_line_note (buffer, s, s[2]); + if (CPP_OPTION (pfile, trigraphs)) + { + /* We do, and that means we have to switch to the + slow path. */ + d = (uchar *) s; + *d = _cpp_trigraph_map[s[2]]; + s += 2; + break; + } } + } - if (buffer->cur == buffer->rlimit) - break; - /* We have a backslash, and room for at least one more - character. Skip horizontal whitespace. */ - saved_cur = buffer->cur; - do - next1 = *buffer->cur++; - while (is_nvspace (next1) && buffer->cur < buffer->rlimit); + for (;;) + { + c = *++s; + *++d = c; - if (!is_vspace (next1)) + if (c == '\n' || c == '\r') { - buffer->cur = saved_cur; - break; - } + /* Handle DOS line endings. */ + if (c == '\r' && s != buffer->rlimit && s[1] == '\n') + s++; + if (s == buffer->rlimit) + break; - if (saved_cur != buffer->cur - 1 - && !pfile->state.lexing_comment) - cpp_error (pfile, DL_WARNING, - "backslash and newline separated by space"); + /* Escaped? */ + p = d; + while (p != buffer->next_line && is_nvspace (p[-1])) + p--; + if (p == buffer->next_line || p[-1] != '\\') + break; - handle_newline (pfile); - buffer->backup_to = buffer->cur; - if (buffer->cur == buffer->rlimit) + add_line_note (buffer, p - 1, p != d ? ' ': '\\'); + d = p - 2; + buffer->next_line = p - 1; + } + else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) { - cpp_error (pfile, DL_PEDWARN, - "backslash-newline at end of file"); - next = EOF; + /* Add a note regardless, for the benefit of -Wtrigraphs. */ + add_line_note (buffer, d, s[2]); + if (CPP_OPTION (pfile, trigraphs)) + { + *d = _cpp_trigraph_map[s[2]]; + s += 2; + } } - else - next = *buffer->cur++; } - while (next == '\\' || next == '?'); } + else + { + do + s++; + while (*s != '\n' && *s != '\r'); + d = (uchar *) s; - return next; + /* Handle DOS line endings. */ + if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') + s++; + } + + done: + *d = '\n'; + /* A sentinel note that should never be processed. */ + add_line_note (buffer, d + 1, '\n'); + buffer->next_line = s + 1; +} + +/* Return true if the trigraph indicated by NOTE should be warned + about in a comment. */ +static bool +warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) +{ + const uchar *p; + + /* Within comments we don't warn about trigraphs, unless the + trigraph forms an escaped newline, as that may change + behavior. */ + if (note->type != '/') + return false; + + /* If -trigraphs, then this was an escaped newline iff the next note + is coincident. */ + if (CPP_OPTION (pfile, trigraphs)) + return note[1].pos == note->pos; + + /* Otherwise, see if this forms an escaped newline. */ + p = note->pos + 3; + while (is_nvspace (*p)) + p++; + + /* There might have been escaped newlines between the trigraph and the + newline we found. Hence the position test. */ + return (*p == '\n' && p < note[1].pos); } -/* Obtain the next character, after trigraph conversion and skipping - an arbitrarily long string of escaped newlines. The common case of - no trigraphs or escaped newlines falls through quickly. On return, - buffer->backup_to points to where to return to if the character is - not to be processed. */ -static cppchar_t -get_effective_char (pfile) - cpp_reader *pfile; +/* Process the notes created by add_line_note as far as the current + location. */ +void +_cpp_process_line_notes (cpp_reader *pfile, int in_comment) { - cppchar_t next; cpp_buffer *buffer = pfile->buffer; - buffer->backup_to = buffer->cur; - next = *buffer->cur++; - if (__builtin_expect (next == '?' || next == '\\', 0)) - next = skip_escaped_newlines (pfile); + for (;;) + { + _cpp_line_note *note = &buffer->notes[buffer->cur_note]; + unsigned int col; - return next; + if (note->pos > buffer->cur) + break; + + buffer->cur_note++; + col = CPP_BUF_COLUMN (buffer, note->pos + 1); + + if (note->type == '\\' || note->type == ' ') + { + if (note->type == ' ' && !in_comment) + cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "backslash and newline separated by space"); + + if (buffer->next_line > buffer->rlimit) + { + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, + "backslash-newline at end of file"); + /* Prevent "no newline at end of file" warning. */ + buffer->next_line = buffer->rlimit; + } + + buffer->line_base = note->pos; + CPP_INCREMENT_LINE (pfile, 0); + } + else if (_cpp_trigraph_map[note->type]) + { + if (CPP_OPTION (pfile, warn_trigraphs) + && (!in_comment || warn_in_comment (pfile, note))) + { + if (CPP_OPTION (pfile, trigraphs)) + cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "trigraph ??%c converted to %c", + note->type, + (int) _cpp_trigraph_map[note->type]); + else + { + cpp_error_with_line + (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "trigraph ??%c ignored, use -trigraphs to enable", + note->type); + } + } + } + else + abort (); + } } /* Skip a C-style block comment. We find the end of the comment by seeing if an asterisk is before every '/' we encounter. Returns - nonzero if comment terminated by EOF, zero otherwise. */ -static int -skip_block_comment (pfile) - cpp_reader *pfile; + nonzero if comment terminated by EOF, zero otherwise. + + Buffer->cur points to the initial asterisk of the comment. */ +bool +_cpp_skip_block_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; - cppchar_t c = EOF, prevc = EOF; + const uchar *cur = buffer->cur; + uchar c; - pfile->state.lexing_comment = 1; - while (buffer->cur != buffer->rlimit) - { - prevc = c, c = *buffer->cur++; - - /* FIXME: For speed, create a new character class of characters - of interest inside block comments. */ - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile); + cur++; + if (*cur == '/') + cur++; + for (;;) + { /* People like decorating comments with '*', so check for '/' instead for efficiency. */ + c = *cur++; + if (c == '/') { - if (prevc == '*') + if (cur[-2] == '*') break; /* Warn about potential nested comments, but not if the '/' comes immediately before the true comment delimiter. Don't bother to get it right across escaped newlines. */ if (CPP_OPTION (pfile, warn_comments) - && buffer->cur[0] == '*' && buffer->cur[1] != '/') - cpp_error_with_line (pfile, DL_WARNING, - pfile->line, CPP_BUF_COL (buffer), - "\"/*\" within comment"); + && cur[0] == '*' && cur[1] != '/') + { + buffer->cur = cur; + cpp_error_with_line (pfile, CPP_DL_WARNING, + pfile->line_table->highest_line, CPP_BUF_COL (buffer), + "\"/*\" within comment"); + } + } + else if (c == '\n') + { + unsigned int cols; + buffer->cur = cur - 1; + _cpp_process_line_notes (pfile, true); + if (buffer->next_line >= buffer->rlimit) + return true; + _cpp_clean_line (pfile); + + cols = buffer->next_line - buffer->line_base; + CPP_INCREMENT_LINE (pfile, cols); + + cur = buffer->cur; } - else if (is_vspace (c)) - handle_newline (pfile); - else if (c == '\t') - adjust_column (pfile); } - pfile->state.lexing_comment = 0; - return c != '/' || prevc != '*'; + buffer->cur = cur; + _cpp_process_line_notes (pfile, true); + return false; } /* Skip a C++ line comment, leaving buffer->cur pointing to the terminating newline. Handles escaped newlines. Returns nonzero if a multiline comment. */ static int -skip_line_comment (pfile) - cpp_reader *pfile; +skip_line_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; - unsigned int orig_line = pfile->line; - cppchar_t c; -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; -#endif - - pfile->state.lexing_comment = 1; -#ifdef MULTIBYTE_CHARS - /* Reset multibyte conversion state. */ - (void) local_mbtowc (NULL, NULL, 0); -#endif - do - { - if (buffer->cur == buffer->rlimit) - goto at_eof; + unsigned int orig_line = pfile->line_table->highest_line; -#ifdef MULTIBYTE_CHARS - char_len = local_mbtowc (&wc, (const char *) buffer->cur, - buffer->rlimit - buffer->cur); - if (char_len == -1) - { - cpp_error (pfile, DL_WARNING, - "ignoring invalid multibyte character"); - char_len = 1; - c = *buffer->cur++; - } - else - { - buffer->cur += char_len; - c = wc; - } -#else - c = *buffer->cur++; -#endif - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile); - } - while (!is_vspace (c)); - - /* Step back over the newline, except at EOF. */ - buffer->cur--; - at_eof: + while (*buffer->cur != '\n') + buffer->cur++; - pfile->state.lexing_comment = 0; - return orig_line != pfile->line; + _cpp_process_line_notes (pfile, true); + return orig_line != pfile->line_table->highest_line; } -/* pfile->buffer->cur is one beyond the \t character. Update - col_adjust so we track the column correctly. */ +/* Skips whitespace, saving the next non-whitespace character. */ static void -adjust_column (pfile) - cpp_reader *pfile; -{ - cpp_buffer *buffer = pfile->buffer; - unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */ - - /* Round it up to multiple of the tabstop, but subtract 1 since the - tab itself occupies a character position. */ - buffer->col_adjust += (CPP_OPTION (pfile, tabstop) - - col % CPP_OPTION (pfile, tabstop)) - 1; -} - -/* Skips whitespace, saving the next non-whitespace character. - Adjusts pfile->col_adjust to account for tabs. Without this, - tokens might be assigned an incorrect column. */ -static int -skip_whitespace (pfile, c) - cpp_reader *pfile; - cppchar_t c; +skip_whitespace (cpp_reader *pfile, cppchar_t c) { cpp_buffer *buffer = pfile->buffer; - unsigned int warned = 0; + bool saw_NUL = false; do { /* Horizontal space always OK. */ - if (c == ' ') + if (c == ' ' || c == '\t') ; - else if (c == '\t') - adjust_column (pfile); /* Just \f \v or \0 left. */ else if (c == '\0') - { - if (buffer->cur - 1 == buffer->rlimit) - return 0; - if (!warned) - { - cpp_error (pfile, DL_WARNING, "null character(s) ignored"); - warned = 1; - } - } + saw_NUL = true; else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) - cpp_error_with_line (pfile, DL_PEDWARN, pfile->line, + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, CPP_BUF_COL (buffer), "%s in preprocessing directive", c == '\f' ? "form feed" : "vertical tab"); @@ -409,16 +410,16 @@ skip_whitespace (pfile, c) /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ while (is_nvspace (c)); + if (saw_NUL) + cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); + buffer->cur--; - return 1; } /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */ static int -name_p (pfile, string) - cpp_reader *pfile; - const cpp_string *string; +name_p (cpp_reader *pfile, const cpp_string *string) { unsigned int i; @@ -429,315 +430,183 @@ name_p (pfile, string) return 1; } -/* Parse an identifier, skipping embedded backslash-newlines. This is - a critical inner loop. The common case is an identifier which has - not been split by backslash-newline, does not contain a dollar - sign, and has already been scanned (roughly 10:1 ratio of - seen:unseen identifiers in normal code; the distribution is - Poisson-like). Second most common case is a new identifier, not - split and no dollar sign. The other possibilities are rare and - have been relegated to parse_slow. */ -static cpp_hashnode * -parse_identifier (pfile) - cpp_reader *pfile; +/* Returns TRUE if the sequence starting at buffer->cur is invalid in + an identifier. FIRST is TRUE if this starts an identifier. */ +static bool +forms_identifier_p (cpp_reader *pfile, int first) { - cpp_hashnode *result; - const uchar *cur, *base; - - /* Fast-path loop. Skim over a normal identifier. - N.B. ISIDNUM does not include $. */ - cur = pfile->buffer->cur; - while (ISIDNUM (*cur)) - cur++; + cpp_buffer *buffer = pfile->buffer; - /* Check for slow-path cases. */ - if (*cur == '?' || *cur == '\\' || *cur == '$') + if (*buffer->cur == '$') { - unsigned int len; + if (!CPP_OPTION (pfile, dollars_in_ident)) + return false; + + buffer->cur++; + if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) + { + CPP_OPTION (pfile, warn_dollars) = 0; + cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); + } - base = parse_slow (pfile, cur, 0, &len); - result = (cpp_hashnode *) - ht_lookup (pfile->hash_table, base, len, HT_ALLOCED); + return true; } - else + + /* Is this a syntactically valid UCN? */ + if (0 && *buffer->cur == '\\' + && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { - base = pfile->buffer->cur - 1; + buffer->cur += 2; + if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first)) + return true; + buffer->cur -= 2; + } + + return false; +} + +/* Lex an identifier starting at BUFFER->CUR - 1. */ +static cpp_hashnode * +lex_identifier (cpp_reader *pfile, const uchar *base) +{ + cpp_hashnode *result; + const uchar *cur; + + do + { + cur = pfile->buffer->cur; + + /* N.B. ISIDNUM does not include $. */ + while (ISIDNUM (*cur)) + cur++; + pfile->buffer->cur = cur; - result = (cpp_hashnode *) - ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); } + while (forms_identifier_p (pfile, false)); + + result = (cpp_hashnode *) + ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); - /* Rarely, identifiers require diagnostics when lexed. - XXX Has to be forced out of the fast path. */ + /* Rarely, identifiers require diagnostics when lexed. */ if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) && !pfile->state.skipping, 0)) { /* It is allowed to poison the same identifier twice. */ if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) - cpp_error (pfile, DL_ERROR, "attempt to use poisoned \"%s\"", + cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", NODE_NAME (result)); /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the replacement list of a variadic macro. */ if (result == pfile->spec_nodes.n__VA_ARGS__ && !pfile->state.va_args_ok) - cpp_error (pfile, DL_PEDWARN, - "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); + cpp_error (pfile, CPP_DL_PEDWARN, + "__VA_ARGS__ can only appear in the expansion" + " of a C99 variadic macro"); } return result; } -/* Slow path. This handles numbers and identifiers which have been - split, or contain dollar signs. The part of the token from - PFILE->buffer->cur-1 to CUR has already been scanned. NUMBER_P is - 1 if it's a number, and 2 if it has a leading period. Returns a - pointer to the token's NUL-terminated spelling in permanent - storage, and sets PLEN to its length. */ -static uchar * -parse_slow (pfile, cur, number_p, plen) - cpp_reader *pfile; - const uchar *cur; - int number_p; - unsigned int *plen; -{ - cpp_buffer *buffer = pfile->buffer; - const uchar *base = buffer->cur - 1; - struct obstack *stack = &pfile->hash_table->stack; - unsigned int c, prevc, saw_dollar = 0; - - /* Place any leading period. */ - if (number_p == 2) - obstack_1grow (stack, '.'); - - /* Copy the part of the token which is known to be okay. */ - obstack_grow (stack, base, cur - base); - - /* Now process the part which isn't. We are looking at one of - '$', '\\', or '?' on entry to this loop. */ - prevc = cur[-1]; - c = *cur++; - buffer->cur = cur; - for (;;) - { - /* Potential escaped newline? */ - buffer->backup_to = buffer->cur - 1; - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile); - - if (!is_idchar (c)) - { - if (!number_p) - break; - if (c != '.' && !VALID_SIGN (c, prevc)) - break; - } - - /* Handle normal identifier characters in this loop. */ - do - { - prevc = c; - obstack_1grow (stack, c); - - if (c == '$') - saw_dollar++; - - c = *buffer->cur++; - } - while (is_idchar (c)); - } - - /* Step back over the unwanted char. */ - BACKUP (); - - /* $ is not an identifier character in the standard, but is commonly - accepted as an extension. Don't warn about it in skipped - conditional blocks. */ - if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping) - cpp_error (pfile, DL_PEDWARN, "'$' character(s) in identifier or number"); - - /* Identifiers and numbers are null-terminated. */ - *plen = obstack_object_size (stack); - obstack_1grow (stack, '\0'); - return obstack_finish (stack); -} - -/* Parse a number, beginning with character C, skipping embedded - backslash-newlines. LEADING_PERIOD is nonzero if there was a "." - before C. Place the result in NUMBER. */ +/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ static void -parse_number (pfile, number, leading_period) - cpp_reader *pfile; - cpp_string *number; - int leading_period; +lex_number (cpp_reader *pfile, cpp_string *number) { const uchar *cur; + const uchar *base; + uchar *dest; - /* Fast-path loop. Skim over a normal number. - N.B. ISIDNUM does not include $. */ - cur = pfile->buffer->cur; - while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) - cur++; - - /* Check for slow-path cases. */ - if (*cur == '?' || *cur == '\\' || *cur == '$') - number->text = parse_slow (pfile, cur, 1 + leading_period, &number->len); - else + base = pfile->buffer->cur - 1; + do { - const uchar *base = pfile->buffer->cur - 1; - uchar *dest; + cur = pfile->buffer->cur; - number->len = cur - base + leading_period; - dest = _cpp_unaligned_alloc (pfile, number->len + 1); - dest[number->len] = '\0'; - number->text = dest; + /* N.B. ISIDNUM does not include $. */ + while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) + cur++; - if (leading_period) - *dest++ = '.'; - memcpy (dest, base, cur - base); pfile->buffer->cur = cur; } + while (forms_identifier_p (pfile, false)); + + number->len = cur - base; + dest = _cpp_unaligned_alloc (pfile, number->len + 1); + memcpy (dest, base, number->len); + dest[number->len] = '\0'; + number->text = dest; } -/* Subroutine of parse_string. */ -static int -unescaped_terminator_p (pfile, dest) - cpp_reader *pfile; - const unsigned char *dest; +/* Create a token of type TYPE with a literal spelling. */ +static void +create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, + unsigned int len, enum cpp_ttype type) { - const unsigned char *start, *temp; - - /* In #include-style directives, terminators are not escapeable. */ - if (pfile->state.angled_headers) - return 1; + uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); - start = BUFF_FRONT (pfile->u_buff); - - /* An odd number of consecutive backslashes represents an escaped - terminator. */ - for (temp = dest; temp > start && temp[-1] == '\\'; temp--) - ; - - return ((dest - temp) & 1) == 0; + memcpy (dest, base, len); + dest[len] = '\0'; + token->type = type; + token->val.str.len = len; + token->val.str.text = dest; } -/* Parses a string, character constant, or angle-bracketed header file - name. Handles embedded trigraphs and escaped newlines. The stored - string is guaranteed NUL-terminated, but it is not guaranteed that - this is the first NUL since embedded NULs are preserved. +/* Lexes a string, character constant, or angle-bracketed header file + name. The stored string contains the spelling, including opening + quote and leading any leading 'L'. It returns the type of the + literal, or CPP_OTHER if it was not properly terminated. - When this function returns, buffer->cur points to the next - character to be processed. */ + The spelling is NUL-terminated, but it is not guaranteed that this + is the first NUL since embedded NULs are preserved. */ static void -parse_string (pfile, token, terminator) - cpp_reader *pfile; - cpp_token *token; - cppchar_t terminator; +lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) { - cpp_buffer *buffer = pfile->buffer; - unsigned char *dest, *limit; - cppchar_t c; - bool warned_nulls = false; -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; -#endif - - dest = BUFF_FRONT (pfile->u_buff); - limit = BUFF_LIMIT (pfile->u_buff); + bool saw_NUL = false; + const uchar *cur; + cppchar_t terminator; + enum cpp_ttype type; + + cur = base; + terminator = *cur++; + if (terminator == 'L') + terminator = *cur++; + if (terminator == '\"') + type = *base == 'L' ? CPP_WSTRING: CPP_STRING; + else if (terminator == '\'') + type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; + else + terminator = '>', type = CPP_HEADER_NAME; -#ifdef MULTIBYTE_CHARS - /* Reset multibyte conversion state. */ - (void) local_mbtowc (NULL, NULL, 0); -#endif for (;;) { - /* We need room for another char, possibly the terminating NUL. */ - if ((size_t) (limit - dest) < 1) - { - size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff); - _cpp_extend_buff (pfile, &pfile->u_buff, 2); - dest = BUFF_FRONT (pfile->u_buff) + len_so_far; - limit = BUFF_LIMIT (pfile->u_buff); - } + cppchar_t c = *cur++; -#ifdef MULTIBYTE_CHARS - char_len = local_mbtowc (&wc, (const char *) buffer->cur, - buffer->rlimit - buffer->cur); - if (char_len == -1) - { - cpp_error (pfile, DL_WARNING, - "ignoring invalid multibyte character"); - char_len = 1; - c = *buffer->cur++; - } - else - { - buffer->cur += char_len; - c = wc; - } -#else - c = *buffer->cur++; -#endif - - /* Handle trigraphs, escaped newlines etc. */ - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile); - - if (c == terminator) - { - if (unescaped_terminator_p (pfile, dest)) - break; - } - else if (is_vspace (c)) + /* In #include-style directives, terminators are not escapable. */ + if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') + cur++; + else if (c == terminator) + break; + else if (c == '\n') { - /* No string literal may extend over multiple lines. In - assembly language, suppress the error except for <> - includes. This is a kludge around not knowing where - comments are. */ - unterminated: - if (CPP_OPTION (pfile, lang) != CLK_ASM || terminator == '>') - cpp_error (pfile, DL_ERROR, "missing terminating %c character", - (int) terminator); - buffer->cur--; + cur--; + type = CPP_OTHER; break; } else if (c == '\0') - { - if (buffer->cur - 1 == buffer->rlimit) - goto unterminated; - if (!warned_nulls) - { - warned_nulls = true; - cpp_error (pfile, DL_WARNING, - "null character(s) preserved in literal"); - } - } -#ifdef MULTIBYTE_CHARS - if (char_len > 1) - { - for ( ; char_len > 0; --char_len) - *dest++ = (*buffer->cur - char_len); - } - else -#endif - *dest++ = c; + saw_NUL = true; } - *dest = '\0'; + if (saw_NUL && !pfile->state.skipping) + cpp_error (pfile, CPP_DL_WARNING, + "null character(s) preserved in literal"); - token->val.str.text = BUFF_FRONT (pfile->u_buff); - token->val.str.len = dest - BUFF_FRONT (pfile->u_buff); - BUFF_FRONT (pfile->u_buff) = dest + 1; + pfile->buffer->cur = cur; + create_literal (pfile, token, base, cur - base, type); } /* The stored comment includes the comment start and any terminator. */ static void -save_comment (pfile, token, from, type) - cpp_reader *pfile; - cpp_token *token; - const unsigned char *from; - cppchar_t type; +save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, + cppchar_t type) { unsigned char *buffer; unsigned int len, clen; @@ -777,9 +646,7 @@ save_comment (pfile, token, from, type) /* Allocate COUNT tokens for RUN. */ void -_cpp_init_tokenrun (run, count) - tokenrun *run; - unsigned int count; +_cpp_init_tokenrun (tokenrun *run, unsigned int count) { run->base = xnewvec (cpp_token, count); run->limit = run->base + count; @@ -788,8 +655,7 @@ _cpp_init_tokenrun (run, count) /* Returns the next tokenrun, or creates one if there is none. */ static tokenrun * -next_tokenrun (run) - tokenrun *run; +next_tokenrun (tokenrun *run) { if (run->next == NULL) { @@ -806,8 +672,7 @@ next_tokenrun (run) same as the last lexed token, so that diagnostics appear in the right place. */ cpp_token * -_cpp_temp_token (pfile) - cpp_reader *pfile; +_cpp_temp_token (cpp_reader *pfile) { cpp_token *old, *result; @@ -819,8 +684,7 @@ _cpp_temp_token (pfile) } result = pfile->cur_token++; - result->line = old->line; - result->col = old->col; + result->src_loc = old->src_loc; return result; } @@ -828,8 +692,7 @@ _cpp_temp_token (pfile) like directive handling, token lookahead, multiple include optimization and skipping. */ const cpp_token * -_cpp_lex_token (pfile) - cpp_reader *pfile; +_cpp_lex_token (cpp_reader *pfile) { cpp_token *result; @@ -861,7 +724,7 @@ _cpp_lex_token (pfile) && _cpp_handle_directive (pfile, result->flags & PREV_WHITE)) continue; if (pfile->cb.line_change && !pfile->state.skipping) - (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args); + pfile->cb.line_change (pfile, result, pfile->state.parsing_args); } /* We don't skip tokens in directives. */ @@ -870,7 +733,7 @@ _cpp_lex_token (pfile) /* Outside a directive, invalidate controlling macros. At file EOF, _cpp_lex_direct takes care of popping the buffer, so we never - get here and MI optimisation works. */ + get here and MI optimization works. */ pfile->mi_valid = false; if (!pfile->state.skipping || result->type == CPP_EOF) @@ -880,73 +743,66 @@ _cpp_lex_token (pfile) return result; } -/* A NUL terminates the current buffer. For ISO preprocessing this is - EOF, but for traditional preprocessing it indicates we need a line - refill. Returns TRUE to continue preprocessing a new buffer, FALSE - to return a CPP_EOF to the caller. */ -static bool -continue_after_nul (pfile) - cpp_reader *pfile; +/* Returns true if a fresh line has been loaded. */ +bool +_cpp_get_fresh_line (cpp_reader *pfile) { - cpp_buffer *buffer = pfile->buffer; - bool more = false; + int return_at_eof; - buffer->saved_flags = BOL; - if (CPP_OPTION (pfile, traditional)) - { - if (pfile->state.in_directive) - return false; + /* We can't get a new line until we leave the current directive. */ + if (pfile->state.in_directive) + return false; - _cpp_remove_overlay (pfile); - more = _cpp_read_logical_line_trad (pfile); - _cpp_overlay_buffer (pfile, pfile->out.base, - pfile->out.cur - pfile->out.base); - pfile->line = pfile->out.first_line; - } - else + for (;;) { - /* Stop parsing arguments with a CPP_EOF. When we finally come - back here, do the work of popping the buffer. */ - if (!pfile->state.parsing_args) + cpp_buffer *buffer = pfile->buffer; + + if (!buffer->need_line) + return true; + + if (buffer->next_line < buffer->rlimit) { - if (buffer->cur != buffer->line_base) - { - /* Non-empty files should end in a newline. Don't warn - for command line and _Pragma buffers. */ - if (!buffer->from_stage3) - cpp_error (pfile, DL_PEDWARN, "no newline at end of file"); - handle_newline (pfile); - } + _cpp_clean_line (pfile); + return true; + } - /* Similarly, finish an in-progress directive with CPP_EOF - before popping the buffer. */ - if (!pfile->state.in_directive && buffer->prev) - { - more = !buffer->return_at_eof; - _cpp_pop_buffer (pfile); - } + /* First, get out of parsing arguments state. */ + if (pfile->state.parsing_args) + return false; + + /* End of buffer. Non-empty files should end in a newline. */ + if (buffer->buf != buffer->rlimit + && buffer->next_line > buffer->rlimit + && !buffer->from_stage3) + { + /* Only warn once. */ + buffer->next_line = buffer->rlimit; + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, + CPP_BUF_COLUMN (buffer, buffer->cur), + "no newline at end of file"); } - } - return more; + return_at_eof = buffer->return_at_eof; + _cpp_pop_buffer (pfile); + if (pfile->buffer == NULL || return_at_eof) + return false; + } } -#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ - do { \ - if (get_effective_char (pfile) == CHAR) \ - result->type = THEN_TYPE; \ - else \ - { \ - BACKUP (); \ - result->type = ELSE_TYPE; \ - } \ - } while (0) +#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ + do \ + { \ + result->type = ELSE_TYPE; \ + if (*buffer->cur == CHAR) \ + buffer->cur++, result->type = THEN_TYPE; \ + } \ + while (0) /* Lex a token into pfile->cur_token, which is also incremented, to get diagnostics pointing to the correct location. Does not handle issues such as token lookahead, multiple-include - optimisation, directives, skipping etc. This function is only + optimization, directives, skipping etc. This function is only suitable for use by _cpp_lex_token, and in special cases like lex_expansion_token which doesn't care for any of these issues. @@ -954,8 +810,7 @@ continue_after_nul (pfile) otherwise returns to the start of the token buffer if permissible. Returns the location of the lexed token. */ cpp_token * -_cpp_lex_direct (pfile) - cpp_reader *pfile; +_cpp_lex_direct (cpp_reader *pfile) { cppchar_t c; cpp_buffer *buffer; @@ -963,98 +818,75 @@ _cpp_lex_direct (pfile) cpp_token *result = pfile->cur_token++; fresh_line: + result->flags = 0; + buffer = pfile->buffer; + if (buffer->need_line) + { + if (!_cpp_get_fresh_line (pfile)) + { + result->type = CPP_EOF; + if (!pfile->state.in_directive) + { + /* Tell the compiler the line number of the EOF token. */ + result->src_loc = pfile->line_table->highest_line; + result->flags = BOL; + } + return result; + } + if (!pfile->keep_tokens) + { + pfile->cur_run = &pfile->base_run; + result = pfile->base_run.base; + pfile->cur_token = result + 1; + } + result->flags = BOL; + if (pfile->state.parsing_args == 2) + result->flags |= PREV_WHITE; + } buffer = pfile->buffer; - result->flags = buffer->saved_flags; - buffer->saved_flags = 0; update_tokens_line: - result->line = pfile->line; + result->src_loc = pfile->line_table->highest_line; skipped_white: + if (buffer->cur >= buffer->notes[buffer->cur_note].pos + && !pfile->overlaid_buffer) + { + _cpp_process_line_notes (pfile, false); + result->src_loc = pfile->line_table->highest_line; + } c = *buffer->cur++; - result->col = CPP_BUF_COLUMN (buffer, buffer->cur); - trigraph: + LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, + CPP_BUF_COLUMN (buffer, buffer->cur)); + switch (c) { case ' ': case '\t': case '\f': case '\v': case '\0': result->flags |= PREV_WHITE; - if (skip_whitespace (pfile, c)) - goto skipped_white; - - /* End of buffer. */ - buffer->cur--; - if (continue_after_nul (pfile)) - goto fresh_line; - result->type = CPP_EOF; - break; - - case '\n': case '\r': - handle_newline (pfile); - buffer->saved_flags = BOL; - if (! pfile->state.in_directive) - { - if (pfile->state.parsing_args == 2) - buffer->saved_flags |= PREV_WHITE; - if (!pfile->keep_tokens) - { - pfile->cur_run = &pfile->base_run; - result = pfile->base_run.base; - pfile->cur_token = result + 1; - } - goto fresh_line; - } - result->type = CPP_EOF; - break; + skip_whitespace (pfile, c); + goto skipped_white; - case '?': - case '\\': - /* These could start an escaped newline, or '?' a trigraph. Let - skip_escaped_newlines do all the work. */ - { - unsigned int line = pfile->line; - - c = skip_escaped_newlines (pfile); - if (line != pfile->line) - { - buffer->cur--; - /* We had at least one escaped newline of some sort. - Update the token's line and column. */ - goto update_tokens_line; - } - } - - /* We are either the original '?' or '\\', or a trigraph. */ - if (c == '?') - result->type = CPP_QUERY; - else if (c == '\\') - goto random_char; - else - goto trigraph; - break; + case '\n': + if (buffer->cur < buffer->rlimit) + CPP_INCREMENT_LINE (pfile, 0); + buffer->need_line = true; + goto fresh_line; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result->type = CPP_NUMBER; - parse_number (pfile, &result->val.str, 0); + lex_number (pfile, &result->val.str); break; case 'L': /* 'L' may introduce wide characters or strings. */ - { - const unsigned char *pos = buffer->cur; - - c = get_effective_char (pfile); - if (c == '\'' || c == '"') - { - result->type = (c == '"' ? CPP_WSTRING: CPP_WCHAR); - parse_string (pfile, result, c); - break; - } - buffer->cur = pos; - } + if (*buffer->cur == '\'' || *buffer->cur == '"') + { + lex_string (pfile, result, buffer->cur - 1); + break; + } /* Fall through. */ - start_ident: case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': @@ -1067,7 +899,7 @@ _cpp_lex_direct (pfile) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = parse_identifier (pfile); + result->val.node = lex_identifier (pfile, buffer->cur - 1); /* Convert named operators to their proper types. */ if (result->val.node->flags & NODE_OPERATOR) @@ -1079,46 +911,45 @@ _cpp_lex_direct (pfile) case '\'': case '"': - result->type = c == '"' ? CPP_STRING: CPP_CHAR; - parse_string (pfile, result, c); + lex_string (pfile, result, buffer->cur - 1); break; case '/': /* A potential block or line comment. */ comment_start = buffer->cur; - c = get_effective_char (pfile); - + c = *buffer->cur; + if (c == '*') { - if (skip_block_comment (pfile)) - cpp_error (pfile, DL_ERROR, "unterminated comment"); + if (_cpp_skip_block_comment (pfile)) + cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); } else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) - || CPP_IN_SYSTEM_HEADER (pfile))) + || cpp_in_system_header (pfile))) { /* Warn about comments only if pedantically GNUC89, and not in system headers. */ if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) && ! buffer->warned_cplusplus_comments) { - cpp_error (pfile, DL_PEDWARN, + cpp_error (pfile, CPP_DL_PEDWARN, "C++ style comments are not allowed in ISO C90"); - cpp_error (pfile, DL_PEDWARN, + cpp_error (pfile, CPP_DL_PEDWARN, "(this will be reported only once per input file)"); buffer->warned_cplusplus_comments = 1; } if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) - cpp_error (pfile, DL_WARNING, "multi-line comment"); + cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); } else if (c == '=') { + buffer->cur++; result->type = CPP_DIV_EQ; break; } else { - BACKUP (); result->type = CPP_DIV; break; } @@ -1136,183 +967,141 @@ _cpp_lex_direct (pfile) case '<': if (pfile->state.angled_headers) { - result->type = CPP_HEADER_NAME; - parse_string (pfile, result, '>'); + lex_string (pfile, result, buffer->cur - 1); break; } - c = get_effective_char (pfile); - if (c == '=') - result->type = CPP_LESS_EQ; - else if (c == '<') - IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); - else if (c == '?' && CPP_OPTION (pfile, cplusplus)) - IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN); - else if (c == ':' && CPP_OPTION (pfile, digraphs)) + result->type = CPP_LESS; + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_LESS_EQ; + else if (*buffer->cur == '<') { - result->type = CPP_OPEN_SQUARE; - result->flags |= DIGRAPH; + buffer->cur++; + IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); } - else if (c == '%' && CPP_OPTION (pfile, digraphs)) + else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus)) { - result->type = CPP_OPEN_BRACE; - result->flags |= DIGRAPH; + buffer->cur++; + IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN); } - else + else if (CPP_OPTION (pfile, digraphs)) { - BACKUP (); - result->type = CPP_LESS; + if (*buffer->cur == ':') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_OPEN_SQUARE; + } + else if (*buffer->cur == '%') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_OPEN_BRACE; + } } break; case '>': - c = get_effective_char (pfile); - if (c == '=') - result->type = CPP_GREATER_EQ; - else if (c == '>') - IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); - else if (c == '?' && CPP_OPTION (pfile, cplusplus)) - IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX); - else + result->type = CPP_GREATER; + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_GREATER_EQ; + else if (*buffer->cur == '>') + { + buffer->cur++; + IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); + } + else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus)) { - BACKUP (); - result->type = CPP_GREATER; + buffer->cur++; + IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX); } break; case '%': - c = get_effective_char (pfile); - if (c == '=') - result->type = CPP_MOD_EQ; - else if (CPP_OPTION (pfile, digraphs) && c == ':') + result->type = CPP_MOD; + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_MOD_EQ; + else if (CPP_OPTION (pfile, digraphs)) { - result->flags |= DIGRAPH; - result->type = CPP_HASH; - if (get_effective_char (pfile) == '%') + if (*buffer->cur == ':') { - const unsigned char *pos = buffer->cur; - - if (get_effective_char (pfile) == ':') - result->type = CPP_PASTE; - else - buffer->cur = pos - 1; + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_HASH; + if (*buffer->cur == '%' && buffer->cur[1] == ':') + buffer->cur += 2, result->type = CPP_PASTE; + } + else if (*buffer->cur == '>') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_CLOSE_BRACE; } - else - BACKUP (); - } - else if (CPP_OPTION (pfile, digraphs) && c == '>') - { - result->flags |= DIGRAPH; - result->type = CPP_CLOSE_BRACE; - } - else - { - BACKUP (); - result->type = CPP_MOD; } break; case '.': result->type = CPP_DOT; - c = get_effective_char (pfile); - if (c == '.') - { - const unsigned char *pos = buffer->cur; - - if (get_effective_char (pfile) == '.') - result->type = CPP_ELLIPSIS; - else - buffer->cur = pos - 1; - } - /* All known character sets have 0...9 contiguous. */ - else if (ISDIGIT (c)) + if (ISDIGIT (*buffer->cur)) { result->type = CPP_NUMBER; - parse_number (pfile, &result->val.str, 1); + lex_number (pfile, &result->val.str); } - else if (c == '*' && CPP_OPTION (pfile, cplusplus)) - result->type = CPP_DOT_STAR; - else - BACKUP (); + else if (*buffer->cur == '.' && buffer->cur[1] == '.') + buffer->cur += 2, result->type = CPP_ELLIPSIS; + else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_DOT_STAR; break; case '+': - c = get_effective_char (pfile); - if (c == '+') - result->type = CPP_PLUS_PLUS; - else if (c == '=') - result->type = CPP_PLUS_EQ; - else - { - BACKUP (); - result->type = CPP_PLUS; - } + result->type = CPP_PLUS; + if (*buffer->cur == '+') + buffer->cur++, result->type = CPP_PLUS_PLUS; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_PLUS_EQ; break; case '-': - c = get_effective_char (pfile); - if (c == '>') + result->type = CPP_MINUS; + if (*buffer->cur == '>') { + buffer->cur++; result->type = CPP_DEREF; - if (CPP_OPTION (pfile, cplusplus)) - { - if (get_effective_char (pfile) == '*') - result->type = CPP_DEREF_STAR; - else - BACKUP (); - } - } - else if (c == '-') - result->type = CPP_MINUS_MINUS; - else if (c == '=') - result->type = CPP_MINUS_EQ; - else - { - BACKUP (); - result->type = CPP_MINUS; + if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_DEREF_STAR; } + else if (*buffer->cur == '-') + buffer->cur++, result->type = CPP_MINUS_MINUS; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_MINUS_EQ; break; case '&': - c = get_effective_char (pfile); - if (c == '&') - result->type = CPP_AND_AND; - else if (c == '=') - result->type = CPP_AND_EQ; - else - { - BACKUP (); - result->type = CPP_AND; - } + result->type = CPP_AND; + if (*buffer->cur == '&') + buffer->cur++, result->type = CPP_AND_AND; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_AND_EQ; break; case '|': - c = get_effective_char (pfile); - if (c == '|') - result->type = CPP_OR_OR; - else if (c == '=') - result->type = CPP_OR_EQ; - else - { - BACKUP (); - result->type = CPP_OR; - } + result->type = CPP_OR; + if (*buffer->cur == '|') + buffer->cur++, result->type = CPP_OR_OR; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_OR_EQ; break; case ':': - c = get_effective_char (pfile); - if (c == ':' && CPP_OPTION (pfile, cplusplus)) - result->type = CPP_SCOPE; - else if (c == '>' && CPP_OPTION (pfile, digraphs)) + result->type = CPP_COLON; + if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_SCOPE; + else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) { + buffer->cur++; result->flags |= DIGRAPH; result->type = CPP_CLOSE_SQUARE; } - else - { - BACKUP (); - result->type = CPP_COLON; - } break; case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; @@ -1321,6 +1110,7 @@ _cpp_lex_direct (pfile) case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; + case '?': result->type = CPP_QUERY; break; case '~': result->type = CPP_COMPL; break; case ',': result->type = CPP_COMMA; break; case '(': result->type = CPP_OPEN_PAREN; break; @@ -1335,48 +1125,51 @@ _cpp_lex_direct (pfile) case '@': result->type = CPP_ATSIGN; break; case '$': - if (CPP_OPTION (pfile, dollars_in_ident)) - goto start_ident; - /* Fall through... */ + case '\\': + { + const uchar *base = --buffer->cur; + + if (forms_identifier_p (pfile, true)) + { + result->type = CPP_NAME; + result->val.node = lex_identifier (pfile, base); + break; + } + buffer->cur++; + } - random_char: default: - result->type = CPP_OTHER; - result->val.c = c; + create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); break; } return result; } -/* An upper bound on the number of bytes needed to spell TOKEN, - including preceding whitespace. */ +/* An upper bound on the number of bytes needed to spell TOKEN. + Does not include preceding whitespace. */ unsigned int -cpp_token_len (token) - const cpp_token *token; +cpp_token_len (const cpp_token *token) { unsigned int len; switch (TOKEN_SPELL (token)) { - default: len = 0; break; - case SPELL_NUMBER: - case SPELL_STRING: len = token->val.str.len; break; + default: len = 4; break; + case SPELL_LITERAL: len = token->val.str.len; break; case SPELL_IDENT: len = NODE_LEN (token->val.node); break; } - /* 1 for whitespace, 4 for comment delimiters. */ - return len + 5; + + return len; } /* Write the spelling of a token TOKEN to BUFFER. The buffer must already contain the enough space to hold the token's spelling. - Returns a pointer to the character after the last character - written. */ + Returns a pointer to the character after the last character written. + FIXME: Would be nice if we didn't need the PFILE argument. */ unsigned char * -cpp_spell_token (pfile, token, buffer) - cpp_reader *pfile; /* Would be nice to be rid of this... */ - const cpp_token *token; - unsigned char *buffer; +cpp_spell_token (cpp_reader *pfile, const cpp_token *token, + unsigned char *buffer) { switch (TOKEN_SPELL (token)) { @@ -1398,46 +1191,20 @@ cpp_spell_token (pfile, token, buffer) } break; - case SPELL_CHAR: - *buffer++ = token->val.c; - break; - spell_ident: case SPELL_IDENT: memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); buffer += NODE_LEN (token->val.node); break; - case SPELL_NUMBER: + case SPELL_LITERAL: memcpy (buffer, token->val.str.text, token->val.str.len); buffer += token->val.str.len; break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: - cpp_error (pfile, DL_ICE, "unknown string token %s\n", - TOKEN_NAME (token)); - return buffer; - } - if (tag) *buffer++ = tag; - *buffer++ = left; - memcpy (buffer, token->val.str.text, token->val.str.len); - buffer += token->val.str.len; - *buffer++ = right; - } - break; - case SPELL_NONE: - cpp_error (pfile, DL_ICE, "unspellable token %s", TOKEN_NAME (token)); + cpp_error (pfile, CPP_DL_ICE, + "unspellable token %s", TOKEN_NAME (token)); break; } @@ -1447,11 +1214,9 @@ cpp_spell_token (pfile, token, buffer) /* Returns TOKEN spelt as a null-terminated string. The string is freed when the reader is destroyed. Useful for diagnostics. */ unsigned char * -cpp_token_as_text (pfile, token) - cpp_reader *pfile; - const cpp_token *token; -{ - unsigned int len = cpp_token_len (token); +cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) +{ + unsigned int len = cpp_token_len (token) + 1; unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; end = cpp_spell_token (pfile, token, start); @@ -1463,8 +1228,7 @@ cpp_token_as_text (pfile, token) /* Used by C front ends, which really should move to using cpp_token_as_text. */ const char * -cpp_type2name (type) - enum cpp_ttype type; +cpp_type2name (enum cpp_ttype type) { return (const char *) token_spellings[type].name; } @@ -1473,9 +1237,7 @@ cpp_type2name (type) Separated from cpp_spell_token for efficiency - to avoid stdio double-buffering. */ void -cpp_output_token (token, fp) - const cpp_token *token; - FILE *fp; +cpp_output_token (const cpp_token *token, FILE *fp) { switch (TOKEN_SPELL (token)) { @@ -1499,40 +1261,15 @@ cpp_output_token (token, fp) } break; - case SPELL_CHAR: - putc (token->val.c, fp); - break; - spell_ident: case SPELL_IDENT: fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); break; - case SPELL_NUMBER: + case SPELL_LITERAL: fwrite (token->val.str.text, 1, token->val.str.len, fp); break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: - fprintf (stderr, "impossible STRING token %s\n", TOKEN_NAME (token)); - return; - } - if (tag) putc (tag, fp); - putc (left, fp); - fwrite (token->val.str.text, 1, token->val.str.len, fp); - putc (right, fp); - } - break; - case SPELL_NONE: /* An error, most probably. */ break; @@ -1541,8 +1278,7 @@ cpp_output_token (token, fp) /* Compare two tokens. */ int -_cpp_equiv_tokens (a, b) - const cpp_token *a, *b; +_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) { if (a->type == b->type && a->flags == b->flags) switch (TOKEN_SPELL (a)) @@ -1550,14 +1286,11 @@ _cpp_equiv_tokens (a, b) default: /* Keep compiler happy. */ case SPELL_OPERATOR: return 1; - case SPELL_CHAR: - return a->val.c == b->val.c; /* Character. */ case SPELL_NONE: return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); case SPELL_IDENT: return a->val.node == b->val.node; - case SPELL_NUMBER: - case SPELL_STRING: + case SPELL_LITERAL: return (a->val.str.len == b->val.str.len && !memcmp (a->val.str.text, b->val.str.text, a->val.str.len)); @@ -1571,9 +1304,8 @@ _cpp_equiv_tokens (a, b) conservative, and occasionally advises a space where one is not needed, e.g. "." and ".2". */ int -cpp_avoid_paste (pfile, token1, token2) - cpp_reader *pfile; - const cpp_token *token1, *token2; +cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, + const cpp_token *token2) { enum cpp_ttype a = token1->type, b = token2->type; cppchar_t c; @@ -1613,9 +1345,12 @@ cpp_avoid_paste (pfile, token1, token2) || b == CPP_CHAR || b == CPP_STRING); /* L */ case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME || c == '.' || c == '+' || c == '-'); - case CPP_OTHER: return (CPP_OPTION (pfile, objc) - && token1->val.c == '@' - && (b == CPP_NAME || b == CPP_STRING)); + /* UCNs */ + case CPP_OTHER: return ((token1->val.str.text[0] == '\\' + && b == CPP_NAME) + || (CPP_OPTION (pfile, objc) + && token1->val.str.text[0] == '@' + && (b == CPP_NAME || b == CPP_STRING))); default: break; } @@ -1626,9 +1361,7 @@ cpp_avoid_paste (pfile, token1, token2) character, to FP. Leading whitespace is removed. If there are macros, special token padding is not performed. */ void -cpp_output_line (pfile, fp) - cpp_reader *pfile; - FILE *fp; +cpp_output_line (cpp_reader *pfile, FILE *fp) { const cpp_token *token; @@ -1644,369 +1377,6 @@ cpp_output_line (pfile, fp) putc ('\n', fp); } -/* Returns the value of a hexadecimal digit. */ -static unsigned int -hex_digit_value (c) - unsigned int c; -{ - if (hex_p (c)) - return hex_value (c); - else - abort (); -} - -/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate - failure if cpplib is not parsing C++ or C99. Such failure is - silent, and no variables are updated. Otherwise returns 0, and - warns if -Wtraditional. - - [lex.charset]: The character designated by the universal character - name \UNNNNNNNN is that character whose character short name in - ISO/IEC 10646 is NNNNNNNN; the character designated by the - universal character name \uNNNN is that character whose character - short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value - for a universal character name is less than 0x20 or in the range - 0x7F-0x9F (inclusive), or if the universal character name - designates a character in the basic source character set, then the - program is ill-formed. - - We assume that wchar_t is Unicode, so we don't need to do any - mapping. Is this ever wrong? - - PC points to the 'u' or 'U', PSTR is points to the byte after PC, - LIMIT is the end of the string or charconst. PSTR is updated to - point after the UCS on return, and the UCS is written into PC. */ - -static int -maybe_read_ucs (pfile, pstr, limit, pc) - cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - cppchar_t *pc; -{ - const unsigned char *p = *pstr; - unsigned int code = 0; - unsigned int c = *pc, length; - - /* Only attempt to interpret a UCS for C++ and C99. */ - if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) - return 1; - - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\%c' is different in traditional C", c); - - length = (c == 'u' ? 4: 8); - - if ((size_t) (limit - p) < length) - { - cpp_error (pfile, DL_ERROR, "incomplete universal-character-name"); - /* Skip to the end to avoid more diagnostics. */ - p = limit; - } - else - { - for (; length; length--, p++) - { - c = *p; - if (ISXDIGIT (c)) - code = (code << 4) + hex_digit_value (c); - else - { - cpp_error (pfile, DL_ERROR, - "non-hex digit '%c' in universal-character-name", c); - /* We shouldn't skip in case there are multibyte chars. */ - break; - } - } - } - -#ifdef TARGET_EBCDIC - cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target"); - code = 0x3f; /* EBCDIC invalid character */ -#else - /* True extended characters are OK. */ - if (code >= 0xa0 - && !(code & 0x80000000) - && !(code >= 0xD800 && code <= 0xDFFF)) - ; - /* The standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. */ - else if (code == 0x24 || code == 0x40 || code == 0x60) - ; - /* Don't give another error if one occurred above. */ - else if (length == 0) - cpp_error (pfile, DL_ERROR, "universal-character-name out of range"); -#endif - - *pstr = p; - *pc = code; - return 0; -} - -/* Returns the value of an escape sequence, truncated to the correct - target precision. PSTR points to the input pointer, which is just - after the backslash. LIMIT is how much text we have. WIDE is true - if the escape sequence is part of a wide character constant or - string literal. Handles all relevant diagnostics. */ -cppchar_t -cpp_parse_escape (pfile, pstr, limit, wide) - cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - int wide; -{ - int unknown = 0; - const unsigned char *str = *pstr; - cppchar_t c, mask; - unsigned int width; - - if (wide) - width = CPP_OPTION (pfile, wchar_precision); - else - width = CPP_OPTION (pfile, char_precision); - if (width < BITS_PER_CPPCHAR_T) - mask = ((cppchar_t) 1 << width) - 1; - else - mask = ~0; - - c = *str++; - switch (c) - { - case '\\': case '\'': case '"': case '?': break; - case 'b': c = TARGET_BS; break; - case 'f': c = TARGET_FF; break; - case 'n': c = TARGET_NEWLINE; break; - case 'r': c = TARGET_CR; break; - case 't': c = TARGET_TAB; break; - case 'v': c = TARGET_VT; break; - - case '(': case '{': case '[': case '%': - /* '\(', etc, are used at beginning of line to avoid confusing Emacs. - '\%' is used to prevent SCCS from getting confused. */ - unknown = CPP_PEDANTIC (pfile); - break; - - case 'a': - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\a' is different in traditional C"); - c = TARGET_BELL; - break; - - case 'e': case 'E': - if (CPP_PEDANTIC (pfile)) - cpp_error (pfile, DL_PEDWARN, - "non-ISO-standard escape sequence, '\\%c'", (int) c); - c = TARGET_ESC; - break; - - case 'u': case 'U': - unknown = maybe_read_ucs (pfile, &str, limit, &c); - break; - - case 'x': - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\x' is different in traditional C"); - - { - cppchar_t i = 0, overflow = 0; - int digits_found = 0; - - while (str < limit) - { - c = *str; - if (! ISXDIGIT (c)) - break; - str++; - overflow |= i ^ (i << 4 >> 4); - i = (i << 4) + hex_digit_value (c); - digits_found = 1; - } - - if (!digits_found) - cpp_error (pfile, DL_ERROR, - "\\x used with no following hex digits"); - - if (overflow | (i != (i & mask))) - { - cpp_error (pfile, DL_PEDWARN, - "hex escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - { - size_t count = 0; - cppchar_t i = c - '0'; - - while (str < limit && ++count < 3) - { - c = *str; - if (c < '0' || c > '7') - break; - str++; - i = (i << 3) + c - '0'; - } - - if (i != (i & mask)) - { - cpp_error (pfile, DL_PEDWARN, - "octal escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - default: - unknown = 1; - break; - } - - if (unknown) - { - if (ISGRAPH (c)) - cpp_error (pfile, DL_PEDWARN, - "unknown escape sequence '\\%c'", (int) c); - else - cpp_error (pfile, DL_PEDWARN, - "unknown escape sequence: '\\%03o'", (int) c); - } - - if (c > mask) - { - cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for its type"); - c &= mask; - } - - *pstr = str; - return c; -} - -/* Interpret a (possibly wide) character constant in TOKEN. - WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN - points to a variable that is filled in with the number of - characters seen, and UNSIGNEDP to a variable that indicates whether - the result has signed type. */ -cppchar_t -cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp) - cpp_reader *pfile; - const cpp_token *token; - unsigned int *pchars_seen; - int *unsignedp; -{ - const unsigned char *str = token->val.str.text; - const unsigned char *limit = str + token->val.str.len; - unsigned int chars_seen = 0; - size_t width, max_chars; - cppchar_t c, mask, result = 0; - bool unsigned_p; - -#ifdef MULTIBYTE_CHARS - (void) local_mbtowc (NULL, NULL, 0); -#endif - - /* Width in bits. */ - if (token->type == CPP_CHAR) - { - width = CPP_OPTION (pfile, char_precision); - max_chars = CPP_OPTION (pfile, int_precision) / width; - unsigned_p = CPP_OPTION (pfile, unsigned_char); - } - else - { - width = CPP_OPTION (pfile, wchar_precision); - max_chars = 1; - unsigned_p = CPP_OPTION (pfile, unsigned_wchar); - } - - if (width < BITS_PER_CPPCHAR_T) - mask = ((cppchar_t) 1 << width) - 1; - else - mask = ~0; - - while (str < limit) - { -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; - - char_len = local_mbtowc (&wc, str, limit - str); - if (char_len == -1) - { - cpp_error (pfile, DL_WARNING, - "ignoring invalid multibyte character"); - c = *str++; - } - else - { - str += char_len; - c = wc; - } -#else - c = *str++; -#endif - - if (c == '\\') - c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR); - -#ifdef MAP_CHARACTER - if (ISPRINT (c)) - c = MAP_CHARACTER (c); -#endif - - chars_seen++; - - /* Truncate the character, scale the result and merge the two. */ - c &= mask; - if (width < BITS_PER_CPPCHAR_T) - result = (result << width) | c; - else - result = c; - } - - if (chars_seen == 0) - cpp_error (pfile, DL_ERROR, "empty character constant"); - else if (chars_seen > 1) - { - /* Multichar charconsts are of type int and therefore signed. */ - unsigned_p = 0; - - if (chars_seen > max_chars) - { - chars_seen = max_chars; - cpp_error (pfile, DL_WARNING, - "character constant too long for its type"); - } - else if (CPP_OPTION (pfile, warn_multichar)) - cpp_error (pfile, DL_WARNING, "multi-character character constant"); - } - - /* Sign-extend or truncate the constant to cppchar_t. The value is - in WIDTH bits, but for multi-char charconsts it's value is the - full target type's width. */ - if (chars_seen > 1) - width *= max_chars; - if (width < BITS_PER_CPPCHAR_T) - { - mask = ((cppchar_t) 1 << width) - 1; - if (unsigned_p || !(result & (1 << (width - 1)))) - result &= mask; - else - result |= ~mask; - } - - *pchars_seen = chars_seen; - *unsignedp = unsigned_p; - return result; -} - /* Memory buffers. Changing these three constants can have a dramatic effect on performance. The values here are reasonable defaults, but might be tuned. If you adjust them, be sure to test across a @@ -2025,8 +1395,7 @@ cpp_interpret_charconst (pfile, token, pchars_seen, unsignedp) /* Create a new allocation buffer. Place the control block at the end of the buffer, so that buffer overflows will cause immediate chaos. */ static _cpp_buff * -new_buff (len) - size_t len; +new_buff (size_t len) { _cpp_buff *result; unsigned char *base; @@ -2046,9 +1415,7 @@ new_buff (len) /* Place a chain of unwanted allocation buffers on the free list. */ void -_cpp_release_buff (pfile, buff) - cpp_reader *pfile; - _cpp_buff *buff; +_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) { _cpp_buff *end = buff; @@ -2060,9 +1427,7 @@ _cpp_release_buff (pfile, buff) /* Return a free buffer of size at least MIN_SIZE. */ _cpp_buff * -_cpp_get_buff (pfile, min_size) - cpp_reader *pfile; - size_t min_size; +_cpp_get_buff (cpp_reader *pfile, size_t min_size) { _cpp_buff *result, **p; @@ -2091,10 +1456,7 @@ _cpp_get_buff (pfile, min_size) the excess bytes to the new buffer. Chains the new buffer after BUFF, and returns the new buffer. */ _cpp_buff * -_cpp_append_extend_buff (pfile, buff, min_extra) - cpp_reader *pfile; - _cpp_buff *buff; - size_t min_extra; +_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) { size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); _cpp_buff *new_buff = _cpp_get_buff (pfile, size); @@ -2110,10 +1472,7 @@ _cpp_append_extend_buff (pfile, buff, min_extra) Chains the new buffer before the buffer pointed to by BUFF, and updates the pointer to point to the new buffer. */ void -_cpp_extend_buff (pfile, pbuff, min_extra) - cpp_reader *pfile; - _cpp_buff **pbuff; - size_t min_extra; +_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) { _cpp_buff *new_buff, *old_buff = *pbuff; size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); @@ -2126,8 +1485,7 @@ _cpp_extend_buff (pfile, pbuff, min_extra) /* Free a chain of buffers starting at BUFF. */ void -_cpp_free_buff (buff) - _cpp_buff *buff; +_cpp_free_buff (_cpp_buff *buff) { _cpp_buff *next; @@ -2140,9 +1498,7 @@ _cpp_free_buff (buff) /* Allocate permanent, unaligned storage of length LEN. */ unsigned char * -_cpp_unaligned_alloc (pfile, len) - cpp_reader *pfile; - size_t len; +_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) { _cpp_buff *buff = pfile->u_buff; unsigned char *result = buff->cur; @@ -2170,9 +1526,7 @@ _cpp_unaligned_alloc (pfile, len) All existing other uses clearly fit this restriction: storing registered pragmas during initialization. */ unsigned char * -_cpp_aligned_alloc (pfile, len) - cpp_reader *pfile; - size_t len; +_cpp_aligned_alloc (cpp_reader *pfile, size_t len) { _cpp_buff *buff = pfile->a_buff; unsigned char *result = buff->cur;