X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fcpplex.c;h=d03096cdc638c8e4e2d580304cc40347106f9516;hb=e2585cd835ae2dfa42f12b9ff4bbb56d339a83c9;hp=26bda3605fb10231fc9a59f9ae9b1d87679b60d2;hpb=4b31a107f926a44e8f962d648c959ef5da936471;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/cpplex.c b/gcc/cpplex.c index 26bda3605fb..d03096cdc63 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -1,10 +1,9 @@ /* CPP Library - lexical analysis. - Copyright (C) 2000 Free Software Foundation, Inc. + Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. Contributed by Per Bothner, 1994-95. Based on CCCP program by Paul Rubin, June 1986 Adapted to ANSI C, Richard Stallman, Jan 1987 Broken out to separate file, Zack Weinberg, Mar 2000 - Single-pass line tokenization by Neil Booth, April 2000 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -20,45 +19,16 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* This lexer works with a single pass of the file. Recently I - re-wrote it to minimize the places where we step backwards in the - input stream, to make future changes to support multi-byte - character sets fairly straight-forward. - - There is now only one routine where we do step backwards: - skip_escaped_newlines. This routine could probably also be changed - so that it doesn't need to step back. One possibility is to use a - trick similar to that used in lex_period and lex_percent. Two - extra characters might be needed, but skip_escaped_newlines itself - would probably be the only place that needs to be aware of that, - and changes to the remaining routines would probably only be needed - if they process a backslash. */ - #include "config.h" #include "system.h" #include "cpplib.h" #include "cpphash.h" -/* MULTIBYTE_CHARS support only works for native compilers. - ??? Ideally what we want is to model widechar support after - the current floating point support. */ -#ifdef CROSS_COMPILE -#undef MULTIBYTE_CHARS -#endif - -#ifdef MULTIBYTE_CHARS -#include "mbchar.h" -#include -#endif - -/* Tokens with SPELL_STRING store their spelling in the token list, - and it's length in the token->val.name.len. */ enum spell_type { SPELL_OPERATOR = 0, - SPELL_CHAR, SPELL_IDENT, - SPELL_STRING, + SPELL_LITERAL, SPELL_NONE }; @@ -68,402 +38,388 @@ struct token_spelling const unsigned char *name; }; -const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:", - U":>", U"<%", U"%>"}; +static const unsigned char *const digraph_spellings[] = +{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; #define OP(e, s) { SPELL_OPERATOR, U s }, -#define TK(e, s) { s, U STRINGX (e) }, -const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE }; +#define TK(e, s) { s, U #e }, +static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; #undef OP #undef TK #define TOKEN_SPELL(token) (token_spellings[(token)->type].category) #define TOKEN_NAME(token) (token_spellings[(token)->type].name) -static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t)); -static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t)); -static cppchar_t get_effective_char PARAMS ((cpp_reader *)); - -static int skip_block_comment PARAMS ((cpp_reader *)); -static int skip_line_comment PARAMS ((cpp_reader *)); -static void adjust_column PARAMS ((cpp_reader *)); -static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); -static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *)); -static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *, - const U_CHAR *)); -static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int)); -static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *)); -static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t)); -static void unterminated PARAMS ((cpp_reader *, int)); -static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t)); -static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *)); -static void lex_percent PARAMS ((cpp_reader *, cpp_token *)); -static void lex_dot PARAMS ((cpp_reader *, cpp_token *)); -static int name_p PARAMS ((cpp_reader *, const cpp_string *)); -static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, - const unsigned char *, unsigned int *)); -static tokenrun *next_tokenrun PARAMS ((tokenrun *)); - -static cpp_chunk *new_chunk PARAMS ((unsigned int)); -static int chunk_suitable PARAMS ((cpp_chunk *, unsigned int)); -static unsigned int hex_digit_value PARAMS ((unsigned int)); -static _cpp_buff *new_buff PARAMS ((size_t)); +static void add_line_note (cpp_buffer *, const uchar *, unsigned int); +static int skip_line_comment (cpp_reader *); +static void skip_whitespace (cpp_reader *, cppchar_t); +static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *); +static void lex_number (cpp_reader *, cpp_string *); +static bool forms_identifier_p (cpp_reader *, int); +static void lex_string (cpp_reader *, cpp_token *, const uchar *); +static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t); +static void create_literal (cpp_reader *, cpp_token *, const uchar *, + unsigned int, enum cpp_ttype); +static bool warn_in_comment (cpp_reader *, _cpp_line_note *); +static int name_p (cpp_reader *, const cpp_string *); +static tokenrun *next_tokenrun (tokenrun *); + +static _cpp_buff *new_buff (size_t); + /* Utility routine: Compares, the token TOKEN to the NUL-terminated string STRING. TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */ - int -cpp_ideq (token, string) - const cpp_token *token; - const char *string; +cpp_ideq (const cpp_token *token, const char *string) { if (token->type != CPP_NAME) return 0; - return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string); + return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string); } -/* Call when meeting a newline. Returns the character after the newline - (or carriage-return newline combination), or EOF. */ -static cppchar_t -handle_newline (pfile, newline_char) - cpp_reader *pfile; - cppchar_t newline_char; +/* Record a note TYPE at byte POS into the current cleaned logical + line. */ +static void +add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type) { - cpp_buffer *buffer; - cppchar_t next = EOF; - - pfile->line++; - buffer = pfile->buffer; - buffer->col_adjust = 0; - buffer->line_base = buffer->cur; - - /* Handle CR-LF and LF-CR combinations, get the next character. */ - if (buffer->cur < buffer->rlimit) + if (buffer->notes_used == buffer->notes_cap) { - next = *buffer->cur++; - if (next + newline_char == '\r' + '\n') - { - buffer->line_base = buffer->cur; - if (buffer->cur < buffer->rlimit) - next = *buffer->cur++; - else - next = EOF; - } + buffer->notes_cap = buffer->notes_cap * 2 + 200; + buffer->notes = xrealloc (buffer->notes, + buffer->notes_cap * sizeof (_cpp_line_note)); } - buffer->read_ahead = next; - return next; + buffer->notes[buffer->notes_used].pos = pos; + buffer->notes[buffer->notes_used].type = type; + buffer->notes_used++; } -/* Subroutine of skip_escaped_newlines; called when a trigraph is - encountered. It warns if necessary, and returns true if the - trigraph should be honoured. FROM_CHAR is the third character of a - trigraph, and presumed to be the previous character for position - reporting. */ -static int -trigraph_ok (pfile, from_char) - cpp_reader *pfile; - cppchar_t from_char; +/* Returns with a logical line that contains no escaped newlines or + trigraphs. This is a time-critical inner loop. */ +void +_cpp_clean_line (cpp_reader *pfile) { - int accept = CPP_OPTION (pfile, trigraphs); - - /* Don't warn about trigraphs in comments. */ - if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment) - { - cpp_buffer *buffer = pfile->buffer; - - if (accept) - cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2, - "trigraph ??%c converted to %c", - (int) from_char, - (int) _cpp_trigraph_map[from_char]); - else if (buffer->cur != buffer->last_Wtrigraphs) - { - buffer->last_Wtrigraphs = buffer->cur; - cpp_warning_with_line (pfile, pfile->line, - CPP_BUF_COL (buffer) - 2, - "trigraph ??%c ignored", (int) from_char); - } - } - - return accept; -} + cpp_buffer *buffer; + const uchar *s; + uchar c, *d, *p; -/* Assumes local variables buffer and result. */ -#define ACCEPT_CHAR(t) \ - do { result->type = t; buffer->read_ahead = EOF; } while (0) - -/* When we move to multibyte character sets, add to these something - that saves and restores the state of the multibyte conversion - library. This probably involves saving and restoring a "cookie". - In the case of glibc it is an 8-byte structure, so is not a high - overhead operation. In any case, it's out of the fast path. */ -#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0) -#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0) - -/* Skips any escaped newlines introduced by NEXT, which is either a - '?' or a '\\'. Returns the next character, which will also have - been placed in buffer->read_ahead. This routine performs - preprocessing stages 1 and 2 of the ISO C standard. */ -static cppchar_t -skip_escaped_newlines (pfile, next) - cpp_reader *pfile; - cppchar_t next; -{ - cpp_buffer *buffer = pfile->buffer; + buffer = pfile->buffer; + buffer->cur_note = buffer->notes_used = 0; + buffer->cur = buffer->line_base = buffer->next_line; + buffer->need_line = false; + s = buffer->next_line - 1; - /* Only do this if we apply stages 1 and 2. */ if (!buffer->from_stage3) { - cppchar_t next1; - const unsigned char *saved_cur; - int space; - - do + /* Short circuit for the common case of an un-escaped line with + no trigraphs. The primary win here is by not writing any + data back to memory until we have to. */ + for (;;) { - if (buffer->cur == buffer->rlimit) - break; - - SAVE_STATE (); - if (next == '?') + c = *++s; + if (c == '\n' || c == '\r') + { + d = (uchar *) s; + + if (s == buffer->rlimit) + goto done; + + /* DOS line ending? */ + if (c == '\r' && s[1] == '\n') + s++; + + if (s == buffer->rlimit) + goto done; + + /* check for escaped newline */ + p = d; + while (p != buffer->next_line && is_nvspace (p[-1])) + p--; + if (p == buffer->next_line || p[-1] != '\\') + goto done; + + /* Have an escaped newline; process it and proceed to + the slow path. */ + add_line_note (buffer, p - 1, p != d ? ' ' : '\\'); + d = p - 2; + buffer->next_line = p - 1; + break; + } + if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) { - next1 = *buffer->cur++; - if (next1 != '?' || buffer->cur == buffer->rlimit) + /* Have a trigraph. We may or may not have to convert + it. Add a line note regardless, for -Wtrigraphs. */ + add_line_note (buffer, s, s[2]); + if (CPP_OPTION (pfile, trigraphs)) { - RESTORE_STATE (); + /* We do, and that means we have to switch to the + slow path. */ + d = (uchar *) s; + *d = _cpp_trigraph_map[s[2]]; + s += 2; break; } + } + } - next1 = *buffer->cur++; - if (!_cpp_trigraph_map[next1] - || !trigraph_ok (pfile, next1)) - { - RESTORE_STATE (); - break; - } - /* We have a full trigraph here. */ - next = _cpp_trigraph_map[next1]; - if (next != '\\' || buffer->cur == buffer->rlimit) - break; - SAVE_STATE (); - } + for (;;) + { + c = *++s; + *++d = c; - /* We have a backslash, and room for at least one more character. */ - space = 0; - do + if (c == '\n' || c == '\r') { - next1 = *buffer->cur++; - if (!is_nvspace (next1)) + /* Handle DOS line endings. */ + if (c == '\r' && s != buffer->rlimit && s[1] == '\n') + s++; + if (s == buffer->rlimit) + break; + + /* Escaped? */ + p = d; + while (p != buffer->next_line && is_nvspace (p[-1])) + p--; + if (p == buffer->next_line || p[-1] != '\\') break; - space = 1; - } - while (buffer->cur < buffer->rlimit); - if (!is_vspace (next1)) + add_line_note (buffer, p - 1, p != d ? ' ': '\\'); + d = p - 2; + buffer->next_line = p - 1; + } + else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]]) { - RESTORE_STATE (); - break; + /* Add a note regardless, for the benefit of -Wtrigraphs. */ + add_line_note (buffer, d, s[2]); + if (CPP_OPTION (pfile, trigraphs)) + { + *d = _cpp_trigraph_map[s[2]]; + s += 2; + } } - - if (space && !pfile->state.lexing_comment) - cpp_warning (pfile, "backslash and newline separated by space"); - - next = handle_newline (pfile, next1); - if (next == EOF) - cpp_pedwarn (pfile, "backslash-newline at end of file"); } - while (next == '\\' || next == '?'); + } + else + { + do + s++; + while (*s != '\n' && *s != '\r'); + d = (uchar *) s; + + /* Handle DOS line endings. */ + if (*s == '\r' && s != buffer->rlimit && s[1] == '\n') + s++; } - buffer->read_ahead = next; - return next; + done: + *d = '\n'; + /* A sentinel note that should never be processed. */ + add_line_note (buffer, d + 1, '\n'); + buffer->next_line = s + 1; } -/* Obtain the next character, after trigraph conversion and skipping - an arbitrary string of escaped newlines. The common case of no - trigraphs or escaped newlines falls through quickly. */ -static cppchar_t -get_effective_char (pfile) - cpp_reader *pfile; +/* Return true if the trigraph indicated by NOTE should be warned + about in a comment. */ +static bool +warn_in_comment (cpp_reader *pfile, _cpp_line_note *note) +{ + const uchar *p; + + /* Within comments we don't warn about trigraphs, unless the + trigraph forms an escaped newline, as that may change + behavior. */ + if (note->type != '/') + return false; + + /* If -trigraphs, then this was an escaped newline iff the next note + is coincident. */ + if (CPP_OPTION (pfile, trigraphs)) + return note[1].pos == note->pos; + + /* Otherwise, see if this forms an escaped newline. */ + p = note->pos + 3; + while (is_nvspace (*p)) + p++; + + /* There might have been escaped newlines between the trigraph and the + newline we found. Hence the position test. */ + return (*p == '\n' && p < note[1].pos); +} + +/* Process the notes created by add_line_note as far as the current + location. */ +void +_cpp_process_line_notes (cpp_reader *pfile, int in_comment) { cpp_buffer *buffer = pfile->buffer; - cppchar_t next = EOF; - if (buffer->cur < buffer->rlimit) + for (;;) { - next = *buffer->cur++; - - /* '?' can introduce trigraphs (and therefore backslash); '\\' - can introduce escaped newlines, which we want to skip, or - UCNs, which, depending upon lexer state, we will handle in - the future. */ - if (next == '?' || next == '\\') - next = skip_escaped_newlines (pfile, next); - } + _cpp_line_note *note = &buffer->notes[buffer->cur_note]; + unsigned int col; + + if (note->pos > buffer->cur) + break; + + buffer->cur_note++; + col = CPP_BUF_COLUMN (buffer, note->pos + 1); + + if (note->type == '\\' || note->type == ' ') + { + if (note->type == ' ' && !in_comment) + cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "backslash and newline separated by space"); - buffer->read_ahead = next; - return next; + if (buffer->next_line > buffer->rlimit) + { + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col, + "backslash-newline at end of file"); + /* Prevent "no newline at end of file" warning. */ + buffer->next_line = buffer->rlimit; + } + + buffer->line_base = note->pos; + CPP_INCREMENT_LINE (pfile, 0); + } + else if (_cpp_trigraph_map[note->type]) + { + if (CPP_OPTION (pfile, warn_trigraphs) + && (!in_comment || warn_in_comment (pfile, note))) + { + if (CPP_OPTION (pfile, trigraphs)) + cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "trigraph ??%c converted to %c", + note->type, + (int) _cpp_trigraph_map[note->type]); + else + { + cpp_error_with_line + (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col, + "trigraph ??%c ignored, use -trigraphs to enable", + note->type); + } + } + } + else + abort (); + } } /* Skip a C-style block comment. We find the end of the comment by seeing if an asterisk is before every '/' we encounter. Returns - non-zero if comment terminated by EOF, zero otherwise. */ -static int -skip_block_comment (pfile) - cpp_reader *pfile; + nonzero if comment terminated by EOF, zero otherwise. + + Buffer->cur points to the initial asterisk of the comment. */ +bool +_cpp_skip_block_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; - cppchar_t c = EOF, prevc = EOF; - - pfile->state.lexing_comment = 1; - while (buffer->cur != buffer->rlimit) - { - prevc = c, c = *buffer->cur++; + const uchar *cur = buffer->cur; + uchar c; - next_char: - /* FIXME: For speed, create a new character class of characters - of interest inside block comments. */ - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile, c); + cur++; + if (*cur == '/') + cur++; + for (;;) + { /* People like decorating comments with '*', so check for '/' instead for efficiency. */ + c = *cur++; + if (c == '/') { - if (prevc == '*') + if (cur[-2] == '*') break; /* Warn about potential nested comments, but not if the '/' - comes immediately before the true comment delimeter. + comes immediately before the true comment delimiter. Don't bother to get it right across escaped newlines. */ if (CPP_OPTION (pfile, warn_comments) - && buffer->cur != buffer->rlimit) + && cur[0] == '*' && cur[1] != '/') { - prevc = c, c = *buffer->cur++; - if (c == '*' && buffer->cur != buffer->rlimit) - { - prevc = c, c = *buffer->cur++; - if (c != '/') - cpp_warning_with_line (pfile, pfile->line, - CPP_BUF_COL (buffer) - 2, - "\"/*\" within comment"); - } - goto next_char; + buffer->cur = cur; + cpp_error_with_line (pfile, CPP_DL_WARNING, + pfile->line_table->highest_line, CPP_BUF_COL (buffer), + "\"/*\" within comment"); } } - else if (is_vspace (c)) + else if (c == '\n') { - prevc = c, c = handle_newline (pfile, c); - goto next_char; + unsigned int cols; + buffer->cur = cur - 1; + _cpp_process_line_notes (pfile, true); + if (buffer->next_line >= buffer->rlimit) + return true; + _cpp_clean_line (pfile); + + cols = buffer->next_line - buffer->line_base; + CPP_INCREMENT_LINE (pfile, cols); + + cur = buffer->cur; } - else if (c == '\t') - adjust_column (pfile); } - pfile->state.lexing_comment = 0; - buffer->read_ahead = EOF; - return c != '/' || prevc != '*'; + buffer->cur = cur; + _cpp_process_line_notes (pfile, true); + return false; } -/* Skip a C++ line comment. Handles escaped newlines. Returns - non-zero if a multiline comment. The following new line, if any, - is left in buffer->read_ahead. */ +/* Skip a C++ line comment, leaving buffer->cur pointing to the + terminating newline. Handles escaped newlines. Returns nonzero + if a multiline comment. */ static int -skip_line_comment (pfile) - cpp_reader *pfile; +skip_line_comment (cpp_reader *pfile) { cpp_buffer *buffer = pfile->buffer; - unsigned int orig_line = pfile->line; - cppchar_t c; + unsigned int orig_line = pfile->line_table->highest_line; - pfile->state.lexing_comment = 1; - do - { - c = EOF; - if (buffer->cur == buffer->rlimit) - break; - - c = *buffer->cur++; - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile, c); - } - while (!is_vspace (c)); - - pfile->state.lexing_comment = 0; - buffer->read_ahead = c; /* Leave any newline for caller. */ - return orig_line != pfile->line; -} - -/* pfile->buffer->cur is one beyond the \t character. Update - col_adjust so we track the column correctly. */ -static void -adjust_column (pfile) - cpp_reader *pfile; -{ - cpp_buffer *buffer = pfile->buffer; - unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */ + while (*buffer->cur != '\n') + buffer->cur++; - /* Round it up to multiple of the tabstop, but subtract 1 since the - tab itself occupies a character position. */ - buffer->col_adjust += (CPP_OPTION (pfile, tabstop) - - col % CPP_OPTION (pfile, tabstop)) - 1; + _cpp_process_line_notes (pfile, true); + return orig_line != pfile->line_table->highest_line; } -/* Skips whitespace, saving the next non-whitespace character. - Adjusts pfile->col_adjust to account for tabs. Without this, - tokens might be assigned an incorrect column. */ +/* Skips whitespace, saving the next non-whitespace character. */ static void -skip_whitespace (pfile, c) - cpp_reader *pfile; - cppchar_t c; +skip_whitespace (cpp_reader *pfile, cppchar_t c) { cpp_buffer *buffer = pfile->buffer; - unsigned int warned = 0; + bool saw_NUL = false; do { /* Horizontal space always OK. */ - if (c == ' ') + if (c == ' ' || c == '\t') ; - else if (c == '\t') - adjust_column (pfile); /* Just \f \v or \0 left. */ else if (c == '\0') - { - if (!warned) - { - cpp_warning (pfile, "null character(s) ignored"); - warned = 1; - } - } + saw_NUL = true; else if (pfile->state.in_directive && CPP_PEDANTIC (pfile)) - cpp_pedwarn_with_line (pfile, pfile->line, - CPP_BUF_COL (buffer), - "%s in preprocessing directive", - c == '\f' ? "form feed" : "vertical tab"); + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, + CPP_BUF_COL (buffer), + "%s in preprocessing directive", + c == '\f' ? "form feed" : "vertical tab"); - c = EOF; - if (buffer->cur == buffer->rlimit) - break; c = *buffer->cur++; } /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */ while (is_nvspace (c)); - /* Remember the next character. */ - buffer->read_ahead = c; + if (saw_NUL) + cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored"); + + buffer->cur--; } /* See if the characters of a number token are valid in a name (no '.', '+' or '-'). */ static int -name_p (pfile, string) - cpp_reader *pfile; - const cpp_string *string; +name_p (cpp_reader *pfile, const cpp_string *string) { unsigned int i; @@ -471,458 +427,226 @@ name_p (pfile, string) if (!is_idchar (string->text[i])) return 0; - return 1; + return 1; } -/* Parse an identifier, skipping embedded backslash-newlines. This is - a critical inner loop. The common case is an identifier which has - not been split by backslash-newline, does not contain a dollar - sign, and has already been scanned (roughly 10:1 ratio of - seen:unseen identifiers in normal code; the distribution is - Poisson-like). Second most common case is a new identifier, not - split and no dollar sign. The other possibilities are rare and - have been relegated to parse_identifier_slow. */ +/* Returns TRUE if the sequence starting at buffer->cur is invalid in + an identifier. FIRST is TRUE if this starts an identifier. */ +static bool +forms_identifier_p (cpp_reader *pfile, int first) +{ + cpp_buffer *buffer = pfile->buffer; + + if (*buffer->cur == '$') + { + if (!CPP_OPTION (pfile, dollars_in_ident)) + return false; + + buffer->cur++; + if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) + { + CPP_OPTION (pfile, warn_dollars) = 0; + cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); + } + + return true; + } + + /* Is this a syntactically valid UCN? */ + if (0 && *buffer->cur == '\\' + && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) + { + buffer->cur += 2; + if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first)) + return true; + buffer->cur -= 2; + } + return false; +} + +/* Lex an identifier starting at BUFFER->CUR - 1. */ static cpp_hashnode * -parse_identifier (pfile) - cpp_reader *pfile; +lex_identifier (cpp_reader *pfile, const uchar *base) { cpp_hashnode *result; - const U_CHAR *cur, *rlimit; + const uchar *cur; - /* Fast-path loop. Skim over a normal identifier. - N.B. ISIDNUM does not include $. */ - cur = pfile->buffer->cur - 1; - rlimit = pfile->buffer->rlimit; do - cur++; - while (cur < rlimit && ISIDNUM (*cur)); - - /* Check for slow-path cases. */ - if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$')) - result = parse_identifier_slow (pfile, cur); - else { - const U_CHAR *base = pfile->buffer->cur - 1; - result = (cpp_hashnode *) - ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); + cur = pfile->buffer->cur; + + /* N.B. ISIDNUM does not include $. */ + while (ISIDNUM (*cur)) + cur++; + pfile->buffer->cur = cur; } + while (forms_identifier_p (pfile, false)); + + result = (cpp_hashnode *) + ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); - /* Rarely, identifiers require diagnostics when lexed. - XXX Has to be forced out of the fast path. */ + /* Rarely, identifiers require diagnostics when lexed. */ if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) && !pfile->state.skipping, 0)) { /* It is allowed to poison the same identifier twice. */ if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok) - cpp_error (pfile, "attempt to use poisoned \"%s\"", + cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"", NODE_NAME (result)); /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the replacement list of a variadic macro. */ if (result == pfile->spec_nodes.n__VA_ARGS__ && !pfile->state.va_args_ok) - cpp_pedwarn (pfile, - "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"); + cpp_error (pfile, CPP_DL_PEDWARN, + "__VA_ARGS__ can only appear in the expansion" + " of a C99 variadic macro"); } return result; } -/* Slow path. This handles identifiers which have been split, and - identifiers which contain dollar signs. The part of the identifier - from PFILE->buffer->cur-1 to CUR has already been scanned. */ -static cpp_hashnode * -parse_identifier_slow (pfile, cur) - cpp_reader *pfile; - const U_CHAR *cur; -{ - cpp_buffer *buffer = pfile->buffer; - const U_CHAR *base = buffer->cur - 1; - struct obstack *stack = &pfile->hash_table->stack; - unsigned int c, saw_dollar = 0, len; - - /* Copy the part of the token which is known to be okay. */ - obstack_grow (stack, base, cur - base); - - /* Now process the part which isn't. We are looking at one of - '$', '\\', or '?' on entry to this loop. */ - c = *cur++; - buffer->cur = cur; - do - { - while (is_idchar (c)) - { - obstack_1grow (stack, c); - - if (c == '$') - saw_dollar++; - - c = EOF; - if (buffer->cur == buffer->rlimit) - break; - - c = *buffer->cur++; - } - - /* Potential escaped newline? */ - if (c != '?' && c != '\\') - break; - c = skip_escaped_newlines (pfile, c); - } - while (is_idchar (c)); - - /* Remember the next character. */ - buffer->read_ahead = c; - - /* $ is not a identifier character in the standard, but is commonly - accepted as an extension. Don't warn about it in skipped - conditional blocks. */ - if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->state.skipping) - cpp_pedwarn (pfile, "'$' character(s) in identifier"); - - /* Identifiers are null-terminated. */ - len = obstack_object_size (stack); - obstack_1grow (stack, '\0'); - - return (cpp_hashnode *) - ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED); -} - -/* Parse a number, skipping embedded backslash-newlines. */ +/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */ static void -parse_number (pfile, number, c, leading_period) - cpp_reader *pfile; - cpp_string *number; - cppchar_t c; - int leading_period; +lex_number (cpp_reader *pfile, cpp_string *number) { - cpp_buffer *buffer = pfile->buffer; - unsigned char *dest, *limit; - - dest = BUFF_FRONT (pfile->u_buff); - limit = BUFF_LIMIT (pfile->u_buff); + const uchar *cur; + const uchar *base; + uchar *dest; - /* Place a leading period. */ - if (leading_period) - { - if (dest == limit) - { - pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 1); - dest = BUFF_FRONT (pfile->u_buff); - limit = BUFF_LIMIT (pfile->u_buff); - } - *dest++ = '.'; - } - + base = pfile->buffer->cur - 1; do { - do - { - /* Need room for terminating null. */ - if ((size_t) (limit - dest) < 2) - { - size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff); - pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2); - dest = BUFF_FRONT (pfile->u_buff) + len_so_far; - limit = BUFF_LIMIT (pfile->u_buff); - } - *dest++ = c; - - c = EOF; - if (buffer->cur == buffer->rlimit) - break; + cur = pfile->buffer->cur; - c = *buffer->cur++; - } - while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1])); + /* N.B. ISIDNUM does not include $. */ + while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1])) + cur++; - /* Potential escaped newline? */ - if (c != '?' && c != '\\') - break; - c = skip_escaped_newlines (pfile, c); + pfile->buffer->cur = cur; } - while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1])); - - /* Remember the next character. */ - buffer->read_ahead = c; - - /* Null-terminate the number. */ - *dest = '\0'; + while (forms_identifier_p (pfile, false)); - number->text = BUFF_FRONT (pfile->u_buff); - number->len = dest - number->text; - BUFF_FRONT (pfile->u_buff) = dest + 1; + number->len = cur - base; + dest = _cpp_unaligned_alloc (pfile, number->len + 1); + memcpy (dest, base, number->len); + dest[number->len] = '\0'; + number->text = dest; } -/* Subroutine of parse_string. Emits error for unterminated strings. */ +/* Create a token of type TYPE with a literal spelling. */ static void -unterminated (pfile, term) - cpp_reader *pfile; - int term; +create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, + unsigned int len, enum cpp_ttype type) { - cpp_error (pfile, "missing terminating %c character", term); + uchar *dest = _cpp_unaligned_alloc (pfile, len + 1); - if (term == '\"' && pfile->mls_line && pfile->mls_line != pfile->line) - { - cpp_error_with_line (pfile, pfile->mls_line, pfile->mls_col, - "possible start of unterminated string literal"); - pfile->mls_line = 0; - } -} - -/* Subroutine of parse_string. */ -static int -unescaped_terminator_p (pfile, dest) - cpp_reader *pfile; - const unsigned char *dest; -{ - const unsigned char *start, *temp; - - /* In #include-style directives, terminators are not escapeable. */ - if (pfile->state.angled_headers) - return 1; - - start = BUFF_FRONT (pfile->u_buff); - - /* An odd number of consecutive backslashes represents an escaped - terminator. */ - for (temp = dest; temp > start && temp[-1] == '\\'; temp--) - ; - - return ((dest - temp) & 1) == 0; + memcpy (dest, base, len); + dest[len] = '\0'; + token->type = type; + token->val.str.len = len; + token->val.str.text = dest; } -/* Parses a string, character constant, or angle-bracketed header file - name. Handles embedded trigraphs and escaped newlines. The stored - string is guaranteed NUL-terminated, but it is not guaranteed that - this is the first NUL since embedded NULs are preserved. +/* Lexes a string, character constant, or angle-bracketed header file + name. The stored string contains the spelling, including opening + quote and leading any leading 'L'. It returns the type of the + literal, or CPP_OTHER if it was not properly terminated. - Multi-line strings are allowed, but they are deprecated. */ + The spelling is NUL-terminated, but it is not guaranteed that this + is the first NUL since embedded NULs are preserved. */ static void -parse_string (pfile, token, terminator) - cpp_reader *pfile; - cpp_token *token; - cppchar_t terminator; +lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) { - cpp_buffer *buffer = pfile->buffer; - unsigned char *dest, *limit; - cppchar_t c; - bool warned_nulls = false, warned_multi = false; - - dest = BUFF_FRONT (pfile->u_buff); - limit = BUFF_LIMIT (pfile->u_buff); + bool saw_NUL = false; + const uchar *cur; + cppchar_t terminator; + enum cpp_ttype type; + + cur = base; + terminator = *cur++; + if (terminator == 'L') + terminator = *cur++; + if (terminator == '\"') + type = *base == 'L' ? CPP_WSTRING: CPP_STRING; + else if (terminator == '\'') + type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; + else + terminator = '>', type = CPP_HEADER_NAME; for (;;) { - if (buffer->cur == buffer->rlimit) - c = EOF; - else - c = *buffer->cur++; - - have_char: - /* We need space for the terminating NUL. */ - if ((size_t) (limit - dest) < 1) - { - size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff); - pfile->u_buff = _cpp_extend_buff (pfile, pfile->u_buff, 2); - dest = BUFF_FRONT (pfile->u_buff) + len_so_far; - limit = BUFF_LIMIT (pfile->u_buff); - } - - if (c == EOF) - { - unterminated (pfile, terminator); - break; - } + cppchar_t c = *cur++; - /* Handle trigraphs, escaped newlines etc. */ - if (c == '?' || c == '\\') - c = skip_escaped_newlines (pfile, c); - - if (c == terminator && unescaped_terminator_p (pfile, dest)) + /* In #include-style directives, terminators are not escapable. */ + if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') + cur++; + else if (c == terminator) + break; + else if (c == '\n') { - c = EOF; + cur--; + type = CPP_OTHER; break; } - else if (is_vspace (c)) - { - /* In assembly language, silently terminate string and - character literals at end of line. This is a kludge - around not knowing where comments are. */ - if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>') - break; - - /* Character constants and header names may not extend over - multiple lines. In Standard C, neither may strings. - Unfortunately, we accept multiline strings as an - extension, except in #include family directives. */ - if (terminator != '"' || pfile->state.angled_headers) - { - unterminated (pfile, terminator); - break; - } - - if (!warned_multi) - { - warned_multi = true; - cpp_pedwarn (pfile, "multi-line string literals are deprecated"); - } - - if (pfile->mls_line == 0) - { - pfile->mls_line = token->line; - pfile->mls_col = token->col; - } - - c = handle_newline (pfile, c); - *dest++ = '\n'; - goto have_char; - } - else if (c == '\0' && !warned_nulls) - { - warned_nulls = true; - cpp_warning (pfile, "null character(s) preserved in literal"); - } - - *dest++ = c; + else if (c == '\0') + saw_NUL = true; } - /* Remember the next character. */ - buffer->read_ahead = c; - *dest = '\0'; + if (saw_NUL && !pfile->state.skipping) + cpp_error (pfile, CPP_DL_WARNING, + "null character(s) preserved in literal"); - token->val.str.text = BUFF_FRONT (pfile->u_buff); - token->val.str.len = dest - BUFF_FRONT (pfile->u_buff); - BUFF_FRONT (pfile->u_buff) = dest + 1; + pfile->buffer->cur = cur; + create_literal (pfile, token, base, cur - base, type); } /* The stored comment includes the comment start and any terminator. */ static void -save_comment (pfile, token, from) - cpp_reader *pfile; - cpp_token *token; - const unsigned char *from; +save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from, + cppchar_t type) { unsigned char *buffer; - unsigned int len; - + unsigned int len, clen; + len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */ + /* C++ comments probably (not definitely) have moved past a new line, which we don't want to save in the comment. */ - if (pfile->buffer->read_ahead != EOF) + if (is_vspace (pfile->buffer->cur[-1])) len--; - buffer = _cpp_unaligned_alloc (pfile, len); - - token->type = CPP_COMMENT; - token->val.str.len = len; - token->val.str.text = buffer; - buffer[0] = '/'; - memcpy (buffer + 1, from, len - 1); -} + /* If we are currently in a directive, then we need to store all + C++ comments as C comments internally, and so we need to + allocate a little extra space in that case. -/* Subroutine of _cpp_lex_direct to handle '%'. A little tricky, since we - want to avoid stepping back when lexing %:%X. */ -static void -lex_percent (pfile, result) - cpp_reader *pfile; - cpp_token *result; -{ - cpp_buffer *buffer= pfile->buffer; - cppchar_t c; - - result->type = CPP_MOD; - /* Parsing %:%X could leave an extra character. */ - if (buffer->extra_char == EOF) - c = get_effective_char (pfile); - else - { - c = buffer->read_ahead = buffer->extra_char; - buffer->extra_char = EOF; - } + Note that the only time we encounter a directive here is + when we are saving comments in a "#define". */ + clen = (pfile->state.in_directive && type == '/') ? len + 2 : len; - if (c == '=') - ACCEPT_CHAR (CPP_MOD_EQ); - else if (CPP_OPTION (pfile, digraphs)) - { - if (c == ':') - { - result->flags |= DIGRAPH; - ACCEPT_CHAR (CPP_HASH); - if (get_effective_char (pfile) == '%') - { - buffer->extra_char = get_effective_char (pfile); - if (buffer->extra_char == ':') - { - buffer->extra_char = EOF; - ACCEPT_CHAR (CPP_PASTE); - } - else - /* We'll catch the extra_char when we're called back. */ - buffer->read_ahead = '%'; - } - } - else if (c == '>') - { - result->flags |= DIGRAPH; - ACCEPT_CHAR (CPP_CLOSE_BRACE); - } - } -} + buffer = _cpp_unaligned_alloc (pfile, clen); -/* Subroutine of _cpp_lex_direct to handle '.'. This is tricky, since we - want to avoid stepping back when lexing '...' or '.123'. In the - latter case we should also set a flag for parse_number. */ -static void -lex_dot (pfile, result) - cpp_reader *pfile; - cpp_token *result; -{ - cpp_buffer *buffer = pfile->buffer; - cppchar_t c; + token->type = CPP_COMMENT; + token->val.str.len = clen; + token->val.str.text = buffer; - /* Parsing ..X could leave an extra character. */ - if (buffer->extra_char == EOF) - c = get_effective_char (pfile); - else - { - c = buffer->read_ahead = buffer->extra_char; - buffer->extra_char = EOF; - } + buffer[0] = '/'; + memcpy (buffer + 1, from, len - 1); - /* All known character sets have 0...9 contiguous. */ - if (c >= '0' && c <= '9') - { - result->type = CPP_NUMBER; - parse_number (pfile, &result->val.str, c, 1); - } - else + /* Finish conversion to a C comment, if necessary. */ + if (pfile->state.in_directive && type == '/') { - result->type = CPP_DOT; - if (c == '.') - { - buffer->extra_char = get_effective_char (pfile); - if (buffer->extra_char == '.') - { - buffer->extra_char = EOF; - ACCEPT_CHAR (CPP_ELLIPSIS); - } - else - /* We'll catch the extra_char when we're called back. */ - buffer->read_ahead = '.'; - } - else if (c == '*' && CPP_OPTION (pfile, cplusplus)) - ACCEPT_CHAR (CPP_DOT_STAR); + buffer[1] = '*'; + buffer[clen - 2] = '*'; + buffer[clen - 1] = '/'; } } /* Allocate COUNT tokens for RUN. */ void -_cpp_init_tokenrun (run, count) - tokenrun *run; - unsigned int count; +_cpp_init_tokenrun (tokenrun *run, unsigned int count) { run->base = xnewvec (cpp_token, count); run->limit = run->base + count; @@ -931,8 +655,7 @@ _cpp_init_tokenrun (run, count) /* Returns the next tokenrun, or creates one if there is none. */ static tokenrun * -next_tokenrun (run) - tokenrun *run; +next_tokenrun (tokenrun *run) { if (run->next == NULL) { @@ -949,8 +672,7 @@ next_tokenrun (run) same as the last lexed token, so that diagnostics appear in the right place. */ cpp_token * -_cpp_temp_token (pfile) - cpp_reader *pfile; +_cpp_temp_token (cpp_reader *pfile) { cpp_token *old, *result; @@ -962,17 +684,15 @@ _cpp_temp_token (pfile) } result = pfile->cur_token++; - result->line = old->line; - result->col = old->col; + result->src_loc = old->src_loc; return result; } /* Lex a token into RESULT (external interface). Takes care of issues like directive handling, token lookahead, multiple include - opimisation and skipping. */ + optimization and skipping. */ const cpp_token * -_cpp_lex_token (pfile) - cpp_reader *pfile; +_cpp_lex_token (cpp_reader *pfile) { cpp_token *result; @@ -997,11 +717,14 @@ _cpp_lex_token (pfile) /* Is this a directive. If _cpp_handle_directive returns false, it is an assembler #. */ if (result->type == CPP_HASH - && !pfile->state.parsing_args + /* 6.10.3 p 11: Directives in a list of macro arguments + gives undefined behavior. This implementation + handles the directive as normal. */ + && pfile->state.parsing_args != 1 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE)) continue; if (pfile->cb.line_change && !pfile->state.skipping) - (*pfile->cb.line_change)(pfile, result, pfile->state.parsing_args); + pfile->cb.line_change (pfile, result, pfile->state.parsing_args); } /* We don't skip tokens in directives. */ @@ -1010,7 +733,7 @@ _cpp_lex_token (pfile) /* Outside a directive, invalidate controlling macros. At file EOF, _cpp_lex_direct takes care of popping the buffer, so we never - get here and MI optimisation works. */ + get here and MI optimization works. */ pfile->mi_valid = false; if (!pfile->state.skipping || result->type == CPP_EOF) @@ -1020,11 +743,66 @@ _cpp_lex_token (pfile) return result; } +/* Returns true if a fresh line has been loaded. */ +bool +_cpp_get_fresh_line (cpp_reader *pfile) +{ + int return_at_eof; + + /* We can't get a new line until we leave the current directive. */ + if (pfile->state.in_directive) + return false; + + for (;;) + { + cpp_buffer *buffer = pfile->buffer; + + if (!buffer->need_line) + return true; + + if (buffer->next_line < buffer->rlimit) + { + _cpp_clean_line (pfile); + return true; + } + + /* First, get out of parsing arguments state. */ + if (pfile->state.parsing_args) + return false; + + /* End of buffer. Non-empty files should end in a newline. */ + if (buffer->buf != buffer->rlimit + && buffer->next_line > buffer->rlimit + && !buffer->from_stage3) + { + /* Only warn once. */ + buffer->next_line = buffer->rlimit; + cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, + CPP_BUF_COLUMN (buffer, buffer->cur), + "no newline at end of file"); + } + + return_at_eof = buffer->return_at_eof; + _cpp_pop_buffer (pfile); + if (pfile->buffer == NULL || return_at_eof) + return false; + } +} + +#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \ + do \ + { \ + result->type = ELSE_TYPE; \ + if (*buffer->cur == CHAR) \ + buffer->cur++, result->type = THEN_TYPE; \ + } \ + while (0) + /* Lex a token into pfile->cur_token, which is also incremented, to get diagnostics pointing to the correct location. Does not handle issues such as token lookahead, multiple-include - optimisation, directives, skipping etc. This function is only + optimization, directives, skipping etc. This function is only suitable for use by _cpp_lex_token, and in special cases like lex_expansion_token which doesn't care for any of these issues. @@ -1032,8 +810,7 @@ _cpp_lex_token (pfile) otherwise returns to the start of the token buffer if permissible. Returns the location of the lexed token. */ cpp_token * -_cpp_lex_direct (pfile) - cpp_reader *pfile; +_cpp_lex_direct (cpp_reader *pfile) { cppchar_t c; cpp_buffer *buffer; @@ -1041,105 +818,74 @@ _cpp_lex_direct (pfile) cpp_token *result = pfile->cur_token++; fresh_line: + result->flags = 0; buffer = pfile->buffer; - result->flags = buffer->saved_flags; - buffer->saved_flags = 0; - update_tokens_line: - result->line = pfile->line; - - skipped_white: - c = buffer->read_ahead; - if (c == EOF && buffer->cur < buffer->rlimit) - c = *buffer->cur++; - result->col = CPP_BUF_COLUMN (buffer, buffer->cur); - buffer->read_ahead = EOF; - - trigraph: - switch (c) + if (buffer->need_line) { - case EOF: - buffer->saved_flags = BOL; - if (!pfile->state.parsing_args && !pfile->state.in_directive) + if (!_cpp_get_fresh_line (pfile)) { - if (buffer->cur != buffer->line_base) + result->type = CPP_EOF; + if (!pfile->state.in_directive) { - /* Non-empty files should end in a newline. Don't warn - for command line and _Pragma buffers. */ - if (!buffer->from_stage3) - cpp_pedwarn (pfile, "no newline at end of file"); - handle_newline (pfile, '\n'); + /* Tell the compiler the line number of the EOF token. */ + result->src_loc = pfile->line_table->highest_line; + result->flags = BOL; } + return result; + } + if (!pfile->keep_tokens) + { + pfile->cur_run = &pfile->base_run; + result = pfile->base_run.base; + pfile->cur_token = result + 1; + } + result->flags = BOL; + if (pfile->state.parsing_args == 2) + result->flags |= PREV_WHITE; + } + buffer = pfile->buffer; + update_tokens_line: + result->src_loc = pfile->line_table->highest_line; - /* Don't pop the last buffer. */ - if (buffer->prev) - { - unsigned char stop = buffer->return_at_eof; + skipped_white: + if (buffer->cur >= buffer->notes[buffer->cur_note].pos + && !pfile->overlaid_buffer) + { + _cpp_process_line_notes (pfile, false); + result->src_loc = pfile->line_table->highest_line; + } + c = *buffer->cur++; - _cpp_pop_buffer (pfile); - if (!stop) - goto fresh_line; - } - } - result->type = CPP_EOF; - break; + LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table, + CPP_BUF_COLUMN (buffer, buffer->cur)); + switch (c) + { case ' ': case '\t': case '\f': case '\v': case '\0': - skip_whitespace (pfile, c); result->flags |= PREV_WHITE; - goto skipped_white; - - case '\n': case '\r': - handle_newline (pfile, c); - buffer->saved_flags = BOL; - if (! pfile->state.in_directive) - { - if (pfile->state.parsing_args == 2) - buffer->saved_flags |= PREV_WHITE; - if (!pfile->keep_tokens) - { - pfile->cur_run = &pfile->base_run; - result = pfile->base_run.base; - pfile->cur_token = result + 1; - } - goto fresh_line; - } - result->type = CPP_EOF; - break; + skip_whitespace (pfile, c); + goto skipped_white; - case '?': - case '\\': - /* These could start an escaped newline, or '?' a trigraph. Let - skip_escaped_newlines do all the work. */ - { - unsigned int line = pfile->line; - - c = skip_escaped_newlines (pfile, c); - if (line != pfile->line) - /* We had at least one escaped newline of some sort, and the - next character is in buffer->read_ahead. Update the - token's line and column. */ - goto update_tokens_line; - - /* We are either the original '?' or '\\', or a trigraph. */ - result->type = CPP_QUERY; - buffer->read_ahead = EOF; - if (c == '\\') - goto random_char; - else if (c != '?') - goto trigraph; - } - break; + case '\n': + if (buffer->cur < buffer->rlimit) + CPP_INCREMENT_LINE (pfile, 0); + buffer->need_line = true; + goto fresh_line; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': result->type = CPP_NUMBER; - parse_number (pfile, &result->val.str, c, 0); + lex_number (pfile, &result->val.str); break; - case '$': - if (!CPP_OPTION (pfile, dollars_in_ident)) - goto random_char; - /* Fall through... */ + case 'L': + /* 'L' may introduce wide characters or strings. */ + if (*buffer->cur == '\'' || *buffer->cur == '"') + { + lex_string (pfile, result, buffer->cur - 1); + break; + } + /* Fall through. */ case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': @@ -1148,80 +894,66 @@ _cpp_lex_direct (pfile) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = parse_identifier (pfile); + result->val.node = lex_identifier (pfile, buffer->cur - 1); - /* 'L' may introduce wide characters or strings. */ - if (result->val.node == pfile->spec_nodes.n_L) - { - c = buffer->read_ahead; - if (c == EOF && buffer->cur < buffer->rlimit) - c = *buffer->cur; - if (c == '\'' || c == '"') - { - buffer->cur++; - ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR); - goto make_string; - } - } /* Convert named operators to their proper types. */ - else if (result->val.node->flags & NODE_OPERATOR) + if (result->val.node->flags & NODE_OPERATOR) { result->flags |= NAMED_OP; - result->type = result->val.node->value.operator; + result->type = result->val.node->directive_index; } break; case '\'': case '"': - result->type = c == '"' ? CPP_STRING: CPP_CHAR; - make_string: - parse_string (pfile, result, c); + lex_string (pfile, result, buffer->cur - 1); break; case '/': /* A potential block or line comment. */ comment_start = buffer->cur; - result->type = CPP_DIV; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_DIV_EQ); - if (c != '/' && c != '*') - break; + c = *buffer->cur; if (c == '*') { - if (skip_block_comment (pfile)) - cpp_error (pfile, "unterminated comment"); + if (_cpp_skip_block_comment (pfile)) + cpp_error (pfile, CPP_DL_ERROR, "unterminated comment"); } - else + else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments) + || cpp_in_system_header (pfile))) { - if (!CPP_OPTION (pfile, cplusplus_comments) - && !CPP_IN_SYSTEM_HEADER (pfile)) - break; - /* Warn about comments only if pedantically GNUC89, and not in system headers. */ if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile) && ! buffer->warned_cplusplus_comments) { - cpp_pedwarn (pfile, - "C++ style comments are not allowed in ISO C89"); - cpp_pedwarn (pfile, - "(this will be reported only once per input file)"); + cpp_error (pfile, CPP_DL_PEDWARN, + "C++ style comments are not allowed in ISO C90"); + cpp_error (pfile, CPP_DL_PEDWARN, + "(this will be reported only once per input file)"); buffer->warned_cplusplus_comments = 1; } - /* Skip_line_comment updates buffer->read_ahead. */ if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments)) - cpp_warning (pfile, "multi-line comment"); + cpp_error (pfile, CPP_DL_WARNING, "multi-line comment"); + } + else if (c == '=') + { + buffer->cur++; + result->type = CPP_DIV_EQ; + break; + } + else + { + result->type = CPP_DIV; + break; } - /* Skipping the comment has updated buffer->read_ahead. */ if (!pfile->state.save_comments) { result->flags |= PREV_WHITE; @@ -1229,157 +961,156 @@ _cpp_lex_direct (pfile) } /* Save the comment as a token in its own right. */ - save_comment (pfile, result, comment_start); + save_comment (pfile, result, comment_start, c); break; case '<': if (pfile->state.angled_headers) { - result->type = CPP_HEADER_NAME; - c = '>'; /* terminator. */ - goto make_string; + lex_string (pfile, result, buffer->cur - 1); + break; } result->type = CPP_LESS; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_LESS_EQ); - else if (c == '<') - { - ACCEPT_CHAR (CPP_LSHIFT); - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_LSHIFT_EQ); - } - else if (c == '?' && CPP_OPTION (pfile, cplusplus)) + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_LESS_EQ; + else if (*buffer->cur == '<') { - ACCEPT_CHAR (CPP_MIN); - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_MIN_EQ); + buffer->cur++; + IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT); } - else if (c == ':' && CPP_OPTION (pfile, digraphs)) + else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus)) { - ACCEPT_CHAR (CPP_OPEN_SQUARE); - result->flags |= DIGRAPH; + buffer->cur++; + IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN); } - else if (c == '%' && CPP_OPTION (pfile, digraphs)) + else if (CPP_OPTION (pfile, digraphs)) { - ACCEPT_CHAR (CPP_OPEN_BRACE); - result->flags |= DIGRAPH; + if (*buffer->cur == ':') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_OPEN_SQUARE; + } + else if (*buffer->cur == '%') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_OPEN_BRACE; + } } break; case '>': result->type = CPP_GREATER; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_GREATER_EQ); - else if (c == '>') + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_GREATER_EQ; + else if (*buffer->cur == '>') { - ACCEPT_CHAR (CPP_RSHIFT); - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_RSHIFT_EQ); + buffer->cur++; + IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT); } - else if (c == '?' && CPP_OPTION (pfile, cplusplus)) + else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus)) { - ACCEPT_CHAR (CPP_MAX); - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_MAX_EQ); + buffer->cur++; + IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX); } break; case '%': - lex_percent (pfile, result); + result->type = CPP_MOD; + if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_MOD_EQ; + else if (CPP_OPTION (pfile, digraphs)) + { + if (*buffer->cur == ':') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_HASH; + if (*buffer->cur == '%' && buffer->cur[1] == ':') + buffer->cur += 2, result->type = CPP_PASTE; + } + else if (*buffer->cur == '>') + { + buffer->cur++; + result->flags |= DIGRAPH; + result->type = CPP_CLOSE_BRACE; + } + } break; case '.': - lex_dot (pfile, result); + result->type = CPP_DOT; + if (ISDIGIT (*buffer->cur)) + { + result->type = CPP_NUMBER; + lex_number (pfile, &result->val.str); + } + else if (*buffer->cur == '.' && buffer->cur[1] == '.') + buffer->cur += 2, result->type = CPP_ELLIPSIS; + else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_DOT_STAR; break; case '+': result->type = CPP_PLUS; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_PLUS_EQ); - else if (c == '+') - ACCEPT_CHAR (CPP_PLUS_PLUS); + if (*buffer->cur == '+') + buffer->cur++, result->type = CPP_PLUS_PLUS; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_PLUS_EQ; break; case '-': result->type = CPP_MINUS; - c = get_effective_char (pfile); - if (c == '>') + if (*buffer->cur == '>') { - ACCEPT_CHAR (CPP_DEREF); - if (CPP_OPTION (pfile, cplusplus) - && get_effective_char (pfile) == '*') - ACCEPT_CHAR (CPP_DEREF_STAR); + buffer->cur++; + result->type = CPP_DEREF; + if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_DEREF_STAR; } - else if (c == '=') - ACCEPT_CHAR (CPP_MINUS_EQ); - else if (c == '-') - ACCEPT_CHAR (CPP_MINUS_MINUS); - break; - - case '*': - result->type = CPP_MULT; - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_MULT_EQ); - break; - - case '=': - result->type = CPP_EQ; - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_EQ_EQ); - break; - - case '!': - result->type = CPP_NOT; - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_NOT_EQ); + else if (*buffer->cur == '-') + buffer->cur++, result->type = CPP_MINUS_MINUS; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_MINUS_EQ; break; case '&': result->type = CPP_AND; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_AND_EQ); - else if (c == '&') - ACCEPT_CHAR (CPP_AND_AND); - break; - - case '#': - result->type = CPP_HASH; - if (get_effective_char (pfile) == '#') - ACCEPT_CHAR (CPP_PASTE); + if (*buffer->cur == '&') + buffer->cur++, result->type = CPP_AND_AND; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_AND_EQ; break; case '|': result->type = CPP_OR; - c = get_effective_char (pfile); - if (c == '=') - ACCEPT_CHAR (CPP_OR_EQ); - else if (c == '|') - ACCEPT_CHAR (CPP_OR_OR); - break; - - case '^': - result->type = CPP_XOR; - if (get_effective_char (pfile) == '=') - ACCEPT_CHAR (CPP_XOR_EQ); + if (*buffer->cur == '|') + buffer->cur++, result->type = CPP_OR_OR; + else if (*buffer->cur == '=') + buffer->cur++, result->type = CPP_OR_EQ; break; case ':': result->type = CPP_COLON; - c = get_effective_char (pfile); - if (c == ':' && CPP_OPTION (pfile, cplusplus)) - ACCEPT_CHAR (CPP_SCOPE); - else if (c == '>' && CPP_OPTION (pfile, digraphs)) + if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus)) + buffer->cur++, result->type = CPP_SCOPE; + else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs)) { + buffer->cur++; result->flags |= DIGRAPH; - ACCEPT_CHAR (CPP_CLOSE_SQUARE); + result->type = CPP_CLOSE_SQUARE; } break; + case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break; + case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break; + case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break; + case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break; + case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break; + + case '?': result->type = CPP_QUERY; break; case '~': result->type = CPP_COMPL; break; case ',': result->type = CPP_COMMA; break; case '(': result->type = CPP_OPEN_PAREN; break; @@ -1390,46 +1121,55 @@ _cpp_lex_direct (pfile) case '}': result->type = CPP_CLOSE_BRACE; break; case ';': result->type = CPP_SEMICOLON; break; - /* @ is a punctuator in Objective C. */ + /* @ is a punctuator in Objective-C. */ case '@': result->type = CPP_ATSIGN; break; - random_char: + case '$': + case '\\': + { + const uchar *base = --buffer->cur; + + if (forms_identifier_p (pfile, true)) + { + result->type = CPP_NAME; + result->val.node = lex_identifier (pfile, base); + break; + } + buffer->cur++; + } + default: - result->type = CPP_OTHER; - result->val.c = c; + create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER); break; } return result; } -/* An upper bound on the number of bytes needed to spell a token, - including preceding whitespace. */ +/* An upper bound on the number of bytes needed to spell TOKEN. + Does not include preceding whitespace. */ unsigned int -cpp_token_len (token) - const cpp_token *token; +cpp_token_len (const cpp_token *token) { unsigned int len; switch (TOKEN_SPELL (token)) { - default: len = 0; break; - case SPELL_STRING: len = token->val.str.len; break; + default: len = 4; break; + case SPELL_LITERAL: len = token->val.str.len; break; case SPELL_IDENT: len = NODE_LEN (token->val.node); break; } - /* 1 for whitespace, 4 for comment delimeters. */ - return len + 5; + + return len; } /* Write the spelling of a token TOKEN to BUFFER. The buffer must already contain the enough space to hold the token's spelling. - Returns a pointer to the character after the last character - written. */ + Returns a pointer to the character after the last character written. + FIXME: Would be nice if we didn't need the PFILE argument. */ unsigned char * -cpp_spell_token (pfile, token, buffer) - cpp_reader *pfile; /* Would be nice to be rid of this... */ - const cpp_token *token; - unsigned char *buffer; +cpp_spell_token (cpp_reader *pfile, const cpp_token *token, + unsigned char *buffer) { switch (TOKEN_SPELL (token)) { @@ -1445,58 +1185,38 @@ cpp_spell_token (pfile, token, buffer) goto spell_ident; else spelling = TOKEN_NAME (token); - + while ((c = *spelling++) != '\0') *buffer++ = c; } break; + spell_ident: case SPELL_IDENT: - spell_ident: memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); buffer += NODE_LEN (token->val.node); break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: left = '\0'; right = '\0'; tag = '\0'; break; - } - if (tag) *buffer++ = tag; - if (left) *buffer++ = left; - memcpy (buffer, token->val.str.text, token->val.str.len); - buffer += token->val.str.len; - if (right) *buffer++ = right; - } - break; - - case SPELL_CHAR: - *buffer++ = token->val.c; + case SPELL_LITERAL: + memcpy (buffer, token->val.str.text, token->val.str.len); + buffer += token->val.str.len; break; case SPELL_NONE: - cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token)); + cpp_error (pfile, CPP_DL_ICE, + "unspellable token %s", TOKEN_NAME (token)); break; } return buffer; } -/* Returns a token as a null-terminated string. The string is - temporary, and automatically freed later. Useful for diagnostics. */ +/* Returns TOKEN spelt as a null-terminated string. The string is + freed when the reader is destroyed. Useful for diagnostics. */ unsigned char * -cpp_token_as_text (pfile, token) - cpp_reader *pfile; - const cpp_token *token; -{ - unsigned int len = cpp_token_len (token); +cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) +{ + unsigned int len = cpp_token_len (token) + 1; unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; end = cpp_spell_token (pfile, token, start); @@ -1505,10 +1225,10 @@ cpp_token_as_text (pfile, token) return start; } -/* Used by C front ends. Should really move to using cpp_token_as_text. */ +/* Used by C front ends, which really should move to using + cpp_token_as_text. */ const char * -cpp_type2name (type) - enum cpp_ttype type; +cpp_type2name (enum cpp_ttype type) { return (const char *) token_spellings[type].name; } @@ -1517,9 +1237,7 @@ cpp_type2name (type) Separated from cpp_spell_token for efficiency - to avoid stdio double-buffering. */ void -cpp_output_token (token, fp) - const cpp_token *token; - FILE *fp; +cpp_output_token (const cpp_token *token, FILE *fp) { switch (TOKEN_SPELL (token)) { @@ -1548,27 +1266,8 @@ cpp_output_token (token, fp) fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); break; - case SPELL_STRING: - { - int left, right, tag; - switch (token->type) - { - case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break; - case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break; - case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break; - case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break; - case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break; - default: left = '\0'; right = '\0'; tag = '\0'; break; - } - if (tag) putc (tag, fp); - if (left) putc (left, fp); - fwrite (token->val.str.text, 1, token->val.str.len, fp); - if (right) putc (right, fp); - } - break; - - case SPELL_CHAR: - putc (token->val.c, fp); + case SPELL_LITERAL: + fwrite (token->val.str.text, 1, token->val.str.len, fp); break; case SPELL_NONE: @@ -1579,8 +1278,7 @@ cpp_output_token (token, fp) /* Compare two tokens. */ int -_cpp_equiv_tokens (a, b) - const cpp_token *a, *b; +_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b) { if (a->type == b->type && a->flags == b->flags) switch (TOKEN_SPELL (a)) @@ -1588,13 +1286,11 @@ _cpp_equiv_tokens (a, b) default: /* Keep compiler happy. */ case SPELL_OPERATOR: return 1; - case SPELL_CHAR: - return a->val.c == b->val.c; /* Character. */ case SPELL_NONE: return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no); case SPELL_IDENT: return a->val.node == b->val.node; - case SPELL_STRING: + case SPELL_LITERAL: return (a->val.str.len == b->val.str.len && !memcmp (a->val.str.text, b->val.str.text, a->val.str.len)); @@ -1607,11 +1303,9 @@ _cpp_equiv_tokens (a, b) accidental token paste for output. For simplicity, it is conservative, and occasionally advises a space where one is not needed, e.g. "." and ".2". */ - int -cpp_avoid_paste (pfile, token1, token2) - cpp_reader *pfile; - const cpp_token *token1, *token2; +cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1, + const cpp_token *token2) { enum cpp_ttype a = token1->type, b = token2->type; cppchar_t c; @@ -1651,9 +1345,12 @@ cpp_avoid_paste (pfile, token1, token2) || b == CPP_CHAR || b == CPP_STRING); /* L */ case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME || c == '.' || c == '+' || c == '-'); - case CPP_OTHER: return (CPP_OPTION (pfile, objc) - && token1->val.c == '@' - && (b == CPP_NAME || b == CPP_STRING)); + /* UCNs */ + case CPP_OTHER: return ((token1->val.str.text[0] == '\\' + && b == CPP_NAME) + || (CPP_OPTION (pfile, objc) + && token1->val.str.text[0] == '@' + && (b == CPP_NAME || b == CPP_STRING))); default: break; } @@ -1664,9 +1361,7 @@ cpp_avoid_paste (pfile, token1, token2) character, to FP. Leading whitespace is removed. If there are macros, special token padding is not performed. */ void -cpp_output_line (pfile, fp) - cpp_reader *pfile; - FILE *fp; +cpp_output_line (cpp_reader *pfile, FILE *fp) { const cpp_token *token; @@ -1682,348 +1377,6 @@ cpp_output_line (pfile, fp) putc ('\n', fp); } -/* Returns the value of a hexadecimal digit. */ -static unsigned int -hex_digit_value (c) - unsigned int c; -{ - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - if (c >= '0' && c <= '9') - return c - '0'; - abort (); -} - -/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate - failure if cpplib is not parsing C++ or C99. Such failure is - silent, and no variables are updated. Otherwise returns 0, and - warns if -Wtraditional. - - [lex.charset]: The character designated by the universal character - name \UNNNNNNNN is that character whose character short name in - ISO/IEC 10646 is NNNNNNNN; the character designated by the - universal character name \uNNNN is that character whose character - short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value - for a universal character name is less than 0x20 or in the range - 0x7F-0x9F (inclusive), or if the universal character name - designates a character in the basic source character set, then the - program is ill-formed. - - We assume that wchar_t is Unicode, so we don't need to do any - mapping. Is this ever wrong? - - PC points to the 'u' or 'U', PSTR is points to the byte after PC, - LIMIT is the end of the string or charconst. PSTR is updated to - point after the UCS on return, and the UCS is written into PC. */ - -static int -maybe_read_ucs (pfile, pstr, limit, pc) - cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - unsigned int *pc; -{ - const unsigned char *p = *pstr; - unsigned int code = 0; - unsigned int c = *pc, length; - - /* Only attempt to interpret a UCS for C++ and C99. */ - if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) - return 1; - - if (CPP_WTRADITIONAL (pfile)) - cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c); - - length = (c == 'u' ? 4: 8); - - if ((size_t) (limit - p) < length) - { - cpp_error (pfile, "incomplete universal-character-name"); - /* Skip to the end to avoid more diagnostics. */ - p = limit; - } - else - { - for (; length; length--, p++) - { - c = *p; - if (ISXDIGIT (c)) - code = (code << 4) + hex_digit_value (c); - else - { - cpp_error (pfile, - "non-hex digit '%c' in universal-character-name", c); - /* We shouldn't skip in case there are multibyte chars. */ - break; - } - } - } - -#ifdef TARGET_EBCDIC - cpp_error (pfile, "universal-character-name on EBCDIC target"); - code = 0x3f; /* EBCDIC invalid character */ -#else - /* True extended characters are OK. */ - if (code >= 0xa0 - && !(code & 0x80000000) - && !(code >= 0xD800 && code <= 0xDFFF)) - ; - /* The standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. */ - else if (code == 0x24 || code == 0x40 || code == 0x60) - ; - /* Don't give another error if one occurred above. */ - else if (length == 0) - cpp_error (pfile, "universal-character-name out of range"); -#endif - - *pstr = p; - *pc = code; - return 0; -} - -/* Interpret an escape sequence, and return its value. PSTR points to - the input pointer, which is just after the backslash. LIMIT is how - much text we have. MASK is a bitmask for the precision for the - destination type (char or wchar_t). TRADITIONAL, if true, does not - interpret escapes that did not exist in traditional C. - - Handles all relevant diagnostics. */ - -unsigned int -cpp_parse_escape (pfile, pstr, limit, mask, traditional) - cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - unsigned HOST_WIDE_INT mask; - int traditional; -{ - int unknown = 0; - const unsigned char *str = *pstr; - unsigned int c = *str++; - - switch (c) - { - case '\\': case '\'': case '"': case '?': break; - case 'b': c = TARGET_BS; break; - case 'f': c = TARGET_FF; break; - case 'n': c = TARGET_NEWLINE; break; - case 'r': c = TARGET_CR; break; - case 't': c = TARGET_TAB; break; - case 'v': c = TARGET_VT; break; - - case '(': case '{': case '[': case '%': - /* '\(', etc, are used at beginning of line to avoid confusing Emacs. - '\%' is used to prevent SCCS from getting confused. */ - unknown = CPP_PEDANTIC (pfile); - break; - - case 'a': - if (CPP_WTRADITIONAL (pfile)) - cpp_warning (pfile, "the meaning of '\\a' varies with -traditional"); - if (!traditional) - c = TARGET_BELL; - break; - - case 'e': case 'E': - if (CPP_PEDANTIC (pfile)) - cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c); - c = TARGET_ESC; - break; - - case 'u': case 'U': - unknown = maybe_read_ucs (pfile, &str, limit, &c); - break; - - case 'x': - if (CPP_WTRADITIONAL (pfile)) - cpp_warning (pfile, "the meaning of '\\x' varies with -traditional"); - - if (!traditional) - { - unsigned int i = 0, overflow = 0; - int digits_found = 0; - - while (str < limit) - { - c = *str; - if (! ISXDIGIT (c)) - break; - str++; - overflow |= i ^ (i << 4 >> 4); - i = (i << 4) + hex_digit_value (c); - digits_found = 1; - } - - if (!digits_found) - cpp_error (pfile, "\\x used with no following hex digits"); - - if (overflow | (i != (i & mask))) - { - cpp_pedwarn (pfile, "hex escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - { - unsigned int i = c - '0'; - int count = 0; - - while (str < limit && ++count < 3) - { - c = *str; - if (c < '0' || c > '7') - break; - str++; - i = (i << 3) + c - '0'; - } - - if (i != (i & mask)) - { - cpp_pedwarn (pfile, "octal escape sequence out of range"); - i &= mask; - } - c = i; - } - break; - - default: - unknown = 1; - break; - } - - if (unknown) - { - if (ISGRAPH (c)) - cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c); - else - cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c); - } - - if (c > mask) - cpp_pedwarn (pfile, "escape sequence out of range for character"); - - *pstr = str; - return c; -} - -#ifndef MAX_CHAR_TYPE_SIZE -#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE -#endif - -#ifndef MAX_WCHAR_TYPE_SIZE -#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE -#endif - -/* Interpret a (possibly wide) character constant in TOKEN. - WARN_MULTI warns about multi-character charconsts, if not - TRADITIONAL. TRADITIONAL also indicates not to interpret escapes - that did not exist in traditional C. PCHARS_SEEN points to a - variable that is filled in with the number of characters seen. */ -HOST_WIDE_INT -cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen) - cpp_reader *pfile; - const cpp_token *token; - int warn_multi; - int traditional; - unsigned int *pchars_seen; -{ - const unsigned char *str = token->val.str.text; - const unsigned char *limit = str + token->val.str.len; - unsigned int chars_seen = 0; - unsigned int width, max_chars, c; - unsigned HOST_WIDE_INT mask; - HOST_WIDE_INT result = 0; - -#ifdef MULTIBYTE_CHARS - (void) local_mbtowc (NULL, NULL, 0); -#endif - - /* Width in bits. */ - if (token->type == CPP_CHAR) - width = MAX_CHAR_TYPE_SIZE; - else - width = MAX_WCHAR_TYPE_SIZE; - - if (width < HOST_BITS_PER_WIDE_INT) - mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; - else - mask = ~0; - max_chars = HOST_BITS_PER_WIDE_INT / width; - - while (str < limit) - { -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; - - char_len = local_mbtowc (&wc, str, limit - str); - if (char_len == -1) - { - cpp_warning (pfile, "ignoring invalid multibyte character"); - c = *str++; - } - else - { - str += char_len; - c = wc; - } -#else - c = *str++; -#endif - - if (c == '\\') - c = cpp_parse_escape (pfile, &str, limit, mask, traditional); - -#ifdef MAP_CHARACTER - if (ISPRINT (c)) - c = MAP_CHARACTER (c); -#endif - - /* Merge character into result; ignore excess chars. */ - if (++chars_seen <= max_chars) - { - if (width < HOST_BITS_PER_WIDE_INT) - result = (result << width) | (c & mask); - else - result = c; - } - } - - if (chars_seen == 0) - cpp_error (pfile, "empty character constant"); - else if (chars_seen > max_chars) - { - chars_seen = max_chars; - cpp_warning (pfile, "character constant too long"); - } - else if (chars_seen > 1 && !traditional && warn_multi) - cpp_warning (pfile, "multi-character character constant"); - - /* If char type is signed, sign-extend the constant. The - __CHAR_UNSIGNED__ macro is set by the driver if appropriate. */ - if (token->type == CPP_CHAR && chars_seen) - { - unsigned int nbits = chars_seen * width; - unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits); - - if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO - || ((result >> (nbits - 1)) & 1) == 0) - result &= mask; - else - result |= ~mask; - } - - *pchars_seen = chars_seen; - return result; -} - /* Memory buffers. Changing these three constants can have a dramatic effect on performance. The values here are reasonable defaults, but might be tuned. If you adjust them, be sure to test across a @@ -2031,35 +1384,25 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen) expansion. Also check the change in peak memory usage (NJAMD is a good tool for this). */ #define MIN_BUFF_SIZE 8000 -#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2) +#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2) #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \ (MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2) -struct dummy -{ - char c; - union - { - double d; - int *p; - } u; -}; - -#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u)) -#define CPP_ALIGN(size, align) (((size) + ((align) - 1)) & ~((align) - 1)) +#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0) + #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE! +#endif /* Create a new allocation buffer. Place the control block at the end of the buffer, so that buffer overflows will cause immediate chaos. */ static _cpp_buff * -new_buff (len) - size_t len; +new_buff (size_t len) { _cpp_buff *result; unsigned char *base; if (len < MIN_BUFF_SIZE) len = MIN_BUFF_SIZE; - len = CPP_ALIGN (len, DEFAULT_ALIGNMENT); + len = CPP_ALIGN (len); base = xmalloc (len + sizeof (_cpp_buff)); result = (_cpp_buff *) (base + len); @@ -2072,9 +1415,7 @@ new_buff (len) /* Place a chain of unwanted allocation buffers on the free list. */ void -_cpp_release_buff (pfile, buff) - cpp_reader *pfile; - _cpp_buff *buff; +_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff) { _cpp_buff *end = buff; @@ -2086,9 +1427,7 @@ _cpp_release_buff (pfile, buff) /* Return a free buffer of size at least MIN_SIZE. */ _cpp_buff * -_cpp_get_buff (pfile, min_size) - cpp_reader *pfile; - size_t min_size; +_cpp_get_buff (cpp_reader *pfile, size_t min_size) { _cpp_buff *result, **p; @@ -2102,7 +1441,7 @@ _cpp_get_buff (pfile, min_size) size = result->limit - result->base; /* Return a buffer that's big enough, but don't waste one that's way too big. */ - if (size >= min_size && size < BUFF_SIZE_UPPER_BOUND (min_size)) + if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size)) break; } @@ -2112,26 +1451,41 @@ _cpp_get_buff (pfile, min_size) return result; } -/* Return a buffer chained on the end of BUFF. Copy to it the - uncommitted remaining bytes of BUFF, with at least MIN_EXTRA more - bytes. */ +/* Creates a new buffer with enough space to hold the uncommitted + remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies + the excess bytes to the new buffer. Chains the new buffer after + BUFF, and returns the new buffer. */ _cpp_buff * -_cpp_extend_buff (pfile, buff, min_extra) - cpp_reader *pfile; - _cpp_buff *buff; - size_t min_extra; +_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra) { size_t size = EXTENDED_BUFF_SIZE (buff, min_extra); + _cpp_buff *new_buff = _cpp_get_buff (pfile, size); + + buff->next = new_buff; + memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff)); + return new_buff; +} - buff->next = _cpp_get_buff (pfile, size); - memcpy (buff->next->base, buff->cur, buff->limit - buff->cur); - return buff->next; +/* Creates a new buffer with enough space to hold the uncommitted + remaining bytes of the buffer pointed to by BUFF, and at least + MIN_EXTRA more bytes. Copies the excess bytes to the new buffer. + Chains the new buffer before the buffer pointed to by BUFF, and + updates the pointer to point to the new buffer. */ +void +_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra) +{ + _cpp_buff *new_buff, *old_buff = *pbuff; + size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra); + + new_buff = _cpp_get_buff (pfile, size); + memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff)); + new_buff->next = old_buff; + *pbuff = new_buff; } /* Free a chain of buffers starting at BUFF. */ void -_cpp_free_buff (buff) - _cpp_buff *buff; +_cpp_free_buff (_cpp_buff *buff) { _cpp_buff *next; @@ -2144,9 +1498,7 @@ _cpp_free_buff (buff) /* Allocate permanent, unaligned storage of length LEN. */ unsigned char * -_cpp_unaligned_alloc (pfile, len) - cpp_reader *pfile; - size_t len; +_cpp_unaligned_alloc (cpp_reader *pfile, size_t len) { _cpp_buff *buff = pfile->u_buff; unsigned char *result = buff->cur; @@ -2163,120 +1515,30 @@ _cpp_unaligned_alloc (pfile, len) return result; } -static int -chunk_suitable (chunk, size) - cpp_chunk *chunk; - unsigned int size; -{ - /* Being at least twice SIZE means we can use memcpy in - _cpp_next_chunk rather than memmove. Besides, it's a good idea - anyway. */ - return (chunk && (unsigned int) (chunk->limit - chunk->base) >= size * 2); -} +/* Allocate permanent, unaligned storage of length LEN from a_buff. + That buffer is used for growing allocations when saving macro + replacement lists in a #define, and when parsing an answer to an + assertion in #assert, #unassert or #if (and therefore possibly + whilst expanding macros). It therefore must not be used by any + code that they might call: specifically the lexer and the guts of + the macro expander. -/* Returns the end of the new pool. PTR points to a char in the old - pool, and is updated to point to the same char in the new pool. */ + All existing other uses clearly fit this restriction: storing + registered pragmas during initialization. */ unsigned char * -_cpp_next_chunk (pool, len, ptr) - cpp_pool *pool; - unsigned int len; - unsigned char **ptr; -{ - cpp_chunk *chunk = pool->cur->next; - - /* LEN is the minimum size we want in the new pool. */ - len += POOL_ROOM (pool); - if (! chunk_suitable (chunk, len)) - { - chunk = new_chunk (POOL_SIZE (pool) * 2 + len); - - chunk->next = pool->cur->next; - pool->cur->next = chunk; - } - - /* Update the pointer before changing chunk's front. */ - if (ptr) - *ptr += chunk->base - POOL_FRONT (pool); - - memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool)); - chunk->front = chunk->base; - - pool->cur = chunk; - return POOL_LIMIT (pool); -} - -static cpp_chunk * -new_chunk (size) - unsigned int size; -{ - unsigned char *base; - cpp_chunk *result; - - size = POOL_ALIGN (size, DEFAULT_ALIGNMENT); - base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk)); - /* Put the chunk descriptor at the end. Then chunk overruns will - cause obvious chaos. */ - result = (cpp_chunk *) (base + size); - result->base = base; - result->front = base; - result->limit = base + size; - result->next = 0; - - return result; -} - -void -_cpp_init_pool (pool, size, align, temp) - cpp_pool *pool; - unsigned int size, align, temp; -{ - if (align == 0) - align = DEFAULT_ALIGNMENT; - if (align & (align - 1)) - abort (); - pool->align = align; - pool->first = new_chunk (size); - pool->cur = pool->first; - if (temp) - pool->cur->next = pool->cur; -} - -void -_cpp_free_pool (pool) - cpp_pool *pool; +_cpp_aligned_alloc (cpp_reader *pfile, size_t len) { - cpp_chunk *chunk = pool->first, *next; + _cpp_buff *buff = pfile->a_buff; + unsigned char *result = buff->cur; - do + if (len > (size_t) (buff->limit - result)) { - next = chunk->next; - free (chunk->base); - chunk = next; + buff = _cpp_get_buff (pfile, len); + buff->next = pfile->a_buff; + pfile->a_buff = buff; + result = buff->cur; } - while (chunk && chunk != pool->first); -} - -/* Reserve LEN bytes from a memory pool. */ -unsigned char * -_cpp_pool_reserve (pool, len) - cpp_pool *pool; - unsigned int len; -{ - len = POOL_ALIGN (len, pool->align); - if (len > (unsigned int) POOL_ROOM (pool)) - _cpp_next_chunk (pool, len, 0); - - return POOL_FRONT (pool); -} -/* Allocate LEN bytes from a memory pool. */ -unsigned char * -_cpp_pool_alloc (pool, len) - cpp_pool *pool; - unsigned int len; -{ - unsigned char *result = _cpp_pool_reserve (pool, len); - - POOL_COMMIT (pool, len); + buff->cur = result + len; return result; }