From 13c457e1923eefa04f4b52e5321ef6e2379d6a8b Mon Sep 17 00:00:00 2001 From: neil Date: Sat, 4 May 2002 07:30:32 +0000 Subject: [PATCH] * c-lex.c (lex_string): Let cpp_parse_escape handle truncation and sign-extension. (lex_charconst): Update for change in prototype of cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT appropriately. * cpphash.h (BITS_PER_CPPCHAR_T): New. * cppinit.c (cpp_create_reader): Initialize the new precision members for no change in semantics. (cpp_post_options): Add sanity checks. * cpplex.c (cpp_parse_escape): Handle precision, sign-extension and truncation issues. Calculate in type cppchar_t. (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove. (cpp_interpret_charconst): Calculate in type cppchar_t. Handle run-time dependent precision correctly. Return whether the result is signed or not. * cpplib.c (dequote_string): Use cppchar_t; update. * cpplib.h (cppchar_signed_t): New. (struct cpp_options): New precision members. (cpp_interpret_charconst, cpp_parse_escape): Update prototypes. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@53152 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 22 ++++++++++++++++ gcc/c-lex.c | 66 +++++++++++++++-------------------------------- gcc/cppexp.c | 8 +++--- gcc/cpphash.h | 2 ++ gcc/cppinit.c | 33 ++++++++++++++++++++++++ gcc/cpplex.c | 82 ++++++++++++++++++++++++++++++++--------------------------- gcc/cpplib.c | 12 ++------- gcc/cpplib.h | 28 ++++++++++++++------ 8 files changed, 149 insertions(+), 104 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 144f74499d5..ba33bc4e61e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2002-05-04 Neil Booth + + * c-lex.c (lex_string): Let cpp_parse_escape handle truncation + and sign-extension. + (lex_charconst): Update for change in prototype of + cpp_interpret_charconst. Extend from cppchar_t to HOST_WIDE_INT + appropriately. + * cpphash.h (BITS_PER_CPPCHAR_T): New. 
+ * cppinit.c (cpp_create_reader): Initialize the new precision + members for no change in semantics. + (cpp_post_options): Add sanity checks. + * cpplex.c (cpp_parse_escape): Handle precision, sign-extension + and truncation issues. Calculate in type cppchar_t. + (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): Remove. + (cpp_interpret_charconst): Calculate in type cppchar_t. Handle + run-time dependent precision correctly. Return whether the + result is signed or not. + * cpplib.c (dequote_string): Use cppchar_t; update. + * cpplib.h (cppchar_signed_t): New. + (struct cpp_options): New precision members. + (cpp_interpret_charconst, cpp_parse_escape): Update prototypes. + 2002-05-03 David S. Miller * config/sparc/sparc-protos.h (sparc_rtx_costs): New. diff --git a/gcc/c-lex.c b/gcc/c-lex.c index acdcf340c32..0c10f30672e 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -1238,9 +1238,7 @@ lex_string (str, len, wide) char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1)); char *q = buf; const unsigned char *p = str, *limit = str + len; - unsigned int c; - unsigned width = wide ? WCHAR_TYPE_SIZE - : TYPE_PRECISION (char_type_node); + cppchar_t c; #ifdef MULTIBYTE_CHARS /* Reset multibyte conversion state. */ @@ -1270,15 +1268,7 @@ lex_string (str, len, wide) #endif if (c == '\\' && !ignore_escape_flag) - { - unsigned int mask; - - if (width < HOST_BITS_PER_INT) - mask = ((unsigned int) 1 << width) - 1; - else - mask = ~0; - c = cpp_parse_escape (parse_in, &p, limit, mask); - } + c = cpp_parse_escape (parse_in, &p, limit, wide); /* Add this single character into the buffer either as a wchar_t, a multibyte sequence, or as a single byte. 
*/ @@ -1345,45 +1335,31 @@ static tree lex_charconst (token) const cpp_token *token; { - HOST_WIDE_INT result; + cppchar_t result; tree type, value; unsigned int chars_seen; + int unsignedp; result = cpp_interpret_charconst (parse_in, token, warn_multichar, - &chars_seen); - if (token->type == CPP_WCHAR) - { - value = build_int_2 (result, 0); - type = wchar_type_node; - } - else - { - if (result < 0) - value = build_int_2 (result, -1); - else - value = build_int_2 (result, 0); - - /* In C, a character constant has type 'int'. - In C++ 'char', but multi-char charconsts have type 'int'. */ - if (c_language == clk_cplusplus && chars_seen <= 1) - type = char_type_node; - else - type = integer_type_node; - } + &chars_seen, &unsignedp); - /* cpp_interpret_charconst issues a warning if the constant - overflows, but if the number fits in HOST_WIDE_INT anyway, it - will return it un-truncated, which may cause problems down the - line. So set the type to widest_integer_literal_type, call - convert to truncate it to the proper type, then clear - TREE_OVERFLOW so we don't get a second warning. - - FIXME: cpplib's assessment of overflow may not be accurate on a - platform where the final type can change at (compiler's) runtime. */ + /* Cast to cppchar_signed_t to get correct sign-extension of RESULT + before possibly widening to HOST_WIDE_INT for build_int_2. */ + if (unsignedp || (cppchar_signed_t) result >= 0) + value = build_int_2 (result, 0); + else + value = build_int_2 ((cppchar_signed_t) result, -1); - TREE_TYPE (value) = widest_integer_literal_type_node; - value = convert (type, value); - TREE_OVERFLOW (value) = 0; + if (token->type == CPP_WCHAR) + type = wchar_type_node; + /* In C, a character constant has type 'int'. + In C++ 'char', but multi-char charconsts have type 'int'. 
*/ + else if ((c_language == clk_c || c_language == clk_objective_c) + || chars_seen > 1) + type = integer_type_node; + else + type = char_type_node; + TREE_TYPE (value) = type; return value; } diff --git a/gcc/cppexp.c b/gcc/cppexp.c index 914a2070ac4..b71b02abfd2 100644 --- a/gcc/cppexp.c +++ b/gcc/cppexp.c @@ -283,10 +283,10 @@ eval_token (pfile, token) const cpp_token *token; { unsigned int temp; + int unsignedp = 0; struct op op; op.op = CPP_NUMBER; - op.unsignedp = 0; switch (token->type) { @@ -294,9 +294,8 @@ eval_token (pfile, token) return parse_number (pfile, token); case CPP_WCHAR: - op.unsignedp = WCHAR_UNSIGNED; - case CPP_CHAR: /* Always unsigned. */ - op.value = cpp_interpret_charconst (pfile, token, 1, &temp); + case CPP_CHAR: + op.value = cpp_interpret_charconst (pfile, token, 1, &temp, &unsignedp); break; case CPP_NAME: @@ -331,6 +330,7 @@ eval_token (pfile, token) op.value = temp; } + op.unsignedp = unsignedp; return op; } diff --git a/gcc/cpphash.h b/gcc/cpphash.h index 5ad0c6e1a31..7baf8ff0bb0 100644 --- a/gcc/cpphash.h +++ b/gcc/cpphash.h @@ -29,6 +29,8 @@ struct directive; /* Deliberately incomplete. */ struct pending_option; struct op; +#define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t)) + /* Test if a sign is valid within a preprocessing number. */ #define VALID_SIGN(c, prevc) \ (((c) == '+' || (c) == '-') && \ diff --git a/gcc/cppinit.c b/gcc/cppinit.c index cee75713447..cb5b263151b 100644 --- a/gcc/cppinit.c +++ b/gcc/cppinit.c @@ -502,6 +502,18 @@ cpp_create_reader (lang) CPP_OPTION (pfile, pending) = (struct cpp_pending *) xcalloc (1, sizeof (struct cpp_pending)); + /* CPP arithmetic done to existing rules for now. 
*/ +#define BITS_PER_HOST_WIDEST_INT (CHAR_BIT * sizeof (HOST_WIDEST_INT)) + CPP_OPTION (pfile, precision) = BITS_PER_HOST_WIDEST_INT; +#ifndef MAX_CHAR_TYPE_SIZE +#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE +#endif + CPP_OPTION (pfile, char_precision) = MAX_CHAR_TYPE_SIZE; +#ifndef MAX_WCHAR_TYPE_SIZE +#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE +#endif + CPP_OPTION (pfile, wchar_precision) = MAX_WCHAR_TYPE_SIZE; + /* It's simplest to just create this struct whether or not it will be needed. */ pfile->deps = deps_init (); @@ -1796,6 +1808,27 @@ cpp_post_options (pfile) fputc ('\n', stderr); } +#if ENABLE_CHECKING + /* Sanity checks for CPP arithmetic. */ + if (CPP_OPTION (pfile, precision) > BITS_PER_HOST_WIDEST_INT) + cpp_error (pfile, DL_FATAL, + "preprocessor arithmetic has maximum precision of %u bits; target requires %u bits", + BITS_PER_HOST_WIDEST_INT, CPP_OPTION (pfile, precision)); + + if (CPP_OPTION (pfile, char_precision) > BITS_PER_CPPCHAR_T + || CPP_OPTION (pfile, wchar_precision) > BITS_PER_CPPCHAR_T) + cpp_error (pfile, DL_FATAL, + "CPP cannot handle (wide) character constants over %u bits", + BITS_PER_CPPCHAR_T); + + { + cppchar_t test = 0; + test--; + if (test < 1) + cpp_error (pfile, DL_FATAL, "cppchar_t must be an unsigned type"); + } +#endif + /* Canonicalize in_fname and out_fname. We guarantee they are not NULL, and that the empty string represents stdin / stdout. */ if (CPP_OPTION (pfile, in_fname) == NULL diff --git a/gcc/cpplex.c b/gcc/cpplex.c index bc129784561..0a260490af7 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -1710,23 +1710,33 @@ maybe_read_ucs (pfile, pstr, limit, pc) return 0; } -/* Interpret an escape sequence, and return its value. PSTR points to - the input pointer, which is just after the backslash. LIMIT is how - much text we have. MASK is a bitmask for the precision for the - destination type (char or wchar_t). - - Handles all relevant diagnostics. 
*/ -unsigned int -cpp_parse_escape (pfile, pstr, limit, mask) +/* Returns the value of an escape sequence, truncated to the correct + target precision. PSTR points to the input pointer, which is just + after the backslash. LIMIT is how much text we have. WIDE is true + if the escape sequence is part of a wide character constant or + string literal. Handles all relevant diagnostics. */ +cppchar_t +cpp_parse_escape (pfile, pstr, limit, wide) cpp_reader *pfile; const unsigned char **pstr; const unsigned char *limit; - unsigned HOST_WIDE_INT mask; + int wide; { int unknown = 0; const unsigned char *str = *pstr; - unsigned int c = *str++; + cppchar_t c, mask; + unsigned int width; + + if (wide) + width = CPP_OPTION (pfile, wchar_precision); + else + width = CPP_OPTION (pfile, char_precision); + if (width < BITS_PER_CPPCHAR_T) + mask = ((cppchar_t) 1 << width) - 1; + else + mask = ~0; + c = *str++; switch (c) { case '\\': case '\'': case '"': case '?': break; @@ -1767,7 +1777,7 @@ cpp_parse_escape (pfile, pstr, limit, mask) "the meaning of '\\x' is different in traditional C"); { - unsigned int i = 0, overflow = 0; + cppchar_t i = 0, overflow = 0; int digits_found = 0; while (str < limit) @@ -1798,8 +1808,8 @@ cpp_parse_escape (pfile, pstr, limit, mask) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { - unsigned int i = c - '0'; - int count = 0; + size_t count = 0; + cppchar_t i = c - '0'; while (str < limit && ++count < 3) { @@ -1834,36 +1844,33 @@ cpp_parse_escape (pfile, pstr, limit, mask) } if (c > mask) - cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type"); + { + cpp_error (pfile, DL_PEDWARN, "escape sequence out of range for type"); + c &= mask; + } *pstr = str; return c; } -#ifndef MAX_CHAR_TYPE_SIZE -#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE -#endif - -#ifndef MAX_WCHAR_TYPE_SIZE -#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE -#endif - /* Interpret a (possibly wide) character constant in TOKEN. 
- WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN points - to a variable that is filled in with the number of characters seen. */ -HOST_WIDE_INT -cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) + WARN_MULTI warns about multi-character charconsts. PCHARS_SEEN + points to a variable that is filled in with the number of + characters seen, and UNSIGNEDP to a variable that indicates whether + the result has unsigned type. */ +cppchar_t +cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen, unsignedp) cpp_reader *pfile; const cpp_token *token; int warn_multi; unsigned int *pchars_seen; + int *unsignedp; { const unsigned char *str = token->val.str.text; const unsigned char *limit = str + token->val.str.len; unsigned int chars_seen = 0; - unsigned int width, max_chars, c; - unsigned HOST_WIDE_INT mask; - HOST_WIDE_INT result = 0; + unsigned int width, max_chars; + cppchar_t c, mask, result = 0; bool unsigned_p; #ifdef MULTIBYTE_CHARS @@ -1873,20 +1880,20 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) /* Width in bits. 
*/ if (token->type == CPP_CHAR) { - width = MAX_CHAR_TYPE_SIZE; + width = CPP_OPTION (pfile, char_precision); unsigned_p = CPP_OPTION (pfile, signed_char) == 0; } else { - width = MAX_WCHAR_TYPE_SIZE; + width = CPP_OPTION (pfile, wchar_precision); unsigned_p = WCHAR_UNSIGNED; } - if (width < HOST_BITS_PER_WIDE_INT) - mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1; + if (width < BITS_PER_CPPCHAR_T) + mask = ((cppchar_t) 1 << width) - 1; else mask = ~0; - max_chars = HOST_BITS_PER_WIDE_INT / width; + max_chars = BITS_PER_CPPCHAR_T / width; while (str < limit) { @@ -1911,7 +1918,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) #endif if (c == '\\') - c = cpp_parse_escape (pfile, &str, limit, mask); + c = cpp_parse_escape (pfile, &str, limit, token->type == CPP_WCHAR); #ifdef MAP_CHARACTER if (ISPRINT (c)) @@ -1921,7 +1928,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) /* Merge character into result; ignore excess chars. */ if (++chars_seen <= max_chars) { - if (width < HOST_BITS_PER_WIDE_INT) + if (width < BITS_PER_CPPCHAR_T) result = (result << width) | (c & mask); else result = c; @@ -1943,7 +1950,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) { unsigned int nbits = chars_seen * width; - mask = (unsigned HOST_WIDE_INT) ~0 >> (HOST_BITS_PER_WIDE_INT - nbits); + mask = (cppchar_t) ~0 >> (BITS_PER_CPPCHAR_T - nbits); if (unsigned_p || ((result >> (nbits - 1)) & 1) == 0) result &= mask; else @@ -1951,6 +1958,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, pchars_seen) } *pchars_seen = chars_seen; + *unsignedp = unsigned_p; return result; } diff --git a/gcc/cpplib.c b/gcc/cpplib.c index b210209b2e7..c90224c0b0a 100644 --- a/gcc/cpplib.c +++ b/gcc/cpplib.c @@ -726,23 +726,15 @@ dequote_string (pfile, str, len) uchar *result = _cpp_unaligned_alloc (pfile, len + 1); uchar *dst = result; const uchar *limit = str + len; - unsigned int c; - unsigned HOST_WIDE_INT mask; + cppchar_t c; - /* We need the 
mask to match the host's 'unsigned char', not the - target's. */ - if (CHAR_BIT < HOST_BITS_PER_WIDE_INT) - mask = ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1; - else - mask = ~(unsigned HOST_WIDE_INT)0; - while (str < limit) { c = *str++; if (c != '\\') *dst++ = c; else - *dst++ = cpp_parse_escape (pfile, (const uchar **)&str, limit, mask); + *dst++ = cpp_parse_escape (pfile, &str, limit, 0); } *dst++ = '\0'; return result; diff --git a/gcc/cpplib.h b/gcc/cpplib.h index bbf272be202..520f2a2eac9 100644 --- a/gcc/cpplib.h +++ b/gcc/cpplib.h @@ -190,9 +190,12 @@ struct cpp_token } val; }; -/* A standalone character. It is unsigned for the same reason we use - unsigned char - to avoid signedness issues. */ +/* A type wide enough to hold any multibyte source character. + cpplib's character constant interpreter uses shifts, and so + requires an unsigned type. */ typedef unsigned int cppchar_t; +/* Its signed equivalent. */ +typedef int cppchar_signed_t; /* Values for opts.dump_macros. dump_only means inhibit output of the preprocessed text @@ -237,6 +240,10 @@ struct cpp_options /* -fleading_underscore sets this to "_". */ const char *user_label_prefix; + /* Precision for target CPP arithmetic, target characters and target + wide characters, respectively. */ + size_t precision, char_precision, wchar_precision; + /* The language we're preprocessing. */ enum c_lang lang; @@ -535,9 +542,9 @@ extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *, extern void _cpp_backup_tokens PARAMS ((cpp_reader *, unsigned int)); /* Evaluate a CPP_CHAR or CPP_WCHAR token. 
*/ -extern HOST_WIDE_INT +extern cppchar_t cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *, - int, unsigned int *)); + int, unsigned int *, int *)); extern void cpp_define PARAMS ((cpp_reader *, const char *)); extern void cpp_assert PARAMS ((cpp_reader *, const char *)); @@ -600,10 +607,15 @@ extern int cpp_ideq PARAMS ((const cpp_token *, extern void cpp_output_line PARAMS ((cpp_reader *, FILE *)); extern void cpp_output_token PARAMS ((const cpp_token *, FILE *)); extern const char *cpp_type2name PARAMS ((enum cpp_ttype)); -extern unsigned int cpp_parse_escape PARAMS ((cpp_reader *, - const unsigned char **, - const unsigned char *, - unsigned HOST_WIDE_INT)); +/* Returns the value of an escape sequence, truncated to the correct + target precision. PSTR points to the input pointer, which is just + after the backslash. LIMIT is how much text we have. WIDE is true + if the escape sequence is part of a wide character constant or + string literal. Handles all relevant diagnostics. */ +extern cppchar_t cpp_parse_escape PARAMS ((cpp_reader *, + const unsigned char ** pstr, + const unsigned char *limit, + int wide)); /* In cpphash.c */ -- 2.11.0