-/* Returns the value of a hexadecimal digit.
-
-   C is the character code of the digit.  'a'-'f' and 'A'-'F' both
-   map to 10-15, and '0'-'9' map to 0-9.  Any other value of C falls
-   through to abort (), so C must already be known to be a hex digit;
-   the callers visible here test ISXDIGIT (c) before calling.  */
-static unsigned int
-hex_digit_value (c)
- unsigned int c;
-{
- if (c >= 'a' && c <= 'f')
- return c - 'a' + 10;
- if (c >= 'A' && c <= 'F')
- return c - 'A' + 10;
- if (c >= '0' && c <= '9')
- return c - '0';
- abort (); /* C matched none of the hexadecimal digit ranges.  */
-}
-
-/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate
- failure if cpplib is not parsing C++ or C99. Such failure is
- silent, and no variables are updated. Otherwise returns 0, and
- warns if -Wtraditional.
-
- [lex.charset]: The character designated by the universal character
- name \UNNNNNNNN is that character whose character short name in
- ISO/IEC 10646 is NNNNNNNN; the character designated by the
- universal character name \uNNNN is that character whose character
- short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
- for a universal character name is less than 0x20 or in the range
- 0x7F-0x9F (inclusive), or if the universal character name
- designates a character in the basic source character set, then the
- program is ill-formed.
-
- We assume that wchar_t is Unicode, so we don't need to do any
- mapping. Is this ever wrong?
-
- On entry *PC holds the 'u' or 'U', *PSTR points to the byte after
- it, and LIMIT is the end of the string or charconst. When 0 is
- returned, *PSTR is updated to point after the UCS and the UCS
- value is written into *PC.
-
- Malformed input is reported with cpp_error but still returns 0:
- if fewer than the required 4 or 8 bytes remain before LIMIT, *PSTR
- is set to LIMIT and the value written is 0; if a non-hex digit is
- met, parsing stops at that byte and the value accumulated so far
- is written. When TARGET_EBCDIC is defined an error is always
- given and the value written is 0x3f. */
-
-static int
-maybe_read_ucs (pfile, pstr, limit, pc)
- cpp_reader *pfile;
- const unsigned char **pstr;
- const unsigned char *limit;
- unsigned int *pc;
-{
- const unsigned char *p = *pstr;
- unsigned int code = 0;
- unsigned int c = *pc, length;
-
- /* Only attempt to interpret a UCS for C++ and C99. */
- if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99)))
- return 1;
-
- if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
-
- length = (c == 'u' ? 4: 8); /* Number of hex digits required: 4 for \u, 8 for \U.  */
-
- if ((size_t) (limit - p) < length)
- {
- cpp_error (pfile, "incomplete universal-character-name");
- /* Skip to the end to avoid more diagnostics. */
- p = limit;
- }
- else
- {
- /* LENGTH counts down the digits still to be read; it reaches zero
-    only if every one of them was a hex digit.  */
- for (; length; length--, p++)
- {
- c = *p;
- if (ISXDIGIT (c))
- code = (code << 4) + hex_digit_value (c);
- else
- {
- cpp_error (pfile,
- "non-hex digit '%c' in universal-character-name", c);
- /* We shouldn't skip in case there are multibyte chars. */
- break;
- }
- }
- }
-
-#ifdef TARGET_EBCDIC
- cpp_error (pfile, "universal-character-name on EBCDIC target");
- code = 0x3f; /* EBCDIC invalid character */
-#else
- /* True extended characters are OK: at least 0xa0, top bit clear,
-    and outside the range 0xD800-0xDFFF. */
- if (code >= 0xa0
- && !(code & 0x80000000)
- && !(code >= 0xD800 && code <= 0xDFFF))
- ;
- /* The standard permits $, @ and ` to be specified as UCNs. We use
- hex constants for them so that this also works with EBCDIC hosts. */
- else if (code == 0x24 || code == 0x40 || code == 0x60)
- ;
- /* Don't give another error if one occurred above: LENGTH is still
-    nonzero after either of the error paths. */
- else if (length == 0)
- cpp_error (pfile, "universal-character-name out of range");
-#endif
-
- *pstr = p;
- *pc = code;
- return 0;
-}
-
-/* Interpret an escape sequence, and return its value. PSTR points to
- the input pointer, which is just after the backslash. LIMIT is how
- much text we have. MASK is a bitmask for the precision for the
- destination type (char or wchar_t). TRADITIONAL, if true, does not
- interpret escapes that did not exist in traditional C.
-
- On return *PSTR has been advanced past the characters consumed.
- Hex and octal values that do not fit in MASK are truncated to it
- after a pedwarn. With TRADITIONAL set, '\a' and '\x' are not
- interpreted and the letter itself is returned. An unrecognized
- escape gets a pedwarn and yields the character after the
- backslash unchanged.
-
- Handles all relevant diagnostics. */
-
-unsigned int
-cpp_parse_escape (pfile, pstr, limit, mask, traditional)
- cpp_reader *pfile;
- const unsigned char **pstr;
- const unsigned char *limit;
- unsigned HOST_WIDE_INT mask;
- int traditional;
-{
- int unknown = 0;
- const unsigned char *str = *pstr;
- unsigned int c = *str++; /* NOTE(review): read without comparing STR to LIMIT;
-   presumably callers guarantee a byte follows the backslash -- confirm.  */
-
- switch (c)
- {
- case '\\': case '\'': case '"': case '?': break;
- case 'b': c = TARGET_BS; break;
- case 'f': c = TARGET_FF; break;
- case 'n': c = TARGET_NEWLINE; break;
- case 'r': c = TARGET_CR; break;
- case 't': c = TARGET_TAB; break;
- case 'v': c = TARGET_VT; break;
-
- case '(': case '{': case '[': case '%':
- /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
- '\%' is used to prevent SCCS from getting confused.
- They are only diagnosed as unknown when pedantic. */
- unknown = CPP_PEDANTIC (pfile);
- break;
-
- case 'a':
- if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
- if (!traditional)
- c = TARGET_BELL;
- break;
-
- case 'e': case 'E':
- if (CPP_PEDANTIC (pfile))
- cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
- c = TARGET_ESC;
- break;
-
- case 'u': case 'U':
- unknown = maybe_read_ucs (pfile, &str, limit, &c); /* 1 (not C++ or C99) leaves STR and C alone.  */
- break;
-
- case 'x':
- if (CPP_WTRADITIONAL (pfile))
- cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
-
- if (!traditional)
- {
- unsigned int i = 0, overflow = 0;
- int digits_found = 0;
-
- while (str < limit)
- {
- c = *str;
- if (! ISXDIGIT (c))
- break;
- str++;
- overflow |= i ^ (i << 4 >> 4); /* Nonzero when the top four bits of I are set,
-   i.e. the shift below is about to lose them.  */
- i = (i << 4) + hex_digit_value (c);
- digits_found = 1;
- }
-
- if (!digits_found)
- cpp_error (pfile, "\\x used with no following hex digits");
-
- if (overflow | (i != (i & mask)))
- {
- cpp_pedwarn (pfile, "hex escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- {
- unsigned int i = c - '0';
- int count = 0;
-
- while (str < limit && ++count < 3) /* At most two more digits, three in total.  */
- {
- c = *str;
- if (c < '0' || c > '7')
- break;
- str++;
- i = (i << 3) + c - '0';
- }
-
- if (i != (i & mask))
- {
- cpp_pedwarn (pfile, "octal escape sequence out of range");
- i &= mask;
- }
- c = i;
- }
- break;
-
- default:
- unknown = 1;
- break;
- }
-
- if (unknown)
- {
- if (ISGRAPH (c)) /* Show the character itself when ISGRAPH accepts it, else its octal code.  */
- cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
- else
- cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
- }
-
- if (c > mask) /* The value is diagnosed but returned untruncated.  */
- cpp_pedwarn (pfile, "escape sequence out of range for character");
-
- *pstr = str;
- return c;
-}
-
-#ifndef MAX_CHAR_TYPE_SIZE /* Fall back to CHAR_TYPE_SIZE when not already defined.  */
-#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
-#endif
-
-#ifndef MAX_WCHAR_TYPE_SIZE /* Likewise, falling back to WCHAR_TYPE_SIZE.  */
-#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
-#endif
-
-/* Interpret a (possibly wide) character constant in TOKEN.
- WARN_MULTI warns about multi-character charconsts, if not
- TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
- that did not exist in traditional C. PCHARS_SEEN points to a
- variable that is filled in with the number of characters seen.
-
- Returns the value of the constant. Each character is WIDTH bits
- wide (MAX_CHAR_TYPE_SIZE for a CPP_CHAR token, MAX_WCHAR_TYPE_SIZE
- otherwise); successive characters are shifted into the result,
- and characters beyond the number that fit in a HOST_WIDE_INT are
- ignored with a warning. In that case the count stored through
- PCHARS_SEEN is clamped to that maximum. An empty constant is an
- error and yields 0. */
-HOST_WIDE_INT
-cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
- cpp_reader *pfile;
- const cpp_token *token;
- int warn_multi;
- int traditional;
- unsigned int *pchars_seen;
-{
- const unsigned char *str = token->val.str.text;
- const unsigned char *limit = str + token->val.str.len;
- unsigned int chars_seen = 0;
- unsigned int width, max_chars, c;
- unsigned HOST_WIDE_INT mask;
- HOST_WIDE_INT result = 0;
-
-#ifdef MULTIBYTE_CHARS
- (void) local_mbtowc (NULL, NULL, 0); /* Presumably resets the conversion state, as mbtowc does -- confirm.  */
-#endif
-
- /* Width in bits. */
- if (token->type == CPP_CHAR)
- width = MAX_CHAR_TYPE_SIZE;
- else
- width = MAX_WCHAR_TYPE_SIZE;
-
- if (width < HOST_BITS_PER_WIDE_INT)
- mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
- else
- mask = ~0;
- max_chars = HOST_BITS_PER_WIDE_INT / width; /* Characters that fit in the result.  */
-
- while (str < limit)
- {
-#ifdef MULTIBYTE_CHARS
- wchar_t wc;
- int char_len;
-
- char_len = local_mbtowc (&wc, str, limit - str);
- if (char_len == -1)
- {
- cpp_warning (pfile, "ignoring invalid multibyte character");
- c = *str++;
- }
- else
- {
- str += char_len; /* NOTE(review): if local_mbtowc can return 0 (as mbtowc
-   does for a NUL byte), STR does not advance here -- confirm the
-   loop cannot spin.  */
- c = wc;
- }
-#else
- c = *str++;
-#endif
-
- if (c == '\\')
- c = cpp_parse_escape (pfile, &str, limit, mask, traditional);
-
-#ifdef MAP_CHARACTER
- if (ISPRINT (c))
- c = MAP_CHARACTER (c);
-#endif
-
- /* Merge character into result; ignore excess chars. */
- if (++chars_seen <= max_chars)
- {
- if (width < HOST_BITS_PER_WIDE_INT)
- result = (result << width) | (c & mask);
- else
- result = c;
- }
- }
-
- if (chars_seen == 0)
- cpp_error (pfile, "empty character constant");
- else if (chars_seen > max_chars)
- {
- chars_seen = max_chars;
- cpp_warning (pfile, "character constant too long");
- }
- else if (chars_seen > 1 && !traditional && warn_multi)
- cpp_warning (pfile, "multi-character character constant");
-
- /* If char type is signed, sign-extend the constant. The
- __CHAR_UNSIGNED__ macro is set by the driver if appropriate.
-
- NOTE(review): NBITS can be as large as MAX_CHARS * WIDTH. If that
- exceeds HOST_BITS_PER_INT, the shift count below is negative.
- The local MASK also shadows the outer one and is only unsigned int
- wide, so ~MASK may not set the high bits of a wider HOST_WIDE_INT
- -- confirm for hosts where HOST_WIDE_INT is wider than int. */
- if (token->type == CPP_CHAR && chars_seen)
- {
- unsigned int nbits = chars_seen * width;
- unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
-
- if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
- || ((result >> (nbits - 1)) & 1) == 0)
- result &= mask;
- else
- result |= ~mask;
- }
-
- *pchars_seen = chars_seen;
- return result;
-}
-