From bb1fa6bb7346b75421ab702f475f3a711a02f822 Mon Sep 17 00:00:00 2001 From: geoffk Date: Sat, 12 Mar 2005 10:44:06 +0000 Subject: [PATCH] Index: libcpp/ChangeLog 2005-03-12 Geoffrey Keating * directives.c (glue_header_name): Update call to cpp_spell_token. * internal.h (_cpp_interpret_identifier): New. * charset.c (_cpp_interpret_identifier): New. (_cpp_valid_ucn): Allow UCN version of '$'. * lex.c (lex_identifier): Add extra parameter to indicate if initial character was '$' or '\'. Support identifiers with UCNs. (forms_identifier_p): Allow UCNs. (_cpp_lex_direct): Pass extra parameter to lex_identifier. (utf8_to_ucn): New. (cpp_spell_token): Add FORSTRING parameter. Use it. (cpp_token_as_text): Update call to cpp_spell_token. (cpp_output_token): Write UCNs back out. (stringify_arg): Update call to cpp_spell_token. (paste_tokens): Likewise. (cpp_macro_definition): Likewise. * macro.c (stringify_arg): Likewise. (paste_tokens): Likewise. (cpp_macro_definition): Likewise. * include/cpplib.h: Add parameter to cpp_spell_token. Index: gcc/ChangeLog 2005-03-12 Geoffrey Keating * c-lex.c (c_lex_with_flags): Add parameter to call to cpp_spell_token. Index: gcc/testsuite/ChangeLog 2005-03-12 Geoffrey Keating * gcc.dg/ucnid-1.c: New. * gcc.dg/ucnid-2.c: New. * gcc.dg/ucnid-3.c: New. * gcc.dg/ucnid-4.c: New. * gcc.dg/ucnid-5.c: New. * gcc.dg/ucnid-6.c: New. * gcc.dg/cpp/ucnid-1.c: New. * gcc.dg/cpp/ucnid-2.c: New. * gcc.dg/cpp/ucnid-3.c: New. * g++.dg/other/ucnid-1.C: New. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96333 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 5 ++ gcc/c-lex.c | 2 +- gcc/testsuite/ChangeLog | 13 ++++ gcc/testsuite/g++.dg/other/ucnid-1.C | 25 +++++++ gcc/testsuite/gcc.dg/cpp/ucnid-1.c | 26 +++++++ gcc/testsuite/gcc.dg/cpp/ucnid-2.c | 16 ++++ gcc/testsuite/gcc.dg/cpp/ucnid-3.c | 7 ++ gcc/testsuite/gcc.dg/ucnid-1.c | 25 +++++++ gcc/testsuite/gcc.dg/ucnid-2.c | 26 +++++++ gcc/testsuite/gcc.dg/ucnid-3.c | 26 +++++++ gcc/testsuite/gcc.dg/ucnid-4.c | 26 +++++++ gcc/testsuite/gcc.dg/ucnid-5.c | 14 ++++ gcc/testsuite/gcc.dg/ucnid-6.c | 26 +++++++ libcpp/ChangeLog | 22 ++++++ libcpp/charset.c | 62 ++++++++++++++++ libcpp/directives.c | 3 +- libcpp/include/cpplib.h | 2 +- libcpp/internal.h | 3 + libcpp/lex.c | 138 ++++++++++++++++++++++++++--------- libcpp/macro.c | 10 +-- 20 files changed, 434 insertions(+), 43 deletions(-) create mode 100644 gcc/testsuite/g++.dg/other/ucnid-1.C create mode 100644 gcc/testsuite/gcc.dg/cpp/ucnid-1.c create mode 100644 gcc/testsuite/gcc.dg/cpp/ucnid-2.c create mode 100644 gcc/testsuite/gcc.dg/cpp/ucnid-3.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-1.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-2.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-3.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-4.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-5.c create mode 100644 gcc/testsuite/gcc.dg/ucnid-6.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5d85aba3b12..b22a17ea8c0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2005-03-12 Geoffrey Keating + + * c-lex.c (c_lex_with_flags): Add parameter to call to + cpp_spell_token. + 2005-03-11 Per Bothner * c-tree.h (struct c_declarator): New id_loc field. 
diff --git a/gcc/c-lex.c b/gcc/c-lex.c index fdc1ff6e311..5be65f16f2b 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -425,7 +425,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags) { unsigned char name[4]; - *cpp_spell_token (parse_in, tok, name) = 0; + *cpp_spell_token (parse_in, tok, name, true) = 0; error ("stray %qs in program", name); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7760588137a..5e748ada585 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,18 @@ 2005-03-12 Geoffrey Keating + * gcc.dg/ucnid-1.c: New. + * gcc.dg/ucnid-2.c: New. + * gcc.dg/ucnid-3.c: New. + * gcc.dg/ucnid-4.c: New. + * gcc.dg/ucnid-5.c: New. + * gcc.dg/ucnid-6.c: New. + * gcc.dg/cpp/ucnid-1.c: New. + * gcc.dg/cpp/ucnid-2.c: New. + * gcc.dg/cpp/ucnid-3.c: New. + * g++.dg/other/ucnid-1.C: New. + +2005-03-12 Geoffrey Keating + * gcc.dg/vmx/darwin-abi-3.c: Delete. 2005-03-11 Per Bothner diff --git a/gcc/testsuite/g++.dg/other/ucnid-1.C b/gcc/testsuite/g++.dg/other/ucnid-1.C new file mode 100644 index 00000000000..a5884bc8888 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/ucnid-1.C @@ -0,0 +1,25 @@ +/* { dg-do run } */ +#include <cstdlib> + +int \u00C0(void) { return 1; } +int \u00C1(void) { return 2; } +int \U000000C2(void) { return 3; } +int wh\u00ff(void) { return 4; } +int a\u00c4b\u0441\U000003b4e(void) { return 5; } + +int main (void) +{ + + if (\u00C0() != 1) + abort (); + if (\u00c1() != 2) + abort (); + if (\u00C2() != 3) + abort (); + if (wh\u00ff() != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e() != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-1.c b/gcc/testsuite/gcc.dg/cpp/ucnid-1.c new file mode 100644 index 00000000000..bc516cae92e --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucnid-1.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +void abort (void); + +#define \u00C0 1 +#define \u00C1 2 +#define \U000000C2 3 +#define wh\u00ff 4 +#define 
a\u00c4b\u0441\U000003b4e 5 + +int main (void) +{ + + if (\u00C0 != 1) + abort (); + if (\u00c1 != 2) + abort (); + if (\u00C2 != 3) + abort (); + if (wh\u00ff != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-2.c b/gcc/testsuite/gcc.dg/cpp/ucnid-2.c new file mode 100644 index 00000000000..616680a969e --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucnid-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +#include <stdlib.h> +#include <string.h> + +#define str(t) #t + +int main (void) +{ + const char s[] = str (\u30b2); + + if (strcmp (s, "\u30b2") != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-3.c b/gcc/testsuite/gcc.dg/cpp/ucnid-3.c new file mode 100644 index 00000000000..a910037c316 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucnid-3.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99" } */ + +#define paste(x, y) x ## y + +int paste(\u00aa, \u0531) = 3; + diff --git a/gcc/testsuite/gcc.dg/ucnid-1.c b/gcc/testsuite/gcc.dg/ucnid-1.c new file mode 100644 index 00000000000..a8d49a3e16d --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-1.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +void abort (void); + +int main (void) +{ + int \u00C0 = 1; + int \u00C1 = 2; + int \U000000C2 = 3; + int wh\u00ff = 4; + int a\u00c4b\u0441\U000003b4e = 5; + + if (\u00C0 != 1) + abort (); + if (\u00c1 != 2) + abort (); + if (\u00C2 != 3) + abort (); + if (wh\u00ff != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/ucnid-2.c b/gcc/testsuite/gcc.dg/ucnid-2.c new file mode 100644 index 00000000000..8288514ecd6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +void abort (void); + +static int \u00C0 = 1; +static int \u00C1 = 2; +static int \U000000C2 = 3; +static int wh\u00ff = 4; +static int 
a\u00c4b\u0441\U000003b4e = 5; + +int main (void) +{ + + if (\u00C0 != 1) + abort (); + if (\u00c1 != 2) + abort (); + if (\u00C2 != 3) + abort (); + if (wh\u00ff != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/ucnid-3.c b/gcc/testsuite/gcc.dg/ucnid-3.c new file mode 100644 index 00000000000..66e84141fdf --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-3.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +void abort (void); + +int \u00C0 = 1; +int \u00C1 = 2; +int \U000000C2 = 3; +int wh\u00ff = 4; +int a\u00c4b\u0441\U000003b4e = 5; + +int main (void) +{ + + if (\u00C0 != 1) + abort (); + if (\u00c1 != 2) + abort (); + if (\u00C2 != 3) + abort (); + if (wh\u00ff != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/ucnid-4.c b/gcc/testsuite/gcc.dg/ucnid-4.c new file mode 100644 index 00000000000..35725a35f94 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-4.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99" } */ +void abort (void); + +int \u00C0(void) { return 1; } +int \u00C1(void) { return 2; } +int \U000000C2(void) { return 3; } +int wh\u00ff(void) { return 4; } +int a\u00c4b\u0441\U000003b4e(void) { return 5; } + +int main (void) +{ + + if (\u00C0() != 1) + abort (); + if (\u00c1() != 2) + abort (); + if (\u00C2() != 3) + abort (); + if (wh\u00ff() != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e() != 5) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/ucnid-5.c b/gcc/testsuite/gcc.dg/ucnid-5.c new file mode 100644 index 00000000000..08cd9c2bf7d --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-5.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99 -fdollars-in-identifiers" } */ +void abort (void); + +int a$b(void) { return 1; } + +int main (void) +{ + + if (a\u0024b() != 1) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/ucnid-6.c 
b/gcc/testsuite/gcc.dg/ucnid-6.c new file mode 100644 index 00000000000..61c8c3e089a --- /dev/null +++ b/gcc/testsuite/gcc.dg/ucnid-6.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-std=c99 -save-temps" } */ +void abort (void); + +int \u00C0(void) { return 1; } +int \u00C1(void) { return 2; } +int \U000000C2(void) { return 3; } +int wh\u00ff(void) { return 4; } +int a\u00c4b\u0441\U000003b4e(void) { return 5; } + +int main (void) +{ + + if (\u00C0() != 1) + abort (); + if (\u00c1() != 2) + abort (); + if (\u00C2() != 3) + abort (); + if (wh\u00ff() != 4) + abort (); + if (a\u00c4b\u0441\U000003b4e() != 5) + abort (); + + return 0; +} diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index b246de782da..5190599b9b0 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,25 @@ +2005-03-11 Geoffrey Keating + + * directives.c (glue_header_name): Update call to cpp_spell_token. + * internal.h (_cpp_interpret_identifier): New. + * charset.c (_cpp_interpret_identifier): New. + (_cpp_valid_ucn): Allow UCN version of '$'. + * lex.c (lex_identifier): Add extra parameter to indicate if initial + character was '$' or '\'. Support identifiers with UCNs. + (forms_identifier_p): Allow UCNs. + (_cpp_lex_direct): Pass extra parameter to lex_identifier. + (utf8_to_ucn): New. + (cpp_spell_token): Add FORSTRING parameter. Use it. + (cpp_token_as_text): Update call to cpp_spell_token. + (cpp_output_token): Write UCNs back out. + (stringify_arg): Update call to cpp_spell_token. + (paste_tokens): Likewise. + (cpp_macro_definition): Likewise. + * macro.c (stringify_arg): Likewise. + (paste_tokens): Likewise. + (cpp_macro_definition): Likewise. + * include/cpplib.h: Add parameter to cpp_spell_token. 
+ 2005-03-04 Jakub Jelinek PR bootstrap/20282 diff --git a/libcpp/charset.c b/libcpp/charset.c index 6b6c360f73d..cd25f10a2e6 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -907,6 +907,15 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, (int) (str - base), base); result = 1; } + else if (identifier_pos && result == 0x24 + && CPP_OPTION (pfile, dollars_in_ident)) + { + if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping) + { + CPP_OPTION (pfile, warn_dollars) = 0; + cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number"); + } + } else if (identifier_pos) { int validity = ucn_valid_in_identifier (pfile, result); @@ -1414,7 +1423,60 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, return result; } + +/* Convert an identifier denoted by ID and LEN, which might contain + UCN escapes, to the source character set, either UTF-8 or + UTF-EBCDIC. Assumes that the identifier is actually a valid identifier. */ +cpp_hashnode * +_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len) +{ + /* It turns out that a UCN escape always turns into fewer characters + than the escape itself, so we can allocate a temporary in advance. */ + uchar * buf = alloca (len + 1); + uchar * bufp = buf; + size_t idp; + + for (idp = 0; idp < len; idp++) + if (id[idp] != '\\') + *bufp++ = id[idp]; + else + { + unsigned length = id[idp+1] == 'u' ? 4 : 8; + cppchar_t value = 0; + size_t bufleft = len - (bufp - buf); + int rval; + + idp += 2; + while (length && idp < len && ISXDIGIT (id[idp])) + { + value = (value << 4) + hex_value (id[idp]); + idp++; + length--; + } + idp--; + + /* Special case for EBCDIC: if the identifier contains + a '$' specified using a UCN, translate it to EBCDIC. 
*/ + if (value == 0x24) + { + *bufp++ = '$'; + continue; + } + + rval = one_cppchar_to_utf8 (value, &bufp, &bufleft); + if (rval) + { + errno = rval; + cpp_errno (pfile, CPP_DL_ERROR, + "converting UCN to source character set"); + break; + } + } + return CPP_HASHNODE (ht_lookup (pfile->hash_table, + buf, bufp - buf, HT_ALLOC)); +} + /* Convert an input buffer (containing the complete contents of one source file) from INPUT_CHARSET to the source character set. INPUT points to the input buffer, SIZE is its allocated size, and LEN is diff --git a/libcpp/directives.c b/libcpp/directives.c index 84065052630..957e879caec 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.c @@ -608,7 +608,8 @@ glue_header_name (cpp_reader *pfile) if (token->flags & PREV_WHITE) buffer[total_len++] = ' '; - total_len = (cpp_spell_token (pfile, token, (uchar *) &buffer[total_len]) + total_len = (cpp_spell_token (pfile, token, (uchar *) &buffer[total_len], + true) - (uchar *) buffer); } diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 70f8d895afd..ccf8bff47e4 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -637,7 +637,7 @@ extern unsigned int cpp_errors (cpp_reader *); extern unsigned int cpp_token_len (const cpp_token *); extern unsigned char *cpp_token_as_text (cpp_reader *, const cpp_token *); extern unsigned char *cpp_spell_token (cpp_reader *, const cpp_token *, - unsigned char *); + unsigned char *, bool); extern void cpp_register_pragma (cpp_reader *, const char *, const char *, void (*) (cpp_reader *), bool); extern void cpp_handle_deferred_pragma (cpp_reader *, const cpp_string *); diff --git a/libcpp/internal.h b/libcpp/internal.h index 0ae13d58cb6..af823d766b3 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -571,6 +571,9 @@ extern unsigned char *_cpp_convert_input (cpp_reader *, const char *, unsigned char *, size_t, size_t, off_t *); extern const char *_cpp_default_encoding (void); +extern cpp_hashnode * 
_cpp_interpret_identifier (cpp_reader *pfile, + const unsigned char *id, + size_t len); /* Utility routines and macros. */ #define DSC(str) (const unsigned char *)str, sizeof str - 1 diff --git a/libcpp/lex.c b/libcpp/lex.c index 62a28f81b87..8398c7ca061 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -53,7 +53,7 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; static void add_line_note (cpp_buffer *, const uchar *, unsigned int); static int skip_line_comment (cpp_reader *); static void skip_whitespace (cpp_reader *, cppchar_t); -static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *); +static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *, bool); static void lex_number (cpp_reader *, cpp_string *); static bool forms_identifier_p (cpp_reader *, int); static void lex_string (cpp_reader *, cpp_token *, const uchar *); @@ -453,7 +453,7 @@ forms_identifier_p (cpp_reader *pfile, int first) } /* Is this a syntactically valid UCN? */ - if (0 && *buffer->cur == '\\' + if (*buffer->cur == '\\' && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) { buffer->cur += 2; @@ -467,39 +467,39 @@ forms_identifier_p (cpp_reader *pfile, int first) /* Lex an identifier starting at BUFFER->CUR - 1. */ static cpp_hashnode * -lex_identifier (cpp_reader *pfile, const uchar *base) +lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn) { cpp_hashnode *result; - const uchar *cur, *limit; + const uchar *cur; unsigned int len; unsigned int hash = HT_HASHSTEP (0, *base); cur = pfile->buffer->cur; - for (;;) + if (! starts_ucn) + while (ISIDNUM (*cur)) + { + hash = HT_HASHSTEP (hash, *cur); + cur++; + } + pfile->buffer->cur = cur; + if (starts_ucn || forms_identifier_p (pfile, false)) { - /* N.B. ISIDNUM does not include $. 
*/ - while (ISIDNUM (*cur)) - { - hash = HT_HASHSTEP (hash, *cur); - cur++; - } - - pfile->buffer->cur = cur; - if (!forms_identifier_p (pfile, false)) - break; - - limit = pfile->buffer->cur; - while (cur < limit) - { - hash = HT_HASHSTEP (hash, *cur); - cur++; - } + /* Slower version for identifiers containing UCNs (or $). */ + do { + while (ISIDNUM (*pfile->buffer->cur)) + pfile->buffer->cur++; + } while (forms_identifier_p (pfile, false)); + result = _cpp_interpret_identifier (pfile, base, + pfile->buffer->cur - base); } - len = cur - base; - hash = HT_HASHFINISH (hash, len); + else + { + len = cur - base; + hash = HT_HASHFINISH (hash, len); - result = (cpp_hashnode *) - ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC); + result = (cpp_hashnode *) + ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC); + } /* Rarely, identifiers require diagnostics when lexed. */ if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC) @@ -922,7 +922,7 @@ _cpp_lex_direct (cpp_reader *pfile) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = lex_identifier (pfile, buffer->cur - 1); + result->val.node = lex_identifier (pfile, buffer->cur - 1, false); /* Convert named operators to their proper types. */ if (result->val.node->flags & NODE_OPERATOR) @@ -1155,7 +1155,7 @@ _cpp_lex_direct (cpp_reader *pfile) if (forms_identifier_p (pfile, true)) { result->type = CPP_NAME; - result->val.node = lex_identifier (pfile, base); + result->val.node = lex_identifier (pfile, base, true); break; } buffer->cur++; @@ -1180,19 +1180,56 @@ cpp_token_len (const cpp_token *token) { default: len = 4; break; case SPELL_LITERAL: len = token->val.str.len; break; - case SPELL_IDENT: len = NODE_LEN (token->val.node); break; + case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break; } return len; } +/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER. 
+ Return the number of bytes read out of NAME. (There are always + 10 bytes written to BUFFER.) */ + +static size_t +utf8_to_ucn (unsigned char *buffer, const unsigned char *name) +{ + int j; + int ucn_len = 0; + int ucn_len_c; + unsigned t; + unsigned long utf32; + + /* Compute the length of the UTF-8 sequence. */ + for (t = *name; t & 0x80; t <<= 1) + ucn_len++; + + utf32 = *name & (0x7F >> ucn_len); + for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++) + { + utf32 = (utf32 << 6) | (*++name & 0x3F); + + /* Ill-formed UTF-8. */ + if ((*name & ~0x3F) != 0x80) + abort (); + } + + *buffer++ = '\\'; + *buffer++ = 'U'; + for (j = 7; j >= 0; j--) + *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF]; + return ucn_len; +} + + /* Write the spelling of a token TOKEN to BUFFER. The buffer must already contain the enough space to hold the token's spelling. Returns a pointer to the character after the last character written. + FORSTRING is true if this is to be the spelling after translation + phase 1 (this is different for UCNs). FIXME: Would be nice if we didn't need the PFILE argument. 
*/ unsigned char * cpp_spell_token (cpp_reader *pfile, const cpp_token *token, - unsigned char *buffer) + unsigned char *buffer, bool forstring) { switch (TOKEN_SPELL (token)) { @@ -1216,8 +1253,26 @@ cpp_spell_token (cpp_reader *pfile, const cpp_token *token, spell_ident: case SPELL_IDENT: - memcpy (buffer, NODE_NAME (token->val.node), NODE_LEN (token->val.node)); - buffer += NODE_LEN (token->val.node); + if (forstring) + { + memcpy (buffer, NODE_NAME (token->val.node), + NODE_LEN (token->val.node)); + buffer += NODE_LEN (token->val.node); + } + else + { + size_t i; + const unsigned char * name = NODE_NAME (token->val.node); + + for (i = 0; i < NODE_LEN (token->val.node); i++) + if (name[i] & ~0x7F) + { + i += utf8_to_ucn (buffer, name + i) - 1; + buffer += 10; + } + else + *buffer++ = NODE_NAME (token->val.node)[i]; + } break; case SPELL_LITERAL: @@ -1242,7 +1297,7 @@ cpp_token_as_text (cpp_reader *pfile, const cpp_token *token) unsigned int len = cpp_token_len (token) + 1; unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end; - end = cpp_spell_token (pfile, token, start); + end = cpp_spell_token (pfile, token, start, false); end[0] = '\0'; return start; @@ -1286,8 +1341,21 @@ cpp_output_token (const cpp_token *token, FILE *fp) spell_ident: case SPELL_IDENT: - fwrite (NODE_NAME (token->val.node), 1, NODE_LEN (token->val.node), fp); - break; + { + size_t i; + const unsigned char * name = NODE_NAME (token->val.node); + + for (i = 0; i < NODE_LEN (token->val.node); i++) + if (name[i] & ~0x7F) + { + unsigned char buffer[10]; + i += utf8_to_ucn (buffer, name + i) - 1; + fwrite (buffer, 1, 10, fp); + } + else + fputc (NODE_NAME (token->val.node)[i], fp); + } + break; case SPELL_LITERAL: fwrite (token->val.str.text, 1, token->val.str.len, fp); diff --git a/libcpp/macro.c b/libcpp/macro.c index 7d65886a390..441b3b32ed3 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -380,12 +380,12 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg) { _cpp_buff *buff = 
_cpp_get_buff (pfile, len); unsigned char *buf = BUFF_FRONT (buff); - len = cpp_spell_token (pfile, token, buf) - buf; + len = cpp_spell_token (pfile, token, buf, true) - buf; dest = cpp_quote_string (dest, buf, len); _cpp_release_buff (pfile, buff); } else - dest = cpp_spell_token (pfile, token, dest); + dest = cpp_spell_token (pfile, token, dest, true); if (token->type == CPP_OTHER && token->val.str.text[0] == '\\') backslash_count++; @@ -422,7 +422,7 @@ paste_tokens (cpp_reader *pfile, const cpp_token **plhs, const cpp_token *rhs) lhs = *plhs; len = cpp_token_len (lhs) + cpp_token_len (rhs) + 1; buf = alloca (len); - end = cpp_spell_token (pfile, lhs, buf); + end = cpp_spell_token (pfile, lhs, buf, false); /* Avoid comment headers, since they are still processed in stage 3. It is simpler to insert a space here, rather than modifying the @@ -430,7 +430,7 @@ paste_tokens (cpp_reader *pfile, const cpp_token **plhs, const cpp_token *rhs) false doesn't work, since we want to clear the PASTE_LEFT flag. */ if (lhs->type == CPP_DIV && rhs->type != CPP_EQ) *end++ = ' '; - end = cpp_spell_token (pfile, rhs, end); + end = cpp_spell_token (pfile, rhs, end, false); *end = '\n'; cpp_push_buffer (pfile, buf, end - buf, /* from_stage3 */ true); @@ -1751,7 +1751,7 @@ cpp_macro_definition (cpp_reader *pfile, const cpp_hashnode *node) buffer += NODE_LEN (macro->params[token->val.arg_no - 1]); } else - buffer = cpp_spell_token (pfile, token, buffer); + buffer = cpp_spell_token (pfile, token, buffer, false); if (token->flags & PASTE_LEFT) { -- 2.11.0