2010-03-29 Jason Merrill <jason@redhat.com>
+ N3077
+ * c-c++-common/raw-string-1.c: Update handling of trigraphs, line
+ splicing and UCNs.
+ * c-c++-common/raw-string-2.c: Add trigraph test.
+ * c-c++-common/raw-string-8.c: New.
+ * c-c++-common/raw-string-9.c: New.
+ * c-c++-common/raw-string-10.c: New.
+
* c-c++-common/raw-string-1.c: Combine C and C++ raw string tests.
* c-c++-common/raw-string-2.c: Combine C and C++ raw string tests.
* c-c++-common/raw-string-3.c: Combine C and C++ raw string tests.
// { dg-do run }
// { dg-require-effective-target wchar }
-// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
// { dg-options "-std=c++0x" { target c++ } }
#ifndef __cplusplus
const char s0[] = R"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)";
-const char s1[] = "a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char s1[] = "a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char s2[] = R"*|*(a\
b
c)"
c)*|"
c)*|*";
-const char s3[] = "ab\nc)\"\nc)*|\"\nc";
+const char s3[] = "a\\\nb\nc)\"\nc)*|\"\nc";
+// The ) in ??) below is part of the raw string suffix )".
+const char s4[] = R"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char s5[] = "?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char t0[] = u8R"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)";
-const char t1[] = u8"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char t1[] = u8"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char t2[] = u8R"*|*(a\
b
c)"
c)*|"
c)*|*";
-const char t3[] = u8"ab\nc)\"\nc)*|\"\nc";
+const char t3[] = u8"a\\\nb\nc)\"\nc)*|\"\nc";
+const char t4[] = u8R"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char t5[] = u8"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char16_t u0[] = uR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)";
-const char16_t u1[] = u"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char16_t u1[] = u"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char16_t u2[] = uR"*|*(a\
b
c)"
c)*|"
c)*|*";
-const char16_t u3[] = u"ab\nc)\"\nc)*|\"\nc";
+const char16_t u3[] = u"a\\\nb\nc)\"\nc)*|\"\nc";
+const char16_t u4[] = uR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char16_t u5[] = u"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const char32_t U0[] = UR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)";
-const char32_t U1[] = U"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const char32_t U1[] = U"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const char32_t U2[] = UR"*|*(a\
b
c)"
c)*|"
c)*|*";
-const char32_t U3[] = U"ab\nc)\"\nc)*|\"\nc";
+const char32_t U3[] = U"a\\\nb\nc)\"\nc)*|\"\nc";
+const char32_t U4[] = UR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const char32_t U5[] = U"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
const wchar_t L0[] = LR"(a\
\u010d\U0000010D\\\'\"\?\a\b\f\n\r\t\v\0\00\000\xa\xabb
c)";
-const wchar_t L1[] = L"a\U0000010d\u010d\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
+const wchar_t L1[] = L"a\\\n\\u010d\\U0000010D\\\\\\'\\\"\\?\\a\\b\\f\\n\\r\\t\\v\\0\\00\\000\\xa\\xabb\nc";
const wchar_t L2[] = LR"*|*(a\
b
c)"
c)*|"
c)*|*";
-const wchar_t L3[] = L"ab\nc)\"\nc)*|\"\nc";
+const wchar_t L3[] = L"a\\\nb\nc)\"\nc)*|\"\nc";
+const wchar_t L4[] = LR"(??/
+??/
+??(??<??=??'??!??-??>??)";
+const wchar_t L5[] = L"?\?/\n?\?/\n?\?(?\?<?\?=?\?'?\?!?\?-?\?>?\?";
int
main (void)
if (sizeof (s2) != sizeof (s3)
|| __builtin_memcmp (s2, s3, sizeof (s2)) != 0)
__builtin_abort ();
+ if (sizeof (s4) != sizeof (s5)
+ || __builtin_memcmp (s4, s5, sizeof (s4)) != 0)
+ __builtin_abort ();
if (sizeof (t0) != sizeof (t1)
|| __builtin_memcmp (t0, t1, sizeof (t0)) != 0)
__builtin_abort ();
if (sizeof (t2) != sizeof (t3)
|| __builtin_memcmp (t2, t3, sizeof (t2)) != 0)
__builtin_abort ();
+ if (sizeof (t4) != sizeof (t5)
+ || __builtin_memcmp (t4, t5, sizeof (t4)) != 0)
+ __builtin_abort ();
if (sizeof (u0) != sizeof (u1)
|| __builtin_memcmp (u0, u1, sizeof (u0)) != 0)
__builtin_abort ();
if (sizeof (u2) != sizeof (u3)
|| __builtin_memcmp (u2, u3, sizeof (u2)) != 0)
__builtin_abort ();
+ if (sizeof (u4) != sizeof (u5)
+ || __builtin_memcmp (u4, u5, sizeof (u4)) != 0)
+ __builtin_abort ();
if (sizeof (U0) != sizeof (U1)
|| __builtin_memcmp (U0, U1, sizeof (U0)) != 0)
__builtin_abort ();
if (sizeof (U2) != sizeof (U3)
|| __builtin_memcmp (U2, U3, sizeof (U2)) != 0)
__builtin_abort ();
+ if (sizeof (U4) != sizeof (U5)
+ || __builtin_memcmp (U4, U5, sizeof (U4)) != 0)
+ __builtin_abort ();
if (sizeof (L0) != sizeof (L1)
|| __builtin_memcmp (L0, L1, sizeof (L0)) != 0)
__builtin_abort ();
if (sizeof (L2) != sizeof (L3)
|| __builtin_memcmp (L2, L3, sizeof (L2)) != 0)
__builtin_abort ();
+ if (sizeof (L4) != sizeof (L5)
+ || __builtin_memcmp (L4, L5, sizeof (L4)) != 0)
+ __builtin_abort ();
if (sizeof (R"*()*") != 1
|| __builtin_memcmp (R"*()*", "", 1) != 0)
__builtin_abort ();
// { dg-do run }
// { dg-require-effective-target wchar }
-// { dg-options "-std=gnu99 -Wno-c++-compat" { target c } }
+// { dg-options "-std=gnu99 -Wno-c++-compat -trigraphs" { target c } }
// { dg-options "-std=c++0x" { target c++ } }
#ifndef __cplusplus
const char s09[] = u8R"/^&|~!=,"'(a)/^&|~!=,"'" u8"(b)";
const char s10[] = u8"(a)" u8R"0123456789abcdef(b)0123456789abcdef";
const char s11[] = u8R"ghijklmnopqrstuv(a)ghijklmnopqrstuv" u8R"w(b)w";
+const char s12[] = R"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
const char16_t u03[] = R"-(a)-" u"(b)";
const char16_t u04[] = "(a)" uR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
const char16_t u09[] = uR"/^&|~!=,"'(a)/^&|~!=,"'" u"(b)";
const char16_t u10[] = u"(a)" uR"0123456789abcdef(b)0123456789abcdef";
const char16_t u11[] = uR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" uR"w(b)w";
+const char16_t u12[] = uR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
const char32_t U03[] = R"-(a)-" U"(b)";
const char32_t U04[] = "(a)" UR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
const char32_t U09[] = UR"/^&|~!=,"'(a)/^&|~!=,"'" U"(b)";
const char32_t U10[] = U"(a)" UR"0123456789abcdef(b)0123456789abcdef";
const char32_t U11[] = UR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" UR"w(b)w";
+const char32_t U12[] = UR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
const wchar_t L03[] = R"-(a)-" L"(b)";
const wchar_t L04[] = "(a)" LR"MNOPQRSTUVWXYZ(b)MNOPQRSTUVWXYZ";
const wchar_t L09[] = LR"/^&|~!=,"'(a)/^&|~!=,"'" L"(b)";
const wchar_t L10[] = L"(a)" LR"0123456789abcdef(b)0123456789abcdef";
const wchar_t L11[] = LR"ghijklmnopqrstuv(a)ghijklmnopqrstuv" LR"w(b)w";
+const wchar_t L12[] = LR"??=??(??<??>??)??'??!??-\
+(a)#[{}]^|~";
int
main (void)
TEST (s09, "a(b)");
TEST (s10, "(a)b");
TEST (s11, "ab");
+ TEST (s12, "a");
TEST (u03, u"a(b)");
TEST (u04, u"(a)b");
TEST (u05, u"ab");
TEST (u09, u"a(b)");
TEST (u10, u"(a)b");
TEST (u11, u"ab");
+ TEST (u12, u"a");
TEST (U03, U"a(b)");
TEST (U04, U"(a)b");
TEST (U05, U"ab");
TEST (U09, U"a(b)");
TEST (U10, U"(a)b");
TEST (U11, U"ab");
+ TEST (U12, U"a");
TEST (L03, L"a(b)");
TEST (L04, L"(a)b");
TEST (L05, L"ab");
TEST (L09, L"a(b)");
TEST (L10, L"(a)b");
TEST (L11, L"ab");
+ TEST (L12, L"a");
return 0;
}
2010-03-29 Jason Merrill <jason@redhat.com>
+ More N3077 raw string changes
+ * charset.c (cpp_interpret_string): Don't transform UCNs in raw
+ strings.
+ * lex.c (bufring_append): Split out from...
+ (lex_raw_string): ...here. Undo trigraph and line splicing
+ transformations. Do process line notes in multi-line literals.
+ (_cpp_process_line_notes): Ignore notes that were already handled.
+
Some raw string changes from N3077
* charset.c (cpp_interpret_string): Change inner delimiters to ().
* lex.c (lex_raw_string): Likewise. Also disallow '\' in delimiter.
}
}
}
+ else if (note->type == 0)
+ /* Already processed in lex_raw_string. */;
else
abort ();
}
token->val.str.text = dest;
}
+/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
+ sequence from *FIRST_BUFF_P to *LAST_BUFF_P.
+
+ If *FIRST_BUFF_P is NULL, a buffer is obtained from _cpp_get_buff and
+ becomes both the first and the last buffer. Otherwise, if LEN exceeds
+ BUFF_ROOM of the last buffer, that room is filled first and the rest
+ goes into the buffer returned by _cpp_append_extend_buff. Both
+ *FIRST_BUFF_P and *LAST_BUFF_P are written back before returning. */
+
+static void
+bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
+ _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+{
+ _cpp_buff *first_buff = *first_buff_p;
+ _cpp_buff *last_buff = *last_buff_p;
+
+ if (first_buff == NULL)
+ first_buff = last_buff = _cpp_get_buff (pfile, len); /* Nothing stored yet: start the sequence. */
+ else if (len > BUFF_ROOM (last_buff))
+ {
+ size_t room = BUFF_ROOM (last_buff); /* Use up what is left of the last buffer first. */
+ memcpy (BUFF_FRONT (last_buff), base, room);
+ BUFF_FRONT (last_buff) += room;
+ base += room;
+ len -= room; /* LEN is now only the part that did not fit. */
+ last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
+ }
+
+ memcpy (BUFF_FRONT (last_buff), base, len); /* Copy whatever is left (everything, if no split was needed). */
+ BUFF_FRONT (last_buff) += len;
+
+ *first_buff_p = first_buff; /* Hand the possibly updated ends back to the caller. */
+ *last_buff_p = last_buff;
+}
+
/* Lexes a raw string. The stored string contains the spelling, including
- double quotes, delimiter string, '[' and ']', any leading
+ double quotes, delimiter string, '(' and ')', any leading
'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
literal, or CPP_OTHER if it was not properly terminated.
enum cpp_ttype type;
size_t total_len = 0;
_cpp_buff *first_buff = NULL, *last_buff = NULL;
+ _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
cur = raw_prefix + raw_prefix_len + 1;
for (;;)
{
- cppchar_t c = *cur++;
+#define BUF_APPEND(STR,LEN) \
+ do { \
+ bufring_append (pfile, (const uchar *)(STR), (LEN), \
+ &first_buff, &last_buff); \
+ total_len += (LEN); \
+ } while (0);
+
+ cppchar_t c;
+
+ /* If we previously performed any trigraph or line splicing
+ transformations, undo them within the body of the raw string. */
+ while (note->pos < cur)
+ ++note;
+ for (; note->pos == cur; ++note)
+ {
+ switch (note->type)
+ {
+ case '\\':
+ case ' ':
+ /* Restore backslash followed by newline. */
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("\\", 1);
+ after_backslash:
+ if (note->type == ' ')
+ {
+ /* GNU backslash whitespace newline extension. FIXME
+ could be any sequence of non-vertical space. When we
+ can properly restore any such sequence, we should mark
+ this note as handled so _cpp_process_line_notes
+ doesn't warn. */
+ BUF_APPEND (" ", 1);
+ }
+
+ BUF_APPEND ("\n", 1);
+ break;
+
+ case 0:
+ /* Already handled. */
+ break;
+
+ default:
+ if (_cpp_trigraph_map[note->type])
+ {
+ /* Don't warn about this trigraph in
+ _cpp_process_line_notes, since trigraphs show up as
+ trigraphs in raw strings. */
+ unsigned type = note->type;
+ note->type = 0;
+
+ if (!CPP_OPTION (pfile, trigraphs))
+ /* If we didn't convert the trigraph in the first
+ place, don't do anything now either. */
+ break;
+
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("??", 2);
+
+ /* ??/ followed by newline gets two line notes, one for
+ the trigraph and one for the backslash/newline. */
+ if (type == '/' && note[1].pos == cur)
+ {
+ if (note[1].type != '\\'
+ && note[1].type != ' ')
+ abort ();
+ BUF_APPEND ("/", 1);
+ ++note;
+ goto after_backslash;
+ }
+ /* The ) from ??) could be part of the suffix. */
+ else if (type == ')'
+ && strncmp ((const char *) cur+1,
+ (const char *) raw_prefix,
+ raw_prefix_len) == 0
+ && cur[raw_prefix_len+1] == '"')
+ {
+ cur += raw_prefix_len+2;
+ goto break_outer_loop;
+ }
+ else
+ {
+ /* Skip the replacement character. */
+ base = ++cur;
+ BUF_APPEND (&type, 1);
+ }
+ }
+ else
+ abort ();
+ break;
+ }
+ }
+ c = *cur++;
if (c == ')'
&& strncmp ((const char *) cur, (const char *) raw_prefix,
break;
}
- /* raw strings allow embedded non-escaped newlines, which
- complicates this routine a lot. */
- if (first_buff == NULL)
- {
- total_len = cur - base;
- first_buff = last_buff = _cpp_get_buff (pfile, total_len);
- memcpy (BUFF_FRONT (last_buff), base, total_len);
- raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
- BUFF_FRONT (last_buff) += total_len;
- }
- else
- {
- size_t len = cur - base;
- size_t cur_len = len > BUFF_ROOM (last_buff)
- ? BUFF_ROOM (last_buff) : len;
-
- total_len += len;
- memcpy (BUFF_FRONT (last_buff), base, cur_len);
- BUFF_FRONT (last_buff) += cur_len;
- if (len > cur_len)
- {
- last_buff = _cpp_append_extend_buff (pfile, last_buff,
- len - cur_len);
- memcpy (BUFF_FRONT (last_buff), base + cur_len,
- len - cur_len);
- BUFF_FRONT (last_buff) += len - cur_len;
- }
- }
+ BUF_APPEND (base, cur - base);
if (pfile->buffer->cur < pfile->buffer->rlimit)
CPP_INCREMENT_LINE (pfile, 0);
pfile->buffer->need_line = true;
+ pfile->buffer->cur = cur-1;
+ _cpp_process_line_notes (pfile, false);
if (!_cpp_get_fresh_line (pfile))
{
source_location src_loc = token->src_loc;
}
cur = base = pfile->buffer->cur;
+ note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
else if (c == '\0' && !saw_NUL)
LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, cur));
}
+ break_outer_loop:
if (saw_NUL && !pfile->state.skipping)
cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,