}
}
}
+ else if (note->type == 0)
+ /* Already processed in lex_raw_string. */;
else
abort ();
}
return false;
}
+/* Helper function to get the cpp_hashnode of the identifier BASE. */
+static cpp_hashnode *
+lex_identifier_intern (cpp_reader *pfile, const uchar *base)
+{
+ cpp_hashnode *result;
+ const uchar *cur;
+ unsigned int len;
+ unsigned int hash = HT_HASHSTEP (0, *base);
+
+ cur = base + 1;
+ while (ISIDNUM (*cur))
+ {
+ hash = HT_HASHSTEP (hash, *cur);
+ cur++;
+ }
+ len = cur - base;
+ hash = HT_HASHFINISH (hash, len);
+ result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
+ base, len, hash, HT_ALLOC));
+
+ /* Rarely, identifiers require diagnostics when lexed. */
+ if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
+ && !pfile->state.skipping, 0))
+ {
+ /* It is allowed to poison the same identifier twice. */
+ if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
+ cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
+ NODE_NAME (result));
+
+ /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
+ replacement list of a variadic macro. */
+ if (result == pfile->spec_nodes.n__VA_ARGS__
+ && !pfile->state.va_args_ok)
+ cpp_error (pfile, CPP_DL_PEDWARN,
+ "__VA_ARGS__ can only appear in the expansion"
+ " of a C99 variadic macro");
+
+ /* For -Wc++-compat, warn about use of C++ named operators. */
+ if (result->flags & NODE_WARN_OPERATOR)
+ cpp_error (pfile, CPP_DL_WARNING,
+ "identifier \"%s\" is a special operator name in C++",
+ NODE_NAME (result));
+ }
+
+ return result;
+}
+
+/* Get the cpp_hashnode of an identifier specified by NAME in
+ the current cpp_reader object. If none is found, NULL is returned. */
+cpp_hashnode *
+_cpp_lex_identifier (cpp_reader *pfile, const char *name)
+{
+ cpp_hashnode *result;
+ result = lex_identifier_intern (pfile, (uchar *) name);
+ return result;
+}
+
/* Lex an identifier starting at BUFFER->CUR - 1. */
static cpp_hashnode *
lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
token->val.str.text = dest;
}
+/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
+ sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
+
+static void
+bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
+ _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
+{
+ _cpp_buff *first_buff = *first_buff_p;
+ _cpp_buff *last_buff = *last_buff_p;
+
+ if (first_buff == NULL)
+ first_buff = last_buff = _cpp_get_buff (pfile, len);
+ else if (len > BUFF_ROOM (last_buff))
+ {
+ size_t room = BUFF_ROOM (last_buff);
+ memcpy (BUFF_FRONT (last_buff), base, room);
+ BUFF_FRONT (last_buff) += room;
+ base += room;
+ len -= room;
+ last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
+ }
+
+ memcpy (BUFF_FRONT (last_buff), base, len);
+ BUFF_FRONT (last_buff) += len;
+
+ *first_buff_p = first_buff;
+ *last_buff_p = last_buff;
+}
+
+/* Lexes a raw string. The stored string contains the spelling, including
+ double quotes, delimiter string, '(' and ')', any leading
+ 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
+ literal, or CPP_OTHER if it was not properly terminated.
+
+ The spelling is NUL-terminated, but it is not guaranteed that this
+ is the first NUL since embedded NULs are preserved. */
+
+static void
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+ const uchar *cur)
+{
+ source_location saw_NUL = 0;
+ const uchar *raw_prefix;
+ unsigned int raw_prefix_len = 0;
+ enum cpp_ttype type;
+ size_t total_len = 0;
+ _cpp_buff *first_buff = NULL, *last_buff = NULL;
+ _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
+
+ type = (*base == 'L' ? CPP_WSTRING :
+ *base == 'U' ? CPP_STRING32 :
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+ : CPP_STRING);
+
+ raw_prefix = cur + 1;
+ while (raw_prefix_len < 16)
+ {
+ switch (raw_prefix[raw_prefix_len])
+ {
+ case ' ': case '(': case ')': case '\\': case '\t':
+ case '\v': case '\f': case '\n': default:
+ break;
+ /* Basic source charset except the above chars. */
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case '_': case '{': case '}': case '#': case '[': case ']':
+ case '<': case '>': case '%': case ':': case ';': case '.':
+ case '?': case '*': case '+': case '-': case '/': case '^':
+ case '&': case '|': case '~': case '!': case '=': case ',':
+ case '"': case '\'':
+ raw_prefix_len++;
+ continue;
+ }
+ break;
+ }
+
+ if (raw_prefix[raw_prefix_len] != '(')
+ {
+ int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
+ + 1;
+ if (raw_prefix_len == 16)
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+ "raw string delimiter longer than 16 characters");
+ else
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+ "invalid character '%c' in raw string delimiter",
+ (int) raw_prefix[raw_prefix_len]);
+ pfile->buffer->cur = raw_prefix - 1;
+ create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
+ return;
+ }
+
+ cur = raw_prefix + raw_prefix_len + 1;
+ for (;;)
+ {
+#define BUF_APPEND(STR,LEN) \
+ do { \
+ bufring_append (pfile, (const uchar *)(STR), (LEN), \
+ &first_buff, &last_buff); \
+ total_len += (LEN); \
+ } while (0);
+
+ cppchar_t c;
+
+ /* If we previously performed any trigraph or line splicing
+ transformations, undo them within the body of the raw string. */
+ while (note->pos < cur)
+ ++note;
+ for (; note->pos == cur; ++note)
+ {
+ switch (note->type)
+ {
+ case '\\':
+ case ' ':
+ /* Restore backslash followed by newline. */
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("\\", 1);
+ after_backslash:
+ if (note->type == ' ')
+ {
+ /* GNU backslash whitespace newline extension. FIXME
+ could be any sequence of non-vertical space. When we
+ can properly restore any such sequence, we should mark
+ this note as handled so _cpp_process_line_notes
+ doesn't warn. */
+ BUF_APPEND (" ", 1);
+ }
+
+ BUF_APPEND ("\n", 1);
+ break;
+
+ case 0:
+ /* Already handled. */
+ break;
+
+ default:
+ if (_cpp_trigraph_map[note->type])
+ {
+ /* Don't warn about this trigraph in
+ _cpp_process_line_notes, since trigraphs show up as
+ trigraphs in raw strings. */
+ uchar type = note->type;
+ note->type = 0;
+
+ if (!CPP_OPTION (pfile, trigraphs))
+ /* If we didn't convert the trigraph in the first
+ place, don't do anything now either. */
+ break;
+
+ BUF_APPEND (base, cur - base);
+ base = cur;
+ BUF_APPEND ("??", 2);
+
+ /* ??/ followed by newline gets two line notes, one for
+ the trigraph and one for the backslash/newline. */
+ if (type == '/' && note[1].pos == cur)
+ {
+ if (note[1].type != '\\'
+ && note[1].type != ' ')
+ abort ();
+ BUF_APPEND ("/", 1);
+ ++note;
+ goto after_backslash;
+ }
+ /* The ) from ??) could be part of the suffix. */
+ else if (type == ')'
+ && strncmp ((const char *) cur+1,
+ (const char *) raw_prefix,
+ raw_prefix_len) == 0
+ && cur[raw_prefix_len+1] == '"')
+ {
+ cur += raw_prefix_len+2;
+ goto break_outer_loop;
+ }
+ else
+ {
+ /* Skip the replacement character. */
+ base = ++cur;
+ BUF_APPEND (&type, 1);
+ }
+ }
+ else
+ abort ();
+ break;
+ }
+ }
+ c = *cur++;
+
+ if (c == ')'
+ && strncmp ((const char *) cur, (const char *) raw_prefix,
+ raw_prefix_len) == 0
+ && cur[raw_prefix_len] == '"')
+ {
+ cur += raw_prefix_len + 1;
+ break;
+ }
+ else if (c == '\n')
+ {
+ if (pfile->state.in_directive
+ || pfile->state.parsing_args
+ || pfile->state.in_deferred_pragma)
+ {
+ cur--;
+ type = CPP_OTHER;
+ cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+ "unterminated raw string");
+ break;
+ }
+
+ BUF_APPEND (base, cur - base);
+
+ if (pfile->buffer->cur < pfile->buffer->rlimit)
+ CPP_INCREMENT_LINE (pfile, 0);
+ pfile->buffer->need_line = true;
+
+ pfile->buffer->cur = cur-1;
+ _cpp_process_line_notes (pfile, false);
+ if (!_cpp_get_fresh_line (pfile))
+ {
+ source_location src_loc = token->src_loc;
+ token->type = CPP_EOF;
+ /* Tell the compiler the line number of the EOF token. */
+ token->src_loc = pfile->line_table->highest_line;
+ token->flags = BOL;
+ if (first_buff != NULL)
+ _cpp_release_buff (pfile, first_buff);
+ cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
+ "unterminated raw string");
+ return;
+ }
+
+ cur = base = pfile->buffer->cur;
+ note = &pfile->buffer->notes[pfile->buffer->cur_note];
+ }
+ else if (c == '\0' && !saw_NUL)
+ LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
+ CPP_BUF_COLUMN (pfile->buffer, cur));
+ }
+ break_outer_loop:
+
+ if (saw_NUL && !pfile->state.skipping)
+ cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
+ "null character(s) preserved in literal");
+
+ pfile->buffer->cur = cur;
+ if (first_buff == NULL)
+ create_literal (pfile, token, base, cur - base, type);
+ else
+ {
+ uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+
+ token->type = type;
+ token->val.str.len = total_len + (cur - base);
+ token->val.str.text = dest;
+ last_buff = first_buff;
+ while (last_buff != NULL)
+ {
+ memcpy (dest, last_buff->base,
+ BUFF_FRONT (last_buff) - last_buff->base);
+ dest += BUFF_FRONT (last_buff) - last_buff->base;
+ last_buff = last_buff->next;
+ }
+ _cpp_release_buff (pfile, first_buff);
+ memcpy (dest, base, cur - base);
+ dest[cur - base] = '\0';
+ }
+}
+
/* Lexes a string, character constant, or angle-bracketed header file
name. The stored string contains the spelling, including opening
- quote and leading any leading 'L', 'u' or 'U'. It returns the type
- of the literal, or CPP_OTHER if it was not properly terminated, or
- CPP_LESS for an unterminated header name which must be relexed as
- normal tokens.
+ quote and any leading 'L', 'u', 'U' or 'u8' and optional
+ 'R' modifier. It returns the type of the literal, or CPP_OTHER
+ if it was not properly terminated, or CPP_LESS for an unterminated
+ header name which must be relexed as normal tokens.
The spelling is NUL-terminated, but it is not guaranteed that this
is the first NUL since embedded NULs are preserved. */
cur = base;
terminator = *cur++;
- if (terminator == 'L' || terminator == 'u' || terminator == 'U')
+ if (terminator == 'L' || terminator == 'U')
terminator = *cur++;
- if (terminator == '\"')
+ else if (terminator == 'u')
+ {
+ terminator = *cur++;
+ if (terminator == '8')
+ terminator = *cur++;
+ }
+ if (terminator == 'R')
+ {
+ lex_raw_string (pfile, token, base, cur);
+ return;
+ }
+ if (terminator == '"')
type = (*base == 'L' ? CPP_WSTRING :
*base == 'U' ? CPP_STRING32 :
- *base == 'u' ? CPP_STRING16 : CPP_STRING);
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+ : CPP_STRING);
else if (terminator == '\'')
type = (*base == 'L' ? CPP_WCHAR :
*base == 'U' ? CPP_CHAR32 :
case 'L':
case 'u':
case 'U':
- /* 'L', 'u' or 'U' may introduce wide characters or strings. */
+ case 'R':
+ /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
+ wide strings or raw strings. */
if (c == 'L' || CPP_OPTION (pfile, uliterals))
{
- if (*buffer->cur == '\'' || *buffer->cur == '"')
+ if ((*buffer->cur == '\'' && c != 'R')
+ || *buffer->cur == '"'
+ || (*buffer->cur == 'R'
+ && c != 'R'
+ && buffer->cur[1] == '"'
+ && CPP_OPTION (pfile, uliterals))
+ || (*buffer->cur == '8'
+ && c == 'u'
+ && (buffer->cur[1] == '"'
+ || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
{
lex_string (pfile, result, buffer->cur - 1);
break;
case 'y': case 'z':
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'M': case 'N': case 'O': case 'P': case 'Q':
case 'S': case 'T': case 'V': case 'W': case 'X':
case 'Y': case 'Z':
result->type = CPP_NAME;