/* CPP Library - lexical analysis.
- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
- Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
+ 2011 Free Software Foundation, Inc.
Contributed by Per Bothner, 1994-95.
Based on CCCP program by Paul Rubin, June 1986
Adapted to ANSI C, Richard Stallman, Jan 1987
}
}
-#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__))
+/* Disable on Solaris 2/x86 until the following problems can be properly
+ autoconfed:
+
+ The Solaris 8 assembler cannot assemble SSE2/SSE4.2 insns.
+ The Solaris 9 assembler cannot assemble SSE4.2 insns.
+ Before Solaris 9 Update 6, SSE insns cannot be executed.
+ The Solaris 10+ assembler tags objects with the instruction set
+ extensions used, so SSE4.2 executables cannot run on machines that
+ don't support that extension. */
+
+#if (GCC_VERSION >= 4005) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
/* Replicated character data to be shared between implementations.
Recall that outside of a context with vector support we can't
/* A version of the fast scanner using MMX vectorized byte compare insns.
This uses the PMOVMSKB instruction which was introduced with "MMX2",
- which was packaged into SSE1; it is also present in the AMD 3dNOW-A
+ which was packaged into SSE1; it is also present in the AMD MMX
extension. Mark the function as using "sse" so that we emit a real
"emms" instruction, rather than the 3dNOW "femms" instruction. */
return (const uchar *)p + found;
}
+#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns. */
static const uchar *
return s + index;
}
+#else
+/* Work around out-dated assemblers without sse4 support. */
+#define search_line_sse42 search_line_sse2
+#endif
+
/* Check the CPU capabilities. */
#include "../gcc/config/i386/cpuid.h"
typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
static search_line_fast_type search_line_fast;
-static void __attribute__((constructor))
+#define HAVE_init_vectorized_lexer 1
+static inline void
init_vectorized_lexer (void)
{
unsigned dummy, ecx = 0, edx = 0;
minimum = 3;
#elif defined(__SSE2__)
minimum = 2;
-#elif defined(__SSE__) || defined(__3dNOW_A__)
+#elif defined(__SSE__)
minimum = 1;
#endif
}
else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
{
- if (minimum == 1 || edx & bit_3DNOWP)
+ if (minimum == 1
+ || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
impl = search_line_mmx;
}
search_line_fast = impl;
}
-#elif defined(__GNUC__) && defined(__ALTIVEC__)
+#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
/* A vection of the fast scanner using AltiVec vectorized byte compares. */
/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
#endif
+/* Initialize the lexer if needed. Calls init_vectorized_lexer when
+ HAVE_init_vectorized_lexer is defined; otherwise this function does
+ nothing. */
+
+void
+_cpp_init_lexer (void)
+{
+#ifdef HAVE_init_vectorized_lexer
+ /* The macro is defined next to init_vectorized_lexer, so this call is
+ only compiled where that function exists. */
+ init_vectorized_lexer ();
+#endif
+}
+
/* Returns with a logical line that contains no escaped newlines or
trigraphs. This is a time-critical inner loop. */
void
lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
const uchar *cur)
{
- source_location saw_NUL = 0;
const uchar *raw_prefix;
unsigned int raw_prefix_len = 0;
enum cpp_ttype type;
raw_prefix_len) == 0
&& cur[raw_prefix_len+1] == '"')
{
- cur += raw_prefix_len+2;
+ BUF_APPEND (")", 1);
+ base++;
+ cur += raw_prefix_len + 2;
goto break_outer_loop;
}
else
cur = base = pfile->buffer->cur;
note = &pfile->buffer->notes[pfile->buffer->cur_note];
}
- else if (c == '\0' && !saw_NUL)
- LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
- CPP_BUF_COLUMN (pfile->buffer, cur));
}
break_outer_loop:
- if (saw_NUL && !pfile->state.skipping)
- cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
- "null character(s) preserved in literal");
+ if (CPP_OPTION (pfile, user_literals))
+ {
+ /* Grab user defined literal suffix. */
+ if (ISIDST (*cur))
+ {
+ type = cpp_userdef_string_add_type (type);
+ ++cur;
+ }
+ while (ISIDNUM (*cur))
+ ++cur;
+ }
pfile->buffer->cur = cur;
if (first_buff == NULL)
cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
(int) terminator);
+ if (CPP_OPTION (pfile, user_literals))
+ {
+ /* Grab user defined literal suffix. */
+ if (ISIDST (*cur))
+ {
+ type = cpp_userdef_char_add_type (type);
+ type = cpp_userdef_string_add_type (type);
+ ++cur;
+ }
+ while (ISIDNUM (*cur))
+ ++cur;
+ }
+
pfile->buffer->cur = cur;
create_literal (pfile, token, base, cur - base, type);
}
cppchar_t type)
{
unsigned char *buffer;
- unsigned int len, clen;
+ unsigned int len, clen, i;
len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
if (is_vspace (pfile->buffer->cur[-1]))
len--;
- /* If we are currently in a directive, then we need to store all
- C++ comments as C comments internally, and so we need to
- allocate a little extra space in that case.
+ /* If we are currently in a directive or in argument parsing, then
+ we need to store all C++ comments as C comments internally, and
+ so we need to allocate a little extra space in that case.
Note that the only time we encounter a directive here is
when we are saving comments in a "#define". */
- clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
+ clen = ((pfile->state.in_directive || pfile->state.parsing_args)
+ && type == '/') ? len + 2 : len;
buffer = _cpp_unaligned_alloc (pfile, clen);
memcpy (buffer + 1, from, len - 1);
/* Finish conversion to a C comment, if necessary. */
- if (pfile->state.in_directive && type == '/')
+ if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
{
buffer[1] = '*';
buffer[clen - 2] = '*';
buffer[clen - 1] = '/';
+ /* A C++ comment can contain sequences that are illegal inside a C
+ comment, so we need to filter them out. */
+ for (i = 2; i < (clen - 2); i++)
+ if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
+ buffer[i] = '|';
}
/* Finally store this comment for use by clients of libcpp. */
return run->next;
}
+/* Return the number of not yet processed tokens in a given
+ context. CONTEXT's tokens_kind selects which pair of pointers is
+ subtracted: the token members for TOKENS_KIND_DIRECT, the ptoken
+ members for TOKENS_KIND_INDIRECT and TOKENS_KIND_EXTENDED. Any
+ other kind aborts.
+ NOTE(review): the pointer difference is returned as int. */
+int
+_cpp_remaining_tokens_num_in_context (cpp_context *context)
+{
+ if (context->tokens_kind == TOKENS_KIND_DIRECT)
+ return (LAST (context).token - FIRST (context).token);
+ else if (context->tokens_kind == TOKENS_KIND_INDIRECT
+ || context->tokens_kind == TOKENS_KIND_EXTENDED)
+ return (LAST (context).ptoken - FIRST (context).ptoken);
+ else
+ /* Unknown tokens_kind value. */
+ abort ();
+}
+
+/* Returns the token present at index INDEX in a given context. If
+ INDEX is zero, the next token to be processed is returned. INDEX
+ is applied relative to FIRST (context) and is not range-checked
+ here. Aborts if CONTEXT's tokens_kind is not one of the kinds
+ handled below. */
+static const cpp_token*
+_cpp_token_from_context_at (cpp_context *context, int index)
+{
+ if (context->tokens_kind == TOKENS_KIND_DIRECT)
+ /* Return the address of the INDEX'th element. */
+ return &(FIRST (context).token[index]);
+ else if (context->tokens_kind == TOKENS_KIND_INDIRECT
+ || context->tokens_kind == TOKENS_KIND_EXTENDED)
+ /* Return the INDEX'th stored pointer itself. */
+ return FIRST (context).ptoken[index];
+ else
+ abort ();
+}
+
/* Look ahead in the input stream. */
const cpp_token *
cpp_peek_token (cpp_reader *pfile, int index)
/* First, scan through any pending cpp_context objects. */
while (context->prev)
{
- ptrdiff_t sz = (context->direct_p
- ? LAST (context).token - FIRST (context).token
- : LAST (context).ptoken - FIRST (context).ptoken);
+ ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
if (index < (int) sz)
- return (context->direct_p
- ? FIRST (context).token + index
- : *(FIRST (context).ptoken + index));
-
+ return _cpp_token_from_context_at (context, index);
index -= (int) sz;
context = context->prev;
}
}
c = *buffer->cur++;
- LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
- CPP_BUF_COLUMN (buffer, buffer->cur));
+ if (pfile->forced_token_location_p)
+ result->src_loc = *pfile->forced_token_location_p;
+ else
+ result->src_loc = linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (buffer, buffer->cur));
switch (c)
{
case 'R':
/* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
wide strings or raw strings. */
- if (c == 'L' || CPP_OPTION (pfile, uliterals))
+ if (c == 'L' || CPP_OPTION (pfile, rliterals)
+ || (c != 'R' && CPP_OPTION (pfile, uliterals)))
{
if ((*buffer->cur == '\'' && c != 'R')
|| *buffer->cur == '"'
|| (*buffer->cur == 'R'
&& c != 'R'
&& buffer->cur[1] == '"'
- && CPP_OPTION (pfile, uliterals))
+ && CPP_OPTION (pfile, rliterals))
|| (*buffer->cur == '8'
&& c == 'u'
&& (buffer->cur[1] == '"'
- || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
+ || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
+ && CPP_OPTION (pfile, rliterals)))))
{
lex_string (pfile, result, buffer->cur - 1);
break;
return CPP_TOKEN_FLD_NONE;
}
}
+
+/* All tokens lexed in R after calling this function will be forced to have
+ their source_location the same as the location referenced by P, until
+ cpp_stop_forcing_token_locations is called for R. Only the pointer P
+ is stored, not a copy of the location it points to, so P must remain
+ valid while forcing is in effect. */
+
+void
+cpp_force_token_locations (cpp_reader *r, source_location *p)
+{
+ r->forced_token_location_p = p;
+}
+
+/* Go back to assigning locations naturally for lexed tokens. This
+ resets R's forced_token_location_p to NULL. */
+
+void
+cpp_stop_forcing_token_locations (cpp_reader *r)
+{
+ r->forced_token_location_p = NULL;
+}