/* Mainly the interface between cpplib and the C front ends.
Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
- 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of GCC.
#include "real.h"
#include "rtl.h"
#include "tree.h"
-#include "expr.h"
#include "input.h"
#include "output.h"
#include "c-tree.h"
#include "splay-tree.h"
#include "debug.h"
-/* The current line map. */
-static const struct line_map *map;
-
-/* The line used to refresh the lineno global variable after each token. */
-static unsigned int src_lineno;
-
/* We may keep statistics about how long which files took to compile. */
static int header_time, body_time;
static splay_tree file_info_tree;
int pending_lang_change; /* If we need to switch languages - C++ only */
int c_header_level; /* depth in C headers - C++ only */
-/* Nonzero tells yylex to ignore \ in string constants. */
-static int ignore_escape_flag;
+/* If we need to translate characters received. This is tri-state:
+ 0 means use only the untranslated string; 1 means use only
+ the translated string; -1 means chain the translated string
+ to the untranslated one. */
+int c_lex_string_translate = 1;
+
+/* True if strings should be passed to the caller of c_lex completely
+ unmolested (no concatenation, no translation). */
+bool c_lex_return_raw_strings = false;
static tree interpret_integer (const cpp_token *, unsigned int);
static tree interpret_float (const cpp_token *, unsigned int);
-static enum integer_type_kind
- narrowest_unsigned_type (tree, unsigned int);
-static enum integer_type_kind
- narrowest_signed_type (tree, unsigned int);
-static tree lex_string (const cpp_string *);
+static enum integer_type_kind narrowest_unsigned_type
+ (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned int);
+static enum integer_type_kind narrowest_signed_type
+ (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned int);
+static enum cpp_ttype lex_string (const cpp_token *, tree *, bool);
static tree lex_charconst (const cpp_token *);
static void update_header_times (const char *);
static int dump_one_header (splay_tree_node, void *);
struct cpp_callbacks *cb;
struct c_fileinfo *toplevel;
- /* Set up filename timing. Must happen before cpp_read_main_file. */
- file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
- 0,
- (splay_tree_delete_value_fn)free);
+ /* The get_fileinfo data structure must be initialized before
+ cpp_read_main_file is called. */
toplevel = get_fileinfo ("<top level>");
if (flag_detailed_statistics)
{
/* Set the debug callbacks if we can use them. */
if (debug_info_level == DINFO_LEVEL_VERBOSE
- && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG
+ && (write_symbols == DWARF2_DEBUG
|| write_symbols == VMS_AND_DWARF2_DEBUG))
{
cb->define = cb_define;
splay_tree_node n;
struct c_fileinfo *fi;
+ if (!file_info_tree)
+ file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
+ 0,
+ (splay_tree_delete_value_fn)free);
+
n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
if (n)
return (struct c_fileinfo *) n->value;
- fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
+ fi = XNEW (struct c_fileinfo);
fi->time = 0;
fi->interface_only = 0;
fi->interface_unknown = 1;
}
/* Print the time recorded for one header. N's key is passed to
   print_time as a string and N's value is read as a struct c_fileinfo
   whose `time' field supplies the figure. DUMMY is unused. */
static int
-dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED)
+dump_one_header (splay_tree_node n, void * ARG_UNUSED (dummy))
{
print_time ((const char *) n->key,
((struct c_fileinfo *) n->value)->time);
}
/* Callback receiving the string STR. When ASM_OUTPUT_IDENT is defined
   and flag_no_ident is zero, STR is interpreted and the resulting text
   is handed to ASM_OUTPUT_IDENT; otherwise nothing is done.
   NOTE(review): PFILE and STR are wrapped in ARG_UNUSED, yet the
   ASM_OUTPUT_IDENT path reads both; presumably the marker is only
   there to silence warnings when that macro is undefined -- confirm. */
static void
-cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED,
- unsigned int line ATTRIBUTE_UNUSED,
- const cpp_string *str ATTRIBUTE_UNUSED)
+cb_ident (cpp_reader * ARG_UNUSED (pfile),
+ unsigned int ARG_UNUSED (line),
+ const cpp_string * ARG_UNUSED (str))
{
#ifdef ASM_OUTPUT_IDENT
if (! flag_no_ident)
{
/* Convert escapes in the string. The interpreted text is only
   emitted (and afterwards freed) when interpretation succeeds. */
- tree value ATTRIBUTE_UNUSED = lex_string (str);
- ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
+ cpp_string cstr = { 0, 0 };
+ if (cpp_interpret_string (pfile, str, 1, &cstr, false))
+ {
+ ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text);
+ free ((void *)cstr.text);
+ }
}
#endif
}
/* Called at the start of every non-empty line. TOKEN is the first
lexed token on the line. Used for diagnostic line numbers: unless
TOKEN is CPP_EOF or PARSING_ARGS is nonzero, the current location is
taken from TOKEN's src_loc (input_location when USE_MAPPED_LOCATION
is defined, otherwise input_line via a line-map lookup). */
static void
-cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token,
- int parsing_args ATTRIBUTE_UNUSED)
+cb_line_change (cpp_reader * ARG_UNUSED (pfile), const cpp_token *token,
+ int parsing_args)
{
- src_lineno = SOURCE_LINE (map, token->line);
+ if (token->type != CPP_EOF && !parsing_args)
+#ifdef USE_MAPPED_LOCATION
+ input_location = token->src_loc;
+#else
+ {
+ source_location loc = token->src_loc;
+ const struct line_map *map = linemap_lookup (&line_table, loc);
+ input_line = SOURCE_LINE (map, loc);
+ }
+#endif
}
void
fe_file_change (const struct line_map *new_map)
{
- unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line);
+ if (new_map == NULL)
+ return;
if (new_map->reason == LC_ENTER)
{
/* Don't stack the main buffer on the input stack;
we already did in compile_file. */
- if (map == NULL)
- main_input_filename = new_map->to_file;
- else
+ if (! MAIN_FILE_P (new_map))
{
- int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1);
+#ifdef USE_MAPPED_LOCATION
+ int included_at = LAST_SOURCE_LINE_LOCATION (new_map - 1);
+
+ input_location = included_at;
+ push_srcloc (new_map->start_location);
+#else
+ int included_at = LAST_SOURCE_LINE (new_map - 1);
input_line = included_at;
push_srcloc (new_map->to_file, 1);
+#endif
(*debug_hooks->start_source_file) (included_at, new_map->to_file);
#ifndef NO_IMPLICIT_EXTERN_C
if (c_header_level)
#endif
pop_srcloc ();
- (*debug_hooks->end_source_file) (to_line);
+ (*debug_hooks->end_source_file) (new_map->to_line);
}
update_header_times (new_map->to_file);
in_system_header = new_map->sysp != 0;
+#ifdef USE_MAPPED_LOCATION
+ input_location = new_map->start_location;
+#else
input_filename = new_map->to_file;
- input_line = to_line;
- map = new_map;
-
- /* Hook for C++. */
- extract_interface_info ();
+ input_line = new_map->to_line;
+#endif
}
static void
-cb_def_pragma (cpp_reader *pfile, unsigned int line)
+cb_def_pragma (cpp_reader *pfile, source_location loc)
{
/* Issue a warning message if we have been asked to do so. Ignore
unknown pragmas in system headers unless an explicit
-Wunknown-pragmas has been given. */
if (warn_unknown_pragmas > in_system_header)
{
+#ifndef USE_MAPPED_LOCATION
+ const struct line_map *map = linemap_lookup (&line_table, loc);
+#endif
const unsigned char *space, *name;
const cpp_token *s;
name = cpp_token_as_text (pfile, s);
}
- input_line = SOURCE_LINE (map, line);
+#ifdef USE_MAPPED_LOCATION
+ input_location = loc;
+#else
+ input_line = SOURCE_LINE (map, loc);
+#endif
warning ("ignoring #pragma %s %s", space, name);
}
}
/* #define callback for DWARF and DWARF2 debug info. Looks up the line
   map covering LOC and reports the definition text of NODE, as produced
   by cpp_macro_definition, at that source line through
   debug_hooks->define.
   NOTE(review): the place that installs this callback appears to test
   only DWARF2_DEBUG and VMS_AND_DWARF2_DEBUG now -- confirm whether the
   plain "DWARF" wording is still accurate. */
static void
-cb_define (cpp_reader *pfile, unsigned int line, cpp_hashnode *node)
+cb_define (cpp_reader *pfile, source_location loc, cpp_hashnode *node)
{
- (*debug_hooks->define) (SOURCE_LINE (map, line),
+ const struct line_map *map = linemap_lookup (&line_table, loc);
+ (*debug_hooks->define) (SOURCE_LINE (map, loc),
(const char *) cpp_macro_definition (pfile, node));
}
/* #undef callback for DWARF and DWARF2 debug info. Looks up the line
   map covering LOC and reports NODE's name at that source line through
   debug_hooks->undef. PFILE is unused.
   NOTE(review): confirm that "DWARF" (as opposed to DWARF2 only) is
   still an accurate description of when this callback is installed. */
static void
-cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line,
+cb_undef (cpp_reader * ARG_UNUSED (pfile), source_location loc,
cpp_hashnode *node)
{
- (*debug_hooks->undef) (SOURCE_LINE (map, line),
+ const struct line_map *map = linemap_lookup (&line_table, loc);
+ (*debug_hooks->undef) (SOURCE_LINE (map, loc),
(const char *) NODE_NAME (node));
}
\f
/* Fetch tokens from parse_in with cpp_get_token until one is found
   whose type is not CPP_PADDING, and return it. The fetch loop is
   bracketed by timevar_push/timevar_pop on TV_CPP. */
-int
-c_lex (tree *value)
+static inline const cpp_token *
+get_nonpadding_token (void)
{
const cpp_token *tok;
-
- retry:
timevar_push (TV_CPP);
do
tok = cpp_get_token (parse_in);
while (tok->type == CPP_PADDING);
timevar_pop (TV_CPP);
- /* The C++ front end does horrible things with the current line
- number. To ensure an accurate line number, we must reset it
- every time we return a token. */
- input_line = src_lineno;
+ return tok;
+}
+
+
+enum cpp_ttype
+c_lex_with_flags (tree *value, unsigned char *cpp_flags)
+{
+ const cpp_token *tok;
+ location_t atloc;
+ static bool no_more_pch;
+
+ retry:
+ tok = get_nonpadding_token ();
- *value = NULL_TREE;
+ retry_after_at:
switch (tok->type)
{
case CPP_NAME:
break;
default:
- abort ();
+ gcc_unreachable ();
}
}
break;
+ case CPP_ATSIGN:
+ /* An @ may give the next token special significance in Objective-C. */
+ atloc = input_location;
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc ())
+ {
+ tree val;
+ switch (tok->type)
+ {
+ case CPP_NAME:
+ val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node));
+ if (objc_is_reserved_word (val))
+ {
+ *value = val;
+ return CPP_AT_NAME;
+ }
+ break;
+
+ case CPP_STRING:
+ case CPP_WSTRING:
+ return lex_string (tok, value, true);
+
+ default: break;
+ }
+ }
+
+ /* ... or not. */
+ error ("%Hstray '@' in program", &atloc);
+ goto retry_after_at;
+
case CPP_OTHER:
{
cppchar_t c = tok->val.str.text[0];
case CPP_STRING:
case CPP_WSTRING:
- *value = lex_string (&tok->val.str);
+ if (!c_lex_return_raw_strings)
+ return lex_string (tok, value, false);
+ /* else fall through */
+
+ case CPP_PRAGMA:
+ *value = build_string (tok->val.str.len, (char *)tok->val.str.text);
break;
/* These tokens should not be visible outside cpplib. */
case CPP_HEADER_NAME:
case CPP_COMMENT:
case CPP_MACRO_ARG:
- abort ();
+ gcc_unreachable ();
+
+ default:
+ *value = NULL_TREE;
+ break;
+ }
- default: break;
+ if (! no_more_pch)
+ {
+ no_more_pch = true;
+ c_common_no_more_pch ();
}
+ if (cpp_flags)
+ *cpp_flags = tok->flags;
return tok->type;
}
/* Lex one token into *VALUE and return its type. This is
   c_lex_with_flags called with a null CPP_FLAGS pointer, i.e. the
   token flags are not reported to the caller. */
+enum cpp_ttype
+c_lex (tree *value)
+{
+ return c_lex_with_flags (value, NULL);
+}
+
/* Returns the narrowest C-visible unsigned type, starting with the
- minimum specified by FLAGS, that can fit VALUE, or itk_none if
+ minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if
there isn't one. */
+
static enum integer_type_kind
-narrowest_unsigned_type (tree value, unsigned int flags)
+narrowest_unsigned_type (unsigned HOST_WIDE_INT low,
+ unsigned HOST_WIDE_INT high,
+ unsigned int flags)
{
enum integer_type_kind itk;
else
itk = itk_unsigned_long_long;
- /* int_fits_type_p must think the type of its first argument is
- wider than its second argument, or it won't do the proper check. */
- TREE_TYPE (value) = widest_unsigned_literal_type_node;
-
for (; itk < itk_none; itk += 2 /* skip signed types */)
- if (int_fits_type_p (value, integer_types[itk]))
- return itk;
+ {
+ tree upper = TYPE_MAX_VALUE (integer_types[itk]);
+
+ if ((unsigned HOST_WIDE_INT)TREE_INT_CST_HIGH (upper) > high
+ || ((unsigned HOST_WIDE_INT)TREE_INT_CST_HIGH (upper) == high
+ && TREE_INT_CST_LOW (upper) >= low))
+ return itk;
+ }
return itk_none;
}
/* Ditto, but narrowest signed type. HIGH:LOW is compared, as an
   unsigned double-word value, against each candidate type's
   TYPE_MAX_VALUE. */
static enum integer_type_kind
-narrowest_signed_type (tree value, unsigned int flags)
+narrowest_signed_type (unsigned HOST_WIDE_INT low,
+ unsigned HOST_WIDE_INT high, unsigned int flags)
{
enum integer_type_kind itk;
else
itk = itk_long_long;
- /* int_fits_type_p must think the type of its first argument is
- wider than its second argument, or it won't do the proper check. */
- TREE_TYPE (value) = widest_unsigned_literal_type_node;
for (; itk < itk_none; itk += 2 /* skip unsigned types */)
- if (int_fits_type_p (value, integer_types[itk]))
- return itk;
+ {
+ tree upper = TYPE_MAX_VALUE (integer_types[itk]);
+
+ if ((unsigned HOST_WIDE_INT)TREE_INT_CST_HIGH (upper) > high
+ || ((unsigned HOST_WIDE_INT)TREE_INT_CST_HIGH (upper) == high
+ && TREE_INT_CST_LOW (upper) >= low))
+ return itk;
+ }
return itk_none;
}
integer = cpp_interpret_integer (parse_in, token, flags);
integer = cpp_num_sign_extend (integer, options->precision);
- value = build_int_2_wide (integer.low, integer.high);
/* The type of a constant with a U suffix is straightforward. */
if (flags & CPP_N_UNSIGNED)
- itk = narrowest_unsigned_type (value, flags);
+ itk = narrowest_unsigned_type (integer.low, integer.high, flags);
else
{
/* The type of a potentially-signed integer constant varies
depending on the base it's in, the standard in use, and the
length suffixes. */
- enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags);
- enum integer_type_kind itk_s = narrowest_signed_type (value, flags);
+ enum integer_type_kind itk_u
+ = narrowest_unsigned_type (integer.low, integer.high, flags);
+ enum integer_type_kind itk_s
+ = narrowest_signed_type (integer.low, integer.high, flags);
/* In both C89 and C99, octal and hex constants may be signed or
unsigned, whichever fits tighter. We do not warn about this
if (itk > itk_unsigned_long
&& (flags & CPP_N_WIDTH) != CPP_N_LARGE
&& ! in_system_header && ! flag_isoc99)
- pedwarn ("integer constant is too large for \"%s\" type",
+ pedwarn ("integer constant is too large for %qs type",
(flags & CPP_N_UNSIGNED) ? "unsigned long" : "long");
- TREE_TYPE (value) = type;
+ value = build_int_cst_wide (type, integer.low, integer.high);
/* Convert imaginary to a complex type. */
if (flags & CPP_N_IMAGINARY)
- value = build_complex (NULL_TREE, convert (type, integer_zero_node), value);
+ value = build_complex (NULL_TREE, build_int_cst (type, 0), value);
return value;
}
REAL_VALUE_TYPE real;
char *copy;
size_t copylen;
- const char *typename;
+ const char *type_name;
- /* FIXME: make %T work in error/warning, then we don't need typename. */
+ /* FIXME: make %T work in error/warning, then we don't need type_name. */
if ((flags & CPP_N_WIDTH) == CPP_N_LARGE)
{
type = long_double_type_node;
- typename = "long double";
+ type_name = "long double";
}
else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL
|| flag_single_precision_constant)
{
type = float_type_node;
- typename = "float";
+ type_name = "float";
}
else
{
type = double_type_node;
- typename = "double";
+ type_name = "double";
}
/* Copy the constant to a nul-terminated buffer. If the constant
/* I or J suffix. */
copylen--;
- copy = alloca (copylen + 1);
+ copy = (char *) alloca (copylen + 1);
memcpy (copy, token->val.str.text, copylen);
copy[copylen] = '\0';
??? That's a dubious reason... is this a mandatory diagnostic or
isn't it? -- zw, 2001-08-21. */
if (REAL_VALUE_ISINF (real) && pedantic)
- warning ("floating constant exceeds range of \"%s\"", typename);
+ warning ("floating constant exceeds range of %<%s%>", type_name);
/* Create a node with determined type and value. */
value = build_real (type, real);
return value;
}
-static tree
-lex_string (const cpp_string *str)
+/* Convert a series of STRING and/or WSTRING tokens into a tree,
+ performing string constant concatenation. TOK is the first of
+ these. VALP is the location to write the string into. OBJC_STRING
+ indicates whether an '@' token preceded the incoming token.
+ Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING,
+ or CPP_OBJC_STRING).
+
+ This is unfortunately more work than it should be. If any of the
+ strings in the series has an L prefix, the result is a wide string
+ (6.4.5p4). Whether or not the result is a wide string affects the
+ meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape
+ sequences do not continue across the boundary between two strings in
+ a series (6.4.5p7), so we must not lose the boundaries. Therefore
+ cpp_interpret_string takes a vector of cpp_string structures, which
+ we must arrange to provide.
+
+ The tri-state c_lex_string_translate selects the interpreter: nonzero
+ uses cpp_interpret_string, zero uses cpp_interpret_string_notranslate.
+ When it is -1 and the untranslated bytes differ from the translated
+ ones, *VALP receives the untranslated string and the typed translated
+ string is chained to it through TREE_CHAIN. If interpretation fails,
+ an empty (wide or narrow) string is produced instead of an error
+ node. */
+
+static enum cpp_ttype
+lex_string (const cpp_token *tok, tree *valp, bool objc_string)
{
- bool wide;
tree value;
- char *buf, *q;
- cppchar_t c;
- const unsigned char *p, *limit;
+ bool wide = false;
+ size_t count = 1;
+ struct obstack str_ob;
+ cpp_string istr;
- wide = str->text[0] == 'L';
- p = str->text + 1 + wide;
- limit = str->text + str->len - 1;
- q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1));
+ /* Try to avoid the overhead of creating and destroying an obstack
+ for the common case of just one string. */
+ cpp_string str = tok->val.str;
+ cpp_string *strs = &str;
- while (p < limit)
- {
- c = *p++;
+ if (tok->type == CPP_WSTRING)
+ wide = true;
- if (c == '\\' && !ignore_escape_flag)
- c = cpp_parse_escape (parse_in, &p, limit, wide);
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc () && tok->type == CPP_ATSIGN)
+ {
+ objc_string = true;
+ tok = get_nonpadding_token ();
+ }
+ if (tok->type == CPP_STRING || tok->type == CPP_WSTRING)
+ {
+ gcc_obstack_init (&str_ob);
+ obstack_grow (&str_ob, &str, sizeof (cpp_string));
- /* Add this single character into the buffer either as a wchar_t,
- a multibyte sequence, or as a single byte. */
- if (wide)
+ do
{
- unsigned charwidth = TYPE_PRECISION (char_type_node);
- unsigned bytemask = (1 << charwidth) - 1;
- int byte;
+ count++;
+ if (tok->type == CPP_WSTRING)
+ wide = true;
+ obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
- for (byte = 0; byte < WCHAR_BYTES; ++byte)
+ tok = get_nonpadding_token ();
+ if (c_dialect_objc () && tok->type == CPP_ATSIGN)
{
- int n;
- if (byte >= (int) sizeof (c))
- n = 0;
- else
- n = (c >> (byte * charwidth)) & bytemask;
- if (BYTES_BIG_ENDIAN)
- q[WCHAR_BYTES - byte - 1] = n;
- else
- q[byte] = n;
+ objc_string = true;
+ tok = get_nonpadding_token ();
}
- q += WCHAR_BYTES;
- }
- else
- {
- *q++ = c;
}
+ while (tok->type == CPP_STRING || tok->type == CPP_WSTRING);
+ strs = (cpp_string *) obstack_finish (&str_ob);
}
- /* Terminate the string value, either with a single byte zero
- or with a wide zero. */
+ /* We have read one more token than we want. */
+ _cpp_backup_tokens (parse_in, 1);
+
+ if (count > 1 && !objc_string && warn_traditional && !in_system_header)
+ warning ("traditional C rejects string constant concatenation");
- if (wide)
+ if ((c_lex_string_translate
+ ? cpp_interpret_string : cpp_interpret_string_notranslate)
+ (parse_in, strs, count, &istr, wide))
{
- memset (q, 0, WCHAR_BYTES);
- q += WCHAR_BYTES;
+ value = build_string (istr.len, (char *)istr.text);
+ free ((void *)istr.text);
+
+ if (c_lex_string_translate == -1)
+ {
+ int xlated = cpp_interpret_string_notranslate (parse_in, strs, count,
+ &istr, wide);
+ /* Assume that, if we managed to translate the string above,
+ then the untranslated parsing will always succeed. */
+ gcc_assert (xlated);
+
+ /* Compare the raw bytes with memcmp rather than strncmp: the
+ interpreted text may contain embedded NUL bytes (from \0
+ escapes or wide characters), and strncmp would stop at the
+ first one and miss any difference that follows it. */
+ if (TREE_STRING_LENGTH (value) != (int)istr.len
+ || 0 != memcmp (TREE_STRING_POINTER (value), istr.text,
+ istr.len))
+ {
+ /* Arrange for us to return the untranslated string in
+ *valp, but to set up the C type of the translated
+ one. */
+ *valp = build_string (istr.len, (char *)istr.text);
+ valp = &TREE_CHAIN (*valp);
+ }
+ free ((void *)istr.text);
+ }
}
else
{
- *q++ = '\0';
+ /* Callers cannot generally handle error_mark_node in this context,
+ so return the empty string instead. cpp_interpret_string has
+ issued an error. */
+ if (wide)
+ value = build_string (TYPE_PRECISION (wchar_type_node)
+ / TYPE_PRECISION (char_type_node),
+ "\0\0\0"); /* widest supported wchar_t
+ is 32 bits */
+ else
+ value = build_string (1, "");
}
- value = build_string (q - buf, buf);
+ TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node;
+ *valp = fix_string_type (value);
- if (wide)
- TREE_TYPE (value) = wchar_array_type_node;
- else
- TREE_TYPE (value) = char_array_type_node;
- return value;
+ if (strs != &str)
+ obstack_free (&str_ob, 0);
+
+ return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING;
}
/* Converts a (possibly wide) character constant token into a tree. */
result = cpp_interpret_charconst (parse_in, token,
&chars_seen, &unsignedp);
- /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
- before possibly widening to HOST_WIDE_INT for build_int_2. */
- if (unsignedp || (cppchar_signed_t) result >= 0)
- value = build_int_2 (result, 0);
- else
- value = build_int_2 ((cppchar_signed_t) result, -1);
-
if (token->type == CPP_WCHAR)
type = wchar_type_node;
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
- else if ((c_language == clk_c) || chars_seen > 1)
+ else if (!c_dialect_cxx () || chars_seen > 1)
type = integer_type_node;
else
type = char_type_node;
- TREE_TYPE (value) = type;
+ /* Cast to cppchar_signed_t to get correct sign-extension of RESULT
+ before possibly widening to HOST_WIDE_INT for build_int_cst. */
+ if (unsignedp || (cppchar_signed_t) result >= 0)
+ value = build_int_cst_wide (type, result, 0);
+ else
+ value = build_int_cst_wide (type, (cppchar_signed_t) result, -1);
+
return value;
}