X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fc-lex.c;h=3a63a053b794f189fa0e0e645dc72f9f11f710ac;hb=296054540d22b2adffca33e1a3e7dcce8c590db9;hp=ea0f80c7b83263987807a4b4d9e7852d3db771b9;hpb=fa70df70597245daf03e65e2b49bf9d0bdb6d3c0;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/c-lex.c b/gcc/c-lex.c index ea0f80c7b83..3a63a053b79 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -1,6 +1,6 @@ /* Mainly the interface between cpplib and the C front ends. Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997 - 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. This file is part of GCC. @@ -27,7 +27,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "real.h" #include "rtl.h" #include "tree.h" -#include "expr.h" #include "input.h" #include "output.h" #include "c-tree.h" @@ -42,19 +41,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "splay-tree.h" #include "debug.h" -/* The current line map. */ -static const struct line_map *map; - -/* The line used to refresh the lineno global variable after each token. */ -static unsigned int src_lineno; - /* We may keep statistics about how long which files took to compile. */ static int header_time, body_time; static splay_tree file_info_tree; -/* File used for outputting assembler code. */ -extern FILE *asm_out_file; - #undef WCHAR_TYPE_SIZE #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node) @@ -63,31 +53,26 @@ extern FILE *asm_out_file; int pending_lang_change; /* If we need to switch languages - C++ only */ int c_header_level; /* depth in C headers - C++ only */ +bool c_lex_string_translate = true; /* If we need to translate characters received. */ -/* Nonzero tells yylex to ignore \ in string constants. */ -static int ignore_escape_flag; - -static tree interpret_integer PARAMS ((const cpp_token *, unsigned int)); -static tree interpret_float PARAMS ((const cpp_token *, unsigned int)); +static tree interpret_integer (const cpp_token *, unsigned int); +static tree interpret_float (const cpp_token *, unsigned int); static enum integer_type_kind - narrowest_unsigned_type PARAMS ((tree, unsigned int)); + narrowest_unsigned_type (tree, unsigned int); static enum integer_type_kind - narrowest_signed_type PARAMS ((tree, unsigned int)); -static tree lex_string PARAMS ((const cpp_string *)); -static tree lex_charconst PARAMS ((const cpp_token *)); -static void update_header_times PARAMS ((const char *)); -static int dump_one_header PARAMS ((splay_tree_node, void *)); -static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int)); -static void cb_ident PARAMS ((cpp_reader *, unsigned int, - const cpp_string *)); -static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int)); -static void cb_define PARAMS ((cpp_reader *, unsigned int, - cpp_hashnode *)); -static void cb_undef PARAMS ((cpp_reader *, unsigned int, - cpp_hashnode *)); + narrowest_signed_type (tree, unsigned int); +static enum cpp_ttype lex_string (const cpp_token *, tree *, bool); +static tree lex_charconst (const cpp_token *); +static void update_header_times (const char *); +static int dump_one_header (splay_tree_node, void *); +static void cb_line_change (cpp_reader *, const cpp_token *, int); +static void cb_ident (cpp_reader *, unsigned int, const cpp_string *); +static void cb_def_pragma (cpp_reader *, unsigned int); +static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *); +static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *); void -init_c_lex () +init_c_lex (void) { struct cpp_callbacks *cb; struct c_fileinfo *toplevel; @@ -103,7 +88,7 @@ init_c_lex () body_time = get_run_time (); toplevel->time = body_time; } - + cb = cpp_get_callbacks (parse_in); cb->line_change = cb_line_change; @@ -123,8 +108,7 @@ init_c_lex () } struct c_fileinfo * -get_fileinfo (name) - const char *name; +get_fileinfo (const char *name) { splay_tree_node n; struct c_fileinfo *fi; @@ -133,7 +117,7 @@ get_fileinfo (name) if (n) return (struct c_fileinfo *) n->value; - fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo)); + fi = xmalloc (sizeof (struct c_fileinfo)); fi->time = 0; fi->interface_only = 0; fi->interface_unknown = 1; @@ -143,8 +127,7 @@ get_fileinfo (name) } static void -update_header_times (name) - const char *name; +update_header_times (const char *name) { /* Changing files again. This means currently collected time is charged against header time, and body time starts back at 0. */ @@ -159,9 +142,7 @@ update_header_times (name) } static int -dump_one_header (n, dummy) - splay_tree_node n; - void *dummy ATTRIBUTE_UNUSED; +dump_one_header (splay_tree_node n, void *dummy ATTRIBUTE_UNUSED) { print_time ((const char *) n->key, ((struct c_fileinfo *) n->value)->time); @@ -169,7 +150,7 @@ dump_one_header (n, dummy) } void -dump_time_statistics () +dump_time_statistics (void) { struct c_fileinfo *file = get_fileinfo (input_filename); int this_time = get_run_time (); @@ -186,17 +167,20 @@ dump_time_statistics () } static void -cb_ident (pfile, line, str) - cpp_reader *pfile ATTRIBUTE_UNUSED; - unsigned int line ATTRIBUTE_UNUSED; - const cpp_string *str ATTRIBUTE_UNUSED; +cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED, + unsigned int line ATTRIBUTE_UNUSED, + const cpp_string *str ATTRIBUTE_UNUSED) { #ifdef ASM_OUTPUT_IDENT if (! flag_no_ident) { /* Convert escapes in the string. */ - tree value ATTRIBUTE_UNUSED = lex_string (str); - ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); + cpp_string cstr = { 0, 0 }; + if (cpp_interpret_string (pfile, str, 1, &cstr, false)) + { + ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text); + free ((void *)cstr.text); + } } #endif } @@ -204,29 +188,30 @@ cb_ident (pfile, line, str) /* Called at the start of every non-empty line. TOKEN is the first lexed token on the line. Used for diagnostic line numbers. */ static void -cb_line_change (pfile, token, parsing_args) - cpp_reader *pfile ATTRIBUTE_UNUSED; - const cpp_token *token; - int parsing_args ATTRIBUTE_UNUSED; +cb_line_change (cpp_reader *pfile ATTRIBUTE_UNUSED, const cpp_token *token, + int parsing_args) { - src_lineno = SOURCE_LINE (map, token->line); + if (token->type != CPP_EOF && !parsing_args) + { + source_location loc = token->src_loc; + const struct line_map *map = linemap_lookup (&line_table, loc); + input_line = SOURCE_LINE (map, loc); + } } void -fe_file_change (new_map) - const struct line_map *new_map; +fe_file_change (const struct line_map *new_map) { - unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line); + if (new_map == NULL) + return; if (new_map->reason == LC_ENTER) { /* Don't stack the main buffer on the input stack; we already did in compile_file. */ - if (map == NULL) - main_input_filename = new_map->to_file; - else + if (! MAIN_FILE_P (new_map)) { - int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1); + int included_at = LAST_SOURCE_LINE (new_map - 1); input_line = included_at; push_srcloc (new_map->to_file, 1); @@ -253,30 +238,28 @@ fe_file_change (new_map) } #endif pop_srcloc (); - - (*debug_hooks->end_source_file) (to_line); + + (*debug_hooks->end_source_file) (new_map->to_line); } update_header_times (new_map->to_file); in_system_header = new_map->sysp != 0; input_filename = new_map->to_file; - input_line = to_line; - map = new_map; + input_line = new_map->to_line; /* Hook for C++. */ extract_interface_info (); } static void -cb_def_pragma (pfile, line) - cpp_reader *pfile; - unsigned int line; +cb_def_pragma (cpp_reader *pfile, source_location loc) { /* Issue a warning message if we have been asked to do so. Ignore unknown pragmas in system headers unless an explicit -Wunknown-pragmas has been given. */ if (warn_unknown_pragmas > in_system_header) { + const struct line_map *map = linemap_lookup (&line_table, loc); const unsigned char *space, *name; const cpp_token *s; @@ -290,52 +273,54 @@ cb_def_pragma (pfile, line) name = cpp_token_as_text (pfile, s); } - input_line = SOURCE_LINE (map, line); + input_line = SOURCE_LINE (map, loc); warning ("ignoring #pragma %s %s", space, name); } } /* #define callback for DWARF and DWARF2 debug info. */ static void -cb_define (pfile, line, node) - cpp_reader *pfile; - unsigned int line; - cpp_hashnode *node; +cb_define (cpp_reader *pfile, source_location loc, cpp_hashnode *node) { - (*debug_hooks->define) (SOURCE_LINE (map, line), + const struct line_map *map = linemap_lookup (&line_table, loc); + (*debug_hooks->define) (SOURCE_LINE (map, loc), (const char *) cpp_macro_definition (pfile, node)); } /* #undef callback for DWARF and DWARF2 debug info. */ static void -cb_undef (pfile, line, node) - cpp_reader *pfile ATTRIBUTE_UNUSED; - unsigned int line; - cpp_hashnode *node; +cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, source_location loc, + cpp_hashnode *node) { - (*debug_hooks->undef) (SOURCE_LINE (map, line), + const struct line_map *map = linemap_lookup (&line_table, loc); + (*debug_hooks->undef) (SOURCE_LINE (map, loc), (const char *) NODE_NAME (node)); } -int -c_lex (value) - tree *value; +static inline const cpp_token * +get_nonpadding_token (void) { const cpp_token *tok; - - retry: timevar_push (TV_CPP); do tok = cpp_get_token (parse_in); while (tok->type == CPP_PADDING); timevar_pop (TV_CPP); - /* The C++ front end does horrible things with the current line - number. To ensure an accurate line number, we must reset it - every time we return a token. */ - input_line = src_lineno; + return tok; +} + +int +c_lex_with_flags (tree *value, unsigned char *cpp_flags) +{ + const cpp_token *tok; + location_t atloc; + static bool no_more_pch; + + retry: + tok = get_nonpadding_token (); - *value = NULL_TREE; + retry_after_at: switch (tok->type) { case CPP_NAME: @@ -367,6 +352,37 @@ c_lex (value) } break; + case CPP_ATSIGN: + /* An @ may give the next token special significance in Objective-C. */ + atloc = input_location; + tok = get_nonpadding_token (); + if (c_dialect_objc ()) + { + tree val; + switch (tok->type) + { + case CPP_NAME: + val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); + if (C_IS_RESERVED_WORD (val) + && OBJC_IS_AT_KEYWORD (C_RID_CODE (val))) + { + *value = val; + return CPP_AT_NAME; + } + break; + + case CPP_STRING: + case CPP_WSTRING: + return lex_string (tok, value, true); + + default: break; + } + } + + /* ... or not. */ + error ("%Hstray '@' in program", &atloc); + goto retry_after_at; + case CPP_OTHER: { cppchar_t c = tok->val.str.text[0]; @@ -387,7 +403,7 @@ c_lex (value) case CPP_STRING: case CPP_WSTRING: - *value = lex_string (&tok->val.str); + return lex_string (tok, value, false); break; /* These tokens should not be visible outside cpplib. */ @@ -396,19 +412,33 @@ c_lex (value) case CPP_MACRO_ARG: abort (); - default: break; + default: + *value = NULL_TREE; + break; + } + + if (! no_more_pch) + { + no_more_pch = true; + c_common_no_more_pch (); } + if (cpp_flags) + *cpp_flags = tok->flags; return tok->type; } +int +c_lex (tree *value) +{ + return c_lex_with_flags (value, NULL); +} + /* Returns the narrowest C-visible unsigned type, starting with the minimum specified by FLAGS, that can fit VALUE, or itk_none if there isn't one. */ static enum integer_type_kind -narrowest_unsigned_type (value, flags) - tree value; - unsigned int flags; +narrowest_unsigned_type (tree value, unsigned int flags) { enum integer_type_kind itk; @@ -432,9 +462,7 @@ narrowest_unsigned_type (value, flags) /* Ditto, but narrowest signed type. */ static enum integer_type_kind -narrowest_signed_type (value, flags) - tree value; - unsigned int flags; +narrowest_signed_type (tree value, unsigned int flags) { enum integer_type_kind itk; @@ -458,9 +486,7 @@ narrowest_signed_type (value, flags) /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */ static tree -interpret_integer (token, flags) - const cpp_token *token; - unsigned int flags; +interpret_integer (const cpp_token *token, unsigned int flags) { tree value, type; enum integer_type_kind itk; @@ -538,9 +564,7 @@ interpret_integer (token, flags) /* Interpret TOKEN, a floating point number with FLAGS as classified by cpplib. */ static tree -interpret_float (token, flags) - const cpp_token *token; - unsigned int flags; +interpret_float (const cpp_token *token, unsigned int flags) { tree type; tree value; @@ -601,82 +625,107 @@ interpret_float (token, flags) return value; } -static tree -lex_string (str) - const cpp_string *str; +/* Convert a series of STRING and/or WSTRING tokens into a tree, + performing string constant concatenation. TOK is the first of + these. VALP is the location to write the string into. OBJC_STRING + indicates whether an '@' token preceded the incoming token. + Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING, + or CPP_OBJC_STRING). + + This is unfortunately more work than it should be. If any of the + strings in the series has an L prefix, the result is a wide string + (6.4.5p4). Whether or not the result is a wide string affects the + meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape + sequences do not continue across the boundary between two strings in + a series (6.4.5p7), so we must not lose the boundaries. Therefore + cpp_interpret_string takes a vector of cpp_string structures, which + we must arrange to provide. */ + +static enum cpp_ttype +lex_string (const cpp_token *tok, tree *valp, bool objc_string) { - bool wide; tree value; - char *buf, *q; - cppchar_t c; - const unsigned char *p, *limit; - - wide = str->text[0] == 'L'; - p = str->text + 1 + wide; - limit = str->text + str->len - 1; - q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1)); - - while (p < limit) + bool wide = false; + size_t count = 1; + struct obstack str_ob; + cpp_string istr; + + /* Try to avoid the overhead of creating and destroying an obstack + for the common case of just one string. */ + cpp_string str = tok->val.str; + cpp_string *strs = &str; + + if (tok->type == CPP_WSTRING) + wide = true; + + tok = get_nonpadding_token (); + if (c_dialect_objc () && tok->type == CPP_ATSIGN) + { + objc_string = true; + tok = get_nonpadding_token (); + } + if (tok->type == CPP_STRING || tok->type == CPP_WSTRING) { - c = *p++; + gcc_obstack_init (&str_ob); + obstack_grow (&str_ob, &str, sizeof (cpp_string)); - if (c == '\\' && !ignore_escape_flag) - c = cpp_parse_escape (parse_in, &p, limit, wide); - - /* Add this single character into the buffer either as a wchar_t, - a multibyte sequence, or as a single byte. */ - if (wide) + do { - unsigned charwidth = TYPE_PRECISION (char_type_node); - unsigned bytemask = (1 << charwidth) - 1; - int byte; + count++; + if (tok->type == CPP_WSTRING) + wide = true; + obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string)); - for (byte = 0; byte < WCHAR_BYTES; ++byte) + tok = get_nonpadding_token (); + if (c_dialect_objc () && tok->type == CPP_ATSIGN) { - int n; - if (byte >= (int) sizeof (c)) - n = 0; - else - n = (c >> (byte * charwidth)) & bytemask; - if (BYTES_BIG_ENDIAN) - q[WCHAR_BYTES - byte - 1] = n; - else - q[byte] = n; + objc_string = true; + tok = get_nonpadding_token (); } - q += WCHAR_BYTES; - } - else - { - *q++ = c; } + while (tok->type == CPP_STRING || tok->type == CPP_WSTRING); + strs = obstack_finish (&str_ob); } - /* Terminate the string value, either with a single byte zero - or with a wide zero. */ + /* We have read one more token than we want. */ + _cpp_backup_tokens (parse_in, 1); - if (wide) + if (count > 1 && !objc_string && warn_traditional && !in_system_header) + warning ("traditional C rejects string constant concatenation"); + + if ((c_lex_string_translate + ? cpp_interpret_string : cpp_interpret_string_notranslate) + (parse_in, strs, count, &istr, wide)) { - memset (q, 0, WCHAR_BYTES); - q += WCHAR_BYTES; + value = build_string (istr.len, (char *)istr.text); + free ((void *)istr.text); } else { - *q++ = '\0'; + /* Callers cannot generally handle error_mark_node in this context, + so return the empty string instead. cpp_interpret_string has + issued an error. */ + if (wide) + value = build_string (TYPE_PRECISION (wchar_type_node) + / TYPE_PRECISION (char_type_node), + "\0\0\0"); /* widest supported wchar_t + is 32 bits */ + else + value = build_string (1, ""); } - value = build_string (q - buf, buf); + TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node; + *valp = fix_string_type (value); - if (wide) - TREE_TYPE (value) = wchar_array_type_node; - else - TREE_TYPE (value) = char_array_type_node; - return value; + if (strs != &str) + obstack_free (&str_ob, 0); + + return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING; } /* Converts a (possibly wide) character constant token into a tree. */ static tree -lex_charconst (token) - const cpp_token *token; +lex_charconst (const cpp_token *token) { cppchar_t result; tree type, value; @@ -684,7 +733,7 @@ lex_charconst (token) int unsignedp; result = cpp_interpret_charconst (parse_in, token, - &chars_seen, &unsignedp); + &chars_seen, &unsignedp); /* Cast to cppchar_signed_t to get correct sign-extension of RESULT before possibly widening to HOST_WIDE_INT for build_int_2. */ @@ -697,7 +746,7 @@ lex_charconst (token) type = wchar_type_node; /* In C, a character constant has type 'int'. In C++ 'char', but multi-char charconsts have type 'int'. */ - else if ((c_language == clk_c) || chars_seen > 1) + else if (!c_dialect_cxx () || chars_seen > 1) type = integer_type_node; else type = char_type_node;