X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fc-lex.c;h=108bc5cff17474e3fe33d10ea94a5bae84366cd4;hp=3d8ecff08859b48e7783690ad39a93e99c825a43;hb=b7d1b569577509f4a733da7949c899c936db453b;hpb=5ae4caef64b121ef89043f62438b048d88f7a7e0 diff --git a/gcc/c-lex.c b/gcc/c-lex.c index 3d8ecff0885..108bc5cff17 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -1,6 +1,7 @@ /* Mainly the interface between cpplib and the C front ends. Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997 - 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 + Free Software Foundation, Inc. This file is part of GCC. @@ -16,16 +17,17 @@ for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING. If not, write to the Free -Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ #include "config.h" #include "system.h" +#include "coretypes.h" +#include "tm.h" #include "real.h" #include "rtl.h" #include "tree.h" -#include "expr.h" #include "input.h" #include "output.h" #include "c-tree.h" @@ -40,71 +42,47 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "splay-tree.h" #include "debug.h" -#ifdef MULTIBYTE_CHARS -#include "mbchar.h" -#include -#endif /* MULTIBYTE_CHARS */ -#ifndef GET_ENVIRONMENT -#define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME)) -#endif - -/* The current line map. */ -static const struct line_map *map; - -/* The line used to refresh the lineno global variable after each token. */ -static unsigned int src_lineno; - /* We may keep statistics about how long which files took to compile. */ static int header_time, body_time; static splay_tree file_info_tree; -/* File used for outputting assembler code. */ -extern FILE *asm_out_file; - -#undef WCHAR_TYPE_SIZE -#define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node) - -/* Number of bytes in a wide character. */ -#define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT) - int pending_lang_change; /* If we need to switch languages - C++ only */ int c_header_level; /* depth in C headers - C++ only */ -/* Nonzero tells yylex to ignore \ in string constants. */ -static int ignore_escape_flag; - -static tree interpret_integer PARAMS ((const cpp_token *, unsigned int)); -static tree interpret_float PARAMS ((const cpp_token *, unsigned int)); -static enum integer_type_kind - narrowest_unsigned_type PARAMS ((tree, unsigned int)); -static enum integer_type_kind - narrowest_signed_type PARAMS ((tree, unsigned int)); -static tree lex_string PARAMS ((const unsigned char *, unsigned int, - int)); -static tree lex_charconst PARAMS ((const cpp_token *)); -static void update_header_times PARAMS ((const char *)); -static int dump_one_header PARAMS ((splay_tree_node, void *)); -static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int)); -static void cb_ident PARAMS ((cpp_reader *, unsigned int, - const cpp_string *)); -static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *)); -static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int)); -static void cb_define PARAMS ((cpp_reader *, unsigned int, - cpp_hashnode *)); -static void cb_undef PARAMS ((cpp_reader *, unsigned int, - cpp_hashnode *)); +/* If we need to translate characters received. This is tri-state: + 0 means use only the untranslated string; 1 means use only + the translated string; -1 means chain the translated string + to the untranslated one. */ +int c_lex_string_translate = 1; + +/* True if strings should be passed to the caller of c_lex completely + unmolested (no concatenation, no translation). */ +bool c_lex_return_raw_strings = false; + +static tree interpret_integer (const cpp_token *, unsigned int); +static tree interpret_float (const cpp_token *, unsigned int); +static enum integer_type_kind narrowest_unsigned_type + (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned int); +static enum integer_type_kind narrowest_signed_type + (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned int); +static enum cpp_ttype lex_string (const cpp_token *, tree *, bool); +static tree lex_charconst (const cpp_token *); +static void update_header_times (const char *); +static int dump_one_header (splay_tree_node, void *); +static void cb_line_change (cpp_reader *, const cpp_token *, int); +static void cb_ident (cpp_reader *, unsigned int, const cpp_string *); +static void cb_def_pragma (cpp_reader *, unsigned int); +static void cb_define (cpp_reader *, unsigned int, cpp_hashnode *); +static void cb_undef (cpp_reader *, unsigned int, cpp_hashnode *); -const char * -init_c_lex (filename) - const char *filename; +void +init_c_lex (void) { struct cpp_callbacks *cb; struct c_fileinfo *toplevel; - /* Set up filename timing. Must happen before cpp_read_main_file. */ - file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp, - 0, - (splay_tree_delete_value_fn)free); + /* The get_fileinfo data structure must be initialized before + cpp_read_main_file is called. */ toplevel = get_fileinfo (""); if (flag_detailed_statistics) { @@ -112,72 +90,41 @@ init_c_lex (filename) body_time = get_run_time (); toplevel->time = body_time; } - -#ifdef MULTIBYTE_CHARS - /* Change to the native locale for multibyte conversions. */ - setlocale (LC_CTYPE, ""); - GET_ENVIRONMENT (literal_codeset, "LANG"); -#endif cb = cpp_get_callbacks (parse_in); cb->line_change = cb_line_change; cb->ident = cb_ident; - cb->file_change = cb_file_change; cb->def_pragma = cb_def_pragma; + cb->valid_pch = c_common_valid_pch; + cb->read_pch = c_common_read_pch; /* Set the debug callbacks if we can use them. */ if (debug_info_level == DINFO_LEVEL_VERBOSE - && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG - || write_symbols == VMS_AND_DWARF2_DEBUG)) + && (write_symbols == DWARF2_DEBUG + || write_symbols == VMS_AND_DWARF2_DEBUG)) { cb->define = cb_define; cb->undef = cb_undef; } - - /* Start it at 0. */ - lineno = 0; - - if (filename == NULL || !strcmp (filename, "-")) - filename = ""; - - return cpp_read_main_file (parse_in, filename, ident_hash); -} - -/* A thin wrapper around the real parser that initializes the - integrated preprocessor after debug output has been initialized. - Also, make sure the start_source_file debug hook gets called for - the primary source file. */ - -void -c_common_parse_file (set_yydebug) - int set_yydebug ATTRIBUTE_UNUSED; -{ -#if YYDEBUG != 0 - yydebug = set_yydebug; -#else - warning ("YYDEBUG not defined"); -#endif - - (*debug_hooks->start_source_file) (lineno, input_filename); - cpp_finish_options (parse_in); - - yyparse (); - free_parser_stacks (); } struct c_fileinfo * -get_fileinfo (name) - const char *name; +get_fileinfo (const char *name) { splay_tree_node n; struct c_fileinfo *fi; + if (!file_info_tree) + file_info_tree = splay_tree_new ((splay_tree_compare_fn) strcmp, + 0, + (splay_tree_delete_value_fn) free); + n = splay_tree_lookup (file_info_tree, (splay_tree_key) name); if (n) return (struct c_fileinfo *) n->value; - fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo)); + fi = XNEW (struct c_fileinfo); fi->time = 0; fi->interface_only = 0; fi->interface_unknown = 1; @@ -187,8 +134,7 @@ get_fileinfo (name) } static void -update_header_times (name) - const char *name; +update_header_times (const char *name) { /* Changing files again. This means currently collected time is charged against header time, and body time starts back at 0. */ @@ -203,9 +149,7 @@ update_header_times (name) } static int -dump_one_header (n, dummy) - splay_tree_node n; - void *dummy ATTRIBUTE_UNUSED; +dump_one_header (splay_tree_node n, void * ARG_UNUSED (dummy)) { print_time ((const char *) n->key, ((struct c_fileinfo *) n->value)->time); @@ -213,7 +157,7 @@ dump_one_header (n, dummy) } void -dump_time_statistics () +dump_time_statistics (void) { struct c_fileinfo *file = get_fileinfo (input_filename); int this_time = get_run_time (); @@ -223,24 +167,27 @@ dump_time_statistics () print_time ("header files (total)", header_time); print_time ("main file (total)", this_time - body_time); fprintf (stderr, "ratio = %g : 1\n", - (double)header_time / (double)(this_time - body_time)); + (double) header_time / (double) (this_time - body_time)); fprintf (stderr, "\n******\n"); splay_tree_foreach (file_info_tree, dump_one_header, 0); } static void -cb_ident (pfile, line, str) - cpp_reader *pfile ATTRIBUTE_UNUSED; - unsigned int line ATTRIBUTE_UNUSED; - const cpp_string *str ATTRIBUTE_UNUSED; +cb_ident (cpp_reader * ARG_UNUSED (pfile), + unsigned int ARG_UNUSED (line), + const cpp_string * ARG_UNUSED (str)) { #ifdef ASM_OUTPUT_IDENT - if (! flag_no_ident) + if (!flag_no_ident) { /* Convert escapes in the string. */ - tree value = lex_string (str->text, str->len, 0); - ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); + cpp_string cstr = { 0, 0 }; + if (cpp_interpret_string (pfile, str, 1, &cstr, false)) + { + ASM_OUTPUT_IDENT (asm_out_file, (const char *) cstr.text); + free ((void *) cstr.text); + } } #endif } @@ -248,33 +195,44 @@ cb_ident (pfile, line, str) /* Called at the start of every non-empty line. TOKEN is the first lexed token on the line. Used for diagnostic line numbers. */ static void -cb_line_change (pfile, token, parsing_args) - cpp_reader *pfile ATTRIBUTE_UNUSED; - const cpp_token *token; - int parsing_args ATTRIBUTE_UNUSED; +cb_line_change (cpp_reader * ARG_UNUSED (pfile), const cpp_token *token, + int parsing_args) { - src_lineno = SOURCE_LINE (map, token->line); + if (token->type != CPP_EOF && !parsing_args) +#ifdef USE_MAPPED_LOCATION + input_location = token->src_loc; +#else + { + source_location loc = token->src_loc; + const struct line_map *map = linemap_lookup (&line_table, loc); + input_line = SOURCE_LINE (map, loc); + } +#endif } -static void -cb_file_change (pfile, new_map) - cpp_reader *pfile ATTRIBUTE_UNUSED; - const struct line_map *new_map; +void +fe_file_change (const struct line_map *new_map) { - unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line); + if (new_map == NULL) + return; if (new_map->reason == LC_ENTER) { /* Don't stack the main buffer on the input stack; we already did in compile_file. */ - if (map == NULL) - main_input_filename = new_map->to_file; - else + if (!MAIN_FILE_P (new_map)) { - int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1); +#ifdef USE_MAPPED_LOCATION + int included_at = LAST_SOURCE_LINE_LOCATION (new_map - 1); - lineno = included_at; + input_location = included_at; + push_srcloc (new_map->start_location); +#else + int included_at = LAST_SOURCE_LINE (new_map - 1); + + input_line = included_at; push_srcloc (new_map->to_file, 1); +#endif (*debug_hooks->start_source_file) (included_at, new_map->to_file); #ifndef NO_IMPLICIT_EXTERN_C if (c_header_level) @@ -293,29 +251,27 @@ cb_file_change (pfile, new_map) if (c_header_level && --c_header_level == 0) { if (new_map->sysp == 2) - warning ("badly nested C headers from preprocessor"); + warning (0, "badly nested C headers from preprocessor"); --pending_lang_change; } #endif pop_srcloc (); - - (*debug_hooks->end_source_file) (to_line); + + (*debug_hooks->end_source_file) (new_map->to_line); } update_header_times (new_map->to_file); in_system_header = new_map->sysp != 0; +#ifdef USE_MAPPED_LOCATION + input_location = new_map->start_location; +#else input_filename = new_map->to_file; - lineno = to_line; - map = new_map; - - /* Hook for C++. */ - extract_interface_info (); + input_line = new_map->to_line; +#endif } static void -cb_def_pragma (pfile, line) - cpp_reader *pfile; - unsigned int line; +cb_def_pragma (cpp_reader *pfile, source_location loc) { /* Issue a warning message if we have been asked to do so. Ignore unknown pragmas in system headers unless an explicit @@ -324,6 +280,14 @@ cb_def_pragma (pfile, line) { const unsigned char *space, *name; const cpp_token *s; +#ifndef USE_MAPPED_LOCATION + location_t fe_loc; + const struct line_map *map = linemap_lookup (&line_table, loc); + fe_loc.file = map->to_file; + fe_loc.line = SOURCE_LINE (map, loc); +#else + location_t fe_loc = loc; +#endif space = name = (const unsigned char *) ""; s = cpp_get_token (pfile); @@ -335,371 +299,58 @@ cb_def_pragma (pfile, line) name = cpp_token_as_text (pfile, s); } - lineno = SOURCE_LINE (map, line); - warning ("ignoring #pragma %s %s", space, name); + warning (OPT_Wunknown_pragmas, "%Hignoring #pragma %s %s", + &fe_loc, space, name); } } /* #define callback for DWARF and DWARF2 debug info. */ static void -cb_define (pfile, line, node) - cpp_reader *pfile; - unsigned int line; - cpp_hashnode *node; +cb_define (cpp_reader *pfile, source_location loc, cpp_hashnode *node) { - (*debug_hooks->define) (SOURCE_LINE (map, line), + const struct line_map *map = linemap_lookup (&line_table, loc); + (*debug_hooks->define) (SOURCE_LINE (map, loc), (const char *) cpp_macro_definition (pfile, node)); } /* #undef callback for DWARF and DWARF2 debug info. */ static void -cb_undef (pfile, line, node) - cpp_reader *pfile ATTRIBUTE_UNUSED; - unsigned int line; - cpp_hashnode *node; +cb_undef (cpp_reader * ARG_UNUSED (pfile), source_location loc, + cpp_hashnode *node) { - (*debug_hooks->undef) (SOURCE_LINE (map, line), + const struct line_map *map = linemap_lookup (&line_table, loc); + (*debug_hooks->undef) (SOURCE_LINE (map, loc), (const char *) NODE_NAME (node)); } - -#if 0 /* not yet */ -/* Returns nonzero if C is a universal-character-name. Give an error if it - is not one which may appear in an identifier, as per [extendid]. - - Note that extended character support in identifiers has not yet been - implemented. It is my personal opinion that this is not a desirable - feature. Portable code cannot count on support for more than the basic - identifier character set. */ - -static inline int -is_extended_char (c) - int c; -{ -#ifdef TARGET_EBCDIC - return 0; -#else - /* ASCII. */ - if (c < 0x7f) - return 0; - - /* None of the valid chars are outside the Basic Multilingual Plane (the - low 16 bits). */ - if (c > 0xffff) - { - error ("universal-character-name '\\U%08x' not valid in identifier", c); - return 1; - } - - /* Latin */ - if ((c >= 0x00c0 && c <= 0x00d6) - || (c >= 0x00d8 && c <= 0x00f6) - || (c >= 0x00f8 && c <= 0x01f5) - || (c >= 0x01fa && c <= 0x0217) - || (c >= 0x0250 && c <= 0x02a8) - || (c >= 0x1e00 && c <= 0x1e9a) - || (c >= 0x1ea0 && c <= 0x1ef9)) - return 1; - - /* Greek */ - if ((c == 0x0384) - || (c >= 0x0388 && c <= 0x038a) - || (c == 0x038c) - || (c >= 0x038e && c <= 0x03a1) - || (c >= 0x03a3 && c <= 0x03ce) - || (c >= 0x03d0 && c <= 0x03d6) - || (c == 0x03da) - || (c == 0x03dc) - || (c == 0x03de) - || (c == 0x03e0) - || (c >= 0x03e2 && c <= 0x03f3) - || (c >= 0x1f00 && c <= 0x1f15) - || (c >= 0x1f18 && c <= 0x1f1d) - || (c >= 0x1f20 && c <= 0x1f45) - || (c >= 0x1f48 && c <= 0x1f4d) - || (c >= 0x1f50 && c <= 0x1f57) - || (c == 0x1f59) - || (c == 0x1f5b) - || (c == 0x1f5d) - || (c >= 0x1f5f && c <= 0x1f7d) - || (c >= 0x1f80 && c <= 0x1fb4) - || (c >= 0x1fb6 && c <= 0x1fbc) - || (c >= 0x1fc2 && c <= 0x1fc4) - || (c >= 0x1fc6 && c <= 0x1fcc) - || (c >= 0x1fd0 && c <= 0x1fd3) - || (c >= 0x1fd6 && c <= 0x1fdb) - || (c >= 0x1fe0 && c <= 0x1fec) - || (c >= 0x1ff2 && c <= 0x1ff4) - || (c >= 0x1ff6 && c <= 0x1ffc)) - return 1; - - /* Cyrillic */ - if ((c >= 0x0401 && c <= 0x040d) - || (c >= 0x040f && c <= 0x044f) - || (c >= 0x0451 && c <= 0x045c) - || (c >= 0x045e && c <= 0x0481) - || (c >= 0x0490 && c <= 0x04c4) - || (c >= 0x04c7 && c <= 0x04c8) - || (c >= 0x04cb && c <= 0x04cc) - || (c >= 0x04d0 && c <= 0x04eb) - || (c >= 0x04ee && c <= 0x04f5) - || (c >= 0x04f8 && c <= 0x04f9)) - return 1; - - /* Armenian */ - if ((c >= 0x0531 && c <= 0x0556) - || (c >= 0x0561 && c <= 0x0587)) - return 1; - - /* Hebrew */ - if ((c >= 0x05d0 && c <= 0x05ea) - || (c >= 0x05f0 && c <= 0x05f4)) - return 1; - - /* Arabic */ - if ((c >= 0x0621 && c <= 0x063a) - || (c >= 0x0640 && c <= 0x0652) - || (c >= 0x0670 && c <= 0x06b7) - || (c >= 0x06ba && c <= 0x06be) - || (c >= 0x06c0 && c <= 0x06ce) - || (c >= 0x06e5 && c <= 0x06e7)) - return 1; - - /* Devanagari */ - if ((c >= 0x0905 && c <= 0x0939) - || (c >= 0x0958 && c <= 0x0962)) - return 1; - - /* Bengali */ - if ((c >= 0x0985 && c <= 0x098c) - || (c >= 0x098f && c <= 0x0990) - || (c >= 0x0993 && c <= 0x09a8) - || (c >= 0x09aa && c <= 0x09b0) - || (c == 0x09b2) - || (c >= 0x09b6 && c <= 0x09b9) - || (c >= 0x09dc && c <= 0x09dd) - || (c >= 0x09df && c <= 0x09e1) - || (c >= 0x09f0 && c <= 0x09f1)) - return 1; - - /* Gurmukhi */ - if ((c >= 0x0a05 && c <= 0x0a0a) - || (c >= 0x0a0f && c <= 0x0a10) - || (c >= 0x0a13 && c <= 0x0a28) - || (c >= 0x0a2a && c <= 0x0a30) - || (c >= 0x0a32 && c <= 0x0a33) - || (c >= 0x0a35 && c <= 0x0a36) - || (c >= 0x0a38 && c <= 0x0a39) - || (c >= 0x0a59 && c <= 0x0a5c) - || (c == 0x0a5e)) - return 1; - - /* Gujarati */ - if ((c >= 0x0a85 && c <= 0x0a8b) - || (c == 0x0a8d) - || (c >= 0x0a8f && c <= 0x0a91) - || (c >= 0x0a93 && c <= 0x0aa8) - || (c >= 0x0aaa && c <= 0x0ab0) - || (c >= 0x0ab2 && c <= 0x0ab3) - || (c >= 0x0ab5 && c <= 0x0ab9) - || (c == 0x0ae0)) - return 1; - - /* Oriya */ - if ((c >= 0x0b05 && c <= 0x0b0c) - || (c >= 0x0b0f && c <= 0x0b10) - || (c >= 0x0b13 && c <= 0x0b28) - || (c >= 0x0b2a && c <= 0x0b30) - || (c >= 0x0b32 && c <= 0x0b33) - || (c >= 0x0b36 && c <= 0x0b39) - || (c >= 0x0b5c && c <= 0x0b5d) - || (c >= 0x0b5f && c <= 0x0b61)) - return 1; - - /* Tamil */ - if ((c >= 0x0b85 && c <= 0x0b8a) - || (c >= 0x0b8e && c <= 0x0b90) - || (c >= 0x0b92 && c <= 0x0b95) - || (c >= 0x0b99 && c <= 0x0b9a) - || (c == 0x0b9c) - || (c >= 0x0b9e && c <= 0x0b9f) - || (c >= 0x0ba3 && c <= 0x0ba4) - || (c >= 0x0ba8 && c <= 0x0baa) - || (c >= 0x0bae && c <= 0x0bb5) - || (c >= 0x0bb7 && c <= 0x0bb9)) - return 1; - - /* Telugu */ - if ((c >= 0x0c05 && c <= 0x0c0c) - || (c >= 0x0c0e && c <= 0x0c10) - || (c >= 0x0c12 && c <= 0x0c28) - || (c >= 0x0c2a && c <= 0x0c33) - || (c >= 0x0c35 && c <= 0x0c39) - || (c >= 0x0c60 && c <= 0x0c61)) - return 1; - - /* Kannada */ - if ((c >= 0x0c85 && c <= 0x0c8c) - || (c >= 0x0c8e && c <= 0x0c90) - || (c >= 0x0c92 && c <= 0x0ca8) - || (c >= 0x0caa && c <= 0x0cb3) - || (c >= 0x0cb5 && c <= 0x0cb9) - || (c >= 0x0ce0 && c <= 0x0ce1)) - return 1; - - /* Malayalam */ - if ((c >= 0x0d05 && c <= 0x0d0c) - || (c >= 0x0d0e && c <= 0x0d10) - || (c >= 0x0d12 && c <= 0x0d28) - || (c >= 0x0d2a && c <= 0x0d39) - || (c >= 0x0d60 && c <= 0x0d61)) - return 1; - - /* Thai */ - if ((c >= 0x0e01 && c <= 0x0e30) - || (c >= 0x0e32 && c <= 0x0e33) - || (c >= 0x0e40 && c <= 0x0e46) - || (c >= 0x0e4f && c <= 0x0e5b)) - return 1; - - /* Lao */ - if ((c >= 0x0e81 && c <= 0x0e82) - || (c == 0x0e84) - || (c == 0x0e87) - || (c == 0x0e88) - || (c == 0x0e8a) - || (c == 0x0e0d) - || (c >= 0x0e94 && c <= 0x0e97) - || (c >= 0x0e99 && c <= 0x0e9f) - || (c >= 0x0ea1 && c <= 0x0ea3) - || (c == 0x0ea5) - || (c == 0x0ea7) - || (c == 0x0eaa) - || (c == 0x0eab) - || (c >= 0x0ead && c <= 0x0eb0) - || (c == 0x0eb2) - || (c == 0x0eb3) - || (c == 0x0ebd) - || (c >= 0x0ec0 && c <= 0x0ec4) - || (c == 0x0ec6)) - return 1; - - /* Georgian */ - if ((c >= 0x10a0 && c <= 0x10c5) - || (c >= 0x10d0 && c <= 0x10f6)) - return 1; - - /* Hiragana */ - if ((c >= 0x3041 && c <= 0x3094) - || (c >= 0x309b && c <= 0x309e)) - return 1; - - /* Katakana */ - if ((c >= 0x30a1 && c <= 0x30fe)) - return 1; - - /* Bopmofo */ - if ((c >= 0x3105 && c <= 0x312c)) - return 1; - - /* Hangul */ - if ((c >= 0x1100 && c <= 0x1159) - || (c >= 0x1161 && c <= 0x11a2) - || (c >= 0x11a8 && c <= 0x11f9)) - return 1; - - /* CJK Unified Ideographs */ - if ((c >= 0xf900 && c <= 0xfa2d) - || (c >= 0xfb1f && c <= 0xfb36) - || (c >= 0xfb38 && c <= 0xfb3c) - || (c == 0xfb3e) - || (c >= 0xfb40 && c <= 0xfb41) - || (c >= 0xfb42 && c <= 0xfb44) - || (c >= 0xfb46 && c <= 0xfbb1) - || (c >= 0xfbd3 && c <= 0xfd3f) - || (c >= 0xfd50 && c <= 0xfd8f) - || (c >= 0xfd92 && c <= 0xfdc7) - || (c >= 0xfdf0 && c <= 0xfdfb) - || (c >= 0xfe70 && c <= 0xfe72) - || (c == 0xfe74) - || (c >= 0xfe76 && c <= 0xfefc) - || (c >= 0xff21 && c <= 0xff3a) - || (c >= 0xff41 && c <= 0xff5a) - || (c >= 0xff66 && c <= 0xffbe) - || (c >= 0xffc2 && c <= 0xffc7) - || (c >= 0xffca && c <= 0xffcf) - || (c >= 0xffd2 && c <= 0xffd7) - || (c >= 0xffda && c <= 0xffdc) - || (c >= 0x4e00 && c <= 0x9fa5)) - return 1; - - error ("universal-character-name '\\u%04x' not valid in identifier", c); - return 1; -#endif -} - -/* Add the UTF-8 representation of C to the token_buffer. */ - -static void -utf8_extend_token (c) - int c; -{ - int shift, mask; - - if (c <= 0x0000007f) - { - extend_token (c); - return; - } - else if (c <= 0x000007ff) - shift = 6, mask = 0xc0; - else if (c <= 0x0000ffff) - shift = 12, mask = 0xe0; - else if (c <= 0x001fffff) - shift = 18, mask = 0xf0; - else if (c <= 0x03ffffff) - shift = 24, mask = 0xf8; - else - shift = 30, mask = 0xfc; - - extend_token (mask | (c >> shift)); - do - { - shift -= 6; - extend_token ((unsigned char) (0x80 | (c >> shift))); - } - while (shift); -} -#endif -int -c_lex (value) - tree *value; +/* Read a token and return its type. Fill *VALUE with its value, if + applicable. Fill *CPP_FLAGS with the token's flags, if it is + non-NULL. */ + +enum cpp_ttype +c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags) { + static bool no_more_pch; const cpp_token *tok; + enum cpp_ttype type; + unsigned char add_flags = 0; - retry: timevar_push (TV_CPP); - do - tok = cpp_get_token (parse_in); - while (tok->type == CPP_PADDING); - timevar_pop (TV_CPP); - - /* The C++ front end does horrible things with the current line - number. To ensure an accurate line number, we must reset it - every time we return a token. */ - lineno = src_lineno; + retry: + tok = cpp_get_token (parse_in); + type = tok->type; - *value = NULL_TREE; - switch (tok->type) + retry_after_at: +#ifdef USE_MAPPED_LOCATION + *loc = tok->src_loc; +#else + *loc = input_location; +#endif + switch (type) { - /* Issue this error here, where we can get at tok->val.c. */ - case CPP_OTHER: - if (ISGRAPH (tok->val.c)) - error ("stray '%c' in program", tok->val.c); - else - error ("stray '\\%o' in program", tok->val.c); + case CPP_PADDING: goto retry; - + case CPP_NAME: *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); break; @@ -712,9 +363,15 @@ c_lex (value) { case CPP_N_INVALID: /* cpplib has issued an error. */ + *value = error_mark_node; + errorcount++; break; case CPP_N_INTEGER: + /* C++ uses '0' to mark virtual functions as pure. + Set PURE_ZERO to pass this information to the C++ parser. */ + if (tok->val.str.len == 1 && *tok->val.str.text == '0') + add_flags = PURE_ZERO; *value = interpret_integer (tok, flags); break; @@ -723,11 +380,73 @@ c_lex (value) break; default: - abort (); + gcc_unreachable (); } } break; + case CPP_ATSIGN: + /* An @ may give the next token special significance in Objective-C. */ + if (c_dialect_objc ()) + { + location_t atloc = input_location; + + retry_at: + tok = cpp_get_token (parse_in); + type = tok->type; + switch (type) + { + case CPP_PADDING: + goto retry_at; + + case CPP_STRING: + case CPP_WSTRING: + type = lex_string (tok, value, true); + break; + + case CPP_NAME: + *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); + if (objc_is_reserved_word (*value)) + { + type = CPP_AT_NAME; + break; + } + /* FALLTHROUGH */ + + default: + /* ... or not. */ + error ("%Hstray %<@%> in program", &atloc); + goto retry_after_at; + } + break; + } + + /* FALLTHROUGH */ + case CPP_HASH: + case CPP_PASTE: + { + unsigned char name[4]; + + *cpp_spell_token (parse_in, tok, name, true) = 0; + + error ("stray %qs in program", name); + } + + goto retry; + + case CPP_OTHER: + { + cppchar_t c = tok->val.str.text[0]; + + if (c == '"' || c == '\'') + error ("missing terminating %c character", (int) c); + else if (ISGRAPH (c)) + error ("stray %qc in program", (int) c); + else + error ("stray %<\\%o%> in program", (int) c); + } + goto retry; + case CPP_CHAR: case CPP_WCHAR: *value = lex_charconst (tok); @@ -735,29 +454,51 @@ c_lex (value) case CPP_STRING: case CPP_WSTRING: - *value = lex_string (tok->val.str.text, tok->val.str.len, - tok->type == CPP_WSTRING); + if (!c_lex_return_raw_strings) + { + type = lex_string (tok, value, false); + break; + } + *value = build_string (tok->val.str.len, (char *) tok->val.str.text); + break; + + case CPP_PRAGMA: + *value = build_int_cst (NULL, tok->val.pragma); break; /* These tokens should not be visible outside cpplib. */ case CPP_HEADER_NAME: case CPP_COMMENT: case CPP_MACRO_ARG: - abort (); + gcc_unreachable (); + + default: + *value = NULL_TREE; + break; + } + + if (cpp_flags) + *cpp_flags = tok->flags | add_flags; - default: break; + if (!no_more_pch) + { + no_more_pch = true; + c_common_no_more_pch (); } - return tok->type; + timevar_pop (TV_CPP); + + return type; } /* Returns the narrowest C-visible unsigned type, starting with the - minimum specified by FLAGS, that can fit VALUE, or itk_none if + minimum specified by FLAGS, that can fit HIGH:LOW, or itk_none if there isn't one. */ + static enum integer_type_kind -narrowest_unsigned_type (value, flags) - tree value; - unsigned int flags; +narrowest_unsigned_type (unsigned HOST_WIDE_INT low, + unsigned HOST_WIDE_INT high, + unsigned int flags) { enum integer_type_kind itk; @@ -768,22 +509,23 @@ narrowest_unsigned_type (value, flags) else itk = itk_unsigned_long_long; - /* int_fits_type_p must think the type of its first argument is - wider than its second argument, or it won't do the proper check. */ - TREE_TYPE (value) = widest_unsigned_literal_type_node; - for (; itk < itk_none; itk += 2 /* skip unsigned types */) - if (int_fits_type_p (value, integer_types[itk])) - return itk; + { + tree upper = TYPE_MAX_VALUE (integer_types[itk]); + + if ((unsigned HOST_WIDE_INT) TREE_INT_CST_HIGH (upper) > high + || ((unsigned HOST_WIDE_INT) TREE_INT_CST_HIGH (upper) == high + && TREE_INT_CST_LOW (upper) >= low)) + return itk; + } return itk_none; } /* Ditto, but narrowest signed type. */ static enum integer_type_kind -narrowest_signed_type (value, flags) - tree value; - unsigned int flags; +narrowest_signed_type (unsigned HOST_WIDE_INT low, + unsigned HOST_WIDE_INT high, unsigned int flags) { enum integer_type_kind itk; @@ -794,22 +536,23 @@ narrowest_signed_type (value, flags) else itk = itk_long_long; - /* int_fits_type_p must think the type of its first argument is - wider than its second argument, or it won't do the proper check. */ - TREE_TYPE (value) = widest_unsigned_literal_type_node; for (; itk < itk_none; itk += 2 /* skip signed types */) - if (int_fits_type_p (value, integer_types[itk])) - return itk; + { + tree upper = TYPE_MAX_VALUE (integer_types[itk]); + + if ((unsigned HOST_WIDE_INT) TREE_INT_CST_HIGH (upper) > high + || ((unsigned HOST_WIDE_INT) TREE_INT_CST_HIGH (upper) == high + && TREE_INT_CST_LOW (upper) >= low)) + return itk; + } return itk_none; } /* Interpret TOKEN, an integer with FLAGS as classified by cpplib. */ static tree -interpret_integer (token, flags) - const cpp_token *token; - unsigned int flags; +interpret_integer (const cpp_token *token, unsigned int flags) { tree value, type; enum integer_type_kind itk; @@ -818,18 +561,19 @@ interpret_integer (token, flags) integer = cpp_interpret_integer (parse_in, token, flags); integer = cpp_num_sign_extend (integer, options->precision); - value = build_int_2_wide (integer.low, integer.high); /* The type of a constant with a U suffix is straightforward. */ if (flags & CPP_N_UNSIGNED) - itk = narrowest_unsigned_type (value, flags); + itk = narrowest_unsigned_type (integer.low, integer.high, flags); else { /* The type of a potentially-signed integer constant varies depending on the base it's in, the standard in use, and the length suffixes. */ - enum integer_type_kind itk_u = narrowest_unsigned_type (value, flags); - enum integer_type_kind itk_s = narrowest_signed_type (value, flags); + enum integer_type_kind itk_u + = narrowest_unsigned_type (integer.low, integer.high, flags); + enum integer_type_kind itk_s + = narrowest_signed_type (integer.low, integer.high, flags); /* In both C89 and C99, octal and hex constants may be signed or unsigned, whichever fits tighter. We do not warn about this @@ -841,7 +585,7 @@ interpret_integer (token, flags) { /* In C99, decimal constants are always signed. In C89, decimal constants that don't fit in long have - undefined behaviour; we try to make them unsigned long. + undefined behavior; we try to make them unsigned long. In GCC's extended C89, that last is true of decimal constants that don't fit in long long, too. */ @@ -853,10 +597,11 @@ interpret_integer (token, flags) if (itk_u < itk_unsigned_long) itk_u = itk_unsigned_long; itk = itk_u; - warning ("this decimal constant is unsigned only in ISO C89"); + warning (0, "this decimal constant is unsigned only in ISO C90"); } - else if (warn_traditional) - warning ("this decimal constant would be unsigned in ISO C89"); + else + warning (OPT_Wtraditional, + "this decimal constant would be unsigned in ISO C90"); } } } @@ -871,15 +616,15 @@ interpret_integer (token, flags) if (itk > itk_unsigned_long && (flags & CPP_N_WIDTH) != CPP_N_LARGE - && ! in_system_header && ! flag_isoc99) - pedwarn ("integer constant is too large for \"%s\" type", + && !in_system_header && !flag_isoc99) + pedwarn ("integer constant is too large for %qs type", (flags & CPP_N_UNSIGNED) ? "unsigned long" : "long"); - TREE_TYPE (value) = type; + value = build_int_cst_wide (type, integer.low, integer.high); /* Convert imaginary to a complex type. */ if (flags & CPP_N_IMAGINARY) - value = build_complex (NULL_TREE, convert (type, integer_zero_node), value); + value = build_complex (NULL_TREE, build_int_cst (type, 0), value); return value; } @@ -887,65 +632,60 @@ interpret_integer (token, flags) /* Interpret TOKEN, a floating point number with FLAGS as classified by cpplib. */ static tree -interpret_float (token, flags) - const cpp_token *token; - unsigned int flags; +interpret_float (const cpp_token *token, unsigned int flags) { tree type; tree value; REAL_VALUE_TYPE real; char *copy; size_t copylen; - const char *typename; - /* FIXME: make %T work in error/warning, then we don't need typename. */ - if ((flags & CPP_N_WIDTH) == CPP_N_LARGE) - { + /* Decode type based on width and properties. */ + if (flags & CPP_N_DFLOAT) + if ((flags & CPP_N_WIDTH) == CPP_N_LARGE) + type = dfloat128_type_node; + else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL) + type = dfloat32_type_node; + else + type = dfloat64_type_node; + else + if ((flags & CPP_N_WIDTH) == CPP_N_LARGE) type = long_double_type_node; - typename = "long double"; - } - else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL - || flag_single_precision_constant) - { + else if ((flags & CPP_N_WIDTH) == CPP_N_SMALL + || flag_single_precision_constant) type = float_type_node; - typename = "float"; - } - else - { + else type = double_type_node; - typename = "double"; - } /* Copy the constant to a nul-terminated buffer. If the constant has any suffixes, cut them off; REAL_VALUE_ATOF/ REAL_VALUE_HTOF can't handle them. */ copylen = token->val.str.len; - if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM) - /* Must be an F or L suffix. */ - copylen--; - if (flags & CPP_N_IMAGINARY) - /* I or J suffix. */ - copylen--; + if (flags & CPP_N_DFLOAT) + copylen -= 2; + else + { + if ((flags & CPP_N_WIDTH) != CPP_N_MEDIUM) + /* Must be an F or L suffix. */ + copylen--; + if (flags & CPP_N_IMAGINARY) + /* I or J suffix. */ + copylen--; + } - copy = alloca (copylen + 1); + copy = (char *) alloca (copylen + 1); memcpy (copy, token->val.str.text, copylen); copy[copylen] = '\0'; - /* The second argument, machine_mode, of REAL_VALUE_ATOF tells the - desired precision of the binary result of decimal-to-binary - conversion. */ - if (flags & CPP_N_HEX) - real = REAL_VALUE_HTOF (copy, TYPE_MODE (type)); - else - real = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); + real_from_string3 (&real, copy, TYPE_MODE (type)); - /* A diagnostic is required for "soft" overflow by some ISO C - testsuites. This is not pedwarn, because some people don't want - an error for this. - ??? That's a dubious reason... is this a mandatory diagnostic or - isn't it? -- zw, 2001-08-21. */ + /* Both C and C++ require a diagnostic for a floating constant + outside the range of representable values of its type. Since we + have __builtin_inf* to produce an infinity, it might now be + appropriate for this to be a mandatory pedwarn rather than + conditioned on -pedantic. */ if (REAL_VALUE_ISINF (real) && pedantic) - warning ("floating constant exceeds range of \"%s\"", typename); + pedwarn ("floating constant exceeds range of %qT", type); /* Create a node with determined type and value. */ value = build_real (type, real); @@ -955,112 +695,136 @@ interpret_float (token, flags) return value; } -static tree -lex_string (str, len, wide) - const unsigned char *str; - unsigned int len; - int wide; +/* Convert a series of STRING and/or WSTRING tokens into a tree, + performing string constant concatenation. TOK is the first of + these. VALP is the location to write the string into. OBJC_STRING + indicates whether an '@' token preceded the incoming token. + Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING, + or CPP_OBJC_STRING). + + This is unfortunately more work than it should be. If any of the + strings in the series has an L prefix, the result is a wide string + (6.4.5p4). Whether or not the result is a wide string affects the + meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape + sequences do not continue across the boundary between two strings in + a series (6.4.5p7), so we must not lose the boundaries. Therefore + cpp_interpret_string takes a vector of cpp_string structures, which + we must arrange to provide. */ + +static enum cpp_ttype +lex_string (const cpp_token *tok, tree *valp, bool objc_string) { tree value; - char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1)); - char *q = buf; - const unsigned char *p = str, *limit = str + len; - cppchar_t c; - -#ifdef MULTIBYTE_CHARS - /* Reset multibyte conversion state. */ - (void) local_mbtowc (NULL, NULL, 0); -#endif + bool wide = false; + size_t concats = 0; + struct obstack str_ob; + cpp_string istr; - while (p < limit) - { -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; + /* Try to avoid the overhead of creating and destroying an obstack + for the common case of just one string. */ + cpp_string str = tok->val.str; + cpp_string *strs = &str; - char_len = local_mbtowc (&wc, (const char *) p, limit - p); - if (char_len == -1) - { - warning ("ignoring invalid multibyte character"); - char_len = 1; - c = *p++; - } - else + if (tok->type == CPP_WSTRING) + wide = true; + + retry: + tok = cpp_get_token (parse_in); + switch (tok->type) + { + case CPP_PADDING: + goto retry; + case CPP_ATSIGN: + if (c_dialect_objc ()) { - p += char_len; - c = wc; + objc_string = true; + goto retry; } -#else - c = *p++; -#endif + /* FALLTHROUGH */ - if (c == '\\' && !ignore_escape_flag) - c = cpp_parse_escape (parse_in, &p, limit, wide); - - /* Add this single character into the buffer either as a wchar_t, - a multibyte sequence, or as a single byte. */ - if (wide) - { - unsigned charwidth = TYPE_PRECISION (char_type_node); - unsigned bytemask = (1 << charwidth) - 1; - int byte; + default: + break; - for (byte = 0; byte < WCHAR_BYTES; ++byte) - { - int n; - if (byte >= (int) sizeof (c)) - n = 0; - else - n = (c >> (byte * charwidth)) & bytemask; - if (BYTES_BIG_ENDIAN) - q[WCHAR_BYTES - byte - 1] = n; - else - q[byte] = n; - } - q += WCHAR_BYTES; - } -#ifdef MULTIBYTE_CHARS - else if (char_len > 1) - { - /* We're dealing with a multibyte character. */ - for ( ; char_len >0; --char_len) - { - *q++ = *(p - char_len); - } - } -#endif - else + case CPP_WSTRING: + wide = true; + /* FALLTHROUGH */ + + case CPP_STRING: + if (!concats) { - *q++ = c; + gcc_obstack_init (&str_ob); + obstack_grow (&str_ob, &str, sizeof (cpp_string)); } + + concats++; + obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string)); + goto retry; } - /* Terminate the string value, either with a single byte zero - or with a wide zero. */ + /* We have read one more token than we want. */ + _cpp_backup_tokens (parse_in, 1); + if (concats) + strs = XOBFINISH (&str_ob, cpp_string *); + + if (concats && !objc_string && !in_system_header) + warning (OPT_Wtraditional, + "traditional C rejects string constant concatenation"); - if (wide) + if ((c_lex_string_translate + ? cpp_interpret_string : cpp_interpret_string_notranslate) + (parse_in, strs, concats + 1, &istr, wide)) { - memset (q, 0, WCHAR_BYTES); - q += WCHAR_BYTES; + value = build_string (istr.len, (char *) istr.text); + free ((void *) istr.text); + + if (c_lex_string_translate == -1) + { + int xlated = cpp_interpret_string_notranslate (parse_in, strs, + concats + 1, + &istr, wide); + /* Assume that, if we managed to translate the string above, + then the untranslated parsing will always succeed. */ + gcc_assert (xlated); + + if (TREE_STRING_LENGTH (value) != (int) istr.len + || 0 != strncmp (TREE_STRING_POINTER (value), (char *) istr.text, + istr.len)) + { + /* Arrange for us to return the untranslated string in + *valp, but to set up the C type of the translated + one. */ + *valp = build_string (istr.len, (char *) istr.text); + valp = &TREE_CHAIN (*valp); + } + free ((void *) istr.text); + } } else { - *q++ = '\0'; + /* Callers cannot generally handle error_mark_node in this context, + so return the empty string instead. cpp_interpret_string has + issued an error. */ + if (wide) + value = build_string (TYPE_PRECISION (wchar_type_node) + / TYPE_PRECISION (char_type_node), + "\0\0\0"); /* widest supported wchar_t + is 32 bits */ + else + value = build_string (1, ""); } - value = build_string (q - buf, buf); + TREE_TYPE (value) = wide ? wchar_array_type_node : char_array_type_node; + *valp = fix_string_type (value); - if (wide) - TREE_TYPE (value) = wchar_array_type_node; - else - TREE_TYPE (value) = char_array_type_node; - return value; + if (concats) + obstack_free (&str_ob, 0); + + return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING; } /* Converts a (possibly wide) character constant token into a tree. */ static tree -lex_charconst (token) - const cpp_token *token; +lex_charconst (const cpp_token *token) { cppchar_t result; tree type, value; @@ -1068,25 +832,23 @@ lex_charconst (token) int unsignedp; result = cpp_interpret_charconst (parse_in, token, - &chars_seen, &unsignedp); - - /* Cast to cppchar_signed_t to get correct sign-extension of RESULT - before possibly widening to HOST_WIDE_INT for build_int_2. */ - if (unsignedp || (cppchar_signed_t) result >= 0) - value = build_int_2 (result, 0); - else - value = build_int_2 ((cppchar_signed_t) result, -1); + &chars_seen, &unsignedp); if (token->type == CPP_WCHAR) type = wchar_type_node; /* In C, a character constant has type 'int'. In C++ 'char', but multi-char charconsts have type 'int'. */ - else if ((c_language == clk_c || c_language == clk_objective_c) - || chars_seen > 1) + else if (!c_dialect_cxx () || chars_seen > 1) type = integer_type_node; else type = char_type_node; - TREE_TYPE (value) = type; + /* Cast to cppchar_signed_t to get correct sign-extension of RESULT + before possibly widening to HOST_WIDE_INT for build_int_cst. */ + if (unsignedp || (cppchar_signed_t) result >= 0) + value = build_int_cst_wide (type, result, 0); + else + value = build_int_cst_wide (type, (cppchar_signed_t) result, -1); + return value; }