X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fc-lex.c;h=292c267b312f27e31cacca2a431122f5addd6731;hb=11aafc65723c2b05ae51ecb120ef1d2a47c763c5;hp=63ae9ba6676643429f757958552ee91de661e753;hpb=1fcd08b185406bf47bb33b56a0ed3d343144f5d9;p=pf3gnuchains%2Fgcc-fork.git diff --git a/gcc/c-lex.c b/gcc/c-lex.c index 63ae9ba6676..292c267b312 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -2,48 +2,42 @@ Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997 1998, 1999, 2000 Free Software Foundation, Inc. -This file is part of GNU CC. +This file is part of GCC. -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. */ #include "config.h" #include "system.h" #include "rtl.h" #include "tree.h" +#include "expr.h" #include "input.h" #include "output.h" -#include "c-lex.h" #include "c-tree.h" +#include "c-common.h" #include "flags.h" #include "timevar.h" #include "cpplib.h" #include "c-pragma.h" #include "toplev.h" #include "intl.h" -#include "ggc.h" #include "tm_p.h" #include "splay-tree.h" - -/* MULTIBYTE_CHARS support only works for native compilers. - ??? Ideally what we want is to model widechar support after - the current floating point support. */ -#ifdef CROSS_COMPILE -#undef MULTIBYTE_CHARS -#endif +#include "debug.h" #ifdef MULTIBYTE_CHARS #include "mbchar.h" @@ -53,69 +47,16 @@ Boston, MA 02111-1307, USA. */ #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME)) #endif -#if USE_CPPLIB -extern cpp_reader parse_in; -#else -/* Stream for reading from the input file. */ -FILE *finput; -#endif +/* The current line map. */ +static const struct line_map *map; -/* Private idea of the line number. See discussion in c_lex(). */ -static int lex_lineno; +/* The line used to refresh the lineno global variable after each token. */ +static unsigned int src_lineno; /* We may keep statistics about how long which files took to compile. */ static int header_time, body_time; static splay_tree file_info_tree; -/* Cause the `yydebug' variable to be defined. */ -#define YYDEBUG 1 - -#if !USE_CPPLIB - -struct putback_buffer -{ - unsigned char *buffer; - int buffer_size; - int index; -}; - -static struct putback_buffer putback = {NULL, 0, -1}; - -static inline int getch PARAMS ((void)); - -static inline int -getch () -{ - if (putback.index != -1) - { - int ch = putback.buffer[putback.index]; - --putback.index; - return ch; - } - return getc (finput); -} - -static inline void put_back PARAMS ((int)); - -static inline void -put_back (ch) - int ch; -{ - if (ch != EOF) - { - if (putback.index == putback.buffer_size - 1) - { - putback.buffer_size += 16; - putback.buffer = xrealloc (putback.buffer, putback.buffer_size); - } - putback.buffer[++putback.index] = ch; - } -} - -int linemode; - -#endif - /* File used for outputting assembler code. */ extern FILE *asm_out_file; @@ -125,53 +66,36 @@ extern FILE *asm_out_file; /* Number of bytes in a wide character. */ #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT) -#if !USE_CPPLIB -static int maxtoken; /* Current nominal length of token buffer. */ -static char *token_buffer; /* Pointer to token buffer. - Actual allocated length is maxtoken + 2. */ -#endif - -int indent_level; /* Number of { minus number of }. */ int pending_lang_change; /* If we need to switch languages - C++ only */ int c_header_level; /* depth in C headers - C++ only */ /* Nonzero tells yylex to ignore \ in string constants. */ static int ignore_escape_flag; -static const char *readescape PARAMS ((const char *, const char *, - unsigned int *)); -static const char *read_ucs PARAMS ((const char *, const char *, - unsigned int *, int)); -static void parse_float PARAMS ((PTR)); static tree lex_number PARAMS ((const char *, unsigned int)); -static tree lex_string PARAMS ((const char *, unsigned int, int)); -static tree lex_charconst PARAMS ((const char *, unsigned int, int)); +static tree lex_string PARAMS ((const unsigned char *, unsigned int, + int)); +static tree lex_charconst PARAMS ((const cpp_token *)); static void update_header_times PARAMS ((const char *)); static int dump_one_header PARAMS ((splay_tree_node, void *)); - -#if !USE_CPPLIB -static int skip_white_space PARAMS ((int)); -static char *extend_token_buffer PARAMS ((const char *)); -static void extend_token_buffer_to PARAMS ((int)); -static int read_line_number PARAMS ((int *)); -static void process_directive PARAMS ((void)); -#else -static void cb_ident PARAMS ((cpp_reader *, const unsigned char *, - unsigned int)); -static void cb_enter_file PARAMS ((cpp_reader *)); -static void cb_leave_file PARAMS ((cpp_reader *)); -static void cb_rename_file PARAMS ((cpp_reader *)); -static void cb_def_pragma PARAMS ((cpp_reader *)); -#endif - +static void cb_line_change PARAMS ((cpp_reader *, const cpp_token *, int)); +static void cb_ident PARAMS ((cpp_reader *, unsigned int, + const cpp_string *)); +static void cb_file_change PARAMS ((cpp_reader *, const struct line_map *)); +static void cb_def_pragma PARAMS ((cpp_reader *, unsigned int)); +static void cb_define PARAMS ((cpp_reader *, unsigned int, + cpp_hashnode *)); +static void cb_undef PARAMS ((cpp_reader *, unsigned int, + cpp_hashnode *)); const char * init_c_lex (filename) const char *filename; { + struct cpp_callbacks *cb; struct c_fileinfo *toplevel; - /* Set up filename timing. Must happen before cpp_start_read. */ + /* Set up filename timing. Must happen before cpp_read_main_file. */ file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp, 0, (splay_tree_delete_value_fn)free); @@ -189,48 +113,51 @@ init_c_lex (filename) GET_ENVIRONMENT (literal_codeset, "LANG"); #endif -#if !USE_CPPLIB - /* Open input file. */ - if (filename == 0 || !strcmp (filename, "-")) + cb = cpp_get_callbacks (parse_in); + + cb->line_change = cb_line_change; + cb->ident = cb_ident; + cb->file_change = cb_file_change; + cb->def_pragma = cb_def_pragma; + + /* Set the debug callbacks if we can use them. */ + if (debug_info_level == DINFO_LEVEL_VERBOSE + && (write_symbols == DWARF_DEBUG || write_symbols == DWARF2_DEBUG + || write_symbols == VMS_AND_DWARF2_DEBUG)) { - finput = stdin; - filename = "stdin"; + cb->define = cb_define; + cb->undef = cb_undef; } - else - finput = fopen (filename, "r"); - if (finput == 0) - pfatal_with_name (filename); -#ifdef IO_BUFFER_SIZE - setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE); -#endif -#else /* !USE_CPPLIB */ + /* Start it at 0. */ + lineno = 0; - parse_in.cb.ident = cb_ident; - parse_in.cb.enter_file = cb_enter_file; - parse_in.cb.leave_file = cb_leave_file; - parse_in.cb.rename_file = cb_rename_file; - parse_in.cb.def_pragma = cb_def_pragma; + if (filename == NULL || !strcmp (filename, "-")) + filename = ""; - /* Make sure parse_in.digraphs matches flag_digraphs. */ - CPP_OPTION (&parse_in, digraphs) = flag_digraphs; + return cpp_read_main_file (parse_in, filename, ident_hash); +} - if (! cpp_start_read (&parse_in, 0 /* no printer */, filename)) - abort (); +/* A thin wrapper around the real parser that initializes the + integrated preprocessor after debug output has been initialized. + Also, make sure the start_source_file debug hook gets called for + the primary source file. */ - if (filename == 0 || !strcmp (filename, "-")) - filename = "stdin"; +void +c_common_parse_file (set_yydebug) + int set_yydebug ATTRIBUTE_UNUSED; +{ +#if YYDEBUG != 0 + yydebug = set_yydebug; +#else + warning ("YYDEBUG not defined"); #endif -#if !USE_CPPLIB - maxtoken = 40; - token_buffer = (char *) xmalloc (maxtoken + 2); -#endif - /* Start it at 0, because check_newline is called at the very beginning - and will increment it to 1. */ - lineno = lex_lineno = 0; + (*debug_hooks->start_source_file) (lineno, input_filename); + cpp_finish_options (parse_in); - return filename; + yyparse (); + free_parser_stacks (); } struct c_fileinfo * @@ -296,733 +223,136 @@ dump_time_statistics () splay_tree_foreach (file_info_tree, dump_one_header, 0); } -#if !USE_CPPLIB - -/* If C is not whitespace, return C. - Otherwise skip whitespace and return first nonwhite char read. */ - -static int -skip_white_space (c) - register int c; -{ - for (;;) - { - switch (c) - { - /* There is no need to process comments, backslash-newline, - or \r here. None can occur in the output of cpp. */ - - case '\n': - if (linemode) - { - put_back (c); - return EOF; - } - c = check_newline (); - break; - - /* Per C99, horizontal whitespace is just these four characters. */ - case ' ': - case '\t': - case '\f': - case '\v': - c = getch (); - break; - - case '\\': - error ("stray '\\' in program"); - c = getch (); - break; - - default: - return (c); - } - } -} - -/* Skips all of the white space at the current location in the input file. */ - -void -position_after_white_space () -{ - register int c; - - c = getch (); - - put_back (skip_white_space (c)); -} - -/* Make the token buffer longer, preserving the data in it. - P should point to just beyond the last valid character in the old buffer. - The value we return is a pointer to the new buffer - at a place corresponding to P. */ - static void -extend_token_buffer_to (size) - int size; -{ - do - maxtoken = maxtoken * 2 + 10; - while (maxtoken < size); - token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2); -} - -static char * -extend_token_buffer (p) - const char *p; -{ - int offset = p - token_buffer; - extend_token_buffer_to (offset); - return token_buffer + offset; -} - - -static int -read_line_number (num) - int *num; +cb_ident (pfile, line, str) + cpp_reader *pfile ATTRIBUTE_UNUSED; + unsigned int line ATTRIBUTE_UNUSED; + const cpp_string *str ATTRIBUTE_UNUSED; { - tree value; - enum cpp_ttype token = c_lex (&value); - - if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST) - { - *num = TREE_INT_CST_LOW (value); - return 1; - } - else +#ifdef ASM_OUTPUT_IDENT + if (! flag_no_ident) { - if (token != CPP_EOF) - error ("invalid #-line"); - return 0; + /* Convert escapes in the string. */ + tree value = lex_string (str->text, str->len, 0); + ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); } +#endif } -/* At the beginning of a line, increment the line number - and process any #-directive on this line. - If the line is a #-directive, read the entire line and return a newline. - Otherwise, return the line's first non-whitespace character. */ - -int -check_newline () +/* Called at the start of every non-empty line. TOKEN is the first + lexed token on the line. Used for diagnostic line numbers. */ +static void +cb_line_change (pfile, token, parsing_args) + cpp_reader *pfile ATTRIBUTE_UNUSED; + const cpp_token *token; + int parsing_args ATTRIBUTE_UNUSED; { - register int c; - - /* Loop till we get a nonblank, non-directive line. */ - for (;;) - { - /* Read first nonwhite char on the line. */ - do - c = getch (); - while (c == ' ' || c == '\t'); - - lex_lineno++; - if (c == '#') - { - process_directive (); - return '\n'; - } - - else if (c != '\n') - break; - } - return c; + src_lineno = SOURCE_LINE (map, token->line); } static void -process_directive () +cb_file_change (pfile, new_map) + cpp_reader *pfile ATTRIBUTE_UNUSED; + const struct line_map *new_map; { - enum cpp_ttype token; - tree value; - int saw_line; - enum { act_none, act_push, act_pop } action; - int action_number, l; - char *new_file; -#ifndef NO_IMPLICIT_EXTERN_C - int entering_c_header; -#endif - - /* Don't read beyond this line. */ - saw_line = 0; - linemode = 1; - - token = c_lex (&value); + unsigned int to_line = SOURCE_LINE (new_map, new_map->to_line); - if (token == CPP_NAME) + if (new_map->reason == LC_ENTER) { - /* If a letter follows, then if the word here is `line', skip - it and ignore it; otherwise, ignore the line, with an error - if the word isn't `pragma'. */ - - const char *name = IDENTIFIER_POINTER (value); - - if (!strcmp (name, "pragma")) - { - dispatch_pragma (); - goto skipline; - -#if 0 -#ifdef HANDLE_PRAGMA - /* We invoke HANDLE_PRAGMA before HANDLE_GENERIC_PRAGMAS - (if both are defined), in order to give the back - end a chance to override the interpretation of - SYSV style pragmas. */ - if (HANDLE_PRAGMA (getch, put_back, IDENTIFIER_POINTER (value))) - goto skipline; -#endif /* HANDLE_PRAGMA */ -#endif - } - else if (!strcmp (name, "define")) - { - debug_define (lex_lineno, GET_DIRECTIVE_LINE ()); - goto skipline; - } - else if (!strcmp (name, "undef")) - { - debug_undef (lex_lineno, GET_DIRECTIVE_LINE ()); - goto skipline; - } - else if (!strcmp (name, "line")) - { - saw_line = 1; - token = c_lex (&value); - goto linenum; - } - else if (!strcmp (name, "ident")) + /* Don't stack the main buffer on the input stack; + we already did in compile_file. */ + if (map == NULL) + main_input_filename = new_map->to_file; + else { - /* #ident. We expect a string constant here. - The pedantic warning and syntax error are now in cpp. */ + int included_at = SOURCE_LINE (new_map - 1, new_map->from_line - 1); - token = c_lex (&value); - if (token != CPP_STRING || TREE_CODE (value) != STRING_CST) - goto skipline; - -#ifdef ASM_OUTPUT_IDENT - if (! flag_no_ident) + lineno = included_at; + push_srcloc (new_map->to_file, 1); + (*debug_hooks->start_source_file) (included_at, new_map->to_file); +#ifndef NO_IMPLICIT_EXTERN_C + if (c_header_level) + ++c_header_level; + else if (new_map->sysp == 2) { - ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); + c_header_level = 1; + ++pending_lang_change; } #endif - - /* Skip the rest of this line. */ - goto skipline; } - - error ("undefined or invalid # directive `%s'", name); - goto skipline; } - - /* If the # is the only nonwhite char on the line, - just ignore it. Check the new newline. */ - if (token == CPP_EOF) - goto skipline; - -linenum: - /* Here we have either `#line' or `# '. - In either case, it should be a line number; a digit should follow. */ - - if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST) - { - error ("invalid #-line"); - goto skipline; - } - - /* subtract one, because it is the following line that - gets the specified number */ - - l = TREE_INT_CST_LOW (value) - 1; - - /* More follows: it must be a string constant (filename). - It would be neat to use cpplib to quickly process the string, but - (1) we don't have a handy tokenization of the string, and - (2) I don't know how well that would work in the presense - of filenames that contain wide characters. */ - - if (saw_line) - { - /* Don't treat \ as special if we are processing #line 1 "...". - If you want it to be treated specially, use # 1 "...". */ - ignore_escape_flag = 1; - } - - /* Read the string constant. */ - token = c_lex (&value); - - ignore_escape_flag = 0; - - if (token == CPP_EOF) - { - /* No more: store the line number and check following line. */ - lex_lineno = l; - goto skipline; - } - - if (token != CPP_STRING || TREE_CODE (value) != STRING_CST) - { - error ("invalid #line"); - goto skipline; - } - - new_file = TREE_STRING_POINTER (value); - - if (main_input_filename == 0) - main_input_filename = new_file; - - action = act_none; - action_number = 0; - - /* Each change of file name - reinitializes whether we are now in a system header. */ - in_system_header = 0; - - if (!read_line_number (&action_number)) - { - /* Update the name in the top element of input_file_stack. */ - if (input_file_stack) - input_file_stack->name = input_filename; - } - - /* `1' after file name means entering new file. - `2' after file name means just left a file. */ - - if (action_number == 1) - { - action = act_push; - read_line_number (&action_number); - } - else if (action_number == 2) + else if (new_map->reason == LC_LEAVE) { - action = act_pop; - read_line_number (&action_number); - } - if (action_number == 3) - { - /* `3' after file name means this is a system header file. */ - in_system_header = 1; - read_line_number (&action_number); - } #ifndef NO_IMPLICIT_EXTERN_C - if (action_number == 4) - { - /* `4' after file name means this is a C header file. */ - entering_c_header = 1; - read_line_number (&action_number); - } -#endif - - /* Do the actions implied by the preceding numbers. */ - if (action == act_push) - { - lineno = lex_lineno; - push_srcloc (input_filename, 1); - input_file_stack->indent_level = indent_level; - debug_start_source_file (input_filename); -#ifndef NO_IMPLICIT_EXTERN_C - if (c_header_level) - ++c_header_level; - else if (entering_c_header) + if (c_header_level && --c_header_level == 0) { - c_header_level = 1; - ++pending_lang_change; - } -#endif - } - else if (action == act_pop) - { - /* Popping out of a file. */ - if (input_file_stack->next) - { -#ifndef NO_IMPLICIT_EXTERN_C - if (c_header_level && --c_header_level == 0) - { - if (entering_c_header) - warning ("badly nested C headers from preprocessor"); - --pending_lang_change; - } -#endif -#if 0 - if (indent_level != input_file_stack->indent_level) - { - warning_with_file_and_line - (input_filename, lex_lineno, - "This file contains more '%c's than '%c's.", - indent_level > input_file_stack->indent_level ? '{' : '}', - indent_level > input_file_stack->indent_level ? '}' : '{'); - } -#endif - pop_srcloc (); - debug_end_source_file (input_file_stack->line); + if (new_map->sysp == 2) + warning ("badly nested C headers from preprocessor"); + --pending_lang_change; } - else - error ("#-lines for entering and leaving files don't match"); - } - - update_header_times (new_file); - - input_filename = new_file; - lex_lineno = l; - - /* Hook for C++. */ - extract_interface_info (); - - /* skip the rest of this line. */ - skipline: - linemode = 0; - - while (getch () != '\n'); -} -#else /* USE_CPPLIB */ - -/* Not yet handled: #pragma, #define, #undef. - No need to deal with linemarkers under normal conditions. */ - -static void -cb_ident (pfile, str, len) - cpp_reader *pfile ATTRIBUTE_UNUSED; - const unsigned char *str; - unsigned int len; -{ -#ifdef ASM_OUTPUT_IDENT - if (! flag_no_ident) - { - /* Convert escapes in the string. */ - tree value = lex_string ((const char *)str, len, 0); - ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); - } -#endif -} - -static void -cb_enter_file (pfile) - cpp_reader *pfile; -{ - cpp_buffer *ip = CPP_BUFFER (pfile); - /* Bleah, need a better interface to this. */ - const char *flags = cpp_syshdr_flags (pfile, ip); - - /* Mustn't stack the main buffer on the input stack. (Ick.) */ - if (ip->prev) - { - lex_lineno = lineno = ip->prev->lineno - 1; - push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1); - input_file_stack->indent_level = indent_level; - debug_start_source_file (ip->nominal_fname); - } - else - lex_lineno = 1; - - update_header_times (ip->nominal_fname); - - /* Hook for C++. */ - extract_interface_info (); - - in_system_header = (flags[0] != 0); -#ifndef NO_IMPLICIT_EXTERN_C - if (c_header_level) - ++c_header_level; - else if (flags[2] != 0) - { - c_header_level = 1; - ++pending_lang_change; - } #endif -} - -static void -cb_leave_file (pfile) - cpp_reader *pfile; -{ - /* Bleah, need a better interface to this. */ - const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile)); -#if 0 - if (indent_level != input_file_stack->indent_level) - { - warning_with_file_and_line - (input_filename, lex_lineno, - "This file contains more '%c's than '%c's.", - indent_level > input_file_stack->indent_level ? '{' : '}', - indent_level > input_file_stack->indent_level ? '}' : '{'); - } -#endif - /* We get called for the main buffer, but we mustn't pop it. */ - if (input_file_stack->next) - pop_srcloc (); - in_system_header = (flags[0] != 0); -#ifndef NO_IMPLICIT_EXTERN_C - if (c_header_level && --c_header_level == 0) - { - if (flags[2] != 0) - warning ("badly nested C headers from preprocessor"); - --pending_lang_change; + pop_srcloc (); + + (*debug_hooks->end_source_file) (to_line); } -#endif - lex_lineno = CPP_BUFFER (pfile)->lineno; - debug_end_source_file (input_file_stack->line); - update_header_times (input_file_stack->name); - /* Hook for C++. */ - extract_interface_info (); -} + update_header_times (new_map->to_file); + in_system_header = new_map->sysp != 0; + input_filename = new_map->to_file; + lineno = to_line; + map = new_map; -static void -cb_rename_file (pfile) - cpp_reader *pfile; -{ - cpp_buffer *ip = CPP_BUFFER (pfile); - /* Bleah, need a better interface to this. */ - const char *flags = cpp_syshdr_flags (pfile, ip); - input_filename = ggc_alloc_string (ip->nominal_fname, -1); - lex_lineno = ip->lineno; - in_system_header = (flags[0] != 0); - - update_header_times (ip->nominal_fname); /* Hook for C++. */ extract_interface_info (); } static void -cb_def_pragma (pfile) +cb_def_pragma (pfile, line) cpp_reader *pfile; + unsigned int line; { /* Issue a warning message if we have been asked to do so. Ignore unknown pragmas in system headers unless an explicit - -Wunknown-pragmas has been given. */ + -Wunknown-pragmas has been given. */ if (warn_unknown_pragmas > in_system_header) { - const unsigned char *space, *name; - const cpp_token *t = pfile->first_directive_token + 2; + const unsigned char *space, *name = 0; + const cpp_token *s; + + s = cpp_get_token (pfile); + space = cpp_token_as_text (pfile, s); + s = cpp_get_token (pfile); + if (s->type == CPP_NAME) + name = cpp_token_as_text (pfile, s); - space = t[0].val.node->name; - name = t[1].type == CPP_NAME ? t[1].val.node->name : 0; + lineno = SOURCE_LINE (map, line); if (name) warning ("ignoring #pragma %s %s", space, name); else warning ("ignoring #pragma %s", space); } } -#endif /* USE_CPPLIB */ - -/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. - - [lex.charset]: The character designated by the universal-character-name - \UNNNNNNNN is that character whose character short name in ISO/IEC 10646 - is NNNNNNNN; the character designated by the universal-character-name - \uNNNN is that character whose character short name in ISO/IEC 10646 is - 0000NNNN. If the hexadecimal value for a universal character name is - less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the - universal character name designates a character in the basic source - character set, then the program is ill-formed. - - We assume that wchar_t is Unicode, so we don't need to do any - mapping. Is this ever wrong? */ - -static const char * -read_ucs (p, limit, cptr, length) - const char *p; - const char *limit; - unsigned int *cptr; - int length; -{ - unsigned int code = 0; - int c; - - for (; length; --length) - { - if (p >= limit) - { - error ("incomplete universal-character-name"); - break; - } - - c = *p++; - if (! ISXDIGIT (c)) - { - error ("non hex digit '%c' in universal-character-name", c); - p--; - break; - } - - code <<= 4; - if (c >= 'a' && c <= 'f') - code += c - 'a' + 10; - if (c >= 'A' && c <= 'F') - code += c - 'A' + 10; - if (c >= '0' && c <= '9') - code += c - '0'; - } - -#ifdef TARGET_EBCDIC - sorry ("universal-character-name on EBCDIC target"); - *cptr = 0x3f; /* EBCDIC invalid character */ - return p; -#endif - if (code > 0x9f && !(code & 0x80000000)) - /* True extended character, OK. */; - else if (code >= 0x20 && code < 0x7f) - { - /* ASCII printable character. The C character set consists of all of - these except $, @ and `. We use hex escapes so that this also - works with EBCDIC hosts. */ - if (code != 0x24 && code != 0x40 && code != 0x60) - error ("universal-character-name used for '%c'", code); - } - else - error ("invalid universal-character-name"); - - *cptr = code; - return p; +/* #define callback for DWARF and DWARF2 debug info. */ +static void +cb_define (pfile, line, node) + cpp_reader *pfile; + unsigned int line; + cpp_hashnode *node; +{ + (*debug_hooks->define) (SOURCE_LINE (map, line), + (const char *) cpp_macro_definition (pfile, node)); } -/* Read an escape sequence and write its character equivalent into *CPTR. - P is the input pointer, which is just after the backslash. LIMIT - is how much text we have. - Returns the updated input pointer. */ - -static const char * -readescape (p, limit, cptr) - const char *p; - const char *limit; - unsigned int *cptr; +/* #undef callback for DWARF and DWARF2 debug info. */ +static void +cb_undef (pfile, line, node) + cpp_reader *pfile ATTRIBUTE_UNUSED; + unsigned int line; + cpp_hashnode *node; { - unsigned int c, code, count; - unsigned firstdig = 0; - int nonnull; - - if (p == limit) - { - /* cpp has already issued an error for this. */ - *cptr = 0; - return p; - } - - c = *p++; - - switch (c) - { - case 'x': - if (warn_traditional && !in_system_header) - warning ("the meaning of `\\x' varies with -traditional"); - - if (flag_traditional) - { - *cptr = 'x'; - return p; - } - - code = 0; - count = 0; - nonnull = 0; - while (p < limit) - { - c = *p++; - if (! ISXDIGIT (c)) - { - p--; - break; - } - code *= 16; - if (c >= 'a' && c <= 'f') - code += c - 'a' + 10; - if (c >= 'A' && c <= 'F') - code += c - 'A' + 10; - if (c >= '0' && c <= '9') - code += c - '0'; - if (code != 0 || count != 0) - { - if (count == 0) - firstdig = code; - count++; - } - nonnull = 1; - } - if (! nonnull) - { - warning ("\\x used with no following hex digits"); - *cptr = 'x'; - return p; - } - else if (count == 0) - /* Digits are all 0's. Ok. */ - ; - else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node) - || (count > 1 - && (((unsigned)1 - << (TYPE_PRECISION (integer_type_node) - - (count - 1) * 4)) - <= firstdig))) - pedwarn ("hex escape out of range"); - *cptr = code; - return p; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - code = 0; - for (count = 0; count < 3; count++) - { - if (c < '0' || c > '7') - { - p--; - break; - } - code = (code * 8) + (c - '0'); - if (p == limit) - break; - c = *p++; - } - - if (count == 3) - p--; - - *cptr = code; - return p; - - case '\\': case '\'': case '"': case '?': - *cptr = c; - return p; - - case 'n': *cptr = TARGET_NEWLINE; return p; - case 't': *cptr = TARGET_TAB; return p; - case 'r': *cptr = TARGET_CR; return p; - case 'f': *cptr = TARGET_FF; return p; - case 'b': *cptr = TARGET_BS; return p; - case 'v': *cptr = TARGET_VT; return p; - case 'a': - if (warn_traditional && !in_system_header) - warning ("the meaning of '\\a' varies with -traditional"); - *cptr = flag_traditional ? c : TARGET_BELL; - return p; - - /* Warnings and support checks handled by read_ucs(). */ - case 'u': case 'U': - if (c_language != clk_cplusplus && !flag_isoc99) - break; - - if (warn_traditional && !in_system_header) - warning ("the meaning of '\\%c' varies with -traditional", c); - - return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8); - - case 'e': case 'E': - if (pedantic) - pedwarn ("non-ISO-standard escape sequence, '\\%c'", c); - *cptr = TARGET_ESC; return p; - - /* '\(', etc, are used at beginning of line to avoid confusing Emacs. - '\%' is used to prevent SCCS from getting confused. */ - case '(': case '{': case '[': case '%': - if (pedantic) - pedwarn ("unknown escape sequence '\\%c'", c); - *cptr = c; - return p; - } - - if (ISGRAPH (c)) - pedwarn ("unknown escape sequence '\\%c'", c); - else - pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c); - - *cptr = c; - return p; + (*debug_hooks->undef) (SOURCE_LINE (map, line), + (const char *) NODE_NAME (node)); } #if 0 /* not yet */ @@ -1337,10 +667,10 @@ utf8_extend_token (c) #if 0 struct try_type { - tree *node_var; - char unsigned_flag; - char long_flag; - char long_long_flag; + tree *const node_var; + const char unsigned_flag; + const char long_flag; + const char long_long_flag; }; struct try_type type_sequence[] = @@ -1354,523 +684,66 @@ struct try_type type_sequence[] = }; #endif /* 0 */ -struct pf_args -{ - /* Input */ - const char *str; - int fflag; - int lflag; - int base; - /* Output */ - int conversion_errno; - REAL_VALUE_TYPE value; - tree type; -}; - -static void -parse_float (data) - PTR data; -{ - struct pf_args * args = (struct pf_args *) data; - const char *typename; - - args->conversion_errno = 0; - args->type = double_type_node; - typename = "double"; - - /* The second argument, machine_mode, of REAL_VALUE_ATOF - tells the desired precision of the binary result - of decimal-to-binary conversion. */ - - if (args->fflag) - { - if (args->lflag) - error ("both 'f' and 'l' suffixes on floating constant"); - - args->type = float_type_node; - typename = "float"; - } - else if (args->lflag) - { - args->type = long_double_type_node; - typename = "long double"; - } - else if (flag_single_precision_constant) - { - args->type = float_type_node; - typename = "float"; - } - - errno = 0; - if (args->base == 16) - args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type)); - else - args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type)); - - args->conversion_errno = errno; - /* A diagnostic is required here by some ISO C testsuites. - This is not pedwarn, because some people don't want - an error for this. */ - if (REAL_VALUE_ISINF (args->value) && pedantic) - warning ("floating point number exceeds range of '%s'", typename); -} - int c_lex (value) tree *value; { -#if USE_CPPLIB const cpp_token *tok; - enum cpp_ttype type; retry: timevar_push (TV_CPP); - tok = cpp_get_token (&parse_in); + do + tok = cpp_get_token (parse_in); + while (tok->type == CPP_PADDING); timevar_pop (TV_CPP); /* The C++ front end does horrible things with the current line number. To ensure an accurate line number, we must reset it - every time we return a token. If we reset it from tok->line - every time, we'll get line numbers inside macros referring to the - macro definition; this is nice, but we don't want to change the - behavior until integrated mode is the only option. So we keep our - own idea of the line number, and reset it from tok->line at each - new line (which never happens inside a macro). */ - if (tok->flags & BOL) - lex_lineno = tok->line; + every time we return a token. */ + lineno = src_lineno; *value = NULL_TREE; - lineno = lex_lineno; - type = tok->type; - switch (type) + switch (tok->type) { - case CPP_OPEN_BRACE: indent_level++; break; - case CPP_CLOSE_BRACE: indent_level--; break; - - /* Issue this error here, where we can get at tok->val.aux. */ + /* Issue this error here, where we can get at tok->val.c. */ case CPP_OTHER: - if (ISGRAPH (tok->val.aux)) - error ("stray '%c' in program", tok->val.aux); + if (ISGRAPH (tok->val.c)) + error ("stray '%c' in program", tok->val.c); else - error ("stray '\\%#o' in program", tok->val.aux); + error ("stray '\\%o' in program", tok->val.c); goto retry; - case CPP_DEFINED: - type = CPP_NAME; case CPP_NAME: - *value = get_identifier ((const char *)tok->val.node->name); + *value = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); break; - case CPP_INT: - case CPP_FLOAT: case CPP_NUMBER: *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len); break; case CPP_CHAR: case CPP_WCHAR: - *value = lex_charconst ((const char *)tok->val.str.text, - tok->val.str.len, tok->type == CPP_WCHAR); + *value = lex_charconst (tok); break; case CPP_STRING: case CPP_WSTRING: - case CPP_OSTRING: - *value = lex_string ((const char *)tok->val.str.text, - tok->val.str.len, tok->type == CPP_WSTRING); + *value = lex_string (tok->val.str.text, tok->val.str.len, + tok->type == CPP_WSTRING); break; /* These tokens should not be visible outside cpplib. */ case CPP_HEADER_NAME: case CPP_COMMENT: case CPP_MACRO_ARG: - case CPP_PLACEMARKER: abort (); default: break; } - return type; - -#else - int c; - char *p; - int wide_flag = 0; - int objc_flag = 0; - int charconst = 0; - - *value = NULL_TREE; - - retry: - c = getch (); - - /* Effectively do c = skip_white_space (c) - but do it faster in the usual cases. */ - while (1) - switch (c) - { - case ' ': - case '\t': - case '\f': - case '\v': - c = getch (); - break; - - case '\n': - c = skip_white_space (c); - default: - goto found_nonwhite; - } - found_nonwhite: - - lineno = lex_lineno; - - switch (c) - { - case EOF: - return CPP_EOF; - - case 'L': - /* Capital L may start a wide-string or wide-character constant. */ - { - register int c1 = getch(); - if (c1 == '\'') - { - wide_flag = 1; - goto char_constant; - } - if (c1 == '"') - { - wide_flag = 1; - goto string_constant; - } - put_back (c1); - } - goto letter; - - case '@': - if (!doing_objc_thang) - goto straychar; - else - { - /* '@' may start a constant string object. */ - register int c1 = getch (); - if (c1 == '"') - { - objc_flag = 1; - goto string_constant; - } - put_back (c1); - /* Fall through to treat '@' as the start of an identifier. */ - } - - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - case '_': - case '$': - letter: - p = token_buffer; - while (ISALNUM (c) || c == '_' || c == '$' || c == '@') - { - /* Make sure this char really belongs in an identifier. */ - if (c == '$') - { - if (! dollars_in_ident) - error ("'$' in identifier"); - else if (pedantic) - pedwarn ("'$' in identifier"); - } - - if (p >= token_buffer + maxtoken) - p = extend_token_buffer (p); - - *p++ = c; - c = getch(); - } - - put_back (c); - - if (p >= token_buffer + maxtoken) - p = extend_token_buffer (p); - *p = 0; - - *value = get_identifier (token_buffer); - return CPP_NAME; - - case '.': - { - /* It's hard to preserve tokenization on '.' because - it could be a symbol by itself, or it could be the - start of a floating point number and cpp won't tell us. */ - int c1 = getch (); - if (c1 == '.') - { - int c2 = getch (); - if (c2 == '.') - return CPP_ELLIPSIS; - - put_back (c2); - error ("parse error at '..'"); - } - else if (c1 == '*' && c_language == clk_cplusplus) - return CPP_DOT_STAR; - - put_back (c1); - if (ISDIGIT (c1)) - goto number; - } - return CPP_DOT; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - number: - p = token_buffer; - /* Scan the next preprocessing number. All C numeric constants - are preprocessing numbers, but not all preprocessing numbers - are valid numeric constants. Preprocessing numbers fit the - regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])* - See C99 section 6.4.8. */ - for (;;) - { - if (p >= token_buffer + maxtoken) - p = extend_token_buffer (p); - - *p++ = c; - c = getch(); - - if (c == '+' || c == '-') - { - int d = p[-1]; - if (d == 'e' || d == 'E' || d == 'p' || d == 'P') - continue; - } - if (ISALNUM (c) || c == '_' || c == '.') - continue; - break; - } - put_back (c); - - *value = lex_number (token_buffer, p - token_buffer); - return CPP_NUMBER; - - case '\'': - char_constant: - charconst = 1; - - case '"': - string_constant: - { - int delimiter = charconst ? '\'' : '"'; -#ifdef MULTIBYTE_CHARS - int longest_char = local_mb_cur_max (); - (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); -#endif - c = getch (); - p = token_buffer + 1; - - while (c != delimiter && c != EOF) - { - if (p + 2 > token_buffer + maxtoken) - p = extend_token_buffer (p); - - /* ignore_escape_flag is set for reading the filename in #line. */ - if (!ignore_escape_flag && c == '\\') - { - *p++ = c; - *p++ = getch (); /* escaped character */ - c = getch (); - continue; - } - else - { -#ifdef MULTIBYTE_CHARS - int i; - int char_len = -1; - for (i = 0; i < longest_char; ++i) - { - if (p + i >= token_buffer + maxtoken) - p = extend_token_buffer (p); - p[i] = c; - - char_len = local_mblen (p, i + 1); - if (char_len != -1) - break; - c = getch (); - } - if (char_len == -1) - { - /* Replace all except the first byte. */ - put_back (c); - for (--i; i > 0; --i) - put_back (p[i]); - char_len = 1; - } - /* mbtowc sometimes needs an extra char before accepting */ - else if (char_len <= i) - put_back (c); - - p += char_len; -#else - *p++ = c; -#endif - c = getch (); - } - } - } - - if (charconst) - { - *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1), - wide_flag); - return wide_flag ? CPP_WCHAR : CPP_CHAR; - } - else - { - *value = lex_string (token_buffer + 1, p - (token_buffer + 1), - wide_flag); - return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING; - } - - case '+': - case '-': - case '&': - case '|': - case ':': - case '<': - case '>': - case '*': - case '/': - case '%': - case '^': - case '!': - case '=': - { - int c1; - enum cpp_ttype type = CPP_EOF; - - switch (c) - { - case '+': type = CPP_PLUS; break; - case '-': type = CPP_MINUS; break; - case '&': type = CPP_AND; break; - case '|': type = CPP_OR; break; - case ':': type = CPP_COLON; break; - case '<': type = CPP_LESS; break; - case '>': type = CPP_GREATER; break; - case '*': type = CPP_MULT; break; - case '/': type = CPP_DIV; break; - case '%': type = CPP_MOD; break; - case '^': type = CPP_XOR; break; - case '!': type = CPP_NOT; break; - case '=': type = CPP_EQ; break; - } - - c1 = getch (); - - if (c1 == '=' && type < CPP_LAST_EQ) - return type + (CPP_EQ_EQ - CPP_EQ); - else if (c == c1) - switch (c) - { - case '+': return CPP_PLUS_PLUS; - case '-': return CPP_MINUS_MINUS; - case '&': return CPP_AND_AND; - case '|': return CPP_OR_OR; - case ':': - if (c_language == clk_cplusplus) - return CPP_SCOPE; - break; - - case '<': type = CPP_LSHIFT; goto do_triad; - case '>': type = CPP_RSHIFT; goto do_triad; - } - else - switch (c) - { - case '-': - if (c1 == '>') - { - if (c_language == clk_cplusplus) - { - c1 = getch (); - if (c1 == '*') - return CPP_DEREF_STAR; - put_back (c1); - } - return CPP_DEREF; - } - break; - - case '>': - if (c1 == '?' && c_language == clk_cplusplus) - { type = CPP_MAX; goto do_triad; } - break; - - case '<': - if (c1 == ':' && flag_digraphs) - return CPP_OPEN_SQUARE; - if (c1 == '%' && flag_digraphs) - { indent_level++; return CPP_OPEN_BRACE; } - if (c1 == '?' && c_language == clk_cplusplus) - { type = CPP_MIN; goto do_triad; } - break; - - case ':': - if (c1 == '>' && flag_digraphs) - return CPP_CLOSE_SQUARE; - break; - case '%': - if (c1 == '>' && flag_digraphs) - { indent_level--; return CPP_CLOSE_BRACE; } - break; - } - - put_back (c1); - return type; - - do_triad: - c1 = getch (); - if (c1 == '=') - type += (CPP_EQ_EQ - CPP_EQ); - else - put_back (c1); - return type; - } - - case '~': return CPP_COMPL; - case '?': return CPP_QUERY; - case ',': return CPP_COMMA; - case '(': return CPP_OPEN_PAREN; - case ')': return CPP_CLOSE_PAREN; - case '[': return CPP_OPEN_SQUARE; - case ']': return CPP_CLOSE_SQUARE; - case '{': indent_level++; return CPP_OPEN_BRACE; - case '}': indent_level--; return CPP_CLOSE_BRACE; - case ';': return CPP_SEMICOLON; - - straychar: - default: - if (ISGRAPH (c)) - error ("stray '%c' in program", c); - else - error ("stray '\\%#o' in program", c); - goto retry; - } - /* NOTREACHED */ -#endif + return tok->type; } - #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0) static tree @@ -1895,7 +768,7 @@ lex_number (str, len) Two HOST_WIDE_INTs is the largest int literal we can store. In order to detect overflow below, the number of parts (TOTAL_PARTS) must be exactly the number of parts needed to hold the bits - of two HOST_WIDE_INTs. */ + of two HOST_WIDE_INTs. */ #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2) unsigned int parts[TOTAL_PARTS]; @@ -1934,9 +807,7 @@ lex_number (str, len) if (c == '.') { - if (base == 16 && pedantic && !flag_isoc99) - pedwarn ("floating constant may not be in radix 16"); - else if (floatflag == AFTER_POINT) + if (floatflag == AFTER_POINT) ERROR ("too many decimal points in floating constant"); else if (floatflag == AFTER_EXPON) ERROR ("decimal point in exponent - impossible!"); @@ -1957,9 +828,10 @@ lex_number (str, len) /* It is not a decimal point. It should be a digit (perhaps a hex digit). */ - if (ISDIGIT (c)) + if (ISDIGIT (c) + || (base == 16 && ISXDIGIT (c))) { - n = c - '0'; + n = hex_value (c); } else if (base <= 10 && (c == 'e' || c == 'E')) { @@ -1972,14 +844,6 @@ lex_number (str, len) floatflag = AFTER_EXPON; break; /* start of exponent */ } - else if (base == 16 && c >= 'a' && c <= 'f') - { - n = c - 'a' + 10; - } - else if (base == 16 && c >= 'A' && c <= 'F') - { - n = c - 'A' + 10; - } else { p--; @@ -2007,7 +871,7 @@ lex_number (str, len) /* If the highest-order part overflows (gets larger than a host char will hold) then the whole number has overflowed. Record this and truncate the highest-order - part. */ + part. */ if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR) { overflow = 1; @@ -2027,9 +891,9 @@ lex_number (str, len) if (floatflag != NOT_FLOAT) { tree type; - int imag, fflag, lflag, conversion_errno; + const char *typename; + int imag, fflag, lflag; REAL_VALUE_TYPE real; - struct pf_args args; char *copy; if (base == 16 && floatflag != AFTER_EXPON) @@ -2066,7 +930,8 @@ lex_number (str, len) case 'f': case 'F': if (fflag) ERROR ("more than one 'f' suffix on floating constant"); - else if (warn_traditional && !in_system_header) + else if (warn_traditional && !in_system_header + && ! cpp_sys_macro_p (parse_in)) warning ("traditional C rejects the 'f' suffix"); fflag = 1; @@ -2075,7 +940,8 @@ lex_number (str, len) case 'l': case 'L': if (lflag) ERROR ("more than one 'l' suffix on floating constant"); - else if (warn_traditional && !in_system_header) + else if (warn_traditional && !in_system_header + && ! cpp_sys_macro_p (parse_in)) warning ("traditional C rejects the 'l' suffix"); lflag = 1; @@ -2094,34 +960,45 @@ lex_number (str, len) ERROR ("invalid suffix on floating constant"); } - /* Setup input for parse_float() */ - args.str = copy; - args.fflag = fflag; - args.lflag = lflag; - args.base = base; + type = double_type_node; + typename = "double"; + + if (fflag) + { + if (lflag) + ERROR ("both 'f' and 'l' suffixes on floating constant"); - /* Convert string to a double, checking for overflow. */ - if (do_float_handler (parse_float, (PTR) &args)) + type = float_type_node; + typename = "float"; + } + else if (lflag) + { + type = long_double_type_node; + typename = "long double"; + } + else if (flag_single_precision_constant) { - /* Receive output from parse_float() */ - real = args.value; + type = float_type_node; + typename = "float"; } + + /* Warn about this only after we know we're not issuing an error. */ + if (base == 16 && pedantic && !flag_isoc99) + pedwarn ("hexadecimal floating constants are only valid in C99"); + + /* The second argument, machine_mode, of REAL_VALUE_ATOF + tells the desired precision of the binary result + of decimal-to-binary conversion. */ + if (base == 16) + real = REAL_VALUE_HTOF (copy, TYPE_MODE (type)); else - /* We got an exception from parse_float() */ - ERROR ("floating constant out of range"); - - /* Receive output from parse_float() */ - conversion_errno = args.conversion_errno; - type = args.type; - -#ifdef ERANGE - /* ERANGE is also reported for underflow, - so test the value to distinguish overflow from that. */ - if (conversion_errno == ERANGE && !flag_traditional && pedantic - && (REAL_VALUES_LESS (dconst1, real) - || REAL_VALUES_LESS (real, dconstm1))) + real = REAL_VALUE_ATOF (copy, TYPE_MODE (type)); + + /* A diagnostic is required here by some ISO C testsuites. + This is not pedwarn, because some people don't want + an error for this. */ + if (REAL_VALUE_ISINF (real) && pedantic) warning ("floating point number exceeds range of 'double'"); -#endif /* Create a node with determined type and value. */ if (imag) @@ -2132,7 +1009,7 @@ lex_number (str, len) } else { - tree trad_type, ansi_type, type; + tree trad_type, type; HOST_WIDE_INT high, low; int spec_unsigned = 0; int spec_long = 0; @@ -2141,7 +1018,7 @@ lex_number (str, len) int suffix_lu = 0; int warn = 0, i; - trad_type = ansi_type = type = NULL_TREE; + trad_type = type = NULL_TREE; while (p < str + len) { c = *p++; @@ -2150,7 +1027,8 @@ lex_number (str, len) case 'u': case 'U': if (spec_unsigned) error ("two 'u' suffixes on integer constant"); - else if (warn_traditional && !in_system_header) + else if (warn_traditional && !in_system_header + && ! cpp_sys_macro_p (parse_in)) warning ("traditional C rejects the 'u' suffix"); spec_unsigned = 1; @@ -2188,7 +1066,7 @@ lex_number (str, len) } } - /* If the literal overflowed, pedwarn about it now. */ + /* If the literal overflowed, pedwarn about it now. */ if (overflow) { warn = 1; @@ -2212,11 +1090,9 @@ lex_number (str, len) TREE_TYPE (value) = long_long_unsigned_type_node; /* If warn_traditional, calculate both the ISO type and the - traditional type, then see if they disagree. - Otherwise, calculate only the type for the dialect in use. */ - if (warn_traditional || flag_traditional) + traditional type, then see if they disagree. */ + if (warn_traditional) { - /* Calculate the traditional type. */ /* Traditionally, any constant is signed; but if unsigned is specified explicitly, obey that. Use the smallest size with the right number of bits, except for one special @@ -2246,54 +1122,58 @@ lex_number (str, len) ? widest_unsigned_literal_type_node : widest_integer_literal_type_node); } - if (warn_traditional || ! flag_traditional) - { - /* Calculate the ISO type. */ - if (! spec_long && ! spec_unsigned - && int_fits_type_p (value, integer_type_node)) - ansi_type = integer_type_node; - else if (! spec_long && (base != 10 || spec_unsigned) - && int_fits_type_p (value, unsigned_type_node)) - ansi_type = unsigned_type_node; - else if (! spec_unsigned && !spec_long_long - && int_fits_type_p (value, long_integer_type_node)) - ansi_type = long_integer_type_node; - else if (! spec_long_long - && int_fits_type_p (value, long_unsigned_type_node)) - ansi_type = long_unsigned_type_node; - else if (! spec_unsigned - && int_fits_type_p (value, long_long_integer_type_node)) - ansi_type = long_long_integer_type_node; - else if (int_fits_type_p (value, long_long_unsigned_type_node)) - ansi_type = long_long_unsigned_type_node; - else if (! spec_unsigned - && int_fits_type_p (value, widest_integer_literal_type_node)) - ansi_type = widest_integer_literal_type_node; - else - ansi_type = widest_unsigned_literal_type_node; - } - - type = flag_traditional ? trad_type : ansi_type; + + /* Calculate the ISO type. */ + if (! spec_long && ! spec_unsigned + && int_fits_type_p (value, integer_type_node)) + type = integer_type_node; + else if (! spec_long && (base != 10 || spec_unsigned) + && int_fits_type_p (value, unsigned_type_node)) + type = unsigned_type_node; + else if (! spec_unsigned && !spec_long_long + && int_fits_type_p (value, long_integer_type_node)) + type = long_integer_type_node; + else if (! spec_long_long + && int_fits_type_p (value, long_unsigned_type_node)) + type = long_unsigned_type_node; + else if (! spec_unsigned + && int_fits_type_p (value, long_long_integer_type_node)) + type = long_long_integer_type_node; + else if (int_fits_type_p (value, long_long_unsigned_type_node)) + type = long_long_unsigned_type_node; + else if (! spec_unsigned + && int_fits_type_p (value, widest_integer_literal_type_node)) + type = widest_integer_literal_type_node; + else + type = widest_unsigned_literal_type_node; /* We assume that constants specified in a non-decimal base are bit patterns, and that the programmer really meant what they wrote. */ if (warn_traditional && !in_system_header - && base == 10 && trad_type != ansi_type) + && base == 10 && trad_type != type) { - if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type)) - warning ("width of integer constant changes with -traditional"); - else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type)) - warning ("integer constant is unsigned in ISO C, signed with -traditional"); + if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (type)) + warning ("width of integer constant is different in traditional C"); + else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (type)) + warning ("integer constant is unsigned in ISO C, signed in traditional C"); else - warning ("width of integer constant may change on other systems with -traditional"); + warning ("width of integer constant may change on other systems in traditional C"); } - if (pedantic && !flag_traditional && !spec_long_long && !warn - && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type))) + if (pedantic && (flag_isoc99 || !spec_long_long) + && !warn + && ((flag_isoc99 + ? TYPE_PRECISION (long_long_integer_type_node) + : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type))) { warn = 1; - pedwarn ("integer constant larger than the maximum value of an unsigned long int"); + pedwarn ("integer constant larger than the maximum value of %s", + (flag_isoc99 + ? (TREE_UNSIGNED (type) + ? _("an unsigned long long int") + : _("a long long int")) + : _("an unsigned long int"))); } if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type)) @@ -2308,20 +1188,11 @@ lex_number (str, len) else ERROR ("complex integer constant is too wide for 'complex int'"); } - else if (flag_traditional && !int_fits_type_p (value, type)) - /* The traditional constant 0x80000000 is signed - but doesn't fit in the range of int. - This will change it to -0x80000000, which does fit. */ - { - TREE_TYPE (value) = unsigned_type (type); - value = convert (type, value); - TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0; - } else TREE_TYPE (value) = type; /* If it's still an integer (not a complex), and it doesn't - fit in the type we choose for it, then pedwarn. */ + fit in the type we choose for it, then pedwarn. */ if (! warn && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE @@ -2340,21 +1211,19 @@ lex_number (str, len) static tree lex_string (str, len, wide) - const char *str; + const unsigned char *str; unsigned int len; int wide; { tree value; char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1)); char *q = buf; - const char *p = str, *limit = str + len; - unsigned int c; - unsigned width = wide ? WCHAR_TYPE_SIZE - : TYPE_PRECISION (char_type_node); + const unsigned char *p = str, *limit = str + len; + cppchar_t c; #ifdef MULTIBYTE_CHARS /* Reset multibyte conversion state. */ - (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); + (void) local_mbtowc (NULL, NULL, 0); #endif while (p < limit) @@ -2363,10 +1232,10 @@ lex_string (str, len, wide) wchar_t wc; int char_len; - char_len = local_mbtowc (&wc, p, limit - p); + char_len = local_mbtowc (&wc, (const char *) p, limit - p); if (char_len == -1) { - warning ("Ignoring invalid multibyte character"); + warning ("ignoring invalid multibyte character"); char_len = 1; c = *p++; } @@ -2380,19 +1249,14 @@ lex_string (str, len, wide) #endif if (c == '\\' && !ignore_escape_flag) - { - p = readescape (p, limit, &c); - if (width < HOST_BITS_PER_INT - && (unsigned) c >= ((unsigned)1 << width)) - pedwarn ("escape sequence out of range for character"); - } + c = cpp_parse_escape (parse_in, &p, limit, wide); - /* Add this single character into the buffer either as a wchar_t - or as a single byte. */ + /* Add this single character into the buffer either as a wchar_t, + a multibyte sequence, or as a single byte. */ if (wide) { unsigned charwidth = TYPE_PRECISION (char_type_node); - unsigned bytemask = (1 << width) - 1; + unsigned bytemask = (1 << charwidth) - 1; int byte; for (byte = 0; byte < WCHAR_BYTES; ++byte) @@ -2409,6 +1273,16 @@ lex_string (str, len, wide) } q += WCHAR_BYTES; } +#ifdef MULTIBYTE_CHARS + else if (char_len > 1) + { + /* We're dealing with a multibyte character. */ + for ( ; char_len >0; --char_len) + { + *q++ = *(p - char_len); + } + } +#endif else { *q++ = c; @@ -2437,113 +1311,36 @@ lex_string (str, len, wide) return value; } +/* Converts a (possibly wide) character constant token into a tree. */ static tree -lex_charconst (str, len, wide) - const char *str; - unsigned int len; - int wide; +lex_charconst (token) + const cpp_token *token; { - const char *limit = str + len; - int result = 0; - int num_chars = 0; - int chars_seen = 0; - unsigned width = TYPE_PRECISION (char_type_node); - int max_chars; - unsigned int c; - tree value; - -#ifdef MULTIBYTE_CHARS - int longest_char = local_mb_cur_max (); - (void) local_mbtowc (NULL_PTR, NULL_PTR, 0); -#endif - - max_chars = TYPE_PRECISION (integer_type_node) / width; - if (wide) - width = WCHAR_TYPE_SIZE; - - while (str < limit) - { -#ifdef MULTIBYTE_CHARS - wchar_t wc; - int char_len; - - char_len = local_mbtowc (&wc, str, limit - str); - if (char_len == -1) - { - warning ("Ignoring invalid multibyte character"); - char_len = 1; - c = *str++; - } - else - { - p += char_len; - c = wc; - } -#else - c = *str++; -#endif - - ++chars_seen; - if (c == '\\') - { - str = readescape (str, limit, &c); - if (width < HOST_BITS_PER_INT - && (unsigned) c >= ((unsigned)1 << width)) - pedwarn ("escape sequence out of range for character"); - } -#ifdef MAP_CHARACTER - if (ISPRINT (c)) - c = MAP_CHARACTER (c); -#endif - - /* Merge character into result; ignore excess chars. */ - num_chars += (width / TYPE_PRECISION (char_type_node)); - if (num_chars < max_chars + 1) - { - if (width < HOST_BITS_PER_INT) - result = (result << width) | (c & ((1 << width) - 1)); - else - result = c; - } - } - - if (chars_seen == 0) - error ("empty character constant"); - else if (num_chars > max_chars) - { - num_chars = max_chars; - error ("character constant too long"); - } - else if (chars_seen != 1 && ! flag_traditional && warn_multichar) - warning ("multi-character character constant"); + cppchar_t result; + tree type, value; + unsigned int chars_seen; + int unsignedp; + + result = cpp_interpret_charconst (parse_in, token, + &chars_seen, &unsignedp); - /* If char type is signed, sign-extend the constant. */ - if (! wide) - { - int num_bits = num_chars * width; - if (num_bits == 0) - /* We already got an error; avoid invalid shift. */ - value = build_int_2 (0, 0); - else if (TREE_UNSIGNED (char_type_node) - || ((result >> (num_bits - 1)) & 1) == 0) - value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0 - >> (HOST_BITS_PER_WIDE_INT - num_bits)), - 0); - else - value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0 - >> (HOST_BITS_PER_WIDE_INT - num_bits)), - -1); - /* In C, a character constant has type 'int'; in C++, 'char'. */ - if (chars_seen <= 1 && c_language == clk_cplusplus) - TREE_TYPE (value) = char_type_node; - else - TREE_TYPE (value) = integer_type_node; - } + /* Cast to cppchar_signed_t to get correct sign-extension of RESULT + before possibly widening to HOST_WIDE_INT for build_int_2. */ + if (unsignedp || (cppchar_signed_t) result >= 0) + value = build_int_2 (result, 0); else - { - value = build_int_2 (result, 0); - TREE_TYPE (value) = wchar_type_node; - } + value = build_int_2 ((cppchar_signed_t) result, -1); + + if (token->type == CPP_WCHAR) + type = wchar_type_node; + /* In C, a character constant has type 'int'. + In C++ 'char', but multi-char charconsts have type 'int'. */ + else if ((c_language == clk_c || c_language == clk_objective_c) + || chars_seen > 1) + type = integer_type_node; + else + type = char_type_node; + TREE_TYPE (value) = type; return value; }