1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
40 #include "splay-tree.h"
42 /* MULTIBYTE_CHARS support only works for native compilers.
43 ??? Ideally what we want is to model widechar support after
44 the current floating point support. */
46 #undef MULTIBYTE_CHARS
49 #ifdef MULTIBYTE_CHARS
52 #endif /* MULTIBYTE_CHARS */
53 #ifndef GET_ENVIRONMENT
54 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
/* Reader object whose callbacks are installed in init_c_lex and from
   which c_lex pulls its tokens.  */
58 extern cpp_reader parse_in;
60 /* Stream for reading from the input file. */
64 /* Private idea of the line number. See discussion in c_lex(). */
65 static int lex_lineno;
67 /* We may keep statistics about how long which files took to compile. */
/* header_time is the running total charged to headers; body_time is the
   run time recorded at the most recent checkpoint (both are maintained
   by update_header_times).  */
68 static int header_time, body_time;
/* Maps a file name to its struct c_fileinfo record.  */
69 static splay_tree file_info_tree;
71 /* Cause the `yydebug' variable to be defined. */
/* NOTE(review): the next line is a member of struct putback_buffer; the
   rest of that declaration is not visible in this excerpt.  */
78 unsigned char *buffer;
/* Starts with no storage.  The -1 presumably initialises `index', which
   getch compares against -1 to detect an empty buffer -- confirm the
   member order.  */
83 static struct putback_buffer putback = {NULL, 0, -1};
/* getch: fetch the next input character.  The visible lines consult the
   putback buffer first; an index of -1 means nothing has been pushed
   back.  NOTE(review): the remainder of the body (including what is
   read when the buffer is empty) is not visible in this excerpt.  */
85 static inline int getch PARAMS ((void));
90 if (putback.index != -1)
/* Take the character at the current index, i.e. the one stored most
   recently by put_back.  */
92 int ch = putback.buffer[putback.index];
/* put_back: store CH in the putback buffer so that it can be read
   again, enlarging the buffer first when the current index is already
   its last slot.  */
99 static inline void put_back PARAMS ((int));
/* Full: the index names the last valid slot (buffer_size - 1).  With
   the initial state (size 0, index -1) this test is also true, so the
   first call allocates.  */
107 if (putback.index == putback.buffer_size - 1)
/* Grow by 16 characters at a time.  */
109 putback.buffer_size += 16;
110 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
/* Pre-increment so the index always names the newest character.  */
112 putback.buffer[++putback.index] = ch;
120 /* File used for outputting assembler code. */
121 extern FILE *asm_out_file;
/* Replace any earlier WCHAR_TYPE_SIZE with one that follows the
   precision of wchar_type_node.  */
123 #undef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
126 /* Number of bytes in a wide character. */
127 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
/* maxtoken is enlarged by extend_token_buffer_to; token_buffer is
   (re)allocated with maxtoken + 2 bytes each time.  */
130 static int maxtoken; /* Current nominal length of token buffer. */
131 static char *token_buffer; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
/* The three counters below are not static, so they are visible to other
   translation units.  */
135 int indent_level; /* Number of { minus number of }. */
136 int pending_lang_change; /* If we need to switch languages - C++ only */
137 int c_header_level; /* depth in C headers - C++ only */
139 /* Nonzero tells yylex to ignore \ in string constants. */
140 static int ignore_escape_flag;
142 static const char *readescape PARAMS ((const char *, const char *,
144 static const char *read_ucs PARAMS ((const char *, const char *,
145 unsigned int *, int));
146 static void parse_float PARAMS ((PTR));
147 static tree lex_number PARAMS ((const char *, unsigned int));
148 static tree lex_string PARAMS ((const char *, unsigned int, int));
149 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
150 static void update_header_times PARAMS ((const char *));
151 static int dump_one_header PARAMS ((splay_tree_node, void *));
152 static int mark_splay_tree_node PARAMS ((splay_tree_node, void *));
153 static void mark_splay_tree PARAMS ((void *));
156 static int skip_white_space PARAMS ((int));
157 static char *extend_token_buffer PARAMS ((const char *));
158 static void extend_token_buffer_to PARAMS ((int));
159 static int read_line_number PARAMS ((int *));
160 static void process_directive PARAMS ((void));
162 static void cb_ident PARAMS ((cpp_reader *, const cpp_string *));
163 static void cb_enter_file PARAMS ((cpp_reader *));
164 static void cb_leave_file PARAMS ((cpp_reader *));
165 static void cb_rename_file PARAMS ((cpp_reader *));
166 static void cb_def_pragma PARAMS ((cpp_reader *));
/* init_c_lex: one-time lexer setup for the input FILENAME.  The visible
   steps create the per-file statistics tree, optionally start the
   timers, then either open the input stream directly or install the
   preprocessor callbacks and start the reader (two alternative
   preprocessor branches), allocate the token buffer and zero both line
   counters.  NOTE(review): several lines of this function, including
   its return type and return statement, are not visible in this
   excerpt.  */
171 init_c_lex (filename)
172 const char *filename;
174 struct c_fileinfo *toplevel;
176 /* Set up filename timing. Must happen before cpp_start_read. */
/* Keys are ordered with strcmp and stored values are released with
   free, as the two casts show.  */
177 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
179 (splay_tree_delete_value_fn)free);
180 /* Make sure to mark the filenames in the tree for GC. */
181 ggc_add_root (&file_info_tree, 1, sizeof (file_info_tree),
183 toplevel = get_fileinfo (ggc_strdup ("<top level>"));
184 if (flag_detailed_statistics)
/* Seed both the checkpoint and the "<top level>" record with the
   current run time.  */
187 body_time = get_run_time ();
188 toplevel->time = body_time;
191 #ifdef MULTIBYTE_CHARS
192 /* Change to the native locale for multibyte conversions. */
193 setlocale (LC_CTYPE, "");
194 GET_ENVIRONMENT (literal_codeset, "LANG");
198 /* Open input file. */
/* A null FILENAME or "-" is special-cased.  NOTE(review): presumably
   this selects standard input, but the branch body is not visible.  */
199 if (filename == 0 || !strcmp (filename, "-"))
205 finput = fopen (filename, "r");
/* NOTE(review): the failure test guarding this call is not visible.  */
207 pfatal_with_name (filename);
209 #ifdef IO_BUFFER_SIZE
210 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
212 #else /* !USE_CPPLIB */
/* Register this file's handlers with the preprocessor reader.  */
214 parse_in.cb.ident = cb_ident;
215 parse_in.cb.enter_file = cb_enter_file;
216 parse_in.cb.leave_file = cb_leave_file;
217 parse_in.cb.rename_file = cb_rename_file;
218 parse_in.cb.def_pragma = cb_def_pragma;
220 /* Make sure parse_in.digraphs matches flag_digraphs. */
221 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
223 if (! cpp_start_read (&parse_in, filename))
226 if (filename == 0 || !strcmp (filename, "-"))
/* Two bytes more than maxtoken, matching the note on token_buffer's
   declaration.  */
232 token_buffer = (char *) xmalloc (maxtoken + 2);
234 /* Start it at 0, because check_newline is called at the very beginning
235 and will increment it to 1. */
236 lineno = lex_lineno = 0;
/* Look NAME up in file_info_tree and return its struct c_fileinfo
   record; when there is none, allocate a record, initialise it and
   insert it under NAME.  NOTE(review): the function header and the test
   on the lookup result are not visible in this excerpt; the callers
   suggest this is get_fileinfo -- confirm.  */
246 struct c_fileinfo *fi;
248 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
/* Existing entry: hand back the stored record.  */
250 return (struct c_fileinfo *) n->value;
/* No entry yet: build one.  xmalloc is the allocator used throughout
   this file.  */
252 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
254 fi->interface_only = 0;
255 fi->interface_unknown = 1;
/* The NAME pointer itself is stored as the key (no copy is made here),
   so it has to outlive the tree entry.  */
256 splay_tree_insert (file_info_tree, (splay_tree_key) name,
257 (splay_tree_value) fi);
/* update_header_times: account for the run time used since the last
   checkpoint when the lexer moves to another file.  NAME identifies the
   file whose record receives the elapsed time.  Nothing is done unless
   flag_detailed_statistics is set.  */
262 update_header_times (name)
265 /* Changing files again. This means currently collected time
266 is charged against header time, and body time starts back at 0. */
267 if (flag_detailed_statistics)
269 int this_time = get_run_time ();
270 struct c_fileinfo *file = get_fileinfo (name);
/* The interval since the checkpoint is added to the global header
   total and to NAME's own record; then the checkpoint is moved to
   now.  */
271 header_time += this_time - body_time;
272 file->time += this_time - body_time;
273 body_time = this_time;
/* dump_one_header: callback handed to splay_tree_foreach by
   dump_time_statistics.  Prints, via print_time, the key of node N
   (used as a file name string) together with the time stored in its
   struct c_fileinfo value.  DUMMY is unused.  */
278 dump_one_header (n, dummy)
280 void *dummy ATTRIBUTE_UNUSED;
282 print_time ((const char *) n->key,
283 ((struct c_fileinfo *) n->value)->time);
/* dump_time_statistics: print the timing summary to stderr: the
   accumulated header time, the time since the last checkpoint labelled
   as the main file, their ratio, and then one line per file recorded in
   file_info_tree.  */
288 dump_time_statistics ()
/* Charge the still-open interval to the current input file first.  */
290 struct c_fileinfo *file = get_fileinfo (input_filename);
291 int this_time = get_run_time ();
292 file->time += this_time - body_time;
294 fprintf (stderr, "\n******\n");
295 print_time ("header files (total)", header_time);
296 print_time ("main file (total)", this_time - body_time);
/* NOTE(review): the divisor is zero when no time has elapsed since the
   last checkpoint; both operands are converted to double first, so
   this is a floating-point division -- confirm the resulting output is
   acceptable.  */
297 fprintf (stderr, "ratio = %g : 1\n",
298 (double)header_time / (double)(this_time - body_time));
299 fprintf (stderr, "\n******\n");
301 splay_tree_foreach (file_info_tree, dump_one_header, 0);
306 /* If C is not whitespace, return C.
307 Otherwise skip whitespace and return first nonwhite char read. */
/* NOTE(review): the function header and most of the body are not
   visible in this excerpt; the prototype and the calls of the form
   skip_white_space (c) elsewhere in the file suggest these lines belong
   to skip_white_space -- confirm.  */
317 /* There is no need to process comments or backslash-newline
318 here. None can occur in the output of cpp. Do handle \r
319 in case someone sent us a .i file. */
/* The character returned by check_newline replaces C and scanning
   continues from it.  */
327 c = check_newline ();
331 /* Per C99, horizontal whitespace is just these four characters. */
/* Diagnostic for a backslash; NOTE(review): the condition selecting
   this call is not visible.  */
340 error ("stray '\\' in program");
350 /* Skips all of the white space at the current location in the input file. */
/* The first non-white character found by skip_white_space is pushed
   back, so the next read starts at that character.  NOTE(review): the
   line that obtains C is not visible in this excerpt.  */
353 position_after_white_space ()
359 put_back (skip_white_space (c));
362 /* Make the token buffer longer, preserving the data in it.
363 P should point to just beyond the last valid character in the old buffer.
364 The value we return is a pointer to the new buffer
365 at a place corresponding to P. */
/* extend_token_buffer_to: enlarge token_buffer so that maxtoken is at
   least SIZE.  Existing contents are carried over by xrealloc.  */
368 extend_token_buffer_to (size)
/* Growth step: double and add 10.  The following `while' line looks
   like the tail of a do/while whose `do' is on a line not visible in
   this excerpt, so the step would run at least once -- confirm.  */
372 maxtoken = maxtoken * 2 + 10;
373 while (maxtoken < size);
/* As at the initial allocation, two bytes beyond maxtoken.  */
374 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
/* extend_token_buffer: grow token_buffer and return the pointer in the
   new buffer that corresponds to P in the old one.  */
378 extend_token_buffer (p)
/* Reallocation may move the buffer, so remember P as an offset and
   rebuild the pointer afterwards.  */
381 int offset = p - token_buffer;
382 extend_token_buffer_to (offset);
383 return token_buffer + offset;
/* read_line_number: lex one token with c_lex.  If it is a number token
   whose tree is an INTEGER_CST, store its low word in *NUM.  Otherwise
   "invalid #-line" is reported unless the token is CPP_EOF.
   NOTE(review): the return statements are not visible here;
   process_directive tests the result with `!', so the value is
   presumably nonzero on success -- confirm.  */
388 read_line_number (num)
392 enum cpp_ttype token = c_lex (&value);
394 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
396 *num = TREE_INT_CST_LOW (value);
/* Running out of tokens on the line is not itself an error.  */
401 if (token != CPP_EOF)
402 error ("invalid #-line");
407 /* At the beginning of a line, increment the line number
408 and process any #-directive on this line.
409 If the line is a #-directive, read the entire line and return a newline.
410 Otherwise, return the line's first non-whitespace character. */
/* NOTE(review): the function header is not visible in this excerpt;
   the comment in init_c_lex and the call in the whitespace skipper
   suggest this is check_newline -- confirm.  */
417 /* Loop till we get a nonblank, non-directive line. */
420 /* Read first nonwhite char on the line. */
/* Only space and tab count as leading blanks here.  */
423 while (c == ' ' || c == '\t');
/* NOTE(review): presumably reached when the first character is `#';
   the test is not visible.  */
428 process_directive ();
/* Handle the remainder of a `#' line.  The word after the `#' selects
   the action: pragma, define, undef, line or ident; any other word is
   reported as an invalid directive.  For `#line' and for a bare
   `# <number>' the code then reads a line number, an optional file
   name and optional numeric flags (1 = entering a file, 2 = leaving
   one, 3 = system header, 4 = C header) and updates the input file
   stack to match.  NOTE(review): the function header and many interior
   lines are not visible in this excerpt; the call in the newline
   handler suggests this is process_directive -- confirm.  */
441 enum cpp_ttype token;
444 enum { act_none, act_push, act_pop } action;
445 int action_number, l;
447 #ifndef NO_IMPLICIT_EXTERN_C
448 int entering_c_header = 0;
451 /* Don't read beyond this line. */
455 token = c_lex (&value);
457 if (token == CPP_NAME)
459 /* If a letter follows, then if the word here is `line', skip
460 it and ignore it; otherwise, ignore the line, with an error
461 if the word isn't `pragma'. */
463 const char *name = IDENTIFIER_POINTER (value);
465 if (!strcmp (name, "pragma"))
/* #define and #undef are passed to the debug hooks together with the
   current line number and the directive text.  */
470 else if (!strcmp (name, "define"))
472 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
475 else if (!strcmp (name, "undef"))
477 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
/* For `line', fetch the next token so that the line-number handling
   further down sees the number.  */
480 else if (!strcmp (name, "line"))
483 token = c_lex (&value);
486 else if (!strcmp (name, "ident"))
488 /* #ident. We expect a string constant here.
489 The pedantic warning and syntax error are now in cpp. */
491 token = c_lex (&value);
492 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
495 #ifdef ASM_OUTPUT_IDENT
498 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
502 /* Skip the rest of this line. */
506 error ("undefined or invalid # directive `%s'", name);
510 /* If the # is the only nonwhite char on the line,
511 just ignore it. Check the new newline. */
512 if (token == CPP_EOF)
516 /* Here we have either `#line' or `# <nonletter>'.
517 In either case, it should be a line number; a digit should follow. */
519 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
521 error ("invalid #-line");
525 /* subtract one, because it is the following line that
526 gets the specified number */
528 l = TREE_INT_CST_LOW (value) - 1;
530 /* More follows: it must be a string constant (filename).
531 It would be neat to use cpplib to quickly process the string, but
532 (1) we don't have a handy tokenization of the string, and
533 (2) I don't know how well that would work in the presence
534 of filenames that contain wide characters. */
538 /* Don't treat \ as special if we are processing #line 1 "...".
539 If you want it to be treated specially, use # 1 "...". */
540 ignore_escape_flag = 1;
543 /* Read the string constant. */
544 token = c_lex (&value);
/* The flag is cleared again as soon as the file name has been read.  */
546 ignore_escape_flag = 0;
548 if (token == CPP_EOF)
550 /* No more: store the line number and check following line. */
555 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
557 error ("invalid #line");
561 new_file = TREE_STRING_POINTER (value);
/* The first file name seen becomes the main input file name.  */
563 if (main_input_filename == 0)
564 main_input_filename = new_file;
569 /* Each change of file name
570 reinitializes whether we are now in a system header. */
571 in_system_header = 0;
573 if (!read_line_number (&action_number))
575 /* Update the name in the top element of input_file_stack. */
576 if (input_file_stack)
577 input_file_stack->name = input_filename;
580 /* `1' after file name means entering new file.
581 `2' after file name means just left a file. */
583 if (action_number == 1)
586 read_line_number (&action_number);
588 else if (action_number == 2)
591 read_line_number (&action_number);
/* This is a plain `if', not `else if': after a 1 or a 2 the next
   number has already been read, so a 3 may follow either of them.  */
593 if (action_number == 3)
595 /* `3' after file name means this is a system header file. */
596 in_system_header = 1;
597 read_line_number (&action_number);
599 #ifndef NO_IMPLICIT_EXTERN_C
600 if (action_number == 4)
602 /* `4' after file name means this is a C header file. */
603 entering_c_header = 1;
604 read_line_number (&action_number);
608 /* Do the actions implied by the preceding numbers. */
609 if (action == act_push)
612 push_srcloc (input_filename, 1);
/* Remember the brace depth at entry; it is compared again when the
   file is popped.  */
613 input_file_stack->indent_level = indent_level;
614 debug_start_source_file (input_filename);
615 #ifndef NO_IMPLICIT_EXTERN_C
618 else if (entering_c_header)
621 ++pending_lang_change;
625 else if (action == act_pop)
627 /* Popping out of a file. */
628 if (input_file_stack->next)
630 #ifndef NO_IMPLICIT_EXTERN_C
631 if (c_header_level && --c_header_level == 0)
633 if (entering_c_header)
634 warning ("badly nested C headers from preprocessor");
635 --pending_lang_change;
/* indent_level counts `{' minus `}', so a value above the saved one
   means the file opened more braces than it closed; the first %c names
   the surplus brace.  */
639 if (indent_level != input_file_stack->indent_level)
641 warning_with_file_and_line
642 (input_filename, lex_lineno,
643 "This file contains more '%c's than '%c's.",
644 indent_level > input_file_stack->indent_level ? '{' : '}',
645 indent_level > input_file_stack->indent_level ? '}' : '{');
649 debug_end_source_file (input_file_stack->line);
/* NOTE(review): presumably the branch taken when there is no enclosing
   file left to pop; the `else' is not visible.  */
652 error ("#-lines for entering and leaving files don't match");
655 update_header_times (new_file);
657 input_filename = new_file;
661 extract_interface_info ();
663 /* skip the rest of this line. */
/* NOTE(review): as written the loop ends only on a newline; confirm
   that getch cannot keep returning an end-of-file value here.  */
667 while (getch () != '\n');
669 #else /* USE_CPPLIB */
671 /* Not yet handled: #pragma, #define, #undef.
672 No need to deal with linemarkers under normal conditions. */
/* cb_ident: handler installed as parse_in.cb.ident.  STR is the string
   supplied by the reader; PFILE is unused.  When the target defines
   ASM_OUTPUT_IDENT, the string is run through lex_string and the
   result is written to asm_out_file.  */
675 cb_ident (pfile, str)
676 cpp_reader *pfile ATTRIBUTE_UNUSED;
677 const cpp_string *str;
679 #ifdef ASM_OUTPUT_IDENT
682 /* Convert escapes in the string. */
/* The final 0 is the argument that c_lex fills with
   `tok.type == CPP_WSTRING', i.e. this is lexed as a narrow string.  */
683 tree value = lex_string ((const char *)str->text, str->len, 0);
684 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
/* cb_enter_file: handler installed as parse_in.cb.enter_file.  Uses the
   reader's current buffer to push a new source location, record the
   brace depth, notify the debug hooks, update the timing statistics
   and refresh in_system_header.  NOTE(review): several guards and
   braces are not visible in this excerpt.  */
690 cb_enter_file (pfile)
693 cpp_buffer *ip = CPP_BUFFER (pfile);
694 /* Bleah, need a better interface to this. */
695 const char *flags = cpp_syshdr_flags (pfile, ip);
697 /* Mustn't stack the main buffer on the input stack. (Ick.) */
/* The line counters are taken from the including buffer (ip->prev),
   less one.  */
700 lex_lineno = lineno = ip->prev->lineno - 1;
701 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
/* Saved so that cb_leave_file can detect unbalanced braces.  */
702 input_file_stack->indent_level = indent_level;
703 debug_start_source_file (ip->nominal_fname);
708 update_header_times (ip->nominal_fname);
711 extract_interface_info ();
/* FLAGS is indexed as a character array: a nonzero first element marks
   a system header, and a nonzero third element selects the branch that
   increments pending_lang_change.  */
713 in_system_header = (flags[0] != 0);
714 #ifndef NO_IMPLICIT_EXTERN_C
717 else if (flags[2] != 0)
720 ++pending_lang_change;
/* cb_leave_file: handler installed as parse_in.cb.leave_file.  Warns
   about unbalanced braces in the file being left, then (unless this is
   the outermost entry) refreshes in_system_header, unwinds the
   C-header bookkeeping, resynchronises lex_lineno with the reader and
   notifies the debug hooks; finally the timing statistics are updated.
   NOTE(review): several lines, including the actual pop of the input
   stack, are not visible in this excerpt.  */
726 cb_leave_file (pfile)
729 /* Bleah, need a better interface to this. */
730 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
/* indent_level counts `{' minus `}', so a value above the one saved on
   entry means more `{' than `}'; the first %c names the surplus
   brace.  */
732 if (indent_level != input_file_stack->indent_level)
734 warning_with_file_and_line
735 (input_filename, lex_lineno,
736 "This file contains more '%c's than '%c's.",
737 indent_level > input_file_stack->indent_level ? '{' : '}',
738 indent_level > input_file_stack->indent_level ? '}' : '{');
741 /* We get called for the main buffer, but we mustn't pop it. */
742 if (input_file_stack->next)
744 in_system_header = (flags[0] != 0);
745 #ifndef NO_IMPLICIT_EXTERN_C
/* The decrement only happens when c_header_level is nonzero, because
   of the short-circuit `&&'.  */
746 if (c_header_level && --c_header_level == 0)
749 warning ("badly nested C headers from preprocessor");
750 --pending_lang_change;
753 lex_lineno = CPP_BUFFER (pfile)->lineno;
754 debug_end_source_file (input_file_stack->line);
756 update_header_times (input_file_stack->name);
758 extract_interface_info ();
/* cb_rename_file: handler installed as parse_in.cb.rename_file.  Takes
   the nominal file name and line number of the reader's current buffer
   as the new input_filename and lex_lineno, refreshes
   in_system_header, and updates the timing statistics and interface
   information for the new name.  */
762 cb_rename_file (pfile)
765 cpp_buffer *ip = CPP_BUFFER (pfile);
766 /* Bleah, need a better interface to this. */
767 const char *flags = cpp_syshdr_flags (pfile, ip);
/* input_filename receives a string obtained from ggc_alloc_string
   rather than the buffer's own pointer.  */
768 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
769 lex_lineno = ip->lineno;
770 in_system_header = (flags[0] != 0);
772 update_header_times (ip->nominal_fname);
774 extract_interface_info ();
/* cb_def_pragma: handler installed as parse_in.cb.def_pragma.  When
   warnings are wanted, reads up to two tokens from PFILE and reports
   the pragma as ignored, naming the first token and, if the second
   token is a name, that one as well.  */
778 cb_def_pragma (pfile)
781 /* Issue a warning message if we have been asked to do so. Ignore
782 unknown pragmas in system headers unless an explicit
783 -Wunknown-pragmas has been given. */
/* Numeric comparison: when in_system_header is 1, warn_unknown_pragmas
   has to be greater than 1 for the warning to be issued.  */
784 if (warn_unknown_pragmas > in_system_header)
786 const unsigned char *space, *name = 0;
789 cpp_get_token (pfile, &s);
790 space = cpp_token_as_text (pfile, &s);
791 cpp_get_token (pfile, &s);
/* NAME stays null unless the second token is an identifier.  */
792 if (s.type == CPP_NAME)
793 name = cpp_token_as_text (pfile, &s);
/* NOTE(review): the test choosing between the two messages is not
   visible; presumably it is on NAME.  */
796 warning ("ignoring #pragma %s %s", space, name);
798 warning ("ignoring #pragma %s", space);
801 #endif /* USE_CPPLIB */
803 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
805 [lex.charset]: The character designated by the universal-character-name
806 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
807 is NNNNNNNN; the character designated by the universal-character-name
808 \uNNNN is that character whose character short name in ISO/IEC 10646 is
809 0000NNNN. If the hexadecimal value for a universal character name is
810 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
811 universal character name designates a character in the basic source
812 character set, then the program is ill-formed.
814 We assume that wchar_t is Unicode, so we don't need to do any
815 mapping. Is this ever wrong? */
/* read_ucs: read LENGTH hexadecimal digits starting at P (readescape
   passes 4 for `u' and 8 for anything else), accumulate them in CODE
   and validate the result.  LIMIT bounds the input and CPTR receives
   the character.  NOTE(review): the loop body's reads, the return
   statements and the store of CODE into *CPTR are not visible in this
   excerpt.  */
818 read_ucs (p, limit, cptr, length)
824 unsigned int code = 0;
827 for (; length; --length)
831 error ("incomplete universal-character-name");
838 error ("non hex digit '%c' in universal-character-name", c);
/* Add the value of one hex digit; the three tests are mutually
   exclusive.  */
844 if (c >= 'a' && c <= 'f')
845 code += c - 'a' + 10;
846 if (c >= 'A' && c <= 'F')
847 code += c - 'A' + 10;
848 if (c >= '0' && c <= '9')
853 sorry ("universal-character-name on EBCDIC target");
854 *cptr = 0x3f; /* EBCDIC invalid character */
/* Validation: values above 0x9f with the top bit clear are accepted;
   values 0x20..0x7e are rejected unless they are 0x24, 0x40 or 0x60;
   anything else is reported as invalid.  */
858 if (code > 0x9f && !(code & 0x80000000))
859 /* True extended character, OK. */;
860 else if (code >= 0x20 && code < 0x7f)
862 /* ASCII printable character. The C character set consists of all of
863 these except $, @ and `. We use hex escapes so that this also
864 works with EBCDIC hosts. */
865 if (code != 0x24 && code != 0x40 && code != 0x60)
866 error ("universal-character-name used for '%c'", code);
869 error ("invalid universal-character-name");
875 /* Read an escape sequence and write its character equivalent into *CPTR.
876 P is the input pointer, which is just after the backslash. LIMIT
877 is how much text we have.
878 Returns the updated input pointer. */
/* readescape: decode the escape sequence whose text starts at P (just
   after the backslash) and store the resulting character in *CPTR.
   LIMIT bounds the input.  Handles hex and octal escapes, the simple
   letter escapes, universal character names (delegated to read_ucs)
   and a few non-standard escapes that only draw a pedantic warning.
   NOTE(review): the switch header, most case labels and many interior
   lines are not visible in this excerpt.  */
881 readescape (p, limit, cptr)
886 unsigned int c, code, count;
887 unsigned firstdig = 0;
892 /* cpp has already issued an error for this. */
902 if (warn_traditional && !in_system_header)
903 warning ("the meaning of `\\x' varies with -traditional");
905 if (flag_traditional)
/* Add the value of one hex digit; the three tests are mutually
   exclusive.  */
923 if (c >= 'a' && c <= 'f')
924 code += c - 'a' + 10;
925 if (c >= 'A' && c <= 'F')
926 code += c - 'A' + 10;
927 if (c >= '0' && c <= '9')
/* True once a nonzero digit has been seen; NOTE(review): the guarded
   statement is not visible, presumably it advances COUNT so that
   leading zeros are not counted.  */
929 if (code != 0 || count != 0)
939 warning ("\\x used with no following hex digits");
944 /* Digits are all 0's. Ok. */
/* Range check against the precision of the target `int'; the rest of
   the condition is only partly visible.  */
946 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
949 << (TYPE_PRECISION (integer_type_node)
952 pedwarn ("hex escape out of range");
/* Octal escape: at most three digits are accumulated.  */
956 case '0': case '1': case '2': case '3': case '4':
957 case '5': case '6': case '7':
959 for (count = 0; count < 3; count++)
961 if (c < '0' || c > '7')
966 code = (code * 8) + (c - '0');
978 case '\\': case '\'': case '"': case '?':
/* Letter escapes map to the target's character constants.  */
982 case 'n': *cptr = TARGET_NEWLINE; return p;
983 case 't': *cptr = TARGET_TAB; return p;
984 case 'r': *cptr = TARGET_CR; return p;
985 case 'f': *cptr = TARGET_FF; return p;
986 case 'b': *cptr = TARGET_BS; return p;
987 case 'v': *cptr = TARGET_VT; return p;
989 if (warn_traditional && !in_system_header)
990 warning ("the meaning of '\\a' varies with -traditional");
/* With flag_traditional the escape yields the character C itself
   instead of the bell.  */
991 *cptr = flag_traditional ? c : TARGET_BELL;
994 /* Warnings and support checks handled by read_ucs(). */
996 if (c_language != clk_cplusplus && !flag_isoc99)
999 if (warn_traditional && !in_system_header)
1000 warning ("the meaning of '\\%c' varies with -traditional", c);
/* Four hex digits after `u', eight otherwise.  */
1002 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
1006 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
1007 *cptr = TARGET_ESC; return p;
1009 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1010 '\%' is used to prevent SCCS from getting confused. */
1011 case '(': case '{': case '[': case '%':
1013 pedwarn ("unknown escape sequence '\\%c'", c);
/* Unknown escapes are reported with the character itself in one
   message and with its hex code in the other; NOTE(review): the test
   choosing between the two is not visible.  */
1019 pedwarn ("unknown escape sequence '\\%c'", c);
1021 pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c);
1028 /* Returns nonzero if C is a universal-character-name. Give an error if it
1029 is not one which may appear in an identifier, as per [extendid].
1031 Note that extended character support in identifiers has not yet been
1032 implemented. It is my personal opinion that this is not a desirable
1033 feature. Portable code cannot count on support for more than the basic
1034 identifier character set. */
/* is_extended_char: classify the code point C for use in identifiers.
   The visible body is a sequence of groups of inclusive range tests on
   C, preceded by an error message for values written with the
   eight-digit form and followed by an error message for the four-digit
   form.  NOTE(review): the return statements, the per-group labels and
   several single-value tests are not visible in this excerpt, so the
   exact result for each group cannot be stated from here.  */
1037 is_extended_char (c)
1040 #ifdef TARGET_EBCDIC
1047 /* None of the valid chars are outside the Basic Multilingual Plane (the
1051 error ("universal-character-name '\\U%08x' not valid in identifier", c);
1056 if ((c >= 0x00c0 && c <= 0x00d6)
1057 || (c >= 0x00d8 && c <= 0x00f6)
1058 || (c >= 0x00f8 && c <= 0x01f5)
1059 || (c >= 0x01fa && c <= 0x0217)
1060 || (c >= 0x0250 && c <= 0x02a8)
1061 || (c >= 0x1e00 && c <= 0x1e9a)
1062 || (c >= 0x1ea0 && c <= 0x1ef9))
1067 || (c >= 0x0388 && c <= 0x038a)
1069 || (c >= 0x038e && c <= 0x03a1)
1070 || (c >= 0x03a3 && c <= 0x03ce)
1071 || (c >= 0x03d0 && c <= 0x03d6)
1076 || (c >= 0x03e2 && c <= 0x03f3)
1077 || (c >= 0x1f00 && c <= 0x1f15)
1078 || (c >= 0x1f18 && c <= 0x1f1d)
1079 || (c >= 0x1f20 && c <= 0x1f45)
1080 || (c >= 0x1f48 && c <= 0x1f4d)
1081 || (c >= 0x1f50 && c <= 0x1f57)
1085 || (c >= 0x1f5f && c <= 0x1f7d)
1086 || (c >= 0x1f80 && c <= 0x1fb4)
1087 || (c >= 0x1fb6 && c <= 0x1fbc)
1088 || (c >= 0x1fc2 && c <= 0x1fc4)
1089 || (c >= 0x1fc6 && c <= 0x1fcc)
1090 || (c >= 0x1fd0 && c <= 0x1fd3)
1091 || (c >= 0x1fd6 && c <= 0x1fdb)
1092 || (c >= 0x1fe0 && c <= 0x1fec)
1093 || (c >= 0x1ff2 && c <= 0x1ff4)
1094 || (c >= 0x1ff6 && c <= 0x1ffc))
1098 if ((c >= 0x0401 && c <= 0x040d)
1099 || (c >= 0x040f && c <= 0x044f)
1100 || (c >= 0x0451 && c <= 0x045c)
1101 || (c >= 0x045e && c <= 0x0481)
1102 || (c >= 0x0490 && c <= 0x04c4)
1103 || (c >= 0x04c7 && c <= 0x04c8)
1104 || (c >= 0x04cb && c <= 0x04cc)
1105 || (c >= 0x04d0 && c <= 0x04eb)
1106 || (c >= 0x04ee && c <= 0x04f5)
1107 || (c >= 0x04f8 && c <= 0x04f9))
1111 if ((c >= 0x0531 && c <= 0x0556)
1112 || (c >= 0x0561 && c <= 0x0587))
1116 if ((c >= 0x05d0 && c <= 0x05ea)
1117 || (c >= 0x05f0 && c <= 0x05f4))
1121 if ((c >= 0x0621 && c <= 0x063a)
1122 || (c >= 0x0640 && c <= 0x0652)
1123 || (c >= 0x0670 && c <= 0x06b7)
1124 || (c >= 0x06ba && c <= 0x06be)
1125 || (c >= 0x06c0 && c <= 0x06ce)
1126 || (c >= 0x06e5 && c <= 0x06e7))
1130 if ((c >= 0x0905 && c <= 0x0939)
1131 || (c >= 0x0958 && c <= 0x0962))
1135 if ((c >= 0x0985 && c <= 0x098c)
1136 || (c >= 0x098f && c <= 0x0990)
1137 || (c >= 0x0993 && c <= 0x09a8)
1138 || (c >= 0x09aa && c <= 0x09b0)
1140 || (c >= 0x09b6 && c <= 0x09b9)
1141 || (c >= 0x09dc && c <= 0x09dd)
1142 || (c >= 0x09df && c <= 0x09e1)
1143 || (c >= 0x09f0 && c <= 0x09f1))
1147 if ((c >= 0x0a05 && c <= 0x0a0a)
1148 || (c >= 0x0a0f && c <= 0x0a10)
1149 || (c >= 0x0a13 && c <= 0x0a28)
1150 || (c >= 0x0a2a && c <= 0x0a30)
1151 || (c >= 0x0a32 && c <= 0x0a33)
1152 || (c >= 0x0a35 && c <= 0x0a36)
1153 || (c >= 0x0a38 && c <= 0x0a39)
1154 || (c >= 0x0a59 && c <= 0x0a5c)
1159 if ((c >= 0x0a85 && c <= 0x0a8b)
1161 || (c >= 0x0a8f && c <= 0x0a91)
1162 || (c >= 0x0a93 && c <= 0x0aa8)
1163 || (c >= 0x0aaa && c <= 0x0ab0)
1164 || (c >= 0x0ab2 && c <= 0x0ab3)
1165 || (c >= 0x0ab5 && c <= 0x0ab9)
1170 if ((c >= 0x0b05 && c <= 0x0b0c)
1171 || (c >= 0x0b0f && c <= 0x0b10)
1172 || (c >= 0x0b13 && c <= 0x0b28)
1173 || (c >= 0x0b2a && c <= 0x0b30)
1174 || (c >= 0x0b32 && c <= 0x0b33)
1175 || (c >= 0x0b36 && c <= 0x0b39)
1176 || (c >= 0x0b5c && c <= 0x0b5d)
1177 || (c >= 0x0b5f && c <= 0x0b61))
1181 if ((c >= 0x0b85 && c <= 0x0b8a)
1182 || (c >= 0x0b8e && c <= 0x0b90)
1183 || (c >= 0x0b92 && c <= 0x0b95)
1184 || (c >= 0x0b99 && c <= 0x0b9a)
1186 || (c >= 0x0b9e && c <= 0x0b9f)
1187 || (c >= 0x0ba3 && c <= 0x0ba4)
1188 || (c >= 0x0ba8 && c <= 0x0baa)
1189 || (c >= 0x0bae && c <= 0x0bb5)
1190 || (c >= 0x0bb7 && c <= 0x0bb9))
1194 if ((c >= 0x0c05 && c <= 0x0c0c)
1195 || (c >= 0x0c0e && c <= 0x0c10)
1196 || (c >= 0x0c12 && c <= 0x0c28)
1197 || (c >= 0x0c2a && c <= 0x0c33)
1198 || (c >= 0x0c35 && c <= 0x0c39)
1199 || (c >= 0x0c60 && c <= 0x0c61))
1203 if ((c >= 0x0c85 && c <= 0x0c8c)
1204 || (c >= 0x0c8e && c <= 0x0c90)
1205 || (c >= 0x0c92 && c <= 0x0ca8)
1206 || (c >= 0x0caa && c <= 0x0cb3)
1207 || (c >= 0x0cb5 && c <= 0x0cb9)
1208 || (c >= 0x0ce0 && c <= 0x0ce1))
1212 if ((c >= 0x0d05 && c <= 0x0d0c)
1213 || (c >= 0x0d0e && c <= 0x0d10)
1214 || (c >= 0x0d12 && c <= 0x0d28)
1215 || (c >= 0x0d2a && c <= 0x0d39)
1216 || (c >= 0x0d60 && c <= 0x0d61))
1220 if ((c >= 0x0e01 && c <= 0x0e30)
1221 || (c >= 0x0e32 && c <= 0x0e33)
1222 || (c >= 0x0e40 && c <= 0x0e46)
1223 || (c >= 0x0e4f && c <= 0x0e5b))
1227 if ((c >= 0x0e81 && c <= 0x0e82)
1233 || (c >= 0x0e94 && c <= 0x0e97)
1234 || (c >= 0x0e99 && c <= 0x0e9f)
1235 || (c >= 0x0ea1 && c <= 0x0ea3)
1240 || (c >= 0x0ead && c <= 0x0eb0)
1244 || (c >= 0x0ec0 && c <= 0x0ec4)
1249 if ((c >= 0x10a0 && c <= 0x10c5)
1250 || (c >= 0x10d0 && c <= 0x10f6))
1254 if ((c >= 0x3041 && c <= 0x3094)
1255 || (c >= 0x309b && c <= 0x309e))
1259 if ((c >= 0x30a1 && c <= 0x30fe))
1263 if ((c >= 0x3105 && c <= 0x312c))
1267 if ((c >= 0x1100 && c <= 0x1159)
1268 || (c >= 0x1161 && c <= 0x11a2)
1269 || (c >= 0x11a8 && c <= 0x11f9))
1272 /* CJK Unified Ideographs */
1273 if ((c >= 0xf900 && c <= 0xfa2d)
1274 || (c >= 0xfb1f && c <= 0xfb36)
1275 || (c >= 0xfb38 && c <= 0xfb3c)
1277 || (c >= 0xfb40 && c <= 0xfb41)
1278 || (c >= 0xfb42 && c <= 0xfb44)
1279 || (c >= 0xfb46 && c <= 0xfbb1)
1280 || (c >= 0xfbd3 && c <= 0xfd3f)
1281 || (c >= 0xfd50 && c <= 0xfd8f)
1282 || (c >= 0xfd92 && c <= 0xfdc7)
1283 || (c >= 0xfdf0 && c <= 0xfdfb)
1284 || (c >= 0xfe70 && c <= 0xfe72)
1286 || (c >= 0xfe76 && c <= 0xfefc)
1287 || (c >= 0xff21 && c <= 0xff3a)
1288 || (c >= 0xff41 && c <= 0xff5a)
1289 || (c >= 0xff66 && c <= 0xffbe)
1290 || (c >= 0xffc2 && c <= 0xffc7)
1291 || (c >= 0xffca && c <= 0xffcf)
1292 || (c >= 0xffd2 && c <= 0xffd7)
1293 || (c >= 0xffda && c <= 0xffdc)
1294 || (c >= 0x4e00 && c <= 0x9fa5))
1297 error ("universal-character-name '\\u%04x' not valid in identifier", c);
1302 /* Add the UTF-8 representation of C to the token_buffer. */
/* utf8_extend_token: append the multi-byte encoding of C to the token
   by calling extend_token once per output byte.  Values up to 0x7f
   take a separate branch (not visible in this excerpt); for larger
   values SHIFT is chosen as six bits per trailing byte and MASK as the
   marker bits of the leading byte.  */
1305 utf8_extend_token (c)
1310 if (c <= 0x0000007f)
1315 else if (c <= 0x000007ff)
1316 shift = 6, mask = 0xc0;
1317 else if (c <= 0x0000ffff)
1318 shift = 12, mask = 0xe0;
1319 else if (c <= 0x001fffff)
1320 shift = 18, mask = 0xf0;
1321 else if (c <= 0x03ffffff)
1322 shift = 24, mask = 0xf8;
1324 shift = 30, mask = 0xfc;
/* Leading byte: marker bits plus the most significant payload bits.  */
1326 extend_token (mask | (c >> shift));
/* Trailing byte.  NOTE(review): the shifted value is not masked to its
   low six bits before being combined with 0x80; the cast only
   truncates to eight bits, so bits from higher groups can leak into
   the byte.  UTF-8 trailing bytes have the form 10xxxxxx -- confirm
   whether a `& 0x3f' is missing here.  */
1330 extend_token ((unsigned char) (0x80 | (c >> shift)));
/* NOTE(review): the next line is the last visible member of a struct
   (apparently struct try_type); the other members are not visible in
   this excerpt.  */
1342 char long_long_flag;
/* Table of candidate integer types, listed from int up to unsigned
   long long.  Each row pairs the address of a type node with three
   flags; the last one is nonzero only for the two long long rows.
   NOTE(review): judging by the rows, the first flag marks unsigned
   types and the second marks long and long long types -- confirm
   against the struct declaration.  */
1345 struct try_type type_sequence[] =
1347 { &integer_type_node, 0, 0, 0},
1348 { &unsigned_type_node, 1, 0, 0},
1349 { &long_integer_type_node, 0, 1, 0},
1350 { &long_unsigned_type_node, 1, 1, 0},
1351 { &long_long_integer_type_node, 0, 1, 1},
1352 { &long_long_unsigned_type_node, 1, 1, 1}
/* NOTE(review): the next two lines are members of struct pf_args, the
   argument block used below; its other members (str, base, lflag, type
   are referenced below) are not visible in this excerpt.  */
1364 int conversion_errno;
1365 REAL_VALUE_TYPE value;
/* Body of the floating-constant conversion routine (parse_float per
   the prototype; the header is not visible).  DATA is a struct pf_args:
   the routine picks the result type from the suffix flags, converts
   args->str in the requested base, and stores the value, the chosen
   type and the errno observed after the conversion back into the
   block.  */
1373 struct pf_args * args = (struct pf_args *) data;
1374 const char *typename;
/* Defaults: no error, type double.  */
1376 args->conversion_errno = 0;
1377 args->type = double_type_node;
1378 typename = "double";
1380 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1381 tells the desired precision of the binary result
1382 of decimal-to-binary conversion. */
1387 error ("both 'f' and 'l' suffixes on floating constant");
1389 args->type = float_type_node;
1392 else if (args->lflag)
1394 args->type = long_double_type_node;
1395 typename = "long double";
/* Without a suffix, flag_single_precision_constant selects float.  */
1397 else if (flag_single_precision_constant)
1399 args->type = float_type_node;
/* Base 16 uses the HTOF converter; NOTE(review): the `else' before the
   ATOF call is not visible.  */
1404 if (args->base == 16)
1405 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1407 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
/* Captured immediately so that later calls cannot overwrite it.  */
1409 args->conversion_errno = errno;
1410 /* A diagnostic is required here by some ISO C testsuites.
1411 This is not pedwarn, because some people don't want
1412 an error for this. */
1413 if (REAL_VALUE_ISINF (args->value) && pedantic)
1414 warning ("floating point number exceeds range of '%s'", typename);
/* Token fetch built on the preprocessor reader: obtains one token from
   parse_in, resynchronises the line counters, and converts the token's
   payload into a tree stored through VALUE (identifier, number,
   character constant or string).  NOTE(review): the function header,
   the switch statement and most case labels are not visible in this
   excerpt; the prototype-less calls elsewhere suggest this is c_lex --
   confirm.  */
1423 enum cpp_ttype type;
/* The time spent inside the reader is accounted to TV_CPP.  */
1426 timevar_push (TV_CPP);
1427 cpp_get_token (&parse_in, &tok);
1428 timevar_pop (TV_CPP);
1430 /* The C++ front end does horrible things with the current line
1431 number. To ensure an accurate line number, we must reset it
1432 every time we return a token. */
1433 lex_lineno = cpp_get_line (&parse_in)->line;
1436 lineno = lex_lineno;
/* Brace tokens keep indent_level (`{' minus `}') up to date.  */
1440 case CPP_OPEN_BRACE: indent_level++; break;
1441 case CPP_CLOSE_BRACE: indent_level--; break;
1443 /* Issue this error here, where we can get at tok.val.aux. */
/* Printable characters are shown as themselves, others as an octal
   escape.  */
1445 if (ISGRAPH (tok.val.aux))
1446 error ("stray '%c' in program", tok.val.aux);
1448 error ("stray '\\%#o' in program", tok.val.aux);
1452 *value = get_identifier ((const char *)tok.val.node->name);
1458 *value = lex_number ((const char *)tok.val.str.text, tok.val.str.len);
/* The last argument tells the helper whether the constant is wide.  */
1463 *value = lex_charconst ((const char *)tok.val.str.text,
1464 tok.val.str.len, tok.type == CPP_WCHAR);
1470 *value = lex_string ((const char *)tok.val.str.text,
1471 tok.val.str.len, tok.type == CPP_WSTRING);
1474 /* These tokens should not be visible outside cpplib. */
1475 case CPP_HEADER_NAME:
1478 case CPP_PLACEMARKER:
1498 /* Effectively do c = skip_white_space (c)
1499 but do it faster in the usual cases. */
1512 c = skip_white_space (c);
1514 goto found_nonwhite;
1518 lineno = lex_lineno;
1526 /* Capital L may start a wide-string or wide-character constant. */
1528 register int c1 = getch();
1537 goto string_constant;
1544 if (!doing_objc_thang)
1548 /* '@' may start a constant string object. */
1549 register int c1 = getch ();
1553 goto string_constant;
1556 /* Fall through to treat '@' as the start of an identifier. */
1559 case 'A': case 'B': case 'C': case 'D': case 'E':
1560 case 'F': case 'G': case 'H': case 'I': case 'J':
1561 case 'K': case 'M': case 'N': case 'O':
1562 case 'P': case 'Q': case 'R': case 'S': case 'T':
1563 case 'U': case 'V': case 'W': case 'X': case 'Y':
1565 case 'a': case 'b': case 'c': case 'd': case 'e':
1566 case 'f': case 'g': case 'h': case 'i': case 'j':
1567 case 'k': case 'l': case 'm': case 'n': case 'o':
1568 case 'p': case 'q': case 'r': case 's': case 't':
1569 case 'u': case 'v': case 'w': case 'x': case 'y':
1575 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1577 /* Make sure this char really belongs in an identifier. */
1580 if (! dollars_in_ident)
1581 error ("'$' in identifier");
1583 pedwarn ("'$' in identifier");
1586 if (p >= token_buffer + maxtoken)
1587 p = extend_token_buffer (p);
1595 if (p >= token_buffer + maxtoken)
1596 p = extend_token_buffer (p);
1599 *value = get_identifier (token_buffer);
1604 /* It's hard to preserve tokenization on '.' because
1605 it could be a symbol by itself, or it could be the
1606 start of a floating point number and cpp won't tell us. */
1612 return CPP_ELLIPSIS;
1615 error ("parse error at '..'");
1617 else if (c1 == '*' && c_language == clk_cplusplus)
1618 return CPP_DOT_STAR;
1626 case '0': case '1': case '2': case '3': case '4':
1627 case '5': case '6': case '7': case '8': case '9':
1630 /* Scan the next preprocessing number. All C numeric constants
1631 are preprocessing numbers, but not all preprocessing numbers
1632 are valid numeric constants. Preprocessing numbers fit the
1633 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1634 See C99 section 6.4.8. */
1637 if (p >= token_buffer + maxtoken)
1638 p = extend_token_buffer (p);
1643 if (c == '+' || c == '-')
1646 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1649 if (ISALNUM (c) || c == '_' || c == '.')
1655 *value = lex_number (token_buffer, p - token_buffer);
1665 int delimiter = charconst ? '\'' : '"';
1666 #ifdef MULTIBYTE_CHARS
1667 int longest_char = local_mb_cur_max ();
1668 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1671 p = token_buffer + 1;
1673 while (c != delimiter && c != EOF)
1675 if (p + 2 > token_buffer + maxtoken)
1676 p = extend_token_buffer (p);
1678 /* ignore_escape_flag is set for reading the filename in #line. */
1679 if (!ignore_escape_flag && c == '\\')
1682 *p++ = getch (); /* escaped character */
1688 #ifdef MULTIBYTE_CHARS
1691 for (i = 0; i < longest_char; ++i)
1693 if (p + i >= token_buffer + maxtoken)
1694 p = extend_token_buffer (p);
1697 char_len = local_mblen (p, i + 1);
1704 /* Replace all except the first byte. */
1706 for (--i; i > 0; --i)
1710 /* mbtowc sometimes needs an extra char before accepting */
1711 else if (char_len <= i)
1725 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1727 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1731 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1733 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1751 enum cpp_ttype type = CPP_EOF;
1755 case '+': type = CPP_PLUS; break;
1756 case '-': type = CPP_MINUS; break;
1757 case '&': type = CPP_AND; break;
1758 case '|': type = CPP_OR; break;
1759 case ':': type = CPP_COLON; break;
1760 case '<': type = CPP_LESS; break;
1761 case '>': type = CPP_GREATER; break;
1762 case '*': type = CPP_MULT; break;
1763 case '/': type = CPP_DIV; break;
1764 case '%': type = CPP_MOD; break;
1765 case '^': type = CPP_XOR; break;
1766 case '!': type = CPP_NOT; break;
1767 case '=': type = CPP_EQ; break;
1772 if (c1 == '=' && type < CPP_LAST_EQ)
1773 return type + (CPP_EQ_EQ - CPP_EQ);
1777 case '+': return CPP_PLUS_PLUS;
1778 case '-': return CPP_MINUS_MINUS;
1779 case '&': return CPP_AND_AND;
1780 case '|': return CPP_OR_OR;
1782 if (c_language == clk_cplusplus)
1786 case '<': type = CPP_LSHIFT; goto do_triad;
1787 case '>': type = CPP_RSHIFT; goto do_triad;
1795 if (c_language == clk_cplusplus)
1799 return CPP_DEREF_STAR;
1807 if (c1 == '?' && c_language == clk_cplusplus)
1808 { type = CPP_MAX; goto do_triad; }
1812 if (c1 == ':' && flag_digraphs)
1813 return CPP_OPEN_SQUARE;
1814 if (c1 == '%' && flag_digraphs)
1815 { indent_level++; return CPP_OPEN_BRACE; }
1816 if (c1 == '?' && c_language == clk_cplusplus)
1817 { type = CPP_MIN; goto do_triad; }
1821 if (c1 == '>' && flag_digraphs)
1822 return CPP_CLOSE_SQUARE;
1825 if (c1 == '>' && flag_digraphs)
1826 { indent_level--; return CPP_CLOSE_BRACE; }
1836 type += (CPP_EQ_EQ - CPP_EQ);
1842 case '~': return CPP_COMPL;
1843 case '?': return CPP_QUERY;
1844 case ',': return CPP_COMMA;
1845 case '(': return CPP_OPEN_PAREN;
1846 case ')': return CPP_CLOSE_PAREN;
1847 case '[': return CPP_OPEN_SQUARE;
1848 case ']': return CPP_CLOSE_SQUARE;
1849 case '{': indent_level++; return CPP_OPEN_BRACE;
1850 case '}': indent_level--; return CPP_CLOSE_BRACE;
1851 case ';': return CPP_SEMICOLON;
1856 error ("stray '%c' in program", c);
1858 error ("stray '\\%#o' in program", c);
/* Report the error MSGID with error () and then jump to a label named
   `syntax_error', which the function using this macro has to provide.
   The do { } while (0) wrapper lets an invocation followed by a
   semicolon behave as one statement, e.g. as the body of an if.  */
1866 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
/* Convert the number spelled by the LEN characters starting at STR
   into a constant.

   There is a fast path that returns integer_zero_node,
   integer_one_node or build_int_2 (digit, 0) straight from the first
   character (per the comment further down, LEN is greater than one
   once that path has been passed).  Otherwise the digits are
   accumulated into parts[] in the detected radix.  A number that
   turned out to be floating (floatflag != NOT_FLOAT) has its exponent
   and suffixes checked and is converted through parse_float; any other
   number becomes an integer constant whose type is chosen from its
   suffixes, its radix and the types it fits in.

   Malformed input is reported through ERROR (), which jumps to the
   function's syntax_error label; NOTE(review): that path presumably
   ends at the final `return integer_zero_node' - confirm.  */
1869 lex_number (str, len)
/* Largest digit value met while scanning; compared with the radix
   after the scan to reject digits that do not belong to it.  */
1875 int largest_digit = 0;
/* How far into a floating constant the scan has progressed: not a
   float at all, past the decimal point, or past the exponent marker.  */
1881 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1883 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1884 The code below which fills the parts array assumes that a host
1885 int is at least twice as wide as a host char, and that
1886 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1887 Two HOST_WIDE_INTs is the largest int literal we can store.
1888 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1889 must be exactly the number of parts needed to hold the bits
1890 of two HOST_WIDE_INTs. */
1891 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1892 unsigned int parts[TOTAL_PARTS];
1894 /* Optimize for most frequent case. */
1898 return integer_zero_node;
1899 else if (*str == '1')
1900 return integer_one_node;
1902 return build_int_2 (*str - '0', 0);
1905 for (count = 0; count < TOTAL_PARTS; count++)
1908 /* len is known to be >1 at this point. */
/* A hexadecimal prefix needs at least one character after the
   "0x" / "0X", hence the len > 2 test.  */
1911 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1916 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1917 else if (str[0] == '0' && ISDIGIT (str[1]))
1929 if (base == 16 && pedantic && !flag_isoc99)
1930 pedwarn ("floating constant may not be in radix 16");
1931 else if (floatflag == AFTER_POINT)
1932 ERROR ("too many decimal points in floating constant");
1933 else if (floatflag == AFTER_EXPON)
1934 ERROR ("decimal point in exponent - impossible!");
1936 floatflag = AFTER_POINT;
1942 /* Possible future extension: silently ignore _ in numbers,
1943 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1944 but somewhat easier to read. Ada has this? */
1945 ERROR ("underscore in number");
1949 /* It is not a decimal point.
1950 It should be a digit (perhaps a hex digit). */
/* 'e'/'E' starts an exponent only when the radix is at most 10; with
   radix 16 the exponent marker is 'p'/'P' and a-f / A-F are digits.  */
1956 else if (base <= 10 && (c == 'e' || c == 'E'))
1959 floatflag = AFTER_EXPON;
1962 else if (base == 16 && (c == 'p' || c == 'P'))
1964 floatflag = AFTER_EXPON;
1965 break; /* start of exponent */
1967 else if (base == 16 && c >= 'a' && c <= 'f')
1971 else if (base == 16 && c >= 'A' && c <= 'F')
1978 break; /* start of suffix */
1981 if (n >= largest_digit)
/* Scale the value accumulated so far by the radix, one part at a time:
   each part is multiplied by base, the bits of the preceding part that
   lie above HOST_BITS_PER_CHAR are added in as carry, and a part is
   masked back down to HOST_BITS_PER_CHAR bits.  */
1985 for (count = 0; count < TOTAL_PARTS; count++)
1987 parts[count] *= base;
1991 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1993 &= (1 << HOST_BITS_PER_CHAR) - 1;
1999 /* If the highest-order part overflows (gets larger than
2000 a host char will hold) then the whole number has
2001 overflowed. Record this and truncate the highest-order
2003 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2006 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2010 while (p < str + len);
2012 /* This can happen on input like `int i = 0x;' */
2014 ERROR ("numeric constant with no digits");
2016 if (largest_digit >= base)
2017 ERROR ("numeric constant contains digits beyond the radix");
/* Floating constant: check the exponent, copy the number without its
   suffixes, parse the suffixes, then convert with parse_float.  */
2019 if (floatflag != NOT_FLOAT)
2022 int imag, fflag, lflag, conversion_errno;
2023 REAL_VALUE_TYPE real;
2024 struct pf_args args;
2027 if (base == 16 && floatflag != AFTER_EXPON)
2028 ERROR ("hexadecimal floating constant has no exponent");
2030 /* Read explicit exponent if any, and put it in tokenbuf. */
2031 if ((base == 10 && ((c == 'e') || (c == 'E')))
2032 || (base == 16 && (c == 'p' || c == 'P')))
2036 if (p < str + len && (c == '+' || c == '-'))
2038 /* Exponent is decimal, even if string is a hex float. */
2040 ERROR ("floating constant exponent has no digits");
2041 while (p < str + len && ISDIGIT (c))
2047 /* Copy the float constant now; we don't want any suffixes in the
2048 string passed to parse_float. */
2049 copy = alloca (p - str + 1);
2050 memcpy (copy, str, p - str);
2051 copy[p - str] = '\0';
2053 /* Now parse suffixes. */
2054 fflag = lflag = imag = 0;
2055 while (p < str + len)
2060 ERROR ("more than one 'f' suffix on floating constant");
2061 else if (warn_traditional && !in_system_header)
2062 warning ("traditional C rejects the 'f' suffix");
2069 ERROR ("more than one 'l' suffix on floating constant");
2070 else if (warn_traditional && !in_system_header)
2071 warning ("traditional C rejects the 'l' suffix");
2079 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2081 pedwarn ("ISO C forbids imaginary numeric constants");
2086 ERROR ("invalid suffix on floating constant");
2089 /* Set up input for parse_float() */
2095 /* Convert string to a double, checking for overflow. */
2096 if (do_float_handler (parse_float, (PTR) &args))
2098 /* Receive output from parse_float() */
2102 /* We got an exception from parse_float() */
2103 ERROR ("floating constant out of range");
2105 /* Receive output from parse_float() */
2106 conversion_errno = args.conversion_errno;
2110 /* ERANGE is also reported for underflow,
2111 so test the value to distinguish overflow from that. */
2112 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2113 && (REAL_VALUES_LESS (dconst1, real)
2114 || REAL_VALUES_LESS (real, dconstm1)))
2115 warning ("floating point number exceeds range of 'double'");
2118 /* Create a node with determined type and value. */
2120 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2121 build_real (type, real));
2123 value = build_real (type, real);
/* Integer constant: parse the u / l / i / j suffixes, assemble the
   value from parts[], then work out which type it gets.  */
2127 tree trad_type, ansi_type, type;
2128 HOST_WIDE_INT high, low;
2129 int spec_unsigned = 0;
2131 int spec_long_long = 0;
2136 trad_type = ansi_type = type = NULL_TREE;
2137 while (p < str + len)
2144 error ("two 'u' suffixes on integer constant");
2145 else if (warn_traditional && !in_system_header)
2146 warning ("traditional C rejects the 'u' suffix");
2157 error ("three 'l' suffixes on integer constant");
2159 error ("'lul' is not a valid integer suffix");
2160 else if (c != spec_long)
2161 error ("'Ll' and 'lL' are not valid integer suffixes");
2162 else if (pedantic && ! flag_isoc99
2163 && ! in_system_header && warn_long_long)
2164 pedwarn ("ISO C89 forbids long long integer constants");
2170 case 'i': case 'I': case 'j': case 'J':
2172 error ("more than one 'i' or 'j' suffix on integer constant");
2174 pedwarn ("ISO C forbids imaginary numeric constants");
2179 ERROR ("invalid suffix on integer constant");
2183 /* If the literal overflowed, pedwarn about it now. */
2187 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2190 /* This is simplified by the fact that our constant
2191 is always positive. */
/* Reassemble the two HOST_WIDE_INT halves: the first
   HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR parts form the low word,
   the parts after them form the high word.  */
2195 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2197 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2198 / HOST_BITS_PER_CHAR)]
2199 << (i * HOST_BITS_PER_CHAR));
2200 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2203 value = build_int_2 (low, high);
/* Provisional type only; the type selected below is stored into
   TREE_TYPE (value) later on.  NOTE(review): presumably chosen so that
   the int_fits_type_p tests see the value as unsigned - confirm.  */
2204 TREE_TYPE (value) = long_long_unsigned_type_node;
2206 /* If warn_traditional, calculate both the ISO type and the
2207 traditional type, then see if they disagree.
2208 Otherwise, calculate only the type for the dialect in use. */
2209 if (warn_traditional || flag_traditional)
2211 /* Calculate the traditional type. */
2212 /* Traditionally, any constant is signed; but if unsigned is
2213 specified explicitly, obey that. Use the smallest size
2214 with the right number of bits, except for one special
2215 case with decimal constants. */
2216 if (! spec_long && base != 10
2217 && int_fits_type_p (value, unsigned_type_node))
2218 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2219 /* A decimal constant must be long if it does not fit in
2220 type int. I think this is independent of whether the
2221 constant is signed. */
2222 else if (! spec_long && base == 10
2223 && int_fits_type_p (value, integer_type_node))
2224 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2225 else if (! spec_long_long)
2226 trad_type = (spec_unsigned
2227 ? long_unsigned_type_node
2228 : long_integer_type_node);
2229 else if (int_fits_type_p (value,
2231 ? long_long_unsigned_type_node
2232 : long_long_integer_type_node))
2233 trad_type = (spec_unsigned
2234 ? long_long_unsigned_type_node
2235 : long_long_integer_type_node);
2237 trad_type = (spec_unsigned
2238 ? widest_unsigned_literal_type_node
2239 : widest_integer_literal_type_node);
2241 if (warn_traditional || ! flag_traditional)
2243 /* Calculate the ISO type. */
/* The candidates are tried from narrowest to widest; an unsigned
   candidate narrower than long long is skipped for a decimal constant
   without a 'u' suffix only in the int-sized step (base != 10 test).  */
2244 if (! spec_long && ! spec_unsigned
2245 && int_fits_type_p (value, integer_type_node))
2246 ansi_type = integer_type_node;
2247 else if (! spec_long && (base != 10 || spec_unsigned)
2248 && int_fits_type_p (value, unsigned_type_node))
2249 ansi_type = unsigned_type_node;
2250 else if (! spec_unsigned && !spec_long_long
2251 && int_fits_type_p (value, long_integer_type_node))
2252 ansi_type = long_integer_type_node;
2253 else if (! spec_long_long
2254 && int_fits_type_p (value, long_unsigned_type_node))
2255 ansi_type = long_unsigned_type_node;
2256 else if (! spec_unsigned
2257 && int_fits_type_p (value, long_long_integer_type_node))
2258 ansi_type = long_long_integer_type_node;
2259 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2260 ansi_type = long_long_unsigned_type_node;
2261 else if (! spec_unsigned
2262 && int_fits_type_p (value, widest_integer_literal_type_node))
2263 ansi_type = widest_integer_literal_type_node;
2265 ansi_type = widest_unsigned_literal_type_node;
2268 type = flag_traditional ? trad_type : ansi_type;
2270 /* We assume that constants specified in a non-decimal
2271 base are bit patterns, and that the programmer really
2272 meant what they wrote. */
2273 if (warn_traditional && !in_system_header
2274 && base == 10 && trad_type != ansi_type)
2276 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2277 warning ("width of integer constant changes with -traditional");
2278 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2279 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2281 warning ("width of integer constant may change on other systems with -traditional");
2284 if (pedantic && !flag_traditional && (flag_isoc99 || !spec_long_long)
2287 ? TYPE_PRECISION (long_long_integer_type_node)
2288 : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
2291 pedwarn ("integer constant larger than the maximum value of %s",
2293 ? (TREE_UNSIGNED (type)
2294 ? "an unsigned long long int"
2295 : "a long long int")
2296 : "an unsigned long int"));
2299 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2300 warning ("decimal constant is so large that it is unsigned");
/* An imaginary integer constant is only built when the chosen type is
   no wider than int; the value becomes the imaginary part and the real
   part is integer_zero_node.  */
2304 if (TYPE_PRECISION (type)
2305 <= TYPE_PRECISION (integer_type_node))
2306 value = build_complex (NULL_TREE, integer_zero_node,
2307 convert (integer_type_node, value));
2309 ERROR ("complex integer constant is too wide for 'complex int'");
2311 else if (flag_traditional && !int_fits_type_p (value, type))
2312 /* The traditional constant 0x80000000 is signed
2313 but doesn't fit in the range of int.
2314 This will change it to -0x80000000, which does fit. */
2316 TREE_TYPE (value) = unsigned_type (type);
2317 value = convert (type, value);
2318 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2321 TREE_TYPE (value) = type;
2323 /* If it's still an integer (not a complex), and it doesn't
2324 fit in the type we choose for it, then pedwarn. */
2327 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2328 && ! int_fits_type_p (value, TREE_TYPE (value)))
2329 pedwarn ("integer constant is larger than the maximum value for its type");
/* The message prints the p - str characters consumed so far, i.e. the
   number without whatever follows it.  */
2333 error ("missing white space after number '%.*s'", (int) (p - str), str);
2338 return integer_zero_node;
/* Build a string constant from the LEN source characters starting at
   STR.

   A backslash starts an escape sequence, decoded by readescape, unless
   ignore_escape_flag is set.  Decoded characters are written to a
   stack buffer, which is zero-terminated and handed to build_string.
   For a wide string each character occupies WCHAR_BYTES bytes, laid
   out according to BYTES_BIG_ENDIAN.  The result is typed as
   wchar_array_type_node or char_array_type_node; NOTE(review): the
   choice presumably follows WIDE - confirm.  */
2342 lex_string (str, len, wide)
/* Room for LEN characters plus one terminator, each WCHAR_BYTES bytes
   long when WIDE is nonzero and one byte long otherwise.  */
2348 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2350 const char *p = str, *limit = str + len;
/* Number of bits in one character of the string being built; used to
   range-check escape sequences.  */
2352 unsigned width = wide ? WCHAR_TYPE_SIZE
2353 : TYPE_PRECISION (char_type_node);
2355 #ifdef MULTIBYTE_CHARS
2356 /* Reset multibyte conversion state. */
2357 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2362 #ifdef MULTIBYTE_CHARS
2366 char_len = local_mbtowc (&wc, p, limit - p);
2369 warning ("Ignoring invalid multibyte character");
2382 if (c == '\\' && !ignore_escape_flag)
2384 p = readescape (p, limit, &c);
/* Complain when the escape's value needs more than WIDTH bits.  The
   shift is only evaluated for WIDTH below HOST_BITS_PER_INT.
   NOTE(review): assumes HOST_BITS_PER_INT is the width of unsigned
   int, which would keep the shift count in range - confirm.  */
2385 if (width < HOST_BITS_PER_INT
2386 && (unsigned) c >= ((unsigned)1 << width))
2387 pedwarn ("escape sequence out of range for character");
2390 /* Add this single character into the buffer either as a wchar_t
2391 or as a single byte. */
2394 unsigned charwidth = TYPE_PRECISION (char_type_node);
2395 unsigned bytemask = (1 << charwidth) - 1;
/* Split the character into WCHAR_BYTES pieces of charwidth bits each,
   least significant piece first.  */
2398 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2401 if (byte >= (int) sizeof (c))
2404 n = (c >> (byte * charwidth)) & bytemask;
2405 if (BYTES_BIG_ENDIAN)
2406 q[WCHAR_BYTES - byte - 1] = n;
2418 /* Terminate the string value, either with a single byte zero
2419 or with a wide zero. */
2423 memset (q, 0, WCHAR_BYTES);
/* q - buf is the number of bytes written to the buffer.  */
2431 value = build_string (q - buf, buf);
2434 TREE_TYPE (value) = wchar_array_type_node;
2436 TREE_TYPE (value) = char_array_type_node;
/* Build the value of a character constant from the LEN source
   characters starting at STR.

   Escape sequences are decoded by readescape.  Each character is
   merged into RESULT by shifting the previous contents left by WIDTH
   bits; characters beyond max_chars are ignored.  Diagnostics: an
   error for an empty constant or for one that is too long, and a
   warning for a multi-character constant when warn_multichar is set
   and flag_traditional is not.

   One exit builds the value from the low num_bits bits of RESULT,
   sign-extended unless char_type_node is unsigned, with type
   char_type_node (C++, at most one character) or integer_type_node.
   The other builds build_int_2 (result, 0) with type wchar_type_node.
   NOTE(review): the second exit is presumably the WIDE case -
   confirm.  */
2441 lex_charconst (str, len, wide)
2446 const char *limit = str + len;
2450 unsigned width = TYPE_PRECISION (char_type_node);
2455 #ifdef MULTIBYTE_CHARS
2456 int longest_char = local_mb_cur_max ();
2457 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
/* At this point WIDTH is still the precision of char_type_node, so
   max_chars is the number of such characters that fit in type int.  */
2460 max_chars = TYPE_PRECISION (integer_type_node) / width;
2462 width = WCHAR_TYPE_SIZE;
2466 #ifdef MULTIBYTE_CHARS
2470 char_len = local_mbtowc (&wc, str, limit - str);
2473 warning ("Ignoring invalid multibyte character");
2489 str = readescape (str, limit, &c);
2490 if (width < HOST_BITS_PER_INT
2491 && (unsigned) c >= ((unsigned)1 << width))
2492 pedwarn ("escape sequence out of range for character");
2494 #ifdef MAP_CHARACTER
2496 c = MAP_CHARACTER (c);
2499 /* Merge character into result; ignore excess chars. */
/* num_chars is counted in units of char_type_node's precision, so a
   character WIDTH bits wide advances it by
   WIDTH / TYPE_PRECISION (char_type_node).  */
2500 num_chars += (width / TYPE_PRECISION (char_type_node));
2501 if (num_chars < max_chars + 1)
2503 if (width < HOST_BITS_PER_INT)
2504 result = (result << width) | (c & ((1 << width) - 1));
2510 if (chars_seen == 0)
2511 error ("empty character constant");
2512 else if (num_chars > max_chars)
2514 num_chars = max_chars;
2515 error ("character constant too long");
2517 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2518 warning ("multi-character character constant");
2520 /* If char type is signed, sign-extend the constant. */
2523 int num_bits = num_chars * width;
2525 /* We already got an error; avoid invalid shift. */
2526 value = build_int_2 (0, 0);
/* If char is unsigned, or bit num_bits - 1 of RESULT is clear, keep
   only the low num_bits bits; otherwise set every bit above them so
   that the value is sign-extended.  */
2527 else if (TREE_UNSIGNED (char_type_node)
2528 || ((result >> (num_bits - 1)) & 1) == 0)
2529 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2530 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2533 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2534 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2536 /* In C, a character constant has type 'int'; in C++, 'char'. */
2537 if (chars_seen <= 1 && c_language == clk_cplusplus)
2538 TREE_TYPE (value) = char_type_node;
2540 TREE_TYPE (value) = integer_type_node;
2544 value = build_int_2 (result, 0);
2545 TREE_TYPE (value) = wchar_type_node;
2551 /* Mark for GC a node in a splay tree whose keys are strings. */
/* N is the node; its key is cast to char * and handed to
   ggc_mark_string.  DATA is unused.  This function is the callback
   passed to splay_tree_foreach elsewhere in this file.  */
2554 mark_splay_tree_node (n, data)
2556 void *data ATTRIBUTE_UNUSED;
2558 ggc_mark_string ((char *) n->key);
2562 /* Mark for GC a splay tree whose keys are strings. */
2568 splay_tree st = *(splay_tree *) p;
2570 splay_tree_foreach (st, mark_splay_tree_node, NULL);