1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
39 #include "splay-tree.h"
41 /* MULTIBYTE_CHARS support only works for native compilers.
42 ??? Ideally what we want is to model widechar support after
43 the current floating point support. */
45 #undef MULTIBYTE_CHARS
48 #ifdef MULTIBYTE_CHARS
51 #endif /* MULTIBYTE_CHARS */
52 #ifndef GET_ENVIRONMENT
53 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
57 extern cpp_reader parse_in;
59 /* Stream for reading from the input file. */
63 /* The original file name, before changing "-" to "stdin". */
64 static const char *orig_filename;
66 /* Private idea of the line number. See discussion in c_lex(). */
67 static int lex_lineno;
69 /* We may keep statistics about how long which files took to compile. */
70 static int header_time, body_time;
71 static splay_tree file_info_tree;
73 /* Cause the `yydebug' variable to be defined. */
80 unsigned char *buffer;
85 static struct putback_buffer putback = {NULL, 0, -1};
/* Fetch one input character.  Characters handed back through put_back
   are looked at first: the pushback stack is non-empty whenever
   putback.index is not -1, and the element at that index is the one that
   was pushed most recently.
   NOTE(review): the lines that pop the stack and that read from the
   input stream when the stack is empty are absent from this listing.  */
87 static inline int getch PARAMS ((void));
92 if (putback.index != -1)
94 int ch = putback.buffer[putback.index];
/* Push CH onto the pushback stack that getch consults.  When the top
   index has reached the last allocated slot, the buffer is enlarged by
   16 bytes with xrealloc before CH is stored one slot above the old
   top.  */
101 static inline void put_back PARAMS ((int));
109 if (putback.index == putback.buffer_size - 1)
111 putback.buffer_size += 16;
112 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
114 putback.buffer[++putback.index] = ch;
122 /* File used for outputting assembler code. */
123 extern FILE *asm_out_file;
125 #undef WCHAR_TYPE_SIZE
126 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
128 /* Number of bytes in a wide character. */
129 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
132 static int maxtoken; /* Current nominal length of token buffer. */
133 static char *token_buffer; /* Pointer to token buffer.
134 Actual allocated length is maxtoken + 2. */
137 int indent_level; /* Number of { minus number of }. */
138 int pending_lang_change; /* If we need to switch languages - C++ only */
139 int c_header_level; /* depth in C headers - C++ only */
141 /* Nonzero tells yylex to ignore \ in string constants. */
142 static int ignore_escape_flag;
144 static const char *readescape PARAMS ((const char *, const char *,
146 static const char *read_ucs PARAMS ((const char *, const char *,
147 unsigned int *, int));
148 static void parse_float PARAMS ((PTR));
149 static tree lex_number PARAMS ((const char *, unsigned int));
150 static tree lex_string PARAMS ((const char *, unsigned int, int));
151 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
152 static void update_header_times PARAMS ((const char *));
153 static int dump_one_header PARAMS ((splay_tree_node, void *));
156 static int skip_white_space PARAMS ((int));
157 static char *extend_token_buffer PARAMS ((const char *));
158 static void extend_token_buffer_to PARAMS ((int));
159 static int read_line_number PARAMS ((int *));
160 static void process_directive PARAMS ((void));
162 static void cb_ident PARAMS ((cpp_reader *, const cpp_string *));
163 static void cb_enter_file PARAMS ((cpp_reader *));
164 static void cb_leave_file PARAMS ((cpp_reader *));
165 static void cb_rename_file PARAMS ((cpp_reader *));
166 static void cb_def_pragma PARAMS ((cpp_reader *));
/* Set up the lexer for FILENAME.  The steps visible here are: remember
   FILENAME in orig_filename, create the per-file timing tree together
   with its "<top level>" entry, open the input (a null FILENAME or "-"
   is tested for separately), install the cpplib callbacks, allocate the
   token buffer and zero both line counters.
   NOTE(review): only some of the preprocessor conditionals that separate
   the stdio-based and the cpplib-based variants are present in this
   listing, so the two variants appear interleaved below.  */
171 init_c_lex (filename)
172 const char *filename;
174 struct c_fileinfo *toplevel;
176 orig_filename = filename;
178 /* Set up filename timing. Must happen before cpp_start_read. */
/* Keys of the tree are compared with strcmp; stored values are released
   with free.  */
179 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
181 (splay_tree_delete_value_fn)free);
182 toplevel = get_fileinfo ("<top level>");
183 if (flag_detailed_statistics)
/* Start the timing interval now and seed the top-level entry with the
   same reading.  */
186 body_time = get_run_time ();
187 toplevel->time = body_time;
190 #ifdef MULTIBYTE_CHARS
191 /* Change to the native locale for multibyte conversions. */
192 setlocale (LC_CTYPE, "");
193 GET_ENVIRONMENT (literal_codeset, "LANG");
197 /* Open input file. */
198 if (filename == 0 || !strcmp (filename, "-"))
204 finput = fopen (filename, "r");
206 pfatal_with_name (filename);
208 #ifdef IO_BUFFER_SIZE
/* Give the stream a fully buffered, heap-allocated buffer of
   IO_BUFFER_SIZE bytes.  */
209 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
211 #else /* !USE_CPPLIB */
/* Register this file's handlers with the preprocessor reader.  */
213 parse_in.cb.ident = cb_ident;
214 parse_in.cb.enter_file = cb_enter_file;
215 parse_in.cb.leave_file = cb_leave_file;
216 parse_in.cb.rename_file = cb_rename_file;
217 parse_in.cb.def_pragma = cb_def_pragma;
219 /* Make sure parse_in.digraphs matches flag_digraphs. */
220 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
222 if (filename == 0 || !strcmp (filename, "-"))
/* The allocation is two bytes larger than the nominal length maxtoken.  */
228 token_buffer = (char *) xmalloc (maxtoken + 2);
230 /* Start it at 0, because check_newline is called at the very beginning
231 and will increment it to 1. */
232 lineno = lex_lineno = 0;
237 /* A thin wrapper around the real parser that initializes the
238 integrated preprocessor after debug output has been initialized. */
243 if (! cpp_start_read (&parse_in, orig_filename))
244 return 1; /* cpplib has emitted an error. */
254 struct c_fileinfo *fi;
256 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
258 return (struct c_fileinfo *) n->value;
260 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
262 fi->interface_only = 0;
263 fi->interface_unknown = 1;
264 splay_tree_insert (file_info_tree, (splay_tree_key) name,
265 (splay_tree_value) fi);
/* Account for the run time that has elapsed since body_time was last
   set.  Does nothing unless flag_detailed_statistics is set.  The
   elapsed interval is added both to the global header_time and to the
   time field of the c_fileinfo record that get_fileinfo returns for
   NAME.  */
270 update_header_times (name)
273 /* Changing files again. This means currently collected time
274 is charged against header time, and body time starts back at 0. */
275 if (flag_detailed_statistics)
277 int this_time = get_run_time ();
278 struct c_fileinfo *file = get_fileinfo (name);
279 header_time += this_time - body_time;
280 file->time += this_time - body_time;
/* body_time holds a get_run_time reading, so the next interval is
   started by storing the current reading rather than a literal zero.  */
281 body_time = this_time;
/* Per-node callback that dump_time_statistics hands to
   splay_tree_foreach.  The key of node N is used as the label and the
   time field of the c_fileinfo stored as the node's value is passed to
   print_time.  DUMMY is unused.  */
286 dump_one_header (n, dummy)
288 void *dummy ATTRIBUTE_UNUSED;
290 print_time ((const char *) n->key,
291 ((struct c_fileinfo *) n->value)->time);
/* Emit the timing report.  The interval since body_time is first added
   to the record for input_filename.  Then, between two banner lines
   written to stderr, the accumulated header_time, a main-file figure and
   their ratio are printed, and finally dump_one_header is run on every
   node of file_info_tree.
   NOTE(review): the "main file (total)" figure and the denominator of
   the ratio are both this_time - body_time, i.e. only the interval since
   body_time was last stored; if that interval is zero the ratio is a
   floating-point division by zero.  */
296 dump_time_statistics ()
298 struct c_fileinfo *file = get_fileinfo (input_filename);
299 int this_time = get_run_time ();
300 file->time += this_time - body_time;
302 fprintf (stderr, "\n******\n");
303 print_time ("header files (total)", header_time);
304 print_time ("main file (total)", this_time - body_time);
305 fprintf (stderr, "ratio = %g : 1\n",
306 (double)header_time / (double)(this_time - body_time));
307 fprintf (stderr, "\n******\n");
309 splay_tree_foreach (file_info_tree, dump_one_header, 0);
314 /* If C is not whitespace, return C.
315 Otherwise skip whitespace and return first nonwhite char read. */
325 /* There is no need to process comments or backslash-newline
326 here. None can occur in the output of cpp. Do handle \r
327 in case someone sent us a .i file. */
335 c = check_newline ();
339 /* Per C99, horizontal whitespace is just these four characters. */
348 error ("stray '\\' in program");
358 /* Skips all of the white space at the current location in the input file. */
361 position_after_white_space ()
/* skip_white_space hands back the first non-white character it met;
   pushing that character back with put_back leaves the input positioned
   on it.
   NOTE(review): the line that obtains C is absent from this listing.  */
367 put_back (skip_white_space (c));
370 /* Make the token buffer longer, preserving the data in it.
371 P should point to just beyond the last valid character in the old buffer.
372 The value we return is a pointer to the new buffer
373 at a place corresponding to P. */
/* Enlarge the token buffer until its nominal length maxtoken is at
   least SIZE.  maxtoken is replaced by twice its value plus 10 and the
   test against SIZE follows that assignment; token_buffer is then
   reallocated with the usual two spare bytes.  The P-based interface
   described in the comment above belongs to extend_token_buffer.
   NOTE(review): the trailing `while' reads as the tail of a do/while
   whose opening line is absent from this listing -- confirm.  */
376 extend_token_buffer_to (size)
380 maxtoken = maxtoken * 2 + 10;
381 while (maxtoken < size);
382 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
/* Grow the token buffer on behalf of a writer whose cursor is P and
   return the cursor's new address.  P is converted to an offset before
   the call because extend_token_buffer_to assigns a fresh xrealloc
   result to token_buffer; the same offset is then applied to the updated
   token_buffer.  */
386 extend_token_buffer (p)
389 int offset = p - token_buffer;
390 extend_token_buffer_to (offset);
391 return token_buffer + offset;
/* Read the next token with c_lex.  If it is a CPP_NUMBER whose tree is
   an INTEGER_CST, its low-order value is stored in *NUM.  For any other
   token except CPP_EOF the error "invalid #-line" is reported.
   NOTE(review): the return statements are absent from this listing; a
   caller elsewhere in this file tests the result with `!', so a zero
   result presumably means that no number was read -- confirm.  */
396 read_line_number (num)
400 enum cpp_ttype token = c_lex (&value);
402 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
404 *num = TREE_INT_CST_LOW (value);
409 if (token != CPP_EOF)
410 error ("invalid #-line");
415 /* At the beginning of a line, increment the line number
416 and process any #-directive on this line.
417 If the line is a #-directive, read the entire line and return a newline.
418 Otherwise, return the line's first non-whitespace character. */
425 /* Loop till we get a nonblank, non-directive line. */
428 /* Read first nonwhite char on the line. */
431 while (c == ' ' || c == '\t');
436 process_directive ();
449 enum cpp_ttype token;
452 enum { act_none, act_push, act_pop } action;
453 int action_number, l;
454 const char *new_file;
455 #ifndef NO_IMPLICIT_EXTERN_C
456 int entering_c_header = 0;
459 /* Don't read beyond this line. */
463 token = c_lex (&value);
465 if (token == CPP_NAME)
467 /* If a letter follows, then if the word here is `line', skip
468 it and ignore it; otherwise, ignore the line, with an error
469 if the word isn't `pragma'. */
471 const char *name = IDENTIFIER_POINTER (value);
473 if (!strcmp (name, "pragma"))
478 else if (!strcmp (name, "define"))
480 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
483 else if (!strcmp (name, "undef"))
485 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
488 else if (!strcmp (name, "line"))
491 token = c_lex (&value);
494 else if (!strcmp (name, "ident"))
496 /* #ident. We expect a string constant here.
497 The pedantic warning and syntax error are now in cpp. */
499 token = c_lex (&value);
500 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
503 #ifdef ASM_OUTPUT_IDENT
506 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
510 /* Skip the rest of this line. */
514 error ("undefined or invalid # directive `%s'", name);
518 /* If the # is the only nonwhite char on the line,
519 just ignore it. Check the new newline. */
520 if (token == CPP_EOF)
524 /* Here we have either `#line' or `# <nonletter>'.
525 In either case, it should be a line number; a digit should follow. */
527 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
529 error ("invalid #-line");
533 /* subtract one, because it is the following line that
534 gets the specified number */
536 l = TREE_INT_CST_LOW (value) - 1;
538 /* More follows: it must be a string constant (filename).
539 It would be neat to use cpplib to quickly process the string, but
540 (1) we don't have a handy tokenization of the string, and
541 (2) I don't know how well that would work in the presence
542 of filenames that contain wide characters. */
546 /* Don't treat \ as special if we are processing #line 1 "...".
547 If you want it to be treated specially, use # 1 "...". */
548 ignore_escape_flag = 1;
551 /* Read the string constant. */
552 token = c_lex (&value);
554 ignore_escape_flag = 0;
556 if (token == CPP_EOF)
558 /* No more: store the line number and check following line. */
563 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
565 error ("invalid #line");
569 new_file = TREE_STRING_POINTER (value);
571 if (main_input_filename == 0)
572 main_input_filename = new_file;
577 /* Each change of file name
578 reinitializes whether we are now in a system header. */
579 in_system_header = 0;
581 if (!read_line_number (&action_number))
583 /* Update the name in the top element of input_file_stack. */
584 if (input_file_stack)
585 input_file_stack->name = input_filename;
588 /* `1' after file name means entering new file.
589 `2' after file name means just left a file. */
591 if (action_number == 1)
594 read_line_number (&action_number);
596 else if (action_number == 2)
599 read_line_number (&action_number);
601 if (action_number == 3)
603 /* `3' after file name means this is a system header file. */
604 in_system_header = 1;
605 read_line_number (&action_number);
607 #ifndef NO_IMPLICIT_EXTERN_C
608 if (action_number == 4)
610 /* `4' after file name means this is a C header file. */
611 entering_c_header = 1;
612 read_line_number (&action_number);
616 /* Do the actions implied by the preceding numbers. */
617 if (action == act_push)
620 push_srcloc (input_filename, 1);
621 input_file_stack->indent_level = indent_level;
622 debug_start_source_file (input_filename);
623 #ifndef NO_IMPLICIT_EXTERN_C
626 else if (entering_c_header)
629 ++pending_lang_change;
633 else if (action == act_pop)
635 /* Popping out of a file. */
636 if (input_file_stack->next)
638 #ifndef NO_IMPLICIT_EXTERN_C
639 if (c_header_level && --c_header_level == 0)
641 if (entering_c_header)
642 warning ("badly nested C headers from preprocessor");
643 --pending_lang_change;
647 if (indent_level != input_file_stack->indent_level)
649 warning_with_file_and_line
650 (input_filename, lex_lineno,
651 "This file contains more '%c's than '%c's.",
652 indent_level > input_file_stack->indent_level ? '{' : '}',
653 indent_level > input_file_stack->indent_level ? '}' : '{');
657 debug_end_source_file (input_file_stack->line);
660 error ("#-lines for entering and leaving files don't match");
663 update_header_times (new_file);
665 input_filename = new_file;
669 extract_interface_info ();
671 /* skip the rest of this line. */
675 while (getch () != '\n');
677 #else /* USE_CPPLIB */
679 /* Not yet handled: #pragma, #define, #undef.
680 No need to deal with linemarkers under normal conditions. */
/* Handler that init_c_lex stores in parse_in.cb.ident.  When the target
   defines ASM_OUTPUT_IDENT, the text of STR is passed through lex_string
   with a zero third argument and the characters of the resulting string
   constant are written to asm_out_file with ASM_OUTPUT_IDENT.  PFILE is
   unused.  */
683 cb_ident (pfile, str)
684 cpp_reader *pfile ATTRIBUTE_UNUSED;
685 const cpp_string *str;
687 #ifdef ASM_OUTPUT_IDENT
690 /* Convert escapes in the string. */
691 tree value = lex_string ((const char *)str->text, str->len, 0);
692 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
/* Handler that init_c_lex stores in parse_in.cb.enter_file.  Using the
   current cpplib buffer of PFILE it records the line position, pushes
   the buffer's nominal file name as a new source location, remembers the
   current brace depth in the new stack entry, notifies the debug output,
   charges elapsed time to the file, calls extract_interface_info and
   recomputes in_system_header from the buffer's system-header flags.  */
698 cb_enter_file (pfile)
701 cpp_buffer *ip = CPP_BUFFER (pfile);
702 /* Bleah, need a better interface to this. */
703 const char *flags = cpp_syshdr_flags (pfile, ip);
705 /* Mustn't stack the main buffer on the input stack. (Ick.) */
/* Both line counters take the previous buffer's line number less one
   before the new file is pushed.  */
708 lex_lineno = lineno = ip->prev->lineno - 1;
709 push_srcloc (ip->nominal_fname, 1);
/* Saved so that cb_leave_file can compare brace depths on exit.  */
710 input_file_stack->indent_level = indent_level;
711 debug_start_source_file (ip->nominal_fname);
716 update_header_times (ip->nominal_fname);
719 extract_interface_info ();
/* A non-empty flags string marks a system header.  */
721 in_system_header = (flags[0] != 0);
722 #ifndef NO_IMPLICIT_EXTERN_C
/* NOTE(review): a flags string of at least three characters in a system
   header triggers a pending language change; the meaning of the second
   and third characters is defined by cpp_syshdr_flags, which is not
   visible here.  The `if' that this `else' belongs to is absent from this
   listing.  */
725 else if (in_system_header && flags[1] != 0 && flags[2] != 0)
728 ++pending_lang_change;
/* Handler that init_c_lex stores in parse_in.cb.leave_file.  When the
   input file stack has more than one entry it undoes the C-header
   bookkeeping, warns if the brace depth differs from the depth saved
   when the file was entered, and notifies the debug output.  Afterwards
   in_system_header and lex_lineno are refreshed from the current buffer
   of PFILE, elapsed time is charged to the file now on top of the stack
   and extract_interface_info is called.  */
734 cb_leave_file (pfile)
737 /* Bleah, need a better interface to this. */
738 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
740 if (input_file_stack->next)
742 #ifndef NO_IMPLICIT_EXTERN_C
/* c_header_level is decremented only when it is non-zero; the statements
   below apply when it reaches zero.  */
743 if (c_header_level && --c_header_level == 0)
746 warning ("badly nested C headers from preprocessor");
747 --pending_lang_change;
751 if (indent_level != input_file_stack->indent_level)
/* The first character printed is the brace kind that is in excess: an
   opening brace when the depth grew inside the file, a closing brace
   when it shrank.  */
753 warning_with_file_and_line
754 (input_filename, lex_lineno,
755 "This file contains more '%c's than '%c's.",
756 indent_level > input_file_stack->indent_level ? '{' : '}',
757 indent_level > input_file_stack->indent_level ? '}' : '{');
760 /* We get called for the main buffer, but we mustn't pop it. */
762 debug_end_source_file (input_file_stack->line);
/* A non-empty flags string marks a system header.  */
765 in_system_header = (flags[0] != 0);
766 lex_lineno = CPP_BUFFER (pfile)->lineno;
768 update_header_times (input_file_stack->name);
770 extract_interface_info ();
/* Handler that init_c_lex stores in parse_in.cb.rename_file.  The
   nominal file name and line number of the current buffer of PFILE
   become input_filename and lex_lineno, in_system_header is recomputed
   from the buffer's system-header flags (non-empty string means system
   header), elapsed time is charged to the new name and
   extract_interface_info is called.  */
774 cb_rename_file (pfile)
777 cpp_buffer *ip = CPP_BUFFER (pfile);
778 /* Bleah, need a better interface to this. */
779 const char *flags = cpp_syshdr_flags (pfile, ip);
780 input_filename = ip->nominal_fname;
781 lex_lineno = ip->lineno;
782 in_system_header = (flags[0] != 0);
784 update_header_times (ip->nominal_fname);
786 extract_interface_info ();
/* Handler that init_c_lex stores in parse_in.cb.def_pragma.  When the
   warning is enabled it reads two tokens from PFILE: the spelling of the
   first is kept in SPACE, and the spelling of the second is kept in NAME
   only if that token is a CPP_NAME (NAME stays null otherwise).  One of
   two "ignoring #pragma" warnings is then issued.
   NOTE(review): the test that chooses between the two warnings is absent
   from this listing; the two-operand form requires a non-null NAME.  */
790 cb_def_pragma (pfile)
793 /* Issue a warning message if we have been asked to do so. Ignore
794 unknown pragmas in system headers unless an explicit
795 -Wunknown-pragmas has been given. */
796 if (warn_unknown_pragmas > in_system_header)
798 const unsigned char *space, *name = 0;
801 cpp_get_token (pfile, &s);
802 space = cpp_token_as_text (pfile, &s);
803 cpp_get_token (pfile, &s);
804 if (s.type == CPP_NAME)
805 name = cpp_token_as_text (pfile, &s);
808 warning ("ignoring #pragma %s %s", space, name);
810 warning ("ignoring #pragma %s", space);
813 #endif /* USE_CPPLIB */
815 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
817 [lex.charset]: The character designated by the universal-character-name
818 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
819 is NNNNNNNN; the character designated by the universal-character-name
820 \uNNNN is that character whose character short name in ISO/IEC 10646 is
821 0000NNNN. If the hexadecimal value for a universal character name is
822 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
823 universal character name designates a character in the basic source
824 character set, then the program is ill-formed.
826 We assume that wchar_t is Unicode, so we don't need to do any
827 mapping. Is this ever wrong? */
/* P is the input position, LIMIT bounds the available text, the decoded
   value goes to *CPTR and LENGTH is the number of hex digits expected
   (readescape passes 4 for `u' and 8 for `U').  The digits are
   accumulated in CODE, which is then checked against the ranges that the
   comment above quotes from the standard.  */
830 read_ucs (p, limit, cptr, length)
836 unsigned int code = 0;
/* One iteration per expected hex digit.  */
839 for (; length; --length)
843 error ("incomplete universal-character-name");
850 error ("non hex digit '%c' in universal-character-name", c);
/* The three tests below are independent `if's rather than an else-if
   chain; the ranges are disjoint, so at most one of them applies.
   NOTE(review): the step that makes room in CODE for the new digit is
   absent from this listing.  */
856 if (c >= 'a' && c <= 'f')
857 code += c - 'a' + 10;
858 if (c >= 'A' && c <= 'F')
859 code += c - 'A' + 10;
860 if (c >= '0' && c <= '9')
865 sorry ("universal-character-name on EBCDIC target");
866 *cptr = 0x3f; /* EBCDIC invalid character */
/* Values above 0x9f with the top bit clear are accepted as they are.  */
870 if (code > 0x9f && !(code & 0x80000000))
871 /* True extended character, OK. */;
872 else if (code >= 0x20 && code < 0x7f)
874 /* ASCII printable character. The C character set consists of all of
875 these except $, @ and `. We use hex escapes so that this also
876 works with EBCDIC hosts. */
/* 0x24, 0x40 and 0x60 are the three exceptions named above; any other
   value in this range is diagnosed.  */
877 if (code != 0x24 && code != 0x40 && code != 0x60)
878 error ("universal-character-name used for '%c'", code);
/* Everything that matched neither range test above.  */
881 error ("invalid universal-character-name");
887 /* Read an escape sequence and write its character equivalent into *CPTR.
888 P is the input pointer, which is just after the backslash. LIMIT
889 is how much text we have.
890 Returns the updated input pointer. */
/* Summary of the cases visible in this listing:
   - a hex escape accumulates digits in CODE, warns when no digit
     follows, and issues a pedwarn when the value does not fit in the
     precision of integer_type_node;
   - an octal escape consumes at most three digits in the range 0-7;
   - the letters n, t, r, f, b and v map directly to the corresponding
     TARGET_ constants;
   - the alert escape yields TARGET_BELL, or the escape letter itself
     under flag_traditional;
   - `u' and `U' are passed on to read_ucs with 4 or 8 expected digits;
   - one case issues a "non-ISO-standard" pedwarn and yields TARGET_ESC;
   - the remaining cases issue "unknown escape sequence" pedwarns.
   NOTE(review): many case labels and all of the hex-digit loop control
   are absent from this listing.  */
893 readescape (p, limit, cptr)
898 unsigned int c, code, count;
899 unsigned firstdig = 0;
904 /* cpp has already issued an error for this. */
914 if (warn_traditional && !in_system_header)
915 warning ("the meaning of `\\x' varies with -traditional");
917 if (flag_traditional)
/* Independent tests on disjoint ranges; at most one of them applies.  */
935 if (c >= 'a' && c <= 'f')
936 code += c - 'a' + 10;
937 if (c >= 'A' && c <= 'F')
938 code += c - 'A' + 10;
939 if (c >= '0' && c <= '9')
941 if (code != 0 || count != 0)
951 warning ("\\x used with no following hex digits");
956 /* Digits are all 0's. Ok. */
/* Each counted digit contributes four bits; the comparison is against
   the precision of integer_type_node.  */
958 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
961 << (TYPE_PRECISION (integer_type_node)
964 pedwarn ("hex escape out of range");
968 case '0': case '1': case '2': case '3': case '4':
969 case '5': case '6': case '7':
/* At most three octal digits are folded into CODE.  */
971 for (count = 0; count < 3; count++)
973 if (c < '0' || c > '7')
978 code = (code * 8) + (c - '0');
990 case '\\': case '\'': case '"': case '?':
994 case 'n': *cptr = TARGET_NEWLINE; return p;
995 case 't': *cptr = TARGET_TAB; return p;
996 case 'r': *cptr = TARGET_CR; return p;
997 case 'f': *cptr = TARGET_FF; return p;
998 case 'b': *cptr = TARGET_BS; return p;
999 case 'v': *cptr = TARGET_VT; return p;
1001 if (warn_traditional && !in_system_header)
1002 warning ("the meaning of '\\a' varies with -traditional");
1003 *cptr = flag_traditional ? c : TARGET_BELL;
1006 /* Warnings and support checks handled by read_ucs(). */
1008 if (c_language != clk_cplusplus && !flag_isoc99)
1011 if (warn_traditional && !in_system_header)
1012 warning ("the meaning of '\\%c' varies with -traditional", c);
1014 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
1018 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
1019 *cptr = TARGET_ESC; return p;
1021 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
1022 '\%' is used to prevent SCCS from getting confused. */
1023 case '(': case '{': case '[': case '%':
1025 pedwarn ("unknown escape sequence '\\%c'", c);
1031 pedwarn ("unknown escape sequence '\\%c'", c);
1033 pedwarn ("unknown escape sequence: '\\' followed by char 0x%x", c);
1040 /* Returns nonzero if C is a universal-character-name. Give an error if it
1041 is not one which may appear in an identifier, as per [extendid].
1043 Note that extended character support in identifiers has not yet been
1044 implemented. It is my personal opinion that this is not a desirable
1045 feature. Portable code cannot count on support for more than the basic
1046 identifier character set. */
1049 is_extended_char (c)
1052 #ifdef TARGET_EBCDIC
1059 /* None of the valid chars are outside the Basic Multilingual Plane (the low 16 bits). */
1063 error ("universal-character-name '\\U%08x' not valid in identifier", c);
1068 if ((c >= 0x00c0 && c <= 0x00d6)
1069 || (c >= 0x00d8 && c <= 0x00f6)
1070 || (c >= 0x00f8 && c <= 0x01f5)
1071 || (c >= 0x01fa && c <= 0x0217)
1072 || (c >= 0x0250 && c <= 0x02a8)
1073 || (c >= 0x1e00 && c <= 0x1e9a)
1074 || (c >= 0x1ea0 && c <= 0x1ef9))
1079 || (c >= 0x0388 && c <= 0x038a)
1081 || (c >= 0x038e && c <= 0x03a1)
1082 || (c >= 0x03a3 && c <= 0x03ce)
1083 || (c >= 0x03d0 && c <= 0x03d6)
1088 || (c >= 0x03e2 && c <= 0x03f3)
1089 || (c >= 0x1f00 && c <= 0x1f15)
1090 || (c >= 0x1f18 && c <= 0x1f1d)
1091 || (c >= 0x1f20 && c <= 0x1f45)
1092 || (c >= 0x1f48 && c <= 0x1f4d)
1093 || (c >= 0x1f50 && c <= 0x1f57)
1097 || (c >= 0x1f5f && c <= 0x1f7d)
1098 || (c >= 0x1f80 && c <= 0x1fb4)
1099 || (c >= 0x1fb6 && c <= 0x1fbc)
1100 || (c >= 0x1fc2 && c <= 0x1fc4)
1101 || (c >= 0x1fc6 && c <= 0x1fcc)
1102 || (c >= 0x1fd0 && c <= 0x1fd3)
1103 || (c >= 0x1fd6 && c <= 0x1fdb)
1104 || (c >= 0x1fe0 && c <= 0x1fec)
1105 || (c >= 0x1ff2 && c <= 0x1ff4)
1106 || (c >= 0x1ff6 && c <= 0x1ffc))
1110 if ((c >= 0x0401 && c <= 0x040d)
1111 || (c >= 0x040f && c <= 0x044f)
1112 || (c >= 0x0451 && c <= 0x045c)
1113 || (c >= 0x045e && c <= 0x0481)
1114 || (c >= 0x0490 && c <= 0x04c4)
1115 || (c >= 0x04c7 && c <= 0x04c8)
1116 || (c >= 0x04cb && c <= 0x04cc)
1117 || (c >= 0x04d0 && c <= 0x04eb)
1118 || (c >= 0x04ee && c <= 0x04f5)
1119 || (c >= 0x04f8 && c <= 0x04f9))
1123 if ((c >= 0x0531 && c <= 0x0556)
1124 || (c >= 0x0561 && c <= 0x0587))
1128 if ((c >= 0x05d0 && c <= 0x05ea)
1129 || (c >= 0x05f0 && c <= 0x05f4))
1133 if ((c >= 0x0621 && c <= 0x063a)
1134 || (c >= 0x0640 && c <= 0x0652)
1135 || (c >= 0x0670 && c <= 0x06b7)
1136 || (c >= 0x06ba && c <= 0x06be)
1137 || (c >= 0x06c0 && c <= 0x06ce)
1138 || (c >= 0x06e5 && c <= 0x06e7))
1142 if ((c >= 0x0905 && c <= 0x0939)
1143 || (c >= 0x0958 && c <= 0x0962))
1147 if ((c >= 0x0985 && c <= 0x098c)
1148 || (c >= 0x098f && c <= 0x0990)
1149 || (c >= 0x0993 && c <= 0x09a8)
1150 || (c >= 0x09aa && c <= 0x09b0)
1152 || (c >= 0x09b6 && c <= 0x09b9)
1153 || (c >= 0x09dc && c <= 0x09dd)
1154 || (c >= 0x09df && c <= 0x09e1)
1155 || (c >= 0x09f0 && c <= 0x09f1))
1159 if ((c >= 0x0a05 && c <= 0x0a0a)
1160 || (c >= 0x0a0f && c <= 0x0a10)
1161 || (c >= 0x0a13 && c <= 0x0a28)
1162 || (c >= 0x0a2a && c <= 0x0a30)
1163 || (c >= 0x0a32 && c <= 0x0a33)
1164 || (c >= 0x0a35 && c <= 0x0a36)
1165 || (c >= 0x0a38 && c <= 0x0a39)
1166 || (c >= 0x0a59 && c <= 0x0a5c)
1171 if ((c >= 0x0a85 && c <= 0x0a8b)
1173 || (c >= 0x0a8f && c <= 0x0a91)
1174 || (c >= 0x0a93 && c <= 0x0aa8)
1175 || (c >= 0x0aaa && c <= 0x0ab0)
1176 || (c >= 0x0ab2 && c <= 0x0ab3)
1177 || (c >= 0x0ab5 && c <= 0x0ab9)
1182 if ((c >= 0x0b05 && c <= 0x0b0c)
1183 || (c >= 0x0b0f && c <= 0x0b10)
1184 || (c >= 0x0b13 && c <= 0x0b28)
1185 || (c >= 0x0b2a && c <= 0x0b30)
1186 || (c >= 0x0b32 && c <= 0x0b33)
1187 || (c >= 0x0b36 && c <= 0x0b39)
1188 || (c >= 0x0b5c && c <= 0x0b5d)
1189 || (c >= 0x0b5f && c <= 0x0b61))
1193 if ((c >= 0x0b85 && c <= 0x0b8a)
1194 || (c >= 0x0b8e && c <= 0x0b90)
1195 || (c >= 0x0b92 && c <= 0x0b95)
1196 || (c >= 0x0b99 && c <= 0x0b9a)
1198 || (c >= 0x0b9e && c <= 0x0b9f)
1199 || (c >= 0x0ba3 && c <= 0x0ba4)
1200 || (c >= 0x0ba8 && c <= 0x0baa)
1201 || (c >= 0x0bae && c <= 0x0bb5)
1202 || (c >= 0x0bb7 && c <= 0x0bb9))
1206 if ((c >= 0x0c05 && c <= 0x0c0c)
1207 || (c >= 0x0c0e && c <= 0x0c10)
1208 || (c >= 0x0c12 && c <= 0x0c28)
1209 || (c >= 0x0c2a && c <= 0x0c33)
1210 || (c >= 0x0c35 && c <= 0x0c39)
1211 || (c >= 0x0c60 && c <= 0x0c61))
1215 if ((c >= 0x0c85 && c <= 0x0c8c)
1216 || (c >= 0x0c8e && c <= 0x0c90)
1217 || (c >= 0x0c92 && c <= 0x0ca8)
1218 || (c >= 0x0caa && c <= 0x0cb3)
1219 || (c >= 0x0cb5 && c <= 0x0cb9)
1220 || (c >= 0x0ce0 && c <= 0x0ce1))
1224 if ((c >= 0x0d05 && c <= 0x0d0c)
1225 || (c >= 0x0d0e && c <= 0x0d10)
1226 || (c >= 0x0d12 && c <= 0x0d28)
1227 || (c >= 0x0d2a && c <= 0x0d39)
1228 || (c >= 0x0d60 && c <= 0x0d61))
1232 if ((c >= 0x0e01 && c <= 0x0e30)
1233 || (c >= 0x0e32 && c <= 0x0e33)
1234 || (c >= 0x0e40 && c <= 0x0e46)
1235 || (c >= 0x0e4f && c <= 0x0e5b))
1239 if ((c >= 0x0e81 && c <= 0x0e82)
1245 || (c >= 0x0e94 && c <= 0x0e97)
1246 || (c >= 0x0e99 && c <= 0x0e9f)
1247 || (c >= 0x0ea1 && c <= 0x0ea3)
1252 || (c >= 0x0ead && c <= 0x0eb0)
1256 || (c >= 0x0ec0 && c <= 0x0ec4)
1261 if ((c >= 0x10a0 && c <= 0x10c5)
1262 || (c >= 0x10d0 && c <= 0x10f6))
1266 if ((c >= 0x3041 && c <= 0x3094)
1267 || (c >= 0x309b && c <= 0x309e))
1271 if ((c >= 0x30a1 && c <= 0x30fe))
1275 if ((c >= 0x3105 && c <= 0x312c))
1279 if ((c >= 0x1100 && c <= 0x1159)
1280 || (c >= 0x1161 && c <= 0x11a2)
1281 || (c >= 0x11a8 && c <= 0x11f9))
1284 /* CJK Unified Ideographs */
1285 if ((c >= 0xf900 && c <= 0xfa2d)
1286 || (c >= 0xfb1f && c <= 0xfb36)
1287 || (c >= 0xfb38 && c <= 0xfb3c)
1289 || (c >= 0xfb40 && c <= 0xfb41)
1290 || (c >= 0xfb42 && c <= 0xfb44)
1291 || (c >= 0xfb46 && c <= 0xfbb1)
1292 || (c >= 0xfbd3 && c <= 0xfd3f)
1293 || (c >= 0xfd50 && c <= 0xfd8f)
1294 || (c >= 0xfd92 && c <= 0xfdc7)
1295 || (c >= 0xfdf0 && c <= 0xfdfb)
1296 || (c >= 0xfe70 && c <= 0xfe72)
1298 || (c >= 0xfe76 && c <= 0xfefc)
1299 || (c >= 0xff21 && c <= 0xff3a)
1300 || (c >= 0xff41 && c <= 0xff5a)
1301 || (c >= 0xff66 && c <= 0xffbe)
1302 || (c >= 0xffc2 && c <= 0xffc7)
1303 || (c >= 0xffca && c <= 0xffcf)
1304 || (c >= 0xffd2 && c <= 0xffd7)
1305 || (c >= 0xffda && c <= 0xffdc)
1306 || (c >= 0x4e00 && c <= 0x9fa5))
1309 error ("universal-character-name '\\u%04x' not valid in identifier", c);
1314 /* Add the UTF-8 representation of C to the token_buffer. */
/* C is classified by magnitude.  Values up to 0x7f are tested for
   first.  Larger values select the shift that isolates the leading
   payload bits and the marker that is OR-ed into the first byte:
       up to 0x7ff       shift  6, marker 0xc0
       up to 0xffff      shift 12, marker 0xe0
       up to 0x1fffff    shift 18, marker 0xf0
       up to 0x3ffffff   shift 24, marker 0xf8
       anything larger   shift 30, marker 0xfc
   The first byte and the following bytes are appended with
   extend_token.
   NOTE(review): the statements that handle the single-byte case and that
   step SHIFT between following bytes are absent from this listing.  */
1317 utf8_extend_token (c)
1322 if (c <= 0x0000007f)
1327 else if (c <= 0x000007ff)
1328 shift = 6, mask = 0xc0;
1329 else if (c <= 0x0000ffff)
1330 shift = 12, mask = 0xe0;
1331 else if (c <= 0x001fffff)
1332 shift = 18, mask = 0xf0;
1333 else if (c <= 0x03ffffff)
1334 shift = 24, mask = 0xf8;
1336 shift = 30, mask = 0xfc;
1338 extend_token (mask | (c >> shift));
/* NOTE(review): a UTF-8 continuation byte carries six payload bits, i.e.
   0x80 | ((c >> shift) & 0x3f).  The expression below ORs 0x80 with the
   whole shifted value and relies on the cast to drop the excess, which
   keeps bits 6 and 7 of the shifted value; for C == 0x7ff this produces
   0xff where 0xbf is required.  Confirm that no masking happens on a
   line missing from this listing.  */
1342 extend_token ((unsigned char) (0x80 | (c >> shift)));
1354 char long_long_flag;
/* Table of integer types in the order int, unsigned int, long,
   unsigned long, long long, unsigned long long.  Each row holds the
   address of the type node followed by three flag values.
   NOTE(review): only the member name long_long_flag is visible in this
   listing; judging from the values, the second column marks the unsigned
   types and the third marks long and long long -- confirm against the
   full struct try_type declaration.  */
1357 struct try_type type_sequence[] =
1359 { &integer_type_node, 0, 0, 0},
1360 { &unsigned_type_node, 1, 0, 0},
1361 { &long_integer_type_node, 0, 1, 0},
1362 { &long_unsigned_type_node, 1, 1, 0},
1363 { &long_long_integer_type_node, 0, 1, 1},
1364 { &long_long_unsigned_type_node, 1, 1, 1}
1376 int conversion_errno;
1377 REAL_VALUE_TYPE value;
1385 struct pf_args * args = (struct pf_args *) data;
1386 const char *typename;
1388 args->conversion_errno = 0;
1389 args->type = double_type_node;
1390 typename = "double";
1392 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1393 tells the desired precision of the binary result
1394 of decimal-to-binary conversion. */
1399 error ("both 'f' and 'l' suffixes on floating constant");
1401 args->type = float_type_node;
1404 else if (args->lflag)
1406 args->type = long_double_type_node;
1407 typename = "long double";
1409 else if (flag_single_precision_constant)
1411 args->type = float_type_node;
1416 if (args->base == 16)
1417 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1419 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1421 args->conversion_errno = errno;
1422 /* A diagnostic is required here by some ISO C testsuites.
1423 This is not pedwarn, because some people don't want
1424 an error for this. */
1425 if (REAL_VALUE_ISINF (args->value) && pedantic)
1426 warning ("floating point number exceeds range of '%s'", typename);
1435 enum cpp_ttype type;
1438 timevar_push (TV_CPP);
1439 cpp_get_token (&parse_in, &tok);
1440 timevar_pop (TV_CPP);
1442 /* The C++ front end does horrible things with the current line
1443 number. To ensure an accurate line number, we must reset it
1444 every time we return a token. */
1445 lex_lineno = cpp_get_line (&parse_in)->line;
1448 lineno = lex_lineno;
1452 case CPP_OPEN_BRACE: indent_level++; break;
1453 case CPP_CLOSE_BRACE: indent_level--; break;
1455 /* Issue this error here, where we can get at tok.val.c. */
1457 if (ISGRAPH (tok.val.c))
1458 error ("stray '%c' in program", tok.val.c);
1460 error ("stray '\\%#o' in program", tok.val.c);
1464 *value = get_identifier ((const char *)tok.val.node->name);
1470 *value = lex_number ((const char *)tok.val.str.text, tok.val.str.len);
1475 *value = lex_charconst ((const char *)tok.val.str.text,
1476 tok.val.str.len, tok.type == CPP_WCHAR);
1482 *value = lex_string ((const char *)tok.val.str.text,
1483 tok.val.str.len, tok.type == CPP_WSTRING);
1486 /* These tokens should not be visible outside cpplib. */
1487 case CPP_HEADER_NAME:
1509 /* Effectively do c = skip_white_space (c)
1510 but do it faster in the usual cases. */
1523 c = skip_white_space (c);
1525 goto found_nonwhite;
1529 lineno = lex_lineno;
1537 /* Capital L may start a wide-string or wide-character constant. */
1539 register int c1 = getch();
1548 goto string_constant;
1555 if (!doing_objc_thang)
1559 /* '@' may start a constant string object. */
1560 register int c1 = getch ();
1564 goto string_constant;
1567 /* Fall through to treat '@' as the start of an identifier. */
1570 case 'A': case 'B': case 'C': case 'D': case 'E':
1571 case 'F': case 'G': case 'H': case 'I': case 'J':
1572 case 'K': case 'M': case 'N': case 'O':
1573 case 'P': case 'Q': case 'R': case 'S': case 'T':
1574 case 'U': case 'V': case 'W': case 'X': case 'Y':
1576 case 'a': case 'b': case 'c': case 'd': case 'e':
1577 case 'f': case 'g': case 'h': case 'i': case 'j':
1578 case 'k': case 'l': case 'm': case 'n': case 'o':
1579 case 'p': case 'q': case 'r': case 's': case 't':
1580 case 'u': case 'v': case 'w': case 'x': case 'y':
1586 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1588 /* Make sure this char really belongs in an identifier. */
1591 if (! dollars_in_ident)
1592 error ("'$' in identifier");
1594 pedwarn ("'$' in identifier");
1597 if (p >= token_buffer + maxtoken)
1598 p = extend_token_buffer (p);
1606 if (p >= token_buffer + maxtoken)
1607 p = extend_token_buffer (p);
1610 *value = get_identifier (token_buffer);
1615 /* It's hard to preserve tokenization on '.' because
1616 it could be a symbol by itself, or it could be the
1617 start of a floating point number and cpp won't tell us. */
1623 return CPP_ELLIPSIS;
1626 error ("parse error at '..'");
1628 else if (c1 == '*' && c_language == clk_cplusplus)
1629 return CPP_DOT_STAR;
1637 case '0': case '1': case '2': case '3': case '4':
1638 case '5': case '6': case '7': case '8': case '9':
1641 /* Scan the next preprocessing number. All C numeric constants
1642 are preprocessing numbers, but not all preprocessing numbers
1643 are valid numeric constants. Preprocessing numbers fit the
1644 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1645 See C99 section 6.4.8. */
1648 if (p >= token_buffer + maxtoken)
1649 p = extend_token_buffer (p);
1654 if (c == '+' || c == '-')
1657 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1660 if (ISALNUM (c) || c == '_' || c == '.')
1666 *value = lex_number (token_buffer, p - token_buffer);
1676 int delimiter = charconst ? '\'' : '"';
1677 #ifdef MULTIBYTE_CHARS
1678 int longest_char = local_mb_cur_max ();
1679 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1682 p = token_buffer + 1;
1684 while (c != delimiter && c != EOF)
1686 if (p + 2 > token_buffer + maxtoken)
1687 p = extend_token_buffer (p);
1689 /* ignore_escape_flag is set for reading the filename in #line. */
1690 if (!ignore_escape_flag && c == '\\')
1693 *p++ = getch (); /* escaped character */
1699 #ifdef MULTIBYTE_CHARS
1702 for (i = 0; i < longest_char; ++i)
1704 if (p + i >= token_buffer + maxtoken)
1705 p = extend_token_buffer (p);
1708 char_len = local_mblen (p, i + 1);
1715 /* Replace all except the first byte. */
1717 for (--i; i > 0; --i)
1721 /* mbtowc sometimes needs an extra char before accepting */
1722 else if (char_len <= i)
1736 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1738 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1742 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1744 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1762 enum cpp_ttype type = CPP_EOF;
1766 case '+': type = CPP_PLUS; break;
1767 case '-': type = CPP_MINUS; break;
1768 case '&': type = CPP_AND; break;
1769 case '|': type = CPP_OR; break;
1770 case ':': type = CPP_COLON; break;
1771 case '<': type = CPP_LESS; break;
1772 case '>': type = CPP_GREATER; break;
1773 case '*': type = CPP_MULT; break;
1774 case '/': type = CPP_DIV; break;
1775 case '%': type = CPP_MOD; break;
1776 case '^': type = CPP_XOR; break;
1777 case '!': type = CPP_NOT; break;
1778 case '=': type = CPP_EQ; break;
1783 if (c1 == '=' && type < CPP_LAST_EQ)
1784 return type + (CPP_EQ_EQ - CPP_EQ);
1788 case '+': return CPP_PLUS_PLUS;
1789 case '-': return CPP_MINUS_MINUS;
1790 case '&': return CPP_AND_AND;
1791 case '|': return CPP_OR_OR;
1793 if (c_language == clk_cplusplus)
1797 case '<': type = CPP_LSHIFT; goto do_triad;
1798 case '>': type = CPP_RSHIFT; goto do_triad;
1806 if (c_language == clk_cplusplus)
1810 return CPP_DEREF_STAR;
1818 if (c1 == '?' && c_language == clk_cplusplus)
1819 { type = CPP_MAX; goto do_triad; }
1823 if (c1 == ':' && flag_digraphs)
1824 return CPP_OPEN_SQUARE;
1825 if (c1 == '%' && flag_digraphs)
1826 { indent_level++; return CPP_OPEN_BRACE; }
1827 if (c1 == '?' && c_language == clk_cplusplus)
1828 { type = CPP_MIN; goto do_triad; }
1832 if (c1 == '>' && flag_digraphs)
1833 return CPP_CLOSE_SQUARE;
1836 if (c1 == '>' && flag_digraphs)
1837 { indent_level--; return CPP_CLOSE_BRACE; }
1847 type += (CPP_EQ_EQ - CPP_EQ);
1853 case '~': return CPP_COMPL;
1854 case '?': return CPP_QUERY;
1855 case ',': return CPP_COMMA;
1856 case '(': return CPP_OPEN_PAREN;
1857 case ')': return CPP_CLOSE_PAREN;
1858 case '[': return CPP_OPEN_SQUARE;
1859 case ']': return CPP_CLOSE_SQUARE;
1860 case '{': indent_level++; return CPP_OPEN_BRACE;
1861 case '}': indent_level--; return CPP_CLOSE_BRACE;
1862 case ';': return CPP_SEMICOLON;
1867 error ("stray '%c' in program", c);
1869 error ("stray '\\%#o' in program", c);
/* Report MSGID with error () and jump to the syntax_error label of the
   enclosing function.  The do/while(0) wrapper lets the macro be used
   as a single statement.  */
1877 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
/* lex_number: convert the spelling of a preprocessing number into a
   constant node.

   STR points at the first character of the number and LEN is its
   length; the visible caller passes token_buffer and
   p - token_buffer.

   The short path near the top returns integer_zero_node,
   integer_one_node or a build_int_2 node straight from *STR
   (presumably when LEN is 1 -- the test itself is not visible here).
   Otherwise the digits are accumulated into the PARTS array and the
   constant is finished as a floating constant when FLOATFLAG is not
   NOT_FLOAT, or as an integer constant otherwise.  Failures reported
   through the ERROR macro jump to the syntax_error label.

   NOTE(review): this listing has gaps (the return type, braces and
   several short statements are not visible), so the added comments
   describe only what the visible lines establish.  */
1880 lex_number (str, len)
1886 int largest_digit = 0;
1892 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1894 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1895 The code below which fills the parts array assumes that a host
1896 int is at least twice as wide as a host char, and that
1897 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1898 Two HOST_WIDE_INTs is the largest int literal we can store.
1899 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1900 must be exactly the number of parts needed to hold the bits
1901 of two HOST_WIDE_INTs. */
1902 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1903 unsigned int parts[TOTAL_PARTS];
1905 /* Optimize for most frequent case. */
1909 return integer_zero_node;
1910 else if (*str == '1')
1911 return integer_one_node;
1913 return build_int_2 (*str - '0', 0);
1916 for (count = 0; count < TOTAL_PARTS; count++)
1919 /* len is known to be >1 at this point. */
1922 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1927 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1928 else if (str[0] == '0' && ISDIGIT (str[1]))
1940 if (base == 16 && pedantic && !flag_isoc99)
1941 pedwarn ("floating constant may not be in radix 16");
1942 else if (floatflag == AFTER_POINT)
1943 ERROR ("too many decimal points in floating constant");
1944 else if (floatflag == AFTER_EXPON)
1945 ERROR ("decimal point in exponent - impossible!");
1947 floatflag = AFTER_POINT;
1953 /* Possible future extension: silently ignore _ in numbers,
1954 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1955 but somewhat easier to read. Ada has this? */
1956 ERROR ("underscore in number");
1960 /* It is not a decimal point.
1961 It should be a digit (perhaps a hex digit). */
1967 else if (base <= 10 && (c == 'e' || c == 'E'))
1970 floatflag = AFTER_EXPON;
1973 else if (base == 16 && (c == 'p' || c == 'P'))
1975 floatflag = AFTER_EXPON;
1976 break; /* start of exponent */
1978 else if (base == 16 && c >= 'a' && c <= 'f')
1982 else if (base == 16 && c >= 'A' && c <= 'F')
1989 break; /* start of suffix */
/* LARGEST_DIGIT is compared against BASE after the loop to reject
   digits beyond the radix.  The digit is then folded into PARTS: each
   part is multiplied by BASE, and the bits above HOST_BITS_PER_CHAR
   appear to be carried into the next part before being masked off
   (the left-hand sides of those two statements are not visible in
   this listing -- confirm against the full source).  */
1992 if (n >= largest_digit)
1996 for (count = 0; count < TOTAL_PARTS; count++)
1998 parts[count] *= base;
2002 += (parts[count-1] >> HOST_BITS_PER_CHAR);
2004 &= (1 << HOST_BITS_PER_CHAR) - 1;
2010 /* If the highest-order part overflows (gets larger than
2011 a host char will hold) then the whole number has
2012 overflowed. Record this and truncate the highest-order
2014 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
2017 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
2021 while (p < str + len);
2023 /* This can happen on input like `int i = 0x;' */
2025 ERROR ("numeric constant with no digits");
2027 if (largest_digit >= base)
2028 ERROR ("numeric constant contains digits beyond the radix");
/* Floating constant.  Check the exponent, copy the digits without any
   suffix into an alloca buffer, parse the f, l, i and j suffixes, then
   convert by running parse_float under do_float_handler.  */
2030 if (floatflag != NOT_FLOAT)
2033 int imag, fflag, lflag, conversion_errno;
2034 REAL_VALUE_TYPE real;
2035 struct pf_args args;
2038 if (base == 16 && floatflag != AFTER_EXPON)
2039 ERROR ("hexadecimal floating constant has no exponent");
2041 /* Read explicit exponent if any, and put it in tokenbuf. */
2042 if ((base == 10 && ((c == 'e') || (c == 'E')))
2043 || (base == 16 && (c == 'p' || c == 'P')))
2047 if (p < str + len && (c == '+' || c == '-'))
2049 /* Exponent is decimal, even if string is a hex float. */
2051 ERROR ("floating constant exponent has no digits");
2052 while (p < str + len && ISDIGIT (c))
2058 /* Copy the float constant now; we don't want any suffixes in the
2059 string passed to parse_float. */
2060 copy = alloca (p - str + 1);
2061 memcpy (copy, str, p - str);
2062 copy[p - str] = '\0';
2064 /* Now parse suffixes. */
2065 fflag = lflag = imag = 0;
2066 while (p < str + len)
2071 ERROR ("more than one 'f' suffix on floating constant");
2072 else if (warn_traditional && !in_system_header)
2073 warning ("traditional C rejects the 'f' suffix");
2080 ERROR ("more than one 'l' suffix on floating constant");
2081 else if (warn_traditional && !in_system_header)
2082 warning ("traditional C rejects the 'l' suffix");
2090 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2092 pedwarn ("ISO C forbids imaginary numeric constants");
2097 ERROR ("invalid suffix on floating constant");
2100 /* Setup input for parse_float() */
2106 /* Convert string to a double, checking for overflow. */
2107 if (do_float_handler (parse_float, (PTR) &args))
2109 /* Receive output from parse_float() */
2113 /* We got an exception from parse_float() */
2114 ERROR ("floating constant out of range");
2116 /* Receive output from parse_float() */
2117 conversion_errno = args.conversion_errno;
2121 /* ERANGE is also reported for underflow,
2122 so test the value to distinguish overflow from that. */
2123 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2124 && (REAL_VALUES_LESS (dconst1, real)
2125 || REAL_VALUES_LESS (real, dconstm1)))
2126 warning ("floating point number exceeds range of 'double'");
2129 /* Create a node with determined type and value. */
2131 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2132 build_real (type, real));
2134 value = build_real (type, real);
/* Integer constant.  Parse the u, l, i and j suffixes, rebuild the
   value from PARTS as a HIGH/LOW pair, then choose the traditional
   and/or ISO type and issue the related diagnostics.  */
2138 tree trad_type, ansi_type, type;
2139 HOST_WIDE_INT high, low;
2140 int spec_unsigned = 0;
2142 int spec_long_long = 0;
2147 trad_type = ansi_type = type = NULL_TREE;
2148 while (p < str + len)
2155 error ("two 'u' suffixes on integer constant");
2156 else if (warn_traditional && !in_system_header)
2157 warning ("traditional C rejects the 'u' suffix");
2168 error ("three 'l' suffixes on integer constant");
2170 error ("'lul' is not a valid integer suffix");
2171 else if (c != spec_long)
2172 error ("'Ll' and 'lL' are not valid integer suffixes");
2173 else if (pedantic && ! flag_isoc99
2174 && ! in_system_header && warn_long_long)
2175 pedwarn ("ISO C89 forbids long long integer constants");
2181 case 'i': case 'I': case 'j': case 'J':
2183 error ("more than one 'i' or 'j' suffix on integer constant");
2185 pedwarn ("ISO C forbids imaginary numeric constants");
2190 ERROR ("invalid suffix on integer constant");
2194 /* If the literal overflowed, pedwarn about it now. */
2198 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2201 /* This is simplified by the fact that our constant
2202 is always positive. */
2206 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2208 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2209 / HOST_BITS_PER_CHAR)]
2210 << (i * HOST_BITS_PER_CHAR));
2211 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
2214 value = build_int_2 (low, high);
2215 TREE_TYPE (value) = long_long_unsigned_type_node;
2217 /* If warn_traditional, calculate both the ISO type and the
2218 traditional type, then see if they disagree.
2219 Otherwise, calculate only the type for the dialect in use. */
2220 if (warn_traditional || flag_traditional)
2222 /* Calculate the traditional type. */
2223 /* Traditionally, any constant is signed; but if unsigned is
2224 specified explicitly, obey that. Use the smallest size
2225 with the right number of bits, except for one special
2226 case with decimal constants. */
2227 if (! spec_long && base != 10
2228 && int_fits_type_p (value, unsigned_type_node))
2229 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2230 /* A decimal constant must be long if it does not fit in
2231 type int. I think this is independent of whether the
2232 constant is signed. */
2233 else if (! spec_long && base == 10
2234 && int_fits_type_p (value, integer_type_node))
2235 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2236 else if (! spec_long_long)
2237 trad_type = (spec_unsigned
2238 ? long_unsigned_type_node
2239 : long_integer_type_node);
2240 else if (int_fits_type_p (value,
2242 ? long_long_unsigned_type_node
2243 : long_long_integer_type_node))
2244 trad_type = (spec_unsigned
2245 ? long_long_unsigned_type_node
2246 : long_long_integer_type_node);
2248 trad_type = (spec_unsigned
2249 ? widest_unsigned_literal_type_node
2250 : widest_integer_literal_type_node);
2252 if (warn_traditional || ! flag_traditional)
2254 /* Calculate the ISO type. */
2255 if (! spec_long && ! spec_unsigned
2256 && int_fits_type_p (value, integer_type_node))
2257 ansi_type = integer_type_node;
2258 else if (! spec_long && (base != 10 || spec_unsigned)
2259 && int_fits_type_p (value, unsigned_type_node))
2260 ansi_type = unsigned_type_node;
2261 else if (! spec_unsigned && !spec_long_long
2262 && int_fits_type_p (value, long_integer_type_node))
2263 ansi_type = long_integer_type_node;
2264 else if (! spec_long_long
2265 && int_fits_type_p (value, long_unsigned_type_node))
2266 ansi_type = long_unsigned_type_node;
2267 else if (! spec_unsigned
2268 && int_fits_type_p (value, long_long_integer_type_node))
2269 ansi_type = long_long_integer_type_node;
2270 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2271 ansi_type = long_long_unsigned_type_node;
2272 else if (! spec_unsigned
2273 && int_fits_type_p (value, widest_integer_literal_type_node))
2274 ansi_type = widest_integer_literal_type_node;
2276 ansi_type = widest_unsigned_literal_type_node;
2279 type = flag_traditional ? trad_type : ansi_type;
2281 /* We assume that constants specified in a non-decimal
2282 base are bit patterns, and that the programmer really
2283 meant what they wrote. */
2284 if (warn_traditional && !in_system_header
2285 && base == 10 && trad_type != ansi_type)
2287 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2288 warning ("width of integer constant changes with -traditional");
2289 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2290 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2292 warning ("width of integer constant may change on other systems with -traditional");
2295 if (pedantic && !flag_traditional && (flag_isoc99 || !spec_long_long)
2298 ? TYPE_PRECISION (long_long_integer_type_node)
2299 : TYPE_PRECISION (long_integer_type_node)) < TYPE_PRECISION (type)))
2302 pedwarn ("integer constant larger than the maximum value of %s",
2304 ? (TREE_UNSIGNED (type)
2305 ? "an unsigned long long int"
2306 : "a long long int")
2307 : "an unsigned long int"));
2310 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2311 warning ("decimal constant is so large that it is unsigned");
2315 if (TYPE_PRECISION (type)
2316 <= TYPE_PRECISION (integer_type_node))
2317 value = build_complex (NULL_TREE, integer_zero_node,
2318 convert (integer_type_node, value));
2320 ERROR ("complex integer constant is too wide for 'complex int'");
2322 else if (flag_traditional && !int_fits_type_p (value, type))
2323 /* The traditional constant 0x80000000 is signed
2324 but doesn't fit in the range of int.
2325 This will change it to -0x80000000, which does fit. */
2327 TREE_TYPE (value) = unsigned_type (type);
2328 value = convert (type, value);
2329 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2332 TREE_TYPE (value) = type;
2334 /* If it's still an integer (not a complex), and it doesn't
2335 fit in the type we choose for it, then pedwarn. */
2338 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2339 && ! int_fits_type_p (value, TREE_TYPE (value)))
2340 pedwarn ("integer constant is larger than the maximum value for its type");
2344 error ("missing white space after number '%.*s'", (int) (p - str), str);
/* NOTE(review): presumably the syntax_error exit targeted by the ERROR
   macro; the label line itself is not visible in this listing --
   confirm against the full source.  */
2349 return integer_zero_node;
/* lex_string: build the node for a string literal with build_string.

   STR and LEN describe the characters to convert; the visible caller
   passes token_buffer + 1 and p - (token_buffer + 1).  A nonzero WIDE
   selects WCHAR_BYTES bytes per character and WCHAR_TYPE_SIZE as the
   width used for the escape range check; otherwise one byte per
   character and the precision of char_type_node are used.

   The converted characters are written into an alloca buffer sized for
   LEN + 1 characters.  Backslash escapes are decoded with readescape
   unless ignore_escape_flag is set.  The result gets type
   wchar_array_type_node or char_array_type_node (presumably chosen by
   WIDE -- the test is not visible in this listing).  */
2353 lex_string (str, len, wide)
2359 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2361 const char *p = str, *limit = str + len;
2363 unsigned width = wide ? WCHAR_TYPE_SIZE
2364 : TYPE_PRECISION (char_type_node);
2366 #ifdef MULTIBYTE_CHARS
2367 /* Reset multibyte conversion state. */
2368 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2373 #ifdef MULTIBYTE_CHARS
2377 char_len = local_mbtowc (&wc, p, limit - p);
2380 warning ("Ignoring invalid multibyte character");
2393 if (c == '\\' && !ignore_escape_flag)
2395 p = readescape (p, limit, &c);
2396 if (width < HOST_BITS_PER_INT
2397 && (unsigned) c >= ((unsigned)1 << width))
2398 pedwarn ("escape sequence out of range for character");
2401 /* Add this single character into the buffer either as a wchar_t
2402 or as a single byte. */
/* Wide case: C is split into WCHAR_BYTES pieces of CHARWIDTH bits
   each; on BYTES_BIG_ENDIAN targets the pieces are stored in reverse
   order so the least significant piece lands last.  */
2405 unsigned charwidth = TYPE_PRECISION (char_type_node);
2406 unsigned bytemask = (1 << charwidth) - 1;
2409 for (byte = 0; byte < WCHAR_BYTES; ++byte)
2412 if (byte >= (int) sizeof (c))
2415 n = (c >> (byte * charwidth)) & bytemask;
2416 if (BYTES_BIG_ENDIAN)
2417 q[WCHAR_BYTES - byte - 1] = n;
2429 /* Terminate the string value, either with a single byte zero
2430 or with a wide zero. */
2434 memset (q, 0, WCHAR_BYTES);
2442 value = build_string (q - buf, buf);
2445 TREE_TYPE (value) = wchar_array_type_node;
2447 TREE_TYPE (value) = char_array_type_node;
/* lex_charconst: build the integer node for a character constant.

   STR and LEN describe the characters to convert; the visible caller
   passes token_buffer + 1 and p - (token_buffer + 1).  WIDE selects
   the wide form, whose result gets type wchar_type_node.

   Backslash escapes are decoded with readescape and range-checked
   against WIDTH.  Characters are merged into RESULT by shifting; an
   empty constant and one with more than MAX_CHARS characters are
   errors, and a multi-character constant draws a warning when
   warn_multichar is set and flag_traditional is not.

   NOTE(review): the end of this function (closing braces and the
   return) is not visible in this listing.  */
2452 lex_charconst (str, len, wide)
2457 const char *limit = str + len;
2461 unsigned width = TYPE_PRECISION (char_type_node);
2466 #ifdef MULTIBYTE_CHARS
2467 int longest_char = local_mb_cur_max ();
2468 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2471 max_chars = TYPE_PRECISION (integer_type_node) / width;
2473 width = WCHAR_TYPE_SIZE;
2477 #ifdef MULTIBYTE_CHARS
2481 char_len = local_mbtowc (&wc, str, limit - str);
2484 warning ("Ignoring invalid multibyte character");
2500 str = readescape (str, limit, &c);
2501 if (width < HOST_BITS_PER_INT
2502 && (unsigned) c >= ((unsigned)1 << width))
2503 pedwarn ("escape sequence out of range for character");
2505 #ifdef MAP_CHARACTER
2507 c = MAP_CHARACTER (c);
2510 /* Merge character into result; ignore excess chars. */
2511 num_chars += (width / TYPE_PRECISION (char_type_node));
/* NUM_CHARS keeps counting past MAX_CHARS so that the
   "character constant too long" check below can fire; only characters
   that still fit are shifted into RESULT.  */
2512 if (num_chars < max_chars + 1)
2514 if (width < HOST_BITS_PER_INT)
2515 result = (result << width) | (c & ((1 << width) - 1));
2521 if (chars_seen == 0)
2522 error ("empty character constant");
2523 else if (num_chars > max_chars)
2525 num_chars = max_chars;
2526 error ("character constant too long");
2528 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2529 warning ("multi-character character constant");
2531 /* If char type is signed, sign-extend the constant. */
2534 int num_bits = num_chars * width;
2536 /* We already got an error; avoid invalid shift. */
2537 value = build_int_2 (0, 0);
2538 else if (TREE_UNSIGNED (char_type_node)
2539 || ((result >> (num_bits - 1)) & 1) == 0)
2540 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2541 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2544 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2545 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2547 /* In C, a character constant has type 'int'; in C++, 'char'. */
2548 if (chars_seen <= 1 && c_language == clk_cplusplus)
2549 TREE_TYPE (value) = char_type_node;
2551 TREE_TYPE (value) = integer_type_node;
2555 value = build_int_2 (result, 0);
2556 TREE_TYPE (value) = wchar_type_node;