1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 1988, 1989, 1992, 1994, 1995, 1996, 1997
3 1998, 1999, 2000 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
38 #include "splay-tree.h"
40 /* MULTIBYTE_CHARS support only works for native compilers.
41 ??? Ideally what we want is to model widechar support after
42 the current floating point support. */
44 #undef MULTIBYTE_CHARS
47 #ifdef MULTIBYTE_CHARS
50 #endif /* MULTIBYTE_CHARS */
51 #ifndef GET_ENVIRONMENT
52 #define GET_ENVIRONMENT(ENV_VALUE,ENV_NAME) ((ENV_VALUE) = getenv (ENV_NAME))
58 extern cpp_reader parse_in;
60 /* Stream for reading from the input file. */
64 /* Private idea of the line number. See discussion in c_lex(). */
65 static int lex_lineno;
67 /* We may keep statistics about how long each file took to compile. */
68 static int header_time, body_time;
69 static splay_tree file_info_tree;
71 /* Cause the `yydebug' variable to be defined. */
78 unsigned char *buffer;
83 static struct putback_buffer putback = {NULL, 0, -1};
85 static inline int getch PARAMS ((void));
90 if (putback.index != -1)
92 int ch = putback.buffer[putback.index];
99 static inline void put_back PARAMS ((int));
107 if (putback.index == putback.buffer_size - 1)
109 putback.buffer_size += 16;
110 putback.buffer = xrealloc (putback.buffer, putback.buffer_size);
112 putback.buffer[++putback.index] = ch;
120 /* File used for outputting assembler code. */
121 extern FILE *asm_out_file;
123 #undef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE TYPE_PRECISION (wchar_type_node)
126 /* Number of bytes in a wide character. */
127 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
130 static int maxtoken; /* Current nominal length of token buffer. */
131 static char *token_buffer; /* Pointer to token buffer.
132 Actual allocated length is maxtoken + 2. */
135 int indent_level; /* Number of { minus number of }. */
136 int pending_lang_change; /* If we need to switch languages - C++ only */
137 int c_header_level; /* depth in C headers - C++ only */
139 /* Nonzero tells yylex to ignore \ in string constants. */
140 static int ignore_escape_flag;
142 static const char *readescape PARAMS ((const char *, const char *,
144 static const char *read_ucs PARAMS ((const char *, const char *,
145 unsigned int *, int));
146 static void parse_float PARAMS ((PTR));
147 static tree lex_number PARAMS ((const char *, unsigned int));
148 static tree lex_string PARAMS ((const char *, unsigned int, int));
149 static tree lex_charconst PARAMS ((const char *, unsigned int, int));
150 static void update_header_times PARAMS ((const char *));
151 static int dump_one_header PARAMS ((splay_tree_node, void *));
154 static int skip_white_space PARAMS ((int));
155 static char *extend_token_buffer PARAMS ((const char *));
156 static void extend_token_buffer_to PARAMS ((int));
157 static int read_line_number PARAMS ((int *));
158 static void process_directive PARAMS ((void));
160 static void cb_ident PARAMS ((cpp_reader *, const unsigned char *,
162 static void cb_enter_file PARAMS ((cpp_reader *));
163 static void cb_leave_file PARAMS ((cpp_reader *));
164 static void cb_rename_file PARAMS ((cpp_reader *));
169 init_c_lex (filename)
170 const char *filename;
172 struct c_fileinfo *toplevel;
174 /* Set up filename timing. Must happen before cpp_start_read. */
175 file_info_tree = splay_tree_new ((splay_tree_compare_fn)strcmp,
177 (splay_tree_delete_value_fn)free);
178 toplevel = get_fileinfo ("<top level>");
179 if (flag_detailed_statistics)
182 body_time = get_run_time ();
183 toplevel->time = body_time;
186 #ifdef MULTIBYTE_CHARS
187 /* Change to the native locale for multibyte conversions. */
188 setlocale (LC_CTYPE, "");
189 GET_ENVIRONMENT (literal_codeset, "LANG");
193 /* Open input file. */
194 if (filename == 0 || !strcmp (filename, "-"))
200 finput = fopen (filename, "r");
202 pfatal_with_name (filename);
204 #ifdef IO_BUFFER_SIZE
205 setvbuf (finput, (char *) xmalloc (IO_BUFFER_SIZE), _IOFBF, IO_BUFFER_SIZE);
207 #else /* !USE_CPPLIB */
209 parse_in.cb.ident = cb_ident;
210 parse_in.cb.enter_file = cb_enter_file;
211 parse_in.cb.leave_file = cb_leave_file;
212 parse_in.cb.rename_file = cb_rename_file;
214 /* Make sure parse_in.digraphs matches flag_digraphs. */
215 CPP_OPTION (&parse_in, digraphs) = flag_digraphs;
217 if (! cpp_start_read (&parse_in, 0 /* no printer */, filename))
220 if (filename == 0 || !strcmp (filename, "-"))
226 token_buffer = (char *) xmalloc (maxtoken + 2);
228 /* Start it at 0, because check_newline is called at the very beginning
229 and will increment it to 1. */
230 lineno = lex_lineno = 0;
240 struct c_fileinfo *fi;
242 n = splay_tree_lookup (file_info_tree, (splay_tree_key) name);
244 return (struct c_fileinfo *) n->value;
246 fi = (struct c_fileinfo *) xmalloc (sizeof (struct c_fileinfo));
248 fi->interface_only = 0;
249 fi->interface_unknown = 1;
250 splay_tree_insert (file_info_tree, (splay_tree_key) name,
251 (splay_tree_value) fi);
256 update_header_times (name)
259 /* Changing files again. This means currently collected time
260 is charged against header time, and body time starts back at 0. */
261 if (flag_detailed_statistics)
263 int this_time = get_run_time ();
264 struct c_fileinfo *file = get_fileinfo (name);
265 header_time += this_time - body_time;
266 file->time += this_time - body_time;
267 body_time = this_time;
272 dump_one_header (n, dummy)
274 void *dummy ATTRIBUTE_UNUSED;
276 print_time ((const char *) n->key,
277 ((struct c_fileinfo *) n->value)->time);
282 dump_time_statistics ()
284 struct c_fileinfo *file = get_fileinfo (input_filename);
285 int this_time = get_run_time ();
286 file->time += this_time - body_time;
288 fprintf (stderr, "\n******\n");
289 print_time ("header files (total)", header_time);
290 print_time ("main file (total)", this_time - body_time);
291 fprintf (stderr, "ratio = %g : 1\n",
292 (double)header_time / (double)(this_time - body_time));
293 fprintf (stderr, "\n******\n");
295 splay_tree_foreach (file_info_tree, dump_one_header, 0);
300 /* If C is not whitespace, return C.
301 Otherwise skip whitespace and return first nonwhite char read. */
311 /* There is no need to process comments, backslash-newline,
312 or \r here. None can occur in the output of cpp. */
320 c = check_newline ();
323 /* Per C99, horizontal whitespace is just these four characters. */
332 error ("stray '\\' in program");
342 /* Skips all of the white space at the current location in the input file. */
345 position_after_white_space ()
351 put_back (skip_white_space (c));
354 /* Make the token buffer longer, preserving the data in it.
355 P should point to just beyond the last valid character in the old buffer.
356 The value we return is a pointer to the new buffer
357 at a place corresponding to P. */
360 extend_token_buffer_to (size)
364 maxtoken = maxtoken * 2 + 10;
365 while (maxtoken < size);
366 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
370 extend_token_buffer (p)
373 int offset = p - token_buffer;
374 extend_token_buffer_to (offset);
375 return token_buffer + offset;
380 read_line_number (num)
384 enum cpp_ttype token = c_lex (&value);
386 if (token == CPP_NUMBER && TREE_CODE (value) == INTEGER_CST)
388 *num = TREE_INT_CST_LOW (value);
393 if (token != CPP_EOF)
394 error ("invalid #-line");
399 /* At the beginning of a line, increment the line number
400 and process any #-directive on this line.
401 If the line is a #-directive, read the entire line and return a newline.
402 Otherwise, return the line's first non-whitespace character. */
409 /* Loop till we get a nonblank, non-directive line. */
412 /* Read first nonwhite char on the line. */
415 while (c == ' ' || c == '\t');
420 process_directive ();
433 enum cpp_ttype token;
436 enum { act_none, act_push, act_pop } action;
437 int action_number, l;
439 #ifndef NO_IMPLICIT_EXTERN_C
440 int entering_c_header;
443 /* Don't read beyond this line. */
447 token = c_lex (&value);
449 if (token == CPP_NAME)
451 /* If a letter follows, then if the word here is `line', skip
452 it and ignore it; otherwise, ignore the line, with an error
453 if the word isn't `pragma'. */
455 const char *name = IDENTIFIER_POINTER (value);
457 if (!strcmp (name, "pragma"))
464 /* We invoke HANDLE_PRAGMA before HANDLE_GENERIC_PRAGMAS
465 (if both are defined), in order to give the back
466 end a chance to override the interpretation of
467 SYSV style pragmas. */
468 if (HANDLE_PRAGMA (getch, put_back, IDENTIFIER_POINTER (value)))
470 #endif /* HANDLE_PRAGMA */
473 else if (!strcmp (name, "define"))
475 debug_define (lex_lineno, GET_DIRECTIVE_LINE ());
478 else if (!strcmp (name, "undef"))
480 debug_undef (lex_lineno, GET_DIRECTIVE_LINE ());
483 else if (!strcmp (name, "line"))
486 token = c_lex (&value);
489 else if (!strcmp (name, "ident"))
491 /* #ident. We expect a string constant here.
492 The pedantic warning and syntax error are now in cpp. */
494 token = c_lex (&value);
495 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
498 #ifdef ASM_OUTPUT_IDENT
501 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
505 /* Skip the rest of this line. */
509 error ("undefined or invalid # directive `%s'", name);
513 /* If the # is the only nonwhite char on the line,
514 just ignore it. Check the new newline. */
515 if (token == CPP_EOF)
519 /* Here we have either `#line' or `# <nonletter>'.
520 In either case, it should be a line number; a digit should follow. */
522 if (token != CPP_NUMBER || TREE_CODE (value) != INTEGER_CST)
524 error ("invalid #-line");
528 /* subtract one, because it is the following line that
529 gets the specified number */
531 l = TREE_INT_CST_LOW (value) - 1;
533 /* More follows: it must be a string constant (filename).
534 It would be neat to use cpplib to quickly process the string, but
535 (1) we don't have a handy tokenization of the string, and
536 (2) I don't know how well that would work in the presence
537 of filenames that contain wide characters. */
541 /* Don't treat \ as special if we are processing #line 1 "...".
542 If you want it to be treated specially, use # 1 "...". */
543 ignore_escape_flag = 1;
546 /* Read the string constant. */
547 token = c_lex (&value);
549 ignore_escape_flag = 0;
551 if (token == CPP_EOF)
553 /* No more: store the line number and check following line. */
558 if (token != CPP_STRING || TREE_CODE (value) != STRING_CST)
560 error ("invalid #line");
564 new_file = TREE_STRING_POINTER (value);
566 if (main_input_filename == 0)
567 main_input_filename = new_file;
572 /* Each change of file name
573 reinitializes whether we are now in a system header. */
574 in_system_header = 0;
576 if (!read_line_number (&action_number))
578 /* Update the name in the top element of input_file_stack. */
579 if (input_file_stack)
580 input_file_stack->name = input_filename;
583 /* `1' after file name means entering new file.
584 `2' after file name means just left a file. */
586 if (action_number == 1)
589 read_line_number (&action_number);
591 else if (action_number == 2)
594 read_line_number (&action_number);
596 if (action_number == 3)
598 /* `3' after file name means this is a system header file. */
599 in_system_header = 1;
600 read_line_number (&action_number);
602 #ifndef NO_IMPLICIT_EXTERN_C
603 if (action_number == 4)
605 /* `4' after file name means this is a C header file. */
606 entering_c_header = 1;
607 read_line_number (&action_number);
611 /* Do the actions implied by the preceding numbers. */
612 if (action == act_push)
615 push_srcloc (input_filename, 1);
616 input_file_stack->indent_level = indent_level;
617 debug_start_source_file (input_filename);
618 #ifndef NO_IMPLICIT_EXTERN_C
621 else if (entering_c_header)
624 ++pending_lang_change;
628 else if (action == act_pop)
630 /* Popping out of a file. */
631 if (input_file_stack->next)
633 #ifndef NO_IMPLICIT_EXTERN_C
634 if (c_header_level && --c_header_level == 0)
636 if (entering_c_header)
637 warning ("badly nested C headers from preprocessor");
638 --pending_lang_change;
642 if (indent_level != input_file_stack->indent_level)
644 warning_with_file_and_line
645 (input_filename, lex_lineno,
646 "This file contains more '%c's than '%c's.",
647 indent_level > input_file_stack->indent_level ? '{' : '}',
648 indent_level > input_file_stack->indent_level ? '}' : '{');
652 debug_end_source_file (input_file_stack->line);
655 error ("#-lines for entering and leaving files don't match");
658 update_header_times (new_file);
660 input_filename = new_file;
664 extract_interface_info ();
666 /* skip the rest of this line. */
670 while (getch () != '\n');
672 #else /* USE_CPPLIB */
674 /* Not yet handled: #pragma, #define, #undef.
675 No need to deal with linemarkers under normal conditions. */
678 cb_ident (pfile, str, len)
679 cpp_reader *pfile ATTRIBUTE_UNUSED;
680 const unsigned char *str;
683 #ifdef ASM_OUTPUT_IDENT
686 /* Convert escapes in the string. */
687 tree value = lex_string ((const char *)str, len, 0);
688 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value));
694 cb_enter_file (pfile)
697 cpp_buffer *ip = CPP_BUFFER (pfile);
698 /* Bleah, need a better interface to this. */
699 const char *flags = cpp_syshdr_flags (pfile, ip);
701 /* Mustn't stack the main buffer on the input stack. (Ick.) */
704 lex_lineno = lineno = ip->prev->lineno - 1;
705 push_srcloc (ggc_alloc_string (ip->nominal_fname, -1), 1);
706 input_file_stack->indent_level = indent_level;
707 debug_start_source_file (ip->nominal_fname);
712 update_header_times (ip->nominal_fname);
715 extract_interface_info ();
717 in_system_header = (flags[0] != 0);
718 #ifndef NO_IMPLICIT_EXTERN_C
721 else if (flags[2] != 0)
724 ++pending_lang_change;
730 cb_leave_file (pfile)
733 /* Bleah, need a better interface to this. */
734 const char *flags = cpp_syshdr_flags (pfile, CPP_BUFFER (pfile));
736 if (indent_level != input_file_stack->indent_level)
738 warning_with_file_and_line
739 (input_filename, lex_lineno,
740 "This file contains more '%c's than '%c's.",
741 indent_level > input_file_stack->indent_level ? '{' : '}',
742 indent_level > input_file_stack->indent_level ? '}' : '{');
745 /* We get called for the main buffer, but we mustn't pop it. */
746 if (input_file_stack->next)
748 in_system_header = (flags[0] != 0);
749 #ifndef NO_IMPLICIT_EXTERN_C
750 if (c_header_level && --c_header_level == 0)
753 warning ("badly nested C headers from preprocessor");
754 --pending_lang_change;
757 lex_lineno = CPP_BUFFER (pfile)->lineno;
758 debug_end_source_file (input_file_stack->line);
760 update_header_times (input_file_stack->name);
762 extract_interface_info ();
766 cb_rename_file (pfile)
769 cpp_buffer *ip = CPP_BUFFER (pfile);
770 /* Bleah, need a better interface to this. */
771 const char *flags = cpp_syshdr_flags (pfile, ip);
772 input_filename = ggc_alloc_string (ip->nominal_fname, -1);
773 lex_lineno = ip->lineno;
774 in_system_header = (flags[0] != 0);
776 update_header_times (ip->nominal_fname);
778 extract_interface_info ();
780 #endif /* USE_CPPLIB */
782 /* Parse a '\uNNNN' or '\UNNNNNNNN' sequence.
784 [lex.charset]: The character designated by the universal-character-name
785 \UNNNNNNNN is that character whose character short name in ISO/IEC 10646
786 is NNNNNNNN; the character designated by the universal-character-name
787 \uNNNN is that character whose character short name in ISO/IEC 10646 is
788 0000NNNN. If the hexadecimal value for a universal character name is
789 less than 0x20 or in the range 0x7F-0x9F (inclusive), or if the
790 universal character name designates a character in the basic source
791 character set, then the program is ill-formed.
793 We assume that wchar_t is Unicode, so we don't need to do any
794 mapping. Is this ever wrong? */
797 read_ucs (p, limit, cptr, length)
803 unsigned int code = 0;
806 for (; length; --length)
810 error ("incomplete universal-character-name");
817 error ("non hex digit '%c' in universal-character-name", c);
823 if (c >= 'a' && c <= 'f')
824 code += c - 'a' + 10;
825 if (c >= 'A' && c <= 'F')
826 code += c - 'A' + 10;
827 if (c >= '0' && c <= '9')
832 sorry ("universal-character-name on EBCDIC target");
833 *cptr = 0x3f; /* EBCDIC invalid character */
837 if (code > 0x9f && !(code & 0x80000000))
838 /* True extended character, OK. */;
839 else if (code >= 0x20 && code < 0x7f)
841 /* ASCII printable character. The C character set consists of all of
842 these except $, @ and `. We use hex escapes so that this also
843 works with EBCDIC hosts. */
844 if (code != 0x24 && code != 0x40 && code != 0x60)
845 error ("universal-character-name used for '%c'", code);
848 error ("invalid universal-character-name");
854 /* Read an escape sequence and write its character equivalent into *CPTR.
855 P is the input pointer, which is just after the backslash. LIMIT
856 is how much text we have.
857 Returns the updated input pointer. */
860 readescape (p, limit, cptr)
865 unsigned int c, code, count;
866 unsigned firstdig = 0;
871 /* cpp has already issued an error for this. */
881 if (warn_traditional && !in_system_header)
882 warning ("the meaning of `\\x' varies with -traditional");
884 if (flag_traditional)
902 if (c >= 'a' && c <= 'f')
903 code += c - 'a' + 10;
904 if (c >= 'A' && c <= 'F')
905 code += c - 'A' + 10;
906 if (c >= '0' && c <= '9')
908 if (code != 0 || count != 0)
918 warning ("\\x used with no following hex digits");
923 /* Digits are all 0's. Ok. */
925 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
928 << (TYPE_PRECISION (integer_type_node)
931 pedwarn ("hex escape out of range");
935 case '0': case '1': case '2': case '3': case '4':
936 case '5': case '6': case '7':
938 for (count = 0; count < 3; count++)
940 if (c < '0' || c > '7')
945 code = (code * 8) + (c - '0');
957 case '\\': case '\'': case '"': case '?':
961 case 'n': *cptr = TARGET_NEWLINE; return p;
962 case 't': *cptr = TARGET_TAB; return p;
963 case 'r': *cptr = TARGET_CR; return p;
964 case 'f': *cptr = TARGET_FF; return p;
965 case 'b': *cptr = TARGET_BS; return p;
966 case 'v': *cptr = TARGET_VT; return p;
968 if (warn_traditional && !in_system_header)
969 warning ("the meaning of '\\a' varies with -traditional");
970 *cptr = flag_traditional ? c : TARGET_BELL;
973 /* Warnings and support checks handled by read_ucs(). */
975 if (c_language != clk_cplusplus && !flag_isoc99)
978 if (warn_traditional && !in_system_header)
979 warning ("the meaning of '\\%c' varies with -traditional", c);
981 return read_ucs (p, limit, cptr, c == 'u' ? 4 : 8);
985 pedwarn ("non-ISO-standard escape sequence, '\\%c'", c);
986 *cptr = TARGET_ESC; return p;
988 /* '\(', etc, are used at beginning of line to avoid confusing Emacs.
989 '\%' is used to prevent SCCS from getting confused. */
990 case '(': case '{': case '[': case '%':
992 pedwarn ("unknown escape sequence '\\%c'", c);
998 pedwarn ("unknown escape sequence '\\%c'", c);
1000 pedwarn ("unknown escape sequence: '\\' followed by char 0x%.2x", c);
1007 /* Returns nonzero if C is a universal-character-name. Give an error if it
1008 is not one which may appear in an identifier, as per [extendid].
1010 Note that extended character support in identifiers has not yet been
1011 implemented. It is my personal opinion that this is not a desirable
1012 feature. Portable code cannot count on support for more than the basic
1013 identifier character set. */
1016 is_extended_char (c)
1019 #ifdef TARGET_EBCDIC
1026 /* None of the valid chars are outside the Basic Multilingual Plane (the
1030 error ("universal-character-name '\\U%08x' not valid in identifier", c);
1035 if ((c >= 0x00c0 && c <= 0x00d6)
1036 || (c >= 0x00d8 && c <= 0x00f6)
1037 || (c >= 0x00f8 && c <= 0x01f5)
1038 || (c >= 0x01fa && c <= 0x0217)
1039 || (c >= 0x0250 && c <= 0x02a8)
1040 || (c >= 0x1e00 && c <= 0x1e9a)
1041 || (c >= 0x1ea0 && c <= 0x1ef9))
1046 || (c >= 0x0388 && c <= 0x038a)
1048 || (c >= 0x038e && c <= 0x03a1)
1049 || (c >= 0x03a3 && c <= 0x03ce)
1050 || (c >= 0x03d0 && c <= 0x03d6)
1055 || (c >= 0x03e2 && c <= 0x03f3)
1056 || (c >= 0x1f00 && c <= 0x1f15)
1057 || (c >= 0x1f18 && c <= 0x1f1d)
1058 || (c >= 0x1f20 && c <= 0x1f45)
1059 || (c >= 0x1f48 && c <= 0x1f4d)
1060 || (c >= 0x1f50 && c <= 0x1f57)
1064 || (c >= 0x1f5f && c <= 0x1f7d)
1065 || (c >= 0x1f80 && c <= 0x1fb4)
1066 || (c >= 0x1fb6 && c <= 0x1fbc)
1067 || (c >= 0x1fc2 && c <= 0x1fc4)
1068 || (c >= 0x1fc6 && c <= 0x1fcc)
1069 || (c >= 0x1fd0 && c <= 0x1fd3)
1070 || (c >= 0x1fd6 && c <= 0x1fdb)
1071 || (c >= 0x1fe0 && c <= 0x1fec)
1072 || (c >= 0x1ff2 && c <= 0x1ff4)
1073 || (c >= 0x1ff6 && c <= 0x1ffc))
1077 if ((c >= 0x0401 && c <= 0x040d)
1078 || (c >= 0x040f && c <= 0x044f)
1079 || (c >= 0x0451 && c <= 0x045c)
1080 || (c >= 0x045e && c <= 0x0481)
1081 || (c >= 0x0490 && c <= 0x04c4)
1082 || (c >= 0x04c7 && c <= 0x04c8)
1083 || (c >= 0x04cb && c <= 0x04cc)
1084 || (c >= 0x04d0 && c <= 0x04eb)
1085 || (c >= 0x04ee && c <= 0x04f5)
1086 || (c >= 0x04f8 && c <= 0x04f9))
1090 if ((c >= 0x0531 && c <= 0x0556)
1091 || (c >= 0x0561 && c <= 0x0587))
1095 if ((c >= 0x05d0 && c <= 0x05ea)
1096 || (c >= 0x05f0 && c <= 0x05f4))
1100 if ((c >= 0x0621 && c <= 0x063a)
1101 || (c >= 0x0640 && c <= 0x0652)
1102 || (c >= 0x0670 && c <= 0x06b7)
1103 || (c >= 0x06ba && c <= 0x06be)
1104 || (c >= 0x06c0 && c <= 0x06ce)
1105 || (c >= 0x06e5 && c <= 0x06e7))
1109 if ((c >= 0x0905 && c <= 0x0939)
1110 || (c >= 0x0958 && c <= 0x0962))
1114 if ((c >= 0x0985 && c <= 0x098c)
1115 || (c >= 0x098f && c <= 0x0990)
1116 || (c >= 0x0993 && c <= 0x09a8)
1117 || (c >= 0x09aa && c <= 0x09b0)
1119 || (c >= 0x09b6 && c <= 0x09b9)
1120 || (c >= 0x09dc && c <= 0x09dd)
1121 || (c >= 0x09df && c <= 0x09e1)
1122 || (c >= 0x09f0 && c <= 0x09f1))
1126 if ((c >= 0x0a05 && c <= 0x0a0a)
1127 || (c >= 0x0a0f && c <= 0x0a10)
1128 || (c >= 0x0a13 && c <= 0x0a28)
1129 || (c >= 0x0a2a && c <= 0x0a30)
1130 || (c >= 0x0a32 && c <= 0x0a33)
1131 || (c >= 0x0a35 && c <= 0x0a36)
1132 || (c >= 0x0a38 && c <= 0x0a39)
1133 || (c >= 0x0a59 && c <= 0x0a5c)
1138 if ((c >= 0x0a85 && c <= 0x0a8b)
1140 || (c >= 0x0a8f && c <= 0x0a91)
1141 || (c >= 0x0a93 && c <= 0x0aa8)
1142 || (c >= 0x0aaa && c <= 0x0ab0)
1143 || (c >= 0x0ab2 && c <= 0x0ab3)
1144 || (c >= 0x0ab5 && c <= 0x0ab9)
1149 if ((c >= 0x0b05 && c <= 0x0b0c)
1150 || (c >= 0x0b0f && c <= 0x0b10)
1151 || (c >= 0x0b13 && c <= 0x0b28)
1152 || (c >= 0x0b2a && c <= 0x0b30)
1153 || (c >= 0x0b32 && c <= 0x0b33)
1154 || (c >= 0x0b36 && c <= 0x0b39)
1155 || (c >= 0x0b5c && c <= 0x0b5d)
1156 || (c >= 0x0b5f && c <= 0x0b61))
1160 if ((c >= 0x0b85 && c <= 0x0b8a)
1161 || (c >= 0x0b8e && c <= 0x0b90)
1162 || (c >= 0x0b92 && c <= 0x0b95)
1163 || (c >= 0x0b99 && c <= 0x0b9a)
1165 || (c >= 0x0b9e && c <= 0x0b9f)
1166 || (c >= 0x0ba3 && c <= 0x0ba4)
1167 || (c >= 0x0ba8 && c <= 0x0baa)
1168 || (c >= 0x0bae && c <= 0x0bb5)
1169 || (c >= 0x0bb7 && c <= 0x0bb9))
1173 if ((c >= 0x0c05 && c <= 0x0c0c)
1174 || (c >= 0x0c0e && c <= 0x0c10)
1175 || (c >= 0x0c12 && c <= 0x0c28)
1176 || (c >= 0x0c2a && c <= 0x0c33)
1177 || (c >= 0x0c35 && c <= 0x0c39)
1178 || (c >= 0x0c60 && c <= 0x0c61))
1182 if ((c >= 0x0c85 && c <= 0x0c8c)
1183 || (c >= 0x0c8e && c <= 0x0c90)
1184 || (c >= 0x0c92 && c <= 0x0ca8)
1185 || (c >= 0x0caa && c <= 0x0cb3)
1186 || (c >= 0x0cb5 && c <= 0x0cb9)
1187 || (c >= 0x0ce0 && c <= 0x0ce1))
1191 if ((c >= 0x0d05 && c <= 0x0d0c)
1192 || (c >= 0x0d0e && c <= 0x0d10)
1193 || (c >= 0x0d12 && c <= 0x0d28)
1194 || (c >= 0x0d2a && c <= 0x0d39)
1195 || (c >= 0x0d60 && c <= 0x0d61))
1199 if ((c >= 0x0e01 && c <= 0x0e30)
1200 || (c >= 0x0e32 && c <= 0x0e33)
1201 || (c >= 0x0e40 && c <= 0x0e46)
1202 || (c >= 0x0e4f && c <= 0x0e5b))
1206 if ((c >= 0x0e81 && c <= 0x0e82)
1212 || (c >= 0x0e94 && c <= 0x0e97)
1213 || (c >= 0x0e99 && c <= 0x0e9f)
1214 || (c >= 0x0ea1 && c <= 0x0ea3)
1219 || (c >= 0x0ead && c <= 0x0eb0)
1223 || (c >= 0x0ec0 && c <= 0x0ec4)
1228 if ((c >= 0x10a0 && c <= 0x10c5)
1229 || (c >= 0x10d0 && c <= 0x10f6))
1233 if ((c >= 0x3041 && c <= 0x3094)
1234 || (c >= 0x309b && c <= 0x309e))
1238 if ((c >= 0x30a1 && c <= 0x30fe))
1242 if ((c >= 0x3105 && c <= 0x312c))
1246 if ((c >= 0x1100 && c <= 0x1159)
1247 || (c >= 0x1161 && c <= 0x11a2)
1248 || (c >= 0x11a8 && c <= 0x11f9))
1251 /* CJK Unified Ideographs */
1252 if ((c >= 0xf900 && c <= 0xfa2d)
1253 || (c >= 0xfb1f && c <= 0xfb36)
1254 || (c >= 0xfb38 && c <= 0xfb3c)
1256 || (c >= 0xfb40 && c <= 0xfb41)
1257 || (c >= 0xfb42 && c <= 0xfb44)
1258 || (c >= 0xfb46 && c <= 0xfbb1)
1259 || (c >= 0xfbd3 && c <= 0xfd3f)
1260 || (c >= 0xfd50 && c <= 0xfd8f)
1261 || (c >= 0xfd92 && c <= 0xfdc7)
1262 || (c >= 0xfdf0 && c <= 0xfdfb)
1263 || (c >= 0xfe70 && c <= 0xfe72)
1265 || (c >= 0xfe76 && c <= 0xfefc)
1266 || (c >= 0xff21 && c <= 0xff3a)
1267 || (c >= 0xff41 && c <= 0xff5a)
1268 || (c >= 0xff66 && c <= 0xffbe)
1269 || (c >= 0xffc2 && c <= 0xffc7)
1270 || (c >= 0xffca && c <= 0xffcf)
1271 || (c >= 0xffd2 && c <= 0xffd7)
1272 || (c >= 0xffda && c <= 0xffdc)
1273 || (c >= 0x4e00 && c <= 0x9fa5))
1276 error ("universal-character-name '\\u%04x' not valid in identifier", c);
1281 /* Add the UTF-8 representation of C to the token_buffer. */
1284 utf8_extend_token (c)
1289 if (c <= 0x0000007f)
1294 else if (c <= 0x000007ff)
1295 shift = 6, mask = 0xc0;
1296 else if (c <= 0x0000ffff)
1297 shift = 12, mask = 0xe0;
1298 else if (c <= 0x001fffff)
1299 shift = 18, mask = 0xf0;
1300 else if (c <= 0x03ffffff)
1301 shift = 24, mask = 0xf8;
1303 shift = 30, mask = 0xfc;
1305 extend_token (mask | (c >> shift));
1309 extend_token ((unsigned char) (0x80 | (c >> shift)));
1321 char long_long_flag;
1324 struct try_type type_sequence[] =
1326 { &integer_type_node, 0, 0, 0},
1327 { &unsigned_type_node, 1, 0, 0},
1328 { &long_integer_type_node, 0, 1, 0},
1329 { &long_unsigned_type_node, 1, 1, 0},
1330 { &long_long_integer_type_node, 0, 1, 1},
1331 { &long_long_unsigned_type_node, 1, 1, 1}
1343 int conversion_errno;
1344 REAL_VALUE_TYPE value;
1352 struct pf_args * args = (struct pf_args *) data;
1353 const char *typename;
1355 args->conversion_errno = 0;
1356 args->type = double_type_node;
1357 typename = "double";
1359 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1360 tells the desired precision of the binary result
1361 of decimal-to-binary conversion. */
1366 error ("both 'f' and 'l' suffixes on floating constant");
1368 args->type = float_type_node;
1371 else if (args->lflag)
1373 args->type = long_double_type_node;
1374 typename = "long double";
1376 else if (flag_single_precision_constant)
1378 args->type = float_type_node;
1383 if (args->base == 16)
1384 args->value = REAL_VALUE_HTOF (args->str, TYPE_MODE (args->type));
1386 args->value = REAL_VALUE_ATOF (args->str, TYPE_MODE (args->type));
1388 args->conversion_errno = errno;
1389 /* A diagnostic is required here by some ISO C testsuites.
1390 This is not pedwarn, because some people don't want
1391 an error for this. */
1392 if (REAL_VALUE_ISINF (args->value) && pedantic)
1393 warning ("floating point number exceeds range of '%s'", typename);
1401 const cpp_token *tok;
1402 enum cpp_ttype type;
1405 timevar_push (TV_CPP);
1406 tok = cpp_get_token (&parse_in);
1407 timevar_pop (TV_CPP);
1409 /* The C++ front end does horrible things with the current line
1410 number. To ensure an accurate line number, we must reset it
1411 every time we return a token. If we reset it from tok->line
1412 every time, we'll get line numbers inside macros referring to the
1413 macro definition; this is nice, but we don't want to change the
1414 behavior until integrated mode is the only option. So we keep our
1415 own idea of the line number, and reset it from tok->line at each
1416 new line (which never happens inside a macro). */
1417 if (tok->flags & BOL)
1418 lex_lineno = tok->line;
1421 lineno = lex_lineno;
1425 case CPP_OPEN_BRACE: indent_level++; break;
1426 case CPP_CLOSE_BRACE: indent_level--; break;
1428 /* Issue this error here, where we can get at tok->val.aux. */
1430 if (ISGRAPH (tok->val.aux))
1431 error ("stray '%c' in program", tok->val.aux);
1433 error ("stray '\\%#o' in program", tok->val.aux);
1439 *value = get_identifier ((const char *)tok->val.node->name);
1445 *value = lex_number ((const char *)tok->val.str.text, tok->val.str.len);
1450 *value = lex_charconst ((const char *)tok->val.str.text,
1451 tok->val.str.len, tok->type == CPP_WCHAR);
1457 *value = lex_string ((const char *)tok->val.str.text,
1458 tok->val.str.len, tok->type == CPP_WSTRING);
1461 /* These tokens should not be visible outside cpplib. */
1462 case CPP_HEADER_NAME:
1465 case CPP_PLACEMARKER:
1485 /* Effectively do c = skip_white_space (c)
1486 but do it faster in the usual cases. */
1498 c = skip_white_space (c);
1500 goto found_nonwhite;
1504 lineno = lex_lineno;
1512 /* Capital L may start a wide-string or wide-character constant. */
1514 register int c1 = getch();
1523 goto string_constant;
1530 if (!doing_objc_thang)
1534 /* '@' may start a constant string object. */
1535 register int c1 = getch ();
1539 goto string_constant;
1542 /* Fall through to treat '@' as the start of an identifier. */
1545 case 'A': case 'B': case 'C': case 'D': case 'E':
1546 case 'F': case 'G': case 'H': case 'I': case 'J':
1547 case 'K': case 'M': case 'N': case 'O':
1548 case 'P': case 'Q': case 'R': case 'S': case 'T':
1549 case 'U': case 'V': case 'W': case 'X': case 'Y':
1551 case 'a': case 'b': case 'c': case 'd': case 'e':
1552 case 'f': case 'g': case 'h': case 'i': case 'j':
1553 case 'k': case 'l': case 'm': case 'n': case 'o':
1554 case 'p': case 'q': case 'r': case 's': case 't':
1555 case 'u': case 'v': case 'w': case 'x': case 'y':
1561 while (ISALNUM (c) || c == '_' || c == '$' || c == '@')
1563 /* Make sure this char really belongs in an identifier. */
1566 if (! dollars_in_ident)
1567 error ("'$' in identifier");
1569 pedwarn ("'$' in identifier");
1572 if (p >= token_buffer + maxtoken)
1573 p = extend_token_buffer (p);
1581 if (p >= token_buffer + maxtoken)
1582 p = extend_token_buffer (p);
1585 *value = get_identifier (token_buffer);
1590 /* It's hard to preserve tokenization on '.' because
1591 it could be a symbol by itself, or it could be the
1592 start of a floating point number and cpp won't tell us. */
1598 return CPP_ELLIPSIS;
1601 error ("parse error at '..'");
1603 else if (c1 == '*' && c_language == clk_cplusplus)
1604 return CPP_DOT_STAR;
1612 case '0': case '1': case '2': case '3': case '4':
1613 case '5': case '6': case '7': case '8': case '9':
1616 /* Scan the next preprocessing number. All C numeric constants
1617 are preprocessing numbers, but not all preprocessing numbers
1618 are valid numeric constants. Preprocessing numbers fit the
1619 regular expression \.?[0-9]([0-9a-zA-Z_.]|[eEpP][+-])*
1620 See C99 section 6.4.8. */
1623 if (p >= token_buffer + maxtoken)
1624 p = extend_token_buffer (p);
1629 if (c == '+' || c == '-')
1632 if (d == 'e' || d == 'E' || d == 'p' || d == 'P')
1635 if (ISALNUM (c) || c == '_' || c == '.')
1641 *value = lex_number (token_buffer, p - token_buffer);
1651 int delimiter = charconst ? '\'' : '"';
1652 #ifdef MULTIBYTE_CHARS
1653 int longest_char = local_mb_cur_max ();
1654 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
1657 p = token_buffer + 1;
1659 while (c != delimiter && c != EOF)
1661 if (p + 2 > token_buffer + maxtoken)
1662 p = extend_token_buffer (p);
1664 /* ignore_escape_flag is set for reading the filename in #line. */
1665 if (!ignore_escape_flag && c == '\\')
1668 *p++ = getch (); /* escaped character */
1674 #ifdef MULTIBYTE_CHARS
1677 for (i = 0; i < longest_char; ++i)
1679 if (p + i >= token_buffer + maxtoken)
1680 p = extend_token_buffer (p);
1683 char_len = local_mblen (p, i + 1);
1690 /* Replace all except the first byte. */
1692 for (--i; i > 0; --i)
1696 /* mbtowc sometimes needs an extra char before accepting */
1697 else if (char_len <= i)
1711 *value = lex_charconst (token_buffer + 1, p - (token_buffer + 1),
1713 return wide_flag ? CPP_WCHAR : CPP_CHAR;
1717 *value = lex_string (token_buffer + 1, p - (token_buffer + 1),
1719 return wide_flag ? CPP_WSTRING : objc_flag ? CPP_OSTRING : CPP_STRING;
1737 enum cpp_ttype type = CPP_EOF;
1741 case '+': type = CPP_PLUS; break;
1742 case '-': type = CPP_MINUS; break;
1743 case '&': type = CPP_AND; break;
1744 case '|': type = CPP_OR; break;
1745 case ':': type = CPP_COLON; break;
1746 case '<': type = CPP_LESS; break;
1747 case '>': type = CPP_GREATER; break;
1748 case '*': type = CPP_MULT; break;
1749 case '/': type = CPP_DIV; break;
1750 case '%': type = CPP_MOD; break;
1751 case '^': type = CPP_XOR; break;
1752 case '!': type = CPP_NOT; break;
1753 case '=': type = CPP_EQ; break;
1758 if (c1 == '=' && type < CPP_LAST_EQ)
1759 return type + (CPP_EQ_EQ - CPP_EQ);
1763 case '+': return CPP_PLUS_PLUS;
1764 case '-': return CPP_MINUS_MINUS;
1765 case '&': return CPP_AND_AND;
1766 case '|': return CPP_OR_OR;
1768 if (c_language == clk_cplusplus)
1772 case '<': type = CPP_LSHIFT; goto do_triad;
1773 case '>': type = CPP_RSHIFT; goto do_triad;
1781 if (c_language == clk_cplusplus)
1785 return CPP_DEREF_STAR;
1793 if (c1 == '?' && c_language == clk_cplusplus)
1794 { type = CPP_MAX; goto do_triad; }
1798 if (c1 == ':' && flag_digraphs)
1799 return CPP_OPEN_SQUARE;
1800 if (c1 == '%' && flag_digraphs)
1801 { indent_level++; return CPP_OPEN_BRACE; }
1802 if (c1 == '?' && c_language == clk_cplusplus)
1803 { type = CPP_MIN; goto do_triad; }
1807 if (c1 == '>' && flag_digraphs)
1808 return CPP_CLOSE_SQUARE;
1811 if (c1 == '>' && flag_digraphs)
1812 { indent_level--; return CPP_CLOSE_BRACE; }
1822 type += (CPP_EQ_EQ - CPP_EQ);
1828 case '~': return CPP_COMPL;
1829 case '?': return CPP_QUERY;
1830 case ',': return CPP_COMMA;
1831 case '(': return CPP_OPEN_PAREN;
1832 case ')': return CPP_CLOSE_PAREN;
1833 case '[': return CPP_OPEN_SQUARE;
1834 case ']': return CPP_CLOSE_SQUARE;
1835 case '{': indent_level++; return CPP_OPEN_BRACE;
1836 case '}': indent_level--; return CPP_CLOSE_BRACE;
1837 case ';': return CPP_SEMICOLON;
1842 error ("stray '%c' in program", c);
1844 error ("stray '\\%#o' in program", c);
/* ERROR (MSGID): report MSGID through error () and then jump to the
   `syntax_error' label, so it can only be used where that label is in
   scope.  Wrapped in do { } while (0) so it behaves as one statement.  */
1852 #define ERROR(msgid) do { error(msgid); goto syntax_error; } while(0)
/* lex_number: interpret the LEN characters starting at STR as a numeric
   constant.

   What the visible code does, in order:
     - single-character fast path returning integer_zero_node,
       integer_one_node or build_int_2 (*str - '0', 0);
     - prefix tests for "0x"/"0X" and for a leading "0" followed by a digit
       (presumably selecting radix 16 and 8 -- the assignments to `base'
       are not visible in this listing);
     - a digit loop that accumulates the value into PARTS, one
       HOST_BITS_PER_CHAR-sized piece per element, and tracks decimal
       points and exponents in `floatflag';
     - for floating constants: exponent and suffix checks, then conversion
       through parse_float under do_float_handler;
     - for integer constants: suffix checks, reassembly of PARTS into a
       high/low pair, and selection of the constant's type.

   Diagnostics issued with ERROR abandon the number via `syntax_error'.

   NOTE(review): many lines of this function (declarations, braces, whole
   statements) are missing from this listing; the comments added here
   describe only what is visible and hedge where they infer.  */
1855 lex_number (str, len)
1861 int largest_digit = 0;
1867 enum anon1 { NOT_FLOAT = 0, AFTER_POINT, AFTER_EXPON } floatflag = NOT_FLOAT;
1869 /* We actually store only HOST_BITS_PER_CHAR bits in each part.
1870 The code below which fills the parts array assumes that a host
1871 int is at least twice as wide as a host char, and that
1872 HOST_BITS_PER_WIDE_INT is an even multiple of HOST_BITS_PER_CHAR.
1873 Two HOST_WIDE_INTs is the largest int literal we can store.
1874 In order to detect overflow below, the number of parts (TOTAL_PARTS)
1875 must be exactly the number of parts needed to hold the bits
1876 of two HOST_WIDE_INTs. */
1877 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2)
1878 unsigned int parts[TOTAL_PARTS];
1880 /* Optimize for most frequent case. */
1884 return integer_zero_node;
1885 else if (*str == '1')
1886 return integer_one_node;
1888 return build_int_2 (*str - '0', 0);
/* Walks every element of PARTS; the loop body is not visible here
   (presumably it clears the accumulator -- confirm).  */
1891 for (count = 0; count < TOTAL_PARTS; count++)
1894 /* len is known to be >1 at this point. */
/* "0x" / "0X" prefix; LEN > 2 requires at least one character after it.  */
1897 if (len > 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
1902 /* The ISDIGIT check is so we are not confused by a suffix on 0. */
1903 else if (str[0] == '0' && ISDIGIT (str[1]))
/* The diagnostics below concern a decimal point (the branch that follows
   is commented "It is not a decimal point"): a second point and a point
   inside the exponent are rejected, otherwise the state becomes
   AFTER_POINT.  */
1915 if (base == 16 && pedantic && !flag_isoc99)
1916 pedwarn ("floating constant may not be in radix 16");
1917 else if (floatflag == AFTER_POINT)
1918 ERROR ("too many decimal points in floating constant");
1919 else if (floatflag == AFTER_EXPON)
1920 ERROR ("decimal point in exponent - impossible!");
1922 floatflag = AFTER_POINT;
1928 /* Possible future extension: silently ignore _ in numbers,
1929 permitting cosmetic grouping - e.g. 0x8000_0000 == 0x80000000
1930 but somewhat easier to read. Ada has this? */
1931 ERROR ("underscore in number");
1935 /* It is not a decimal point.
1936 It should be a digit (perhaps a hex digit). */
/* 'e'/'E' starts the exponent only for radix <= 10; in radix 16 those are
   digits and the exponent marker is 'p'/'P' instead.  */
1942 else if (base <= 10 && (c == 'e' || c == 'E'))
1945 floatflag = AFTER_EXPON;
1948 else if (base == 16 && (c == 'p' || c == 'P'))
1950 floatflag = AFTER_EXPON;
1951 break; /* start of exponent */
1953 else if (base == 16 && c >= 'a' && c <= 'f')
1957 else if (base == 16 && c >= 'A' && c <= 'F')
1964 break; /* start of suffix */
/* `largest_digit' is compared against the radix after the loop; presumably
   it is updated here when N exceeds it (the assignment is not visible).  */
1967 if (n >= largest_digit)
/* Multiply the accumulated value by BASE one part at a time, moving the
   bits above HOST_BITS_PER_CHAR into the next part and masking them off
   the current one.  */
1971 for (count = 0; count < TOTAL_PARTS; count++)
1973 parts[count] *= base;
1977 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1979 &= (1 << HOST_BITS_PER_CHAR) - 1;
1985 /* If the highest-order part overflows (gets larger than
1986 a host char will hold) then the whole number has
1987 overflowed. Record this and truncate the highest-order
1989 if (parts[TOTAL_PARTS - 1] >> HOST_BITS_PER_CHAR)
1992 parts[TOTAL_PARTS - 1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1996 while (p < str + len);
1998 /* This can happen on input like `int i = 0x;' */
2000 ERROR ("numeric constant with no digits");
2002 if (largest_digit >= base)
2003 ERROR ("numeric constant contains digits beyond the radix");
/* Floating constant: a decimal point or an exponent was seen above.  */
2005 if (floatflag != NOT_FLOAT)
2008 int imag, fflag, lflag, conversion_errno;
2009 REAL_VALUE_TYPE real;
2010 struct pf_args args;
2013 if (base == 16 && floatflag != AFTER_EXPON)
2014 ERROR ("hexadecimal floating constant has no exponent");
2016 /* Read explicit exponent if any, and put it in tokenbuf. */
2017 if ((base == 10 && ((c == 'e') || (c == 'E')))
2018 || (base == 16 && (c == 'p' || c == 'P')))
2022 if (p < str + len && (c == '+' || c == '-'))
2024 /* Exponent is decimal, even if string is a hex float. */
2026 ERROR ("floating constant exponent has no digits");
2027 while (p < str + len && ISDIGIT (c))
2033 /* Copy the float constant now; we don't want any suffixes in the
2034 string passed to parse_float. */
/* COPY is a NUL-terminated, stack-allocated duplicate of STR up to P.  */
2035 copy = alloca (p - str + 1);
2036 memcpy (copy, str, p - str);
2037 copy[p - str] = '\0';
2039 /* Now parse suffixes. */
/* Each of the f, l and i/j suffixes may appear at most once; a repeat or
   any other suffix character is a hard error.  */
2040 fflag = lflag = imag = 0;
2041 while (p < str + len)
2046 ERROR ("more than one 'f' suffix on floating constant");
2047 else if (warn_traditional && !in_system_header)
2048 warning ("traditional C rejects the 'f' suffix");
2055 ERROR ("more than one 'l' suffix on floating constant");
2056 else if (warn_traditional && !in_system_header)
2057 warning ("traditional C rejects the 'l' suffix");
2065 ERROR ("more than one 'i' or 'j' suffix on floating constant");
2067 pedwarn ("ISO C forbids imaginary numeric constants");
2072 ERROR ("invalid suffix on floating constant");
2075 /* Setup input for parse_float() */
2081 /* Convert string to a double, checking for overflow. */
2082 if (do_float_handler (parse_float, (PTR) &args))
2084 /* Receive output from parse_float() */
2088 /* We got an exception from parse_float() */
2089 ERROR ("floating constant out of range");
2091 /* Receive output from parse_float() */
2092 conversion_errno = args.conversion_errno;
2096 /* ERANGE is also reported for underflow,
2097 so test the value to distinguish overflow from that. */
2098 if (conversion_errno == ERANGE && !flag_traditional && pedantic
2099 && (REAL_VALUES_LESS (dconst1, real)
2100 || REAL_VALUES_LESS (real, dconstm1)))
2101 warning ("floating point number exceeds range of 'double'");
2104 /* Create a node with determined type and value. */
/* Two forms are built below: a complex constant whose real part is zero
   and whose imaginary part is REAL, or a plain real constant.  */
2106 value = build_complex (NULL_TREE, convert (type, integer_zero_node),
2107 build_real (type, real));
2109 value = build_real (type, real);
/* From here on the visible code deals with integer constants (see the
   suffix diagnostics that follow).  */
2113 tree trad_type, ansi_type, type;
2114 HOST_WIDE_INT high, low;
2115 int spec_unsigned = 0;
2117 int spec_long_long = 0;
2122 trad_type = ansi_type = type = NULL_TREE;
2123 while (p < str + len)
2130 error ("two 'u' suffixes on integer constant");
2131 else if (warn_traditional && !in_system_header)
2132 warning ("traditional C rejects the 'u' suffix");
2143 error ("three 'l' suffixes on integer constant");
2145 error ("'lul' is not a valid integer suffix");
2146 else if (c != spec_long)
2147 error ("'Ll' and 'lL' are not valid integer suffixes");
2148 else if (pedantic && ! flag_isoc99
2149 && ! in_system_header && warn_long_long)
2150 pedwarn ("ISO C89 forbids long long integer constants");
2156 case 'i': case 'I': case 'j': case 'J':
2158 error ("more than one 'i' or 'j' suffix on integer constant");
2160 pedwarn ("ISO C forbids imaginary numeric constants");
2165 ERROR ("invalid suffix on integer constant");
2169 /* If the literal overflowed, pedwarn about it now. */
2173 pedwarn ("integer constant is too large for this configuration of the compiler - truncated to %d bits", HOST_BITS_PER_WIDE_INT * 2);
2176 /* This is simplified by the fact that our constant
2177 is always positive. */
/* Reassemble PARTS into two HOST_WIDE_INT halves: the first
   HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR parts form LOW, the
   remaining ones form HIGH.  */
2181 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
2183 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
2184 / HOST_BITS_PER_CHAR)]
2185 << (i * HOST_BITS_PER_CHAR));
2186 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
/* The node is given a provisional long long unsigned type; the
   int_fits_type_p tests below then choose the type actually used.  */
2189 value = build_int_2 (low, high);
2190 TREE_TYPE (value) = long_long_unsigned_type_node;
2192 /* If warn_traditional, calculate both the ISO type and the
2193 traditional type, then see if they disagree.
2194 Otherwise, calculate only the type for the dialect in use. */
2195 if (warn_traditional || flag_traditional)
2197 /* Calculate the traditional type. */
2198 /* Traditionally, any constant is signed; but if unsigned is
2199 specified explicitly, obey that. Use the smallest size
2200 with the right number of bits, except for one special
2201 case with decimal constants. */
2202 if (! spec_long && base != 10
2203 && int_fits_type_p (value, unsigned_type_node))
2204 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2205 /* A decimal constant must be long if it does not fit in
2206 type int. I think this is independent of whether the
2207 constant is signed. */
2208 else if (! spec_long && base == 10
2209 && int_fits_type_p (value, integer_type_node))
2210 trad_type = spec_unsigned ? unsigned_type_node : integer_type_node;
2211 else if (! spec_long_long)
2212 trad_type = (spec_unsigned
2213 ? long_unsigned_type_node
2214 : long_integer_type_node);
2215 else if (int_fits_type_p (value,
2217 ? long_long_unsigned_type_node
2218 : long_long_integer_type_node))
2219 trad_type = (spec_unsigned
2220 ? long_long_unsigned_type_node
2221 : long_long_integer_type_node);
2223 trad_type = (spec_unsigned
2224 ? widest_unsigned_literal_type_node
2225 : widest_integer_literal_type_node);
2227 if (warn_traditional || ! flag_traditional)
2229 /* Calculate the ISO type. */
/* The chain tries int, unsigned int, long, unsigned long, long long,
   unsigned long long, then the widest literal types, skipping candidates
   excluded by the suffixes or (for unsigned int) by a decimal radix.  */
2230 if (! spec_long && ! spec_unsigned
2231 && int_fits_type_p (value, integer_type_node))
2232 ansi_type = integer_type_node;
2233 else if (! spec_long && (base != 10 || spec_unsigned)
2234 && int_fits_type_p (value, unsigned_type_node))
2235 ansi_type = unsigned_type_node;
2236 else if (! spec_unsigned && !spec_long_long
2237 && int_fits_type_p (value, long_integer_type_node))
2238 ansi_type = long_integer_type_node;
2239 else if (! spec_long_long
2240 && int_fits_type_p (value, long_unsigned_type_node))
2241 ansi_type = long_unsigned_type_node;
2242 else if (! spec_unsigned
2243 && int_fits_type_p (value, long_long_integer_type_node))
2244 ansi_type = long_long_integer_type_node;
2245 else if (int_fits_type_p (value, long_long_unsigned_type_node))
2246 ansi_type = long_long_unsigned_type_node;
2247 else if (! spec_unsigned
2248 && int_fits_type_p (value, widest_integer_literal_type_node))
2249 ansi_type = widest_integer_literal_type_node;
2251 ansi_type = widest_unsigned_literal_type_node;
2254 type = flag_traditional ? trad_type : ansi_type;
2256 /* We assume that constants specified in a non-decimal
2257 base are bit patterns, and that the programmer really
2258 meant what they wrote. */
2259 if (warn_traditional && !in_system_header
2260 && base == 10 && trad_type != ansi_type)
2262 if (TYPE_PRECISION (trad_type) != TYPE_PRECISION (ansi_type))
2263 warning ("width of integer constant changes with -traditional");
2264 else if (TREE_UNSIGNED (trad_type) != TREE_UNSIGNED (ansi_type))
2265 warning ("integer constant is unsigned in ISO C, signed with -traditional");
2267 warning ("width of integer constant may change on other systems with -traditional");
2270 if (pedantic && !flag_traditional && !spec_long_long && !warn
2271 && (TYPE_PRECISION (long_integer_type_node) < TYPE_PRECISION (type)))
2274 pedwarn ("integer constant larger than the maximum value of an unsigned long int");
2277 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
2278 warning ("decimal constant is so large that it is unsigned");
/* A complex integer constant is built only when the chosen type is no
   wider than int; a wider one is a hard error.  */
2282 if (TYPE_PRECISION (type)
2283 <= TYPE_PRECISION (integer_type_node))
2284 value = build_complex (NULL_TREE, integer_zero_node,
2285 convert (integer_type_node, value));
2287 ERROR ("complex integer constant is too wide for 'complex int'");
2289 else if (flag_traditional && !int_fits_type_p (value, type))
2290 /* The traditional constant 0x80000000 is signed
2291 but doesn't fit in the range of int.
2292 This will change it to -0x80000000, which does fit. */
2294 TREE_TYPE (value) = unsigned_type (type);
2295 value = convert (type, value);
2296 TREE_OVERFLOW (value) = TREE_CONSTANT_OVERFLOW (value) = 0;
2299 TREE_TYPE (value) = type;
2301 /* If it's still an integer (not a complex), and it doesn't
2302 fit in the type we choose for it, then pedwarn. */
2305 && TREE_CODE (TREE_TYPE (value)) == INTEGER_TYPE
2306 && ! int_fits_type_p (value, TREE_TYPE (value)))
2307 pedwarn ("integer constant is larger than the maximum value for its type");
/* The message prints the (p - str) characters of STR consumed so far.  */
2311 error ("missing white space after number '%.*s'", (int) (p - str), str);
/* Last visible return; presumably the result of the `syntax_error' path
   (the label itself is not visible in this listing -- confirm).  */
2316 return integer_zero_node;
/* lex_string: build a string constant from the LEN characters at STR.

   STR/LEN delimit the characters of the literal; WIDE selects a wide
   string.  The characters are decoded one at a time into a stack buffer
   sized (LEN + 1) elements of either one byte or WCHAR_BYTES bytes:
     - with MULTIBYTE_CHARS, characters are decoded through local_mbtowc
       and an invalid sequence only draws a warning;
     - a backslash is expanded through readescape unless
       ignore_escape_flag is set, with a pedwarn when the escape value
       does not fit in WIDTH bits;
     - a wide character is stored as WCHAR_BYTES separate bytes, ordered
       according to BYTES_BIG_ENDIAN.
   The buffer is handed to build_string and the result is typed as
   wchar_array_type_node or char_array_type_node.

   NOTE(review): several lines of this function (declarations, braces,
   the final return) are missing from this listing; the comments describe
   only what is visible.  */
2320 lex_string (str, len, wide)
2326 char *buf = alloca ((len + 1) * (wide ? WCHAR_BYTES : 1));
2328 const char *p = str, *limit = str + len;
/* WIDTH is the size in bits of one element of the resulting string.  */
2330 unsigned width = wide ? WCHAR_TYPE_SIZE
2331 : TYPE_PRECISION (char_type_node);
2333 #ifdef MULTIBYTE_CHARS
2334 /* Reset multibyte conversion state. */
2335 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2340 #ifdef MULTIBYTE_CHARS
2344 char_len = local_mbtowc (&wc, p, limit - p);
2347 warning ("Ignoring invalid multibyte character");
2360 if (c == '\\' && !ignore_escape_flag)
2362 p = readescape (p, limit, &c);
/* The range check is skipped when WIDTH is as wide as a host int, since
   the shift below would then be undefined.  */
2363 if (width < HOST_BITS_PER_INT
2364 && (unsigned) c >= ((unsigned)1 << width))
2365 pedwarn ("escape sequence out of range for character");
2368 /* Add this single character into the buffer either as a wchar_t
2369 or as a single byte. */
2372 unsigned charwidth = TYPE_PRECISION (char_type_node);
/* The mask isolates one char-sized unit of C, matching the
   `byte * charwidth' shift below, so it must be built from CHARWIDTH.
   Using WIDTH (WCHAR_TYPE_SIZE on this path) gave a mask wider than one
   byte and an undefined shift when a wide character is as wide as int.  */
2373 unsigned bytemask = (1 << charwidth) - 1;
2376 for (byte = 0; byte < WCHAR_BYTES; ++byte)
/* Bytes beyond the size of C have no bits to extract.  */
2379 if (byte >= (int) sizeof (c))
2382 n = (c >> (byte * charwidth)) & bytemask;
2383 if (BYTES_BIG_ENDIAN)
2384 q[WCHAR_BYTES - byte - 1] = n;
2396 /* Terminate the string value, either with a single byte zero
2397 or with a wide zero. */
2401 memset (q, 0, WCHAR_BYTES);
/* Q - BUF is the number of bytes written to the buffer.  */
2409 value = build_string (q - buf, buf);
2412 TREE_TYPE (value) = wchar_array_type_node;
2414 TREE_TYPE (value) = char_array_type_node;
2419 lex_charconst (str, len, wide)
2424 const char *limit = str + len;
2428 unsigned width = TYPE_PRECISION (char_type_node);
2433 #ifdef MULTIBYTE_CHARS
2434 int longest_char = local_mb_cur_max ();
2435 (void) local_mbtowc (NULL_PTR, NULL_PTR, 0);
2438 max_chars = TYPE_PRECISION (integer_type_node) / width;
2440 width = WCHAR_TYPE_SIZE;
2444 #ifdef MULTIBYTE_CHARS
2448 char_len = local_mbtowc (&wc, str, limit - str);
2451 warning ("Ignoring invalid multibyte character");
2467 str = readescape (str, limit, &c);
2468 if (width < HOST_BITS_PER_INT
2469 && (unsigned) c >= ((unsigned)1 << width))
2470 pedwarn ("escape sequence out of range for character");
2472 #ifdef MAP_CHARACTER
2474 c = MAP_CHARACTER (c);
2477 /* Merge character into result; ignore excess chars. */
2478 num_chars += (width / TYPE_PRECISION (char_type_node));
2479 if (num_chars < max_chars + 1)
2481 if (width < HOST_BITS_PER_INT)
2482 result = (result << width) | (c & ((1 << width) - 1));
2488 if (chars_seen == 0)
2489 error ("empty character constant");
2490 else if (num_chars > max_chars)
2492 num_chars = max_chars;
2493 error ("character constant too long");
2495 else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
2496 warning ("multi-character character constant");
2498 /* If char type is signed, sign-extend the constant. */
2501 int num_bits = num_chars * width;
2503 /* We already got an error; avoid invalid shift. */
2504 value = build_int_2 (0, 0);
2505 else if (TREE_UNSIGNED (char_type_node)
2506 || ((result >> (num_bits - 1)) & 1) == 0)
2507 value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
2508 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2511 value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
2512 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
2514 /* In C, a character constant has type 'int'; in C++, 'char'. */
2515 if (chars_seen <= 1 && c_language == clk_cplusplus)
2516 TREE_TYPE (value) = char_type_node;
2518 TREE_TYPE (value) = integer_type_node;
2522 value = build_int_2 (result, 0);
2523 TREE_TYPE (value) = wchar_type_node;