1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 88, 89, 92, 94-96, 1997 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
48 /* MULTIBYTE_CHARS support only works for native compilers.
49 ??? Ideally what we want is to model widechar support after
50 the current floating point support. */
52 #undef MULTIBYTE_CHARS
55 #ifdef MULTIBYTE_CHARS
/* Option block for the cpplib reader (init_parse stores its address in
   parse_in.data), and the kind of the token most recently returned by
   cpp_get_token.  */
67 cpp_options parse_options;
68 static enum cpp_token cpp_token;
71 /* The elements of `ridpointers' are identifier nodes
72 for the reserved type names and storage classes.
73 It is indexed by a RID_... value. */
74 tree ridpointers[(int) RID_MAX];
76 /* Cause the `yydebug' variable to be defined. */
/* Read cursor and end of the text that GETC hands out one character
   at a time.  */
80 static unsigned char *yy_cur, *yy_lim;
/* NOTE(review): the five statements below look like the body of
   yy_get_token, whose header is not visible here.  They reset
   parse_in.limit to the start of parse_in.token_buffer, fetch one more
   token, test for end of input, and point yy_cur/yy_lim at the text
   that was just written.  */
87 parse_in.limit = parse_in.token_buffer;
88 cpp_token = cpp_get_token (&parse_in);
89 if (cpp_token == CPP_EOF)
91 yy_lim = CPP_PWRITTEN (&parse_in);
92 yy_cur = parse_in.token_buffer;
/* Character input on top of cpplib: return the next buffered byte, or
   call yy_get_token once the buffered text is used up.  UNGETC only
   steps the cursor back; its argument is evaluated but not stored.  */
98 #define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
99 #define UNGETC(c) ((c), yy_cur--)
/* Character input straight from the stdio stream FINPUT.
   NOTE(review): presumably the alternative to the cpplib definitions
   above; the conditional that selects between them is not visible here.  */
101 #define GETC() getc (finput)
102 #define UNGETC(c) ungetc (c, finput)
105 /* the declaration found for the last IDENTIFIER token read in.
106 yylex must look this up to detect typedefs, which get token type TYPENAME,
107 so it is left around in case the identifier is not a typedef but is
108 used in a context which makes it a reference to a variable. */
111 /* Nonzero enables objc features. */
113 int doing_objc_thang;
115 extern tree is_class_name ();
119 /* File used for outputting assembler code. */
120 extern FILE *asm_out_file;
122 #ifndef WCHAR_TYPE_SIZE
124 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
126 #define WCHAR_TYPE_SIZE BITS_PER_WORD
130 /* Number of bytes in a wide character. */
131 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
133 static int maxtoken; /* Current nominal length of token buffer. */
134 char *token_buffer; /* Pointer to token buffer.
135 Actual allocated length is maxtoken + 2.
136 This is not static because objc-parse.y uses it. */
138 static int indent_level = 0; /* Number of { minus number of }. */
140 /* Nonzero if end-of-file has been seen on input. */
141 static int end_of_file;
144 /* Buffered-back input character; faster than using ungetc. */
145 static int nextchar = -1;
148 #ifdef HANDLE_SYSV_PRAGMA
149 static int handle_sysv_pragma PROTO((int));
150 #endif /* HANDLE_SYSV_PRAGMA */
151 static int skip_white_space PROTO((int));
152 static char *extend_token_buffer PROTO((char *));
153 static int readescape PROTO((int *));
154 int check_newline ();
156 /* Do not insert generated code into the source, instead, include it.
157 This allows us to build gcc automatically even for targets that
158 need to add or modify the reserved keyword lists. */
161 /* Return something to represent absolute declarators containing a *.
162 TARGET is the absolute declarator that the * contains.
163 TYPE_QUALS is a list of modifiers such as const or volatile
164 to apply to the pointer type, represented as identifiers.
166 We return an INDIRECT_REF whose "contents" are TARGET
167 and whose type is the modifier list. */
170 make_pointer_declarator (type_quals, target)
171 tree type_quals, target;
/* The qualifier list is passed where build1 takes the node's type, so
   it travels in the type slot of the INDIRECT_REF; TARGET is the
   node's single operand.  */
173 return build1 (INDIRECT_REF, type_quals, target);
/* Replace with the empty string the name of every wordlist entry whose
   rid lies in the range RID_IN .. RID_ONEWAY.
   NOTE(review): presumably this keeps the reserved-word lookup from
   matching those words until remember_protocol_qualifiers restores
   them; the lookup code is not visible here.  */
177 forget_protocol_qualifiers ()
/* N is the size of wordlist divided by the size of one struct resword,
   i.e. the number of entries if wordlist is an array of that type.  */
179 int i, n = sizeof wordlist / sizeof (struct resword);
181 for (i = 0; i < n; i++)
182 if ((int) wordlist[i].rid >= (int) RID_IN
183 && (int) wordlist[i].rid <= (int) RID_ONEWAY)
184 wordlist[i].name = "";
/* Put back the spellings "in", "out", "inout", "bycopy" and "oneway"
   on the wordlist entries whose rid is RID_IN, RID_OUT, RID_INOUT,
   RID_BYCOPY and RID_ONEWAY respectively.  Entries with any other rid
   are left untouched.  */
188 remember_protocol_qualifiers ()
/* N is the size of wordlist divided by the size of one struct resword.  */
190 int i, n = sizeof wordlist / sizeof (struct resword);
192 for (i = 0; i < n; i++)
193 if (wordlist[i].rid == RID_IN)
194 wordlist[i].name = "in";
195 else if (wordlist[i].rid == RID_OUT)
196 wordlist[i].name = "out";
197 else if (wordlist[i].rid == RID_INOUT)
198 wordlist[i].name = "inout";
199 else if (wordlist[i].rid == RID_BYCOPY)
200 wordlist[i].name = "bycopy";
201 else if (wordlist[i].rid == RID_ONEWAY)
202 wordlist[i].name = "oneway";
/* Start-up code for the lexer.  The visible statements set up the
   cpplib reader for FILENAME, size the identifier nodes, allocate the
   token buffer, fill in ridpointers, and disable the reserved words
   that the selected options rule out.
   NOTE(review): lines are missing from this listing, so the statements
   below may belong to more than one function -- confirm against the
   full source.  */
207 init_parse (filename)
/* Initialize the cpplib reader and its option block, then try to open
   FILENAME for reading.  */
214 cpp_reader_init (&parse_in);
215 parse_in.data = &parse_options;
216 cpp_options_init (&parse_options);
217 cpp_handle_options (&parse_in, 0, NULL); /* FIXME */
218 parse_in.show_column = 1;
219 if (! cpp_start_read (&parse_in, filename))
/* NOTE(review): this call follows a gap in the listing and may belong
   to a separate clean-up routine rather than to init_parse.  */
226 cpp_finish (&parse_in);
233 /* Make identifier nodes long enough for the language-specific slots. */
234 set_identifier_size (sizeof (struct lang_identifier));
236 /* Start it at 0, because check_newline is called at the very beginning
237 and will increment it to 1. */
240 #ifdef MULTIBYTE_CHARS
241 /* Change to the native locale for multibyte conversions. */
242 setlocale (LC_CTYPE, "");
/* The buffer is allocated two bytes longer than the nominal length
   MAXTOKEN.  */
246 token_buffer = (char *) xmalloc (maxtoken + 2);
/* Create the identifier node for each reserved type name, storage
   class and qualifier, stored under its RID_ code.  */
248 ridpointers[(int) RID_INT] = get_identifier ("int");
249 ridpointers[(int) RID_CHAR] = get_identifier ("char");
250 ridpointers[(int) RID_VOID] = get_identifier ("void");
251 ridpointers[(int) RID_FLOAT] = get_identifier ("float");
252 ridpointers[(int) RID_DOUBLE] = get_identifier ("double");
253 ridpointers[(int) RID_SHORT] = get_identifier ("short");
254 ridpointers[(int) RID_LONG] = get_identifier ("long");
255 ridpointers[(int) RID_UNSIGNED] = get_identifier ("unsigned");
256 ridpointers[(int) RID_SIGNED] = get_identifier ("signed");
257 ridpointers[(int) RID_INLINE] = get_identifier ("inline");
258 ridpointers[(int) RID_CONST] = get_identifier ("const");
259 ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");
260 ridpointers[(int) RID_AUTO] = get_identifier ("auto");
261 ridpointers[(int) RID_STATIC] = get_identifier ("static");
262 ridpointers[(int) RID_EXTERN] = get_identifier ("extern");
263 ridpointers[(int) RID_TYPEDEF] = get_identifier ("typedef");
264 ridpointers[(int) RID_REGISTER] = get_identifier ("register");
265 ridpointers[(int) RID_ITERATOR] = get_identifier ("iterator");
266 ridpointers[(int) RID_COMPLEX] = get_identifier ("complex");
267 ridpointers[(int) RID_ID] = get_identifier ("id");
268 ridpointers[(int) RID_IN] = get_identifier ("in");
269 ridpointers[(int) RID_OUT] = get_identifier ("out");
270 ridpointers[(int) RID_INOUT] = get_identifier ("inout");
271 ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");
272 ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");
/* Start out with the wordlist names of the protocol qualifiers
   blanked.  */
273 forget_protocol_qualifiers();
275 /* Some options inhibit certain reserved words.
276 Clear those words out of the hash table so they won't be recognized. */
277 #define UNSET_RESERVED_WORD(STRING) \
278 do { struct resword *s = is_reserved_word (STRING, sizeof (STRING) - 1); \
279 if (s) s->name = ""; } while (0)
/* `id' stays reserved only when Objective C features are enabled.  */
281 if (! doing_objc_thang)
282 UNSET_RESERVED_WORD ("id");
/* Words that are not reserved when flag_traditional is set.  */
284 if (flag_traditional)
286 UNSET_RESERVED_WORD ("const");
287 UNSET_RESERVED_WORD ("volatile");
288 UNSET_RESERVED_WORD ("typeof");
289 UNSET_RESERVED_WORD ("signed");
290 UNSET_RESERVED_WORD ("inline");
291 UNSET_RESERVED_WORD ("iterator");
292 UNSET_RESERVED_WORD ("complex");
/* NOTE(review): the condition guarding this second list is not visible
   in this listing.  */
296 UNSET_RESERVED_WORD ("asm");
297 UNSET_RESERVED_WORD ("typeof");
298 UNSET_RESERVED_WORD ("inline");
299 UNSET_RESERVED_WORD ("iterator");
300 UNSET_RESERVED_WORD ("complex");
305 reinit_parse_for_function ()
309 /* Function used when yydebug is set, to print a token in more detail. */
312 yyprint (file, yychar, yylval)
/* When the node T has a name string, print it in quotes after the
   token.  NOTE(review): the token kinds this applies to are selected
   by code that is not visible here.  */
324 if (IDENTIFIER_POINTER (t))
325 fprintf (file, " `%s'", IDENTIFIER_POINTER (t));
/* For an integer constant, print its high and low words.  The
   conditionals below depend on the width of HOST_WIDE_INT and on
   whether it differs from int; the format strings they choose between
   are not visible here.  */
330 if (TREE_CODE (t) == INTEGER_CST)
332 #if HOST_BITS_PER_WIDE_INT == 64
333 #if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
339 #if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
345 TREE_INT_CST_HIGH (t), TREE_INT_CST_LOW (t));
350 /* Iff C is a carriage return, warn about it - if appropriate -
351 and return nonzero. */
/* Flag tested before warning, so that the pair of warnings below is
   given only while it is still zero.  It is static, so it persists
   across calls.  NOTE(review): the statement that sets it is not
   visible in this listing.  */
356 static int newline_warning = 0;
360 /* ANSI C says the effects of a carriage return in a source file
362 if (pedantic && !newline_warning)
364 warning ("carriage return in source file");
365 warning ("(we only warn about the first carriage return)");
373 /* If C is not whitespace, return C.
374 Otherwise skip whitespace and return first nonwhite char read. */
384 /* We don't recognize comments here, because
385 cpp output can include / and * consecutively as operators.
386 Also, there's no need, since cpp removes all comments. */
/* Start-of-line processing is delegated to check_newline; whatever it
   returns becomes the next character to examine.  */
389 c = check_newline ();
/* NOTE(review): presumably reached for a backslash that is not
   followed by a newline; the test itself is not visible here.  */
410 error ("stray '\\' in program");
420 /* Skips all of the white space at the current location in the input file.
421 Must use and reset nextchar if it has the next character. */
424 position_after_white_space ()
/* Take the buffered-back character and mark the buffer empty (-1).
   NOTE(review): the test that chooses between this and reading a
   fresh character is not visible here.  */
430 c = nextchar, nextchar = -1;
/* Skip the white space, then push back the first non-white character
   so that the next read sees it again.  */
435 UNGETC (skip_white_space (c));
438 /* Like skip_white_space, but don't advance beyond the end of line.
439 Moreover, we don't get passed a character to start with. */
441 skip_white_space_on_line ()
470 /* Make the token buffer longer, preserving the data in it.
471 P should point to just beyond the last valid character in the old buffer.
472 The value we return is a pointer to the new buffer
473 at a place corresponding to P. */
476 extend_token_buffer (p)
/* Remember P as an offset from the start of the buffer, so that it can
   be re-based on whatever address the reallocation returns.  */
479 int offset = p - token_buffer;
/* Roughly double the nominal length; the allocation itself is always
   two bytes longer than MAXTOKEN.  */
481 maxtoken = maxtoken * 2 + 10;
482 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
484 return token_buffer + offset;
488 #define GET_DIRECTIVE_LINE() get_directive_line (finput)
489 #else /* USE_CPPLIB */
490 /* Read the rest of a #-directive from input stream FINPUT.
491 In normal use, the directive name and the white space after it
492 have already been read, so they won't be included in the result.
493 We allow for the fact that the directive line may contain
494 a newline embedded within a character or string literal which forms
495 a part of the directive.
497 The value is a string in a reusable buffer. It remains valid
498 only until the next time this function is called. */
501 GET_DIRECTIVE_LINE ()
/* The reusable result buffer and its current size; both are static and
   so persist from one call to the next.  */
503 static char *directive_buffer = NULL;
504 static unsigned buffer_length = 0;
506 register char *buffer_limit;
/* LOOKING_FOR is the quote character that will close the literal we
   are inside, or 0 when outside any literal.  CHAR_ESCAPED is nonzero
   when the previous character was a backslash that was not itself
   escaped.  */
507 register int looking_for = 0;
508 register int char_escaped = 0;
/* First call: allocate the initial buffer.  */
510 if (buffer_length == 0)
512 directive_buffer = (char *)xmalloc (128);
516 buffer_limit = &directive_buffer[buffer_length];
518 for (p = directive_buffer; ; )
522 /* Make buffer bigger if it is full. */
523 if (p >= buffer_limit)
525 register unsigned bytes_used = (p - directive_buffer);
529 = (char *)xrealloc (directive_buffer, buffer_length);
/* Re-base the write pointer and the limit on the reallocated buffer.  */
530 p = &directive_buffer[bytes_used];
531 buffer_limit = &directive_buffer[buffer_length];
536 /* Discard initial whitespace. */
537 if ((c == ' ' || c == '\t') && p == directive_buffer)
540 /* Detect the end of the directive. */
541 if (c == '\n' && looking_for == 0)
550 return directive_buffer;
552 /* Handle string and character constant syntax. */
555 if (looking_for == c && !char_escaped)
556 looking_for = 0; /* Found terminator... stop looking. */
559 if (c == '\'' || c == '"')
560 looking_for = c; /* Don't stop buffering until we see another
561 one of these (or an EOF). */
563 /* Handle backslash. */
/* A backslash escapes the following character unless the backslash was
   itself escaped.  */
564 char_escaped = (c == '\\' && ! char_escaped);
567 #endif /* USE_CPPLIB */
569 /* At the beginning of a line, increment the line number
570 and process any #-directive on this line.
571 If the line is a #-directive, read the entire line and return a newline.
572 Otherwise, return the line's first non-whitespace character. */
582 /* Read first nonwhite char on the line. */
585 while (c == ' ' || c == '\t')
590 /* If not #, return it so caller will use it. */
594 /* Read first nonwhite char after the `#'. */
597 while (c == ' ' || c == '\t')
600 /* If a letter follows, then if the word here is `line', skip
601 it and ignore it; otherwise, ignore the line, with an error
602 if the word isn't `pragma', `ident', `define', or `undef'. */
604 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
613 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'
614 || whitespace_cr (c) ))
616 while (c == ' ' || c == '\t' || whitespace_cr (c))
/* Pragma handling: when HANDLE_SYSV_PRAGMA is defined the token that
   was read is handed to handle_sysv_pragma, whose result is returned;
   otherwise the HANDLE_PRAGMA macro is tried.  */
620 #ifdef HANDLE_SYSV_PRAGMA
623 if (token != IDENTIFIER)
625 return handle_sysv_pragma (token);
626 #else /* !HANDLE_SYSV_PRAGMA */
631 if (token != IDENTIFIER)
633 if (HANDLE_PRAGMA (finput, yylval.ttype))
639 ??? do not know what to do ???;
640 #endif /* !USE_CPPLIB */
641 #endif /* HANDLE_PRAGMA */
642 #endif /* !HANDLE_SYSV_PRAGMA */
654 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
/* Pass the current line number and the rest of the directive text to
   debug_define.  */
657 debug_define (lineno, GET_DIRECTIVE_LINE ());
667 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
/* Likewise for debug_undef.  */
670 debug_undef (lineno, GET_DIRECTIVE_LINE ());
679 && ((c = GETC()) == ' ' || c == '\t'))
688 && ((c = GETC()) == ' ' || c == '\t'))
690 /* #ident. The pedantic warning is now in cccp.c. */
692 /* Here we have just seen `#ident '.
693 A string constant should follow. */
695 c = skip_white_space_on_line ();
697 /* If no argument, ignore the line. */
704 || TREE_CODE (yylval.ttype) != STRING_CST)
706 error ("invalid #ident");
/* Emit the string through ASM_OUTPUT_IDENT to the assembler output
   file when the target defines that macro.  */
712 #ifdef ASM_OUTPUT_IDENT
713 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (yylval.ttype));
717 /* Skip the rest of this line. */
722 error ("undefined or invalid # directive");
727 /* Here we have either `#line' or `# <nonletter>'.
728 In either case, it should be a line number; a digit should follow. */
730 /* Can't use skip_white_space here, but must handle all whitespace
731 that is not '\n', lest we get a recursion for '\r' '\n' when
734 c = skip_white_space_on_line ();
736 /* If the # is the only nonwhite char on the line,
737 just ignore it. Check the new newline. */
741 /* Something follows the #; read a token. */
746 if (token == CONSTANT
747 && TREE_CODE (yylval.ttype) == INTEGER_CST)
749 int old_lineno = lineno;
751 /* subtract one, because it is the following line that
752 gets the specified number */
754 int l = TREE_INT_CST_LOW (yylval.ttype) - 1;
756 /* Is this the last nonwhite stuff on the line? */
757 c = skip_white_space_on_line ();
760 /* No more: store the line number and check following line. */
766 /* More follows: it must be a string constant (filename). */
768 /* Read the string constant. */
771 if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
773 error ("invalid #line");
/* The file name is copied out of the STRING_CST into storage obtained
   from permalloc, sized as the string length plus one.  */
778 = (char *) permalloc (TREE_STRING_LENGTH (yylval.ttype) + 1);
779 strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
782 /* Each change of file name
783 reinitializes whether we are now in a system header. */
784 in_system_header = 0;
786 if (main_input_filename == 0)
787 main_input_filename = input_filename;
789 /* Is this the last nonwhite stuff on the line? */
790 c = skip_white_space_on_line ();
793 /* Update the name in the top element of input_file_stack. */
794 if (input_file_stack)
795 input_file_stack->name = input_filename;
804 /* `1' after file name means entering new file.
805 `2' after file name means just left a file. */
807 if (token == CONSTANT
808 && TREE_CODE (yylval.ttype) == INTEGER_CST)
810 if (TREE_INT_CST_LOW (yylval.ttype) == 1)
812 /* Pushing to a new file. */
814 = (struct file_stack *) xmalloc (sizeof (struct file_stack));
/* Record in the current top entry the line we were on, then link the
   new entry P in as the top of input_file_stack, remembering the brace
   depth at the point the file was entered.  */
815 input_file_stack->line = old_lineno;
816 p->next = input_file_stack;
817 p->name = input_filename;
818 p->indent_level = indent_level;
819 input_file_stack = p;
820 input_file_stack_tick++;
821 debug_start_source_file (input_filename);
824 else if (TREE_INT_CST_LOW (yylval.ttype) == 2)
826 /* Popping out of a file. */
827 if (input_file_stack->next)
829 struct file_stack *p = input_file_stack;
/* The brace depth should be back at the value saved when this file was
   entered; warn when it is not.  */
830 if (indent_level != p->indent_level)
832 warning_with_file_and_line
833 (p->name, old_lineno,
834 "This file contains more `%c's than `%c's.",
835 indent_level > p->indent_level ? '{' : '}',
836 indent_level > p->indent_level ? '}' : '{');
838 input_file_stack = p->next;
840 input_file_stack_tick++;
841 debug_end_source_file (input_file_stack->line);
844 error ("#-lines for entering and leaving files don't match");
850 /* Now that we've pushed or popped the input stack,
851 update the name in the top element. */
852 if (input_file_stack)
853 input_file_stack->name = input_filename;
855 /* If we have handled a `1' or a `2',
856 see if there is another number to read. */
859 /* Is this the last nonwhite stuff on the line? */
860 c = skip_white_space_on_line ();
869 /* `3' after file name means this is a system header file. */
871 if (token == CONSTANT
872 && TREE_CODE (yylval.ttype) == INTEGER_CST
873 && TREE_INT_CST_LOW (yylval.ttype) == 3)
874 in_system_header = 1, used_up = 1;
878 /* Is this the last nonwhite stuff on the line? */
879 c = skip_white_space_on_line ();
885 warning ("unrecognized text at end of #line");
888 error ("invalid #-line");
890 /* skip the rest of this line. */
/* When a character is buffered in nextchar, resume from it and mark
   the buffer empty.  */
893 if (c != '\n' && c != EOF && nextchar >= 0)
894 c = nextchar, nextchar = -1;
896 while (c != '\n' && c != EOF)
901 #ifdef HANDLE_SYSV_PRAGMA
903 /* Handle a #pragma directive.
904 TOKEN is the token we read after `#pragma'. Processes the entire input
905 line and returns a character for the caller to reread: either \n or EOF. */
/* The visible calls hand the text in token_buffer to
   handle_pragma_token, with or without a tree value, and make a final
   call with two null arguments once a newline or EOF has been reached.
   NOTE(review): presumably that last call marks the end of the pragma
   -- confirm against handle_pragma_token.  */
907 /* This function has to be in this file, in order to get at
911 handle_sysv_pragma (token)
924 handle_pragma_token (token_buffer, yylval.ttype);
927 handle_pragma_token (token_buffer, 0);
931 c = nextchar, nextchar = -1;
936 while (c == ' ' || c == '\t')
938 if (c == '\n' || c == EOF)
940 handle_pragma_token (0, 0);
948 #endif /* HANDLE_SYSV_PRAGMA */
950 #define ENDFILE -1 /* token that represents end-of-file */
952 /* Read an escape sequence, returning its equivalent as a character,
953 or store 1 in *ignore_ptr if it is backslash-newline. */
956 readescape (ignore_ptr)
/* C is the next input character.  NOTE(review): presumably the caller
   has already consumed the backslash itself.  */
959 register int c = GETC();
961 register unsigned count;
962 unsigned firstdig = 0;
/* Hexadecimal escape, `\x'.  */
968 if (warn_traditional)
969 warning ("the meaning of `\\x' varies with -traditional");
971 if (flag_traditional)
/* Test for a character that is not a hexadecimal digit.
   NOTE(review): presumably this ends the digit loop; the action taken
   is not visible here.  */
980 if (!(c >= 'a' && c <= 'f')
981 && !(c >= 'A' && c <= 'F')
982 && !(c >= '0' && c <= '9'))
/* Add the value of this hex digit to CODE.  */
988 if (c >= 'a' && c <= 'f')
989 code += c - 'a' + 10;
990 if (c >= 'A' && c <= 'F')
991 code += c - 'A' + 10;
992 if (c >= '0' && c <= '9')
/* True from the first nonzero digit onward.  NOTE(review): presumably
   COUNT is advanced only under this test, so that leading zeros are
   not counted; the guarded statements are not visible here.  */
994 if (code != 0 || count != 0)
1003 error ("\\x used with no following hex digits");
1004 else if (count == 0)
1005 /* Digits are all 0's. Ok. */
/* At four bits per hex digit, check the counted digits against the
   precision of the target's int type.  */
1007 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
1009 && ((1 << (TYPE_PRECISION (integer_type_node) - (count - 1) * 4))
1011 pedwarn ("hex escape out of range");
/* Octal escape: accumulate at most three octal digits into CODE.  */
1014 case '0': case '1': case '2': case '3': case '4':
1015 case '5': case '6': case '7':
1018 while ((c <= '7') && (c >= '0') && (count++ < 3))
1020 code = (code * 8) + (c - '0');
1026 case '\\': case '\'': case '"':
/* NOTE(review): presumably the `\n' case, mapped to the target's
   newline code; the case label is not visible here.  */
1035 return TARGET_NEWLINE;
1050 if (warn_traditional)
1051 warning ("the meaning of `\\a' varies with -traditional");
1053 if (flag_traditional)
1058 #if 0 /* Vertical tab is present in common usage compilers. */
1059 if (flag_traditional)
1067 pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
1073 /* `\(', etc, are used at beginning of line to avoid confusing Emacs. */
1077 /* `\%' is used to prevent SCCS from getting confused. */
1080 pedwarn ("non-ANSI escape sequence `\\%c'", c);
/* Unknown escape: a character in the range 040 .. 0176 is shown as
   itself; the other message shows the character code in hex.  */
1083 if (c >= 040 && c < 0177)
1084 pedwarn ("unknown escape sequence `\\%c'", c);
1086 pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
/* Build an error message in BUF: start from STRING and append a phrase
   describing the token currently held in token_buffer, then report it
   with error.
   NOTE(review): the function header and the declaration of BUF are not
   visible in this listing (the routine is presumably yyerror), so the
   capacity of BUF cannot be checked against the appends below.  */
1096 strcpy (buf, string);
1098 /* We can't print string and character constants well
1099 because the token_buffer contains the result of processing escapes. */
1101 strcat (buf, " at end of input");
1102 else if (token_buffer[0] == 0)
1103 strcat (buf, " at null character");
1104 else if (token_buffer[0] == '"')
1105 strcat (buf, " before string constant");
1106 else if (token_buffer[0] == '\'')
1107 strcat (buf, " before character constant");
/* Outside 040 .. 0176 the first character is shown as an octal code
   instead of being printed directly.  */
1108 else if (token_buffer[0] < 040 || (unsigned char) token_buffer[0] >= 0177)
1109 sprintf (buf + strlen (buf), " before character 0%o",
1110 (unsigned char) token_buffer[0]);
1112 strcat (buf, " before `%s'");
/* BUF is used as the format string; only the last alternative above
   leaves a `%s' in it for token_buffer to fill.
   NOTE(review): any `%' already present in STRING would also be
   interpreted as a conversion here -- confirm that callers never pass
   one.  */
1114 error (buf, token_buffer);
1124 char long_long_flag;
1127 struct try_type type_sequence[] =
1129 { &integer_type_node, 0, 0, 0},
1130 { &unsigned_type_node, 1, 0, 0},
1131 { &long_integer_type_node, 0, 1, 0},
1132 { &long_unsigned_type_node, 1, 1, 0},
1133 { &long_long_integer_type_node, 0, 1, 1},
1134 { &long_long_unsigned_type_node, 1, 1, 1}
1149 c = nextchar, nextchar = -1;
1154 /* Effectively do c = skip_white_space (c)
1155 but do it faster in the usual cases. */
1168 /* Call skip_white_space so we can warn if appropriate. */
1173 c = skip_white_space (c);
1175 goto found_nonwhite;
1179 token_buffer[0] = c;
1180 token_buffer[1] = 0;
1182 /* yylloc.first_line = lineno; */
1188 token_buffer[0] = 0;
1193 /* Capital L may start a wide-string or wide-character constant. */
1195 register int c = GETC();
1204 goto string_constant;
1211 if (!doing_objc_thang)
1218 /* '@' may start a constant string object. */
1219 register int c = GETC ();
1223 goto string_constant;
1226 /* Fall through to treat '@' as the start of an identifier. */
1229 case 'A': case 'B': case 'C': case 'D': case 'E':
1230 case 'F': case 'G': case 'H': case 'I': case 'J':
1231 case 'K': case 'M': case 'N': case 'O':
1232 case 'P': case 'Q': case 'R': case 'S': case 'T':
1233 case 'U': case 'V': case 'W': case 'X': case 'Y':
1235 case 'a': case 'b': case 'c': case 'd': case 'e':
1236 case 'f': case 'g': case 'h': case 'i': case 'j':
1237 case 'k': case 'l': case 'm': case 'n': case 'o':
1238 case 'p': case 'q': case 'r': case 's': case 't':
1239 case 'u': case 'v': case 'w': case 'x': case 'y':
1245 while (isalnum (c) || c == '_' || c == '$' || c == '@')
1247 /* Make sure this char really belongs in an identifier. */
1248 if (c == '@' && ! doing_objc_thang)
1252 if (! dollars_in_ident)
1253 error ("`$' in identifier");
1255 pedwarn ("`$' in identifier");
1258 if (p >= token_buffer + maxtoken)
1259 p = extend_token_buffer (p);
1275 /* Try to recognize a keyword. Uses minimum-perfect hash function */
1278 register struct resword *ptr;
1280 if ((ptr = is_reserved_word (token_buffer, p - token_buffer)))
1283 yylval.ttype = ridpointers[(int) ptr->rid];
1284 value = (int) ptr->token;
1286 /* Only return OBJECTNAME if it is a typedef. */
1287 if (doing_objc_thang && value == OBJECTNAME)
1289 lastiddecl = lookup_name(yylval.ttype);
1291 if (lastiddecl == NULL_TREE
1292 || TREE_CODE (lastiddecl) != TYPE_DECL)
1296 /* Even if we decided to recognize asm, still perhaps warn. */
1298 && (value == ASM_KEYWORD || value == TYPEOF
1299 || ptr->rid == RID_INLINE)
1300 && token_buffer[0] != '_')
1301 pedwarn ("ANSI does not permit the keyword `%s'",
1306 /* If we did not find a keyword, look for an identifier
1309 if (value == IDENTIFIER)
1311 if (token_buffer[0] == '@')
1312 error("invalid identifier `%s'", token_buffer);
1314 yylval.ttype = get_identifier (token_buffer);
1315 lastiddecl = lookup_name (yylval.ttype);
1317 if (lastiddecl != 0 && TREE_CODE (lastiddecl) == TYPE_DECL)
1319 /* A user-invisible read-only initialized variable
1320 should be replaced by its value.
1321 We handle only strings since that's the only case used in C. */
1322 else if (lastiddecl != 0 && TREE_CODE (lastiddecl) == VAR_DECL
1323 && DECL_IGNORED_P (lastiddecl)
1324 && TREE_READONLY (lastiddecl)
1325 && DECL_INITIAL (lastiddecl) != 0
1326 && TREE_CODE (DECL_INITIAL (lastiddecl)) == STRING_CST)
1328 tree stringval = DECL_INITIAL (lastiddecl);
1330 /* Copy the string value so that we won't clobber anything
1331 if we put something in the TREE_CHAIN of this one. */
1332 yylval.ttype = build_string (TREE_STRING_LENGTH (stringval),
1333 TREE_STRING_POINTER (stringval));
1336 else if (doing_objc_thang)
1338 tree objc_interface_decl = is_class_name (yylval.ttype);
1340 if (objc_interface_decl)
1343 yylval.ttype = objc_interface_decl;
1353 /* Check first for common special case: single-digit 0 or 1. */
1356 UNGETC (next_c); /* Always undo this lookahead. */
1357 if (!isalnum (next_c) && next_c != '.')
1359 token_buffer[0] = (char)c, token_buffer[1] = '\0';
1360 yylval.ttype = (c == '0') ? integer_zero_node : integer_one_node;
1366 case '2': case '3': case '4':
1367 case '5': case '6': case '7': case '8': case '9':
1372 int largest_digit = 0;
1374 /* for multi-precision arithmetic,
1375 we actually store only HOST_BITS_PER_CHAR bits in each part.
1376 The number of parts is chosen so as to be sufficient to hold
1377 enough bits to fit into the two HOST_WIDE_INTs that contain
1378 the integer value (this is always at least as many bits as are
1379 in a target `long long' value, but may be wider). */
1380 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1381 int parts[TOTAL_PARTS];
1384 enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS} floatflag
1387 for (count = 0; count < TOTAL_PARTS; count++)
1395 *p++ = (c = GETC());
1396 if ((c == 'x') || (c == 'X'))
1399 *p++ = (c = GETC());
1401 /* Leading 0 forces octal unless the 0 is the only digit. */
1402 else if (c >= '0' && c <= '9')
1411 /* Read all the digits-and-decimal-points. */
1414 || (isalnum (c) && c != 'l' && c != 'L'
1415 && c != 'u' && c != 'U'
1416 && c != 'i' && c != 'I' && c != 'j' && c != 'J'
1417 && (floatflag == NOT_FLOAT || ((c != 'f') && (c != 'F')))))
1422 error ("floating constant may not be in radix 16");
1423 if (floatflag == TOO_MANY_POINTS)
1424 /* We have already emitted an error. Don't need another. */
1426 else if (floatflag == AFTER_POINT)
1428 error ("malformed floating constant");
1429 floatflag = TOO_MANY_POINTS;
1430 /* Avoid another error from atof by forcing all characters
1431 from here on to be ignored. */
1435 floatflag = AFTER_POINT;
1439 /* Accept '.' as the start of a floating-point number
1440 only when it is followed by a digit.
1441 Otherwise, unread the following non-digit
1442 and use the '.' as a structural token. */
1443 if (p == token_buffer + 2 && !isdigit (c))
1454 error ("parse error at `..'");
1457 token_buffer[1] = 0;
1464 /* It is not a decimal point.
1465 It should be a digit (perhaps a hex digit). */
1471 else if (base <= 10)
1473 if (c == 'e' || c == 'E')
1476 floatflag = AFTER_POINT;
1477 break; /* start of exponent */
1479 error ("nondigits in number and not hexadecimal");
1490 if (c >= largest_digit)
1494 for (count = 0; count < TOTAL_PARTS; count++)
1496 parts[count] *= base;
1500 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1502 &= (1 << HOST_BITS_PER_CHAR) - 1;
1508 /* If the extra highest-order part ever gets anything in it,
1509 the number is certainly too big. */
1510 if (parts[TOTAL_PARTS - 1] != 0)
1513 if (p >= token_buffer + maxtoken - 3)
1514 p = extend_token_buffer (p);
1515 *p++ = (c = GETC());
1520 error ("numeric constant with no digits");
1522 if (largest_digit >= base)
1523 error ("numeric constant contains digits beyond the radix");
1525 /* Remove terminating char from the token buffer and delimit the string */
1528 if (floatflag != NOT_FLOAT)
1530 tree type = double_type_node;
1531 int exceeds_double = 0;
1533 REAL_VALUE_TYPE value;
1536 /* Read explicit exponent if any, and put it in tokenbuf. */
1538 if ((c == 'e') || (c == 'E'))
1540 if (p >= token_buffer + maxtoken - 3)
1541 p = extend_token_buffer (p);
1544 if ((c == '+') || (c == '-'))
1550 error ("floating constant exponent has no digits");
1553 if (p >= token_buffer + maxtoken - 3)
1554 p = extend_token_buffer (p);
1563 /* Convert string to a double, checking for overflow. */
1564 if (setjmp (handler))
1566 error ("floating constant out of range");
1571 int fflag = 0, lflag = 0;
1572 /* Copy token_buffer now, while it has just the number
1573 and not the suffixes; once we add `f' or `i',
1574 REAL_VALUE_ATOF may not work any more. */
1575 char *copy = (char *) alloca (p - token_buffer + 1);
1576 bcopy (token_buffer, copy, p - token_buffer + 1);
1578 set_float_handler (handler);
1584 /* Read the suffixes to choose a data type. */
1589 error ("more than one `f' in numeric constant");
1595 error ("more than one `l' in numeric constant");
1601 error ("more than one `i' or `j' in numeric constant");
1603 pedwarn ("ANSI C forbids imaginary numeric constants");
1614 if (p >= token_buffer + maxtoken - 3)
1615 p = extend_token_buffer (p);
1621 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1622 tells the desired precision of the binary result
1623 of decimal-to-binary conversion. */
1628 error ("both `f' and `l' in floating constant");
1630 type = float_type_node;
1631 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1632 /* A diagnostic is required here by some ANSI C testsuites.
1633 This is not pedwarn, because some people don't want
1634 an error for this. */
1635 if (REAL_VALUE_ISINF (value) && pedantic)
1636 warning ("floating point number exceeds range of `float'");
1640 type = long_double_type_node;
1641 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1642 if (REAL_VALUE_ISINF (value) && pedantic)
1643 warning ("floating point number exceeds range of `long double'");
1647 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1648 if (REAL_VALUE_ISINF (value) && pedantic)
1649 warning ("floating point number exceeds range of `double'");
1652 set_float_handler (NULL_PTR);
1655 if (errno == ERANGE && !flag_traditional && pedantic)
1657 /* ERANGE is also reported for underflow,
1658 so test the value to distinguish overflow from that. */
1659 if (REAL_VALUES_LESS (dconst1, value)
1660 || REAL_VALUES_LESS (value, dconstm1))
1662 warning ("floating point number exceeds range of `double'");
1668 /* If the result is not a number, assume it must have been
1669 due to some error message above, so silently convert
1671 if (REAL_VALUE_ISNAN (value))
1674 /* Create a node with determined type and value. */
1676 yylval.ttype = build_complex (NULL_TREE,
1677 convert (type, integer_zero_node),
1678 build_real (type, value));
1680 yylval.ttype = build_real (type, value);
1684 tree traditional_type, ansi_type, type;
1685 HOST_WIDE_INT high, low;
1686 int spec_unsigned = 0;
1688 int spec_long_long = 0;
1694 if (c == 'u' || c == 'U')
1697 error ("two `u's in integer constant");
1700 else if (c == 'l' || c == 'L')
1705 error ("three `l's in integer constant");
1707 pedwarn ("ANSI C forbids long long integer constants");
1712 else if (c == 'i' || c == 'j' || c == 'I' || c == 'J')
1715 error ("more than one `i' or `j' in numeric constant");
1717 pedwarn ("ANSI C forbids imaginary numeric constants");
1722 if (p >= token_buffer + maxtoken - 3)
1723 p = extend_token_buffer (p);
1728 /* If the constant won't fit in an unsigned long long,
1729 then warn that the constant is out of range. */
1731 /* ??? This assumes that long long and long integer types are
1732 a multiple of 8 bits. This is better than the original code
1733 though which assumed that long was exactly 32 bits and long
1734 long was exactly 64 bits. */
1736 bytes = TYPE_PRECISION (long_long_integer_type_node) / 8;
1739 for (i = bytes; i < TOTAL_PARTS; i++)
1743 pedwarn ("integer constant out of range");
1745 /* This is simplified by the fact that our constant
1746 is always positive. */
1750 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1752 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1753 / HOST_BITS_PER_CHAR)]
1754 << (i * HOST_BITS_PER_CHAR));
1755 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1758 yylval.ttype = build_int_2 (low, high);
1759 TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
1761 /* If warn_traditional, calculate both the ANSI type and the
1762 traditional type, then see if they disagree.
1763 Otherwise, calculate only the type for the dialect in use. */
1764 if (warn_traditional || flag_traditional)
1766 /* Calculate the traditional type. */
1767 /* Traditionally, any constant is signed;
1768 but if unsigned is specified explicitly, obey that.
1769 Use the smallest size with the right number of bits,
1770 except for one special case with decimal constants. */
1771 if (! spec_long && base != 10
1772 && int_fits_type_p (yylval.ttype, unsigned_type_node))
1773 traditional_type = (spec_unsigned ? unsigned_type_node
1774 : integer_type_node);
1775 /* A decimal constant must be long
1776 if it does not fit in type int.
1777 I think this is independent of whether
1778 the constant is signed. */
1779 else if (! spec_long && base == 10
1780 && int_fits_type_p (yylval.ttype, integer_type_node))
1781 traditional_type = (spec_unsigned ? unsigned_type_node
1782 : integer_type_node);
1783 else if (! spec_long_long)
1784 traditional_type = (spec_unsigned ? long_unsigned_type_node
1785 : long_integer_type_node);
1787 traditional_type = (spec_unsigned
1788 ? long_long_unsigned_type_node
1789 : long_long_integer_type_node);
1791 if (warn_traditional || ! flag_traditional)
1793 /* Calculate the ANSI type. */
1794 if (! spec_long && ! spec_unsigned
1795 && int_fits_type_p (yylval.ttype, integer_type_node))
1796 ansi_type = integer_type_node;
1797 else if (! spec_long && (base != 10 || spec_unsigned)
1798 && int_fits_type_p (yylval.ttype, unsigned_type_node))
1799 ansi_type = unsigned_type_node;
1800 else if (! spec_unsigned && !spec_long_long
1801 && int_fits_type_p (yylval.ttype, long_integer_type_node))
1802 ansi_type = long_integer_type_node;
1803 else if (! spec_long_long
1804 && int_fits_type_p (yylval.ttype,
1805 long_unsigned_type_node))
1806 ansi_type = long_unsigned_type_node;
1807 else if (! spec_unsigned
1808 && int_fits_type_p (yylval.ttype,
1809 long_long_integer_type_node))
1810 ansi_type = long_long_integer_type_node;
1812 ansi_type = long_long_unsigned_type_node;
1815 type = flag_traditional ? traditional_type : ansi_type;
1817 if (warn_traditional && traditional_type != ansi_type)
1819 if (TYPE_PRECISION (traditional_type)
1820 != TYPE_PRECISION (ansi_type))
1821 warning ("width of integer constant changes with -traditional");
1822 else if (TREE_UNSIGNED (traditional_type)
1823 != TREE_UNSIGNED (ansi_type))
1824 warning ("integer constant is unsigned in ANSI C, signed with -traditional");
1826 warning ("width of integer constant may change on other systems with -traditional");
1829 if (pedantic && !flag_traditional && !spec_long_long && !warn
1830 && (TYPE_PRECISION (long_integer_type_node)
1831 < TYPE_PRECISION (type)))
1832 pedwarn ("integer constant out of range");
1834 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
1835 warning ("decimal constant is so large that it is unsigned");
1839 if (TYPE_PRECISION (type)
1840 <= TYPE_PRECISION (integer_type_node))
1842 = build_complex (NULL_TREE, integer_zero_node,
1843 convert (integer_type_node,
1846 error ("complex integer constant is too wide for `complex int'");
1848 else if (flag_traditional && !int_fits_type_p (yylval.ttype, type))
1849 /* The traditional constant 0x80000000 is signed
1850 but doesn't fit in the range of int.
1851 This will change it to -0x80000000, which does fit. */
1853 TREE_TYPE (yylval.ttype) = unsigned_type (type);
1854 yylval.ttype = convert (type, yylval.ttype);
1855 TREE_OVERFLOW (yylval.ttype)
1856 = TREE_CONSTANT_OVERFLOW (yylval.ttype) = 0;
1859 TREE_TYPE (yylval.ttype) = type;
1865 if (isalnum (c) || c == '.' || c == '_' || c == '$'
1866 || (!flag_traditional && (c == '-' || c == '+')
1867 && (p[-1] == 'e' || p[-1] == 'E')))
1868 error ("missing white space after number `%s'", token_buffer);
1870 value = CONSTANT; break;
1876 register int result = 0;
1877 register int num_chars = 0;
1878 unsigned width = TYPE_PRECISION (char_type_node);
1883 width = WCHAR_TYPE_SIZE;
1884 #ifdef MULTIBYTE_CHARS
1885 max_chars = MB_CUR_MAX;
1891 max_chars = TYPE_PRECISION (integer_type_node) / width;
1899 if (c == '\'' || c == EOF)
1905 c = readescape (&ignore);
1908 if (width < HOST_BITS_PER_INT
1909 && (unsigned) c >= (1 << width))
1910 pedwarn ("escape sequence out of range for character");
1911 #ifdef MAP_CHARACTER
1913 c = MAP_CHARACTER (c);
1919 pedwarn ("ANSI C forbids newline in character constant");
1922 #ifdef MAP_CHARACTER
1924 c = MAP_CHARACTER (c);
1928 if (num_chars > maxtoken - 4)
1929 extend_token_buffer (token_buffer);
1931 token_buffer[num_chars] = c;
1933 /* Merge character into result; ignore excess chars. */
1934 if (num_chars < max_chars + 1)
1936 if (width < HOST_BITS_PER_INT)
1937 result = (result << width) | (c & ((1 << width) - 1));
1943 token_buffer[num_chars + 1] = '\'';
1944 token_buffer[num_chars + 2] = 0;
1947 error ("malformatted character constant");
1948 else if (num_chars == 0)
1949 error ("empty character constant");
1950 else if (num_chars > max_chars)
1952 num_chars = max_chars;
1953 error ("character constant too long");
1955 else if (num_chars != 1 && ! flag_traditional)
1956 warning ("multi-character character constant");
1958 /* If char type is signed, sign-extend the constant. */
1961 int num_bits = num_chars * width;
1963 /* We already got an error; avoid invalid shift. */
1964 yylval.ttype = build_int_2 (0, 0);
1965 else if (TREE_UNSIGNED (char_type_node)
1966 || ((result >> (num_bits - 1)) & 1) == 0)
1968 = build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0
1969 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
1973 = build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0
1974 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
1976 TREE_TYPE (yylval.ttype) = integer_type_node;
1980 #ifdef MULTIBYTE_CHARS
1981 /* Set the initial shift state and convert the next sequence. */
1983 /* In all locales L'\0' is zero and mbtowc will return zero,
1986 || (num_chars == 1 && token_buffer[1] != '\0'))
1989 (void) mbtowc (NULL_PTR, NULL_PTR, 0);
1990 if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
1993 warning ("Ignoring invalid multibyte character");
1996 yylval.ttype = build_int_2 (result, 0);
1997 TREE_TYPE (yylval.ttype) = wchar_type_node;
2008 p = token_buffer + 1;
2010 while (c != '"' && c >= 0)
2015 c = readescape (&ignore);
2019 && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
2020 && c >= (1 << TYPE_PRECISION (char_type_node)))
2021 pedwarn ("escape sequence out of range for character");
2026 pedwarn ("ANSI C forbids newline in string constant");
2030 if (p == token_buffer + maxtoken)
2031 p = extend_token_buffer (p);
2040 error ("Unterminated string constant");
2042 /* We have read the entire constant.
2043 Construct a STRING_CST for the result. */
2047 /* If this is a L"..." wide-string, convert the multibyte string
2048 to a wide character string. */
2049 char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
2052 #ifdef MULTIBYTE_CHARS
2053 len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
2054 if (len < 0 || len >= (p - token_buffer))
2056 warning ("Ignoring invalid multibyte string");
2059 bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
2064 wp = widep + (BYTES_BIG_ENDIAN ? WCHAR_BYTES - 1 : 0);
2065 bzero (widep, (p - token_buffer) * WCHAR_BYTES);
2066 for (cp = token_buffer + 1; cp < p; cp++)
2067 *wp = *cp, wp += WCHAR_BYTES;
2068 len = p - token_buffer - 1;
2071 yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
2072 TREE_TYPE (yylval.ttype) = wchar_array_type_node;
2077 extern tree build_objc_string();
2078 /* Return an Objective-C @"..." constant string object. */
2079 yylval.ttype = build_objc_string (p - token_buffer,
2081 TREE_TYPE (yylval.ttype) = char_array_type_node;
2082 value = OBJC_STRING;
2086 yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
2087 TREE_TYPE (yylval.ttype) = char_array_type_node;
2118 yylval.code = PLUS_EXPR; break;
2120 yylval.code = MINUS_EXPR; break;
2122 yylval.code = BIT_AND_EXPR; break;
2124 yylval.code = BIT_IOR_EXPR; break;
2126 yylval.code = MULT_EXPR; break;
2128 yylval.code = TRUNC_DIV_EXPR; break;
2130 yylval.code = TRUNC_MOD_EXPR; break;
2132 yylval.code = BIT_XOR_EXPR; break;
2134 yylval.code = LSHIFT_EXPR; break;
2136 yylval.code = RSHIFT_EXPR; break;
2138 yylval.code = LT_EXPR; break;
2140 yylval.code = GT_EXPR; break;
2143 token_buffer[1] = c1 = GETC();
2144 token_buffer[2] = 0;
2151 value = ARITHCOMPARE; yylval.code = LE_EXPR; goto done;
2153 value = ARITHCOMPARE; yylval.code = GE_EXPR; goto done;
2155 value = EQCOMPARE; yylval.code = NE_EXPR; goto done;
2157 value = EQCOMPARE; yylval.code = EQ_EXPR; goto done;
2159 value = ASSIGN; goto done;
2165 value = PLUSPLUS; goto done;
2167 value = MINUSMINUS; goto done;
2169 value = ANDAND; goto done;
2171 value = OROR; goto done;
2184 { value = POINTSAT; goto done; }
2188 { value = ']'; goto done; }
2192 { value = '{'; indent_level++; goto done; }
2194 { value = '['; goto done; }
2198 { value = '}'; indent_level--; goto done; }
2202 token_buffer[1] = 0;
2204 if ((c == '<') || (c == '>'))
2205 value = ARITHCOMPARE;
2211 /* Don't make yyparse think this is eof. */
2230 /* yylloc.last_line = lineno; */
2235 /* Sets the value of the 'yydebug' variable to VALUE.
2236 This is a function so we don't have to have YYDEBUG defined
2237 in order to build the compiler. */
2246 warning ("YYDEBUG not defined.");