1 /* Lexical analyzer for C and Objective C.
2 Copyright (C) 1987, 88, 89, 92, 94-96, 1997 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
38 #ifdef MULTIBYTE_CHARS
50 cpp_options parse_options;
51 static enum cpp_token cpp_token;
54 /* The elements of `ridpointers' are identifier nodes
55 for the reserved type names and storage classes.
56 It is indexed by a RID_... value. */
57 tree ridpointers[(int) RID_MAX];
59 /* Cause the `yydebug' variable to be defined. */
63 static unsigned char *yy_cur, *yy_lim;
70 parse_in.limit = parse_in.token_buffer;
71 cpp_token = cpp_get_token (&parse_in);
72 if (cpp_token == CPP_EOF)
74 yy_lim = CPP_PWRITTEN (&parse_in);
75 yy_cur = parse_in.token_buffer;
81 #define GETC() (yy_cur < yy_lim ? *yy_cur++ : yy_get_token ())
82 #define UNGETC(c) ((c), yy_cur--)
84 #define GETC() getc (finput)
85 #define UNGETC(c) ungetc (c, finput)
88 /* the declaration found for the last IDENTIFIER token read in.
89 yylex must look this up to detect typedefs, which get token type TYPENAME,
90 so it is left around in case the identifier is not a typedef but is
91 used in a context which makes it a reference to a variable. */
94 /* Nonzero enables objc features. */
98 extern tree is_class_name ();
102 /* File used for outputting assembler code. */
103 extern FILE *asm_out_file;
105 #ifndef WCHAR_TYPE_SIZE
107 #define WCHAR_TYPE_SIZE INT_TYPE_SIZE
109 #define WCHAR_TYPE_SIZE BITS_PER_WORD
113 /* Number of bytes in a wide character. */
114 #define WCHAR_BYTES (WCHAR_TYPE_SIZE / BITS_PER_UNIT)
116 static int maxtoken; /* Current nominal length of token buffer. */
117 char *token_buffer; /* Pointer to token buffer.
118 Actual allocated length is maxtoken + 2.
119 This is not static because objc-parse.y uses it. */
121 static int indent_level = 0; /* Number of { minus number of }. */
123 /* Nonzero if end-of-file has been seen on input. */
124 static int end_of_file;
127 /* Buffered-back input character; faster than using ungetc. */
128 static int nextchar = -1;
131 int check_newline ();
133 /* Do not insert generated code into the source, instead, include it.
134 This allows us to build gcc automatically even for targets that
135 need to add or modify the reserved keyword lists. */
138 /* Return something to represent absolute declarators containing a *.
139 TARGET is the absolute declarator that the * contains.
140 TYPE_QUALS is a list of modifiers such as const or volatile
141 to apply to the pointer type, represented as identifiers.
143 We return an INDIRECT_REF whose "contents" are TARGET
144 and whose type is the modifier list. */
147 make_pointer_declarator (type_quals, target)
148 tree type_quals, target;
/* TYPE_QUALS is handed to build1 as its second argument and TARGET as
   its third; this is how the qualifier list ends up as the "type" of
   the INDIRECT_REF and TARGET as its contents.  */
150 return build1 (INDIRECT_REF, type_quals, target);
/* Replace with the empty string the name of every wordlist entry whose
   rid lies in the inclusive range RID_IN .. RID_ONEWAY.
   NOTE(review): presumably this stops those words from being matched
   as reserved words (UNSET_RESERVED_WORD blanks names the same way);
   confirm against is_reserved_word.  */
154 forget_protocol_qualifiers ()
/* n is sizeof wordlist divided by the size of one struct resword.  */
156 int i, n = sizeof wordlist / sizeof (struct resword);
158 for (i = 0; i < n; i++)
/* The rid values are compared as ints, so only the numeric range
   matters here.  */
159 if ((int) wordlist[i].rid >= (int) RID_IN
160 && (int) wordlist[i].rid <= (int) RID_ONEWAY)
161 wordlist[i].name = "";
/* Store the spellings in, out, inout, bycopy and oneway into the
   wordlist entries whose rid is RID_IN, RID_OUT, RID_INOUT, RID_BYCOPY
   and RID_ONEWAY respectively.  Entries with any other rid are left
   untouched.  */
165 remember_protocol_qualifiers ()
/* n is sizeof wordlist divided by the size of one struct resword.  */
167 int i, n = sizeof wordlist / sizeof (struct resword);
169 for (i = 0; i < n; i++)
/* Each entry is matched on its rid alone; the name currently stored in
   the entry is not examined before being overwritten.  */
170 if (wordlist[i].rid == RID_IN)
171 wordlist[i].name = "in";
172 else if (wordlist[i].rid == RID_OUT)
173 wordlist[i].name = "out";
174 else if (wordlist[i].rid == RID_INOUT)
175 wordlist[i].name = "inout";
176 else if (wordlist[i].rid == RID_BYCOPY)
177 wordlist[i].name = "bycopy";
178 else if (wordlist[i].rid == RID_ONEWAY)
179 wordlist[i].name = "oneway";
184 init_parse (filename)
191 cpp_reader_init (&parse_in);
192 parse_in.data = &parse_options;
193 cpp_options_init (&parse_options);
194 cpp_handle_options (&parse_in, 0, NULL); /* FIXME */
195 parse_in.show_column = 1;
196 if (! cpp_start_read (&parse_in, filename))
203 cpp_finish (&parse_in);
210 /* Make identifier nodes long enough for the language-specific slots. */
211 set_identifier_size (sizeof (struct lang_identifier));
213 /* Start it at 0, because check_newline is called at the very beginning
214 and will increment it to 1. */
217 #ifdef MULTIBYTE_CHARS
218 /* Change to the native locale for multibyte conversions. */
219 setlocale (LC_CTYPE, "");
223 token_buffer = (char *) xmalloc (maxtoken + 2);
225 ridpointers[(int) RID_INT] = get_identifier ("int");
226 ridpointers[(int) RID_CHAR] = get_identifier ("char");
227 ridpointers[(int) RID_VOID] = get_identifier ("void");
228 ridpointers[(int) RID_FLOAT] = get_identifier ("float");
229 ridpointers[(int) RID_DOUBLE] = get_identifier ("double");
230 ridpointers[(int) RID_SHORT] = get_identifier ("short");
231 ridpointers[(int) RID_LONG] = get_identifier ("long");
232 ridpointers[(int) RID_UNSIGNED] = get_identifier ("unsigned");
233 ridpointers[(int) RID_SIGNED] = get_identifier ("signed");
234 ridpointers[(int) RID_INLINE] = get_identifier ("inline");
235 ridpointers[(int) RID_CONST] = get_identifier ("const");
236 ridpointers[(int) RID_VOLATILE] = get_identifier ("volatile");
237 ridpointers[(int) RID_AUTO] = get_identifier ("auto");
238 ridpointers[(int) RID_STATIC] = get_identifier ("static");
239 ridpointers[(int) RID_EXTERN] = get_identifier ("extern");
240 ridpointers[(int) RID_TYPEDEF] = get_identifier ("typedef");
241 ridpointers[(int) RID_REGISTER] = get_identifier ("register");
242 ridpointers[(int) RID_ITERATOR] = get_identifier ("iterator");
243 ridpointers[(int) RID_COMPLEX] = get_identifier ("complex");
244 ridpointers[(int) RID_ID] = get_identifier ("id");
245 ridpointers[(int) RID_IN] = get_identifier ("in");
246 ridpointers[(int) RID_OUT] = get_identifier ("out");
247 ridpointers[(int) RID_INOUT] = get_identifier ("inout");
248 ridpointers[(int) RID_BYCOPY] = get_identifier ("bycopy");
249 ridpointers[(int) RID_ONEWAY] = get_identifier ("oneway");
250 forget_protocol_qualifiers();
252 /* Some options inhibit certain reserved words.
253 Clear those words out of the hash table so they won't be recognized. */
254 #define UNSET_RESERVED_WORD(STRING) \
255 do { struct resword *s = is_reserved_word (STRING, sizeof (STRING) - 1); \
256 if (s) s->name = ""; } while (0)
258 if (! doing_objc_thang)
259 UNSET_RESERVED_WORD ("id");
261 if (flag_traditional)
263 UNSET_RESERVED_WORD ("const");
264 UNSET_RESERVED_WORD ("volatile");
265 UNSET_RESERVED_WORD ("typeof");
266 UNSET_RESERVED_WORD ("signed");
267 UNSET_RESERVED_WORD ("inline");
268 UNSET_RESERVED_WORD ("iterator");
269 UNSET_RESERVED_WORD ("complex");
273 UNSET_RESERVED_WORD ("asm");
274 UNSET_RESERVED_WORD ("typeof");
275 UNSET_RESERVED_WORD ("inline");
276 UNSET_RESERVED_WORD ("iterator");
277 UNSET_RESERVED_WORD ("complex");
282 reinit_parse_for_function ()
286 /* Function used when yydebug is set, to print a token in more detail. */
289 yyprint (file, yychar, yylval)
301 if (IDENTIFIER_POINTER (t))
302 fprintf (file, " `%s'", IDENTIFIER_POINTER (t));
307 if (TREE_CODE (t) == INTEGER_CST)
309 #if HOST_BITS_PER_WIDE_INT == 64
310 #if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
316 #if HOST_BITS_PER_WIDE_INT != HOST_BITS_PER_INT
322 TREE_INT_CST_HIGH (t), TREE_INT_CST_LOW (t));
328 /* If C is not whitespace, return C.
329 Otherwise skip whitespace and return first nonwhite char read. */
335 static int newline_warning = 0;
341 /* We don't recognize comments here, because
342 cpp output can include / and * consecutively as operators.
343 Also, there's no need, since cpp removes all comments. */
346 c = check_newline ();
358 /* ANSI C says the effects of a carriage return in a source file
360 if (pedantic && !newline_warning)
362 warning ("carriage return in source file");
363 warning ("(we only warn about the first carriage return)");
374 error ("stray '\\' in program");
384 /* Skips all of the white space at the current location in the input file.
385 Must use and reset nextchar if it has the next character. */
388 position_after_white_space ()
394 c = nextchar, nextchar = -1;
399 UNGETC (skip_white_space (c));
402 /* Make the token buffer longer, preserving the data in it.
403 P should point to just beyond the last valid character in the old buffer.
404 The value we return is a pointer to the new buffer
405 at a place corresponding to P. */
408 extend_token_buffer (p)
/* Record P as an offset from the start of the buffer.  The offset is
   re-applied to token_buffer after the xrealloc call below
   (presumably because the buffer may be moved; xrealloc is defined
   elsewhere -- confirm).  */
411 int offset = p - token_buffer;
/* The new nominal length is twice the old one plus 10.  */
413 maxtoken = maxtoken * 2 + 10;
/* Request maxtoken + 2 bytes, the same two bytes of slack beyond the
   nominal length that the initial xmalloc of token_buffer uses.  */
414 token_buffer = (char *) xrealloc (token_buffer, maxtoken + 2);
/* Return the position in the new buffer that corresponds to P.  */
416 return token_buffer + offset;
421 #define GET_DIRECTIVE_LINE() get_directive_line (finput)
422 #else /* USE_CPPLIB */
423 /* Read the rest of a #-directive from input stream FINPUT.
424 In normal use, the directive name and the white space after it
425 have already been read, so they won't be included in the result.
426 We allow for the fact that the directive line may contain
427 a newline embedded within a character or string literal which forms
428 a part of the directive.
430 The value is a string in a reusable buffer. It remains valid
431 only until the next time this function is called. */
434 GET_DIRECTIVE_LINE ()
436 static char *directive_buffer = NULL;
437 static unsigned buffer_length = 0;
439 register char *buffer_limit;
440 register int looking_for = 0;
441 register int char_escaped = 0;
443 if (buffer_length == 0)
445 directive_buffer = (char *)xmalloc (128);
449 buffer_limit = &directive_buffer[buffer_length];
451 for (p = directive_buffer; ; )
455 /* Make buffer bigger if it is full. */
456 if (p >= buffer_limit)
458 register unsigned bytes_used = (p - directive_buffer);
462 = (char *)xrealloc (directive_buffer, buffer_length);
463 p = &directive_buffer[bytes_used];
464 buffer_limit = &directive_buffer[buffer_length];
469 /* Discard initial whitespace. */
470 if ((c == ' ' || c == '\t') && p == directive_buffer)
473 /* Detect the end of the directive. */
474 if (c == '\n' && looking_for == 0)
483 return directive_buffer;
485 /* Handle string and character constant syntax. */
488 if (looking_for == c && !char_escaped)
489 looking_for = 0; /* Found terminator... stop looking. */
492 if (c == '\'' || c == '"')
493 looking_for = c; /* Don't stop buffering until we see another
494 one of these (or an EOF). */
496 /* Handle backslash. */
497 char_escaped = (c == '\\' && ! char_escaped);
500 #endif /* USE_CPPLIB */
502 /* At the beginning of a line, increment the line number
503 and process any #-directive on this line.
504 If the line is a #-directive, read the entire line and return a newline.
505 Otherwise, return the line's first non-whitespace character. */
515 /* Read first nonwhite char on the line. */
518 while (c == ' ' || c == '\t')
523 /* If not #, return it so caller will use it. */
527 /* Read first nonwhite char after the `#'. */
530 while (c == ' ' || c == '\t')
533 /* If a letter follows, then if the word here is `line', skip
534 it and ignore it; otherwise, ignore the line, with an error
535 if the word isn't `pragma', `ident', `define', or `undef'. */
537 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
546 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
548 while (c == ' ' || c == '\t')
552 #ifdef HANDLE_SYSV_PRAGMA
555 if (token != IDENTIFIER)
557 return handle_sysv_pragma (token);
558 #else /* !HANDLE_SYSV_PRAGMA */
563 if (token != IDENTIFIER)
565 if (HANDLE_PRAGMA (finput, yylval.ttype))
571 ??? do not know what to do ???;
572 #endif /* !USE_CPPLIB */
573 #endif /* HANDLE_PRAGMA */
574 #endif /* !HANDLE_SYSV_PRAGMA */
586 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
589 debug_define (lineno, GET_DIRECTIVE_LINE ());
599 && ((c = GETC()) == ' ' || c == '\t' || c == '\n'))
602 debug_undef (lineno, GET_DIRECTIVE_LINE ());
611 && ((c = GETC()) == ' ' || c == '\t'))
620 && ((c = GETC()) == ' ' || c == '\t'))
622 /* #ident. The pedantic warning is now in cccp.c. */
624 /* Here we have just seen `#ident '.
625 A string constant should follow. */
627 while (c == ' ' || c == '\t')
630 /* If no argument, ignore the line. */
637 || TREE_CODE (yylval.ttype) != STRING_CST)
639 error ("invalid #ident");
645 #ifdef ASM_OUTPUT_IDENT
646 ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (yylval.ttype));
650 /* Skip the rest of this line. */
655 error ("undefined or invalid # directive");
660 /* Here we have either `#line' or `# <nonletter>'.
661 In either case, it should be a line number; a digit should follow. */
663 while (c == ' ' || c == '\t')
666 /* If the # is the only nonwhite char on the line,
667 just ignore it. Check the new newline. */
671 /* Something follows the #; read a token. */
676 if (token == CONSTANT
677 && TREE_CODE (yylval.ttype) == INTEGER_CST)
679 int old_lineno = lineno;
681 /* subtract one, because it is the following line that
682 gets the specified number */
684 int l = TREE_INT_CST_LOW (yylval.ttype) - 1;
686 /* Is this the last nonwhite stuff on the line? */
688 while (c == ' ' || c == '\t')
692 /* No more: store the line number and check following line. */
698 /* More follows: it must be a string constant (filename). */
700 /* Read the string constant. */
703 if (token != STRING || TREE_CODE (yylval.ttype) != STRING_CST)
705 error ("invalid #line");
710 = (char *) permalloc (TREE_STRING_LENGTH (yylval.ttype) + 1);
711 strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
714 /* Each change of file name
715 reinitializes whether we are now in a system header. */
716 in_system_header = 0;
718 if (main_input_filename == 0)
719 main_input_filename = input_filename;
721 /* Is this the last nonwhite stuff on the line? */
723 while (c == ' ' || c == '\t')
727 /* Update the name in the top element of input_file_stack. */
728 if (input_file_stack)
729 input_file_stack->name = input_filename;
738 /* `1' after file name means entering new file.
739 `2' after file name means just left a file. */
741 if (token == CONSTANT
742 && TREE_CODE (yylval.ttype) == INTEGER_CST)
744 if (TREE_INT_CST_LOW (yylval.ttype) == 1)
746 /* Pushing to a new file. */
748 = (struct file_stack *) xmalloc (sizeof (struct file_stack));
749 input_file_stack->line = old_lineno;
750 p->next = input_file_stack;
751 p->name = input_filename;
752 p->indent_level = indent_level;
753 input_file_stack = p;
754 input_file_stack_tick++;
755 debug_start_source_file (input_filename);
758 else if (TREE_INT_CST_LOW (yylval.ttype) == 2)
760 /* Popping out of a file. */
761 if (input_file_stack->next)
763 struct file_stack *p = input_file_stack;
764 if (indent_level != p->indent_level)
766 warning_with_file_and_line
767 (p->name, old_lineno,
768 "This file contains more `%c's than `%c's.",
769 indent_level > p->indent_level ? '{' : '}',
770 indent_level > p->indent_level ? '}' : '{');
772 input_file_stack = p->next;
774 input_file_stack_tick++;
775 debug_end_source_file (input_file_stack->line);
778 error ("#-lines for entering and leaving files don't match");
784 /* Now that we've pushed or popped the input stack,
785 update the name in the top element. */
786 if (input_file_stack)
787 input_file_stack->name = input_filename;
789 /* If we have handled a `1' or a `2',
790 see if there is another number to read. */
793 /* Is this the last nonwhite stuff on the line? */
795 while (c == ' ' || c == '\t')
805 /* `3' after file name means this is a system header file. */
807 if (token == CONSTANT
808 && TREE_CODE (yylval.ttype) == INTEGER_CST
809 && TREE_INT_CST_LOW (yylval.ttype) == 3)
810 in_system_header = 1, used_up = 1;
814 /* Is this the last nonwhite stuff on the line? */
816 while (c == ' ' || c == '\t')
823 warning ("unrecognized text at end of #line");
826 error ("invalid #-line");
828 /* skip the rest of this line. */
831 if (c != '\n' && c != EOF && nextchar >= 0)
832 c = nextchar, nextchar = -1;
834 while (c != '\n' && c != EOF)
839 #ifdef HANDLE_SYSV_PRAGMA
841 /* Handle a #pragma directive.
842 TOKEN is the token we read after `#pragma'. Processes the entire input
843 line and returns a character for the caller to reread: either \n or EOF. */
845 /* This function has to be in this file, in order to get at
849 handle_sysv_pragma (token)
862 handle_pragma_token (token_buffer, yylval.ttype);
865 handle_pragma_token (token_buffer, 0);
869 c = nextchar, nextchar = -1;
874 while (c == ' ' || c == '\t')
876 if (c == '\n' || c == EOF)
878 handle_pragma_token (0, 0);
886 #endif /* HANDLE_SYSV_PRAGMA */
888 #define ENDFILE -1 /* token that represents end-of-file */
890 /* Read an escape sequence, returning its equivalent as a character,
891 or store 1 in *ignore_ptr if it is backslash-newline. */
894 readescape (ignore_ptr)
897 register int c = GETC();
899 register unsigned count;
900 unsigned firstdig = 0;
906 if (warn_traditional)
907 warning ("the meaning of `\\x' varies with -traditional");
909 if (flag_traditional)
918 if (!(c >= 'a' && c <= 'f')
919 && !(c >= 'A' && c <= 'F')
920 && !(c >= '0' && c <= '9'))
926 if (c >= 'a' && c <= 'f')
927 code += c - 'a' + 10;
928 if (c >= 'A' && c <= 'F')
929 code += c - 'A' + 10;
930 if (c >= '0' && c <= '9')
932 if (code != 0 || count != 0)
941 error ("\\x used with no following hex digits");
943 /* Digits are all 0's. Ok. */
945 else if ((count - 1) * 4 >= TYPE_PRECISION (integer_type_node)
947 && ((1 << (TYPE_PRECISION (integer_type_node) - (count - 1) * 4))
949 pedwarn ("hex escape out of range");
952 case '0': case '1': case '2': case '3': case '4':
953 case '5': case '6': case '7':
956 while ((c <= '7') && (c >= '0') && (count++ < 3))
958 code = (code * 8) + (c - '0');
964 case '\\': case '\'': case '"':
973 return TARGET_NEWLINE;
988 if (warn_traditional)
989 warning ("the meaning of `\\a' varies with -traditional");
991 if (flag_traditional)
996 #if 0 /* Vertical tab is present in common usage compilers. */
997 if (flag_traditional)
1005 pedwarn ("non-ANSI-standard escape sequence, `\\%c'", c);
1011 /* `\(', etc, are used at beginning of line to avoid confusing Emacs. */
1015 /* `\%' is used to prevent SCCS from getting confused. */
1018 pedwarn ("non-ANSI escape sequence `\\%c'", c);
1021 if (c >= 040 && c < 0177)
1022 pedwarn ("unknown escape sequence `\\%c'", c);
1024 pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
1034 strcpy (buf, string);
1036 /* We can't print string and character constants well
1037 because the token_buffer contains the result of processing escapes. */
1039 strcat (buf, " at end of input");
1040 else if (token_buffer[0] == 0)
1041 strcat (buf, " at null character");
1042 else if (token_buffer[0] == '"')
1043 strcat (buf, " before string constant");
1044 else if (token_buffer[0] == '\'')
1045 strcat (buf, " before character constant");
1046 else if (token_buffer[0] < 040 || (unsigned char) token_buffer[0] >= 0177)
1047 sprintf (buf + strlen (buf), " before character 0%o",
1048 (unsigned char) token_buffer[0]);
1050 strcat (buf, " before `%s'");
1052 error (buf, token_buffer);
1062 char long_long_flag;
1065 struct try_type type_sequence[] =
1067 { &integer_type_node, 0, 0, 0},
1068 { &unsigned_type_node, 1, 0, 0},
1069 { &long_integer_type_node, 0, 1, 0},
1070 { &long_unsigned_type_node, 1, 1, 0},
1071 { &long_long_integer_type_node, 0, 1, 1},
1072 { &long_long_unsigned_type_node, 1, 1, 1}
1087 c = nextchar, nextchar = -1;
1092 /* Effectively do c = skip_white_space (c)
1093 but do it faster in the usual cases. */
1106 /* Call skip_white_space so we can warn if appropriate. */
1111 c = skip_white_space (c);
1113 goto found_nonwhite;
1117 token_buffer[0] = c;
1118 token_buffer[1] = 0;
1120 /* yylloc.first_line = lineno; */
1126 token_buffer[0] = 0;
1131 /* Capital L may start a wide-string or wide-character constant. */
1133 register int c = GETC();
1142 goto string_constant;
1149 if (!doing_objc_thang)
1156 /* '@' may start a constant string object. */
1157 register int c = GETC ();
1161 goto string_constant;
1164 /* Fall through to treat '@' as the start of an identifier. */
1167 case 'A': case 'B': case 'C': case 'D': case 'E':
1168 case 'F': case 'G': case 'H': case 'I': case 'J':
1169 case 'K': case 'M': case 'N': case 'O':
1170 case 'P': case 'Q': case 'R': case 'S': case 'T':
1171 case 'U': case 'V': case 'W': case 'X': case 'Y':
1173 case 'a': case 'b': case 'c': case 'd': case 'e':
1174 case 'f': case 'g': case 'h': case 'i': case 'j':
1175 case 'k': case 'l': case 'm': case 'n': case 'o':
1176 case 'p': case 'q': case 'r': case 's': case 't':
1177 case 'u': case 'v': case 'w': case 'x': case 'y':
1183 while (isalnum (c) || c == '_' || c == '$' || c == '@')
1185 /* Make sure this char really belongs in an identifier. */
1186 if (c == '@' && ! doing_objc_thang)
1190 if (! dollars_in_ident)
1191 error ("`$' in identifier");
1193 pedwarn ("`$' in identifier");
1196 if (p >= token_buffer + maxtoken)
1197 p = extend_token_buffer (p);
1213 /* Try to recognize a keyword. Uses minimum-perfect hash function */
1216 register struct resword *ptr;
1218 if (ptr = is_reserved_word (token_buffer, p - token_buffer))
1221 yylval.ttype = ridpointers[(int) ptr->rid];
1222 value = (int) ptr->token;
1224 /* Only return OBJECTNAME if it is a typedef. */
1225 if (doing_objc_thang && value == OBJECTNAME)
1227 lastiddecl = lookup_name(yylval.ttype);
1229 if (lastiddecl == NULL_TREE
1230 || TREE_CODE (lastiddecl) != TYPE_DECL)
1234 /* Even if we decided to recognize asm, still perhaps warn. */
1236 && (value == ASM_KEYWORD || value == TYPEOF
1237 || ptr->rid == RID_INLINE)
1238 && token_buffer[0] != '_')
1239 pedwarn ("ANSI does not permit the keyword `%s'",
1244 /* If we did not find a keyword, look for an identifier
1247 if (value == IDENTIFIER)
1249 if (token_buffer[0] == '@')
1250 error("invalid identifier `%s'", token_buffer);
1252 yylval.ttype = get_identifier (token_buffer);
1253 lastiddecl = lookup_name (yylval.ttype);
1255 if (lastiddecl != 0 && TREE_CODE (lastiddecl) == TYPE_DECL)
1257 /* A user-invisible read-only initialized variable
1258 should be replaced by its value.
1259 We handle only strings since that's the only case used in C. */
1260 else if (lastiddecl != 0 && TREE_CODE (lastiddecl) == VAR_DECL
1261 && DECL_IGNORED_P (lastiddecl)
1262 && TREE_READONLY (lastiddecl)
1263 && DECL_INITIAL (lastiddecl) != 0
1264 && TREE_CODE (DECL_INITIAL (lastiddecl)) == STRING_CST)
1266 tree stringval = DECL_INITIAL (lastiddecl);
1268 /* Copy the string value so that we won't clobber anything
1269 if we put something in the TREE_CHAIN of this one. */
1270 yylval.ttype = build_string (TREE_STRING_LENGTH (stringval),
1271 TREE_STRING_POINTER (stringval));
1274 else if (doing_objc_thang)
1276 tree objc_interface_decl = is_class_name (yylval.ttype);
1278 if (objc_interface_decl)
1281 yylval.ttype = objc_interface_decl;
1291 /* Check first for common special case: single-digit 0 or 1. */
1294 UNGETC (next_c); /* Always undo this lookahead. */
1295 if (!isalnum (next_c) && next_c != '.')
1297 token_buffer[0] = (char)c, token_buffer[1] = '\0';
1298 yylval.ttype = (c == '0') ? integer_zero_node : integer_one_node;
1304 case '2': case '3': case '4':
1305 case '5': case '6': case '7': case '8': case '9':
1310 int largest_digit = 0;
1312 /* for multi-precision arithmetic,
1313 we actually store only HOST_BITS_PER_CHAR bits in each part.
1314 The number of parts is chosen so as to be sufficient to hold
1315 enough bits to fit into the two HOST_WIDE_INTs that contain
1316 the integer value (this is always at least as many bits as are
1317 in a target `long long' value, but may be wider). */
1318 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1319 int parts[TOTAL_PARTS];
1322 enum anon1 { NOT_FLOAT, AFTER_POINT, TOO_MANY_POINTS} floatflag
1325 for (count = 0; count < TOTAL_PARTS; count++)
1333 *p++ = (c = GETC());
1334 if ((c == 'x') || (c == 'X'))
1337 *p++ = (c = GETC());
1339 /* Leading 0 forces octal unless the 0 is the only digit. */
1340 else if (c >= '0' && c <= '9')
1349 /* Read all the digits-and-decimal-points. */
1352 || (isalnum (c) && c != 'l' && c != 'L'
1353 && c != 'u' && c != 'U'
1354 && c != 'i' && c != 'I' && c != 'j' && c != 'J'
1355 && (floatflag == NOT_FLOAT || ((c != 'f') && (c != 'F')))))
1360 error ("floating constant may not be in radix 16");
1361 if (floatflag == TOO_MANY_POINTS)
1362 /* We have already emitted an error. Don't need another. */
1364 else if (floatflag == AFTER_POINT)
1366 error ("malformed floating constant");
1367 floatflag = TOO_MANY_POINTS;
1368 /* Avoid another error from atof by forcing all characters
1369 from here on to be ignored. */
1373 floatflag = AFTER_POINT;
1377 /* Accept '.' as the start of a floating-point number
1378 only when it is followed by a digit.
1379 Otherwise, unread the following non-digit
1380 and use the '.' as a structural token. */
1381 if (p == token_buffer + 2 && !isdigit (c))
1392 error ("parse error at `..'");
1395 token_buffer[1] = 0;
1402 /* It is not a decimal point.
1403 It should be a digit (perhaps a hex digit). */
1409 else if (base <= 10)
1411 if (c == 'e' || c == 'E')
1414 floatflag = AFTER_POINT;
1415 break; /* start of exponent */
1417 error ("nondigits in number and not hexadecimal");
1428 if (c >= largest_digit)
1432 for (count = 0; count < TOTAL_PARTS; count++)
1434 parts[count] *= base;
1438 += (parts[count-1] >> HOST_BITS_PER_CHAR);
1440 &= (1 << HOST_BITS_PER_CHAR) - 1;
1446 /* If the extra highest-order part ever gets anything in it,
1447 the number is certainly too big. */
1448 if (parts[TOTAL_PARTS - 1] != 0)
1451 if (p >= token_buffer + maxtoken - 3)
1452 p = extend_token_buffer (p);
1453 *p++ = (c = GETC());
1458 error ("numeric constant with no digits");
1460 if (largest_digit >= base)
1461 error ("numeric constant contains digits beyond the radix");
1463 /* Remove terminating char from the token buffer and delimit the string */
1466 if (floatflag != NOT_FLOAT)
1468 tree type = double_type_node;
1469 int exceeds_double = 0;
1471 REAL_VALUE_TYPE value;
1474 /* Read explicit exponent if any, and put it in tokenbuf. */
1476 if ((c == 'e') || (c == 'E'))
1478 if (p >= token_buffer + maxtoken - 3)
1479 p = extend_token_buffer (p);
1482 if ((c == '+') || (c == '-'))
1488 error ("floating constant exponent has no digits");
1491 if (p >= token_buffer + maxtoken - 3)
1492 p = extend_token_buffer (p);
1501 /* Convert string to a double, checking for overflow. */
1502 if (setjmp (handler))
1504 error ("floating constant out of range");
1509 int fflag = 0, lflag = 0;
1510 /* Copy token_buffer now, while it has just the number
1511 and not the suffixes; once we add `f' or `i',
1512 REAL_VALUE_ATOF may not work any more. */
1513 char *copy = (char *) alloca (p - token_buffer + 1);
1514 bcopy (token_buffer, copy, p - token_buffer + 1);
1516 set_float_handler (handler);
1522 /* Read the suffixes to choose a data type. */
1527 error ("more than one `f' in numeric constant");
1533 error ("more than one `l' in numeric constant");
1539 error ("more than one `i' or `j' in numeric constant");
1541 pedwarn ("ANSI C forbids imaginary numeric constants");
1552 if (p >= token_buffer + maxtoken - 3)
1553 p = extend_token_buffer (p);
1559 /* The second argument, machine_mode, of REAL_VALUE_ATOF
1560 tells the desired precision of the binary result
1561 of decimal-to-binary conversion. */
1566 error ("both `f' and `l' in floating constant");
1568 type = float_type_node;
1569 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1570 /* A diagnostic is required here by some ANSI C testsuites.
1571 This is not pedwarn, because some people don't want
1572 an error for this. */
1573 if (REAL_VALUE_ISINF (value) && pedantic)
1574 warning ("floating point number exceeds range of `float'");
1578 type = long_double_type_node;
1579 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1580 if (REAL_VALUE_ISINF (value) && pedantic)
1581 warning ("floating point number exceeds range of `long double'");
1585 value = REAL_VALUE_ATOF (copy, TYPE_MODE (type));
1586 if (REAL_VALUE_ISINF (value) && pedantic)
1587 warning ("floating point number exceeds range of `double'");
1590 set_float_handler (NULL_PTR);
1593 if (errno == ERANGE && !flag_traditional && pedantic)
1595 /* ERANGE is also reported for underflow,
1596 so test the value to distinguish overflow from that. */
1597 if (REAL_VALUES_LESS (dconst1, value)
1598 || REAL_VALUES_LESS (value, dconstm1))
1600 warning ("floating point number exceeds range of `double'");
1606 /* If the result is not a number, assume it must have been
1607 due to some error message above, so silently convert
1609 if (REAL_VALUE_ISNAN (value))
1612 /* Create a node with determined type and value. */
1614 yylval.ttype = build_complex (NULL_TREE,
1615 convert (type, integer_zero_node),
1616 build_real (type, value));
1618 yylval.ttype = build_real (type, value);
1622 tree traditional_type, ansi_type, type;
1623 HOST_WIDE_INT high, low;
1624 int spec_unsigned = 0;
1626 int spec_long_long = 0;
1632 if (c == 'u' || c == 'U')
1635 error ("two `u's in integer constant");
1638 else if (c == 'l' || c == 'L')
1643 error ("three `l's in integer constant");
1645 pedwarn ("ANSI C forbids long long integer constants");
1650 else if (c == 'i' || c == 'j' || c == 'I' || c == 'J')
1653 error ("more than one `i' or `j' in numeric constant");
1655 pedwarn ("ANSI C forbids imaginary numeric constants");
1660 if (p >= token_buffer + maxtoken - 3)
1661 p = extend_token_buffer (p);
1666 /* If the constant is not long long and it won't fit in an
1667 unsigned long, or if the constant is long long and won't fit
1668 in an unsigned long long, then warn that the constant is out
1671 /* ??? This assumes that long long and long integer types are
1672 a multiple of 8 bits. This is better than the original code
1673 though which assumed that long was exactly 32 bits and long
1674 long was exactly 64 bits. */
1677 bytes = TYPE_PRECISION (long_long_integer_type_node) / 8;
1679 bytes = TYPE_PRECISION (long_integer_type_node) / 8;
1682 for (i = bytes; i < TOTAL_PARTS; i++)
1686 pedwarn ("integer constant out of range");
1688 /* This is simplified by the fact that our constant
1689 is always positive. */
1693 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1695 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1696 / HOST_BITS_PER_CHAR)]
1697 << (i * HOST_BITS_PER_CHAR));
1698 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1701 yylval.ttype = build_int_2 (low, high);
1702 TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
1704 /* If warn_traditional, calculate both the ANSI type and the
1705 traditional type, then see if they disagree.
1706 Otherwise, calculate only the type for the dialect in use. */
1707 if (warn_traditional || flag_traditional)
1709 /* Calculate the traditional type. */
1710 /* Traditionally, any constant is signed;
1711 but if unsigned is specified explicitly, obey that.
1712 Use the smallest size with the right number of bits,
1713 except for one special case with decimal constants. */
1714 if (! spec_long && base != 10
1715 && int_fits_type_p (yylval.ttype, unsigned_type_node))
1716 traditional_type = (spec_unsigned ? unsigned_type_node
1717 : integer_type_node);
1718 /* A decimal constant must be long
1719 if it does not fit in type int.
1720 I think this is independent of whether
1721 the constant is signed. */
1722 else if (! spec_long && base == 10
1723 && int_fits_type_p (yylval.ttype, integer_type_node))
1724 traditional_type = (spec_unsigned ? unsigned_type_node
1725 : integer_type_node);
1726 else if (! spec_long_long)
1727 traditional_type = (spec_unsigned ? long_unsigned_type_node
1728 : long_integer_type_node);
1730 traditional_type = (spec_unsigned
1731 ? long_long_unsigned_type_node
1732 : long_long_integer_type_node);
1734 if (warn_traditional || ! flag_traditional)
1736 /* Calculate the ANSI type. */
1737 if (! spec_long && ! spec_unsigned
1738 && int_fits_type_p (yylval.ttype, integer_type_node))
1739 ansi_type = integer_type_node;
1740 else if (! spec_long && (base != 10 || spec_unsigned)
1741 && int_fits_type_p (yylval.ttype, unsigned_type_node))
1742 ansi_type = unsigned_type_node;
1743 else if (! spec_unsigned && !spec_long_long
1744 && int_fits_type_p (yylval.ttype, long_integer_type_node))
1745 ansi_type = long_integer_type_node;
1746 else if (! spec_long_long)
1747 ansi_type = long_unsigned_type_node;
1748 else if (! spec_unsigned
1749 && int_fits_type_p (yylval.ttype,
1750 long_long_integer_type_node))
1751 ansi_type = long_long_integer_type_node;
1753 ansi_type = long_long_unsigned_type_node;
1756 type = flag_traditional ? traditional_type : ansi_type;
1758 if (warn_traditional && traditional_type != ansi_type)
1760 if (TYPE_PRECISION (traditional_type)
1761 != TYPE_PRECISION (ansi_type))
1762 warning ("width of integer constant changes with -traditional");
1763 else if (TREE_UNSIGNED (traditional_type)
1764 != TREE_UNSIGNED (ansi_type))
1765 warning ("integer constant is unsigned in ANSI C, signed with -traditional");
1767 warning ("width of integer constant may change on other systems with -traditional");
1770 if (!flag_traditional && !int_fits_type_p (yylval.ttype, type)
1772 pedwarn ("integer constant out of range");
1774 if (base == 10 && ! spec_unsigned && TREE_UNSIGNED (type))
1775 warning ("decimal constant is so large that it is unsigned");
1779 if (TYPE_PRECISION (type)
1780 <= TYPE_PRECISION (integer_type_node))
1782 = build_complex (NULL_TREE, integer_zero_node,
1783 convert (integer_type_node,
1786 error ("complex integer constant is too wide for `complex int'");
1788 else if (flag_traditional && !int_fits_type_p (yylval.ttype, type))
1789 /* The traditional constant 0x80000000 is signed
1790 but doesn't fit in the range of int.
1791 This will change it to -0x80000000, which does fit. */
1793 TREE_TYPE (yylval.ttype) = unsigned_type (type);
1794 yylval.ttype = convert (type, yylval.ttype);
1795 TREE_OVERFLOW (yylval.ttype)
1796 = TREE_CONSTANT_OVERFLOW (yylval.ttype) = 0;
1799 TREE_TYPE (yylval.ttype) = type;
1805 if (isalnum (c) || c == '.' || c == '_' || c == '$'
1806 || (!flag_traditional && (c == '-' || c == '+')
1807 && (p[-1] == 'e' || p[-1] == 'E')))
1808 error ("missing white space after number `%s'", token_buffer);
1810 value = CONSTANT; break;
1816 register int result = 0;
1817 register int num_chars = 0;
1818 unsigned width = TYPE_PRECISION (char_type_node);
1823 width = WCHAR_TYPE_SIZE;
1824 #ifdef MULTIBYTE_CHARS
1825 max_chars = MB_CUR_MAX;
1831 max_chars = TYPE_PRECISION (integer_type_node) / width;
1839 if (c == '\'' || c == EOF)
1845 c = readescape (&ignore);
1848 if (width < HOST_BITS_PER_INT
1849 && (unsigned) c >= (1 << width))
1850 pedwarn ("escape sequence out of range for character");
1851 #ifdef MAP_CHARACTER
1853 c = MAP_CHARACTER (c);
1859 pedwarn ("ANSI C forbids newline in character constant");
1862 #ifdef MAP_CHARACTER
1864 c = MAP_CHARACTER (c);
1868 if (num_chars > maxtoken - 4)
1869 extend_token_buffer (token_buffer);
1871 token_buffer[num_chars] = c;
1873 /* Merge character into result; ignore excess chars. */
1874 if (num_chars < max_chars + 1)
1876 if (width < HOST_BITS_PER_INT)
1877 result = (result << width) | (c & ((1 << width) - 1));
1883 token_buffer[num_chars + 1] = '\'';
1884 token_buffer[num_chars + 2] = 0;
1887 error ("malformatted character constant");
1888 else if (num_chars == 0)
1889 error ("empty character constant");
1890 else if (num_chars > max_chars)
1892 num_chars = max_chars;
1893 error ("character constant too long");
1895 else if (num_chars != 1 && ! flag_traditional)
1896 warning ("multi-character character constant");
1898 /* If char type is signed, sign-extend the constant. */
1901 int num_bits = num_chars * width;
1903 /* We already got an error; avoid invalid shift. */
1904 yylval.ttype = build_int_2 (0, 0);
1905 else if (TREE_UNSIGNED (char_type_node)
1906 || ((result >> (num_bits - 1)) & 1) == 0)
1908 = build_int_2 (result & ((unsigned HOST_WIDE_INT) ~0
1909 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
1913 = build_int_2 (result | ~((unsigned HOST_WIDE_INT) ~0
1914 >> (HOST_BITS_PER_WIDE_INT - num_bits)),
1916 TREE_TYPE (yylval.ttype) = integer_type_node;
1920 #ifdef MULTIBYTE_CHARS
1921 /* Set the initial shift state and convert the next sequence. */
1923 /* In all locales L'\0' is zero and mbtowc will return zero,
1926 || (num_chars == 1 && token_buffer[1] != '\0'))
1929 (void) mbtowc (NULL_PTR, NULL_PTR, 0);
1930 if (mbtowc (& wc, token_buffer + 1, num_chars) == num_chars)
1933 warning ("Ignoring invalid multibyte character");
1936 yylval.ttype = build_int_2 (result, 0);
1937 TREE_TYPE (yylval.ttype) = wchar_type_node;
1948 p = token_buffer + 1;
1950 while (c != '"' && c >= 0)
1955 c = readescape (&ignore);
1959 && TYPE_PRECISION (char_type_node) < HOST_BITS_PER_INT
1960 && c >= (1 << TYPE_PRECISION (char_type_node)))
1961 pedwarn ("escape sequence out of range for character");
1966 pedwarn ("ANSI C forbids newline in string constant");
1970 if (p == token_buffer + maxtoken)
1971 p = extend_token_buffer (p);
1980 error ("Unterminated string constant");
1982 /* We have read the entire constant.
1983 Construct a STRING_CST for the result. */
1987 /* If this is a L"..." wide-string, convert the multibyte string
1988 to a wide character string. */
1989 char *widep = (char *) alloca ((p - token_buffer) * WCHAR_BYTES);
1992 #ifdef MULTIBYTE_CHARS
1993 len = mbstowcs ((wchar_t *) widep, token_buffer + 1, p - token_buffer);
1994 if (len < 0 || len >= (p - token_buffer))
1996 warning ("Ignoring invalid multibyte string");
1999 bzero (widep + (len * WCHAR_BYTES), WCHAR_BYTES);
2002 union { long l; char c[sizeof (long)]; } u;
2006 /* Determine whether host is little or big endian. */
2008 big_endian = u.c[sizeof (long) - 1];
2009 wp = widep + (big_endian ? WCHAR_BYTES - 1 : 0);
2011 bzero (widep, (p - token_buffer) * WCHAR_BYTES);
2012 for (cp = token_buffer + 1; cp < p; cp++)
2013 *wp = *cp, wp += WCHAR_BYTES;
2014 len = p - token_buffer - 1;
2017 yylval.ttype = build_string ((len + 1) * WCHAR_BYTES, widep);
2018 TREE_TYPE (yylval.ttype) = wchar_array_type_node;
2023 extern tree build_objc_string();
2024 /* Return an Objective-C @"..." constant string object. */
2025 yylval.ttype = build_objc_string (p - token_buffer,
2027 TREE_TYPE (yylval.ttype) = char_array_type_node;
2028 value = OBJC_STRING;
2032 yylval.ttype = build_string (p - token_buffer, token_buffer + 1);
2033 TREE_TYPE (yylval.ttype) = char_array_type_node;
2064 yylval.code = PLUS_EXPR; break;
2066 yylval.code = MINUS_EXPR; break;
2068 yylval.code = BIT_AND_EXPR; break;
2070 yylval.code = BIT_IOR_EXPR; break;
2072 yylval.code = MULT_EXPR; break;
2074 yylval.code = TRUNC_DIV_EXPR; break;
2076 yylval.code = TRUNC_MOD_EXPR; break;
2078 yylval.code = BIT_XOR_EXPR; break;
2080 yylval.code = LSHIFT_EXPR; break;
2082 yylval.code = RSHIFT_EXPR; break;
2084 yylval.code = LT_EXPR; break;
2086 yylval.code = GT_EXPR; break;
2089 token_buffer[1] = c1 = GETC();
2090 token_buffer[2] = 0;
2097 value = ARITHCOMPARE; yylval.code = LE_EXPR; goto done;
2099 value = ARITHCOMPARE; yylval.code = GE_EXPR; goto done;
2101 value = EQCOMPARE; yylval.code = NE_EXPR; goto done;
2103 value = EQCOMPARE; yylval.code = EQ_EXPR; goto done;
2105 value = ASSIGN; goto done;
2111 value = PLUSPLUS; goto done;
2113 value = MINUSMINUS; goto done;
2115 value = ANDAND; goto done;
2117 value = OROR; goto done;
2130 { value = POINTSAT; goto done; }
2134 { value = ']'; goto done; }
2138 { value = '{'; indent_level++; goto done; }
2140 { value = '['; goto done; }
2144 { value = '}'; indent_level--; goto done; }
2148 token_buffer[1] = 0;
2150 if ((c == '<') || (c == '>'))
2151 value = ARITHCOMPARE;
2157 /* Don't make yyparse think this is eof. */
2176 /* yylloc.last_line = lineno; */
2181 /* Sets the value of the 'yydebug' variable to VALUE.
2182 This is a function so we don't have to have YYDEBUG defined
2183 in order to build the compiler. */
2192 warning ("YYDEBUG not defined.");