2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
3 Free Software Foundation, Inc.
4 Contributed by Andy Vaught
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 /* Set of subroutines to (ultimately) return the next character to the
24 various matching subroutines. This file's job is to read files and
25 build up lines that are parsed by the parser. This means that we
26 handle continuation lines and "include" lines.
28 The first thing the scanner does is to load an entire file into
29 memory. We load the entire file into memory for a couple reasons.
30 The first is that we want to be able to deal with nonseekable input
31 (pipes, stdin) and there is a lot of backing up involved during
34 The second is that we want to be able to print the locus of errors,
35 and an error on line 999999 could conflict with something on line
36 one. Given nonseekable input, we've got to store the whole thing.
38 One thing that helps are the column truncation limits that give us
39 an upper bound on the size of individual lines. We don't store the
42 From the scanner's viewpoint, the higher level subroutines ask for
43 new characters and do a lot of jumping backwards. */
50 /* Structure for holding module and include file search path. */
51 typedef struct gfc_directorylist
54 struct gfc_directorylist *next;
58 /* List of include file search directories. */
59 static gfc_directorylist *include_dirs;
61 static gfc_file *file_head, *current_file;
63 static int continue_flag, end_flag, openmp_flag;
64 static locus openmp_locus;
66 gfc_source_form gfc_current_form;
67 static gfc_linebuf *line_head, *line_tail;
69 locus gfc_current_locus;
70 const char *gfc_source_file;
71 static FILE *gfc_src_file;
72 static char *gfc_src_preprocessor_lines[2];
75 /* Main scanner initialization. */
78 gfc_scanner_init_1 (void)
88 /* Main scanner destructor. */
91 gfc_scanner_done_1 (void)
96 while(line_head != NULL)
103 while(file_head != NULL)
106 gfc_free(file_head->filename);
114 /* Adds path to the list pointed to by list. */
117 gfc_add_include_path (const char *path)
119 gfc_directorylist *dir;
123 while (*p == ' ' || *p == '\t') /* someone might do 'gfortran "-I include"' */
130 dir = include_dirs = gfc_getmem (sizeof (gfc_directorylist));
137 dir->next = gfc_getmem (sizeof (gfc_directorylist));
142 dir->path = gfc_getmem (strlen (p) + 2);
143 strcpy (dir->path, p);
144 strcat (dir->path, "/"); /* make '/' last character */
148 /* Release resources allocated for options. */
151 gfc_release_include_path (void)
153 gfc_directorylist *p;
155 gfc_free (gfc_option.module_dir);
156 while (include_dirs != NULL)
159 include_dirs = include_dirs->next;
165 /* Opens file for reading, searching through the include directories
166 given if necessary. If the include_cwd argument is true, we try
167 to open the file in the current directory first. */
170 gfc_open_included_file (const char *name, const bool include_cwd)
173 gfc_directorylist *p;
178 f = gfc_open_file (name);
183 for (p = include_dirs; p; p = p->next)
185 fullname = (char *) alloca(strlen (p->path) + strlen (name) + 1);
186 strcpy (fullname, p->path);
187 strcat (fullname, name);
189 f = gfc_open_file (fullname);
197 /* Test to see if we're at the end of the main source file. */
207 /* Test to see if we're at the end of the current file. */
216 if (line_head == NULL)
217 return 1; /* Null file */
219 if (gfc_current_locus.lb == NULL)
226 /* Test to see if we're at the beginning of a new line. */
234 return (gfc_current_locus.nextc == gfc_current_locus.lb->line);
238 /* Test to see if we're at the end of a line. */
247 return (*gfc_current_locus.nextc == '\0');
251 /* Advance the current line pointer to the next line. */
254 gfc_advance_line (void)
259 if (gfc_current_locus.lb == NULL)
265 gfc_current_locus.lb = gfc_current_locus.lb->next;
267 if (gfc_current_locus.lb != NULL)
268 gfc_current_locus.nextc = gfc_current_locus.lb->line;
271 gfc_current_locus.nextc = NULL;
277 /* Get the next character from the input, advancing gfc_current_file's
278 locus. When we hit the end of the line or the end of the file, we
279 start returning a '\n' in order to complete the current statement.
280 No Fortran line conventions are implemented here.
282 Requiring explicit advances to the next line prevents the parse
283 pointer from being on the wrong line if the current statement ends
291 if (gfc_current_locus.nextc == NULL)
294 c = *gfc_current_locus.nextc++;
297 gfc_current_locus.nextc--; /* Remain on this line. */
304 /* Skip a comment. When we come here the parse pointer is positioned
305 immediately after the comment character. If we ever implement
306 compiler directives withing comments, here is where we parse the
310 skip_comment_line (void)
324 /* Comment lines are null lines, lines containing only blanks or lines
325 on which the first nonblank line is a '!'. */
328 skip_free_comments (void)
336 at_bol = gfc_at_bol ();
337 start = gfc_current_locus;
343 while (gfc_is_whitespace (c));
353 /* If -fopenmp, we need to handle here 2 things:
354 1) don't treat !$omp as comments, but directives
355 2) handle OpenMP conditional compilation, where
356 !$ should be treated as 2 spaces (for initial lines
357 only if followed by space). */
358 if (gfc_option.flag_openmp && at_bol)
360 locus old_loc = gfc_current_locus;
361 if (next_char () == '$')
364 if (c == 'o' || c == 'O')
366 if (((c = next_char ()) == 'm' || c == 'M')
367 && ((c = next_char ()) == 'p' || c == 'P')
368 && ((c = next_char ()) == ' ' || continue_flag))
370 while (gfc_is_whitespace (c))
372 if (c != '\n' && c != '!')
375 openmp_locus = old_loc;
376 gfc_current_locus = start;
380 gfc_current_locus = old_loc;
384 if (continue_flag || c == ' ')
386 gfc_current_locus = old_loc;
391 gfc_current_locus = old_loc;
393 skip_comment_line ();
400 if (openmp_flag && at_bol)
402 gfc_current_locus = start;
406 /* Skip comment lines in fixed source mode. We have the same rules as
407 in skip_free_comment(), except that we can have a 'c', 'C' or '*'
408 in column 1, and a '!' cannot be in column 6. Also, we deal with
409 lines with 'd' or 'D' in column 1, if the user requested this. */
412 skip_fixed_comments (void)
420 start = gfc_current_locus;
425 while (gfc_is_whitespace (c));
430 skip_comment_line ();
435 gfc_current_locus = start;
442 start = gfc_current_locus;
453 if (c == '!' || c == 'c' || c == 'C' || c == '*')
455 /* If -fopenmp, we need to handle here 2 things:
456 1) don't treat !$omp|c$omp|*$omp as comments, but directives
457 2) handle OpenMP conditional compilation, where
458 !$|c$|*$ should be treated as 2 spaces if the characters
459 in columns 3 to 6 are valid fixed form label columns
461 if (gfc_option.flag_openmp)
463 if (next_char () == '$')
466 if (c == 'o' || c == 'O')
468 if (((c = next_char ()) == 'm' || c == 'M')
469 && ((c = next_char ()) == 'p' || c == 'P'))
473 && ((openmp_flag && continue_flag)
474 || c == ' ' || c == '0'))
477 while (gfc_is_whitespace (c))
479 if (c != '\n' && c != '!')
481 /* Canonicalize to *$omp. */
484 gfc_current_locus = start;
494 for (col = 3; col < 6; col++, c = next_char ())
497 else if (c < '0' || c > '9')
502 if (col == 6 && c != '\n'
503 && ((continue_flag && !digit_seen)
504 || c == ' ' || c == '0'))
506 gfc_current_locus = start;
507 start.nextc[0] = ' ';
508 start.nextc[1] = ' ';
513 gfc_current_locus = start;
515 skip_comment_line ();
519 if (gfc_option.flag_d_lines != -1 && (c == 'd' || c == 'D'))
521 if (gfc_option.flag_d_lines == 0)
523 skip_comment_line ();
527 *start.nextc = c = ' ';
532 while (gfc_is_whitespace (c))
544 if (col != 6 && c == '!')
546 skip_comment_line ();
554 gfc_current_locus = start;
558 /* Skips the current line if it is a comment. */
561 gfc_skip_comments (void)
563 if (gfc_current_form == FORM_FREE)
564 skip_free_comments ();
566 skip_fixed_comments ();
570 /* Get the next character from the input, taking continuation lines
571 and end-of-line comments into account. This implies that comment
572 lines between continued lines must be eaten here. For higher-level
573 subroutines, this flattens continued lines into a single logical
574 line. The in_string flag denotes whether we're inside a character
578 gfc_next_char_literal (int in_string)
581 int i, c, prev_openmp_flag;
590 if (gfc_current_form == FORM_FREE)
592 if (!in_string && c == '!')
595 && memcmp (&gfc_current_locus, &openmp_locus,
596 sizeof (gfc_current_locus)) == 0)
599 /* This line can't be continued */
606 /* Avoid truncation warnings for comment ending lines. */
607 gfc_current_locus.lb->truncated = 0;
615 /* If the next nonblank character is a ! or \n, we've got a
616 continuation line. */
617 old_loc = gfc_current_locus;
620 while (gfc_is_whitespace (c))
623 /* Character constants to be continued cannot have commentary
626 if (in_string && c != '\n')
628 gfc_current_locus = old_loc;
633 if (c != '!' && c != '\n')
635 gfc_current_locus = old_loc;
640 prev_openmp_flag = openmp_flag;
643 skip_comment_line ();
647 /* We've got a continuation line and need to find where it continues.
648 First eat any comment lines. */
649 gfc_skip_comments ();
651 if (prev_openmp_flag != openmp_flag)
653 gfc_current_locus = old_loc;
654 openmp_flag = prev_openmp_flag;
659 /* Now that we have a non-comment line, probe ahead for the
660 first non-whitespace character. If it is another '&', then
661 reading starts at the next character, otherwise we must back
662 up to where the whitespace started and resume from there. */
664 old_loc = gfc_current_locus;
667 while (gfc_is_whitespace (c))
672 for (i = 0; i < 5; i++, c = next_char ())
674 gcc_assert (TOLOWER (c) == "!$omp"[i]);
676 old_loc = gfc_current_locus;
678 while (gfc_is_whitespace (c))
684 if (in_string && gfc_option.warn_ampersand)
685 gfc_warning ("Missing '&' in continued character constant at %C");
687 gfc_current_locus.nextc--;
692 /* Fixed form continuation. */
693 if (!in_string && c == '!')
695 /* Skip comment at end of line. */
702 /* Avoid truncation warnings for comment ending lines. */
703 gfc_current_locus.lb->truncated = 0;
709 prev_openmp_flag = openmp_flag;
711 old_loc = gfc_current_locus;
714 gfc_skip_comments ();
716 /* See if this line is a continuation line. */
717 if (openmp_flag != prev_openmp_flag)
719 openmp_flag = prev_openmp_flag;
720 goto not_continuation;
724 for (i = 0; i < 5; i++)
728 goto not_continuation;
731 for (i = 0; i < 5; i++)
734 if (TOLOWER (c) != "*$omp"[i])
735 goto not_continuation;
739 if (c == '0' || c == ' ' || c == '\n')
740 goto not_continuation;
743 /* Ready to read first character of continuation line, which might
744 be another continuation line! */
749 gfc_current_locus = old_loc;
757 /* Get the next character of input, folded to lowercase. In fixed
758 form mode, we also ignore spaces. When matcher subroutines are
759 parsing character literals, they have to call
760 gfc_next_char_literal(). */
769 c = gfc_next_char_literal (0);
771 while (gfc_current_form == FORM_FIXED && gfc_is_whitespace (c));
783 old_loc = gfc_current_locus;
784 c = gfc_next_char ();
785 gfc_current_locus = old_loc;
791 /* Recover from an error. We try to get past the current statement
792 and get lined up for the next. The next statement follows a '\n'
793 or a ';'. We also assume that we are not within a character
794 constant, and deal with finding a '\'' or '"'. */
797 gfc_error_recovery (void)
806 c = gfc_next_char ();
807 if (c == '\n' || c == ';')
810 if (c != '\'' && c != '"')
839 /* Read ahead until the next character to be read is not whitespace. */
842 gfc_gobble_whitespace (void)
844 static int linenum = 0;
850 old_loc = gfc_current_locus;
851 c = gfc_next_char_literal (0);
852 /* Issue a warning for nonconforming tabs. We keep track of the line
853 number because the Fortran matchers will often back up and the same
854 line will be scanned multiple times. */
855 if (!gfc_option.warn_tabs && c == '\t')
857 #ifdef USE_MAPPED_LOCATION
858 int cur_linenum = LOCATION_LINE (gfc_current_locus.lb->location);
860 int cur_linenum = gfc_current_locus.lb->linenum;
862 if (cur_linenum != linenum)
864 linenum = cur_linenum;
865 gfc_warning_now ("Nonconforming tab character at %C");
869 while (gfc_is_whitespace (c));
871 gfc_current_locus = old_loc;
875 /* Load a single line into pbuf.
877 If pbuf points to a NULL pointer, it is allocated.
878 We truncate lines that are too long, unless we're dealing with
879 preprocessor lines or if the option -ffixed-line-length-none is set,
880 in which case we reallocate the buffer to fit the entire line, if
882 In fixed mode, we expand a tab that occurs within the statement
883 label region to expand to spaces that leave the next character in
885 load_line returns whether the line was truncated. */
888 load_line (FILE * input, char **pbuf, int *pbuflen)
890 static int linenum = 0, current_line = 1;
891 int c, maxlen, i, preprocessor_flag, buflen = *pbuflen;
892 int trunc_flag = 0, seen_comment = 0;
895 /* Determine the maximum allowed line length.
896 The default for free-form is GFC_MAX_LINE, for fixed-form or for
897 unknown form it is 72. Refer to the documentation in gfc_option_t. */
898 if (gfc_current_form == FORM_FREE)
900 if (gfc_option.free_line_length == -1)
901 maxlen = GFC_MAX_LINE;
903 maxlen = gfc_option.free_line_length;
905 else if (gfc_current_form == FORM_FIXED)
907 if (gfc_option.fixed_line_length == -1)
910 maxlen = gfc_option.fixed_line_length;
917 /* Allocate the line buffer, storing its length into buflen. */
921 buflen = GFC_MAX_LINE;
923 *pbuf = gfc_getmem (buflen + 1);
929 preprocessor_flag = 0;
932 /* In order to not truncate preprocessor lines, we have to
933 remember that this is one. */
934 preprocessor_flag = 1;
947 continue; /* Gobble characters. */
953 /* Ctrl-Z ends the file. */
954 while (fgetc (input) != EOF);
958 /* Is this a fixed-form comment? */
959 if (gfc_current_form == FORM_FIXED && i == 0
960 && (c == '*' || c == 'c' || c == 'd'))
963 if (gfc_current_form == FORM_FIXED && c == '\t' && i <= 6)
965 /* The error machinery isn't available at this point, so we can't
966 easily report line and column numbers consistent with other
967 parts of gfortran. */
968 if (!gfc_option.warn_tabs && seen_comment == 0
969 && current_line != linenum)
971 linenum = current_line;
973 "Nonconforming tab character in column 1 of line %d", linenum);
988 if (maxlen == 0 || preprocessor_flag)
992 /* Reallocate line buffer to double size to hold the
995 *pbuf = xrealloc (*pbuf, buflen + 1);
999 else if (i >= maxlen)
1001 /* Truncate the rest of the line. */
1005 if (c == '\n' || c == EOF)
1011 ungetc ('\n', input);
1015 /* Pad lines to the selected line length in fixed form. */
1016 if (gfc_current_form == FORM_FIXED
1017 && gfc_option.fixed_line_length != 0
1018 && !preprocessor_flag
1021 while (i++ < maxlen)
1033 /* Get a gfc_file structure, initialize it and add it to
1037 get_file (const char *name, enum lc_reason reason ATTRIBUTE_UNUSED)
1041 f = gfc_getmem (sizeof (gfc_file));
1043 f->filename = gfc_getmem (strlen (name) + 1);
1044 strcpy (f->filename, name);
1046 f->next = file_head;
1049 f->included_by = current_file;
1050 if (current_file != NULL)
1051 f->inclusion_line = current_file->line;
1053 #ifdef USE_MAPPED_LOCATION
1054 linemap_add (&line_table, reason, false, f->filename, 1);
1060 /* Deal with a line from the C preprocessor. The
1061 initial octothorp has already been seen. */
1064 preprocessor_line (char *c)
1070 int escaped, unescape;
1073 while (*c == ' ' || *c == '\t')
1076 if (*c < '0' || *c > '9')
1081 c = strchr (c, ' ');
1084 /* No file name given. Set new line number. */
1085 current_file->line = line;
1090 while (*c == ' ' || *c == '\t')
1100 /* Make filename end at quote. */
1103 while (*c && ! (! escaped && *c == '"'))
1107 else if (*c == '\\')
1116 /* Preprocessor line has no closing quote. */
1121 /* Undo effects of cpp_quote_string. */
1125 char *d = gfc_getmem (c - filename - unescape);
1141 flag[1] = flag[2] = flag[3] = flag[4] = false;
1145 c = strchr (c, ' ');
1152 if (1 <= i && i <= 4)
1156 /* Interpret flags. */
1158 if (flag[1]) /* Starting new file. */
1160 f = get_file (filename, LC_RENAME);
1161 f->up = current_file;
1165 if (flag[2]) /* Ending current file. */
1167 if (!current_file->up
1168 || strcmp (current_file->up->filename, filename) != 0)
1170 gfc_warning_now ("%s:%d: file %s left but not entered",
1171 current_file->filename, current_file->line,
1174 gfc_free (filename);
1177 current_file = current_file->up;
1180 /* The name of the file can be a temporary file produced by
1181 cpp. Replace the name if it is different. */
1183 if (strcmp (current_file->filename, filename) != 0)
1185 gfc_free (current_file->filename);
1186 current_file->filename = gfc_getmem (strlen (filename) + 1);
1187 strcpy (current_file->filename, filename);
1190 /* Set new line number. */
1191 current_file->line = line;
1193 gfc_free (filename);
1197 gfc_warning_now ("%s:%d: Illegal preprocessor directive",
1198 current_file->filename, current_file->line);
1199 current_file->line++;
1203 static try load_file (const char *, bool);
1205 /* include_line()-- Checks a line buffer to see if it is an include
1206 line. If so, we call load_file() recursively to load the included
1207 file. We never return a syntax error because a statement like
1208 "include = 5" is perfectly legal. We return false if no include was
1209 processed or true if we matched an include. */
1212 include_line (char *line)
1214 char quote, *c, *begin, *stop;
1217 while (*c == ' ' || *c == '\t')
1220 if (strncasecmp (c, "include", 7))
1224 while (*c == ' ' || *c == '\t')
1227 /* Find filename between quotes. */
1230 if (quote != '"' && quote != '\'')
1235 while (*c != quote && *c != '\0')
1243 while (*c == ' ' || *c == '\t')
1246 if (*c != '\0' && *c != '!')
1249 /* We have an include line at this point. */
1251 *stop = '\0'; /* It's ok to trash the buffer, as this line won't be
1252 read by anything else. */
1254 load_file (begin, false);
1258 /* Load a file into memory by calling load_line until the file ends. */
1261 load_file (const char *filename, bool initial)
1269 for (f = current_file; f; f = f->up)
1270 if (strcmp (filename, f->filename) == 0)
1272 gfc_error_now ("File '%s' is being included recursively", filename);
1280 input = gfc_src_file;
1281 gfc_src_file = NULL;
1284 input = gfc_open_file (filename);
1287 gfc_error_now ("Can't open file '%s'", filename);
1293 input = gfc_open_included_file (filename, false);
1296 gfc_error_now ("Can't open included file '%s'", filename);
1301 /* Load the file. */
1303 f = get_file (filename, initial ? LC_RENAME : LC_ENTER);
1304 f->up = current_file;
1306 current_file->line = 1;
1310 if (initial && gfc_src_preprocessor_lines[0])
1312 preprocessor_line (gfc_src_preprocessor_lines[0]);
1313 gfc_free (gfc_src_preprocessor_lines[0]);
1314 gfc_src_preprocessor_lines[0] = NULL;
1315 if (gfc_src_preprocessor_lines[1])
1317 preprocessor_line (gfc_src_preprocessor_lines[1]);
1318 gfc_free (gfc_src_preprocessor_lines[1]);
1319 gfc_src_preprocessor_lines[1] = NULL;
1325 int trunc = load_line (input, &line, &line_len);
1327 len = strlen (line);
1328 if (feof (input) && len == 0)
1331 /* There are three things this line can be: a line of Fortran
1332 source, an include line or a C preprocessor directive. */
1336 preprocessor_line (line);
1340 if (include_line (line))
1342 current_file->line++;
1348 b = gfc_getmem (gfc_linebuf_header_size + len + 1);
1350 #ifdef USE_MAPPED_LOCATION
1352 = linemap_line_start (&line_table, current_file->line++, 120);
1354 b->linenum = current_file->line++;
1356 b->file = current_file;
1357 b->truncated = trunc;
1358 strcpy (b->line, line);
1360 if (line_head == NULL)
1363 line_tail->next = b;
1368 /* Release the line buffer allocated in load_line. */
1373 current_file = current_file->up;
1374 #ifdef USE_MAPPED_LOCATION
1375 linemap_add (&line_table, LC_LEAVE, 0, NULL, 0);
1381 /* Open a new file and start scanning from that file. Returns SUCCESS
1382 if everything went OK, FAILURE otherwise. If form == FORM_UKNOWN
1383 it tries to determine the source form from the filename, defaulting
1391 result = load_file (gfc_source_file, true);
1393 gfc_current_locus.lb = line_head;
1394 gfc_current_locus.nextc = (line_head == NULL) ? NULL : line_head->line;
1396 #if 0 /* Debugging aid. */
1397 for (; line_head; line_head = line_head->next)
1398 gfc_status ("%s:%3d %s\n", line_head->file->filename,
1399 #ifdef USE_MAPPED_LOCATION
1400 LOCATION_LINE (line_head->location),
1413 unescape_filename (const char *ptr)
1415 const char *p = ptr, *s;
1417 int escaped, unescape = 0;
1419 /* Make filename end at quote. */
1421 while (*p && ! (! escaped && *p == '"'))
1425 else if (*p == '\\')
1436 /* Undo effects of cpp_quote_string. */
1438 d = gfc_getmem (p + 1 - ptr - unescape);
1453 /* For preprocessed files, if the first tokens are of the form # NUM.
1454 handle the directives so we know the original file name. */
1457 gfc_read_orig_filename (const char *filename, const char **canon_source_file)
1462 gfc_src_file = gfc_open_file (filename);
1463 if (gfc_src_file == NULL)
1466 c = fgetc (gfc_src_file);
1467 ungetc (c, gfc_src_file);
1473 load_line (gfc_src_file, &gfc_src_preprocessor_lines[0], &len);
1475 if (strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0)
1478 filename = unescape_filename (gfc_src_preprocessor_lines[0] + 5);
1479 if (filename == NULL)
1482 c = fgetc (gfc_src_file);
1483 ungetc (c, gfc_src_file);
1489 load_line (gfc_src_file, &gfc_src_preprocessor_lines[1], &len);
1491 if (strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0)
1494 dirname = unescape_filename (gfc_src_preprocessor_lines[1] + 5);
1495 if (dirname == NULL)
1498 len = strlen (dirname);
1499 if (len < 3 || dirname[len - 1] != '/' || dirname[len - 2] != '/')
1504 dirname[len - 2] = '\0';
1505 set_src_pwd (dirname);
1507 if (! IS_ABSOLUTE_PATH (filename))
1509 char *p = gfc_getmem (len + strlen (filename));
1511 memcpy (p, dirname, len - 2);
1513 strcpy (p + len - 1, filename);
1514 *canon_source_file = p;