2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
3 Free Software Foundation, Inc.
4 Contributed by Andy Vaught
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 /* Set of subroutines to (ultimately) return the next character to the
24 various matching subroutines. This file's job is to read files and
25 build up lines that are parsed by the parser. This means that we
26 handle continuation lines and "include" lines.
28 The first thing the scanner does is to load an entire file into
29 memory. We load the entire file into memory for a couple reasons.
30 The first is that we want to be able to deal with nonseekable input
31 (pipes, stdin) and there is a lot of backing up involved during
34 The second is that we want to be able to print the locus of errors,
35 and an error on line 999999 could conflict with something on line
36 one. Given nonseekable input, we've got to store the whole thing.
38 One thing that helps are the column truncation limits that give us
39 an upper bound on the size of individual lines. We don't store the
42 From the scanner's viewpoint, the higher level subroutines ask for
43 new characters and do a lot of jumping backwards. */
50 /* Structure for holding module and include file search path. */
51 typedef struct gfc_directorylist
54 struct gfc_directorylist *next;
58 /* List of include file search directories. */
59 static gfc_directorylist *include_dirs;
61 static gfc_file *file_head, *current_file;
63 static int continue_flag, end_flag, openmp_flag;
64 static locus openmp_locus;
66 gfc_source_form gfc_current_form;
67 static gfc_linebuf *line_head, *line_tail;
69 locus gfc_current_locus;
70 const char *gfc_source_file;
71 static FILE *gfc_src_file;
72 static char *gfc_src_preprocessor_lines[2];
75 /* Main scanner initialization. */
78 gfc_scanner_init_1 (void)
88 /* Main scanner destructor. */
91 gfc_scanner_done_1 (void)
96 while(line_head != NULL)
103 while(file_head != NULL)
106 gfc_free(file_head->filename);
114 /* Adds path to the list pointed to by list. */
117 gfc_add_include_path (const char *path)
119 gfc_directorylist *dir;
123 while (*p == ' ' || *p == '\t') /* someone might do 'gfortran "-I include"' */
130 dir = include_dirs = gfc_getmem (sizeof (gfc_directorylist));
137 dir->next = gfc_getmem (sizeof (gfc_directorylist));
142 dir->path = gfc_getmem (strlen (p) + 2);
143 strcpy (dir->path, p);
144 strcat (dir->path, "/"); /* make '/' last character */
148 /* Release resources allocated for options. */
151 gfc_release_include_path (void)
153 gfc_directorylist *p;
155 gfc_free (gfc_option.module_dir);
156 while (include_dirs != NULL)
159 include_dirs = include_dirs->next;
165 /* Opens file for reading, searching through the include directories
166 given if necessary. If the include_cwd argument is true, we try
167 to open the file in the current directory first. */
170 gfc_open_included_file (const char *name, const bool include_cwd)
173 gfc_directorylist *p;
178 f = gfc_open_file (name);
183 for (p = include_dirs; p; p = p->next)
185 fullname = (char *) alloca(strlen (p->path) + strlen (name) + 1);
186 strcpy (fullname, p->path);
187 strcat (fullname, name);
189 f = gfc_open_file (fullname);
197 /* Test to see if we're at the end of the main source file. */
207 /* Test to see if we're at the end of the current file. */
216 if (line_head == NULL)
217 return 1; /* Null file */
219 if (gfc_current_locus.lb == NULL)
226 /* Test to see if we're at the beginning of a new line. */
234 return (gfc_current_locus.nextc == gfc_current_locus.lb->line);
238 /* Test to see if we're at the end of a line. */
247 return (*gfc_current_locus.nextc == '\0');
251 /* Advance the current line pointer to the next line. */
254 gfc_advance_line (void)
259 if (gfc_current_locus.lb == NULL)
265 gfc_current_locus.lb = gfc_current_locus.lb->next;
267 if (gfc_current_locus.lb != NULL)
268 gfc_current_locus.nextc = gfc_current_locus.lb->line;
271 gfc_current_locus.nextc = NULL;
277 /* Get the next character from the input, advancing gfc_current_file's
278 locus. When we hit the end of the line or the end of the file, we
279 start returning a '\n' in order to complete the current statement.
280 No Fortran line conventions are implemented here.
282 Requiring explicit advances to the next line prevents the parse
283 pointer from being on the wrong line if the current statement ends
291 if (gfc_current_locus.nextc == NULL)
294 c = *gfc_current_locus.nextc++;
297 gfc_current_locus.nextc--; /* Remain on this line. */
304 /* Skip a comment. When we come here the parse pointer is positioned
305 immediately after the comment character. If we ever implement
306 compiler directives withing comments, here is where we parse the
310 skip_comment_line (void)
324 /* Comment lines are null lines, lines containing only blanks or lines
325 on which the first nonblank line is a '!'. */
328 skip_free_comments (void)
336 at_bol = gfc_at_bol ();
337 start = gfc_current_locus;
343 while (gfc_is_whitespace (c));
353 /* If -fopenmp, we need to handle here 2 things:
354 1) don't treat !$omp as comments, but directives
355 2) handle OpenMP conditional compilation, where
356 !$ should be treated as 2 spaces (for initial lines
357 only if followed by space). */
358 if (gfc_option.flag_openmp && at_bol)
360 locus old_loc = gfc_current_locus;
361 if (next_char () == '$')
364 if (c == 'o' || c == 'O')
366 if (((c = next_char ()) == 'm' || c == 'M')
367 && ((c = next_char ()) == 'p' || c == 'P')
368 && ((c = next_char ()) == ' ' || continue_flag))
370 while (gfc_is_whitespace (c))
372 if (c != '\n' && c != '!')
375 openmp_locus = old_loc;
376 gfc_current_locus = start;
380 gfc_current_locus = old_loc;
384 if (continue_flag || c == ' ')
386 gfc_current_locus = old_loc;
391 gfc_current_locus = old_loc;
393 skip_comment_line ();
400 if (openmp_flag && at_bol)
402 gfc_current_locus = start;
406 /* Skip comment lines in fixed source mode. We have the same rules as
407 in skip_free_comment(), except that we can have a 'c', 'C' or '*'
408 in column 1, and a '!' cannot be in column 6. Also, we deal with
409 lines with 'd' or 'D' in column 1, if the user requested this. */
412 skip_fixed_comments (void)
420 start = gfc_current_locus;
425 while (gfc_is_whitespace (c));
430 skip_comment_line ();
435 gfc_current_locus = start;
442 start = gfc_current_locus;
453 if (c == '!' || c == 'c' || c == 'C' || c == '*')
455 /* If -fopenmp, we need to handle here 2 things:
456 1) don't treat !$omp|c$omp|*$omp as comments, but directives
457 2) handle OpenMP conditional compilation, where
458 !$|c$|*$ should be treated as 2 spaces if the characters
459 in columns 3 to 6 are valid fixed form label columns
461 if (gfc_option.flag_openmp)
463 if (next_char () == '$')
466 if (c == 'o' || c == 'O')
468 if (((c = next_char ()) == 'm' || c == 'M')
469 && ((c = next_char ()) == 'p' || c == 'P'))
473 && ((openmp_flag && continue_flag)
474 || c == ' ' || c == '0'))
477 while (gfc_is_whitespace (c))
479 if (c != '\n' && c != '!')
481 /* Canonicalize to *$omp. */
484 gfc_current_locus = start;
494 for (col = 3; col < 6; col++, c = next_char ())
497 else if (c < '0' || c > '9')
502 if (col == 6 && c != '\n'
503 && ((continue_flag && !digit_seen)
504 || c == ' ' || c == '0'))
506 gfc_current_locus = start;
507 start.nextc[0] = ' ';
508 start.nextc[1] = ' ';
513 gfc_current_locus = start;
515 skip_comment_line ();
519 if (gfc_option.flag_d_lines != -1 && (c == 'd' || c == 'D'))
521 if (gfc_option.flag_d_lines == 0)
523 skip_comment_line ();
527 *start.nextc = c = ' ';
532 while (gfc_is_whitespace (c))
544 if (col != 6 && c == '!')
546 skip_comment_line ();
554 gfc_current_locus = start;
558 /* Skips the current line if it is a comment. */
561 gfc_skip_comments (void)
563 if (gfc_current_form == FORM_FREE)
564 skip_free_comments ();
566 skip_fixed_comments ();
570 /* Get the next character from the input, taking continuation lines
571 and end-of-line comments into account. This implies that comment
572 lines between continued lines must be eaten here. For higher-level
573 subroutines, this flattens continued lines into a single logical
574 line. The in_string flag denotes whether we're inside a character
578 gfc_next_char_literal (int in_string)
581 int i, c, prev_openmp_flag;
590 if (gfc_current_form == FORM_FREE)
592 if (!in_string && c == '!')
595 && memcmp (&gfc_current_locus, &openmp_locus,
596 sizeof (gfc_current_locus)) == 0)
599 /* This line can't be continued */
606 /* Avoid truncation warnings for comment ending lines. */
607 gfc_current_locus.lb->truncated = 0;
615 /* If the next nonblank character is a ! or \n, we've got a
616 continuation line. */
617 old_loc = gfc_current_locus;
620 while (gfc_is_whitespace (c))
623 /* Character constants to be continued cannot have commentary
626 if (in_string && c != '\n')
628 gfc_current_locus = old_loc;
633 if (c != '!' && c != '\n')
635 gfc_current_locus = old_loc;
640 prev_openmp_flag = openmp_flag;
643 skip_comment_line ();
647 /* We've got a continuation line and need to find where it continues.
648 First eat any comment lines. */
649 gfc_skip_comments ();
651 if (prev_openmp_flag != openmp_flag)
653 gfc_current_locus = old_loc;
654 openmp_flag = prev_openmp_flag;
659 /* Now that we have a non-comment line, probe ahead for the
660 first non-whitespace character. If it is another '&', then
661 reading starts at the next character, otherwise we must back
662 up to where the whitespace started and resume from there. */
664 old_loc = gfc_current_locus;
667 while (gfc_is_whitespace (c))
672 for (i = 0; i < 5; i++, c = next_char ())
674 gcc_assert (TOLOWER (c) == "!$omp"[i]);
676 old_loc = gfc_current_locus;
678 while (gfc_is_whitespace (c))
684 if (in_string && gfc_option.warn_ampersand)
685 gfc_warning ("Missing '&' in continued character constant at %C");
687 gfc_current_locus.nextc--;
692 /* Fixed form continuation. */
693 if (!in_string && c == '!')
695 /* Skip comment at end of line. */
702 /* Avoid truncation warnings for comment ending lines. */
703 gfc_current_locus.lb->truncated = 0;
709 prev_openmp_flag = openmp_flag;
711 old_loc = gfc_current_locus;
714 gfc_skip_comments ();
716 /* See if this line is a continuation line. */
717 if (openmp_flag != prev_openmp_flag)
719 openmp_flag = prev_openmp_flag;
720 goto not_continuation;
724 for (i = 0; i < 5; i++)
728 goto not_continuation;
731 for (i = 0; i < 5; i++)
734 if (TOLOWER (c) != "*$omp"[i])
735 goto not_continuation;
739 if (c == '0' || c == ' ' || c == '\n')
740 goto not_continuation;
743 /* Ready to read first character of continuation line, which might
744 be another continuation line! */
749 gfc_current_locus = old_loc;
757 /* Get the next character of input, folded to lowercase. In fixed
758 form mode, we also ignore spaces. When matcher subroutines are
759 parsing character literals, they have to call
760 gfc_next_char_literal(). */
769 c = gfc_next_char_literal (0);
771 while (gfc_current_form == FORM_FIXED && gfc_is_whitespace (c));
783 old_loc = gfc_current_locus;
784 c = gfc_next_char ();
785 gfc_current_locus = old_loc;
791 /* Recover from an error. We try to get past the current statement
792 and get lined up for the next. The next statement follows a '\n'
793 or a ';'. We also assume that we are not within a character
794 constant, and deal with finding a '\'' or '"'. */
797 gfc_error_recovery (void)
806 c = gfc_next_char ();
807 if (c == '\n' || c == ';')
810 if (c != '\'' && c != '"')
839 /* Read ahead until the next character to be read is not whitespace. */
842 gfc_gobble_whitespace (void)
844 static int linenum = 0;
850 old_loc = gfc_current_locus;
851 c = gfc_next_char_literal (0);
852 /* Issue a warning for nonconforming tabs. We keep track of the line
853 number because the Fortran matchers will often back up and the same
854 line will be scanned multiple times. */
855 if (!gfc_option.warn_tabs && c == '\t'
856 && gfc_current_locus.lb->linenum != linenum)
858 linenum = gfc_current_locus.lb->linenum;
859 gfc_warning_now ("Nonconforming tab character at %C");
862 while (gfc_is_whitespace (c));
864 gfc_current_locus = old_loc;
868 /* Load a single line into pbuf.
870 If pbuf points to a NULL pointer, it is allocated.
871 We truncate lines that are too long, unless we're dealing with
872 preprocessor lines or if the option -ffixed-line-length-none is set,
873 in which case we reallocate the buffer to fit the entire line, if
875 In fixed mode, we expand a tab that occurs within the statement
876 label region to expand to spaces that leave the next character in
878 load_line returns whether the line was truncated. */
881 load_line (FILE * input, char **pbuf, int *pbuflen)
883 static int linenum = 0, current_line = 1;
884 int c, maxlen, i, preprocessor_flag, buflen = *pbuflen;
885 int trunc_flag = 0, seen_comment = 0;
888 /* Determine the maximum allowed line length.
889 The default for free-form is GFC_MAX_LINE, for fixed-form or for
890 unknown form it is 72. Refer to the documentation in gfc_option_t. */
891 if (gfc_current_form == FORM_FREE)
893 if (gfc_option.free_line_length == -1)
894 maxlen = GFC_MAX_LINE;
896 maxlen = gfc_option.free_line_length;
898 else if (gfc_current_form == FORM_FIXED)
900 if (gfc_option.fixed_line_length == -1)
903 maxlen = gfc_option.fixed_line_length;
910 /* Allocate the line buffer, storing its length into buflen. */
914 buflen = GFC_MAX_LINE;
916 *pbuf = gfc_getmem (buflen + 1);
922 preprocessor_flag = 0;
925 /* In order to not truncate preprocessor lines, we have to
926 remember that this is one. */
927 preprocessor_flag = 1;
940 continue; /* Gobble characters. */
946 /* Ctrl-Z ends the file. */
947 while (fgetc (input) != EOF);
951 /* Is this a fixed-form comment? */
952 if (gfc_current_form == FORM_FIXED && i == 0
953 && (c == '*' || c == 'c' || c == 'd'))
956 if (gfc_current_form == FORM_FIXED && c == '\t' && i <= 6)
958 /* The error machinery isn't available at this point, so we can't
959 easily report line and column numbers consistent with other
960 parts of gfortran. */
961 if (!gfc_option.warn_tabs && seen_comment == 0
962 && current_line != linenum)
964 linenum = current_line;
966 "Nonconforming tab character in column 1 of line %d", linenum);
981 if (maxlen == 0 || preprocessor_flag)
985 /* Reallocate line buffer to double size to hold the
988 *pbuf = xrealloc (*pbuf, buflen + 1);
992 else if (i >= maxlen)
994 /* Truncate the rest of the line. */
998 if (c == '\n' || c == EOF)
1004 ungetc ('\n', input);
1008 /* Pad lines to the selected line length in fixed form. */
1009 if (gfc_current_form == FORM_FIXED
1010 && gfc_option.fixed_line_length != 0
1011 && !preprocessor_flag
1014 while (i++ < maxlen)
1026 /* Get a gfc_file structure, initialize it and add it to
1030 get_file (const char *name, enum lc_reason reason ATTRIBUTE_UNUSED)
1034 f = gfc_getmem (sizeof (gfc_file));
1036 f->filename = gfc_getmem (strlen (name) + 1);
1037 strcpy (f->filename, name);
1039 f->next = file_head;
1042 f->included_by = current_file;
1043 if (current_file != NULL)
1044 f->inclusion_line = current_file->line;
1046 #ifdef USE_MAPPED_LOCATION
1047 linemap_add (&line_table, reason, false, f->filename, 1);
1053 /* Deal with a line from the C preprocessor. The
1054 initial octothorp has already been seen. */
1057 preprocessor_line (char *c)
1063 int escaped, unescape;
1066 while (*c == ' ' || *c == '\t')
1069 if (*c < '0' || *c > '9')
1074 c = strchr (c, ' ');
1077 /* No file name given. Set new line number. */
1078 current_file->line = line;
1083 while (*c == ' ' || *c == '\t')
1093 /* Make filename end at quote. */
1096 while (*c && ! (! escaped && *c == '"'))
1100 else if (*c == '\\')
1109 /* Preprocessor line has no closing quote. */
1114 /* Undo effects of cpp_quote_string. */
1118 char *d = gfc_getmem (c - filename - unescape);
1134 flag[1] = flag[2] = flag[3] = flag[4] = false;
1138 c = strchr (c, ' ');
1145 if (1 <= i && i <= 4)
1149 /* Interpret flags. */
1151 if (flag[1]) /* Starting new file. */
1153 f = get_file (filename, LC_RENAME);
1154 f->up = current_file;
1158 if (flag[2]) /* Ending current file. */
1160 if (!current_file->up
1161 || strcmp (current_file->up->filename, filename) != 0)
1163 gfc_warning_now ("%s:%d: file %s left but not entered",
1164 current_file->filename, current_file->line,
1167 gfc_free (filename);
1170 current_file = current_file->up;
1173 /* The name of the file can be a temporary file produced by
1174 cpp. Replace the name if it is different. */
1176 if (strcmp (current_file->filename, filename) != 0)
1178 gfc_free (current_file->filename);
1179 current_file->filename = gfc_getmem (strlen (filename) + 1);
1180 strcpy (current_file->filename, filename);
1183 /* Set new line number. */
1184 current_file->line = line;
1186 gfc_free (filename);
1190 gfc_warning_now ("%s:%d: Illegal preprocessor directive",
1191 current_file->filename, current_file->line);
1192 current_file->line++;
1196 static try load_file (const char *, bool);
1198 /* include_line()-- Checks a line buffer to see if it is an include
1199 line. If so, we call load_file() recursively to load the included
1200 file. We never return a syntax error because a statement like
1201 "include = 5" is perfectly legal. We return false if no include was
1202 processed or true if we matched an include. */
1205 include_line (char *line)
1207 char quote, *c, *begin, *stop;
1210 while (*c == ' ' || *c == '\t')
1213 if (strncasecmp (c, "include", 7))
1217 while (*c == ' ' || *c == '\t')
1220 /* Find filename between quotes. */
1223 if (quote != '"' && quote != '\'')
1228 while (*c != quote && *c != '\0')
1236 while (*c == ' ' || *c == '\t')
1239 if (*c != '\0' && *c != '!')
1242 /* We have an include line at this point. */
1244 *stop = '\0'; /* It's ok to trash the buffer, as this line won't be
1245 read by anything else. */
1247 load_file (begin, false);
1251 /* Load a file into memory by calling load_line until the file ends. */
1254 load_file (const char *filename, bool initial)
1262 for (f = current_file; f; f = f->up)
1263 if (strcmp (filename, f->filename) == 0)
1265 gfc_error_now ("File '%s' is being included recursively", filename);
1273 input = gfc_src_file;
1274 gfc_src_file = NULL;
1277 input = gfc_open_file (filename);
1280 gfc_error_now ("Can't open file '%s'", filename);
1286 input = gfc_open_included_file (filename, false);
1289 gfc_error_now ("Can't open included file '%s'", filename);
1294 /* Load the file. */
1296 f = get_file (filename, initial ? LC_RENAME : LC_ENTER);
1297 f->up = current_file;
1299 current_file->line = 1;
1303 if (initial && gfc_src_preprocessor_lines[0])
1305 preprocessor_line (gfc_src_preprocessor_lines[0]);
1306 gfc_free (gfc_src_preprocessor_lines[0]);
1307 gfc_src_preprocessor_lines[0] = NULL;
1308 if (gfc_src_preprocessor_lines[1])
1310 preprocessor_line (gfc_src_preprocessor_lines[1]);
1311 gfc_free (gfc_src_preprocessor_lines[1]);
1312 gfc_src_preprocessor_lines[1] = NULL;
1318 int trunc = load_line (input, &line, &line_len);
1320 len = strlen (line);
1321 if (feof (input) && len == 0)
1324 /* There are three things this line can be: a line of Fortran
1325 source, an include line or a C preprocessor directive. */
1329 preprocessor_line (line);
1333 if (include_line (line))
1335 current_file->line++;
1341 b = gfc_getmem (gfc_linebuf_header_size + len + 1);
1343 #ifdef USE_MAPPED_LOCATION
1345 = linemap_line_start (&line_table, current_file->line++, 120);
1347 b->linenum = current_file->line++;
1349 b->file = current_file;
1350 b->truncated = trunc;
1351 strcpy (b->line, line);
1353 if (line_head == NULL)
1356 line_tail->next = b;
1361 /* Release the line buffer allocated in load_line. */
1366 current_file = current_file->up;
1367 #ifdef USE_MAPPED_LOCATION
1368 linemap_add (&line_table, LC_LEAVE, 0, NULL, 0);
1374 /* Open a new file and start scanning from that file. Returns SUCCESS
1375 if everything went OK, FAILURE otherwise. If form == FORM_UKNOWN
1376 it tries to determine the source form from the filename, defaulting
1384 result = load_file (gfc_source_file, true);
1386 gfc_current_locus.lb = line_head;
1387 gfc_current_locus.nextc = (line_head == NULL) ? NULL : line_head->line;
1389 #if 0 /* Debugging aid. */
1390 for (; line_head; line_head = line_head->next)
1391 gfc_status ("%s:%3d %s\n", line_head->file->filename,
1392 #ifdef USE_MAPPED_LOCATION
1393 LOCATION_LINE (line_head->location),
1406 unescape_filename (const char *ptr)
1408 const char *p = ptr, *s;
1410 int escaped, unescape = 0;
1412 /* Make filename end at quote. */
1414 while (*p && ! (! escaped && *p == '"'))
1418 else if (*p == '\\')
1429 /* Undo effects of cpp_quote_string. */
1431 d = gfc_getmem (p + 1 - ptr - unescape);
1446 /* For preprocessed files, if the first tokens are of the form # NUM.
1447 handle the directives so we know the original file name. */
1450 gfc_read_orig_filename (const char *filename, const char **canon_source_file)
1455 gfc_src_file = gfc_open_file (filename);
1456 if (gfc_src_file == NULL)
1459 c = fgetc (gfc_src_file);
1460 ungetc (c, gfc_src_file);
1466 load_line (gfc_src_file, &gfc_src_preprocessor_lines[0], &len);
1468 if (strncmp (gfc_src_preprocessor_lines[0], "# 1 \"", 5) != 0)
1471 filename = unescape_filename (gfc_src_preprocessor_lines[0] + 5);
1472 if (filename == NULL)
1475 c = fgetc (gfc_src_file);
1476 ungetc (c, gfc_src_file);
1482 load_line (gfc_src_file, &gfc_src_preprocessor_lines[1], &len);
1484 if (strncmp (gfc_src_preprocessor_lines[1], "# 1 \"", 5) != 0)
1487 dirname = unescape_filename (gfc_src_preprocessor_lines[1] + 5);
1488 if (dirname == NULL)
1491 len = strlen (dirname);
1492 if (len < 3 || dirname[len - 1] != '/' || dirname[len - 2] != '/')
1497 dirname[len - 2] = '\0';
1498 set_src_pwd (dirname);
1500 if (! IS_ABSOLUTE_PATH (filename))
1502 char *p = gfc_getmem (len + strlen (filename));
1504 memcpy (p, dirname, len - 2);
1506 strcpy (p + len - 1, filename);
1507 *canon_source_file = p;