1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
28 /* format.c-- parse a FORMAT string into a binary format suitable for
29 * interpretation during I/O statements */
36 #define FARRAY_SIZE 64
38 typedef struct fnode_array
40 struct fnode_array *next;
41 fnode array[FARRAY_SIZE];
45 typedef struct format_data
47 char *format_string, *string;
50 format_token saved_token;
51 int value, format_string_len, reversion_ok;
53 const fnode *saved_format;
59 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
64 static const char posint_required[] = "Positive width required in format",
65 period_required[] = "Period required in format",
66 nonneg_required[] = "Nonnegative width required in format",
67 unexpected_element[] = "Unexpected element '%c' in format\n",
68 unexpected_end[] = "Unexpected end of format string",
69 bad_string[] = "Unterminated character constant in format",
70 bad_hollerith[] = "Hollerith constant extends past the end of the format",
71 reversion_error[] = "Exhausted data descriptors in format",
72 zero_width[] = "Zero width in format descriptor";
74 /* The following routines support caching format data from parsed format strings
75 into a hash table. This avoids repeatedly parsing duplicate format strings
76 or format strings in I/O statements that are repeated in loops. */
79 /* Traverse the table and free all data. */
82 free_format_hash_table (gfc_unit *u)
86 /* free_format_data handles any NULL pointers. */
87 for (i = 0; i < FORMAT_HASH_SIZE; i++)
89 if (u->format_hash_table[i].hashed_fmt != NULL)
91 free_format_data (u->format_hash_table[i].hashed_fmt);
92 free_mem (u->format_hash_table[i].key);
94 u->format_hash_table[i].key = NULL;
95 u->format_hash_table[i].key_len = 0;
96 u->format_hash_table[i].hashed_fmt = NULL;
100 /* Traverse the format_data structure and reset the fnode counters. */
103 reset_node (fnode *fn)
110 if (fn->format != FMT_LPAREN)
113 for (f = fn->u.child; f; f = f->next)
115 if (f->format == FMT_RPAREN)
122 reset_fnode_counters (st_parameter_dt *dtp)
129 /* Clear this pointer at the head so things start at the right place. */
130 fmt->array.array[0].current = NULL;
132 for (f = fmt->last->array[0].u.child; f; f = f->next)
137 /* A simple hashing function to generate an index into the hash table. */
140 uint32_t format_hash (st_parameter_dt *dtp)
143 gfc_charlen_type key_len;
147 /* Hash the format string. Super simple, but what the heck! */
149 key_len = dtp->format_len;
150 for (i = 0; i < key_len; i++)
152 hash &= (FORMAT_HASH_SIZE - 1);
158 save_parsed_format (st_parameter_dt *dtp)
163 hash = format_hash (dtp);
164 u = dtp->u.p.current_unit;
166 /* Index into the hash table. We are simply replacing whatever is there
167 relying on probability. */
168 if (u->format_hash_table[hash].hashed_fmt != NULL)
169 free_format_data (u->format_hash_table[hash].hashed_fmt);
170 u->format_hash_table[hash].hashed_fmt = NULL;
172 if (u->format_hash_table[hash].key != NULL)
173 free_mem (u->format_hash_table[hash].key);
174 u->format_hash_table[hash].key = get_mem (dtp->format_len);
175 memcpy (u->format_hash_table[hash].key, dtp->format, dtp->format_len);
177 u->format_hash_table[hash].key_len = dtp->format_len;
178 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
183 find_parsed_format (st_parameter_dt *dtp)
188 hash = format_hash (dtp);
189 u = dtp->u.p.current_unit;
191 if (u->format_hash_table[hash].key != NULL)
193 /* See if it matches. */
194 if (u->format_hash_table[hash].key_len == dtp->format_len)
196 /* So far so good. */
197 if (strncmp (u->format_hash_table[hash].key,
198 dtp->format, dtp->format_len) == 0)
199 return u->format_hash_table[hash].hashed_fmt;
206 /* next_char()-- Return the next character in the format string.
207 * Returns -1 when the string is done. If the literal flag is set,
208 * spaces are significant, otherwise they are not. */
211 next_char (format_data *fmt, int literal)
217 if (fmt->format_string_len == 0)
220 fmt->format_string_len--;
221 c = toupper (*fmt->format_string++);
222 fmt->error_element = c;
224 while ((c == ' ' || c == '\t') && !literal);
230 /* unget_char()-- Back up one character position. */
232 #define unget_char(fmt) \
233 { fmt->format_string--; fmt->format_string_len++; }
236 /* get_fnode()-- Allocate a new format node, inserting it into the
237 * current singly linked list. These are initially allocated from the
241 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
245 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
247 fmt->last->next = get_mem (sizeof (fnode_array));
248 fmt->last = fmt->last->next;
249 fmt->last->next = NULL;
250 fmt->avail = &fmt->last->array[0];
253 memset (f, '\0', sizeof (fnode));
265 f->source = fmt->format_string;
270 /* free_format_data()-- Free all allocated format data. */
273 free_format_data (format_data *fmt)
275 fnode_array *fa, *fa_next;
281 for (fa = fmt->array.next; fa; fa = fa_next)
292 /* format_lex()-- Simple lexical analyzer for getting the next token
293 * in a FORMAT string. We support a one-level token pushback in the
294 * fmt->saved_token variable. */
297 format_lex (format_data *fmt)
304 if (fmt->saved_token != FMT_NONE)
306 token = fmt->saved_token;
307 fmt->saved_token = FMT_NONE;
312 c = next_char (fmt, 0);
333 c = next_char (fmt, 0);
340 fmt->value = c - '0';
344 c = next_char (fmt, 0);
348 fmt->value = 10 * fmt->value + c - '0';
354 fmt->value = -fmt->value;
355 token = FMT_SIGNED_INT;
368 fmt->value = c - '0';
372 c = next_char (fmt, 0);
376 fmt->value = 10 * fmt->value + c - '0';
380 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
404 switch (next_char (fmt, 0))
425 switch (next_char (fmt, 0))
442 switch (next_char (fmt, 0))
462 fmt->string = fmt->format_string;
463 fmt->value = 0; /* This is the length of the string */
467 c = next_char (fmt, 1);
470 token = FMT_BADSTRING;
471 fmt->error = bad_string;
477 c = next_char (fmt, 1);
481 token = FMT_BADSTRING;
482 fmt->error = bad_string;
520 switch (next_char (fmt, 0))
552 switch (next_char (fmt, 0))
568 switch (next_char (fmt, 0))
608 /* parse_format_list()-- Parse a format list. Assumes that a left
609 * paren has already been seen. Returns a list representing the
610 * parenthesis node which contains the rest of the list. */
613 parse_format_list (st_parameter_dt *dtp, bool *save_ok)
616 format_token t, u, t2;
618 format_data *fmt = dtp->u.p.fmt;
624 /* Get the next format item */
626 t = format_lex (fmt);
631 t = format_lex (fmt);
634 fmt->error = "Left parenthesis required after '*'";
637 get_fnode (fmt, &head, &tail, FMT_LPAREN);
638 tail->repeat = -2; /* Signifies unlimited format. */
639 tail->u.child = parse_format_list (dtp, &saveit);
640 if (fmt->error != NULL)
648 t = format_lex (fmt);
652 get_fnode (fmt, &head, &tail, FMT_LPAREN);
653 tail->repeat = repeat;
654 tail->u.child = parse_format_list (dtp, &saveit);
655 if (fmt->error != NULL)
661 get_fnode (fmt, &head, &tail, FMT_SLASH);
662 tail->repeat = repeat;
666 get_fnode (fmt, &head, &tail, FMT_X);
668 tail->u.k = fmt->value;
679 get_fnode (fmt, &head, &tail, FMT_LPAREN);
681 tail->u.child = parse_format_list (dtp, &saveit);
682 if (fmt->error != NULL)
687 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
688 case FMT_ZERO: /* Same for zero. */
689 t = format_lex (fmt);
692 fmt->error = "Expected P edit descriptor in format";
697 get_fnode (fmt, &head, &tail, FMT_P);
698 tail->u.k = fmt->value;
701 t = format_lex (fmt);
702 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
703 || t == FMT_G || t == FMT_E)
709 if (t != FMT_COMMA && t != FMT_RPAREN && t != FMT_SLASH)
711 fmt->error = "Comma required after P descriptor";
715 fmt->saved_token = t;
718 case FMT_P: /* P and X require a prior number */
719 fmt->error = "P descriptor requires leading scale factor";
726 If we would be pedantic in the library, we would have to reject
727 an X descriptor without an integer prefix:
729 fmt->error = "X descriptor requires leading space count";
732 However, this is an extension supported by many Fortran compilers,
733 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
734 runtime library, and make the front end reject it if the compiler
735 is in pedantic mode. The interpretation of 'X' is '1X'.
737 get_fnode (fmt, &head, &tail, FMT_X);
743 /* TODO: Find out why it is necessary to turn off format caching. */
745 get_fnode (fmt, &head, &tail, FMT_STRING);
746 tail->u.string.p = fmt->string;
747 tail->u.string.length = fmt->value;
757 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
758 "descriptor not allowed");
759 get_fnode (fmt, &head, &tail, t);
765 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
766 "descriptor not allowed");
773 get_fnode (fmt, &head, &tail, t);
778 get_fnode (fmt, &head, &tail, FMT_COLON);
783 get_fnode (fmt, &head, &tail, FMT_SLASH);
789 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
791 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
797 t2 = format_lex (fmt);
798 if (t2 != FMT_POSINT)
800 fmt->error = posint_required;
803 get_fnode (fmt, &head, &tail, t);
804 tail->u.n = fmt->value;
824 get_fnode (fmt, &head, &tail, FMT_STRING);
825 if (fmt->format_string_len < 1)
827 fmt->error = bad_hollerith;
831 tail->u.string.p = fmt->format_string;
832 tail->u.string.length = 1;
835 fmt->format_string++;
836 fmt->format_string_len--;
841 fmt->error = unexpected_end;
851 fmt->error = unexpected_element;
855 /* In this state, t must currently be a data descriptor. Deal with
856 things that can/must follow the descriptor */
861 t = format_lex (fmt);
864 if (notification_std(GFC_STD_GNU) == ERROR)
866 fmt->error = posint_required;
871 fmt->saved_token = t;
872 fmt->value = 1; /* Default width */
873 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
877 get_fnode (fmt, &head, &tail, FMT_L);
878 tail->u.n = fmt->value;
879 tail->repeat = repeat;
883 t = format_lex (fmt);
886 fmt->error = zero_width;
892 fmt->saved_token = t;
893 fmt->value = -1; /* Width not present */
896 get_fnode (fmt, &head, &tail, FMT_A);
897 tail->repeat = repeat;
898 tail->u.n = fmt->value;
907 get_fnode (fmt, &head, &tail, t);
908 tail->repeat = repeat;
910 u = format_lex (fmt);
911 if (t == FMT_G && u == FMT_ZERO)
913 if (notification_std (GFC_STD_F2008) == ERROR
914 || dtp->u.p.mode == READING)
916 fmt->error = zero_width;
920 u = format_lex (fmt);
923 fmt->saved_token = u;
927 u = format_lex (fmt);
930 fmt->error = posint_required;
933 tail->u.real.d = fmt->value;
936 if (t == FMT_F && dtp->u.p.mode == WRITING)
938 if (u != FMT_POSINT && u != FMT_ZERO)
940 fmt->error = nonneg_required;
944 else if (u != FMT_POSINT)
946 fmt->error = posint_required;
950 tail->u.real.w = fmt->value;
952 t = format_lex (fmt);
955 /* We treat a missing decimal descriptor as 0. Note: This is only
956 allowed if -std=legacy, otherwise an error occurs. */
957 if (compile_options.warn_std != 0)
959 fmt->error = period_required;
962 fmt->saved_token = t;
968 t = format_lex (fmt);
969 if (t != FMT_ZERO && t != FMT_POSINT)
971 fmt->error = nonneg_required;
975 tail->u.real.d = fmt->value;
978 if (t2 == FMT_D || t2 == FMT_F)
982 /* Look for optional exponent */
983 t = format_lex (fmt);
985 fmt->saved_token = t;
988 t = format_lex (fmt);
991 fmt->error = "Positive exponent width required in format";
995 tail->u.real.e = fmt->value;
1001 if (repeat > fmt->format_string_len)
1003 fmt->error = bad_hollerith;
1007 get_fnode (fmt, &head, &tail, FMT_STRING);
1008 tail->u.string.p = fmt->format_string;
1009 tail->u.string.length = repeat;
1012 fmt->format_string += fmt->value;
1013 fmt->format_string_len -= repeat;
1021 get_fnode (fmt, &head, &tail, t);
1022 tail->repeat = repeat;
1024 t = format_lex (fmt);
1026 if (dtp->u.p.mode == READING)
1028 if (t != FMT_POSINT)
1030 fmt->error = posint_required;
1036 if (t != FMT_ZERO && t != FMT_POSINT)
1038 fmt->error = nonneg_required;
1043 tail->u.integer.w = fmt->value;
1044 tail->u.integer.m = -1;
1046 t = format_lex (fmt);
1047 if (t != FMT_PERIOD)
1049 fmt->saved_token = t;
1053 t = format_lex (fmt);
1054 if (t != FMT_ZERO && t != FMT_POSINT)
1056 fmt->error = nonneg_required;
1060 tail->u.integer.m = fmt->value;
1063 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1065 fmt->error = "Minimum digits exceeds field width";
1072 fmt->error = unexpected_element;
1076 /* Between a descriptor and what comes next */
1078 t = format_lex (fmt);
1089 get_fnode (fmt, &head, &tail, t);
1091 goto optional_comma;
1094 fmt->error = unexpected_end;
1098 /* Assume a missing comma, this is a GNU extension */
1102 /* Optional comma is a weird between state where we've just finished
1103 reading a colon, slash or P descriptor. */
1105 t = format_lex (fmt);
1114 default: /* Assume that we have another format item */
1115 fmt->saved_token = t;
1129 /* format_error()-- Generate an error message for a format statement.
1130 * If the node that gives the location of the error is NULL, the error
1131 * is assumed to happen at parse time, and the current location of the
1134 * We generate a message showing where the problem is. We take extra
1135 * care to print only the relevant part of the format if it is longer
1136 * than a standard 80 column display. */
1139 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1141 int width, i, j, offset;
1142 char *p, buffer[300];
1143 format_data *fmt = dtp->u.p.fmt;
1146 fmt->format_string = f->source;
1148 if (message == unexpected_element)
1149 sprintf (buffer, message, fmt->error_element);
1151 sprintf (buffer, "%s\n", message);
1153 j = fmt->format_string - dtp->format;
1155 offset = (j > 60) ? j - 40 : 0;
1158 width = dtp->format_len - offset;
1163 /* Show the format */
1165 p = strchr (buffer, '\0');
1167 memcpy (p, dtp->format + offset, width);
1172 /* Show where the problem is */
1174 for (i = 1; i < j; i++)
1180 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1184 /* revert()-- Do reversion of the format. Control reverts to the left
1185 * parenthesis that matches the rightmost right parenthesis. From our
1186 * tree structure, we are looking for the rightmost parenthesis node
1187 * at the second level, the first level always being a single
1188 * parenthesis node. If this node doesn't exit, we use the top
1192 revert (st_parameter_dt *dtp)
1195 format_data *fmt = dtp->u.p.fmt;
1197 dtp->u.p.reversion_flag = 1;
1201 for (f = fmt->array.array[0].u.child; f; f = f->next)
1202 if (f->format == FMT_LPAREN)
1205 /* If r is NULL because no node was found, the whole tree will be used */
1207 fmt->array.array[0].current = r;
1208 fmt->array.array[0].count = 0;
1211 /* parse_format()-- Parse a format string. */
1214 parse_format (st_parameter_dt *dtp)
1217 bool format_cache_ok;
1219 format_cache_ok = !is_internal_unit (dtp);
1221 /* Lookup format string to see if it has already been parsed. */
1222 if (format_cache_ok)
1224 dtp->u.p.fmt = find_parsed_format (dtp);
1226 if (dtp->u.p.fmt != NULL)
1228 dtp->u.p.fmt->reversion_ok = 0;
1229 dtp->u.p.fmt->saved_token = FMT_NONE;
1230 dtp->u.p.fmt->saved_format = NULL;
1231 reset_fnode_counters (dtp);
1236 /* Not found so proceed as follows. */
1238 dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
1239 fmt->format_string = dtp->format;
1240 fmt->format_string_len = dtp->format_len;
1243 fmt->saved_token = FMT_NONE;
1247 /* Initialize variables used during traversal of the tree. */
1249 fmt->reversion_ok = 0;
1250 fmt->saved_format = NULL;
1252 /* Allocate the first format node as the root of the tree. */
1254 fmt->last = &fmt->array;
1255 fmt->last->next = NULL;
1256 fmt->avail = &fmt->array.array[0];
1258 memset (fmt->avail, 0, sizeof (*fmt->avail));
1259 fmt->avail->format = FMT_LPAREN;
1260 fmt->avail->repeat = 1;
1263 if (format_lex (fmt) == FMT_LPAREN)
1264 fmt->array.array[0].u.child = parse_format_list (dtp, &format_cache_ok);
1266 fmt->error = "Missing initial left parenthesis in format";
1270 format_error (dtp, NULL, fmt->error);
1271 free_format_hash_table (dtp->u.p.current_unit);
1275 if (format_cache_ok)
1276 save_parsed_format (dtp);
1278 dtp->u.p.format_not_saved = 1;
1282 /* next_format0()-- Get the next format node without worrying about
1283 * reversion. Returns NULL when we hit the end of the list.
1284 * Parenthesis nodes are incremented after the list has been
1285 * exhausted, other nodes are incremented before they are returned. */
1287 static const fnode *
1288 next_format0 (fnode * f)
1295 if (f->format != FMT_LPAREN)
1298 if (f->count <= f->repeat)
1305 /* Deal with a parenthesis node with unlimited format. */
1307 if (f->repeat == -2) /* -2 signifies unlimited. */
1310 if (f->current == NULL)
1311 f->current = f->u.child;
1313 for (; f->current != NULL; f->current = f->current->next)
1315 r = next_format0 (f->current);
1321 /* Deal with a parenthesis node with specific repeat count. */
1322 for (; f->count < f->repeat; f->count++)
1324 if (f->current == NULL)
1325 f->current = f->u.child;
1327 for (; f->current != NULL; f->current = f->current->next)
1329 r = next_format0 (f->current);
1340 /* next_format()-- Return the next format node. If the format list
1341 * ends up being exhausted, we do reversion. Reversion is only
1342 * allowed if we've seen a data descriptor since the
1343 * initialization or the last reversion. We return NULL if there
1344 * are no more data descriptors to return (which is an error
1348 next_format (st_parameter_dt *dtp)
1352 format_data *fmt = dtp->u.p.fmt;
1354 if (fmt->saved_format != NULL)
1355 { /* Deal with a pushed-back format node */
1356 f = fmt->saved_format;
1357 fmt->saved_format = NULL;
1361 f = next_format0 (&fmt->array.array[0]);
1364 if (!fmt->reversion_ok)
1367 fmt->reversion_ok = 0;
1370 f = next_format0 (&fmt->array.array[0]);
1373 format_error (dtp, NULL, reversion_error);
1377 /* Push the first reverted token and return a colon node in case
1378 * there are no more data items. */
1380 fmt->saved_format = f;
1384 /* If this is a data edit descriptor, then reversion has become OK. */
1388 if (!fmt->reversion_ok &&
1389 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1390 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1391 t == FMT_A || t == FMT_D))
1392 fmt->reversion_ok = 1;
1397 /* unget_format()-- Push the given format back so that it will be
1398 * returned on the next call to next_format() without affecting
1399 * counts. This is necessary when we've encountered a data
1400 * descriptor, but don't know what the data item is yet. The format
1401 * node is pushed back, and we return control to the main program,
1402 * which calls the library back with the data item (or not). */
1405 unget_format (st_parameter_dt *dtp, const fnode *f)
1407 dtp->u.p.fmt->saved_format = f;