1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
28 /* format.c-- parse a FORMAT string into a binary format suitable for
29 * interpretation during I/O statements */
36 #define FARRAY_SIZE 64
38 typedef struct fnode_array
40 struct fnode_array *next;
41 fnode array[FARRAY_SIZE];
45 typedef struct format_data
47 char *format_string, *string;
50 format_token saved_token;
51 int value, format_string_len, reversion_ok;
53 const fnode *saved_format;
59 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
/* Diagnostic texts reported for malformed FORMAT strings.  Each one is
   declared separately so a message can be added or removed without
   touching its neighbours.  unexpected_element is the only printf-style
   template: it takes the offending character through %c and already
   carries its own trailing newline.  */
static const char posint_required[] = "Positive width required in format";
static const char period_required[] = "Period required in format";
static const char nonneg_required[] = "Nonnegative width required in format";
static const char unexpected_element[] = "Unexpected element '%c' in format\n";
static const char unexpected_end[] = "Unexpected end of format string";
static const char bad_string[] = "Unterminated character constant in format";
static const char bad_hollerith[] =
  "Hollerith constant extends past the end of the format";
static const char reversion_error[] = "Exhausted data descriptors in format";
static const char zero_width[] = "Zero width in format descriptor";
74 /* The following routines support caching format data from parsed format strings
75 into a hash table. This avoids repeatedly parsing duplicate format strings
76 or format strings in I/O statements that are repeated in loops. */
79 /* Traverse the table and free all data. */
82 free_format_hash_table (gfc_unit *u)
86 /* free_format_data handles any NULL pointers. */
87 for (i = 0; i < FORMAT_HASH_SIZE; i++)
89 if (u->format_hash_table[i].hashed_fmt != NULL)
91 free_format_data (u->format_hash_table[i].hashed_fmt);
92 free_mem (u->format_hash_table[i].key);
94 u->format_hash_table[i].key = NULL;
95 u->format_hash_table[i].key_len = 0;
96 u->format_hash_table[i].hashed_fmt = NULL;
100 /* Traverse the format_data structure and reset the fnode counters. */
103 reset_node (fnode *fn)
110 if (fn->format != FMT_LPAREN)
113 for (f = fn->u.child; f; f = f->next)
115 if (f->format == FMT_RPAREN)
122 reset_fnode_counters (st_parameter_dt *dtp)
129 /* Clear this pointer at the head so things start at the right place. */
130 fmt->array.array[0].current = NULL;
132 for (f = fmt->last->array[0].u.child; f; f = f->next)
137 /* A simple hashing function to generate an index into the hash table. */
140 uint32_t format_hash (st_parameter_dt *dtp)
143 gfc_charlen_type key_len;
147 /* Hash the format string. Super simple, but what the heck! */
149 key_len = dtp->format_len;
150 for (i = 0; i < key_len; i++)
152 hash &= (FORMAT_HASH_SIZE - 1);
158 save_parsed_format (st_parameter_dt *dtp)
163 hash = format_hash (dtp);
164 u = dtp->u.p.current_unit;
166 /* Index into the hash table. We are simply replacing whatever is there
167 relying on probability. */
168 if (u->format_hash_table[hash].hashed_fmt != NULL)
169 free_format_data (u->format_hash_table[hash].hashed_fmt);
170 u->format_hash_table[hash].hashed_fmt = NULL;
172 if (u->format_hash_table[hash].key != NULL)
173 free_mem (u->format_hash_table[hash].key);
174 u->format_hash_table[hash].key = get_mem (dtp->format_len);
175 memcpy (u->format_hash_table[hash].key, dtp->format, dtp->format_len);
177 u->format_hash_table[hash].key_len = dtp->format_len;
178 u->format_hash_table[hash].hashed_fmt = dtp->u.p.fmt;
183 find_parsed_format (st_parameter_dt *dtp)
188 hash = format_hash (dtp);
189 u = dtp->u.p.current_unit;
191 if (u->format_hash_table[hash].key != NULL)
193 /* See if it matches. */
194 if (u->format_hash_table[hash].key_len == dtp->format_len)
196 /* So far so good. */
197 if (strncmp (u->format_hash_table[hash].key,
198 dtp->format, dtp->format_len) == 0)
199 return u->format_hash_table[hash].hashed_fmt;
206 /* next_char()-- Return the next character in the format string.
207 * Returns -1 when the string is done. If the literal flag is set,
208 * spaces are significant, otherwise they are not. */
211 next_char (format_data *fmt, int literal)
217 if (fmt->format_string_len == 0)
220 fmt->format_string_len--;
221 c = toupper (*fmt->format_string++);
222 fmt->error_element = c;
224 while ((c == ' ' || c == '\t') && !literal);
/* unget_char()-- Back up one character position: step format_string
 * back by one and add the character back to format_string_len.
 *
 * FMT must be a pointer to a structure with those two members.  It is
 * expanded twice, so pass an expression without side effects.  The
 * expansion is a single do/while (0) statement, which keeps
 * `unget_char (fmt);' valid as the body of an unbraced if/else, and the
 * argument is parenthesized so any pointer expression may be passed.  */

#define unget_char(fmt) \
  do { (fmt)->format_string--; (fmt)->format_string_len++; } while (0)
236 /* get_fnode()-- Allocate a new format node, inserting it into the
237 * current singly linked list. These are initially allocated from the
241 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
245 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
247 fmt->last->next = get_mem (sizeof (fnode_array));
248 fmt->last = fmt->last->next;
249 fmt->last->next = NULL;
250 fmt->avail = &fmt->last->array[0];
253 memset (f, '\0', sizeof (fnode));
265 f->source = fmt->format_string;
270 /* free_format_data()-- Free all allocated format data. */
273 free_format_data (format_data *fmt)
275 fnode_array *fa, *fa_next;
281 for (fa = fmt->array.next; fa; fa = fa_next)
292 /* format_lex()-- Simple lexical analyzer for getting the next token
293 * in a FORMAT string. We support a one-level token pushback in the
294 * fmt->saved_token variable. */
297 format_lex (format_data *fmt)
304 if (fmt->saved_token != FMT_NONE)
306 token = fmt->saved_token;
307 fmt->saved_token = FMT_NONE;
312 c = next_char (fmt, 0);
333 c = next_char (fmt, 0);
340 fmt->value = c - '0';
344 c = next_char (fmt, 0);
348 fmt->value = 10 * fmt->value + c - '0';
354 fmt->value = -fmt->value;
355 token = FMT_SIGNED_INT;
368 fmt->value = c - '0';
372 c = next_char (fmt, 0);
376 fmt->value = 10 * fmt->value + c - '0';
380 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
404 switch (next_char (fmt, 0))
425 switch (next_char (fmt, 0))
442 switch (next_char (fmt, 0))
462 fmt->string = fmt->format_string;
463 fmt->value = 0; /* This is the length of the string */
467 c = next_char (fmt, 1);
470 token = FMT_BADSTRING;
471 fmt->error = bad_string;
477 c = next_char (fmt, 1);
481 token = FMT_BADSTRING;
482 fmt->error = bad_string;
520 switch (next_char (fmt, 0))
552 switch (next_char (fmt, 0))
568 switch (next_char (fmt, 0))
608 /* parse_format_list()-- Parse a format list. Assumes that a left
609 * paren has already been seen. Returns a list representing the
610 * parenthesis node which contains the rest of the list. */
613 parse_format_list (st_parameter_dt *dtp, bool *save_ok)
616 format_token t, u, t2;
618 format_data *fmt = dtp->u.p.fmt;
624 /* Get the next format item */
626 t = format_lex (fmt);
631 t = format_lex (fmt);
634 fmt->error = "Left parenthesis required after '*'";
637 get_fnode (fmt, &head, &tail, FMT_LPAREN);
638 tail->repeat = -2; /* Signifies unlimited format. */
639 tail->u.child = parse_format_list (dtp, &saveit);
640 if (fmt->error != NULL)
648 t = format_lex (fmt);
652 get_fnode (fmt, &head, &tail, FMT_LPAREN);
653 tail->repeat = repeat;
654 tail->u.child = parse_format_list (dtp, &saveit);
655 if (fmt->error != NULL)
661 get_fnode (fmt, &head, &tail, FMT_SLASH);
662 tail->repeat = repeat;
666 get_fnode (fmt, &head, &tail, FMT_X);
668 tail->u.k = fmt->value;
679 get_fnode (fmt, &head, &tail, FMT_LPAREN);
681 tail->u.child = parse_format_list (dtp, &saveit);
682 if (fmt->error != NULL)
687 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
688 case FMT_ZERO: /* Same for zero. */
689 t = format_lex (fmt);
692 fmt->error = "Expected P edit descriptor in format";
697 get_fnode (fmt, &head, &tail, FMT_P);
698 tail->u.k = fmt->value;
701 t = format_lex (fmt);
702 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
703 || t == FMT_G || t == FMT_E)
709 fmt->saved_token = t;
712 case FMT_P: /* P and X require a prior number */
713 fmt->error = "P descriptor requires leading scale factor";
720 If we would be pedantic in the library, we would have to reject
721 an X descriptor without an integer prefix:
723 fmt->error = "X descriptor requires leading space count";
726 However, this is an extension supported by many Fortran compilers,
727 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
728 runtime library, and make the front end reject it if the compiler
729 is in pedantic mode. The interpretation of 'X' is '1X'.
731 get_fnode (fmt, &head, &tail, FMT_X);
737 /* TODO: Find out why it is necessary to turn off format caching. */
739 get_fnode (fmt, &head, &tail, FMT_STRING);
740 tail->u.string.p = fmt->string;
741 tail->u.string.length = fmt->value;
751 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: Round "
752 "descriptor not allowed");
753 get_fnode (fmt, &head, &tail, t);
759 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
760 "descriptor not allowed");
767 get_fnode (fmt, &head, &tail, t);
772 get_fnode (fmt, &head, &tail, FMT_COLON);
777 get_fnode (fmt, &head, &tail, FMT_SLASH);
783 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
785 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
791 t2 = format_lex (fmt);
792 if (t2 != FMT_POSINT)
794 fmt->error = posint_required;
797 get_fnode (fmt, &head, &tail, t);
798 tail->u.n = fmt->value;
818 get_fnode (fmt, &head, &tail, FMT_STRING);
819 if (fmt->format_string_len < 1)
821 fmt->error = bad_hollerith;
825 tail->u.string.p = fmt->format_string;
826 tail->u.string.length = 1;
829 fmt->format_string++;
830 fmt->format_string_len--;
835 fmt->error = unexpected_end;
845 fmt->error = unexpected_element;
849 /* In this state, t must currently be a data descriptor. Deal with
850 things that can/must follow the descriptor */
855 t = format_lex (fmt);
858 fmt->error = "Repeat count cannot follow P descriptor";
862 fmt->saved_token = t;
863 get_fnode (fmt, &head, &tail, FMT_P);
868 t = format_lex (fmt);
871 if (notification_std(GFC_STD_GNU) == ERROR)
873 fmt->error = posint_required;
878 fmt->saved_token = t;
879 fmt->value = 1; /* Default width */
880 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
884 get_fnode (fmt, &head, &tail, FMT_L);
885 tail->u.n = fmt->value;
886 tail->repeat = repeat;
890 t = format_lex (fmt);
893 fmt->error = zero_width;
899 fmt->saved_token = t;
900 fmt->value = -1; /* Width not present */
903 get_fnode (fmt, &head, &tail, FMT_A);
904 tail->repeat = repeat;
905 tail->u.n = fmt->value;
914 get_fnode (fmt, &head, &tail, t);
915 tail->repeat = repeat;
917 u = format_lex (fmt);
918 if (t == FMT_G && u == FMT_ZERO)
920 if (notification_std (GFC_STD_F2008) == ERROR
921 || dtp->u.p.mode == READING)
923 fmt->error = zero_width;
927 u = format_lex (fmt);
930 fmt->saved_token = u;
934 u = format_lex (fmt);
937 fmt->error = posint_required;
940 tail->u.real.d = fmt->value;
943 if (t == FMT_F || dtp->u.p.mode == WRITING)
945 if (u != FMT_POSINT && u != FMT_ZERO)
947 fmt->error = nonneg_required;
955 fmt->error = posint_required;
960 tail->u.real.w = fmt->value;
962 t = format_lex (fmt);
965 /* We treat a missing decimal descriptor as 0. Note: This is only
966 allowed if -std=legacy, otherwise an error occurs. */
967 if (compile_options.warn_std != 0)
969 fmt->error = period_required;
972 fmt->saved_token = t;
977 t = format_lex (fmt);
978 if (t != FMT_ZERO && t != FMT_POSINT)
980 fmt->error = nonneg_required;
984 tail->u.real.d = fmt->value;
986 if (t == FMT_D || t == FMT_F)
991 /* Look for optional exponent */
992 t = format_lex (fmt);
994 fmt->saved_token = t;
997 t = format_lex (fmt);
1000 fmt->error = "Positive exponent width required in format";
1004 tail->u.real.e = fmt->value;
1010 if (repeat > fmt->format_string_len)
1012 fmt->error = bad_hollerith;
1016 get_fnode (fmt, &head, &tail, FMT_STRING);
1017 tail->u.string.p = fmt->format_string;
1018 tail->u.string.length = repeat;
1021 fmt->format_string += fmt->value;
1022 fmt->format_string_len -= repeat;
1030 get_fnode (fmt, &head, &tail, t);
1031 tail->repeat = repeat;
1033 t = format_lex (fmt);
1035 if (dtp->u.p.mode == READING)
1037 if (t != FMT_POSINT)
1039 fmt->error = posint_required;
1045 if (t != FMT_ZERO && t != FMT_POSINT)
1047 fmt->error = nonneg_required;
1052 tail->u.integer.w = fmt->value;
1053 tail->u.integer.m = -1;
1055 t = format_lex (fmt);
1056 if (t != FMT_PERIOD)
1058 fmt->saved_token = t;
1062 t = format_lex (fmt);
1063 if (t != FMT_ZERO && t != FMT_POSINT)
1065 fmt->error = nonneg_required;
1069 tail->u.integer.m = fmt->value;
1072 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
1074 fmt->error = "Minimum digits exceeds field width";
1081 fmt->error = unexpected_element;
1085 /* Between a descriptor and what comes next */
1087 t = format_lex (fmt);
1098 get_fnode (fmt, &head, &tail, t);
1100 goto optional_comma;
1103 fmt->error = unexpected_end;
1107 /* Assume a missing comma, this is a GNU extension */
1111 /* Optional comma is a weird in-between state where we've just finished
1112 reading a colon, slash or P descriptor. */
1114 t = format_lex (fmt);
1123 default: /* Assume that we have another format item */
1124 fmt->saved_token = t;
1138 /* format_error()-- Generate an error message for a format statement.
1139 * If the node that gives the location of the error is NULL, the error
1140 * is assumed to happen at parse time, and the current location of the
1143 * We generate a message showing where the problem is. We take extra
1144 * care to print only the relevant part of the format if it is longer
1145 * than a standard 80 column display. */
1148 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
1150 int width, i, j, offset;
1151 char *p, buffer[300];
1152 format_data *fmt = dtp->u.p.fmt;
1155 fmt->format_string = f->source;
1157 if (message == unexpected_element)
1158 sprintf (buffer, message, fmt->error_element);
1160 sprintf (buffer, "%s\n", message);
1162 j = fmt->format_string - dtp->format;
1164 offset = (j > 60) ? j - 40 : 0;
1167 width = dtp->format_len - offset;
1172 /* Show the format */
1174 p = strchr (buffer, '\0');
1176 memcpy (p, dtp->format + offset, width);
1181 /* Show where the problem is */
1183 for (i = 1; i < j; i++)
1189 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1193 /* revert()-- Do reversion of the format. Control reverts to the left
1194 * parenthesis that matches the rightmost right parenthesis. From our
1195 * tree structure, we are looking for the rightmost parenthesis node
1196 * at the second level, the first level always being a single
1197 * parenthesis node. If this node doesn't exist, we use the top
1201 revert (st_parameter_dt *dtp)
1204 format_data *fmt = dtp->u.p.fmt;
1206 dtp->u.p.reversion_flag = 1;
1210 for (f = fmt->array.array[0].u.child; f; f = f->next)
1211 if (f->format == FMT_LPAREN)
1214 /* If r is NULL because no node was found, the whole tree will be used */
1216 fmt->array.array[0].current = r;
1217 fmt->array.array[0].count = 0;
1220 /* parse_format()-- Parse a format string. */
1223 parse_format (st_parameter_dt *dtp)
1226 bool format_cache_ok;
1228 format_cache_ok = !is_internal_unit (dtp);
1230 /* Lookup format string to see if it has already been parsed. */
1231 if (format_cache_ok)
1233 dtp->u.p.fmt = find_parsed_format (dtp);
1235 if (dtp->u.p.fmt != NULL)
1237 dtp->u.p.fmt->reversion_ok = 0;
1238 dtp->u.p.fmt->saved_token = FMT_NONE;
1239 dtp->u.p.fmt->saved_format = NULL;
1240 reset_fnode_counters (dtp);
1245 /* Not found so proceed as follows. */
1247 dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
1248 fmt->format_string = dtp->format;
1249 fmt->format_string_len = dtp->format_len;
1252 fmt->saved_token = FMT_NONE;
1256 /* Initialize variables used during traversal of the tree. */
1258 fmt->reversion_ok = 0;
1259 fmt->saved_format = NULL;
1261 /* Allocate the first format node as the root of the tree. */
1263 fmt->last = &fmt->array;
1264 fmt->last->next = NULL;
1265 fmt->avail = &fmt->array.array[0];
1267 memset (fmt->avail, 0, sizeof (*fmt->avail));
1268 fmt->avail->format = FMT_LPAREN;
1269 fmt->avail->repeat = 1;
1272 if (format_lex (fmt) == FMT_LPAREN)
1273 fmt->array.array[0].u.child = parse_format_list (dtp, &format_cache_ok);
1275 fmt->error = "Missing initial left parenthesis in format";
1279 format_error (dtp, NULL, fmt->error);
1280 free_format_hash_table (dtp->u.p.current_unit);
1284 if (format_cache_ok)
1285 save_parsed_format (dtp);
1287 dtp->u.p.format_not_saved = 1;
1291 /* next_format0()-- Get the next format node without worrying about
1292 * reversion. Returns NULL when we hit the end of the list.
1293 * Parenthesis nodes are incremented after the list has been
1294 * exhausted, other nodes are incremented before they are returned. */
1296 static const fnode *
1297 next_format0 (fnode * f)
1304 if (f->format != FMT_LPAREN)
1307 if (f->count <= f->repeat)
1314 /* Deal with a parenthesis node with unlimited format. */
1316 if (f->repeat == -2) /* -2 signifies unlimited. */
1319 if (f->current == NULL)
1320 f->current = f->u.child;
1322 for (; f->current != NULL; f->current = f->current->next)
1324 r = next_format0 (f->current);
1330 /* Deal with a parenthesis node with specific repeat count. */
1331 for (; f->count < f->repeat; f->count++)
1333 if (f->current == NULL)
1334 f->current = f->u.child;
1336 for (; f->current != NULL; f->current = f->current->next)
1338 r = next_format0 (f->current);
1349 /* next_format()-- Return the next format node. If the format list
1350 * ends up being exhausted, we do reversion. Reversion is only
1351 * allowed if we've seen a data descriptor since the
1352 * initialization or the last reversion. We return NULL if there
1353 * are no more data descriptors to return (which is an error
1357 next_format (st_parameter_dt *dtp)
1361 format_data *fmt = dtp->u.p.fmt;
1363 if (fmt->saved_format != NULL)
1364 { /* Deal with a pushed-back format node */
1365 f = fmt->saved_format;
1366 fmt->saved_format = NULL;
1370 f = next_format0 (&fmt->array.array[0]);
1373 if (!fmt->reversion_ok)
1376 fmt->reversion_ok = 0;
1379 f = next_format0 (&fmt->array.array[0]);
1382 format_error (dtp, NULL, reversion_error);
1386 /* Push the first reverted token and return a colon node in case
1387 * there are no more data items. */
1389 fmt->saved_format = f;
1393 /* If this is a data edit descriptor, then reversion has become OK. */
1397 if (!fmt->reversion_ok &&
1398 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1399 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1400 t == FMT_A || t == FMT_D))
1401 fmt->reversion_ok = 1;
1406 /* unget_format()-- Push the given format back so that it will be
1407 * returned on the next call to next_format() without affecting
1408 * counts. This is necessary when we've encountered a data
1409 * descriptor, but don't know what the data item is yet. The format
1410 * node is pushed back, and we return control to the main program,
1411 * which calls the library back with the data item (or not). */
1414 unget_format (st_parameter_dt *dtp, const fnode *f)
1416 dtp->u.p.fmt->saved_format = f;