1 /* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 In addition to the permissions in the GNU General Public License, the
14 Free Software Foundation gives you unlimited permission to link the
15 compiled version of this file into combinations with other programs,
16 and to distribute those combinations without any restriction coming
17 from the use of this file. (The General Public License restrictions
18 do apply in other respects; for example, they cover modification of
19 the file, and distribution when not linked into a combine
22 Libgfortran is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
27 You should have received a copy of the GNU General Public License
28 along with Libgfortran; see the file COPYING. If not, write to
29 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
30 Boston, MA 02110-1301, USA. */
33 /* format.c-- parse a FORMAT string into a binary format suitable for
34 * interpretation during I/O statements */
40 #define FARRAY_SIZE 64
42 typedef struct fnode_array
44 struct fnode_array *next;
45 fnode array[FARRAY_SIZE];
49 typedef struct format_data
51 char *format_string, *string;
54 format_token saved_token;
55 int value, format_string_len, reversion_ok;
57 const fnode *saved_format;
63 static const fnode colon_node = { FMT_COLON, 0, NULL, NULL, {{ 0, 0, 0 }}, 0,
68 static const char posint_required[] = "Positive width required in format",
69 period_required[] = "Period required in format",
70 nonneg_required[] = "Nonnegative width required in format",
71 unexpected_element[] = "Unexpected element '%c' in format\n",
72 unexpected_end[] = "Unexpected end of format string",
73 bad_string[] = "Unterminated character constant in format",
74 bad_hollerith[] = "Hollerith constant extends past the end of the format",
75 reversion_error[] = "Exhausted data descriptors in format",
76 zero_width[] = "Zero width in format descriptor";
78 /* next_char()-- Return the next character in the format string.
79 * Returns -1 when the string is done. If the literal flag is set,
80 * spaces are significant, otherwise they are not. */
83 next_char (format_data *fmt, int literal)
89 if (fmt->format_string_len == 0)
92 fmt->format_string_len--;
93 fmt->error_element = c = toupper (*fmt->format_string++);
95 while ((c == ' ' || c == '\t') && !literal);
101 /* unget_char()-- Back up one character position. */
103 #define unget_char(fmt) \
104 { fmt->format_string--; fmt->format_string_len++; }
107 /* get_fnode()-- Allocate a new format node, inserting it into the
108 * current singly linked list. These are initially allocated from the
112 get_fnode (format_data *fmt, fnode **head, fnode **tail, format_token t)
116 if (fmt->avail == &fmt->last->array[FARRAY_SIZE])
118 fmt->last->next = get_mem (sizeof (fnode_array));
119 fmt->last = fmt->last->next;
120 fmt->last->next = NULL;
121 fmt->avail = &fmt->last->array[0];
124 memset (f, '\0', sizeof (fnode));
136 f->source = fmt->format_string;
141 /* free_format_data()-- Free all allocated format data. */
144 free_format_data (st_parameter_dt *dtp)
146 fnode_array *fa, *fa_next;
147 format_data *fmt = dtp->u.p.fmt;
152 for (fa = fmt->array.next; fa; fa = fa_next)
163 /* format_lex()-- Simple lexical analyzer for getting the next token
164 * in a FORMAT string. We support a one-level token pushback in the
165 * fmt->saved_token variable. */
168 format_lex (format_data *fmt)
175 if (fmt->saved_token != FMT_NONE)
177 token = fmt->saved_token;
178 fmt->saved_token = FMT_NONE;
183 c = next_char (fmt, 0);
192 c = next_char (fmt, 0);
199 fmt->value = c - '0';
203 c = next_char (fmt, 0);
207 fmt->value = 10 * fmt->value + c - '0';
213 fmt->value = -fmt->value;
214 token = FMT_SIGNED_INT;
227 fmt->value = c - '0';
231 c = next_char (fmt, 0);
235 fmt->value = 10 * fmt->value + c - '0';
239 token = (fmt->value == 0) ? FMT_ZERO : FMT_POSINT;
263 switch (next_char (fmt, 0))
292 switch (next_char (fmt, 0))
309 switch (next_char (fmt, 0))
329 fmt->string = fmt->format_string;
330 fmt->value = 0; /* This is the length of the string */
334 c = next_char (fmt, 1);
337 token = FMT_BADSTRING;
338 fmt->error = bad_string;
344 c = next_char (fmt, 1);
348 token = FMT_BADSTRING;
349 fmt->error = bad_string;
387 switch (next_char (fmt, 0))
419 switch (next_char (fmt, 0))
447 /* parse_format_list()-- Parse a format list. Assumes that a left
448 * paren has already been seen. Returns a list representing the
449 * parenthesis node which contains the rest of the list. */
452 parse_format_list (st_parameter_dt *dtp)
455 format_token t, u, t2;
457 format_data *fmt = dtp->u.p.fmt;
461 /* Get the next format item */
463 t = format_lex (fmt);
470 t = format_lex (fmt);
474 get_fnode (fmt, &head, &tail, FMT_LPAREN);
475 tail->repeat = repeat;
476 tail->u.child = parse_format_list (dtp);
477 if (fmt->error != NULL)
483 get_fnode (fmt, &head, &tail, FMT_SLASH);
484 tail->repeat = repeat;
488 get_fnode (fmt, &head, &tail, FMT_X);
490 tail->u.k = fmt->value;
501 get_fnode (fmt, &head, &tail, FMT_LPAREN);
503 tail->u.child = parse_format_list (dtp);
504 if (fmt->error != NULL)
509 case FMT_SIGNED_INT: /* Signed integer can only precede a P format. */
510 case FMT_ZERO: /* Same for zero. */
511 t = format_lex (fmt);
514 fmt->error = "Expected P edit descriptor in format";
519 get_fnode (fmt, &head, &tail, FMT_P);
520 tail->u.k = fmt->value;
523 t = format_lex (fmt);
524 if (t == FMT_F || t == FMT_EN || t == FMT_ES || t == FMT_D
525 || t == FMT_G || t == FMT_E)
531 fmt->saved_token = t;
534 case FMT_P: /* P and X require a prior number */
535 fmt->error = "P descriptor requires leading scale factor";
542 If we would be pedantic in the library, we would have to reject
543 an X descriptor without an integer prefix:
545 fmt->error = "X descriptor requires leading space count";
548 However, this is an extension supported by many Fortran compilers,
549 including Cray, HP, AIX, and IRIX. Therefore, we allow it in the
550 runtime library, and make the front end reject it if the compiler
551 is in pedantic mode. The interpretation of 'X' is '1X'.
553 get_fnode (fmt, &head, &tail, FMT_X);
559 get_fnode (fmt, &head, &tail, FMT_STRING);
561 tail->u.string.p = fmt->string;
562 tail->u.string.length = fmt->value;
568 notify_std (&dtp->common, GFC_STD_F2003, "Fortran 2003: DC or DP "
569 "descriptor not allowed");
576 get_fnode (fmt, &head, &tail, t);
581 get_fnode (fmt, &head, &tail, FMT_COLON);
586 get_fnode (fmt, &head, &tail, FMT_SLASH);
592 get_fnode (fmt, &head, &tail, FMT_DOLLAR);
594 notify_std (&dtp->common, GFC_STD_GNU, "Extension: $ descriptor");
601 t2 = format_lex (fmt);
602 if (t2 != FMT_POSINT)
604 fmt->error = posint_required;
607 get_fnode (fmt, &head, &tail, t);
608 tail->u.n = fmt->value;
628 get_fnode (fmt, &head, &tail, FMT_STRING);
630 if (fmt->format_string_len < 1)
632 fmt->error = bad_hollerith;
636 tail->u.string.p = fmt->format_string;
637 tail->u.string.length = 1;
640 fmt->format_string++;
641 fmt->format_string_len--;
646 fmt->error = unexpected_end;
656 fmt->error = unexpected_element;
660 /* In this state, t must currently be a data descriptor. Deal with
661 things that can/must follow the descriptor */
666 t = format_lex (fmt);
669 fmt->error = "Repeat count cannot follow P descriptor";
673 fmt->saved_token = t;
674 get_fnode (fmt, &head, &tail, FMT_P);
679 t = format_lex (fmt);
682 if (notification_std(GFC_STD_GNU) == ERROR)
684 fmt->error = posint_required;
689 fmt->saved_token = t;
690 fmt->value = 1; /* Default width */
691 notify_std (&dtp->common, GFC_STD_GNU, posint_required);
695 get_fnode (fmt, &head, &tail, FMT_L);
696 tail->u.n = fmt->value;
697 tail->repeat = repeat;
701 t = format_lex (fmt);
704 fmt->error = zero_width;
710 fmt->saved_token = t;
711 fmt->value = -1; /* Width not present */
714 get_fnode (fmt, &head, &tail, FMT_A);
715 tail->repeat = repeat;
716 tail->u.n = fmt->value;
725 get_fnode (fmt, &head, &tail, t);
726 tail->repeat = repeat;
728 u = format_lex (fmt);
729 if (t == FMT_G && u == FMT_ZERO)
731 if (notification_std (GFC_STD_F2008) == ERROR
732 || dtp->u.p.mode == READING)
734 fmt->error = zero_width;
738 u = format_lex (fmt);
741 fmt->saved_token = u;
745 u = format_lex (fmt);
748 fmt->error = posint_required;
751 tail->u.real.d = fmt->value;
754 if (t == FMT_F || dtp->u.p.mode == WRITING)
756 if (u != FMT_POSINT && u != FMT_ZERO)
758 fmt->error = nonneg_required;
766 fmt->error = posint_required;
771 tail->u.real.w = fmt->value;
773 t = format_lex (fmt);
776 /* We treat a missing decimal descriptor as 0. Note: This is only
777 allowed if -std=legacy, otherwise an error occurs. */
778 if (compile_options.warn_std != 0)
780 fmt->error = period_required;
783 fmt->saved_token = t;
788 t = format_lex (fmt);
789 if (t != FMT_ZERO && t != FMT_POSINT)
791 fmt->error = nonneg_required;
795 tail->u.real.d = fmt->value;
797 if (t == FMT_D || t == FMT_F)
802 /* Look for optional exponent */
803 t = format_lex (fmt);
805 fmt->saved_token = t;
808 t = format_lex (fmt);
811 fmt->error = "Positive exponent width required in format";
815 tail->u.real.e = fmt->value;
821 if (repeat > fmt->format_string_len)
823 fmt->error = bad_hollerith;
827 get_fnode (fmt, &head, &tail, FMT_STRING);
829 tail->u.string.p = fmt->format_string;
830 tail->u.string.length = repeat;
833 fmt->format_string += fmt->value;
834 fmt->format_string_len -= repeat;
842 get_fnode (fmt, &head, &tail, t);
843 tail->repeat = repeat;
845 t = format_lex (fmt);
847 if (dtp->u.p.mode == READING)
851 fmt->error = posint_required;
857 if (t != FMT_ZERO && t != FMT_POSINT)
859 fmt->error = nonneg_required;
864 tail->u.integer.w = fmt->value;
865 tail->u.integer.m = -1;
867 t = format_lex (fmt);
870 fmt->saved_token = t;
874 t = format_lex (fmt);
875 if (t != FMT_ZERO && t != FMT_POSINT)
877 fmt->error = nonneg_required;
881 tail->u.integer.m = fmt->value;
884 if (tail->u.integer.w != 0 && tail->u.integer.m > tail->u.integer.w)
886 fmt->error = "Minimum digits exceeds field width";
893 fmt->error = unexpected_element;
897 /* Between a descriptor and what comes next */
899 t = format_lex (fmt);
910 get_fnode (fmt, &head, &tail, t);
915 fmt->error = unexpected_end;
919 /* Assume a missing comma, this is a GNU extension */
923 /* Optional comma is a weird between state where we've just finished
924 reading a colon, slash or P descriptor. */
926 t = format_lex (fmt);
935 default: /* Assume that we have another format item */
936 fmt->saved_token = t;
947 /* format_error()-- Generate an error message for a format statement.
948 * If the node that gives the location of the error is NULL, the error
949 * is assumed to happen at parse time, and the current location of the
952 * We generate a message showing where the problem is. We take extra
953 * care to print only the relevant part of the format if it is longer
954 * than a standard 80 column display. */
957 format_error (st_parameter_dt *dtp, const fnode *f, const char *message)
959 int width, i, j, offset;
960 char *p, buffer[300];
961 format_data *fmt = dtp->u.p.fmt;
964 fmt->format_string = f->source;
966 if (message == unexpected_element)
967 sprintf (buffer, message, fmt->error_element);
969 sprintf (buffer, "%s\n", message);
971 j = fmt->format_string - dtp->format;
973 offset = (j > 60) ? j - 40 : 0;
976 width = dtp->format_len - offset;
981 /* Show the format */
983 p = strchr (buffer, '\0');
985 memcpy (p, dtp->format + offset, width);
990 /* Show where the problem is */
992 for (i = 1; i < j; i++)
998 generate_error (&dtp->common, LIBERROR_FORMAT, buffer);
1002 /* parse_format()-- Parse a format string. */
1005 parse_format (st_parameter_dt *dtp)
1009 dtp->u.p.fmt = fmt = get_mem (sizeof (format_data));
1010 fmt->format_string = dtp->format;
1011 fmt->format_string_len = dtp->format_len;
1014 fmt->saved_token = FMT_NONE;
1018 /* Initialize variables used during traversal of the tree */
1020 fmt->reversion_ok = 0;
1021 fmt->saved_format = NULL;
1023 /* Allocate the first format node as the root of the tree */
1025 fmt->last = &fmt->array;
1026 fmt->last->next = NULL;
1027 fmt->avail = &fmt->array.array[0];
1029 memset (fmt->avail, 0, sizeof (*fmt->avail));
1030 fmt->avail->format = FMT_LPAREN;
1031 fmt->avail->repeat = 1;
1034 if (format_lex (fmt) == FMT_LPAREN)
1035 fmt->array.array[0].u.child = parse_format_list (dtp);
1037 fmt->error = "Missing initial left parenthesis in format";
1040 format_error (dtp, NULL, fmt->error);
1044 /* revert()-- Do reversion of the format. Control reverts to the left
1045 * parenthesis that matches the rightmost right parenthesis. From our
1046 * tree structure, we are looking for the rightmost parenthesis node
1047 * at the second level, the first level always being a single
1048 * parenthesis node. If this node doesn't exit, we use the top
1052 revert (st_parameter_dt *dtp)
1055 format_data *fmt = dtp->u.p.fmt;
1057 dtp->u.p.reversion_flag = 1;
1061 for (f = fmt->array.array[0].u.child; f; f = f->next)
1062 if (f->format == FMT_LPAREN)
1065 /* If r is NULL because no node was found, the whole tree will be used */
1067 fmt->array.array[0].current = r;
1068 fmt->array.array[0].count = 0;
1072 /* next_format0()-- Get the next format node without worrying about
1073 * reversion. Returns NULL when we hit the end of the list.
1074 * Parenthesis nodes are incremented after the list has been
1075 * exhausted, other nodes are incremented before they are returned. */
1077 static const fnode *
1078 next_format0 (fnode * f)
1085 if (f->format != FMT_LPAREN)
1088 if (f->count <= f->repeat)
1095 /* Deal with a parenthesis node */
1097 for (; f->count < f->repeat; f->count++)
1099 if (f->current == NULL)
1100 f->current = f->u.child;
1102 for (; f->current != NULL; f->current = f->current->next)
1104 r = next_format0 (f->current);
1115 /* next_format()-- Return the next format node. If the format list
1116 * ends up being exhausted, we do reversion. Reversion is only
1117 * allowed if we've seen a data descriptor since the
1118 * initialization or the last reversion. We return NULL if there
1119 * are no more data descriptors to return (which is an error
1123 next_format (st_parameter_dt *dtp)
1127 format_data *fmt = dtp->u.p.fmt;
1129 if (fmt->saved_format != NULL)
1130 { /* Deal with a pushed-back format node */
1131 f = fmt->saved_format;
1132 fmt->saved_format = NULL;
1136 f = next_format0 (&fmt->array.array[0]);
1139 if (!fmt->reversion_ok)
1142 fmt->reversion_ok = 0;
1145 f = next_format0 (&fmt->array.array[0]);
1148 format_error (dtp, NULL, reversion_error);
1152 /* Push the first reverted token and return a colon node in case
1153 * there are no more data items. */
1155 fmt->saved_format = f;
1159 /* If this is a data edit descriptor, then reversion has become OK. */
1163 if (!fmt->reversion_ok &&
1164 (t == FMT_I || t == FMT_B || t == FMT_O || t == FMT_Z || t == FMT_F ||
1165 t == FMT_E || t == FMT_EN || t == FMT_ES || t == FMT_G || t == FMT_L ||
1166 t == FMT_A || t == FMT_D))
1167 fmt->reversion_ok = 1;
1172 /* unget_format()-- Push the given format back so that it will be
1173 * returned on the next call to next_format() without affecting
1174 * counts. This is necessary when we've encountered a data
1175 * descriptor, but don't know what the data item is yet. The format
1176 * node is pushed back, and we return control to the main program,
1177 * which calls the library back with the data item (or not). */
1180 unget_format (st_parameter_dt *dtp, const fnode *f)
1182 dtp->u.p.fmt->saved_format = f;