1 /* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009, 2010
2 Free Software Foundation, Inc.
3 Contributed by Andy Vaught
4 F2003 I/O support contributed by Jerry DeLisle
6 This file is part of the GNU Fortran 95 runtime library (libgfortran).
8 Libgfortran is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgfortran is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
37 typedef unsigned char uchar;
39 /* read.c -- Deal with formatted reads */
42 /* set_integer()-- All of the integer assignments come here to
43 actually place the value into memory. */
46 set_integer (void *dest, GFC_INTEGER_LARGEST value, int length)
50 #ifdef HAVE_GFC_INTEGER_16
51 /* length=10 comes about for kind=10 real/complex BOZ, cf. PR41711. */
55 GFC_INTEGER_16 tmp = value;
56 memcpy (dest, (void *) &tmp, length);
62 GFC_INTEGER_8 tmp = value;
63 memcpy (dest, (void *) &tmp, length);
68 GFC_INTEGER_4 tmp = value;
69 memcpy (dest, (void *) &tmp, length);
74 GFC_INTEGER_2 tmp = value;
75 memcpy (dest, (void *) &tmp, length);
80 GFC_INTEGER_1 tmp = value;
81 memcpy (dest, (void *) &tmp, length);
85 internal_error (NULL, "Bad integer kind");
90 /* max_value()-- Given a length (kind), return the maximum signed or
94 max_value (int length, int signed_flag)
96 GFC_UINTEGER_LARGEST value;
97 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
103 #if defined HAVE_GFC_REAL_16 || defined HAVE_GFC_REAL_10
107 for (n = 1; n < 4 * length; n++)
108 value = (value << 2) + 3;
114 value = signed_flag ? 0x7fffffffffffffff : 0xffffffffffffffff;
117 value = signed_flag ? 0x7fffffff : 0xffffffff;
120 value = signed_flag ? 0x7fff : 0xffff;
123 value = signed_flag ? 0x7f : 0xff;
126 internal_error (NULL, "Bad integer kind");
133 /* convert_real()-- Convert a character representation of a floating
134 point number to the machine number. Returns nonzero if there is a
135 range problem during conversion. Note: many architectures
136 (e.g. IA-64, HP-PA) require that the storage pointed to by the dest
137 argument is properly aligned for the type in question. */
140 convert_real (st_parameter_dt *dtp, void *dest, const char *buffer, int length)
147 *((GFC_REAL_4*) dest) =
148 #if defined(HAVE_STRTOF)
149 gfc_strtof (buffer, NULL);
151 (GFC_REAL_4) gfc_strtod (buffer, NULL);
156 *((GFC_REAL_8*) dest) = gfc_strtod (buffer, NULL);
159 #if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
161 *((GFC_REAL_10*) dest) = gfc_strtold (buffer, NULL);
165 #if defined(HAVE_GFC_REAL_16)
166 # if defined(GFC_REAL_16_IS_FLOAT128)
168 __qmath_(quadmath_strtopQ) (buffer, NULL, dest);
170 # elif defined(HAVE_STRTOLD)
172 *((GFC_REAL_16*) dest) = gfc_strtold (buffer, NULL);
178 internal_error (&dtp->common, "Unsupported real kind during IO");
183 generate_error (&dtp->common, LIBERROR_READ_VALUE,
184 "Error during floating point read");
185 next_record (dtp, 1);
193 /* read_l()-- Read a logical value */
196 read_l (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
203 p = read_block_form (dtp, &w);
226 set_integer (dest, (GFC_INTEGER_LARGEST) 1, length);
230 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
234 generate_error (&dtp->common, LIBERROR_READ_VALUE,
235 "Bad value on logical read");
236 next_record (dtp, 1);
243 read_utf8 (st_parameter_dt *dtp, int *nbytes)
245 static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x02, 0x01 };
246 static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
253 s = read_block_form (dtp, nbytes);
257 /* If this is a short read, just return. */
265 /* The number of leading 1-bits in the first byte indicates how many
267 for (nb = 2; nb < 7; nb++)
268 if ((c & ~masks[nb-1]) == patns[nb-1])
273 c = (c & masks[nb-1]);
276 s = read_block_form (dtp, &nread);
279 /* Decode the bytes read. */
280 for (i = 1; i < nb; i++)
282 gfc_char4_t n = *s++;
284 if ((n & 0xC0) != 0x80)
287 c = ((c << 6) + (n & 0x3F));
290 /* Make sure the shortest possible encoding was used. */
291 if (c <= 0x7F && nb > 1) goto invalid;
292 if (c <= 0x7FF && nb > 2) goto invalid;
293 if (c <= 0xFFFF && nb > 3) goto invalid;
294 if (c <= 0x1FFFFF && nb > 4) goto invalid;
295 if (c <= 0x3FFFFFF && nb > 5) goto invalid;
297 /* Make sure the character is valid. */
298 if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
304 generate_error (&dtp->common, LIBERROR_READ_VALUE, "Invalid UTF-8 encoding");
305 return (gfc_char4_t) '?';
310 read_utf8_char1 (st_parameter_dt *dtp, char *p, int len, int width)
317 len = (width < len) ? len : width;
321 /* Proceed with decoding one character at a time. */
322 for (j = 0; j < len; j++, dest++)
324 c = read_utf8 (dtp, &nbytes);
326 /* Check for a short read and if so, break out. */
330 *dest = c > 255 ? '?' : (uchar) c;
333 /* If there was a short read, pad the remaining characters. */
334 for (i = j; i < len; i++)
340 read_default_char1 (st_parameter_dt *dtp, char *p, int len, int width)
345 s = read_block_form (dtp, &width);
352 m = (width > len) ? len : width;
357 memset (p + m, ' ', n);
362 read_utf8_char4 (st_parameter_dt *dtp, void *p, int len, int width)
368 len = (width < len) ? len : width;
370 dest = (gfc_char4_t *) p;
372 /* Proceed with decoding one character at a time. */
373 for (j = 0; j < len; j++, dest++)
375 *dest = read_utf8 (dtp, &nbytes);
377 /* Check for a short read and if so, break out. */
382 /* If there was a short read, pad the remaining characters. */
383 for (i = j; i < len; i++)
384 *dest++ = (gfc_char4_t) ' ';
390 read_default_char4 (st_parameter_dt *dtp, char *p, int len, int width)
395 if (is_char4_unit(dtp))
399 s4 = (gfc_char4_t *) read_block_form4 (dtp, &width);
406 m = ((int) width > len) ? len : (int) width;
408 dest = (gfc_char4_t *) p;
410 for (n = 0; n < m; n++)
413 for (n = 0; n < len - (int) width; n++)
414 *dest++ = (gfc_char4_t) ' ';
420 s = read_block_form (dtp, &width);
427 m = ((int) width > len) ? len : (int) width;
429 dest = (gfc_char4_t *) p;
431 for (n = 0; n < m; n++, dest++, s++)
432 *dest = (unsigned char ) *s;
434 for (n = 0; n < len - (int) width; n++, dest++)
435 *dest = (unsigned char) ' ';
440 /* read_a()-- Read a character record into a KIND=1 character destination,
441 processing UTF-8 encoding if necessary. */
444 read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
450 if (wi == -1) /* '(A)' edit descriptor */
454 /* Read in w characters, treating comma as not a separator. */
455 dtp->u.p.sf_read_comma = 0;
457 if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
458 read_utf8_char1 (dtp, p, length, w);
460 read_default_char1 (dtp, p, length, w);
462 dtp->u.p.sf_read_comma =
463 dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA ? 0 : 1;
467 /* read_a_char4()-- Read a character record into a KIND=4 character destination,
468 processing UTF-8 encoding if necessary. */
471 read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
476 if (w == -1) /* '(A)' edit descriptor */
479 /* Read in w characters, treating comma as not a separator. */
480 dtp->u.p.sf_read_comma = 0;
482 if (dtp->u.p.current_unit->flags.encoding == ENCODING_UTF8)
483 read_utf8_char4 (dtp, p, length, w);
485 read_default_char4 (dtp, p, length, w);
487 dtp->u.p.sf_read_comma =
488 dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA ? 0 : 1;
491 /* eat_leading_spaces()-- Given a character pointer and a width,
492 * ignore the leading spaces. */
495 eat_leading_spaces (int *width, char *p)
499 if (*width == 0 || *p != ' ')
511 next_char (st_parameter_dt *dtp, char **p, int *w)
526 if (dtp->u.p.blank_status != BLANK_UNSPECIFIED)
527 return ' '; /* return a blank to signal a null */
529 /* At this point, the rest of the field has to be trailing blanks */
543 /* read_decimal()-- Read a decimal integer value. The values here are
547 read_decimal (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
549 GFC_UINTEGER_LARGEST value, maxv, maxv_10;
550 GFC_INTEGER_LARGEST v;
556 p = read_block_form (dtp, &w);
561 p = eat_leading_spaces (&w, p);
564 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
568 maxv = max_value (length, 1);
590 /* At this point we have a digit-string */
595 c = next_char (dtp, &p, &w);
601 if (dtp->u.p.blank_status == BLANK_NULL) continue;
602 if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
605 if (c < '0' || c > '9')
608 if (value > maxv_10 && compile_options.range_check == 1)
614 if (value > maxv - c && compile_options.range_check == 1)
623 set_integer (dest, v, length);
627 generate_error (&dtp->common, LIBERROR_READ_VALUE,
628 "Bad value during integer read");
629 next_record (dtp, 1);
633 generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
634 "Value overflowed during integer read");
635 next_record (dtp, 1);
640 /* read_radix()-- This function reads values for non-decimal radixes.
641 * The difference here is that we treat the values here as unsigned
642 * values for the purposes of overflow. If minus sign is present and
643 * the top bit is set, the value will be incorrect. */
646 read_radix (st_parameter_dt *dtp, const fnode *f, char *dest, int length,
649 GFC_UINTEGER_LARGEST value, maxv, maxv_r;
650 GFC_INTEGER_LARGEST v;
656 p = read_block_form (dtp, &w);
661 p = eat_leading_spaces (&w, p);
664 set_integer (dest, (GFC_INTEGER_LARGEST) 0, length);
668 maxv = max_value (length, 0);
669 maxv_r = maxv / radix;
690 /* At this point we have a digit-string */
695 c = next_char (dtp, &p, &w);
700 if (dtp->u.p.blank_status == BLANK_NULL) continue;
701 if (dtp->u.p.blank_status == BLANK_ZERO) c = '0';
707 if (c < '0' || c > '1')
712 if (c < '0' || c > '7')
737 c = c - 'a' + '9' + 1;
746 c = c - 'A' + '9' + 1;
760 value = radix * value;
762 if (maxv - c < value)
771 set_integer (dest, v, length);
775 generate_error (&dtp->common, LIBERROR_READ_VALUE,
776 "Bad value during integer read");
777 next_record (dtp, 1);
781 generate_error (&dtp->common, LIBERROR_READ_OVERFLOW,
782 "Value overflowed during integer read");
783 next_record (dtp, 1);
788 /* read_f()-- Read a floating point number with F-style editing, which
789 is what all of the other floating point descriptors behave as. The
790 tricky part is that optional spaces are allowed after an E or D,
791 and the implicit decimal point if a decimal point is not present in
795 read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
797 int w, seen_dp, exponent;
802 int seen_int_digit; /* Seen a digit before the decimal point? */
803 int seen_dec_digit; /* Seen a digit after the decimal point? */
812 /* Read in the next block. */
813 p = read_block_form (dtp, &w);
816 p = eat_leading_spaces (&w, (char*) p);
820 /* In this buffer we're going to re-format the number cleanly to be parsed
821 by convert_real in the end; this assures we're using strtod from the
822 C library for parsing and thus probably get the best accuracy possible.
823 This process may add a '+0.0' in front of the number as well as change the
824 exponent because of an implicit decimal point or the like. Thus allocating
825 strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
826 original buffer had should be enough. */
827 buffer = gfc_alloca (w + 11);
831 if (*p == '-' || *p == '+')
839 p = eat_leading_spaces (&w, (char*) p);
843 /* Check for Infinity or NaN. */
844 if (unlikely ((w >= 3 && (*p == 'i' || *p == 'I' || *p == 'n' || *p == 'N'))))
849 /* Scan through the buffer keeping track of spaces and parenthesis. We
850 null terminate the string as soon as we see a left paren or if we are
851 BLANK_NULL mode. Leading spaces have already been skipped above,
852 trailing spaces are ignored by converting to '\0'. A space
853 between "NaN" and the optional perenthesis is not permitted. */
860 if (dtp->u.p.blank_status == BLANK_ZERO)
874 if (seen_paren++ != 1)
888 if (seen_paren != 0 && seen_paren != 2)
891 if ((strcmp (save, "inf") == 0) || (strcmp (save, "infinity") == 0))
896 else if (strcmp (save, "nan") != 0)
899 convert_real (dtp, dest, buffer, length);
903 /* Process the mantissa string. */
909 if (dtp->u.p.current_unit->decimal_status != DECIMAL_COMMA)
922 if (dtp->u.p.blank_status == BLANK_ZERO)
927 else if (dtp->u.p.blank_status == BLANK_NULL)
930 /* TODO: Should we check instead that there are only trailing
931 blanks here, as is done below for exponents? */
972 /* No exponent has been seen, so we use the current scale factor. */
973 exponent = - dtp->u.p.scale_factor;
976 /* At this point the start of an exponent has been found. */
978 p = eat_leading_spaces (&w, (char*) p);
979 if (*p == '-' || *p == '+')
987 /* At this point a digit string is required. We calculate the value
988 of the exponent in order to take account of the scale factor and
989 the d parameter before explict conversion takes place. */
994 if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
996 while (w > 0 && isdigit (*p))
999 exponent += *p - '0';
1004 /* Only allow trailing blanks. */
1013 else /* BZ or BN status is enabled. */
1019 if (dtp->u.p.blank_status == BLANK_ZERO)
1022 assert (dtp->u.p.blank_status == BLANK_NULL);
1024 else if (!isdigit (*p))
1029 exponent += *p - '0';
1037 exponent *= exponent_sign;
1040 /* Use the precision specified in the format if no decimal point has been
1043 exponent -= f->u.real.d;
1045 /* Output a trailing '0' after decimal point if not yet found. */
1046 if (seen_dp && !seen_dec_digit)
1049 /* Print out the exponent to finish the reformatted number. Maximum 4
1050 digits for the exponent. */
1059 exponent = - exponent;
1062 assert (exponent < 10000);
1063 for (dig = 3; dig >= 0; --dig)
1065 out[dig] = (char) ('0' + exponent % 10);
1072 /* Do the actual conversion. */
1073 convert_real (dtp, dest, buffer, length);
1077 /* The value read is zero. */
1082 *((GFC_REAL_4 *) dest) = 0.0;
1086 *((GFC_REAL_8 *) dest) = 0.0;
1089 #ifdef HAVE_GFC_REAL_10
1091 *((GFC_REAL_10 *) dest) = 0.0;
1095 #ifdef HAVE_GFC_REAL_16
1097 *((GFC_REAL_16 *) dest) = 0.0;
1102 internal_error (&dtp->common, "Unsupported real kind during IO");
1107 generate_error (&dtp->common, LIBERROR_READ_VALUE,
1108 "Bad value during floating point read");
1109 next_record (dtp, 1);
1114 /* read_x()-- Deal with the X/TR descriptor. We just read some data
1115 * and never look at it. */
1118 read_x (st_parameter_dt *dtp, int n)
1123 if ((dtp->u.p.current_unit->pad_status == PAD_NO || is_internal_unit (dtp))
1124 && dtp->u.p.current_unit->bytes_left < n)
1125 n = dtp->u.p.current_unit->bytes_left;
1132 if (is_internal_unit (dtp))
1134 p = mem_alloc_r (dtp->u.p.current_unit->s, &length);
1135 if (unlikely (length < n))
1140 if (dtp->u.p.sf_seen_eor)
1143 p = fbuf_read (dtp->u.p.current_unit, &length);
1150 if (length == 0 && dtp->u.p.item_count == 1)
1152 if (dtp->u.p.current_unit->pad_status == PAD_NO)
1165 if (q == '\n' || q == '\r')
1167 /* Unexpected end of line. Set the position. */
1168 fbuf_seek (dtp->u.p.current_unit, n + 1 ,SEEK_CUR);
1169 dtp->u.p.sf_seen_eor = 1;
1171 /* If we encounter a CR, it might be a CRLF. */
1172 if (q == '\r') /* Probably a CRLF */
1174 /* See if there is an LF. Use fbuf_read rather then fbuf_getc so
1175 the position is not advanced unless it really is an LF. */
1177 p = fbuf_read (dtp->u.p.current_unit, &readlen);
1178 if (*p == '\n' && readlen == 1)
1180 dtp->u.p.sf_seen_eor = 2;
1181 fbuf_seek (dtp->u.p.current_unit, 1 ,SEEK_CUR);
1190 fbuf_seek (dtp->u.p.current_unit, n, SEEK_CUR);
1193 if ((dtp->common.flags & IOPARM_DT_HAS_SIZE) != 0)
1194 dtp->u.p.size_used += (GFC_IO_INT) n;
1195 dtp->u.p.current_unit->bytes_left -= n;
1196 dtp->u.p.current_unit->strm_pos += (gfc_offset) n;