-/* Copyright (C) 2002, 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
Contributed by Andy Vaught
F2003 I/O support contributed by Jerry DeLisle
Libgfortran is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file. (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
-
Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with Libgfortran; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
#include "io.h"
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <stdlib.h>
+#include <assert.h>
+
+typedef unsigned char uchar;
/* read.c -- Deal with formatted reads */
switch (length)
{
case 4:
- {
- GFC_REAL_4 tmp =
+ *((GFC_REAL_4*) dest) =
#if defined(HAVE_STRTOF)
- strtof (buffer, NULL);
+ strtof (buffer, NULL);
#else
- (GFC_REAL_4) strtod (buffer, NULL);
+ (GFC_REAL_4) strtod (buffer, NULL);
#endif
- memcpy (dest, (void *) &tmp, length);
- }
break;
+
case 8:
- {
- GFC_REAL_8 tmp = strtod (buffer, NULL);
- memcpy (dest, (void *) &tmp, length);
- }
+ *((GFC_REAL_8*) dest) = strtod (buffer, NULL);
break;
+
#if defined(HAVE_GFC_REAL_10) && defined (HAVE_STRTOLD)
case 10:
- {
- GFC_REAL_10 tmp = strtold (buffer, NULL);
- memcpy (dest, (void *) &tmp, length);
- }
+ *((GFC_REAL_10*) dest) = strtold (buffer, NULL);
break;
#endif
+
#if defined(HAVE_GFC_REAL_16) && defined (HAVE_STRTOLD)
case 16:
- {
- GFC_REAL_16 tmp = strtold (buffer, NULL);
- memcpy (dest, (void *) &tmp, length);
- }
+ *((GFC_REAL_16*) dest) = strtold (buffer, NULL);
break;
#endif
+
default:
internal_error (&dtp->common, "Unsupported real kind during IO");
}
read_l (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
{
char *p;
- size_t w;
+ int w;
w = f->u.w;
- p = gfc_alloca (w);
+ p = read_block_form (dtp, &w);
- if (read_block_form (dtp, p, &w) == FAILURE)
+ if (p == NULL)
return;
while (*p == ' ')
}
-/* read_a()-- Read a character record. This one is pretty easy. */
-
-void
-read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
+/* read_utf8()-- Read one UTF-8 encoded character from the current record
+   and return its code point.
+
+   *NBYTES is only a short-read flag for the caller: it is set to 1 before
+   the lead byte is requested and is left at whatever read_block_form
+   stores there, so it is 0 when no lead byte was available.  It is NOT
+   updated to the total length of a multi-byte sequence.
+
+   Returns 0 when read_block_form returns NULL or the lead byte could not
+   be read.  On a malformed, truncated, over-long or surrogate sequence an
+   error is generated and '?' is returned.  */
+
+static gfc_char4_t
+read_utf8 (st_parameter_dt *dtp, int *nbytes)
{
+  /* For a sequence of K bytes, masks[K-1] selects the payload bits of the
+     lead byte and patns[K-1] is the value of its marker bits.  The lead
+     byte of a 5-byte sequence is 111110xx, hence a payload mask of 0x03.  */
+  static const uchar masks[6] = { 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };
+  static const uchar patns[6] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+  int i, nb, nread;
+  gfc_char4_t c;
char *s;
- int m, n, wi, status;
- size_t w;
- wi = f->u.w;
- if (wi == -1) /* '(A)' edit descriptor */
- wi = length;
+  /* Request the lead byte.  */
+  *nbytes = 1;
- w = wi;
+  s = read_block_form (dtp, nbytes);
+  if (s == NULL)
+    return 0;
- s = gfc_alloca (w);
+  /* If this is a short read, just return.  */
+  if (*nbytes == 0)
+    return 0;
- dtp->u.p.sf_read_comma = 0;
- status = read_block_form (dtp, s, &w);
- dtp->u.p.sf_read_comma =
- dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
- if (status == FAILURE)
+  /* Plain ASCII needs no further decoding.  */
+  c = (uchar) s[0];
+  if (c < 0x80)
+    return c;
+
+  /* The number of leading 1-bits in the first byte indicates how many
+     bytes follow.  */
+  for (nb = 2; nb < 7; nb++)
+    if ((c & ~masks[nb-1]) == patns[nb-1])
+      goto found;
+  goto invalid;
+
+ found:
+  c = (c & masks[nb-1]);
+  nread = nb - 1;
+
+  s = read_block_form (dtp, &nread);
+  if (s == NULL)
+    return 0;
+
+  /* read_block_form may hand back fewer bytes than requested.  A sequence
+     cut off before its last continuation byte is malformed, and the bytes
+     that were not delivered must not be examined.  */
+  if (nread < nb - 1)
+    goto invalid;
+
+  /* Decode the bytes read.  Every continuation byte is 10xxxxxx and
+     contributes six payload bits.  */
+  for (i = 1; i < nb; i++)
+    {
+      gfc_char4_t n = (uchar) *s++;
+
+      if ((n & 0xC0) != 0x80)
+	goto invalid;
+
+      c = ((c << 6) + (n & 0x3F));
+    }
+
+  /* Make sure the shortest possible encoding was used.  */
+  if (c <=      0x7F && nb > 1) goto invalid;
+  if (c <=     0x7FF && nb > 2) goto invalid;
+  if (c <=    0xFFFF && nb > 3) goto invalid;
+  if (c <=  0x1FFFFF && nb > 4) goto invalid;
+  if (c <= 0x3FFFFFF && nb > 5) goto invalid;
+
+  /* Make sure the character is valid: at most 31 bits and not a UTF-16
+     surrogate.  */
+  if (c > 0x7FFFFFFF || (c >= 0xD800 && c <= 0xDFFF))
+    goto invalid;
+
+  return c;
+
+ invalid:
+  generate_error (&dtp->common, LIBERROR_READ_VALUE, "Invalid UTF-8 encoding");
+  return (gfc_char4_t) '?';
+}
+
+
+/* read_utf8_char1()-- Read a field of WIDTH UTF-8 encoded characters into
+   the KIND=1 destination P, which holds LEN characters.
+
+   The result follows the same rule as read_default_char1: when the field
+   is wider than P the rightmost LEN characters are kept, and when it is
+   narrower (or the record ends early) P is padded on the right with
+   blanks.  Code points above 255 cannot be represented and are stored
+   as '?'.  */
+
+static void
+read_utf8_char1 (st_parameter_dt *dtp, char *p, int len, int width)
+{
+  gfc_char4_t c;
+  int nbytes;
+  int stored, j;
+
+  stored = 0;
+
+  /* Decode exactly WIDTH characters, one at a time.  P is never written
+     beyond its LEN characters.  */
+  for (j = 0; j < width; j++)
+    {
+      c = read_utf8 (dtp, &nbytes);
+
+      /* Check for a short read and if so, break out.  */
+      if (nbytes == 0)
+        break;
+
+      /* A zero-length destination stores nothing, but the field is still
+         consumed.  */
+      if (len <= 0)
+        continue;
+
+      /* The field is wider than P: drop the oldest character so that the
+         rightmost LEN characters read are the ones that remain.  */
+      if (stored == len)
+        {
+          memmove (p, p + 1, len - 1);
+          stored--;
+        }
+
+      p[stored++] = c > 255 ? '?' : (uchar) c;
+    }
+
+  /* Pad whatever was not filled with blanks.  */
+  while (stored < len)
+    p[stored++] = ' ';
+}
+
+/* read_default_char1()-- Read a field of WIDTH bytes with read_block_form
+   and copy it, without any decoding, into the KIND=1 destination P of LEN
+   characters.  When the field is wider than P only its rightmost LEN bytes
+   are kept; when it is narrower, P is padded on the right with blanks.
+   Nothing is stored if read_block_form returns NULL.  */
+
+static void
+read_default_char1 (st_parameter_dt *dtp, char *p, int len, int width)
+{
+ char *s;
+ int m, n;
+
+ /* WIDTH is passed by address, so the value used below is whatever
+    read_block_form left in it (presumably the number of bytes actually
+    delivered -- confirm against read_block_form).  */
+ s = read_block_form (dtp, &width);
+
+ if (s == NULL)
return;
- if (w > (size_t) length)
- s += (w - length);
+ /* Field wider than the destination: skip the leading excess so that the
+    rightmost LEN bytes are the ones copied.  */
+ if (width > len)
+ s += (width - len);
- m = ((int) w > length) ? length : (int) w;
+ /* M is the number of bytes copied: the smaller of WIDTH and LEN.  */
+ m = (width > len) ? len : width;
memcpy (p, s, m);
- n = length - w;
+ /* N is the number of trailing blanks needed; it is positive only when
+    the field was narrower than the destination.  */
+ n = len - width;
if (n > 0)
memset (p + m, ' ', n);
}
-void
-read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
+
+/* read_utf8_char4()-- Read a field of WIDTH UTF-8 encoded characters into
+   the KIND=4 destination P, which holds LEN characters.
+
+   The result follows the same rule as read_default_char4: when the field
+   is wider than P the rightmost LEN characters are kept, and when it is
+   narrower (or the record ends early) P is padded on the right with
+   blanks.  */
+
+static void
+read_utf8_char4 (st_parameter_dt *dtp, void *p, int len, int width)
{
- char *s;
gfc_char4_t *dest;
- int m, n, wi, status;
- size_t w;
+  gfc_char4_t c;
+  int nbytes;
+  int stored, j;
- wi = f->u.w;
- if (wi == -1) /* '(A)' edit descriptor */
- wi = length;
+  dest = (gfc_char4_t *) p;
+  stored = 0;
- w = wi;
- s = gfc_alloca (w);
+  /* Decode exactly WIDTH characters, one at a time.  DEST is never
+     written beyond its LEN characters.  */
+  for (j = 0; j < width; j++)
+    {
+      c = read_utf8 (dtp, &nbytes);
+
+      /* Check for a short read and if so, break out.  */
+      if (nbytes == 0)
+        break;
+
+      /* A zero-length destination stores nothing, but the field is still
+         consumed.  */
+      if (len <= 0)
+        continue;
+
+      /* The field is wider than DEST: drop the oldest character so that
+         the rightmost LEN characters read are the ones that remain.  */
+      if (stored == len)
+        {
+          memmove (dest, dest + 1, (len - 1) * sizeof (gfc_char4_t));
+          stored--;
+        }
+
+      dest[stored++] = c;
+    }
- /* Read in w bytes, treating comma as not a separator. */
- dtp->u.p.sf_read_comma = 0;
- status = read_block_form (dtp, s, &w);
- dtp->u.p.sf_read_comma =
- dtp->u.p.decimal_status == DECIMAL_COMMA ? 0 : 1;
+
+  /* Pad whatever was not filled with blanks.  */
+  while (stored < len)
+    dest[stored++] = (gfc_char4_t) ' ';
+}
+
+
+/* read_default_char4()-- Read a field of WIDTH bytes with read_block_form
+   and store it, one byte per character and without any decoding, into the
+   KIND=4 destination P of LEN characters.  When the field is wider than P
+   only its rightmost LEN bytes are kept; when it is narrower, P is padded
+   on the right with blanks.  Nothing is stored if read_block_form returns
+   NULL.  */
+
+static void
+read_default_char4 (st_parameter_dt *dtp, char *p, int len, int width)
+{
+ char *s;
+ gfc_char4_t *dest;
+ int m, n;
+
+ /* WIDTH is passed by address, so the value used below is whatever
+    read_block_form left in it (presumably the number of bytes actually
+    delivered -- confirm against read_block_form).  */
+ s = read_block_form (dtp, &width);
- if (status == FAILURE)
+ if (s == NULL)
return;
- if (w > (size_t) length)
- s += (w - length);
+ /* Field wider than the destination: skip the leading excess so that the
+    rightmost LEN bytes are the ones stored.  */
+ if (width > len)
+ s += (width - len);
- m = ((int) w > length) ? length : (int) w;
+ /* M is the number of characters stored: the smaller of WIDTH and LEN.  */
+ m = ((int) width > len) ? len : (int) width;
dest = (gfc_char4_t *) p;
for (n = 0; n < m; n++, dest++, s++)
*dest = (unsigned char ) *s;
- for (n = 0; n < length - (int) w; n++, dest++)
+ /* Blank-pad the tail; the loop runs only when the field was narrower
+    than the destination.  */
+ for (n = 0; n < len - (int) width; n++, dest++)
*dest = (unsigned char) ' ';
}
+
+/* read_a()-- Process an A edit descriptor whose destination P is a KIND=1
+   character variable of LENGTH characters.  The field is decoded as UTF-8
+   when the current unit's encoding is ENCODING_UTF8 and copied verbatim
+   otherwise.  */
+
+void
+read_a (st_parameter_dt *dtp, const fnode *f, char *p, int length)
+{
+  int field_width;
+
+  /* A width of -1 marks the bare '(A)' edit descriptor, for which the
+     length of the variable is used instead.  */
+  field_width = f->u.w;
+  if (field_width == -1)
+    field_width = length;
+
+  /* While the field is being read a comma is data, not a separator.  */
+  dtp->u.p.sf_read_comma = 0;
+
+  if (dtp->u.p.current_unit->flags.encoding != ENCODING_UTF8)
+    read_default_char1 (dtp, p, length, field_width);
+  else
+    read_utf8_char1 (dtp, p, length, field_width);
+
+  /* Afterwards a comma separates values again, unless the unit uses it
+     as the decimal point.  */
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
+    dtp->u.p.sf_read_comma = 0;
+  else
+    dtp->u.p.sf_read_comma = 1;
+}
+
+
+/* read_a_char4()-- Process an A edit descriptor whose destination P is a
+   KIND=4 character variable of LENGTH characters.  The field is decoded as
+   UTF-8 when the current unit's encoding is ENCODING_UTF8 and widened byte
+   by byte otherwise.  */
+
+void
+read_a_char4 (st_parameter_dt *dtp, const fnode *f, char *p, int length)
+{
+  /* A width of -1 marks the bare '(A)' edit descriptor, for which the
+     length of the variable is used instead.  */
+  int field_width = (f->u.w == -1) ? length : f->u.w;
+
+  /* While the field is being read a comma is data, not a separator.  */
+  dtp->u.p.sf_read_comma = 0;
+
+  if (dtp->u.p.current_unit->flags.encoding != ENCODING_UTF8)
+    read_default_char4 (dtp, p, length, field_width);
+  else
+    read_utf8_char4 (dtp, p, length, field_width);
+
+  /* Afterwards a comma separates values again, unless the unit uses it
+     as the decimal point.  */
+  if (dtp->u.p.current_unit->decimal_status == DECIMAL_COMMA)
+    dtp->u.p.sf_read_comma = 0;
+  else
+    dtp->u.p.sf_read_comma = 1;
+}
+
/* eat_leading_spaces()-- Given a character pointer and a width,
* ignore the leading spaces. */
GFC_UINTEGER_LARGEST value, maxv, maxv_10;
GFC_INTEGER_LARGEST v;
int w, negative;
- size_t wu;
char c, *p;
- wu = f->u.w;
+ w = f->u.w;
- p = gfc_alloca (wu);
+ p = read_block_form (dtp, &w);
- if (read_block_form (dtp, p, &wu) == FAILURE)
+ if (p == NULL)
return;
- w = wu;
-
p = eat_leading_spaces (&w, p);
if (w == 0)
{
GFC_INTEGER_LARGEST v;
int w, negative;
char c, *p;
- size_t wu;
- wu = f->u.w;
+ w = f->u.w;
- p = gfc_alloca (wu);
+ p = read_block_form (dtp, &w);
- if (read_block_form (dtp, p, &wu) == FAILURE)
+ if (p == NULL)
return;
- w = wu;
-
p = eat_leading_spaces (&w, p);
if (w == 0)
{
void
read_f (st_parameter_dt *dtp, const fnode *f, char *dest, int length)
{
- size_t wu;
int w, seen_dp, exponent;
- int exponent_sign, val_sign;
- int ndigits;
- int edigits;
- int i;
- char *p, *buffer;
- char *digits;
- char scratch[SCRATCH_SIZE];
-
- val_sign = 1;
- seen_dp = 0;
- wu = f->u.w;
+ int exponent_sign;
+ const char *p;
+ char *buffer;
+ char *out;
+ int seen_int_digit; /* Seen a digit before the decimal point? */
+ int seen_dec_digit; /* Seen a digit after the decimal point? */
- p = gfc_alloca (wu);
+ seen_dp = 0;
+ seen_int_digit = 0;
+ seen_dec_digit = 0;
+ exponent_sign = 1;
+ exponent = 0;
+ w = f->u.w;
- if (read_block_form (dtp, p, &wu) == FAILURE)
+ /* Read in the next block. */
+ p = read_block_form (dtp, &w);
+ if (p == NULL)
return;
-
- w = wu;
-
- p = eat_leading_spaces (&w, p);
+ p = eat_leading_spaces (&w, (char*) p);
if (w == 0)
goto zero;
- /* Optional sign */
+ /* In this buffer we're going to re-format the number cleanly to be parsed
+ by convert_real in the end; this assures we're using strtod from the
+ C library for parsing and thus probably get the best accuracy possible.
+ This process may add a '+0.0' in front of the number as well as change the
+ exponent because of an implicit decimal point or the like. Thus allocating
+ strlen ("+0.0e-1000") == 10 characters plus one for NUL more than the
+ original buffer had should be enough. */
+ buffer = gfc_alloca (w + 11);
+ out = buffer;
+ /* Optional sign */
if (*p == '-' || *p == '+')
{
if (*p == '-')
- val_sign = -1;
- p++;
- w--;
+ *(out++) = '-';
+ ++p;
+ --w;
}
- exponent_sign = 1;
- p = eat_leading_spaces (&w, p);
+ p = eat_leading_spaces (&w, (char*) p);
if (w == 0)
goto zero;
- /* A digit, a '.' or a exponent character ('e', 'E', 'd' or 'D')
- is required at this point */
-
- if (!isdigit (*p) && *p != '.' && *p != ',' && *p != 'd' && *p != 'D'
- && *p != 'e' && *p != 'E')
- goto bad_float;
-
- /* Remember the position of the first digit. */
- digits = p;
- ndigits = 0;
-
- /* Scan through the string to find the exponent. */
+ /* Process the mantissa string. */
while (w > 0)
{
switch (*p)
{
case ',':
- if (dtp->u.p.decimal_status == DECIMAL_COMMA && *p == ',')
- *p = '.';
- /* Fall through */
+ if (dtp->u.p.current_unit->decimal_status != DECIMAL_COMMA)
+ goto bad_float;
+ /* Fall through. */
case '.':
if (seen_dp)
goto bad_float;
+ if (!seen_int_digit)
+ *(out++) = '0';
+ *(out++) = '.';
seen_dp = 1;
- /* Fall through */
+ break;
+ case ' ':
+ if (dtp->u.p.blank_status == BLANK_ZERO)
+ {
+ *(out++) = '0';
+ goto found_digit;
+ }
+ else if (dtp->u.p.blank_status == BLANK_NULL)
+ break;
+ else
+ /* TODO: Should we check instead that there are only trailing
+ blanks here, as is done below for exponents? */
+ goto done;
+ /* Fall through. */
case '0':
case '1':
case '2':
case '7':
case '8':
case '9':
- case ' ':
- ndigits++;
- p++;
- w--;
+ *(out++) = *p;
+found_digit:
+ if (!seen_dp)
+ seen_int_digit = 1;
+ else
+ seen_dec_digit = 1;
break;
case '-':
- exponent_sign = -1;
- /* Fall through */
-
case '+':
- p++;
- w--;
- goto exp2;
+ goto exponent;
- case 'd':
case 'e':
- case 'D':
case 'E':
- p++;
- w--;
- goto exp1;
+ case 'd':
+ case 'D':
+ ++p;
+ --w;
+ goto exponent;
default:
goto bad_float;
}
- }
-
- /* No exponent has been seen, so we use the current scale factor */
- exponent = -dtp->u.p.scale_factor;
- goto done;
- bad_float:
- generate_error (&dtp->common, LIBERROR_READ_VALUE,
- "Bad value during floating point read");
- next_record (dtp, 1);
- return;
-
- /* The value read is zero */
- zero:
- switch (length)
- {
- case 4:
- *((GFC_REAL_4 *) dest) = 0;
- break;
-
- case 8:
- *((GFC_REAL_8 *) dest) = 0;
- break;
-
-#ifdef HAVE_GFC_REAL_10
- case 10:
- *((GFC_REAL_10 *) dest) = 0;
- break;
-#endif
-
-#ifdef HAVE_GFC_REAL_16
- case 16:
- *((GFC_REAL_16 *) dest) = 0;
- break;
-#endif
-
- default:
- internal_error (&dtp->common, "Unsupported real kind during IO");
- }
- return;
-
- /* At this point the start of an exponent has been found */
- exp1:
- while (w > 0 && *p == ' ')
- {
- w--;
- p++;
+ ++p;
+ --w;
}
+
+ /* No exponent has been seen, so we use the current scale factor. */
+ exponent = - dtp->u.p.scale_factor;
+ goto done;
- switch (*p)
+ /* At this point the start of an exponent has been found. */
+exponent:
+ p = eat_leading_spaces (&w, (char*) p);
+ if (*p == '-' || *p == '+')
{
- case '-':
- exponent_sign = -1;
- /* Fall through */
-
- case '+':
- p++;
- w--;
- break;
+ if (*p == '-')
+ exponent_sign = -1;
+ ++p;
+ --w;
}
- if (w == 0)
- goto bad_float;
-
/* At this point a digit string is required. We calculate the value
of the exponent in order to take account of the scale factor and
- the d parameter before explict conversion takes place. */
- exp2:
- if (!isdigit (*p))
- goto bad_float;
+ the d parameter before explicit conversion takes place. */
- exponent = *p - '0';
- p++;
- w--;
+ if (w == 0)
+ goto bad_float;
- if (dtp->u.p.blank_status == BLANK_UNSPECIFIED) /* Normal processing of exponent */
+ if (dtp->u.p.blank_status == BLANK_UNSPECIFIED)
{
while (w > 0 && isdigit (*p))
- {
- exponent = 10 * exponent + *p - '0';
- p++;
- w--;
- }
-
- /* Only allow trailing blanks */
-
+ {
+ exponent *= 10;
+ exponent += *p - '0';
+ ++p;
+ --w;
+ }
+
+ /* Only allow trailing blanks. */
while (w > 0)
- {
- if (*p != ' ')
- goto bad_float;
- p++;
- w--;
- }
+ {
+ if (*p != ' ')
+ goto bad_float;
+ ++p;
+ --w;
+ }
}
- else /* BZ or BN status is enabled */
+ else /* BZ or BN status is enabled. */
{
while (w > 0)
- {
- if (*p == ' ')
- {
- if (dtp->u.p.blank_status == BLANK_ZERO) *p = '0';
- if (dtp->u.p.blank_status == BLANK_NULL)
- {
- p++;
- w--;
- continue;
- }
- }
- else if (!isdigit (*p))
- goto bad_float;
-
- exponent = 10 * exponent + *p - '0';
- p++;
- w--;
- }
+ {
+ if (*p == ' ')
+ {
+ if (dtp->u.p.blank_status == BLANK_ZERO)
+ exponent *= 10;
+ else
+ assert (dtp->u.p.blank_status == BLANK_NULL);
+ }
+ else if (!isdigit (*p))
+ goto bad_float;
+ else
+ {
+ exponent *= 10;
+ exponent += *p - '0';
+ }
+
+ ++p;
+ --w;
+ }
}
- exponent = exponent * exponent_sign;
+ exponent *= exponent_sign;
- done:
+done:
/* Use the precision specified in the format if no decimal point has been
seen. */
if (!seen_dp)
exponent -= f->u.real.d;
- if (exponent > 0)
- {
- edigits = 2;
- i = exponent;
- }
- else
- {
- edigits = 3;
- i = -exponent;
- }
+ /* Output a trailing '0' after decimal point if not yet found. */
+ if (seen_dp && !seen_dec_digit)
+ *(out++) = '0';
- while (i >= 10)
+ /* Print out the exponent to finish the reformatted number. Maximum 4
+ digits for the exponent. */
+ if (exponent != 0)
{
- i /= 10;
- edigits++;
- }
+ int dig;
- i = ndigits + edigits + 1;
- if (val_sign < 0)
- i++;
+ *(out++) = 'e';
+ if (exponent < 0)
+ {
+ *(out++) = '-';
+ exponent = - exponent;
+ }
- if (i < SCRATCH_SIZE)
- buffer = scratch;
- else
- buffer = get_mem (i);
-
- /* Reformat the string into a temporary buffer. As we're using atof it's
- easiest to just leave the decimal point in place. */
- p = buffer;
- if (val_sign < 0)
- *(p++) = '-';
- for (; ndigits > 0; ndigits--)
- {
- if (*digits == ' ')
- {
- if (dtp->u.p.blank_status == BLANK_ZERO) *digits = '0';
- if (dtp->u.p.blank_status == BLANK_NULL)
- {
- digits++;
- continue;
- }
- }
- *p = *digits;
- p++;
- digits++;
+ assert (exponent < 10000);
+ for (dig = 3; dig >= 0; --dig)
+ {
+ out[dig] = (char) ('0' + exponent % 10);
+ exponent /= 10;
+ }
+ out += 4;
}
- *(p++) = 'e';
- sprintf (p, "%d", exponent);
+ *(out++) = '\0';
/* Do the actual conversion. */
convert_real (dtp, dest, buffer, length);
- if (buffer != scratch)
- free_mem (buffer);
+ return;
+
+ /* The value read is zero. */
+zero:
+ switch (length)
+ {
+ case 4:
+ *((GFC_REAL_4 *) dest) = 0.0;
+ break;
+
+ case 8:
+ *((GFC_REAL_8 *) dest) = 0.0;
+ break;
+
+#ifdef HAVE_GFC_REAL_10
+ case 10:
+ *((GFC_REAL_10 *) dest) = 0.0;
+ break;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+ case 16:
+ *((GFC_REAL_16 *) dest) = 0.0;
+ break;
+#endif
+
+ default:
+ internal_error (&dtp->common, "Unsupported real kind during IO");
+ }
+ return;
+bad_float:
+ generate_error (&dtp->common, LIBERROR_READ_VALUE,
+ "Bad value during floating point read");
+ next_record (dtp, 1);
+ return;
}
void
read_x (st_parameter_dt * dtp, int n)
{
- if ((dtp->u.p.pad_status == PAD_NO || is_internal_unit (dtp))
- && dtp->u.p.current_unit->bytes_left < n)
+ if ((dtp->u.p.current_unit->pad_status == PAD_NO || is_internal_unit (dtp))
+ && dtp->u.p.current_unit->bytes_left < n)
n = dtp->u.p.current_unit->bytes_left;
dtp->u.p.sf_read_comma = 0;