libgfortran/intrinsics/pack_generic.c

   1 /* Generic implementation of the PACK intrinsic
   2    Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Paul Brook <paul@nowt.org>
   4
   5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
   6
   7 Libgfortran is free software; you can redistribute it and/or
   8 modify it under the terms of the GNU General Public
   9 License as published by the Free Software Foundation; either
  10 version 2 of the License, or (at your option) any later version.
  11
  12 In addition to the permissions in the GNU General Public License, the
  13 Free Software Foundation gives you unlimited permission to link the
  14 compiled version of this file into combinations with other programs,
  15 and to distribute those combinations without any restriction coming
  16 from the use of this file.  (The General Public License restrictions
  17 do apply in other respects; for example, they cover modification of
  18 the file, and distribution when not linked into a combine
  19 executable.)
  20
  21 Libgfortran is distributed in the hope that it will be useful,
  22 but WITHOUT ANY WARRANTY; without even the implied warranty of
  23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24 GNU General Public License for more details.
  25
  26 You should have received a copy of the GNU General Public
  27 License along with libgfortran; see the file COPYING.  If not,
  28 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  29 Boston, MA 02111-1307, USA.  */
  30
  31 #include "config.h"
  32 #include <stdlib.h>
  33 #include <assert.h>
  34 #include <string.h>
  35 #include "libgfortran.h"
  36
  37 /* PACK is specified as follows:
  38
  39    13.14.80 PACK (ARRAY, MASK, [VECTOR])
  40
  41    Description: Pack an array into an array of rank one under the
  42    control of a mask.
  43
  44    Class: Transformational function.
  45
  46    Arguments:
  47       ARRAY   may be of any type. It shall not be scalar.
  48       MASK    shall be of type LOGICAL. It shall be conformable with ARRAY.
  49       VECTOR  (optional) shall be of the same type and type parameters
  50               as ARRAY. VECTOR shall have at least as many elements as
  51               there are true elements in MASK. If MASK is a scalar
  52               with the value true, VECTOR shall have at least as many
  53               elements as there are in ARRAY.
  54
  55    Result Characteristics: The result is an array of rank one with the
  56    same type and type parameters as ARRAY. If VECTOR is present, the
  57    result size is that of VECTOR; otherwise, the result size is the
  58    number /t/ of true elements in MASK unless MASK is scalar with the
  59    value true, in which case the result size is the size of ARRAY.
  60
  61    Result Value: Element /i/ of the result is the element of ARRAY
  62    that corresponds to the /i/th true element of MASK, taking elements
  63    in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is
  64    present and has size /n/ > /t/, element /i/ of the result has the
  65    value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/.
  66
  67    Examples: The nonzero elements of an array M with the value
  68    | 0 0 0 |
  69    | 9 0 0 | may be "gathered" by the function PACK. The result of
  70    | 0 0 7 |
  71    PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0,
  72    VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12].
  73
  74 There are two variants of the PACK intrinsic: one, where MASK is
  75 array valued, and the other one where MASK is scalar.  */
  76
  77 extern void pack (gfc_array_char *, const gfc_array_char *,
  78                   const gfc_array_l4 *, const gfc_array_char *);
  79 export_proto(pack);
  80
  81 void
  82 pack (gfc_array_char *ret, const gfc_array_char *array,
  83       const gfc_array_l4 *mask, const gfc_array_char *vector)
  84 {
  85   /* r.* indicates the return array.  */
  86   index_type rstride0;
  87   char *rptr;
  88   /* s.* indicates the source array.  */
  89   index_type sstride[GFC_MAX_DIMENSIONS];
  90   index_type sstride0;
  91   const char *sptr;
  92   /* m.* indicates the mask array.  */
  93   index_type mstride[GFC_MAX_DIMENSIONS];
  94   index_type mstride0;
  95   const GFC_LOGICAL_4 *mptr;
  96
  97   index_type count[GFC_MAX_DIMENSIONS];
  98   index_type extent[GFC_MAX_DIMENSIONS];
  99   index_type n;
 100   index_type dim;
 101   index_type size;
 102   index_type nelem;
 103
 104   size = GFC_DESCRIPTOR_SIZE (array);
 105   dim = GFC_DESCRIPTOR_RANK (array);
 106   for (n = 0; n < dim; n++)
 107     {
 108       count[n] = 0;
 109       extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
 110       sstride[n] = array->dim[n].stride * size;
 111       mstride[n] = mask->dim[n].stride;
 112     }
 113   if (sstride[0] == 0)
 114     sstride[0] = size;
 115   if (mstride[0] == 0)
 116     mstride[0] = 1;
 117
 118   sptr = array->data;
 119   mptr = mask->data;
 120
 121   /* Use the same loop for both logical types. */
 122   if (GFC_DESCRIPTOR_SIZE (mask) != 4)
 123     {
 124       if (GFC_DESCRIPTOR_SIZE (mask) != 8)
 125         runtime_error ("Funny sized logical array");
 126       for (n = 0; n < dim; n++)
 127         mstride[n] <<= 1;
 128       mptr = GFOR_POINTER_L8_TO_L4 (mptr);
 129     }
 130
 131   if (ret->data == NULL)
 132     {
 133       /* Allocate the memory for the result.  */
 134       int total;
 135
 136       if (vector != NULL)
 137         {
 138           /* The return array will have as many
 139              elements as there are in VECTOR.  */
 140           total = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
 141         }
 142       else
 143         {
 144           /* We have to count the true elements in MASK.  */
 145
 146           /* TODO: We could speed up pack easily in the case of only
 147              few .TRUE. entries in MASK, by keeping track of where we
 148              would be in the source array during the initial traversal
 149              of MASK, and caching the pointers to those elements. Then,
 150              supposed the number of elements is small enough, we would
 151              only have to traverse the list, and copy those elements
 152              into the result array. In the case of datatypes which fit
 153              in one of the integer types we could also cache the
 154              value instead of a pointer to it.
 155              This approach might be bad from the point of view of
 156              cache behavior in the case where our cache is not big
 157              enough to hold all elements that have to be copied.  */
 158
 159           const GFC_LOGICAL_4 *m = mptr;
 160
 161           total = 0;
 162
 163           while (m)
 164             {
 165               /* Test this element.  */
 166               if (*m)
 167                 total++;
 168
 169               /* Advance to the next element.  */
 170               m += mstride[0];
 171               count[0]++;
 172               n = 0;
 173               while (count[n] == extent[n])
 174                 {
 175                   /* When we get to the end of a dimension, reset it
 176                      and increment the next dimension.  */
 177                   count[n] = 0;
 178                   /* We could precalculate this product, but this is a
 179                      less frequently used path so proabably not worth
 180                      it.  */
 181                   m -= mstride[n] * extent[n];
 182                   n++;
 183                   if (n >= dim)
 184                     {
 185                       /* Break out of the loop.  */
 186                       m = NULL;
 187                       break;
 188                     }
 189                   else
 190                     {
 191                       count[n]++;
 192                       mptr += mstride[n];
 193                     }
 194                 }
 195             }
 196         }
 197
 198       /* Setup the array descriptor.  */
 199       ret->dim[0].lbound = 0;
 200       ret->dim[0].ubound = total - 1;
 201       ret->dim[0].stride = 1;
 202
 203       ret->data = internal_malloc_size (size * total);
 204       ret->base = 0;
 205
 206       if (total == 0)
 207         /* In this case, nothing remains to be done.  */
 208         return;
 209     }
 210
 211   rstride0 = ret->dim[0].stride * size;
 212   if (rstride0 == 0)
 213     rstride0 = size;
 214   sstride0 = sstride[0];
 215   mstride0 = mstride[0];
 216   rptr = ret->data;
 217
 218   while (sptr)
 219     {
 220       /* Test this element.  */
 221       if (*mptr)
 222         {
 223           /* Add it.  */
 224           memcpy (rptr, sptr, size);
 225           rptr += rstride0;
 226         }
 227       /* Advance to the next element.  */
 228       sptr += sstride0;
 229       mptr += mstride0;
 230       count[0]++;
 231       n = 0;
 232       while (count[n] == extent[n])
 233         {
 234           /* When we get to the end of a dimension, reset it and increment
 235              the next dimension.  */
 236           count[n] = 0;
 237           /* We could precalculate these products, but this is a less
 238              frequently used path so proabably not worth it.  */
 239           sptr -= sstride[n] * extent[n];
 240           mptr -= mstride[n] * extent[n];
 241           n++;
 242           if (n >= dim)
 243             {
 244               /* Break out of the loop.  */
 245               sptr = NULL;
 246               break;
 247             }
 248           else
 249             {
 250               count[n]++;
 251               sptr += sstride[n];
 252               mptr += mstride[n];
 253             }
 254         }
 255     }
 256
 257   /* Add any remaining elements from VECTOR.  */
 258   if (vector)
 259     {
 260       n = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
 261       nelem = ((rptr - ret->data) / rstride0);
 262       if (n > nelem)
 263         {
 264           sstride0 = vector->dim[0].stride * size;
 265           if (sstride0 == 0)
 266             sstride0 = size;
 267
 268           sptr = vector->data + sstride0 * nelem;
 269           n -= nelem;
 270           while (n--)
 271             {
 272               memcpy (rptr, sptr, size);
 273               rptr += rstride0;
 274               sptr += sstride0;
 275             }
 276         }
 277     }
 278 }
 279
 280 extern void pack_s (gfc_array_char *ret, const gfc_array_char *array,
 281                     const GFC_LOGICAL_4 *, const gfc_array_char *);
 282 export_proto(pack_s);
 283
 284 void
 285 pack_s (gfc_array_char *ret, const gfc_array_char *array,
 286         const GFC_LOGICAL_4 *mask, const gfc_array_char *vector)
 287 {
 288   /* r.* indicates the return array.  */
 289   index_type rstride0;
 290   char *rptr;
 291   /* s.* indicates the source array.  */
 292   index_type sstride[GFC_MAX_DIMENSIONS];
 293   index_type sstride0;
 294   const char *sptr;
 295
 296   index_type count[GFC_MAX_DIMENSIONS];
 297   index_type extent[GFC_MAX_DIMENSIONS];
 298   index_type n;
 299   index_type dim;
 300   index_type size;
 301   index_type nelem;
 302
 303   size = GFC_DESCRIPTOR_SIZE (array);
 304   dim = GFC_DESCRIPTOR_RANK (array);
 305   for (n = 0; n < dim; n++)
 306     {
 307       count[n] = 0;
 308       extent[n] = array->dim[n].ubound + 1 - array->dim[n].lbound;
 309       sstride[n] = array->dim[n].stride * size;
 310     }
 311   if (sstride[0] == 0)
 312     sstride[0] = size;
 313
 314   sstride0 = sstride[0];
 315   sptr = array->data;
 316
 317   if (ret->data == NULL)
 318     {
 319       /* Allocate the memory for the result.  */
 320       int total;
 321
 322       if (vector != NULL)
 323         {
 324           /* The return array will have as many elements as there are
 325              in vector.  */
 326           total = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
 327         }
 328       else
 329         {
 330           if (*mask)
 331             {
 332               /* The result array will have as many elements as the input
 333                  array.  */
 334               total = extent[0];
 335               for (n = 1; n < dim; n++)
 336                 total *= extent[n];
 337             }
 338           else
 339             {
 340               /* The result array will be empty.  */
 341               ret->dim[0].lbound = 0;
 342               ret->dim[0].ubound = -1;
 343               ret->dim[0].stride = 1;
 344               ret->data = internal_malloc_size (0);
 345               ret->base = 0;
 346
 347               return;
 348             }
 349         }
 350
 351       /* Setup the array descriptor.  */
 352       ret->dim[0].lbound = 0;
 353       ret->dim[0].ubound = total - 1;
 354       ret->dim[0].stride = 1;
 355
 356       ret->data = internal_malloc_size (size * total);
 357       ret->base = 0;
 358     }
 359
 360   rstride0 = ret->dim[0].stride * size;
 361   if (rstride0 == 0)
 362     rstride0 = size;
 363   rptr = ret->data;
 364
 365   /* The remaining possibilities are now:
 366        If MASK is .TRUE., we have to copy the source array into the
 367      result array. We then have to fill it up with elements from VECTOR.
 368        If MASK is .FALSE., we have to copy VECTOR into the result
 369      array. If VECTOR were not present we would have already returned.  */
 370
 371   if (*mask)
 372     {
 373       while (sptr)
 374         {
 375           /* Add this element.  */
 376           memcpy (rptr, sptr, size);
 377           rptr += rstride0;
 378
 379           /* Advance to the next element.  */
 380           sptr += sstride0;
 381           count[0]++;
 382           n = 0;
 383           while (count[n] == extent[n])
 384             {
 385               /* When we get to the end of a dimension, reset it and
 386                  increment the next dimension.  */
 387               count[n] = 0;
 388               /* We could precalculate these products, but this is a
 389                  less frequently used path so proabably not worth it.  */
 390               sptr -= sstride[n] * extent[n];
 391               n++;
 392               if (n >= dim)
 393                 {
 394                   /* Break out of the loop.  */
 395                   sptr = NULL;
 396                   break;
 397                 }
 398               else
 399                 {
 400                   count[n]++;
 401                   sptr += sstride[n];
 402                 }
 403             }
 404         }
 405     }
 406
 407   /* Add any remaining elements from VECTOR.  */
 408   if (vector)
 409     {
 410       n = vector->dim[0].ubound + 1 - vector->dim[0].lbound;
 411       nelem = ((rptr - ret->data) / rstride0);
 412       if (n > nelem)
 413         {
 414           sstride0 = vector->dim[0].stride * size;
 415           if (sstride0 == 0)
 416             sstride0 = size;
 417
 418           sptr = vector->data + sstride0 * nelem;
 419           n -= nelem;
 420           while (n--)
 421             {
 422               memcpy (rptr, sptr, size);
 423               rptr += rstride0;
 424               sptr += sstride0;
 425             }
 426         }
 427     }
 428 }