1 /* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfor; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
26 #include "libgfortran.h"
28 /* This is a C version of the following fortran pseudo-code. The key
29 point is the loop order -- we access all arrays column-first, which
30 improves the performance enough to boost galgel spec score by 50%.
32 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
37 C(I,J) = C(I,J)+A(I,K)*B(K,J)
41 __matmul_i8 (gfc_array_i8 * retarray, gfc_array_i8 * a, gfc_array_i8 * b)
47 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
48 index_type x, y, n, count, xcount, ycount;
50 assert (GFC_DESCRIPTOR_RANK (a) == 2
51 || GFC_DESCRIPTOR_RANK (b) == 2);
53 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
55 Either A or B (but not both) can be rank 1:
57 o One-dimensional argument A is implicitly treated as a row matrix
58 dimensioned [1,count], so xcount=1.
60 o One-dimensional argument B is implicitly treated as a column matrix
61 dimensioned [count, 1], so ycount=1.
64 if (retarray->data == NULL)
66 if (GFC_DESCRIPTOR_RANK (a) == 1)
68 retarray->dim[0].lbound = 0;
69 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
70 retarray->dim[0].stride = 1;
72 else if (GFC_DESCRIPTOR_RANK (b) == 1)
74 retarray->dim[0].lbound = 0;
75 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
76 retarray->dim[0].stride = 1;
80 retarray->dim[0].lbound = 0;
81 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
82 retarray->dim[0].stride = 1;
84 retarray->dim[1].lbound = 0;
85 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
86 retarray->dim[1].stride = retarray->dim[0].ubound+1;
90 = internal_malloc_size (sizeof (GFC_INTEGER_8) * size0 (retarray));
96 dest = retarray->data;
98 if (retarray->dim[0].stride == 0)
99 retarray->dim[0].stride = 1;
100 if (a->dim[0].stride == 0)
101 a->dim[0].stride = 1;
102 if (b->dim[0].stride == 0)
103 b->dim[0].stride = 1;
106 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
108 /* One-dimensional result may be addressed in the code below
109 either as a row or a column matrix. We want both cases to
111 rxstride = rystride = retarray->dim[0].stride;
115 rxstride = retarray->dim[0].stride;
116 rystride = retarray->dim[1].stride;
120 if (GFC_DESCRIPTOR_RANK (a) == 1)
122 /* Treat it as a a row matrix A[1,count]. */
123 axstride = a->dim[0].stride;
127 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
131 axstride = a->dim[0].stride;
132 aystride = a->dim[1].stride;
134 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
135 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
138 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
140 if (GFC_DESCRIPTOR_RANK (b) == 1)
142 /* Treat it as a column matrix B[count,1] */
143 bxstride = b->dim[0].stride;
145 /* bystride should never be used for 1-dimensional b.
146 in case it is we want it to cause a segfault, rather than
147 an incorrect result. */
148 bystride = 0xDEADBEEF;
153 bxstride = b->dim[0].stride;
154 bystride = b->dim[1].stride;
155 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
158 assert (a->base == 0);
159 assert (b->base == 0);
160 assert (retarray->base == 0);
164 dest = retarray->data;
166 if (rxstride == 1 && axstride == 1 && bxstride == 1)
168 GFC_INTEGER_8 *bbase_y;
169 GFC_INTEGER_8 *dest_y;
170 GFC_INTEGER_8 *abase_n;
171 GFC_INTEGER_8 bbase_yn;
173 memset (dest, 0, (sizeof (GFC_INTEGER_8) * size0(retarray)));
175 for (y = 0; y < ycount; y++)
177 bbase_y = bbase + y*bystride;
178 dest_y = dest + y*rystride;
179 for (n = 0; n < count; n++)
181 abase_n = abase + n*aystride;
182 bbase_yn = bbase_y[n];
183 for (x = 0; x < xcount; x++)
185 dest_y[x] += abase_n[x] * bbase_yn;
192 for (y = 0; y < ycount; y++)
193 for (x = 0; x < xcount; x++)
194 dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
196 for (y = 0; y < ycount; y++)
197 for (n = 0; n < count; n++)
198 for (x = 0; x < xcount; x++)
199 /* dest[x,y] += a[x,n] * b[n,y] */
200 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];