1 `/* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfortran; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
26 #include "libgfortran.h"'
29 /* This is a C version of the following fortran pseudo-code. The key
30 point is the loop order -- we access all arrays column-first, which
31 improves the performance enough to boost galgel spec score by 50%.
33 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
38 C(I,J) = C(I,J)+A(I,K)*B(K,J)
42 `__matmul_'rtype_code (rtype * retarray, rtype * a, rtype * b)
48 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
49 index_type x, y, n, count, xcount, ycount;
51 assert (GFC_DESCRIPTOR_RANK (a) == 2
52 || GFC_DESCRIPTOR_RANK (b) == 2);
54 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
56 Either A or B (but not both) can be rank 1:
58 o One-dimensional argument A is implicitly treated as a row matrix
59 dimensioned [1,count], so xcount=1.
61 o One-dimensional argument B is implicitly treated as a column matrix
62 dimensioned [count, 1], so ycount=1.
65 if (retarray->data == NULL)
67 if (GFC_DESCRIPTOR_RANK (a) == 1)
69 retarray->dim[0].lbound = 0;
70 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
71 retarray->dim[0].stride = 1;
73 else if (GFC_DESCRIPTOR_RANK (b) == 1)
75 retarray->dim[0].lbound = 0;
76 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
77 retarray->dim[0].stride = 1;
81 retarray->dim[0].lbound = 0;
82 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
83 retarray->dim[0].stride = 1;
85 retarray->dim[1].lbound = 0;
86 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
87 retarray->dim[1].stride = retarray->dim[0].ubound+1;
91 = internal_malloc_size (sizeof (rtype_name) * size0 (retarray));
97 dest = retarray->data;
99 if (retarray->dim[0].stride == 0)
100 retarray->dim[0].stride = 1;
101 if (a->dim[0].stride == 0)
102 a->dim[0].stride = 1;
103 if (b->dim[0].stride == 0)
104 b->dim[0].stride = 1;
106 sinclude(`matmul_asm_'rtype_code`.m4')dnl
108 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
110 /* One-dimensional result may be addressed in the code below
111 either as a row or a column matrix. We want both cases to
113 rxstride = rystride = retarray->dim[0].stride;
117 rxstride = retarray->dim[0].stride;
118 rystride = retarray->dim[1].stride;
122 if (GFC_DESCRIPTOR_RANK (a) == 1)
124 /* Treat it as a row matrix A[1,count]. */
125 axstride = a->dim[0].stride;
129 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
133 axstride = a->dim[0].stride;
134 aystride = a->dim[1].stride;
136 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
137 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
140 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
142 if (GFC_DESCRIPTOR_RANK (b) == 1)
144 /* Treat it as a column matrix B[count,1] */
145 bxstride = b->dim[0].stride;
147 /* bystride should never be used for 1-dimensional b.
148 In case it is, we want it to cause a segfault, rather than
149 an incorrect result. */
150 bystride = 0xDEADBEEF;
155 bxstride = b->dim[0].stride;
156 bystride = b->dim[1].stride;
157 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
160 assert (a->base == 0);
161 assert (b->base == 0);
162 assert (retarray->base == 0);
166 dest = retarray->data;
168 if (rxstride == 1 && axstride == 1 && bxstride == 1)
175 memset (dest, 0, (sizeof (rtype_name) * size0(retarray)));
177 for (y = 0; y < ycount; y++)
179 bbase_y = bbase + y*bystride;
180 dest_y = dest + y*rystride;
181 for (n = 0; n < count; n++)
183 abase_n = abase + n*aystride;
184 bbase_yn = bbase_y[n];
185 for (x = 0; x < xcount; x++)
187 dest_y[x] += abase_n[x] * bbase_yn;
194 for (y = 0; y < ycount; y++)
195 for (x = 0; x < xcount; x++)
196 dest[x*rxstride + y*rystride] = (rtype_name)0;
198 for (y = 0; y < ycount; y++)
199 for (n = 0; n < count; n++)
200 for (x = 0; x < xcount; x++)
201 /* dest[x,y] += a[x,n] * b[n,y] */
202 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];