1 `/* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfortran; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
26 #include "libgfortran.h"'
29 /* This is a C version of the following fortran pseudo-code. The key
30 point is the loop order -- we access all arrays column-first, which
31 improves the performance enough to boost galgel spec score by 50%.
33 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
34 C = 0
35 DO J=1,N
36 DO K=1,COUNT
37 DO I=1,M
38 C(I,J) = C(I,J)+A(I,K)*B(K,J)
39 */
42 `__matmul_'rtype_code (rtype * retarray, rtype * a, rtype * b)
48 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
49 index_type x, y, n, count, xcount, ycount;
51 assert (GFC_DESCRIPTOR_RANK (a) == 2
52 || GFC_DESCRIPTOR_RANK (b) == 2);
54 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
56 Either A or B (but not both) can be rank 1:
58 o One-dimensional argument A is implicitly treated as a row matrix
59 dimensioned [1,count], so xcount=1.
61 o One-dimensional argument B is implicitly treated as a column matrix
62 dimensioned [count, 1], so ycount=1. */
65 if (retarray->data == NULL)
67 if (GFC_DESCRIPTOR_RANK (a) == 1)
69 retarray->dim[0].lbound = 0;
70 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
71 retarray->dim[0].stride = 1;
73 else if (GFC_DESCRIPTOR_RANK (b) == 1)
75 retarray->dim[0].lbound = 0;
76 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
77 retarray->dim[0].stride = 1;
81 retarray->dim[0].lbound = 0;
82 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
83 retarray->dim[0].stride = 1;
85 retarray->dim[1].lbound = 0;
86 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
87 retarray->dim[1].stride = retarray->dim[0].ubound+1;
90 retarray->data = internal_malloc (sizeof (rtype_name) * size0 (retarray));
96 dest = retarray->data;
98 if (retarray->dim[0].stride == 0)
99 retarray->dim[0].stride = 1;
100 if (a->dim[0].stride == 0)
101 a->dim[0].stride = 1;
102 if (b->dim[0].stride == 0)
103 b->dim[0].stride = 1;
105 sinclude(`matmul_asm_'rtype_code`.m4')dnl
107 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
109 /* One-dimensional result may be addressed in the code below
110 either as a row or a column matrix. We want both cases to work. */
112 rxstride = rystride = retarray->dim[0].stride;
116 rxstride = retarray->dim[0].stride;
117 rystride = retarray->dim[1].stride;
121 if (GFC_DESCRIPTOR_RANK (a) == 1)
123 /* Treat it as a row matrix A[1,count]. */
124 axstride = a->dim[0].stride;
128 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
132 axstride = a->dim[0].stride;
133 aystride = a->dim[1].stride;
135 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
136 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
139 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
141 if (GFC_DESCRIPTOR_RANK (b) == 1)
143 /* Treat it as a column matrix B[count,1] */
144 bxstride = b->dim[0].stride;
146 /* bystride should never be used for 1-dimensional b.
147 In case it is, we want it to cause a segfault rather than
148 an incorrect result. */
149 bystride = 0xDEADBEEF;
154 bxstride = b->dim[0].stride;
155 bystride = b->dim[1].stride;
156 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
159 assert (a->base == 0);
160 assert (b->base == 0);
161 assert (retarray->base == 0);
165 dest = retarray->data;
167 if (rxstride == 1 && axstride == 1 && bxstride == 1)
174 memset (dest, 0, (sizeof (rtype_name) * size0(retarray)));
176 for (y = 0; y < ycount; y++)
178 bbase_y = bbase + y*bystride;
179 dest_y = dest + y*rystride;
180 for (n = 0; n < count; n++)
182 abase_n = abase + n*aystride;
183 bbase_yn = bbase_y[n];
184 for (x = 0; x < xcount; x++)
186 dest_y[x] += abase_n[x] * bbase_yn;
193 for (y = 0; y < ycount; y++)
194 for (x = 0; x < xcount; x++)
195 dest[x*rxstride + y*rystride] = (rtype_name)0;
197 for (y = 0; y < ycount; y++)
198 for (n = 0; n < count; n++)
199 for (x = 0; x < xcount; x++)
200 /* dest[x,y] += a[x,n] * b[n,y] */
201 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];