- for (x = 0; x < xcount; x++)
- {
- /* Do the summation for this element. For real and integer types
- this is the same as DOT_PRODUCT. For complex types we use do
- a*b, not conjg(a)*b. */
- pa = abase;
- pb = bbase;
- res = 0;
-
- for (n = 0; n < count; n++)
- {
- res += *pa * *pb;
- pa += astride;
- pb += bstride;
- }
-
- *dest = res;
-
- dest += rxstride;
- abase += xstride;
- }
- abase -= xstride * xcount;
- bbase += ystride;
- dest += rystride - (rxstride * xcount);
+ GFC_REAL_4 *bbase_y;
+ GFC_REAL_4 *dest_y;
+ GFC_REAL_4 *abase_n;
+ GFC_REAL_4 bbase_yn;
+
+ memset (dest, 0, (sizeof (GFC_REAL_4) * size0(retarray)));
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = bbase + y*bystride;
+ dest_y = dest + y*rystride;
+ for (n = 0; n < count; n++)
+ {
+ abase_n = abase + n*aystride;
+ bbase_yn = bbase_y[n];
+ for (x = 0; x < xcount; x++)
+ {
+ dest_y[x] += abase_n[x] * bbase_yn;
+ }
+ }
+ }
+ }
+ else
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];