1 /* { dg-do run { target i?86-*-* x86_64-*-* } } */
2 /* { dg-require-effective-target sse4 } */
3 /* { dg-options "-O2 -msse4.1" } */
5 #include "sse4_1-check.h"
46 } val1, val2, res[16];
60 res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0);
61 res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1);
62 res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2);
63 res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3);
64 res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01);
65 res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02);
66 res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03);
67 res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12);
68 res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13);
69 res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23);
70 res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0));
71 res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1));
72 res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2));
73 res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3));
74 res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN);
75 res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA);
77 masks[0] = HIMASK | lmsk0;
78 masks[1] = HIMASK | lmsk1;
79 masks[2] = HIMASK | lmsk2;
80 masks[3] = HIMASK | lmsk3;
81 masks[4] = HIMASK | lmsk01;
82 masks[5] = HIMASK | lmsk02;
83 masks[6] = HIMASK | lmsk03;
84 masks[7] = HIMASK | lmsk12;
85 masks[8] = HIMASK | lmsk13;
86 masks[9] = HIMASK | lmsk23;
87 masks[10] = HIMASK | (0x0F & ~lmsk0);
88 masks[11] = HIMASK | (0x0F & ~lmsk1);
89 masks[12] = HIMASK | (0x0F & ~lmsk2);
90 masks[13] = HIMASK | (0x0F & ~lmsk3);
91 masks[14] = HIMASK | lmskN;
92 masks[15] = HIMASK | lmskA;
94 for (i = 0; i <= 15; i++)
98 for (j = 0; j < 4; j++)
99 if ((HIMASK & (0x10 << j)))
100 tmp += val1.f[j] * val2.f[j];
102 for (j = 0; j < 4; j++)
103 if ((masks[i] & (1 << j)) && res[i].f[j] != tmp)