2 /* { dg-require-effective-target sse4 } */
3 /* { dg-options "-O2 -msse4.1" } */
6 #define CHECK_H "sse4_1-check.h"
10 #define TEST sse4_1_test
15 #include <smmintrin.h>
54 } val1, val2, res[16];
68 res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0);
69 res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1);
70 res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2);
71 res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3);
72 res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01);
73 res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02);
74 res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03);
75 res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12);
76 res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13);
77 res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23);
78 res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0));
79 res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1));
80 res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2));
81 res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3));
82 res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN);
83 res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA);
85 masks[0] = HIMASK | lmsk0;
86 masks[1] = HIMASK | lmsk1;
87 masks[2] = HIMASK | lmsk2;
88 masks[3] = HIMASK | lmsk3;
89 masks[4] = HIMASK | lmsk01;
90 masks[5] = HIMASK | lmsk02;
91 masks[6] = HIMASK | lmsk03;
92 masks[7] = HIMASK | lmsk12;
93 masks[8] = HIMASK | lmsk13;
94 masks[9] = HIMASK | lmsk23;
95 masks[10] = HIMASK | (0x0F & ~lmsk0);
96 masks[11] = HIMASK | (0x0F & ~lmsk1);
97 masks[12] = HIMASK | (0x0F & ~lmsk2);
98 masks[13] = HIMASK | (0x0F & ~lmsk3);
99 masks[14] = HIMASK | lmskN;
100 masks[15] = HIMASK | lmskA;
102 for (i = 0; i <= 15; i++)
106 for (j = 0; j < 4; j++)
107 if ((HIMASK & (0x10 << j)))
108 tmp += val1.f[j] * val2.f[j];
110 for (j = 0; j < 4; j++)
111 if ((masks[i] & (1 << j)) && res[i].f[j] != tmp)