1 /* { dg-do run { target powerpc*-*-* } } */
2 /* { dg-options "-faltivec -O2" } */
4 int printf(const char * , ...);
/*
 * Reduced compiler run-time test exercising a very long straight-line
 * sequence of AltiVec intrinsics (vec_ld/vec_st, vec_perm, vec_sld,
 * vec_pack, vec_merge*, vec_cmpgt/vec_cmpeq, vec_and/or/nor/sel,
 * vec_min/max/sub/add) under -O2.
 *
 * NOTE(review): the variable names (bS, mv, cbp, intra, MBAFF, poc) suggest
 * this was reduced from an H.264 deblocking-filter boundary-strength kernel,
 * but that is inferred from naming only.  The scratch buffers `intra` and
 * `mv_const` are read before ever being written, so every computed value is
 * indeterminate: the test's purpose is to stress vector code generation
 * (register pressure, permute-unit scheduling), not to produce meaningful
 * output.  The stray integer at the start of each statement and several
 * apparently-missing braces are residue of the testcase reduction and are
 * preserved verbatim below.
 */
7 void foo(char *bS, char *bS_edge, int field_MBAFF, int top){
8 char intra[16] __attribute__ ((aligned(16)));
9 signed short mv_const[8] __attribute__((aligned(16)));
11 vector signed short v_three, v_ref_mask00, v_ref_mask01, v_vec_maskv, v_vec_maskh;
12 vector unsigned char v_permv, v_permh, v_bS, v_bSh, v_bSv, v_cbp_maskv, v_cbp_maskvn, v_cbp_maskh, v_cbp_maskhn, v_intra_maskh, v_intra_maskv, v_intra_maskhn, v_intra_maskvn;
13 vector unsigned char tmp7, tmp8, tmp9, tmp10, v_c1, v_cbp1, v_cbp2, v_pocl, v_poch;
14 vector signed short v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
15 vector signed short idx0;
16 vector signed short tmp00, tmp01, tmp02, tmp03;
/* NOTE(review): despite its name, v_zero is NOT zero — it is the ASCII
 * bytes 'a'..'p'.  The name presumably survives from the pre-reduction
 * code; several later compares/nors depend on this exact constant. */
17 vector unsigned char v_zero = (vector unsigned char) {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p'};
/* v_three: per-lane threshold for the many vec_cmpgt(|a-b|, v_three)
 * comparisons below; loaded from the uninitialized mv_const buffer. */
18 v_three = (vector signed short) vec_ld (0, (vector signed short *) mv_const);
/* Build a per-byte coefficient bit mask: splat bytes 0/1 of the loaded
 * mask into the two halves, then turn lane positions into single-bit
 * masks via (1 << (7 - lvsl lane index)). */
20 vector unsigned char v_coef_mask = vec_ld(0, (vector unsigned char *)mv_const);
21 vector unsigned char v_coef_mask_hi = vec_splat(v_coef_mask, 0);
22 vector unsigned char v_coef_mask_lo = vec_splat(v_coef_mask, 1);
23 v_coef_mask = vec_sld(v_coef_mask_hi, v_coef_mask_lo, 8);
24 vector unsigned char v_bit_mask = vec_sub(vec_splat_u8(7), vec_lvsl(0, (unsigned char *)0));
25 v_bit_mask = vec_sld(vec_sld(v_bit_mask, v_bit_mask, 8), v_bit_mask, 8);
26 v_bit_mask = vec_sl(vec_splat_u8(1), v_bit_mask);
27 tmp5 = (vector signed short) vec_and(v_coef_mask, v_bit_mask);
/* Vertical/horizontal "intra" masks: splat+shift bytes of (uninitialized)
 * intra/mv_const loads, OR them together, then compare against the
 * non-zero v_zero constant to get byte-wide boolean masks. */
30 tmp8 = vec_ld (0, (vector unsigned char *) intra);
31 tmp9 = vec_ld (0, (vector unsigned char *) mv_const);
32 tmp10 = vec_ld (0, (vector unsigned char *) mv_const);
33 v_permv = vec_ld (0, (vector unsigned char *) mv_const);
34 v_permh = vec_ld (0, (vector unsigned char *) mv_const);
35 tmp6 = vec_ld (0, (vector signed short *) mv_const);
37 tmp8 = vec_splat((vector unsigned char) tmp8, 0);
38 tmp9 = vec_splat((vector unsigned char) tmp9, 12);
39 tmp10 = vec_splat((vector unsigned char) tmp10, 12);
40 tmp9 = vec_sld ((vector unsigned char) tmp9,(vector unsigned char) tmp8, 12);
41 tmp10 = vec_sld ((vector unsigned char) tmp10, (vector unsigned char) tmp8, 12);
42 v_intra_maskv = vec_or (tmp9, tmp8);
43 v_intra_maskh = vec_or (tmp10, tmp8);
44 v_intra_maskv = (vector unsigned char) vec_cmpgt ((vector unsigned char) v_intra_maskv, (vector unsigned char) v_zero);
45 v_intra_maskh = (vector unsigned char) vec_cmpgt ((vector unsigned char) v_intra_maskh, (vector unsigned char) v_zero);
/* Coded-block-pattern masks, vertical then horizontal: rotate tmp6 by a
 * `top`-dependent lvsl permute, combine with the coefficient bits in tmp5
 * through v_permv/v_permh, then booleanize with vec_cmpgt. */
47 tmp9 = vec_lvsl (4 + (top<<2), (unsigned char *) 0x0);
48 v_cbp1 = vec_perm ((vector unsigned char) tmp6, (vector unsigned char) tmp6, tmp9);
49 v_cbp2 = (vector unsigned char) vec_perm ((vector unsigned char) tmp5, (vector unsigned char) tmp5, (vector unsigned char) v_permv);
50 v_cbp1 = (vector unsigned char) vec_sld ((vector unsigned char) v_cbp1,(vector unsigned char) v_cbp2, 12);
51 v_cbp_maskv = vec_or (v_cbp1, v_cbp2);
53 tmp9 = vec_lvsl (12 + (top<<2), (unsigned char *) 0x0);
54 v_cbp1 = vec_perm ((vector unsigned char) tmp6, (vector unsigned char) tmp6, tmp9);
55 v_cbp2 = (vector unsigned char) vec_perm ((vector unsigned char) tmp5, (vector unsigned char) tmp5, (vector unsigned char) v_permh);
56 v_cbp1 = (vector unsigned char) vec_sld ((vector unsigned char) v_cbp1,(vector unsigned char) v_cbp2, 12);
57 v_cbp_maskh = vec_or (v_cbp1, v_cbp2);
59 v_cbp_maskv = (vector unsigned char) vec_cmpgt ((vector unsigned char) v_cbp_maskv, (vector unsigned char) v_zero);
60 v_cbp_maskh = (vector unsigned char) vec_cmpgt ((vector unsigned char) v_cbp_maskh, (vector unsigned char) v_zero);
/* First (vertical) reference-index pass: split idx0 into low bits (v0)
 * and shifted remainder, pack and rotate, then byte-compare the rotated
 * copies against each other. */
76 idx0 = vec_ld (0, (signed short *) intra);
78 v_c1 = (vector unsigned char) {'1','2','3','4','5','6','7','8','1','2','3','4','5','6','7','8'};
81 v0 = (vector signed short) vec_and ((vector unsigned char) idx0, v_c1);
82 idx0 = (vector signed short) vec_sra ((vector unsigned char) idx0, v_c1);
84 v1 = vec_sld (v0, v0, 15);
85 v1 = (vector signed short) vec_pack (v1, v0);
87 v2 = vec_sld (v1, v1, 2);
88 v3 = vec_sld (v1, v1, 10);
90 v4 = (vector signed short) vec_cmpeq ((vector signed char) v1, (vector signed char) v2);
91 v5 = (vector signed short) vec_cmpeq ((vector signed char) v1, (vector signed char) v3);
92 v6 = (vector signed short) vec_cmpeq ((vector signed char) v2, (vector signed char) v3);
/* NOTE(review): this unconditionally overwrites the three vec_cmpeq
 * results above with ~v_zero (vec_nor(x,x) == ~x), making them dead —
 * almost certainly a reduction artifact, kept because the dead code is
 * part of what the test exercises. */
95 v4 = v5 = v6 = vec_nor (v_zero, v_zero);
/* Build byte indices from idx0, then use them to permute "POC"-style
 * table loads; vec_sel stitches the first two halfwords from the second
 * lookup over the first. */
98 tmp1 = (vector signed short) vec_sl ((vector unsigned char) idx0, v_c1);
99 v_c1 = vec_mergeh ((vector unsigned char) v_zero, v_c1);
100 tmp1 = (vector signed short) vec_add (tmp1, (vector signed short) v_c1);
102 v_pocl = vec_ld (0, (vector unsigned char *) mv_const);
103 v_poch = vec_ld (0, (vector unsigned char *) mv_const);
104 tmp2 = (vector signed short) vec_perm (v_pocl, v_poch, (vector unsigned char) tmp1);
106 v_pocl = vec_ld (0, (vector unsigned char *) mv_const);
107 v_poch = vec_ld (16, (vector unsigned char *) mv_const);
108 tmp1 = (vector signed short) vec_perm (v_pocl, v_poch, (vector unsigned char) tmp1);
109 tmp1 = vec_sel (tmp1, tmp2, (vector unsigned short) {0xffff,0xffff,0,0,0,0,0,0});
/* Lanes equal to all-ones (v_c1 = ~v_zero here) are replaced by the
 * splatted byte 12 of idx0. */
111 tmp3 = (vector signed short) vec_splat ((vector unsigned char) idx0, 12);
112 v_c1 = (vector unsigned char) vec_nor (v_zero, v_zero);
113 tmp0 = (vector signed short) vec_cmpeq ((vector signed char) idx0, (vector signed char) v_c1);
114 tmp1 = vec_sel (tmp1, (vector signed short) tmp3, (vector unsigned short) tmp0);
/* Pack/rotate/compare cascade producing the two reference masks; the
 * AND with v4/v5/v6 uses the ~v_zero constant stored above. */
116 tmp2 = vec_sld (tmp1, tmp1, 15);
117 tmp1 = (vector signed short) vec_pack (tmp2, tmp1);
119 tmp2 = vec_sld (tmp1, tmp1, 2);
120 tmp3 = vec_sld (tmp1, tmp1, 10);
122 tmp0 = (vector signed short) vec_cmpeq ((vector signed char) tmp1, (vector signed char) tmp2);
123 tmp4 = (vector signed short) vec_cmpeq ((vector signed char) tmp1, (vector signed char) tmp3);
124 tmp1 = (vector signed short) vec_cmpeq ((vector signed char) tmp2, (vector signed char) tmp3);
125 tmp0 = vec_and (tmp0, v4);
126 tmp4 = vec_and (tmp4, v5);
127 tmp1 = vec_and (tmp1, v6);
128 tmp2 = vec_sld ((vector signed short) tmp0, (vector signed short) tmp0, 8);
129 tmp3 = vec_sld ((vector signed short) tmp4, (vector signed short) tmp4, 8);
130 tmp5 = vec_sld ((vector signed short) tmp1, (vector signed short) tmp1, 8);
131 tmp0 = vec_and (tmp0, tmp2);
132 tmp4 = vec_and (tmp4, tmp3);
133 tmp1 = vec_and (tmp1, tmp5);
134 v_ref_mask00 = vec_mergeh ((vector signed short) tmp0, (vector signed short) v_c1);
135 v_ref_mask01 = vec_mergeh ((vector signed short) tmp4, (vector signed short) tmp1);
136 v_ref_mask00 = (vector signed short) vec_mergeh ((vector unsigned char) v_ref_mask00, (vector unsigned char) v_ref_mask00);
137 v_ref_mask01 = (vector signed short) vec_mergeh ((vector unsigned char) v_ref_mask01, (vector unsigned char) v_ref_mask01);
/* Load eight "motion vector" rows (all from the same 16-byte buffer at
 * mostly out-of-object offsets — harmless only because the data is never
 * validated; the test cares about the generated code). */
139 v0 = vec_ld (0, (vector signed short *) mv_const);
140 v1 = vec_ld (16, (vector signed short *) mv_const);
141 v4 = vec_ld (64, (vector signed short *) mv_const);
142 v5 = vec_ld (80, (vector signed short *) mv_const);
143 v8 = vec_ld (0, (vector signed short *) mv_const);
144 v9 = vec_ld (16, (vector signed short *) mv_const);
/* The next four near-identical stanzas each compute |a-b| via
 * vec_sub(vec_max,vec_min), threshold with vec_cmpgt(..., v_three),
 * smear the result across adjacent lanes with vec_sld+vec_or, and pack
 * the booleans down; results accumulate in tmp00..tmp03. */
146 tmp0 = (vector signed short) vec_perm ((vector unsigned char) v8,
147 (vector unsigned char) v8, (vector unsigned char) {0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15});
148 tmp1 = (vector signed short) vec_mergeh ((vector signed int) v0, (vector signed int) v1);
149 tmp2 = vec_sld (tmp1, tmp1, 8);
150 tmp3 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
151 tmp4 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
152 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
153 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
154 tmp5 = vec_sld (tmp3, tmp3, 14);
155 tmp6 = vec_sld (tmp4, tmp4, 14);
156 tmp3 = vec_or (tmp3, tmp5);
157 tmp4 = vec_or (tmp4, tmp6);
158 tmp0 = (vector signed short) vec_perm ((vector unsigned char) v9, (vector unsigned char) v9,
159 (vector unsigned char) {0,1,2,3,8,9,10,11,4,5,6,7,12,13,14,15});
160 tmp1 = (vector signed short) vec_mergeh ((vector signed int) v4, (vector signed int) v5);
161 tmp2 = vec_sld (tmp1, tmp1, 8);
162 tmp5 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
163 tmp6 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
164 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
165 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
166 tmp0 = vec_sld (tmp5, tmp5, 14);
167 tmp1 = vec_sld (tmp6, tmp6, 14);
168 tmp5 = vec_or (tmp0, tmp5);
169 tmp6 = vec_or (tmp1, tmp6);
171 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
172 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
173 tmp5 = vec_sld (tmp3, tmp3, 12);
174 tmp6 = vec_sld (tmp4, tmp4, 12);
175 tmp3 = vec_or (tmp3, tmp5);
176 tmp4 = vec_or (tmp4, tmp6);
177 tmp00 = (vector signed short) vec_pack ((vector unsigned short) tmp3, (vector unsigned short) tmp4);
/* Second stanza -> tmp01 (mergeh/mergel pairs of v0/v1 and v4/v5). */
179 tmp0 = (vector signed short) vec_mergeh ((vector signed int) v0, (vector signed int) v1);
180 tmp1 = (vector signed short) vec_mergel ((vector signed int) v0, (vector signed int) v1);
181 tmp2 = vec_sld (tmp1, tmp1, 8);
182 tmp3 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
183 tmp4 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
184 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
185 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
186 tmp5 = vec_sld (tmp3, tmp3, 14);
187 tmp6 = vec_sld (tmp4, tmp4, 14);
188 tmp3 = vec_or (tmp3, tmp5);
189 tmp4 = vec_or (tmp4, tmp6);
191 tmp0 = (vector signed short) vec_mergeh ((vector signed int) v4, (vector signed int) v5);
192 tmp1 = (vector signed short) vec_mergel ((vector signed int) v4, (vector signed int) v5);
193 tmp2 = vec_sld (tmp1, tmp1, 8);
194 tmp5 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
195 tmp6 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
196 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
197 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
198 tmp0 = vec_sld (tmp5, tmp5, 14);
199 tmp1 = vec_sld (tmp6, tmp6, 14);
200 tmp5 = vec_or (tmp0, tmp5);
201 tmp6 = vec_or (tmp1, tmp6);
203 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
204 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
205 tmp5 = vec_sld (tmp3, tmp3, 12);
206 tmp6 = vec_sld (tmp4, tmp4, 12);
207 tmp3 = vec_or (tmp3, tmp5);
208 tmp4 = vec_or (tmp4, tmp6);
209 tmp01 = (vector signed short) vec_pack ((vector unsigned short) tmp3, (vector unsigned short) tmp4);
/* Four more rows for the remaining two stanzas. */
211 v2 = vec_ld (32, (vector signed short *) mv_const);
212 v3 = vec_ld (48, (vector signed short *) mv_const);
213 v6 = vec_ld (96, (vector signed short *) mv_const);
214 v7 = vec_ld (112,(vector signed short *) mv_const);
/* Third stanza -> tmp02. */
216 tmp0 = (vector signed short) vec_mergel ((vector signed int) v0, (vector signed int) v1);
217 tmp1 = (vector signed short) vec_mergeh ((vector signed int) v2, (vector signed int) v3);
218 tmp2 = vec_sld (tmp1, tmp1, 8);
219 tmp3 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
220 tmp4 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
221 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
222 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
223 tmp5 = vec_sld (tmp3, tmp3, 14);
224 tmp6 = vec_sld (tmp4, tmp4, 14);
225 tmp3 = vec_or (tmp3, tmp5);
226 tmp4 = vec_or (tmp4, tmp6);
228 tmp0 = (vector signed short) vec_mergel ((vector signed int) v4, (vector signed int) v5);
229 tmp1 = (vector signed short) vec_mergeh ((vector signed int) v6, (vector signed int) v7);
230 tmp2 = vec_sld (tmp1, tmp1, 8);
231 tmp5 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
232 tmp6 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
233 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
234 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
235 tmp0 = vec_sld (tmp5, tmp5, 14);
236 tmp1 = vec_sld (tmp6, tmp6, 14);
237 tmp5 = vec_or (tmp0, tmp5);
238 tmp6 = vec_or (tmp1, tmp6);
240 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
241 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
242 tmp5 = vec_sld (tmp3, tmp3, 12);
243 tmp6 = vec_sld (tmp4, tmp4, 12);
244 tmp3 = vec_or (tmp3, tmp5);
245 tmp4 = vec_or (tmp4, tmp6);
246 tmp02 = (vector signed short) vec_pack ((vector unsigned short) tmp3, (vector unsigned short) tmp4);
/* Fourth stanza -> tmp03. */
248 tmp0 = (vector signed short) vec_mergeh ((vector signed int) v2, (vector signed int) v3);
249 tmp1 = (vector signed short) vec_mergel ((vector signed int) v2, (vector signed int) v3);
250 tmp2 = vec_sld (tmp1, tmp1, 8);
251 tmp3 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
252 tmp4 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
253 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
254 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
255 tmp5 = vec_sld (tmp3, tmp3, 14);
256 tmp6 = vec_sld (tmp4, tmp4, 14);
257 tmp3 = vec_or (tmp3, tmp5);
258 tmp4 = vec_or (tmp4, tmp6);
260 tmp0 = (vector signed short) vec_mergeh ((vector signed int) v6, (vector signed int) v7);
261 tmp1 = (vector signed short) vec_mergel ((vector signed int) v6, (vector signed int) v7);
262 tmp2 = vec_sld (tmp1, tmp1, 8);
263 tmp5 = vec_sub (vec_max (tmp0, tmp1), vec_min (tmp0, tmp1));
264 tmp6 = vec_sub (vec_max (tmp0, tmp2), vec_min (tmp0, tmp2));
265 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
266 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
267 tmp0 = vec_sld (tmp5, tmp5, 14);
268 tmp1 = vec_sld (tmp6, tmp6, 14);
269 tmp5 = vec_or (tmp0, tmp5);
270 tmp6 = vec_or (tmp1, tmp6);
272 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
273 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
274 tmp5 = vec_sld (tmp3, tmp3, 12);
275 tmp6 = vec_sld (tmp4, tmp4, 12);
276 tmp3 = vec_or (tmp3, tmp5);
277 tmp4 = vec_or (tmp4, tmp6);
278 tmp03 = (vector signed short) vec_pack ((vector unsigned short) tmp3, (vector unsigned short) tmp4);
/* 4x4 word transpose of tmp00..tmp03 via pack+mergeh/mergel, then combine
 * with the reference masks into the vertical vector mask. */
280 tmp0 = (vector signed short) vec_pack ((vector unsigned int) tmp00, (vector unsigned int) tmp01);
281 tmp1 = (vector signed short) vec_pack ((vector unsigned int) tmp02, (vector unsigned int) tmp03);
282 tmp2 = (vector signed short) vec_mergeh ((vector signed int) tmp0, (vector signed int) tmp1);
283 tmp3 = (vector signed short) vec_mergel ((vector signed int) tmp0, (vector signed int) tmp1);
284 tmp4 = (vector signed short) vec_mergeh ((vector signed int) tmp2, (vector signed int) tmp3);
285 tmp5 = (vector signed short) vec_mergel ((vector signed int) tmp2, (vector signed int) tmp3);
286 tmp4 = vec_and (v_ref_mask00, tmp4);
287 tmp5 = vec_and (v_ref_mask01, tmp5);
289 tmp0 = vec_nor (v_ref_mask00, v_ref_mask01);
290 tmp1 = vec_and (v_ref_mask00, v_ref_mask01);
291 tmp2 = vec_and (tmp4, tmp5);
292 tmp2 = vec_and (tmp2, tmp1);
293 tmp3 = vec_nor (tmp4, tmp5);
294 tmp3 = vec_nor (tmp3, tmp1);
295 v_vec_maskv = vec_or (tmp0, tmp2);
296 v_vec_maskv = vec_or (v_vec_maskv, tmp3);
/* Second (horizontal) reference-index pass: same shape as the first pass
 * above, using v8..v11 instead of v1..v6. */
315 idx0 = vec_ld (0, (signed short *) intra);
317 v_c1 = (vector unsigned char) {'1','2','3','4','5','6','7','8','1','2','3','4','5','6','7','8'};
320 v8 = (vector signed short) vec_and ((vector unsigned char) idx0, v_c1);
321 idx0 = (vector signed short) vec_sra ((vector unsigned char) idx0, v_c1);
323 v9 = vec_sld (v8, v8, 15);
324 v9 = (vector signed short) vec_pack (v9, v8);
326 v10 = vec_sld (v9, v9, 2);
327 v11 = vec_sld (v9, v9, 10);
329 v8 = (vector signed short) vec_cmpeq ((vector signed char) v9, (vector signed char) v10);
330 v9 = (vector signed short) vec_cmpeq ((vector signed char) v9, (vector signed char) v11);
331 v10 = (vector signed short) vec_cmpeq ((vector signed char) v10, (vector signed char) v11);
/* NOTE(review): same dead-store pattern as the first pass — the three
 * compares above are overwritten with ~v_zero. */
334 v8 = v9 = v10 = vec_nor (v_zero, v_zero);
337 tmp1 = (vector signed short) vec_sl ((vector unsigned char) idx0, v_c1);
/* Debug dump of v_c1 and v_zero, plus a vec_all_eq self-check.
 * NOTE(review): `m` is used but never declared in the visible code, and
 * `toto2` is declared twice at the same apparent scope — the braces of an
 * inner block (and the `if` body below) were evidently lost when the
 * testcase was reduced. */
341 unsigned char toto2[16] __attribute__((aligned(16)));
344 vec_st(v_c1, 0, (unsigned char *) toto2);
345 for (m=0; m<16;m++) {printf("%c ", toto2[m]);}
347 printf("\nv_zero\n");
349 vec_st (v_zero, 0, (unsigned char *) toto2);
350 for (m=0; m< 16; m++) {printf("%c ", toto2[m]);}
/* The interleave below is the checked result: mergeh of 'a'..'p' with
 * '1'..'8' must produce 'a1b2c3d4e5f6g7h8' (vres). */
354 v_c1 = vec_mergeh ((vector unsigned char) v_zero, v_c1);
355 tmp1 = (vector signed short) vec_add (tmp1, (vector signed short) v_c1);
358 vector unsigned char vres =
359 (vector unsigned char){'a','1','b','2','c','3','d','4','e','5','f','6','g','7','h','8'};
360 unsigned char toto2[16] __attribute__((aligned(16)));
364 vec_st(v_c1, 0, (unsigned char *) toto2);
365 for (m=0; m<16;m++) {printf("%c ", toto2[m]);}
367 if (!vec_all_eq (vres, v_c1))
/* Horizontal POC-style lookup/select, mirroring lines 102-137 above. */
371 v_pocl = vec_ld (32, (vector unsigned char *) mv_const);
372 v_poch = vec_ld (48, (vector unsigned char *) mv_const);
373 tmp2 = (vector signed short) vec_perm (v_pocl, v_poch, (vector unsigned char) tmp1);
375 v_pocl = vec_ld (0, (vector unsigned char *) mv_const);
376 v_poch = vec_ld (16, (vector unsigned char *) mv_const);
378 tmp1 = (vector signed short) vec_perm (v_pocl, v_poch, (vector unsigned char) tmp1);
380 tmp1 = vec_sel (tmp1, tmp2, (vector unsigned short) {0xffff,0xffff,0,0,0,0,0,0});
383 tmp3 = (vector signed short) vec_splat ((vector unsigned char) idx0, 12);
384 v_c1 = (vector unsigned char) vec_nor (v_zero, v_zero);
385 tmp0 = (vector signed short) vec_cmpeq ((vector signed char) idx0, (vector signed char) v_c1);
386 tmp1 = vec_sel (tmp1, (vector signed short) tmp3, (vector unsigned short) tmp0);
388 tmp2 = vec_sld (tmp1, tmp1, 15);
389 tmp1 = (vector signed short) vec_pack (tmp2, tmp1);
392 tmp2 = vec_sld (tmp1, tmp1, 2);
393 tmp3 = vec_sld (tmp1, tmp1, 10);
395 tmp0 = (vector signed short) vec_cmpeq ((vector signed char) tmp1, (vector signed char) tmp2);
396 tmp4 = (vector signed short) vec_cmpeq ((vector signed char) tmp1, (vector signed char) tmp3);
397 tmp1 = (vector signed short) vec_cmpeq ((vector signed char) tmp2, (vector signed char) tmp3);
398 tmp0 = vec_and (tmp0, v8);
399 tmp4 = vec_and (tmp4, v9);
400 tmp1 = vec_and (tmp1, v10);
401 tmp2 = vec_sld ((vector signed short) tmp0, (vector signed short) tmp0, 8);
402 tmp3 = vec_sld ((vector signed short) tmp4, (vector signed short) tmp4, 8);
403 tmp5 = vec_sld ((vector signed short) tmp1, (vector signed short) tmp1, 8);
404 tmp0 = vec_and (tmp0, tmp2);
405 tmp4 = vec_and (tmp4, tmp3);
406 tmp1 = vec_and (tmp1, tmp5);
407 v_ref_mask00 = vec_mergeh ((vector signed short) tmp0, (vector signed short) v_c1);
408 v_ref_mask01 = vec_mergeh ((vector signed short) tmp4, (vector signed short) tmp1);
409 v_ref_mask00 = (vector signed short) vec_mergeh ((vector unsigned char) v_ref_mask00, (vector unsigned char) v_ref_mask00);
410 v_ref_mask01 = (vector signed short) vec_mergeh ((vector unsigned char) v_ref_mask01, (vector unsigned char) v_ref_mask01);
/* Horizontal |a-b| > 3 comparisons: four stanzas again, this time
 * permuting each row against its v_permv-shuffled self and against the
 * row above it; note the final vec_sld here uses 14, not 12 as in the
 * vertical pass. */
413 v_permv= vec_ld (0, (vector unsigned char *) mv_const);
414 v8 = vec_ld (0, (vector signed short *) mv_const);
415 v9 = vec_ld (16, (vector signed short *) mv_const);
416 tmp2 = vec_perm (v0, v0, v_permv);
417 tmp3 = vec_sub (vec_max (v8, v0), vec_min (v8, v0));
418 tmp4 = vec_sub (vec_max (v8, tmp2), vec_min (v8, tmp2));
419 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
420 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
421 tmp5 = vec_sld (tmp3, tmp3, 14);
422 tmp6 = vec_sld (tmp4, tmp4, 14);
423 tmp3 = vec_or (tmp3, tmp5);
424 tmp4 = vec_or (tmp4, tmp6);
426 tmp2 = vec_perm (v2, v2, v_permv);
427 tmp5 = vec_sub (vec_max (v9, v2), vec_min (v9, v2));
428 tmp6 = vec_sub (vec_max (v9, tmp2), vec_min (v9, tmp2));
429 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
430 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
431 tmp0 = vec_sld (tmp5, tmp5, 14);
432 tmp1 = vec_sld (tmp6, tmp6, 14);
433 tmp5 = vec_or (tmp0, tmp5);
434 tmp6 = vec_or (tmp1, tmp6);
436 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
437 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
438 tmp5 = vec_sld (tmp3, tmp3, 14);
439 tmp6 = vec_sld (tmp4, tmp4, 14);
440 tmp3 = vec_or (tmp3, tmp5);
441 tmp4 = vec_or (tmp4, tmp6);
442 tmp00 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp4);
/* Second horizontal stanza -> tmp01 (rows v1/v0 and v3/v2). */
444 tmp2 = vec_perm (v1, v1, v_permv);
445 tmp3 = vec_sub (vec_max (v0, v1), vec_min (v0, v1));
446 tmp4 = vec_sub (vec_max (v0, tmp2), vec_min (v0, tmp2));
447 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
448 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
449 tmp5 = vec_sld (tmp3, tmp3, 14);
450 tmp6 = vec_sld (tmp4, tmp4, 14);
451 tmp3 = vec_or (tmp3, tmp5);
452 tmp4 = vec_or (tmp4, tmp6);
454 tmp2 = vec_perm (v3, v3, v_permv);
455 tmp5 = vec_sub (vec_max (v2, v3), vec_min (v2, v3));
456 tmp6 = vec_sub (vec_max (v2, tmp2), vec_min (v2, tmp2));
457 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
458 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
459 tmp0 = vec_sld (tmp5, tmp5, 14);
460 tmp1 = vec_sld (tmp6, tmp6, 14);
461 tmp5 = vec_or (tmp0, tmp5);
462 tmp6 = vec_or (tmp1, tmp6);
464 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
465 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
466 tmp5 = vec_sld (tmp3, tmp3, 14);
467 tmp6 = vec_sld (tmp4, tmp4, 14);
468 tmp3 = vec_or (tmp3, tmp5);
469 tmp4 = vec_or (tmp4, tmp6);
470 tmp01 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp4);
/* Third horizontal stanza -> tmp02 (rows v4/v1 and v6/v3). */
472 tmp2 = vec_perm (v4, v4, v_permv);
473 tmp3 = vec_sub (vec_max (v1, v4), vec_min (v1, v4));
474 tmp4 = vec_sub (vec_max (v1, tmp2), vec_min (v1, tmp2));
475 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
476 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
477 tmp5 = vec_sld (tmp3, tmp3, 14);
478 tmp6 = vec_sld (tmp4, tmp4, 14);
479 tmp3 = vec_or (tmp3, tmp5);
480 tmp4 = vec_or (tmp4, tmp6);
482 tmp2 = vec_perm (v6, v6, v_permv);
483 tmp5 = vec_sub (vec_max (v3, v6), vec_min (v3, v6));
484 tmp6 = vec_sub (vec_max (v3, tmp2), vec_min (v3, tmp2));
485 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
486 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
487 tmp0 = vec_sld (tmp5, tmp5, 14);
488 tmp1 = vec_sld (tmp6, tmp6, 14);
489 tmp5 = vec_or (tmp0, tmp5);
490 tmp6 = vec_or (tmp1, tmp6);
492 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
493 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
494 tmp5 = vec_sld (tmp3, tmp3, 14);
495 tmp6 = vec_sld (tmp4, tmp4, 14);
496 tmp3 = vec_or (tmp3, tmp5);
497 tmp4 = vec_or (tmp4, tmp6);
498 tmp02 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp4);
/* Fourth horizontal stanza -> tmp03 (rows v5/v4 and v7/v6). */
501 tmp2 = vec_perm (v5, v5, v_permv);
502 tmp3 = vec_sub (vec_max (v4, v5), vec_min (v4, v5));
503 tmp4 = vec_sub (vec_max (v4, tmp2), vec_min (v4, tmp2));
504 tmp3 = (vector signed short) vec_cmpgt (tmp3, v_three);
505 tmp4 = (vector signed short) vec_cmpgt (tmp4, v_three);
506 tmp5 = vec_sld (tmp3, tmp3, 14);
507 tmp6 = vec_sld (tmp4, tmp4, 14);
508 tmp3 = vec_or (tmp3, tmp5);
509 tmp4 = vec_or (tmp4, tmp6);
511 tmp2 = vec_perm (v7, v7, v_permv);
512 tmp5 = vec_sub (vec_max (v6, v7), vec_min (v6, v7));
513 tmp6 = vec_sub (vec_max (v6, tmp2), vec_min (v6, tmp2));
514 tmp5 = (vector signed short) vec_cmpgt (tmp5, v_three);
515 tmp6 = (vector signed short) vec_cmpgt (tmp6, v_three);
516 tmp0 = vec_sld (tmp5, tmp5, 14);
517 tmp1 = vec_sld (tmp6, tmp6, 14);
518 tmp5 = vec_or (tmp0, tmp5);
519 tmp6 = vec_or (tmp1, tmp6);
521 tmp3 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp5);
522 tmp4 = (vector signed short) vec_pack ((vector unsigned int) tmp4, (vector unsigned int) tmp6);
523 tmp5 = vec_sld (tmp3, tmp3, 14);
524 tmp6 = vec_sld (tmp4, tmp4, 14);
525 tmp3 = vec_or (tmp3, tmp5);
526 tmp4 = vec_or (tmp4, tmp6);
527 tmp03 = (vector signed short) vec_pack ((vector unsigned int) tmp3, (vector unsigned int) tmp4);
/* Transpose/combine, as in the vertical pass, into the horizontal mask. */
529 tmp0 = (vector signed short) vec_pack ((vector unsigned short) tmp00, (vector unsigned short) tmp01);
530 tmp1 = (vector signed short) vec_pack ((vector unsigned short) tmp02, (vector unsigned short) tmp03);
531 tmp2 = (vector signed short) vec_mergeh ((vector signed int) tmp0, (vector signed int) tmp1);
532 tmp3 = (vector signed short) vec_mergel ((vector signed int) tmp0, (vector signed int) tmp1);
533 tmp4 = (vector signed short) vec_mergeh ((vector signed int) tmp2, (vector signed int) tmp3);
534 tmp5 = (vector signed short) vec_mergel ((vector signed int) tmp2, (vector signed int) tmp3);
535 tmp4 = vec_and (v_ref_mask00, tmp4);
536 tmp5 = vec_and (v_ref_mask01, tmp5);
538 tmp0 = vec_nor (v_ref_mask00, v_ref_mask01);
539 tmp1 = vec_and (v_ref_mask00, v_ref_mask01);
540 tmp2 = vec_and (tmp4, tmp5);
541 tmp2 = vec_and (tmp2, tmp1);
542 tmp3 = vec_nor (tmp4, tmp5);
543 tmp3 = vec_nor (tmp3, tmp1);
544 v_vec_maskh = vec_or (tmp0, tmp2);
545 v_vec_maskh = vec_or (v_vec_maskh, tmp3);
/* Complement the intra masks, booleanize the cbp masks' complements, and
 * gate cbp/vec masks so intra takes priority over cbp over vec. */
548 v_intra_maskvn = vec_nor (v_intra_maskv, v_intra_maskv);
549 v_intra_maskhn = vec_nor (v_intra_maskh, v_intra_maskh);
550 v_cbp_maskvn = (vector unsigned char) vec_cmpeq ((vector unsigned char) v_cbp_maskv, (vector unsigned char) v_zero);
551 v_cbp_maskhn = (vector unsigned char) vec_cmpeq ((vector unsigned char) v_cbp_maskh, (vector unsigned char) v_zero);
553 v_cbp_maskv = vec_and (v_cbp_maskv, v_intra_maskvn);
554 v_cbp_maskh = vec_and (v_cbp_maskh, v_intra_maskhn);
555 v_vec_maskv = vec_and (v_vec_maskv, (vector signed short) v_intra_maskvn);
556 v_vec_maskv = vec_and (v_vec_maskv, (vector signed short) v_cbp_maskvn);
557 v_vec_maskh = vec_and (v_vec_maskh, (vector signed short) v_intra_maskhn);
558 v_vec_maskh = vec_and (v_vec_maskh, (vector signed short) v_cbp_maskhn);
/* Final per-edge byte values: loaded bS where intra, 2 where cbp,
 * 1 where the vector mask fired — OR-merged per byte lane. */
560 tmp9 = vec_splat_u8(2);
561 tmp8 = vec_splat_u8(1);
562 v_bS = vec_ld (0, (vector unsigned char *) mv_const);
564 v_bSv = vec_and ((vector unsigned char) v_bS, (vector unsigned char)v_intra_maskv);
565 tmp7 = vec_and ((vector unsigned char)tmp9, (vector unsigned char)v_cbp_maskv);
566 tmp6 = (vector signed short) vec_and ((vector unsigned char)tmp8, (vector unsigned char)v_vec_maskv);
567 tmp7 = vec_or ((vector unsigned char)tmp7, (vector unsigned char)tmp6);
568 v_bSv = vec_or ((vector unsigned char)tmp7, (vector unsigned char)v_bSv);
570 v_bS = vec_ld (0, (vector unsigned char *) mv_const);
571 v_bSh = vec_and ((vector unsigned char) v_bS, (vector unsigned char)v_intra_maskh);
572 tmp7 = vec_and ((vector unsigned char)tmp9, (vector unsigned char)v_cbp_maskh);
573 tmp6 = (vector signed short) vec_and ((vector unsigned char)tmp8, (vector unsigned char)v_vec_maskh);
574 tmp7 = vec_or ((vector unsigned char)tmp7, (vector unsigned char)tmp6);
575 v_bSh = vec_or ((vector unsigned char)tmp7, (vector unsigned char)v_bSh);
/* Edge-enable masking, then store; the results are also widened by two
 * rounds of mergeh and stored again (only the last store of each pair to
 * mv_const survives as observable state). */
577 v_permh = (vector unsigned char) vec_ld (0 , (vector unsigned char *) mv_const);
578 v_permv = (vector unsigned char) vec_ld (0, (vector unsigned char *) mv_const);
579 v_bSv = vec_and (v_bSv, v_permv);
580 v_bSh = vec_and (v_bSh, v_permh);
582 vec_st (v_bSv, 0, (unsigned char *) mv_const);
583 vec_st (v_bSh, 0, (unsigned char *) mv_const);
585 v_bSv = vec_mergeh (v_bSv, v_bSv);
586 v_bSv = vec_mergeh (v_bSv, v_bSv);
587 v_bSh = vec_mergeh (v_bSh, v_bSh);
588 v_bSh = vec_mergeh (v_bSh, v_bSh);
590 vec_st (v_bSv, 0, (vector unsigned char *) mv_const);
591 vec_st (v_bSh, 0,(vector unsigned char *) mv_const);
595 int main(int argc, char **argv)
597 char toto[32] __attribute__((aligned(16)));
598 foo(toto, toto, 0, 0);