1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V?TImode, used in move patterns.
;; Each 256-bit member is enabled only under TARGET_AVX; the 128-bit
;; members carry no condition.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 (V2TI "TARGET_AVX") V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; Vector modes without the V?TImode entries.  The 256-bit members
;; require TARGET_AVX and V2DF requires TARGET_SSE2; the remaining
;; 128-bit members are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No per-mode condition is attached; patterns using this iterator must
;; supply their own enabling condition.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF and V4DF require TARGET_AVX, V2DF requires TARGET_SSE2, and V4SF
;; is unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; V8SF requires TARGET_AVX; V4SF is unconditional.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; V4DF requires TARGET_AVX.  V2DF has no condition here (unlike in VF),
;; so users of this iterator must check for SSE2 themselves.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; V2DF requires TARGET_SSE2; V4SF is unconditional.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): the mode list of this iterator is not visible in this
;; view of the file -- confirm it against the complete source.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; The 256-bit member of each pair requires TARGET_AVX; the 128-bit
;; member is unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
;; All vector integer modes, with the 256-bit member of each pair
;; enabled by TARGET_AVX2 rather than TARGET_AVX.
76 (define_mode_iterator VI_AVX2
77 [(V32QI "TARGET_AVX2") V16QI
78 (V16HI "TARGET_AVX2") V8HI
79 (V8SI "TARGET_AVX2") V4SI
80 (V4DI "TARGET_AVX2") V2DI])
82 ;; All QImode vector integer modes
;; V32QI requires TARGET_AVX; V16QI is unconditional.
83 (define_mode_iterator VI1
84 [(V32QI "TARGET_AVX") V16QI])
86 ;; All DImode vector integer modes
;; V4DI requires TARGET_AVX; V2DI is unconditional.
87 (define_mode_iterator VI8
88 [(V4DI "TARGET_AVX") V2DI])
;; Single element-size integer vector iterators (QI, HI, SI and DI
;; elements respectively).  In each pair the 256-bit mode requires
;; TARGET_AVX2 and the 128-bit mode is unconditional.
90 (define_mode_iterator VI1_AVX2
91 [(V32QI "TARGET_AVX2") V16QI])
93 (define_mode_iterator VI2_AVX2
94 [(V16HI "TARGET_AVX2") V8HI])
96 (define_mode_iterator VI4_AVX2
97 [(V8SI "TARGET_AVX2") V4SI])
99 (define_mode_iterator VI8_AVX2
100 [(V4DI "TARGET_AVX2") V2DI])
;; Whole-register TImode-element vectors: V2TI requires TARGET_AVX2,
;; V1TI is unconditional.
102 ;; ??? We should probably use TImode instead.
103 (define_mode_iterator VIMAX_AVX2
104 [(V2TI "TARGET_AVX2") V1TI])
;; Like VIMAX_AVX2, but the 128-bit member is the scalar mode TI rather
;; than the vector mode V1TI.
106 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
107 (define_mode_iterator SSESCALARMODE
108 [(V2TI "TARGET_AVX2") TI])
;; Integer vector iterators covering several element sizes.  The digits
;; in each name correspond to the element widths listed (1 = QI, 2 = HI,
;; 4 = SI, 8 = DI).  In every pair the 256-bit mode requires TARGET_AVX2
;; and the 128-bit mode is unconditional.
110 (define_mode_iterator VI12_AVX2
111 [(V32QI "TARGET_AVX2") V16QI
112 (V16HI "TARGET_AVX2") V8HI])
114 (define_mode_iterator VI24_AVX2
115 [(V16HI "TARGET_AVX2") V8HI
116 (V8SI "TARGET_AVX2") V4SI])
118 (define_mode_iterator VI124_AVX2
119 [(V32QI "TARGET_AVX2") V16QI
120 (V16HI "TARGET_AVX2") V8HI
121 (V8SI "TARGET_AVX2") V4SI])
123 (define_mode_iterator VI248_AVX2
124 [(V16HI "TARGET_AVX2") V8HI
125 (V8SI "TARGET_AVX2") V4SI
126 (V4DI "TARGET_AVX2") V2DI])
128 (define_mode_iterator VI48_AVX2
129 [(V8SI "TARGET_AVX2") V4SI
130 (V4DI "TARGET_AVX2") V2DI])
;; The visible rows make all four SI/DI-element integer vector modes
;; (128-bit and 256-bit) conditional on TARGET_AVX2.
;; NOTE(review): the opening rows of this list are not visible in this
;; view of the file -- confirm the full mode list against the source.
132 (define_mode_iterator V48_AVX2
135 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
136 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; The five attributes below map a vector mode to an ISA-name string.
;; In the first three and in vec_avx2, 128-bit modes map to the older
;; name and 256-bit modes to "avx2"; avx_avx2 instead maps float modes
;; to "avx" and integer modes to "avx2" regardless of width.
;; Presumably these are spliced into pattern names (as <sse2> and <sse3>
;; are elsewhere in this file); no use is visible in this part of it.

;; "sse2" for 128-bit integer modes (including V1TI), "avx2" for 256-bit.
138 (define_mode_attr sse2_avx2
139 [(V16QI "sse2") (V32QI "avx2")
140 (V8HI "sse2") (V16HI "avx2")
141 (V4SI "sse2") (V8SI "avx2")
142 (V2DI "sse2") (V4DI "avx2")
143 (V1TI "sse2") (V2TI "avx2")])
;; "ssse3" for 128-bit integer modes, "avx2" for 256-bit.  Note the
;; 128-bit whole-register entry is keyed on TI here, not V1TI.
145 (define_mode_attr ssse3_avx2
146 [(V16QI "ssse3") (V32QI "avx2")
147 (V8HI "ssse3") (V16HI "avx2")
148 (V4SI "ssse3") (V8SI "avx2")
149 (V2DI "ssse3") (V4DI "avx2")
150 (TI "ssse3") (V2TI "avx2")])
;; "sse4_1" for 128-bit integer modes, "avx2" for 256-bit.
152 (define_mode_attr sse4_1_avx2
153 [(V16QI "sse4_1") (V32QI "avx2")
154 (V8HI "sse4_1") (V16HI "avx2")
155 (V4SI "sse4_1") (V8SI "avx2")
156 (V2DI "sse4_1") (V4DI "avx2")])
;; "avx" for float vector modes, "avx2" for SI/DI-element integer modes.
158 (define_mode_attr avx_avx2
159 [(V4SF "avx") (V2DF "avx")
160 (V8SF "avx") (V4DF "avx")
161 (V4SI "avx2") (V2DI "avx2")
162 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for 128-bit integer modes, "avx2" for 256-bit.
164 (define_mode_attr vec_avx2
165 [(V16QI "vec") (V32QI "avx2")
166 (V8HI "vec") (V16HI "avx2")
167 (V4SI "vec") (V8SI "avx2")
168 (V2DI "vec") (V4DI "avx2")])
;; Maps an HI-element vector mode to the SI-element vector mode with the
;; same number of elements (elements twice as wide).
170 (define_mode_attr ssedoublemode
171 [(V16HI "V16SI") (V8HI "V8SI")])
;; Maps a DI-element vector mode to the QI-element vector mode of the
;; same total size.
173 (define_mode_attr ssebytemode
174 [(V4DI "V32QI") (V2DI "V16QI")])
176 ;; All 128bit vector integer modes
;; No per-mode conditions are attached.
177 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
179 ;; All 256bit vector integer modes
;; No per-mode conditions are attached.
180 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
182 ;; Random 128bit vector integer mode combinations
;; The digits before "_128" correspond to the element widths of the
;; listed modes (1 = QI, 2 = HI, 4 = SI, 8 = DI).  None of the entries
;; carries a condition.
183 (define_mode_iterator VI12_128 [V16QI V8HI])
184 (define_mode_iterator VI14_128 [V16QI V4SI])
185 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
186 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
187 (define_mode_iterator VI24_128 [V8HI V4SI])
188 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
189 (define_mode_iterator VI48_128 [V4SI V2DI])
191 ;; Random 256bit vector integer mode combinations
;; Same naming scheme as the 128-bit combinations; no conditions attached.
192 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
193 (define_mode_iterator VI48_256 [V8SI V4DI])
195 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that have the
;; same element width (4 or 8 bytes) and the same total width (128 or
;; 256 bits).
196 (define_mode_iterator VI4F_128 [V4SI V4SF])
197 (define_mode_iterator VI8F_128 [V2DI V2DF])
198 (define_mode_iterator VI4F_256 [V8SI V8SF])
199 (define_mode_iterator VI8F_256 [V4DI V4DF])
201 ;; Mapping from float mode to required SSE level
;; Covers the scalar modes SF/DF as well as the vector modes.  The value
;; is used as a pattern-name prefix, e.g. "<sse>_movu..." and
;; "<sse>_div<mode>3" in this file.
202 (define_mode_attr sse
203 [(SF "sse") (DF "sse2")
204 (V4SF "sse") (V2DF "sse2")
205 (V8SF "avx") (V4DF "avx")])
;; ISA-name prefixes keyed on mode width: the 128-bit mode maps to the
;; legacy name and the 256-bit mode maps to "avx".

;; Used as the prefix of "<sse2>_movdqu..." and "<sse2>_movnt<mode>".
207 (define_mode_attr sse2
208 [(V16QI "sse2") (V32QI "avx")
209 (V2DI "sse2") (V4DI "avx")])
;; Used as the prefix of "<sse3>_lddqu...".
211 (define_mode_attr sse3
212 [(V16QI "sse3") (V32QI "avx")])
;; Float vector modes only.
214 (define_mode_attr sse4_1
215 [(V4SF "sse4_1") (V2DF "sse4_1")
216 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix that distinguishes vector widths: "256" for the
;; 256-bit modes and the empty string for the 128-bit modes.
218 (define_mode_attr avxsizesuffix
219 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
220 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
221 (V8SF "256") (V4DF "256")
222 (V4SF "") (V2DF "")])
224 ;; SSE instruction mode
;; Value for the "mode" insn attribute: integer vector modes map to the
;; integer mode of the same width (OI for 256-bit, TI for 128-bit),
;; while float vector modes map to themselves.
;; NOTE(review): the closing row(s) of this list are not visible in this
;; view of the file.
225 (define_mode_attr sseinsnmode
226 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
227 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
228 (V8SF "V8SF") (V4DF "V4DF")
229 (V4SF "V4SF") (V2DF "V2DF")
232 ;; Mapping of vector modes to the integer vector mode of the same size
;; Float vector modes map to the integer vector mode with the same
;; element width and count; integer vector modes map to themselves.
233 (define_mode_attr sseintvecmode
234 [(V8SF "V8SI") (V4DF "V4DI")
235 (V4SF "V4SI") (V2DF "V2DI")
236 (V8SI "V8SI") (V4DI "V4DI")
237 (V4SI "V4SI") (V2DI "V2DI")
238 (V16HI "V16HI") (V8HI "V8HI")
239 (V32QI "V32QI") (V16QI "V16QI")])
;; Lower-case spelling of the sseintvecmode mapping: the same keys, with
;; each value written in lower case.
241 (define_mode_attr sseintvecmodelower
242 [(V8SF "v8si") (V4DF "v4di")
243 (V4SF "v4si") (V2DF "v2di")
244 (V8SI "v8si") (V4DI "v4di")
245 (V4SI "v4si") (V2DI "v2di")
246 (V16HI "v16hi") (V8HI "v8hi")
247 (V32QI "v32qi") (V16QI "v16qi")])
249 ;; Mapping of vector modes to a vector mode of double size
;; The element mode is kept and the element count is doubled.
250 (define_mode_attr ssedoublevecmode
251 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
252 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
253 (V8SF "V16SF") (V4DF "V8DF")
254 (V4SF "V8SF") (V2DF "V4DF")])
256 ;; Mapping of vector modes to a vector mode of half size
;; The element mode is kept and the element count is halved.
;; NOTE(review): the closing row(s) of this list are not visible in this
;; view of the file.
257 (define_mode_attr ssehalfvecmode
258 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
259 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
260 (V8SF "V4SF") (V4DF "V2DF")
263 ;; Mapping of vector modes back to the scalar modes
;; Yields the element mode of each vector mode; used for the "mode"
;; attribute of the scalar "vm" patterns in this file.
264 (define_mode_attr ssescalarmode
265 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
266 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
267 (V8SF "SF") (V4DF "DF")
268 (V4SF "SF") (V2DF "DF")])
270 ;; Number of scalar elements in each vector type
;; The value is the element count as a decimal string.
271 (define_mode_attr ssescalarnum
272 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
273 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
274 (V8SF "8") (V4DF "4")
275 (V4SF "4") (V2DF "2")])
277 ;; SSE prefix for integer vector modes
;; In the visible rows integer modes map to "p" and float modes to the
;; empty string.
;; NOTE(review): the middle rows of this list are not visible in this
;; view of the file.
278 (define_mode_attr sseintprefix
279 [(V2DI "p") (V2DF "")
282 (V8SI "p") (V8SF "")])
284 ;; SSE scalar suffix for vector modes
;; In the visible rows, modes with 4-byte elements map to "ss" and modes
;; with 8-byte elements map to "sd"; this includes the integer modes
;; V8SI and V4DI.
;; NOTE(review): the first and last rows of this list are not visible in
;; this view of the file.
285 (define_mode_attr ssescalarmodesuffix
287 (V8SF "ss") (V4DF "sd")
288 (V4SF "ss") (V2DF "sd")
289 (V8SI "ss") (V4DI "sd")
292 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, elements twice as wide (half as many).
293 (define_mode_attr sseunpackmode
294 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
295 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
;; ssepackmode: the inverse mapping -- same total size, elements half as
;; wide (twice as many).
297 (define_mode_attr ssepackmode
298 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
299 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
301 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
302 (define_mode_attr sserotatemax
303 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
305 ;; Mapping of mode to cast intrinsic name
;; Covers the same three 256-bit modes as AVX256MODE2P.
306 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
308 ;; Instruction suffix for sign and zero extensions.
;; A code attribute (keyed on the rtx code, not on a mode).
309 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
311 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; Float modes map to the literal "f128".  Integer modes map to "%~128";
;; presumably the "%~" output modifier chooses the letter when the insn
;; is printed -- confirm against the operand-printing code.
312 (define_mode_attr i128
313 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
314 (V8SI "%~128") (V4DI "%~128")])
;; Three 256-bit modes with 4- or 8-byte elements; no conditions attached.
317 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Scalar and vector float modes (128- and 256-bit); no conditions
;; attached.  Presumably used by the FMA patterns, which are not visible
;; in this part of the file.
319 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
321 ;; Mapping of immediate bits for blend instructions
;; Each value is the all-ones mask with one bit per vector element
;; (2^N - 1 for an N-element vector).
322 (define_mode_attr blendbits
323 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
325 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
327 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
333 ;; All of these patterns are enabled for SSE1 as well as SSE2.
334 ;; This is essential for maintaining stable calling conventions.

;; Move expander for every mode in V16.  The visible preparation
;; statement hands the operand array to ix86_expand_vector_move.
336 (define_expand "mov<mode>"
337 [(set (match_operand:V16 0 "nonimmediate_operand" "")
338 (match_operand:V16 1 "nonimmediate_operand" ""))]
341 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in V16.  At least one operand must be a
;; register (see the condition).  Alternatives, from the constraints:
;;   0: SSE constant (C) -> xmm register
;;   1: xmm register or memory -> xmm register
;;   2: xmm register -> memory
;; The output code switches on which_alternative and on the insn's mode
;; attribute.  It returns standard_sse_constant_opcode for constants and
;; otherwise one of the movaps/movapd/movdqa forms; an unaligned
;; vmovups/vmovupd/vmovdqu form is returned when (among conditions not
;; all visible here) either operand is misaligned.  movaps replaces
;; movapd/movdqa when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
;; The trailing cond (presumably the "mode" attribute) yields
;; <sseinsnmode> under TARGET_AVX; V4SF when optimizing for size,
;; without SSE2, or for alternative 2 with TARGET_SSE_TYPELESS_STORES;
;; V4SF/V2DF for those two modes; and TI as the last visible value.
345 (define_insn "*mov<mode>_internal"
346 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
347 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
349 && (register_operand (operands[0], <MODE>mode)
350 || register_operand (operands[1], <MODE>mode))"
352 switch (which_alternative)
355 return standard_sse_constant_opcode (insn, operands[1]);
358 switch (get_attr_mode (insn))
363 && (misaligned_operand (operands[0], <MODE>mode)
364 || misaligned_operand (operands[1], <MODE>mode)))
365 return "vmovups\t{%1, %0|%0, %1}";
367 return "%vmovaps\t{%1, %0|%0, %1}";
372 && (misaligned_operand (operands[0], <MODE>mode)
373 || misaligned_operand (operands[1], <MODE>mode)))
374 return "vmovupd\t{%1, %0|%0, %1}";
375 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
376 return "%vmovaps\t{%1, %0|%0, %1}";
378 return "%vmovapd\t{%1, %0|%0, %1}";
383 && (misaligned_operand (operands[0], <MODE>mode)
384 || misaligned_operand (operands[1], <MODE>mode)))
385 return "vmovdqu\t{%1, %0|%0, %1}";
386 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
387 return "%vmovaps\t{%1, %0|%0, %1}";
389 return "%vmovdqa\t{%1, %0|%0, %1}";
398 [(set_attr "type" "sselog1,ssemov,ssemov")
399 (set_attr "prefix" "maybe_vex")
401 (cond [(match_test "TARGET_AVX")
402 (const_string "<sseinsnmode>")
403 (ior (ior (match_test "optimize_function_for_size_p (cfun)")
404 (not (match_test "TARGET_SSE2")))
405 (and (eq_attr "alternative" "2")
406 (match_test "TARGET_SSE_TYPELESS_STORES")))
407 (const_string "V4SF")
408 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
409 (const_string "V4SF")
410 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
411 (const_string "V2DF")
413 (const_string "TI")))])
;; Emits %vmovq from operand 1 (xmm register or memory) to an xmm
;; register.  The visible part of the source expression selects element
;; 0 of the V2DI operand 1.
415 (define_insn "sse2_movq128"
416 [(set (match_operand:V2DI 0 "register_operand" "=x")
419 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
420 (parallel [(const_int 0)]))
423 "%vmovq\t{%1, %0|%0, %1}"
424 [(set_attr "type" "ssemov")
425 (set_attr "prefix" "maybe_vex")
426 (set_attr "mode" "TI")])
428 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
429 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
430 ;; from memory, we'd prefer to load the memory directly into the %xmm
431 ;; register. To facilitate this happy circumstance, this pattern won't
432 ;; split until after register allocation. If the 64-bit value didn't
433 ;; come from memory, this is the best we can do. This is much better
434 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from there.
;; Insn-and-split, enabled for 32-bit SSE2 targets with
;; TARGET_INTER_UNIT_MOVES.  Operand 1 is a DImode value in general
;; registers (alternative 0, which also uses an early-clobber xmm
;; scratch) or in memory (alternative 1).  The split runs only after
;; reload:
;;  - register source: the SImode subregs at byte offsets 0 and 4 are
;;    loaded with sse2_loadld into operand 0 and the scratch, and then
;;    combined by vec_interleave_lowv4si;
;;  - memory source: a single vec_concatv2di of the DImode memory with
;;    const0_rtx into the V2DI low part of operand 0.
437 (define_insn_and_split "movdi_to_sse"
439 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
440 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
441 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
442 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
444 "&& reload_completed"
447 if (register_operand (operands[1], DImode))
449 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
450 Assemble the 64-bit DImode value in an xmm register. */
451 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
452 gen_rtx_SUBREG (SImode, operands[1], 0)));
453 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
454 gen_rtx_SUBREG (SImode, operands[1], 4)));
455 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
458 else if (memory_operand (operands[1], DImode))
459 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
460 operands[1], const0_rtx));
;; Post-reload rewrite of a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to SFmode
;; via simplify_gen_subreg, operand 2 is set to the V4SF zero vector,
;; and the visible part of the replacement uses a vec_duplicate of
;; operand 1.
;; NOTE(review): the line that opens this definition is not visible in
;; this view of the file.
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
468 "TARGET_SSE && reload_completed"
471 (vec_duplicate:V4SF (match_dup 1))
475 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
476 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload rewrite of a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand.  The replacement is a vec_concat
;; of operand 1 narrowed to DFmode (low element) with the DFmode zero
;; constant placed in operand 2.
;; NOTE(review): the line that opens this definition is not visible in
;; this view of the file.
480 [(set (match_operand:V2DF 0 "register_operand" "")
481 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
482 "TARGET_SSE2 && reload_completed"
483 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
485 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
486 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in V16; operand 0 must be a register.
;; The visible preparation statement calls ix86_expand_push.
489 (define_expand "push<mode>1"
490 [(match_operand:V16 0 "register_operand" "")]
493 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in V16.  The visible
;; preparation statement hands the operand array to
;; ix86_expand_vector_move_misalign.
497 (define_expand "movmisalign<mode>"
498 [(set (match_operand:V16 0 "nonimmediate_operand" "")
499 (match_operand:V16 1 "nonimmediate_operand" ""))]
502 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float vector move (all VF modes).  When
;; both operands are memory, operand 1 is first forced into a register,
;; which satisfies the "not both MEM" condition of the matching insn.
506 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
507 [(set (match_operand:VF 0 "nonimmediate_operand" "")
509 [(match_operand:VF 1 "nonimmediate_operand" "")]
513 if (MEM_P (operands[0]) && MEM_P (operands[1]))
514 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float vector move insn: alternative 0 loads (register or
;; memory -> register), alternative 1 stores (register -> memory).
;; Memory-to-memory is rejected by the condition.  Emits
;; %vmovu<ssemodesuffix> and sets the "movu" attribute.
517 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
518 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
520 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
522 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
523 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
524 [(set_attr "type" "ssemov")
525 (set_attr "movu" "1")
526 (set_attr "prefix" "maybe_vex")
527 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned integer vector move (VI1 modes, wrapped in
;; an unspec).  When both operands are memory, operand 1 is first forced
;; into a register.
529 (define_expand "<sse2>_movdqu<avxsizesuffix>"
530 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
531 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
535 if (MEM_P (operands[0]) && MEM_P (operands[1]))
536 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned integer vector move insn: alternative 0 loads, alternative
;; 1 stores; memory-to-memory is rejected by the condition.  Emits
;; %vmovdqu.  The prefix_data16 attribute is computed from a TARGET_AVX
;; test whose result values are not visible in this view.
539 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
540 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
541 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
543 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
544 "%vmovdqu\t{%1, %0|%0, %1}"
545 [(set_attr "type" "ssemov")
546 (set_attr "movu" "1")
547 (set (attr "prefix_data16")
549 (match_test "TARGET_AVX")
552 (set_attr "prefix" "maybe_vex")
553 (set_attr "mode" "<sseinsnmode>")])
;; Load-only insn (VI1 modes): operand 1 must be memory and operand 0 an
;; xmm register.  Emits %vlddqu.  Both prefix_data16 and prefix_rep are
;; computed from TARGET_AVX tests whose result values are not visible in
;; this view.
555 (define_insn "<sse3>_lddqu<avxsizesuffix>"
556 [(set (match_operand:VI1 0 "register_operand" "=x")
557 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
560 "%vlddqu\t{%1, %0|%0, %1}"
561 [(set_attr "type" "ssemov")
562 (set_attr "movu" "1")
563 (set (attr "prefix_data16")
565 (match_test "TARGET_AVX")
568 (set (attr "prefix_rep")
570 (match_test "TARGET_AVX")
573 (set_attr "prefix" "maybe_vex")
574 (set_attr "mode" "<sseinsnmode>")])
;; Store of an SImode general register to memory using movnti, wrapped
;; in an unspec.
;; NOTE(review): the "mode" attribute is "V2DF" although both operands
;; are SImode -- confirm whether "SI" was intended.
576 (define_insn "sse2_movntsi"
577 [(set (match_operand:SI 0 "memory_operand" "=m")
578 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
581 "movnti\t{%1, %0|%0, %1}"
582 [(set_attr "type" "ssemov")
583 (set_attr "prefix_data16" "0")
584 (set_attr "mode" "V2DF")])
;; Store of a float vector register (VF modes) to memory using
;; %vmovnt<ssemodesuffix>, wrapped in an unspec.
586 (define_insn "<sse>_movnt<mode>"
587 [(set (match_operand:VF 0 "memory_operand" "=m")
588 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
591 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
592 [(set_attr "type" "ssemov")
593 (set_attr "prefix" "maybe_vex")
594 (set_attr "mode" "<MODE>")])
;; Store of a DI-element integer vector register (VI8 modes) to memory
;; using %vmovntdq, wrapped in an unspec.
;; NOTE(review): "type" is "ssecvt" here while the neighbouring movnt
;; patterns use "ssemov" -- confirm this is intentional.
596 (define_insn "<sse2>_movnt<mode>"
597 [(set (match_operand:VI8 0 "memory_operand" "=m")
598 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
601 "%vmovntdq\t{%1, %0|%0, %1}"
602 [(set_attr "type" "ssecvt")
603 (set (attr "prefix_data16")
605 (match_test "TARGET_AVX")
608 (set_attr "prefix" "maybe_vex")
609 (set_attr "mode" "<sseinsnmode>")])
611 ; Expand patterns for non-temporal stores. At the moment, only those
612 ; that directly map to insns are defined; it would be possible to
613 ; define patterns for other modes that would expand to several insns.
615 ;; Modes handled by storent patterns.
;; In the visible rows: SI requires TARGET_SSE2, scalar SF/DF require
;; TARGET_SSE4A, the 256-bit float modes require TARGET_AVX, V2DF
;; requires TARGET_SSE2, and V4SF is unconditional.
616 (define_mode_iterator STORENT_MODE
617 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
619 (V8SF "TARGET_AVX") V4SF
620 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander for every mode in STORENT_MODE: operand 0 is the memory
;; destination and operand 1 the register source.
622 (define_expand "storent<mode>"
623 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
625 [(match_operand:STORENT_MODE 1 "register_operand" "")]
629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
631 ;; Parallel floating point arithmetic
633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over the VF modes.  The whole expansion is delegated
;; to ix86_expand_fp_absneg_operator, after which DONE suppresses the
;; default pattern emission.
635 (define_expand "<code><mode>2"
636 [(set (match_operand:VF 0 "register_operand" "")
638 (match_operand:VF 1 "register_operand" "")))]
640 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Insn-and-split for vector abs/neg.  Operand 3 is the operator
;; (absneg_operator) applied to operand 1; operand 2 is a second vector
;; that the insn only `use's.  After reload the insn is replaced by a
;; SET of operand 0 to a bitwise operation on the two inputs: XOR when
;; the operator is NEG, AND otherwise.  When operand 1 is in memory the
;; two inputs are swapped so that the memory operand comes second.
;; Alternatives 0-1 are for non-AVX, 2-3 for AVX (see the "isa"
;; attribute).  Part of the preparation code is not visible in this view.
642 (define_insn_and_split "*absneg<mode>2"
643 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
644 (match_operator:VF 3 "absneg_operator"
645 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
646 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
649 "&& reload_completed"
652 enum rtx_code absneg_op;
658 if (MEM_P (operands[1]))
659 op1 = operands[2], op2 = operands[1];
661 op1 = operands[1], op2 = operands[2];
666 if (rtx_equal_p (operands[0], operands[1]))
672 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
673 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
674 t = gen_rtx_SET (VOIDmode, operands[0], t);
678 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector add/subtract expander over the VF modes.  The operands are
;; normalised by ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
680 (define_expand "<plusminus_insn><mode>3"
681 [(set (match_operand:VF 0 "register_operand" "")
683 (match_operand:VF 1 "nonimmediate_operand" "")
684 (match_operand:VF 2 "nonimmediate_operand" "")))]
686 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector add/subtract insn.  Alternative 0 (noavx) is the two-operand
;; form with operand 1 tied to the destination; alternative 1 (avx) is
;; the three-operand v-prefixed form.  Operands must also satisfy
;; ix86_binary_operator_ok.
688 (define_insn "*<plusminus_insn><mode>3"
689 [(set (match_operand:VF 0 "register_operand" "=x,x")
691 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
692 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
693 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
695 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
696 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
697 [(set_attr "isa" "noavx,avx")
698 (set_attr "type" "sseadd")
699 (set_attr "prefix" "orig,vex")
700 (set_attr "mode" "<MODE>")])
;; Add/subtract insn for the 128-bit float modes that emits the scalar
;; (ss/sd) mnemonic; its "mode" attribute is the element mode.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
702 (define_insn "<sse>_vm<plusminus_insn><mode>3"
703 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
706 (match_operand:VF_128 1 "register_operand" "0,x")
707 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
712 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
713 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
714 [(set_attr "isa" "noavx,avx")
715 (set_attr "type" "sseadd")
716 (set_attr "prefix" "orig,vex")
717 (set_attr "mode" "<ssescalarmode>")])
;; Vector multiply expander over the VF modes.  The operands are
;; normalised by ix86_fixup_binary_operands_no_copy.
719 (define_expand "mul<mode>3"
720 [(set (match_operand:VF 0 "register_operand" "")
722 (match_operand:VF 1 "nonimmediate_operand" "")
723 (match_operand:VF 2 "nonimmediate_operand" "")))]
725 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector multiply insn.  Operand 1 is marked commutative ("%").
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
727 (define_insn "*mul<mode>3"
728 [(set (match_operand:VF 0 "register_operand" "=x,x")
730 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
731 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
732 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
734 mul<ssemodesuffix>\t{%2, %0|%0, %2}
735 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
736 [(set_attr "isa" "noavx,avx")
737 (set_attr "type" "ssemul")
738 (set_attr "prefix" "orig,vex")
739 (set_attr "mode" "<MODE>")])
;; Multiply insn for the 128-bit float modes that emits the scalar
;; (ss/sd) mnemonic; its "mode" attribute is the element mode.
741 (define_insn "<sse>_vmmul<mode>3"
742 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
745 (match_operand:VF_128 1 "register_operand" "0,x")
746 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
751 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
752 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
753 [(set_attr "isa" "noavx,avx")
754 (set_attr "type" "ssemul")
755 (set_attr "prefix" "orig,vex")
756 (set_attr "mode" "<ssescalarmode>")])
;; Vector divide expander for the DFmode vector modes (VF2).  Operand 1
;; must be a register; the operands are normalised by
;; ix86_fixup_binary_operands_no_copy.
758 (define_expand "div<mode>3"
759 [(set (match_operand:VF2 0 "register_operand" "")
760 (div:VF2 (match_operand:VF2 1 "register_operand" "")
761 (match_operand:VF2 2 "nonimmediate_operand" "")))]
763 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector divide expander for the SFmode vector modes (VF1).  After
;; normalising the operands it calls ix86_emit_swdivsf instead of
;; emitting the div pattern when TARGET_RECIP_VEC_DIV is set, the insn
;; is not optimized for size, and finite-math-only,
;; no-trapping-math and unsafe-math-optimizations are all in effect
;; (the first term of that condition is not visible in this view).
765 (define_expand "div<mode>3"
766 [(set (match_operand:VF1 0 "register_operand" "")
767 (div:VF1 (match_operand:VF1 1 "register_operand" "")
768 (match_operand:VF1 2 "nonimmediate_operand" "")))]
771 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
774 && TARGET_RECIP_VEC_DIV
775 && !optimize_insn_for_size_p ()
776 && flag_finite_math_only && !flag_trapping_math
777 && flag_unsafe_math_optimizations)
779 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector divide insn over the VF modes.  Operand 1 must be a register.
;; Alternative 0 is the two-operand non-AVX form, alternative 1 the
;; three-operand AVX form.
784 (define_insn "<sse>_div<mode>3"
785 [(set (match_operand:VF 0 "register_operand" "=x,x")
787 (match_operand:VF 1 "register_operand" "0,x")
788 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
791 div<ssemodesuffix>\t{%2, %0|%0, %2}
792 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
793 [(set_attr "isa" "noavx,avx")
794 (set_attr "type" "ssediv")
795 (set_attr "prefix" "orig,vex")
796 (set_attr "mode" "<MODE>")])
;; Divide insn for the 128-bit float modes that emits the scalar (ss/sd)
;; mnemonic; its "mode" attribute is the element mode.
798 (define_insn "<sse>_vmdiv<mode>3"
799 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
802 (match_operand:VF_128 1 "register_operand" "0,x")
803 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
808 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
809 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
810 [(set_attr "isa" "noavx,avx")
811 (set_attr "type" "ssediv")
812 (set_attr "prefix" "orig,vex")
813 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision reciprocal, represented as UNSPEC_RCP of
;; operand 1 (register or memory).  Emits %vrcpps for both VF1 modes.
815 (define_insn "<sse>_rcp<mode>2"
816 [(set (match_operand:VF1 0 "register_operand" "=x")
818 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
820 "%vrcpps\t{%1, %0|%0, %1}"
821 [(set_attr "type" "sse")
822 (set_attr "atom_sse_attr" "rcp")
823 (set_attr "prefix" "maybe_vex")
824 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal (rcpss / vrcpss) on V4SF.  Operand 1 is the value
;; fed to the unspec and operand 2 is a second V4SF register input; in
;; the non-AVX alternative operand 2 is tied to the destination, and in
;; the AVX alternative it is printed as the extra source operand.
826 (define_insn "sse_vmrcpv4sf2"
827 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
829 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
831 (match_operand:V4SF 2 "register_operand" "0,x")
835 rcpss\t{%1, %0|%0, %1}
836 vrcpss\t{%1, %2, %0|%0, %2, %1}"
837 [(set_attr "isa" "noavx,avx")
838 (set_attr "type" "sse")
839 (set_attr "atom_sse_attr" "rcp")
840 (set_attr "prefix" "orig,vex")
841 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vector modes (VF2); no
;; preparation code is visible for it in this view.
843 (define_expand "sqrt<mode>2"
844 [(set (match_operand:VF2 0 "register_operand" "")
845 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vector modes (VF1).  It calls
;; ix86_emit_swsqrtsf (last argument false) when TARGET_RECIP_VEC_SQRT
;; is set, the insn is not optimized for size, and finite-math-only,
;; no-trapping-math and unsafe-math-optimizations are all in effect
;; (the first term of that condition is not visible in this view).
848 (define_expand "sqrt<mode>2"
849 [(set (match_operand:VF1 0 "register_operand" "")
850 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
854 && TARGET_RECIP_VEC_SQRT
855 && !optimize_insn_for_size_p ()
856 && flag_finite_math_only && !flag_trapping_math
857 && flag_unsafe_math_optimizations)
859 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square-root insn over the VF modes; a single alternative with
;; a register destination and a register-or-memory source.
864 (define_insn "<sse>_sqrt<mode>2"
865 [(set (match_operand:VF 0 "register_operand" "=x")
866 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
868 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
871 (set_attr "prefix" "maybe_vex")
872 (set_attr "mode" "<MODE>")])
;; Scalar square root for the 128-bit float modes.  Operand 1 is the
;; value whose root is taken and operand 2 is a second register input;
;; in the non-AVX alternative operand 2 is tied to the destination, and
;; in the AVX alternative it is printed as the extra source operand.
874 (define_insn "<sse>_vmsqrt<mode>2"
875 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
878 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
879 (match_operand:VF_128 2 "register_operand" "0,x")
883 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
884 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
885 [(set_attr "isa" "noavx,avx")
886 (set_attr "type" "sse")
887 (set_attr "atom_sse_attr" "sqrt")
888 (set_attr "prefix" "orig,vex")
889 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SFmode vector modes.  The
;; visible preparation statement calls ix86_emit_swsqrtsf with its last
;; argument true (the sqrt expander passes false).
891 (define_expand "rsqrt<mode>2"
892 [(set (match_operand:VF1 0 "register_operand" "")
894 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
897 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root, represented as
;; UNSPEC_RSQRT of operand 1.  Emits %vrsqrtps for both VF1 modes.
901 (define_insn "<sse>_rsqrt<mode>2"
902 [(set (match_operand:VF1 0 "register_operand" "=x")
904 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
906 "%vrsqrtps\t{%1, %0|%0, %1}"
907 [(set_attr "type" "sse")
908 (set_attr "prefix" "maybe_vex")
909 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root (rsqrtss / vrsqrtss) on V4SF.  Operand
;; 1 is the value fed to the unspec and operand 2 is a second register
;; input, tied to the destination in the non-AVX alternative.
911 (define_insn "sse_vmrsqrtv4sf2"
912 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
914 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
916 (match_operand:V4SF 2 "register_operand" "0,x")
920 rsqrtss\t{%1, %0|%0, %1}
921 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
922 [(set_attr "isa" "noavx,avx")
923 (set_attr "type" "sse")
924 (set_attr "prefix" "orig,vex")
925 (set_attr "mode" "SF")])
927 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
928 ;; isn't really correct, as those rtl operators aren't defined when
929 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.

;; Vector min/max expander over the VF modes.  Without
;; flag_finite_math_only, operand 1 is forced into a register first;
;; this matches the non-finite insn below, which requires a register for
;; operand 1.  The operands are then normalised by
;; ix86_fixup_binary_operands_no_copy.
931 (define_expand "<code><mode>3"
932 [(set (match_operand:VF 0 "register_operand" "")
934 (match_operand:VF 1 "nonimmediate_operand" "")
935 (match_operand:VF 2 "nonimmediate_operand" "")))]
938 if (!flag_finite_math_only)
939 operands[1] = force_reg (<MODE>mode, operands[1]);
940 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector min/max insn used only with flag_finite_math_only.  Here
;; operand 1 is marked commutative ("%") and may be in memory, subject
;; to ix86_binary_operator_ok.
943 (define_insn "*<code><mode>3_finite"
944 [(set (match_operand:VF 0 "register_operand" "=x,x")
946 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
947 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
948 "TARGET_SSE && flag_finite_math_only
949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
951 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
952 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
953 [(set_attr "isa" "noavx,avx")
954 (set_attr "type" "sseadd")
955 (set_attr "prefix" "orig,vex")
956 (set_attr "mode" "<MODE>")])
;; Vector min/max insn used when flag_finite_math_only is off.  Unlike
;; the _finite variant, operand 1 must be a register and is not marked
;; commutative, so the operand order is preserved.
958 (define_insn "*<code><mode>3"
959 [(set (match_operand:VF 0 "register_operand" "=x,x")
961 (match_operand:VF 1 "register_operand" "0,x")
962 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
963 "TARGET_SSE && !flag_finite_math_only"
965 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
966 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
967 [(set_attr "isa" "noavx,avx")
968 (set_attr "type" "sseadd")
969 (set_attr "prefix" "orig,vex")
970 (set_attr "mode" "<MODE>")])
;; Min/max insn for the 128-bit float modes that emits the scalar
;; (ss/sd) mnemonic; its "mode" attribute is the element mode.
;; NOTE(review): "type" is "sse" here while the packed min/max insns use
;; "sseadd" -- confirm this is intentional.
972 (define_insn "<sse>_vm<code><mode>3"
973 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
976 (match_operand:VF_128 1 "register_operand" "0,x")
977 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
982 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
983 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
984 [(set_attr "isa" "noavx,avx")
985 (set_attr "type" "sse")
986 (set_attr "prefix" "orig,vex")
987 (set_attr "mode" "<ssescalarmode>")])
989 ;; These versions of the min/max patterns implement exactly the operations
990 ;; min = (op1 < op2 ? op1 : op2)
991 ;; max = (!(op1 < op2) ? op1 : op2)
992 ;; Their operands are not commutative, and thus they may be used in the
993 ;; presence of -0.0 and NaN.
;; Non-commutative minimum over the VF modes, expressed as an unspec of
;; operands 1 and 2 in that order; operand 1 must be a register.  Emits
;; min<ssemodesuffix> (two-operand) or vmin<ssemodesuffix>
;; (three-operand).
995 (define_insn "*ieee_smin<mode>3"
996 [(set (match_operand:VF 0 "register_operand" "=x,x")
998 [(match_operand:VF 1 "register_operand" "0,x")
999 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1003 min<ssemodesuffix>\t{%2, %0|%0, %2}
1004 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1005 [(set_attr "isa" "noavx,avx")
1006 (set_attr "type" "sseadd")
1007 (set_attr "prefix" "orig,vex")
1008 (set_attr "mode" "<MODE>")])
;; Non-commutative maximum over the VF modes, expressed as an unspec of
;; operands 1 and 2 in that order; operand 1 must be a register.  Emits
;; max<ssemodesuffix> (two-operand) or vmax<ssemodesuffix>
;; (three-operand).
1010 (define_insn "*ieee_smax<mode>3"
1011 [(set (match_operand:VF 0 "register_operand" "=x,x")
1013 [(match_operand:VF 1 "register_operand" "0,x")
1014 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
1018 max<ssemodesuffix>\t{%2, %0|%0, %2}
1019 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1020 [(set_attr "isa" "noavx,avx")
1021 (set_attr "type" "sseadd")
1022 (set_attr "prefix" "orig,vex")
1023 (set_attr "mode" "<MODE>")])
;; AVX-only vaddsubpd on V4DF.  The visible RTL combines an expression
;; on operands 1 and 2 with (minus op1 op2) on the same operands
;; (match_dup); the combining operator and its selector are not visible
;; in this view.
1025 (define_insn "avx_addsubv4df3"
1026 [(set (match_operand:V4DF 0 "register_operand" "=x")
1029 (match_operand:V4DF 1 "register_operand" "x")
1030 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1031 (minus:V4DF (match_dup 1) (match_dup 2))
1034 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1035 [(set_attr "type" "sseadd")
1036 (set_attr "prefix" "vex")
1037 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is
;; the three-operand AVX form.
1039 (define_insn "sse3_addsubv2df3"
1040 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1043 (match_operand:V2DF 1 "register_operand" "0,x")
1044 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1045 (minus:V2DF (match_dup 1) (match_dup 2))
1049 addsubpd\t{%2, %0|%0, %2}
1050 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1051 [(set_attr "isa" "noavx,avx")
1052 (set_attr "type" "sseadd")
1053 (set_attr "atom_unit" "complex")
1054 (set_attr "prefix" "orig,vex")
1055 (set_attr "mode" "V2DF")])
;; AVX-only vaddsubps on V8SF; same structure as avx_addsubv4df3 with
;; single-precision elements.
1057 (define_insn "avx_addsubv8sf3"
1058 [(set (match_operand:V8SF 0 "register_operand" "=x")
1061 (match_operand:V8SF 1 "register_operand" "x")
1062 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1063 (minus:V8SF (match_dup 1) (match_dup 2))
1066 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1067 [(set_attr "type" "sseadd")
1068 (set_attr "prefix" "vex")
1069 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  Alternative 0 is the two-operand
;; non-AVX form and alternative 1 the three-operand AVX form.  The
;; prefix_rep attribute is 1 for the non-AVX alternative and left at its
;; default ("*") for the AVX one.
1071 (define_insn "sse3_addsubv4sf3"
1072 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1075 (match_operand:V4SF 1 "register_operand" "0,x")
1076 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1077 (minus:V4SF (match_dup 1) (match_dup 2))
1081 addsubps\t{%2, %0|%0, %2}
1082 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1083 [(set_attr "isa" "noavx,avx")
1084 (set_attr "type" "sseadd")
1085 (set_attr "prefix" "orig,vex")
1086 (set_attr "prefix_rep" "1,*")
1087 (set_attr "mode" "V4SF")])
;; AVX-only horizontal add/subtract on V4DF (vhaddpd / vhsubpd via
;; <plusminus_mnemonic>).  The visible RTL selects the element pairs
;; (0,1) and (2,3) from operand 1 and the same pairs from operand 2.
;; The operators that combine each pair and assemble the result vector
;; are not visible in this view.
1089 (define_insn "avx_h<plusminus_insn>v4df3"
1090 [(set (match_operand:V4DF 0 "register_operand" "=x")
1095 (match_operand:V4DF 1 "register_operand" "x")
1096 (parallel [(const_int 0)]))
1097 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1099 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1100 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
1104 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1105 (parallel [(const_int 0)]))
1106 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
1108 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1109 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1111 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1112 [(set_attr "type" "sseadd")
1113 (set_attr "prefix" "vex")
1114 (set_attr "mode" "V4DF")])
;; V2DF horizontal operation built from DF elements 0 and 1 of operand 1 and
;; of operand 2.  Alternative 0 (isa "noavx") ties operand 1 to the
;; destination and emits the two-operand form; alternative 1 (isa "avx")
;; emits the three-operand v-prefixed form.
1116 (define_insn "sse3_h<plusminus_insn>v2df3"
1117 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1121 (match_operand:V2DF 1 "register_operand" "0,x")
1122 (parallel [(const_int 0)]))
1123 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1126 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1127 (parallel [(const_int 0)]))
1128 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1131 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1132 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1133 [(set_attr "isa" "noavx,avx")
1134 (set_attr "type" "sseadd")
1135 (set_attr "prefix" "orig,vex")
1136 (set_attr "mode" "V2DF")])
;; V8SF horizontal operation.  The RTL first uses SF elements 0..3 of
;; operand 1 and of operand 2, then elements 4..7 of operand 1 and of
;; operand 2, always in adjacent pairs.  Single alternative with a "vex"
;; prefix attribute; operand 2 may be memory.
1138 (define_insn "avx_h<plusminus_insn>v8sf3"
1139 [(set (match_operand:V8SF 0 "register_operand" "=x")
1145 (match_operand:V8SF 1 "register_operand" "x")
1146 (parallel [(const_int 0)]))
1147 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1149 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1150 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1154 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1155 (parallel [(const_int 0)]))
1156 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1158 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1159 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1163 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1164 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1166 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1167 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1170 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1171 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1173 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1174 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1176 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1177 [(set_attr "type" "sseadd")
1178 (set_attr "prefix" "vex")
1179 (set_attr "mode" "V8SF")])
;; V4SF horizontal operation built from adjacent SF element pairs (0/1, 2/3)
;; of operand 1 and then of operand 2.  Alternative 0 (isa "noavx") ties
;; operand 1 to the destination and emits the two-operand form; alternative
;; 1 (isa "avx") emits the three-operand v-prefixed form.
1181 (define_insn "sse3_h<plusminus_insn>v4sf3"
1182 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1187 (match_operand:V4SF 1 "register_operand" "0,x")
1188 (parallel [(const_int 0)]))
1189 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1191 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1192 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1196 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1197 (parallel [(const_int 0)]))
1198 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1200 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1201 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1204 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1205 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1206 [(set_attr "isa" "noavx,avx")
1207 (set_attr "type" "sseadd")
1208 (set_attr "atom_unit" "complex")
1209 (set_attr "prefix" "orig,vex")
1210 (set_attr "prefix_rep" "1,*")
1211 (set_attr "mode" "V4SF")])
;; Sum reduction expander for V4DF.  Emits three insns: a horizontal add of
;; operand 1 with itself into tmp, a vperm2f128 of tmp with itself and
;; immediate 1 into tmp2, and a vector add of tmp and tmp2 into operand 0.
1213 (define_expand "reduc_splus_v4df"
1214 [(match_operand:V4DF 0 "register_operand" "")
1215 (match_operand:V4DF 1 "register_operand" "")]
1218 rtx tmp = gen_reg_rtx (V4DFmode);
1219 rtx tmp2 = gen_reg_rtx (V4DFmode);
1220 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1221 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1222 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V2DF: a single horizontal add of operand 1
;; with itself, written directly to operand 0.
1226 (define_expand "reduc_splus_v2df"
1227 [(match_operand:V2DF 0 "register_operand" "")
1228 (match_operand:V2DF 1 "register_operand" "")]
1231 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction expander for V8SF.  Two successive horizontal adds
;; (operand 1 with itself into tmp, then tmp with itself into tmp2), a
;; vperm2f128 of tmp2 with itself and immediate 1 back into tmp, and a
;; final vector add of tmp and tmp2 into operand 0.
1235 (define_expand "reduc_splus_v8sf"
1236 [(match_operand:V8SF 0 "register_operand" "")
1237 (match_operand:V8SF 1 "register_operand" "")]
1240 rtx tmp = gen_reg_rtx (V8SFmode);
1241 rtx tmp2 = gen_reg_rtx (V8SFmode);
1242 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1243 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1244 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1245 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V4SF.  Two strategies appear in the body:
;; two chained sse3_haddv4sf3 insns through a temporary, or the generic
;; ix86_expand_reduc helper driven by gen_addv4sf3.
;; NOTE(review): presumably the hadd path is chosen when SSE3 is available
;; and the helper is the fallback -- confirm against the selecting condition.
1249 (define_expand "reduc_splus_v4sf"
1250 [(match_operand:V4SF 0 "register_operand" "")
1251 (match_operand:V4SF 1 "register_operand" "")]
1256 rtx tmp = gen_reg_rtx (V4SFmode);
1257 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1258 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1261 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
1265 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes are enabled under TARGET_AVX2, the 256-bit
;; float modes under TARGET_AVX, and V4SF under TARGET_SSE.
1266 (define_mode_iterator REDUC_SMINMAX_MODE
1267 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
1268 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
1269 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
1270 (V4SF "TARGET_SSE")])
;; Signed min/max reduction expander over REDUC_SMINMAX_MODE.  It hands the
;; matching element-wise generator (gen_<code><mode>3) together with the
;; destination and source operands to ix86_expand_reduc.
1272 (define_expand "reduc_<code>_<mode>"
1273 [(smaxmin:REDUC_SMINMAX_MODE
1274 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand" "")
1275 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand" ""))]
1278 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes; like the expander above it
;; forwards gen_<code><mode>3 and both operands to ix86_expand_reduc.
;; NOTE(review): the code iterator is not visible here; presumably this is
;; the unsigned min/max counterpart -- confirm.
1282 (define_expand "reduc_<code>_<mode>"
1284 (match_operand:VI_256 0 "register_operand" "")
1285 (match_operand:VI_256 1 "register_operand" ""))]
1288 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; V8HI unsigned-minimum reduction expander: forwards gen_uminv8hi3 and both
;; operands to ix86_expand_reduc.
1292 (define_expand "reduc_umin_v8hi"
1294 (match_operand:V8HI 0 "register_operand" "")
1295 (match_operand:V8HI 1 "register_operand" ""))]
1298 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
1302 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1304 ;; Parallel floating point comparisons
1306 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed compare over the VF modes with an explicit predicate immediate.
;; Operand 3 is an SImode constant accepted by const_0_to_31_operand and is
;; printed as the first AT&T operand of vcmp<ssemodesuffix>.  The insn
;; carries one immediate byte (length_immediate 1).
1308 (define_insn "avx_cmp<mode>3"
1309 [(set (match_operand:VF 0 "register_operand" "=x")
1311 [(match_operand:VF 1 "register_operand" "x")
1312 (match_operand:VF 2 "nonimmediate_operand" "xm")
1313 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1316 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1317 [(set_attr "type" "ssecmp")
1318 (set_attr "length_immediate" "1")
1319 (set_attr "prefix" "vex")
1320 (set_attr "mode" "<MODE>")])
;; Compare over the 128-bit float modes that uses the scalar mnemonic suffix
;; (<ssescalarmodesuffix>) and a scalar "mode" attribute.  Operand 3 is the
;; predicate immediate, an SImode constant in the 0..31 operand class.
1322 (define_insn "avx_vmcmp<mode>3"
1323 [(set (match_operand:VF_128 0 "register_operand" "=x")
1326 [(match_operand:VF_128 1 "register_operand" "x")
1327 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1328 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1333 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1334 [(set_attr "type" "ssecmp")
1335 (set_attr "length_immediate" "1")
1336 (set_attr "prefix" "vex")
1337 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare restricted to commutative comparisons: the
;; insn condition requires operator 3 to be of class RTX_COMM_COMPARE, and
;; operand 1 carries the "%" commutativity marker in its constraint.  The
;; mnemonic is derived from operator 3 through the %D3 output modifier.
1339 (define_insn "*<sse>_maskcmp<mode>3_comm"
1340 [(set (match_operand:VF 0 "register_operand" "=x,x")
1341 (match_operator:VF 3 "sse_comparison_operator"
1342 [(match_operand:VF 1 "register_operand" "%0,x")
1343 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1345 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1347 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "ssecmp")
1351 (set_attr "length_immediate" "1")
1352 (set_attr "prefix" "orig,vex")
1353 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any sse_comparison_operator.  Unlike
;; the _comm variant, operand 1 has no "%" marker, so operand order is
;; fixed.  Alternative 0 is the two-operand non-AVX form with operand 1
;; tied to the destination; alternative 1 is the three-operand AVX form.
1355 (define_insn "<sse>_maskcmp<mode>3"
1356 [(set (match_operand:VF 0 "register_operand" "=x,x")
1357 (match_operator:VF 3 "sse_comparison_operator"
1358 [(match_operand:VF 1 "register_operand" "0,x")
1359 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1362 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1363 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1364 [(set_attr "isa" "noavx,avx")
1365 (set_attr "type" "ssecmp")
1366 (set_attr "length_immediate" "1")
1367 (set_attr "prefix" "orig,vex")
1368 (set_attr "mode" "<MODE>")])
;; Mask compare over the 128-bit float modes using the scalar mnemonic
;; suffix and a scalar "mode" attribute.  length_immediate is given per
;; alternative here ("1,*"), unlike the packed maskcmp patterns.
1370 (define_insn "<sse>_vmmaskcmp<mode>3"
1371 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1373 (match_operator:VF_128 3 "sse_comparison_operator"
1374 [(match_operand:VF_128 1 "register_operand" "0,x")
1375 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1380 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1381 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1382 [(set_attr "isa" "noavx,avx")
1383 (set_attr "type" "ssecmp")
1384 (set_attr "length_immediate" "1,*")
1385 (set_attr "prefix" "orig,vex")
1386 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of two
;; <ssevecmode> vectors (operand 1 may be memory).  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.  The "%v" in the template
;; pairs with the "maybe_vex" prefix attribute; prefix_data16 is computed
;; from whether the insn mode is DF.
1388 (define_insn "<sse>_comi"
1389 [(set (reg:CCFP FLAGS_REG)
1392 (match_operand:<ssevecmode> 0 "register_operand" "x")
1393 (parallel [(const_int 0)]))
1395 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1396 (parallel [(const_int 0)]))))]
1397 "SSE_FLOAT_MODE_P (<MODE>mode)"
1398 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1399 [(set_attr "type" "ssecomi")
1400 (set_attr "prefix" "maybe_vex")
1401 (set_attr "prefix_rep" "0")
1402 (set (attr "prefix_data16")
1403 (if_then_else (eq_attr "mode" "DF")
1405 (const_string "0")))
1406 (set_attr "mode" "<MODE>")])
;; Same shape as the comi pattern, but FLAGS_REG is set in CCFPU mode and
;; the emitted mnemonic is ucomi.  Compares element 0 of operand 0 with
;; element 0 of operand 1 (which may be memory).
1408 (define_insn "<sse>_ucomi"
1409 [(set (reg:CCFPU FLAGS_REG)
1412 (match_operand:<ssevecmode> 0 "register_operand" "x")
1413 (parallel [(const_int 0)]))
1415 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1416 (parallel [(const_int 0)]))))]
1417 "SSE_FLOAT_MODE_P (<MODE>mode)"
1418 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1419 [(set_attr "type" "ssecomi")
1420 (set_attr "prefix" "maybe_vex")
1421 (set_attr "prefix_rep" "0")
1422 (set (attr "prefix_data16")
1423 (if_then_else (eq_attr "mode" "DF")
1425 (const_string "0")))
1426 (set_attr "mode" "<MODE>")])
;; 256-bit vector conditional select driven by a float comparison.
;; Operands 1 and 2 are the V_256 values to choose between, operator 3
;; compares VF_256 operands 4 and 5.  The condition requires the data mode
;; and the comparison mode to have the same number of elements.  Expansion
;; is delegated to ix86_expand_fp_vcond.
1428 (define_expand "vcond<V_256:mode><VF_256:mode>"
1429 [(set (match_operand:V_256 0 "register_operand" "")
1431 (match_operator 3 ""
1432 [(match_operand:VF_256 4 "nonimmediate_operand" "")
1433 (match_operand:VF_256 5 "nonimmediate_operand" "")])
1434 (match_operand:V_256 1 "general_operand" "")
1435 (match_operand:V_256 2 "general_operand" "")))]
1437 && (GET_MODE_NUNITS (<V_256:MODE>mode)
1438 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
1440 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit counterpart of the 256-bit vcond expander: V_128 data operands
;; 1 and 2, VF_128 comparison operands 4 and 5 under operator 3, equal
;; element counts required, expansion delegated to ix86_expand_fp_vcond.
1445 (define_expand "vcond<V_128:mode><VF_128:mode>"
1446 [(set (match_operand:V_128 0 "register_operand" "")
1448 (match_operator 3 ""
1449 [(match_operand:VF_128 4 "nonimmediate_operand" "")
1450 (match_operand:VF_128 5 "nonimmediate_operand" "")])
1451 (match_operand:V_128 1 "general_operand" "")
1452 (match_operand:V_128 2 "general_operand" "")))]
1454 && (GET_MODE_NUNITS (<V_128:MODE>mode)
1455 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
1457 bool ok = ix86_expand_fp_vcond (operands);
1462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1464 ;; Parallel floating point logical operations
1466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output template is built at
;; output time: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and which_alternative selects the two-operand andn or the
;; three-operand vandn format.  The result is formatted into a static
;; 32-byte buffer with snprintf.
1468 (define_insn "<sse>_andnot<mode>3"
1469 [(set (match_operand:VF 0 "register_operand" "=x,x")
1472 (match_operand:VF 1 "register_operand" "0,x"))
1473 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1476 static char buf[32];
1479 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1481 switch (which_alternative)
1484 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1487 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1493 snprintf (buf, sizeof (buf), insn, suffix);
1496 [(set_attr "isa" "noavx,avx")
1497 (set_attr "type" "sselog")
1498 (set_attr "prefix" "orig,vex")
1499 (set_attr "mode" "<MODE>")])
;; Expander for the logical operations over the VF modes.  Both inputs may
;; be memory at expand time; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with the operation code and mode.
1501 (define_expand "<code><mode>3"
1502 [(set (match_operand:VF 0 "register_operand" "")
1504 (match_operand:VF 1 "nonimmediate_operand" "")
1505 (match_operand:VF 2 "nonimmediate_operand" "")))]
1507 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed logical operations over the VF modes; valid when
;; TARGET_SSE holds and ix86_binary_operator_ok accepts the operands.
;; Operand 1 is marked commutative ("%").  As in the and-not pattern, the
;; template is built at output time: suffix "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <ssemodesuffix> otherwise, with
;; which_alternative choosing the two-operand or v-prefixed three-operand
;; format.
1509 (define_insn "*<code><mode>3"
1510 [(set (match_operand:VF 0 "register_operand" "=x,x")
1512 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1513 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1514 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1516 static char buf[32];
1519 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1521 switch (which_alternative)
1524 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1527 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1533 snprintf (buf, sizeof (buf), insn, suffix);
1536 [(set_attr "isa" "noavx,avx")
1537 (set_attr "type" "sselog")
1538 (set_attr "prefix" "orig,vex")
1539 (set_attr "mode" "<MODE>")])
;; copysign expander over the VF modes.  Operand 3 is a sign-bit mask built
;; by ix86_build_signbit_mask; operands 4 and 5 are fresh pseudo registers.
;; One temporary is formed from the complement of the mask and operand 1,
;; the other from the mask ANDed with operand 2, and operand 0 receives the
;; IOR of the two temporaries.
1541 (define_expand "copysign<mode>3"
1544 (not:VF (match_dup 3))
1545 (match_operand:VF 1 "nonimmediate_operand" "")))
1547 (and:VF (match_dup 3)
1548 (match_operand:VF 2 "nonimmediate_operand" "")))
1549 (set (match_operand:VF 0 "register_operand" "")
1550 (ior:VF (match_dup 4) (match_dup 5)))]
1553 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1555 operands[4] = gen_reg_rtx (<MODE>mode);
1556 operands[5] = gen_reg_rtx (<MODE>mode);
1559 ;; Also define scalar versions. These are used for abs, neg, and
1560 ;; conditional move. Using subregs into vector modes causes register
1561 ;; allocation lossage. These patterns do not allow memory operands
1562 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not.  All operands are registers only.  The emitted
;; instruction is the packed one: the suffix is "ps" under
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix> otherwise,
;; and the "mode" attribute is the corresponding vector mode.
1564 (define_insn "*andnot<mode>3"
1565 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1568 (match_operand:MODEF 1 "register_operand" "0,x"))
1569 (match_operand:MODEF 2 "register_operand" "x,x")))]
1570 "SSE_FLOAT_MODE_P (<MODE>mode)"
1572 static char buf[32];
1575 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1577 switch (which_alternative)
1580 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1583 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1589 snprintf (buf, sizeof (buf), insn, suffix);
1592 [(set_attr "isa" "noavx,avx")
1593 (set_attr "type" "sselog")
1594 (set_attr "prefix" "orig,vex")
1595 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations.  Register-only operands, with
;; operand 1 marked commutative ("%").  Emits the packed instruction with
;; suffix "ps" under TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and
;; <ssevecmodesuffix> otherwise; the "mode" attribute is the vector mode.
1597 (define_insn "*<code><mode>3"
1598 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1600 (match_operand:MODEF 1 "register_operand" "%0,x")
1601 (match_operand:MODEF 2 "register_operand" "x,x")))]
1602 "SSE_FLOAT_MODE_P (<MODE>mode)"
1604 static char buf[32];
1607 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1609 switch (which_alternative)
1612 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1615 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1621 snprintf (buf, sizeof (buf), insn, suffix);
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "sselog")
1626 (set_attr "prefix" "orig,vex")
1627 (set_attr "mode" "<ssevecmode>")])
1629 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1631 ;; FMA4 floating point multiply/accumulate instructions. This
1632 ;; includes the scalar version of the instructions as well as the
1635 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1637 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1638 ;; combine to generate a multiply/add with two memory references. We then
1639 ;; split this insn, into loading up the destination register with one of the
1640 ;; memory operations. If we don't manage to split the insn, reload will
1641 ;; generate the appropriate moves. The reason this is needed is that combine
1642 ;; has already folded one of the memory references into both the multiply and
1643 ;; add insns, and it can't generate a new pseudo. I.e.:
1644 ;; (set (reg1) (mem (addr1)))
1645 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1646 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1648 ;; ??? This is historic, pre-dating the gimple fma transformation.
1649 ;; We could now properly represent that only one memory operand is
1650 ;; allowed and not be penalized during optimization.
1652 ;; Intrinsic FMA operations.
1654 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name fused multiply-add expander over FMAMODE with three plain
;; inputs.  Enabled when either TARGET_FMA or TARGET_FMA4 is set and
;; TARGET_SSE_MATH holds.
1655 (define_expand "fma<mode>4"
1656 [(set (match_operand:FMAMODE 0 "register_operand")
1658 (match_operand:FMAMODE 1 "nonimmediate_operand")
1659 (match_operand:FMAMODE 2 "nonimmediate_operand")
1660 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1661 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name fused multiply-subtract expander: same as fma<mode>4 but
;; with operand 3 negated.  Same enabling condition.
1663 (define_expand "fms<mode>4"
1664 [(set (match_operand:FMAMODE 0 "register_operand")
1666 (match_operand:FMAMODE 1 "nonimmediate_operand")
1667 (match_operand:FMAMODE 2 "nonimmediate_operand")
1668 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1669 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated multiply-add expander: operand 1 is negated,
;; operands 2 and 3 are used as-is.  Same enabling condition as fma<mode>4.
1671 (define_expand "fnma<mode>4"
1672 [(set (match_operand:FMAMODE 0 "register_operand")
1674 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1675 (match_operand:FMAMODE 2 "nonimmediate_operand")
1676 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1677 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name negated multiply-subtract expander: both operand 1 and
;; operand 3 are negated.  Same enabling condition as fma<mode>4.
1679 (define_expand "fnms<mode>4"
1680 [(set (match_operand:FMAMODE 0 "register_operand")
1682 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1683 (match_operand:FMAMODE 2 "nonimmediate_operand")
1684 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1685 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1687 ;; The builtin for fma4intrin.h does not require SSE math to be enabled.
;; Named expander for the fused multiply-add builtin.  Its condition is
;; TARGET_FMA || TARGET_FMA4 only, with no TARGET_SSE_MATH requirement.
1688 (define_expand "fma4i_fmadd_<mode>"
1689 [(set (match_operand:FMAMODE 0 "register_operand")
1691 (match_operand:FMAMODE 1 "nonimmediate_operand")
1692 (match_operand:FMAMODE 2 "nonimmediate_operand")
1693 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1694 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd with a separate destination.  Operand 1 must be a
;; register (and is marked commutative); the two alternatives allow a
;; memory operand either in operand 3 (alternative 0) or in operand 2
;; (alternative 1), never in both.
1696 (define_insn "*fma4i_fmadd_<mode>"
1697 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1699 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1700 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1701 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1703 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1704 [(set_attr "type" "ssemuladd")
1705 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub.  Same operand and alternative layout as the
;; four-operand fmadd insn: memory is allowed in operand 3 (alternative 0)
;; or operand 2 (alternative 1).
1707 (define_insn "*fma4i_fmsub_<mode>"
1708 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1710 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1711 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1713 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1715 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1716 [(set_attr "type" "ssemuladd")
1717 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd.  Memory is allowed in operand 3 (alternative 0) or
;; operand 2 (alternative 1); operand 1 is register-only.
1719 (define_insn "*fma4i_fnmadd_<mode>"
1720 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1723 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1724 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1725 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1727 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1728 [(set_attr "type" "ssemuladd")
1729 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub.  Memory is allowed in operand 3 (alternative 0) or
;; operand 2 (alternative 1); operand 1 is register-only.
1731 (define_insn "*fma4i_fnmsub_<mode>"
1732 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1735 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1736 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1738 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1740 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1741 [(set_attr "type" "ssemuladd")
1742 (set_attr "mode" "<MODE>")])
1744 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1745 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar four-operand multiply-add over the 128-bit float
;; modes.  The preparation code supplies operand 4 as the all-zero constant
;; of the vector mode.
1747 (define_expand "fma4i_vmfmadd_<mode>"
1748 [(set (match_operand:VF_128 0 "register_operand")
1751 (match_operand:VF_128 1 "nonimmediate_operand")
1752 (match_operand:VF_128 2 "nonimmediate_operand")
1753 (match_operand:VF_128 3 "nonimmediate_operand"))
1758 operands[4] = CONST0_RTX (<MODE>mode);
;; Expander for the scalar multiply-add over the 128-bit float modes with
;; three nonimmediate inputs.  No preparation code is visible for it, in
;; contrast to fma4i_vmfmadd_<mode>, which sets up a zero operand 4.
1761 (define_expand "fmai_vmfmadd_<mode>"
1762 [(set (match_operand:VF_128 0 "register_operand")
1765 (match_operand:VF_128 1 "nonimmediate_operand")
1766 (match_operand:VF_128 2 "nonimmediate_operand")
1767 (match_operand:VF_128 3 "nonimmediate_operand"))
;; Scalar three-operand vfmadd over the 128-bit float modes.  The three
;; alternatives emit the 132, 213 and 231 forms: operand 1 is tied to the
;; destination in alternatives 0 and 1, operand 3 in alternative 2.  At
;; most one input per alternative may be memory.
1772 (define_insn "*fmai_fmadd_<mode>"
1773 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1776 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1777 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1778 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1783 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1784 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1785 vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1786 [(set_attr "type" "ssemuladd")
1787 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfmsub over the 128-bit float modes, with the same
;; 132/213/231 alternative layout as the scalar fmadd insn: operand 1 is
;; tied to the destination in alternatives 0 and 1, operand 3 in
;; alternative 2.
1789 (define_insn "*fmai_fmsub_<mode>"
1790 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1793 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
1794 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1796 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1801 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1802 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1803 vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1804 [(set_attr "type" "ssemuladd")
1805 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmadd over the 128-bit float modes; alternatives
;; emit the 132, 213 and 231 forms according to which input is tied to the
;; destination (operand 1 in alternatives 0-1, operand 3 in alternative 2).
1807 (define_insn "*fmai_fnmadd_<mode>"
1808 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1812 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1813 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1814 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
1819 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1820 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1821 vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1822 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")])
;; Scalar three-operand vfnmsub over the 128-bit float modes; alternatives
;; emit the 132, 213 and 231 forms according to which input is tied to the
;; destination (operand 1 in alternatives 0-1, operand 3 in alternative 2).
1825 (define_insn "*fmai_fnmsub_<mode>"
1826 [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
1830 (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
1831 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
1833 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
1838 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
1839 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
1840 vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1841 [(set_attr "type" "ssemuladd")
1842 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmadd over the 128-bit float modes.  Operand 4 must
;; satisfy const0_operand.  Memory is allowed in operand 3 (alternative 0)
;; or operand 2 (alternative 1).
1844 (define_insn "*fma4i_vmfmadd_<mode>"
1845 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1848 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1849 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1850 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1851 (match_operand:VF_128 4 "const0_operand" "")
1854 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1855 [(set_attr "type" "ssemuladd")
1856 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub over the 128-bit float modes.  Operand 4 must
;; satisfy const0_operand.  Memory is allowed in operand 3 (alternative 0)
;; or operand 2 (alternative 1).
1858 (define_insn "*fma4i_vmfmsub_<mode>"
1859 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1862 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1863 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1865 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1866 (match_operand:VF_128 4 "const0_operand" "")
1869 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1870 [(set_attr "type" "ssemuladd")
1871 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd over the 128-bit float modes.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1873 (define_insn "*fma4i_vmfnmadd_<mode>"
1874 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1878 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1879 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1880 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1881 (match_operand:VF_128 4 "const0_operand" "")
1884 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1885 [(set_attr "type" "ssemuladd")
1886 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub over the 128-bit float modes.  Operand 4
;; must satisfy const0_operand.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1888 (define_insn "*fma4i_vmfnmsub_<mode>"
1889 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1893 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1894 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1896 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1897 (match_operand:VF_128 4 "const0_operand" "")
1900 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
1904 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1906 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1908 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1910 ;; It would be possible to represent these without the UNSPEC as
1913 ;; (fma op1 op2 op3)
1914 ;; (fma op1 op2 (neg op3))
1917 ;; But this doesn't seem useful in practice.
;; Named expander for the multiply add/sub operation over the VF modes,
;; taking three nonimmediate inputs as a bracketed operand vector.
;; Enabled under TARGET_FMA || TARGET_FMA4.
1919 (define_expand "fmaddsub_<mode>"
1920 [(set (match_operand:VF 0 "register_operand")
1922 [(match_operand:VF 1 "nonimmediate_operand")
1923 (match_operand:VF 2 "nonimmediate_operand")
1924 (match_operand:VF 3 "nonimmediate_operand")]
1926 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub over the VF modes.  Memory is allowed in operand
;; 3 (alternative 0) or operand 2 (alternative 1); operand 1 is
;; register-only and marked commutative.
1928 (define_insn "*fma4_fmaddsub_<mode>"
1929 [(set (match_operand:VF 0 "register_operand" "=x,x")
1931 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1932 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1933 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1936 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd over the VF modes.  Memory is allowed in operand
;; 3 (alternative 0) or operand 2 (alternative 1); operand 1 is
;; register-only and marked commutative.
1940 (define_insn "*fma4_fmsubadd_<mode>"
1941 [(set (match_operand:VF 0 "register_operand" "=x,x")
1943 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1944 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1946 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1949 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1950 [(set_attr "type" "ssemuladd")
1951 (set_attr "mode" "<MODE>")])
1953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1955 ;; FMA3 floating point multiply/accumulate instructions.
1957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand vfmadd over FMAMODE.  The alternatives emit the 132, 213
;; and 231 forms: operand 1 is tied to the destination in alternatives 0
;; and 1, operand 3 in alternative 2.  At most one input per alternative
;; may be memory.
1959 (define_insn "*fma_fmadd_<mode>"
1960 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1962 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1963 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1964 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1967 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1968 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1969 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1970 [(set_attr "type" "ssemuladd")
1971 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsub over FMAMODE, with the 132/213/231 alternative
;; layout: operand 1 is tied to the destination in alternatives 0 and 1,
;; operand 3 in alternative 2.
1973 (define_insn "*fma_fmsub_<mode>"
1974 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1976 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1977 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1979 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1982 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1983 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1984 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1985 [(set_attr "type" "ssemuladd")
1986 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmadd over FMAMODE, with the 132/213/231 alternative
;; layout: operand 1 is tied to the destination in alternatives 0 and 1,
;; operand 3 in alternative 2.
1988 (define_insn "*fma_fnmadd_<mode>"
1989 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1992 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1993 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1994 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1997 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1998 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1999 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2000 [(set_attr "type" "ssemuladd")
2001 (set_attr "mode" "<MODE>")])
;; Three-operand vfnmsub over FMAMODE, with the 132/213/231 alternative
;; layout: operand 1 is tied to the destination in alternatives 0 and 1,
;; operand 3 in alternative 2.
2003 (define_insn "*fma_fnmsub_<mode>"
2004 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
2007 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
2008 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
2010 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
2013 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2014 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2015 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2016 [(set_attr "type" "ssemuladd")
2017 (set_attr "mode" "<MODE>")])
;; Three-operand vfmaddsub over the VF modes, with the 132/213/231
;; alternative layout: operand 1 is tied to the destination in
;; alternatives 0 and 1, operand 3 in alternative 2.
2019 (define_insn "*fma_fmaddsub_<mode>"
2020 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2022 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2023 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2024 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
2028 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2029 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2030 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2031 [(set_attr "type" "ssemuladd")
2032 (set_attr "mode" "<MODE>")])
;; Three-operand vfmsubadd over the VF modes, with the 132/213/231
;; alternative layout: operand 1 is tied to the destination in
;; alternatives 0 and 1, operand 3 in alternative 2.
2034 (define_insn "*fma_fmsubadd_<mode>"
2035 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
2037 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
2038 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
2040 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
2044 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
2045 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
2046 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2047 [(set_attr "type" "ssemuladd")
2048 (set_attr "mode" "<MODE>")])
2050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2052 ;; Parallel single-precision floating point conversion operations
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF.
;; Operand 1 is a V4SF register tied to the destination through the "0"
;; constraint.  Only the two-operand form is emitted.
2056 (define_insn "sse_cvtpi2ps"
2057 [(set (match_operand:V4SF 0 "register_operand" "=x")
2060 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
2061 (match_operand:V4SF 1 "register_operand" "0")
2064 "cvtpi2ps\t{%2, %0|%0, %2}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI result (constraint "y") is elements 0 and 1 of a V4SI
;; unspec applied to the V4SF operand 1, which may be a register or
;; memory.  The "unit" attribute is "mmx".
2068 (define_insn "sse_cvtps2pi"
2069 [(set (match_operand:V2SI 0 "register_operand" "=y")
2071 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2073 (parallel [(const_int 0) (const_int 1)])))]
2075 "cvtps2pi\t{%1, %0|%0, %1}"
2076 [(set_attr "type" "ssecvt")
2077 (set_attr "unit" "mmx")
2078 (set_attr "mode" "DI")])
;; cvttps2pi: the V2SI result (constraint "y") is elements 0 and 1 of
;; (fix:V4SI operand 1), where operand 1 is a V4SF register or memory.
;; The "unit" attribute is "mmx" and prefix_rep is forced to "0".
2080 (define_insn "sse_cvttps2pi"
2081 [(set (match_operand:V2SI 0 "register_operand" "=y")
2083 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
2084 (parallel [(const_int 0) (const_int 1)])))]
2086 "cvttps2pi\t{%1, %0|%0, %1}"
2087 [(set_attr "type" "ssecvt")
2088 (set_attr "unit" "mmx")
2089 (set_attr "prefix_rep" "0")
2090 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF, with V4SF operand 1 as
;; the other vector input.  Alternatives 0 and 1 (isa "noavx") take operand
;; 2 from a register or from memory respectively, with operand 1 tied to
;; the destination; alternative 2 (isa "avx") is the three-operand form.
;; Separate decode attributes are given per alternative for athlon,
;; amdfam10 and bdver1.
2092 (define_insn "sse_cvtsi2ss"
2093 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2096 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2097 (match_operand:V4SF 1 "register_operand" "0,0,x")
2101 cvtsi2ss\t{%2, %0|%0, %2}
2102 cvtsi2ss\t{%2, %0|%0, %2}
2103 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
2104 [(set_attr "isa" "noavx,noavx,avx")
2105 (set_attr "type" "sseicvt")
2106 (set_attr "athlon_decode" "vector,double,*")
2107 (set_attr "amdfam10_decode" "vector,double,*")
2108 (set_attr "bdver1_decode" "double,direct,*")
2109 (set_attr "prefix" "orig,orig,vex")
2110 (set_attr "mode" "SF")])
;; 64-bit variant of cvtsi2ss: operand 2 is DImode and the pattern requires
;; TARGET_SSE && TARGET_64BIT.  Same three-alternative layout as the SImode
;; pattern.  prefix_rex is "1" for the two non-AVX alternatives and
;; length_vex is "4" for the AVX alternative.
2112 (define_insn "sse_cvtsi2ssq"
2113 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2116 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2117 (match_operand:V4SF 1 "register_operand" "0,0,x")
2119 "TARGET_SSE && TARGET_64BIT"
2121 cvtsi2ssq\t{%2, %0|%0, %2}
2122 cvtsi2ssq\t{%2, %0|%0, %2}
2123 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2124 [(set_attr "isa" "noavx,noavx,avx")
2125 (set_attr "type" "sseicvt")
2126 (set_attr "athlon_decode" "vector,double,*")
2127 (set_attr "amdfam10_decode" "vector,double,*")
2128 (set_attr "bdver1_decode" "double,direct,*")
2129 (set_attr "length_vex" "*,*,4")
2130 (set_attr "prefix_rex" "1,1,*")
2131 (set_attr "prefix" "orig,orig,vex")
2132 (set_attr "mode" "SF")])
;; cvtss2si to an SImode register: the source is element 0 of the V4SF
;; operand 1 (register or memory), wrapped in UNSPEC_FIX_NOTRUNC.  The
;; "%v" in the template pairs with the "maybe_vex" prefix attribute.
2134 (define_insn "sse_cvtss2si"
2135 [(set (match_operand:SI 0 "register_operand" "=r,r")
2138 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2139 (parallel [(const_int 0)]))]
2140 UNSPEC_FIX_NOTRUNC))]
2142 "%vcvtss2si\t{%1, %0|%0, %1}"
2143 [(set_attr "type" "sseicvt")
2144 (set_attr "athlon_decode" "double,vector")
2145 (set_attr "bdver1_decode" "double,double")
2146 (set_attr "prefix_rep" "1")
2147 (set_attr "prefix" "maybe_vex")
2148 (set_attr "mode" "SI")])
;; Same instruction as sse_cvtss2si, but the source is a plain SFmode
;; scalar operand (register or memory) rather than an element selected
;; from a V4SF vector.
2150 (define_insn "sse_cvtss2si_2"
2151 [(set (match_operand:SI 0 "register_operand" "=r,r")
2152 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2153 UNSPEC_FIX_NOTRUNC))]
2155 "%vcvtss2si\t{%1, %0|%0, %1}"
2156 [(set_attr "type" "sseicvt")
2157 (set_attr "athlon_decode" "double,vector")
2158 (set_attr "amdfam10_decode" "double,double")
2159 (set_attr "bdver1_decode" "double,double")
2160 (set_attr "prefix_rep" "1")
2161 (set_attr "prefix" "maybe_vex")
2162 (set_attr "mode" "SI")])
;; 64-bit form of sse_cvtss2si: element 0 of V4SF operand 1 is converted
;; (UNSPEC_FIX_NOTRUNC) into a DImode GPR.  Enabled only for
;; TARGET_SSE && TARGET_64BIT.  The q suffix appears only in the AT&T
;; dialect of the template.
2164 (define_insn "sse_cvtss2siq"
2165 [(set (match_operand:DI 0 "register_operand" "=r,r")
2168 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2169 (parallel [(const_int 0)]))]
2170 UNSPEC_FIX_NOTRUNC))]
2171 "TARGET_SSE && TARGET_64BIT"
2172 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2173 [(set_attr "type" "sseicvt")
2174 (set_attr "athlon_decode" "double,vector")
2175 (set_attr "bdver1_decode" "double,double")
2176 (set_attr "prefix_rep" "1")
2177 (set_attr "prefix" "maybe_vex")
2178 (set_attr "mode" "DI")])
;; 64-bit form of sse_cvtss2si_2: a plain SFmode scalar operand is
;; converted (UNSPEC_FIX_NOTRUNC) into a DImode GPR.  Enabled only for
;; TARGET_SSE && TARGET_64BIT.
2180 (define_insn "sse_cvtss2siq_2"
2181 [(set (match_operand:DI 0 "register_operand" "=r,r")
2182 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
2183 UNSPEC_FIX_NOTRUNC))]
2184 "TARGET_SSE && TARGET_64BIT"
2185 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
2186 [(set_attr "type" "sseicvt")
2187 (set_attr "athlon_decode" "double,vector")
2188 (set_attr "amdfam10_decode" "double,double")
2189 (set_attr "bdver1_decode" "double,double")
2190 (set_attr "prefix_rep" "1")
2191 (set_attr "prefix" "maybe_vex")
2192 (set_attr "mode" "DI")])
;; Scalar float -> int conversion of element 0 of V4SF operand 1 into an
;; SImode GPR using cvttss2si, the truncating variant of the instruction
;; (compare sse_cvtss2si, which goes through UNSPEC_FIX_NOTRUNC).
2194 (define_insn "sse_cvttss2si"
2195 [(set (match_operand:SI 0 "register_operand" "=r,r")
2198 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2199 (parallel [(const_int 0)]))))]
2201 "%vcvttss2si\t{%1, %0|%0, %1}"
2202 [(set_attr "type" "sseicvt")
2203 (set_attr "athlon_decode" "double,vector")
2204 (set_attr "amdfam10_decode" "double,double")
2205 (set_attr "bdver1_decode" "double,double")
2206 (set_attr "prefix_rep" "1")
2207 (set_attr "prefix" "maybe_vex")
2208 (set_attr "mode" "SI")])
;; 64-bit form of sse_cvttss2si: element 0 of V4SF operand 1 is converted
;; with cvttss2si into a DImode GPR.  Enabled only for
;; TARGET_SSE && TARGET_64BIT.
2210 (define_insn "sse_cvttss2siq"
2211 [(set (match_operand:DI 0 "register_operand" "=r,r")
2214 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
2215 (parallel [(const_int 0)]))))]
2216 "TARGET_SSE && TARGET_64BIT"
2217 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
2218 [(set_attr "type" "sseicvt")
2219 (set_attr "athlon_decode" "double,vector")
2220 (set_attr "amdfam10_decode" "double,double")
2221 (set_attr "bdver1_decode" "double,double")
2222 (set_attr "prefix_rep" "1")
2223 (set_attr "prefix" "maybe_vex")
2224 (set_attr "mode" "DI")])
;; Packed int -> single-precision float conversion, instantiated over the
;; VF1 iterator (V8SF when TARGET_AVX, and V4SF).  Operand 1 is the integer
;; vector in the matching <sseintvecmode>.  Emits cvtdq2ps / vcvtdq2ps.
2226 (define_insn "float<sseintvecmodelower><mode>2"
2227 [(set (match_operand:VF1 0 "register_operand" "=x")
2229 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
2231 "%vcvtdq2ps\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "prefix" "maybe_vex")
2234 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned packed int -> single-precision float expander over VF1.
;; Available with TARGET_SSE2 for V4SF, and for the other VF1 mode only
;; with TARGET_AVX2.  All work is delegated to
;; ix86_expand_vector_convert_uns_vsivsf (destination, source).
2236 (define_expand "floatuns<sseintvecmodelower><mode>2"
2237 [(match_operand:VF1 0 "register_operand" "")
2238 (match_operand:<sseintvecmode> 1 "register_operand" "")]
2239 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
2241 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; 256-bit packed float -> int conversion: V8SF operand 1 (register or
;; memory) to V8SI through UNSPEC_FIX_NOTRUNC, emitted as vcvtps2dq with a
;; VEX prefix.
2245 (define_insn "avx_cvtps2dq256"
2246 [(set (match_operand:V8SI 0 "register_operand" "=x")
2247 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2248 UNSPEC_FIX_NOTRUNC))]
2250 "vcvtps2dq\t{%1, %0|%0, %1}"
2251 [(set_attr "type" "ssecvt")
2252 (set_attr "prefix" "vex")
2253 (set_attr "mode" "OI")])
;; 128-bit packed float -> int conversion: V4SF operand 1 to V4SI through
;; UNSPEC_FIX_NOTRUNC, emitted as cvtps2dq / vcvtps2dq.  The prefix_data16
;; attribute is selected by a TARGET_AVX test rather than being a fixed
;; value.
2255 (define_insn "sse2_cvtps2dq"
2256 [(set (match_operand:V4SI 0 "register_operand" "=x")
2257 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2258 UNSPEC_FIX_NOTRUNC))]
2260 "%vcvtps2dq\t{%1, %0|%0, %1}"
2261 [(set_attr "type" "ssecvt")
2262 (set (attr "prefix_data16")
2264 (match_test "TARGET_AVX")
2266 (const_string "1")))
2267 (set_attr "prefix" "maybe_vex")
2268 (set_attr "mode" "TI")])
;; 256-bit truncating packed float -> int conversion: (fix:V8SI ...) of
;; V8SF operand 1, emitted as vcvttps2dq with a VEX prefix.
2270 (define_insn "fix_truncv8sfv8si2"
2271 [(set (match_operand:V8SI 0 "register_operand" "=x")
2272 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2274 "vcvttps2dq\t{%1, %0|%0, %1}"
2275 [(set_attr "type" "ssecvt")
2276 (set_attr "prefix" "vex")
2277 (set_attr "mode" "OI")])
;; 128-bit truncating packed float -> int conversion: (fix:V4SI ...) of
;; V4SF operand 1, emitted as cvttps2dq / vcvttps2dq.  prefix_rep is
;; selected by a TARGET_AVX test.
;; NOTE(review): prefix_data16 is specified twice below, once through a
;; TARGET_AVX test and once unconditionally as 0.  Confirm which setting
;; is meant to take effect and whether the other is redundant.
2279 (define_insn "fix_truncv4sfv4si2"
2280 [(set (match_operand:V4SI 0 "register_operand" "=x")
2281 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2283 "%vcvttps2dq\t{%1, %0|%0, %1}"
2284 [(set_attr "type" "ssecvt")
2285 (set (attr "prefix_rep")
2287 (match_test "TARGET_AVX")
2289 (const_string "1")))
2290 (set (attr "prefix_data16")
2292 (match_test "TARGET_AVX")
2294 (const_string "0")))
2295 (set_attr "prefix_data16" "0")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "TI")])
;; Unsigned truncating float -> int expander over VF1, built on the signed
;; conversion:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts operand 1 (result in
;;      tmp[0]) and hands back a correction value in tmp[2].
;;   2. The signed fix_trunc pattern converts tmp[0] into tmp[1].
;;   3. tmp[1] is XORed with tmp[2] to produce operand 0.
2299 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
2300 [(match_operand:<sseintvecmode> 0 "register_operand" "")
2301 (match_operand:VF1 1 "register_operand" "")]
2305 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
2306 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
2307 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
2308 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
2312 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2314 ;; Parallel double-precision floating point conversion operations
2316 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed int -> double conversion from a V2SI source held in an MMX
;; register (constraint y) or in memory, producing V2DF via cvtpi2pd.
;; Only the MMX-register alternative is tagged with unit mmx and
;; prefix_data16 1.
2318 (define_insn "sse2_cvtpi2pd"
2319 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2320 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2322 "cvtpi2pd\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set_attr "unit" "mmx,*")
2325 (set_attr "prefix_data16" "1,*")
2326 (set_attr "mode" "V2DF")])
;; Packed double -> int conversion of V2DF operand 1 into a V2SI MMX
;; register (constraint y), expressed through UNSPEC_FIX_NOTRUNC and
;; emitted as cvtpd2pi.
2328 (define_insn "sse2_cvtpd2pi"
2329 [(set (match_operand:V2SI 0 "register_operand" "=y")
2330 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2331 UNSPEC_FIX_NOTRUNC))]
2333 "cvtpd2pi\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "ssecvt")
2335 (set_attr "unit" "mmx")
2336 (set_attr "bdver1_decode" "double")
2337 (set_attr "prefix_data16" "1")
2338 (set_attr "mode" "DI")])
;; Truncating packed double -> int conversion: (fix:V2SI ...) of V2DF
;; operand 1 into a V2SI MMX register, emitted as cvttpd2pi.
2340 (define_insn "sse2_cvttpd2pi"
2341 [(set (match_operand:V2SI 0 "register_operand" "=y")
2342 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2344 "cvttpd2pi\t{%1, %0|%0, %1}"
2345 [(set_attr "type" "ssecvt")
2346 (set_attr "unit" "mmx")
2347 (set_attr "bdver1_decode" "double")
2348 (set_attr "prefix_data16" "1")
2349 (set_attr "mode" "TI")])
;; Scalar int -> double conversion: SImode operand 2 (GPR or memory) is
;; converted to DF, with V2DF operand 1 as the other vector input and a
;; V2DF result.  The noavx alternatives tie operand 1 to the destination
;; and emit cvtsi2sd.  The avx alternative emits three-operand vcvtsi2sd.
2351 (define_insn "sse2_cvtsi2sd"
2352 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2355 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2356 (match_operand:V2DF 1 "register_operand" "0,0,x")
2360 cvtsi2sd\t{%2, %0|%0, %2}
2361 cvtsi2sd\t{%2, %0|%0, %2}
2362 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2363 [(set_attr "isa" "noavx,noavx,avx")
2364 (set_attr "type" "sseicvt")
2365 (set_attr "athlon_decode" "double,direct,*")
2366 (set_attr "amdfam10_decode" "vector,double,*")
2367 (set_attr "bdver1_decode" "double,direct,*")
2368 (set_attr "prefix" "orig,orig,vex")
2369 (set_attr "mode" "DF")])
;; 64-bit counterpart of sse2_cvtsi2sd: the integer source (operand 2) is
;; DImode and the pattern is only enabled for TARGET_SSE2 && TARGET_64BIT.
;; The non-AVX alternatives are marked as needing a REX prefix and the
;; AVX alternative gets a length_vex of 4.
2371 (define_insn "sse2_cvtsi2sdq"
2372 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2375 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2376 (match_operand:V2DF 1 "register_operand" "0,0,x")
2378 "TARGET_SSE2 && TARGET_64BIT"
2380 cvtsi2sdq\t{%2, %0|%0, %2}
2381 cvtsi2sdq\t{%2, %0|%0, %2}
2382 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2383 [(set_attr "isa" "noavx,noavx,avx")
2384 (set_attr "type" "sseicvt")
2385 (set_attr "athlon_decode" "double,direct,*")
2386 (set_attr "amdfam10_decode" "vector,double,*")
2387 (set_attr "bdver1_decode" "double,direct,*")
2388 (set_attr "length_vex" "*,*,4")
2389 (set_attr "prefix_rex" "1,1,*")
2390 (set_attr "prefix" "orig,orig,vex")
2391 (set_attr "mode" "DF")])
;; Scalar double -> int conversion of element 0 of V2DF operand 1 into an
;; SImode GPR, expressed through UNSPEC_FIX_NOTRUNC and emitted as
;; cvtsd2si / vcvtsd2si.
2393 (define_insn "sse2_cvtsd2si"
2394 [(set (match_operand:SI 0 "register_operand" "=r,r")
2397 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2398 (parallel [(const_int 0)]))]
2399 UNSPEC_FIX_NOTRUNC))]
2401 "%vcvtsd2si\t{%1, %0|%0, %1}"
2402 [(set_attr "type" "sseicvt")
2403 (set_attr "athlon_decode" "double,vector")
2404 (set_attr "bdver1_decode" "double,double")
2405 (set_attr "prefix_rep" "1")
2406 (set_attr "prefix" "maybe_vex")
2407 (set_attr "mode" "SI")])
;; Same instruction as sse2_cvtsd2si, but the source is a plain DFmode
;; scalar operand (register or memory) rather than an element selected
;; from a V2DF vector.
2409 (define_insn "sse2_cvtsd2si_2"
2410 [(set (match_operand:SI 0 "register_operand" "=r,r")
2411 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2412 UNSPEC_FIX_NOTRUNC))]
2414 "%vcvtsd2si\t{%1, %0|%0, %1}"
2415 [(set_attr "type" "sseicvt")
2416 (set_attr "athlon_decode" "double,vector")
2417 (set_attr "amdfam10_decode" "double,double")
2418 (set_attr "bdver1_decode" "double,double")
2419 (set_attr "prefix_rep" "1")
2420 (set_attr "prefix" "maybe_vex")
2421 (set_attr "mode" "SI")])
;; 64-bit form of sse2_cvtsd2si: element 0 of V2DF operand 1 is converted
;; (UNSPEC_FIX_NOTRUNC) into a DImode GPR.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2423 (define_insn "sse2_cvtsd2siq"
2424 [(set (match_operand:DI 0 "register_operand" "=r,r")
2427 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2428 (parallel [(const_int 0)]))]
2429 UNSPEC_FIX_NOTRUNC))]
2430 "TARGET_SSE2 && TARGET_64BIT"
2431 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2432 [(set_attr "type" "sseicvt")
2433 (set_attr "athlon_decode" "double,vector")
2434 (set_attr "bdver1_decode" "double,double")
2435 (set_attr "prefix_rep" "1")
2436 (set_attr "prefix" "maybe_vex")
2437 (set_attr "mode" "DI")])
;; 64-bit form of sse2_cvtsd2si_2: a plain DFmode scalar operand is
;; converted (UNSPEC_FIX_NOTRUNC) into a DImode GPR.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2439 (define_insn "sse2_cvtsd2siq_2"
2440 [(set (match_operand:DI 0 "register_operand" "=r,r")
2441 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2442 UNSPEC_FIX_NOTRUNC))]
2443 "TARGET_SSE2 && TARGET_64BIT"
2444 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2445 [(set_attr "type" "sseicvt")
2446 (set_attr "athlon_decode" "double,vector")
2447 (set_attr "amdfam10_decode" "double,double")
2448 (set_attr "bdver1_decode" "double,double")
2449 (set_attr "prefix_rep" "1")
2450 (set_attr "prefix" "maybe_vex")
2451 (set_attr "mode" "DI")])
;; Scalar double -> int conversion of element 0 of V2DF operand 1 into an
;; SImode GPR using cvttsd2si, the truncating variant of the instruction
;; (compare sse2_cvtsd2si, which goes through UNSPEC_FIX_NOTRUNC).
2453 (define_insn "sse2_cvttsd2si"
2454 [(set (match_operand:SI 0 "register_operand" "=r,r")
2457 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2458 (parallel [(const_int 0)]))))]
2460 "%vcvttsd2si\t{%1, %0|%0, %1}"
2461 [(set_attr "type" "sseicvt")
2462 (set_attr "athlon_decode" "double,vector")
2463 (set_attr "amdfam10_decode" "double,double")
2464 (set_attr "bdver1_decode" "double,double")
2465 (set_attr "prefix_rep" "1")
2466 (set_attr "prefix" "maybe_vex")
2467 (set_attr "mode" "SI")])
;; 64-bit form of sse2_cvttsd2si: element 0 of V2DF operand 1 is converted
;; with cvttsd2si into a DImode GPR.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
2469 (define_insn "sse2_cvttsd2siq"
2470 [(set (match_operand:DI 0 "register_operand" "=r,r")
2473 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2474 (parallel [(const_int 0)]))))]
2475 "TARGET_SSE2 && TARGET_64BIT"
2476 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2477 [(set_attr "type" "sseicvt")
2478 (set_attr "athlon_decode" "double,vector")
2479 (set_attr "amdfam10_decode" "double,double")
2480 (set_attr "bdver1_decode" "double,double")
2481 (set_attr "prefix_rep" "1")
2482 (set_attr "prefix" "maybe_vex")
2483 (set_attr "mode" "DI")])
;; Packed int -> double conversion: all four elements of V4SI operand 1
;; are converted to a V4DF result with vcvtdq2pd (VEX encoded).
2485 (define_insn "floatv4siv4df2"
2486 [(set (match_operand:V4DF 0 "register_operand" "=x")
2487 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2489 "vcvtdq2pd\t{%1, %0|%0, %1}"
2490 [(set_attr "type" "ssecvt")
2491 (set_attr "prefix" "vex")
2492 (set_attr "mode" "V4DF")])
;; Packed int -> double conversion of elements 0..3 of V8SI operand 1 into
;; a V4DF result, emitted as vcvtdq2pd.  Operand 1 is printed with the %x
;; modifier (presumably the 128-bit name of the register, confirm against
;; the i386 operand-printing code).
2494 (define_insn "avx_cvtdq2pd256_2"
2495 [(set (match_operand:V4DF 0 "register_operand" "=x")
2498 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2499 (parallel [(const_int 0) (const_int 1)
2500 (const_int 2) (const_int 3)]))))]
2502 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2503 [(set_attr "type" "ssecvt")
2504 (set_attr "prefix" "vex")
2505 (set_attr "mode" "V4DF")])
;; Packed int -> double conversion of elements 0 and 1 of V4SI operand 1
;; into a V2DF result, emitted as cvtdq2pd / vcvtdq2pd.  In the Intel
;; dialect operand 1 is printed with the %q modifier.
2507 (define_insn "sse2_cvtdq2pd"
2508 [(set (match_operand:V2DF 0 "register_operand" "=x")
2511 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2512 (parallel [(const_int 0) (const_int 1)]))))]
2514 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
2515 [(set_attr "type" "ssecvt")
2516 (set_attr "prefix" "maybe_vex")
2517 (set_attr "mode" "V2DF")])
;; Packed double -> int conversion: V4DF operand 1 to a V4SI result
;; through UNSPEC_FIX_NOTRUNC, emitted as vcvtpd2dq.  The y mnemonic
;; suffix appears only in the AT&T dialect of the template.
2519 (define_insn "avx_cvtpd2dq256"
2520 [(set (match_operand:V4SI 0 "register_operand" "=x")
2521 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2522 UNSPEC_FIX_NOTRUNC))]
2524 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2525 [(set_attr "type" "ssecvt")
2526 (set_attr "prefix" "vex")
2527 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from the V4SI conversion
;; (UNSPEC form) of V4DF operand 1.  The preparation statement supplies
;; operand 2 as the V4SImode zero vector, which the matching insn
;; *avx_cvtpd2dq256_2 requires as a const0_operand.
2529 (define_expand "avx_cvtpd2dq256_2"
2530 [(set (match_operand:V8SI 0 "register_operand" "")
2532 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "")]
2536 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvtpd2dq256_2 expander: V4DF operand 1 is
;; converted to V4SI and combined with a V4SI zero operand (operand 2)
;; into a V8SI destination.  The destination is printed with the %x
;; modifier in the vcvtpd2dq template.
2538 (define_insn "*avx_cvtpd2dq256_2"
2539 [(set (match_operand:V8SI 0 "register_operand" "=x")
2541 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2543 (match_operand:V4SI 2 "const0_operand" "")))]
2545 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
2546 [(set_attr "type" "ssecvt")
2547 (set_attr "prefix" "vex")
2548 (set_attr "mode" "OI")])
;; Expander producing a V4SI destination from the V2SI conversion
;; (UNSPEC form) of V2DF operand 1.  The preparation statement supplies
;; operand 2 as the V2SImode zero vector, which the matching insn
;; *sse2_cvtpd2dq requires as a const0_operand.
2550 (define_expand "sse2_cvtpd2dq"
2551 [(set (match_operand:V4SI 0 "register_operand" "")
2553 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2557 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvtpd2dq expander: V2DF operand 1 is converted
;; to V2SI and combined with a V2SI zero operand (operand 2) into a V4SI
;; destination.  The output code returns either the vcvtpd2dq template
;; (with an AT&T-only x suffix) or the plain cvtpd2dq template.
2559 (define_insn "*sse2_cvtpd2dq"
2560 [(set (match_operand:V4SI 0 "register_operand" "=x")
2562 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2564 (match_operand:V2SI 2 "const0_operand" "")))]
2568 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2570 return "cvtpd2dq\t{%1, %0|%0, %1}";
2572 [(set_attr "type" "ssecvt")
2573 (set_attr "prefix_rep" "1")
2574 (set_attr "prefix_data16" "0")
2575 (set_attr "prefix" "maybe_vex")
2576 (set_attr "mode" "TI")
2577 (set_attr "amdfam10_decode" "double")
2578 (set_attr "athlon_decode" "vector")
2579 (set_attr "bdver1_decode" "double")])
;; Truncating packed double -> int conversion: (fix:V4SI ...) of V4DF
;; operand 1, emitted as vcvttpd2dq (AT&T-only y suffix), VEX encoded.
2581 (define_insn "fix_truncv4dfv4si2"
2582 [(set (match_operand:V4SI 0 "register_operand" "=x")
2583 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2585 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2586 [(set_attr "type" "ssecvt")
2587 (set_attr "prefix" "vex")
2588 (set_attr "mode" "OI")])
;; Expander producing a V8SI destination from (fix:V4SI ...) of V4DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V4SImode zero vector, which the matching insn *avx_cvttpd2dq256_2
;; requires as a const0_operand.
2590 (define_expand "avx_cvttpd2dq256_2"
2591 [(set (match_operand:V8SI 0 "register_operand" "")
2593 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" ""))
2596 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn matched by the avx_cvttpd2dq256_2 expander: (fix:V4SI ...) of V4DF
;; operand 1 is combined with a V4SI zero operand (operand 2) into a V8SI
;; destination.  The destination is printed with the %x modifier in the
;; vcvttpd2dq template.
2598 (define_insn "*avx_cvttpd2dq256_2"
2599 [(set (match_operand:V8SI 0 "register_operand" "=x")
2601 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
2602 (match_operand:V4SI 2 "const0_operand" "")))]
2604 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
2605 [(set_attr "type" "ssecvt")
2606 (set_attr "prefix" "vex")
2607 (set_attr "mode" "OI")])
;; Expander producing a V4SI destination from (fix:V2SI ...) of V2DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V2SImode zero vector, which the matching insn *sse2_cvttpd2dq requires
;; as a const0_operand.
2609 (define_expand "sse2_cvttpd2dq"
2610 [(set (match_operand:V4SI 0 "register_operand" "")
2612 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2615 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the sse2_cvttpd2dq expander: (fix:V2SI ...) of V2DF
;; operand 1 is combined with a V2SI zero operand (operand 2) into a V4SI
;; destination.  The output code returns either the vcvttpd2dq template
;; (with an AT&T-only x suffix) or the plain cvttpd2dq template.
2617 (define_insn "*sse2_cvttpd2dq"
2618 [(set (match_operand:V4SI 0 "register_operand" "=x")
2620 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2621 (match_operand:V2SI 2 "const0_operand" "")))]
2625 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2627 return "cvttpd2dq\t{%1, %0|%0, %1}";
2629 [(set_attr "type" "ssecvt")
2630 (set_attr "amdfam10_decode" "double")
2631 (set_attr "athlon_decode" "vector")
2632 (set_attr "bdver1_decode" "double")
2633 (set_attr "prefix" "maybe_vex")
2634 (set_attr "mode" "TI")])
;; Scalar double -> single conversion.  V2DF operand 2 goes through
;; (float_truncate:V2SF ...), with V4SF operand 1 as the other vector
;; input and a V4SF result.  The noavx alternatives tie operand 1 to the
;; destination and emit cvtsd2ss.  The avx alternative emits
;; three-operand vcvtsd2ss.
2636 (define_insn "sse2_cvtsd2ss"
2637 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2640 (float_truncate:V2SF
2641 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2642 (match_operand:V4SF 1 "register_operand" "0,0,x")
2646 cvtsd2ss\t{%2, %0|%0, %2}
2647 cvtsd2ss\t{%2, %0|%0, %2}
2648 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2649 [(set_attr "isa" "noavx,noavx,avx")
2650 (set_attr "type" "ssecvt")
2651 (set_attr "athlon_decode" "vector,double,*")
2652 (set_attr "amdfam10_decode" "vector,double,*")
2653 (set_attr "bdver1_decode" "direct,direct,*")
2654 (set_attr "prefix" "orig,orig,vex")
2655 (set_attr "mode" "SF")])
;; Scalar single -> double conversion.  The source is elements 0 and 1 of
;; V4SF operand 2, with V2DF operand 1 as the other vector input and a
;; V2DF result.  The noavx alternatives tie operand 1 to the destination
;; and emit cvtss2sd.  The avx alternative emits three-operand vcvtss2sd.
2657 (define_insn "sse2_cvtss2sd"
2658 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2662 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2663 (parallel [(const_int 0) (const_int 1)])))
2664 (match_operand:V2DF 1 "register_operand" "0,0,x")
2668 cvtss2sd\t{%2, %0|%0, %2}
2669 cvtss2sd\t{%2, %0|%0, %2}
2670 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2671 [(set_attr "isa" "noavx,noavx,avx")
2672 (set_attr "type" "ssecvt")
2673 (set_attr "amdfam10_decode" "vector,double,*")
2674 (set_attr "athlon_decode" "direct,direct,*")
2675 (set_attr "bdver1_decode" "direct,direct,*")
2676 (set_attr "prefix" "orig,orig,vex")
2677 (set_attr "mode" "DF")])
;; Packed double -> single conversion: (float_truncate:V4SF ...) of V4DF
;; operand 1, emitted as vcvtpd2ps (AT&T-only y suffix), VEX encoded.
2679 (define_insn "avx_cvtpd2ps256"
2680 [(set (match_operand:V4SF 0 "register_operand" "=x")
2681 (float_truncate:V4SF
2682 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2684 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2685 [(set_attr "type" "ssecvt")
2686 (set_attr "prefix" "vex")
2687 (set_attr "mode" "V4SF")])
;; Expander producing a V4SF destination from (float_truncate:V2SF ...) of
;; V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SFmode zero vector, which the matching insn *sse2_cvtpd2ps requires
;; as a const0_operand.
2689 (define_expand "sse2_cvtpd2ps"
2690 [(set (match_operand:V4SF 0 "register_operand" "")
2692 (float_truncate:V2SF
2693 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2696 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the sse2_cvtpd2ps expander: (float_truncate:V2SF ...)
;; of V2DF operand 1 is combined with a V2SF zero operand (operand 2) into
;; a V4SF destination.  The output code returns either the vcvtpd2ps
;; template (with an AT&T-only x suffix) or the plain cvtpd2ps template.
2698 (define_insn "*sse2_cvtpd2ps"
2699 [(set (match_operand:V4SF 0 "register_operand" "=x")
2701 (float_truncate:V2SF
2702 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2703 (match_operand:V2SF 2 "const0_operand" "")))]
2707 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2709 return "cvtpd2ps\t{%1, %0|%0, %1}";
2711 [(set_attr "type" "ssecvt")
2712 (set_attr "amdfam10_decode" "double")
2713 (set_attr "athlon_decode" "vector")
2714 (set_attr "bdver1_decode" "double")
2715 (set_attr "prefix_data16" "1")
2716 (set_attr "prefix" "maybe_vex")
2717 (set_attr "mode" "V4SF")])
;; Packed single -> double conversion: all four elements of V4SF operand 1
;; are widened into a V4DF result with vcvtps2pd (VEX encoded).
2719 (define_insn "avx_cvtps2pd256"
2720 [(set (match_operand:V4DF 0 "register_operand" "=x")
2722 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2724 "vcvtps2pd\t{%1, %0|%0, %1}"
2725 [(set_attr "type" "ssecvt")
2726 (set_attr "prefix" "vex")
2727 (set_attr "mode" "V4DF")])
;; Packed single -> double conversion of elements 0..3 of V8SF operand 1
;; into a V4DF result, emitted as vcvtps2pd with operand 1 printed through
;; the %x modifier.
2729 (define_insn "*avx_cvtps2pd256_2"
2730 [(set (match_operand:V4DF 0 "register_operand" "=x")
2733 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2734 (parallel [(const_int 0) (const_int 1)
2735 (const_int 2) (const_int 3)]))))]
2737 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2738 [(set_attr "type" "ssecvt")
2739 (set_attr "prefix" "vex")
2740 (set_attr "mode" "V4DF")])
;; Packed single -> double conversion of elements 0 and 1 of V4SF
;; operand 1 into a V2DF result, emitted as cvtps2pd / vcvtps2pd.  In the
;; Intel dialect operand 1 is printed with the %q modifier.
2742 (define_insn "sse2_cvtps2pd"
2743 [(set (match_operand:V2DF 0 "register_operand" "=x")
2746 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2747 (parallel [(const_int 0) (const_int 1)]))))]
2749 "%vcvtps2pd\t{%1, %0|%0, %q1}"
2750 [(set_attr "type" "ssecvt")
2751 (set_attr "amdfam10_decode" "direct")
2752 (set_attr "athlon_decode" "double")
2753 (set_attr "bdver1_decode" "double")
2754 (set_attr "prefix_data16" "0")
2755 (set_attr "prefix" "maybe_vex")
2756 (set_attr "mode" "V2DF")])
;; Widen the high half of V4SF operand 1 to V2DF in two steps.  A first
;; set applies the selection [6 7 2 3] involving operand 1 and writes a
;; fresh V4SF temporary (operands[2]).  A second set converts elements 0
;; and 1 into the V2DF destination.
2758 (define_expand "vec_unpacks_hi_v4sf"
2763 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2764 (parallel [(const_int 6) (const_int 7)
2765 (const_int 2) (const_int 3)])))
2766 (set (match_operand:V2DF 0 "register_operand" "")
2770 (parallel [(const_int 0) (const_int 1)]))))]
2772 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the high half of V8SF operand 1 to V4DF in two steps.  Elements
;; 4..7 are first selected into a fresh V4SF temporary (operands[2]), and
;; a second set then produces the V4DF destination.
2774 (define_expand "vec_unpacks_hi_v8sf"
2777 (match_operand:V8SF 1 "nonimmediate_operand" "")
2778 (parallel [(const_int 4) (const_int 5)
2779 (const_int 6) (const_int 7)])))
2780 (set (match_operand:V4DF 0 "register_operand" "")
2784 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Widen the low half of V4SF operand 1 to V2DF: elements 0 and 1 are the
;; source of the V2DF destination (same shape as sse2_cvtps2pd).
2786 (define_expand "vec_unpacks_lo_v4sf"
2787 [(set (match_operand:V2DF 0 "register_operand" "")
2790 (match_operand:V4SF 1 "nonimmediate_operand" "")
2791 (parallel [(const_int 0) (const_int 1)]))))]
;; Widen the low half of V8SF operand 1 to V4DF: elements 0..3 are the
;; source of the V4DF destination (same shape as *avx_cvtps2pd256_2).
2794 (define_expand "vec_unpacks_lo_v8sf"
2795 [(set (match_operand:V4DF 0 "register_operand" "")
2798 (match_operand:V8SF 1 "nonimmediate_operand" "")
2799 (parallel [(const_int 0) (const_int 1)
2800 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode that its unpacked
;; (half-count, double-width) elements convert to:
;; V8HI -> V4SF, V4SI -> V2DF, V16HI -> V8SF, V8SI -> V4DF.
2803 (define_mode_attr sseunpackfltmode
2804 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
;; Signed unpack-and-convert of the high half, over the VI2_AVX2 modes.
;; The input is first widened with vec_unpacks_hi_<mode> into a temporary
;; of <sseunpackmode>, then a FLOAT of that temporary is stored into the
;; <sseunpackfltmode> destination.
2806 (define_expand "vec_unpacks_float_hi_<mode>"
2807 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2808 (match_operand:VI2_AVX2 1 "register_operand" "")]
2811 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2813 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
2814 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2815 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed unpack-and-convert of the low half, over the VI2_AVX2 modes.
;; The input is first widened with vec_unpacks_lo_<mode> into a temporary
;; of <sseunpackmode>, then a FLOAT of that temporary is stored into the
;; <sseunpackfltmode> destination.
2819 (define_expand "vec_unpacks_float_lo_<mode>"
2820 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2821 (match_operand:VI2_AVX2 1 "register_operand" "")]
2824 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2826 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
2827 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2828 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the high half, over the VI2_AVX2 modes.
;; The input is widened with vec_unpacku_hi_<mode> into a temporary of
;; <sseunpackmode>, then a FLOAT of that temporary is stored into the
;; destination.  A signed FLOAT is used on the widened value (presumably
;; safe because the unsigned unpack leaves it non-negative, confirm).
2832 (define_expand "vec_unpacku_float_hi_<mode>"
2833 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2834 (match_operand:VI2_AVX2 1 "register_operand" "")]
2837 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2839 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
2840 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2841 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned unpack-and-convert of the low half, over the VI2_AVX2 modes.
;; The input is widened with vec_unpacku_lo_<mode> into a temporary of
;; <sseunpackmode>, then a FLOAT of that temporary is stored into the
;; destination.  A signed FLOAT is used on the widened value (presumably
;; safe because the unsigned unpack leaves it non-negative, confirm).
2845 (define_expand "vec_unpacku_float_lo_<mode>"
2846 [(match_operand:<sseunpackfltmode> 0 "register_operand" "")
2847 (match_operand:VI2_AVX2 1 "register_operand" "")]
2850 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
2852 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
2853 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
2854 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed int -> double conversion of the high half of V4SI operand 1.
;; The selection [2 3 2 3] first moves elements 2 and 3 into the low
;; positions of a fresh V4SI temporary (operands[2]).  Elements 0 and 1 of
;; that temporary are then converted into the V2DF destination.
2858 (define_expand "vec_unpacks_float_hi_v4si"
2861 (match_operand:V4SI 1 "nonimmediate_operand" "")
2862 (parallel [(const_int 2) (const_int 3)
2863 (const_int 2) (const_int 3)])))
2864 (set (match_operand:V2DF 0 "register_operand" "")
2868 (parallel [(const_int 0) (const_int 1)]))))]
2870 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed int -> double conversion of the low half of V4SI operand 1:
;; elements 0 and 1 are the source of the V2DF destination (same shape as
;; sse2_cvtdq2pd).
2872 (define_expand "vec_unpacks_float_lo_v4si"
2873 [(set (match_operand:V2DF 0 "register_operand" "")
2876 (match_operand:V4SI 1 "nonimmediate_operand" "")
2877 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed int -> double conversion of the high half of V8SI operand 1.
;; Elements 4..7 are first selected into a fresh V4SI temporary
;; (operands[2]), and a second set then produces the V4DF destination.
2880 (define_expand "vec_unpacks_float_hi_v8si"
2883 (match_operand:V8SI 1 "nonimmediate_operand" "")
2884 (parallel [(const_int 4) (const_int 5)
2885 (const_int 6) (const_int 7)])))
2886 (set (match_operand:V4DF 0 "register_operand" "")
2890 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed int -> double conversion of the low half of V8SI operand 1:
;; elements 0..3 are the source of the V4DF destination (same shape as
;; avx_cvtdq2pd256_2).
2892 (define_expand "vec_unpacks_float_lo_v8si"
2893 [(set (match_operand:V4DF 0 "register_operand" "")
2896 (match_operand:V8SI 1 "nonimmediate_operand" "")
2897 (parallel [(const_int 0) (const_int 1)
2898 (const_int 2) (const_int 3)]))))]
;; Unsigned int -> double conversion of the high half of V4SI operand 1,
;; built from the signed conversion plus a correction:
;;   - elements 2 and 3 are moved down ([2 3 2 3]) into V4SI temp
;;     operands[5] and converted into V2DF temp operands[6],
;;   - a mask is formed from (operands[6] < 0), with operands[3] holding
;;     the V2DF zero vector,
;;   - the mask is ANDed with operands[4], a V2DF constant built from
;;     2**32 (real_ldexp of 1.0 by 32), giving operands[8],
;;   - operands[6] + operands[8] is stored into operand 0.
;; Net effect: 2**32 is added to each lane whose signed conversion came
;; out negative.
2901 (define_expand "vec_unpacku_float_hi_v4si"
2904 (match_operand:V4SI 1 "nonimmediate_operand" "")
2905 (parallel [(const_int 2) (const_int 3)
2906 (const_int 2) (const_int 3)])))
2911 (parallel [(const_int 0) (const_int 1)]))))
2913 (lt:V2DF (match_dup 6) (match_dup 3)))
2915 (and:V2DF (match_dup 7) (match_dup 4)))
2916 (set (match_operand:V2DF 0 "register_operand" "")
2917 (plus:V2DF (match_dup 6) (match_dup 8)))]
2920 REAL_VALUE_TYPE TWO32r;
2924 real_ldexp (&TWO32r, &dconst1, 32);
2925 x = const_double_from_real_value (TWO32r, DFmode);
2927 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2928 operands[4] = force_reg (V2DFmode,
2929 ix86_build_const_vector (V2DFmode, 1, x));
2931 operands[5] = gen_reg_rtx (V4SImode);
2933 for (i = 6; i < 9; i++)
2934 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned int -> double conversion of the low half of V4SI operand 1,
;; built from the signed conversion plus a correction:
;;   - elements 0 and 1 are converted into V2DF temp operands[5],
;;   - a mask is formed from (operands[5] < 0), with operands[3] holding
;;     the V2DF zero vector,
;;   - the mask is ANDed with operands[4], a V2DF constant built from
;;     2**32 (real_ldexp of 1.0 by 32), giving operands[7],
;;   - operands[5] + operands[7] is stored into operand 0.
2937 (define_expand "vec_unpacku_float_lo_v4si"
2941 (match_operand:V4SI 1 "nonimmediate_operand" "")
2942 (parallel [(const_int 0) (const_int 1)]))))
2944 (lt:V2DF (match_dup 5) (match_dup 3)))
2946 (and:V2DF (match_dup 6) (match_dup 4)))
2947 (set (match_operand:V2DF 0 "register_operand" "")
2948 (plus:V2DF (match_dup 5) (match_dup 7)))]
2951 REAL_VALUE_TYPE TWO32r;
2955 real_ldexp (&TWO32r, &dconst1, 32);
2956 x = const_double_from_real_value (TWO32r, DFmode);
2958 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2959 operands[4] = force_reg (V2DFmode,
2960 ix86_build_const_vector (V2DFmode, 1, x));
2962 for (i = 5; i < 8; i++)
2963 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned int -> double conversion of the high half of V8SI operand 1,
;; emitted explicitly from C:
;;   tmp[0] = V4DF zero, tmp[1] = V4DF constant built from 2**32,
;;   tmp[5] = high V4SI half (gen_vec_extract_hi_v8si),
;;   tmp[2] = signed conversion of tmp[5] (gen_floatv4siv4df2),
;;   tmp[3] = (tmp[2] < tmp[0]) mask,
;;   tmp[4] = tmp[3] AND tmp[1],
;;   operand 0 = tmp[2] + tmp[4].
2966 (define_expand "vec_unpacku_float_hi_v8si"
2967 [(match_operand:V4DF 0 "register_operand" "")
2968 (match_operand:V8SI 1 "register_operand" "")]
2971 REAL_VALUE_TYPE TWO32r;
2975 real_ldexp (&TWO32r, &dconst1, 32);
2976 x = const_double_from_real_value (TWO32r, DFmode);
2978 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
2979 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
2980 tmp[5] = gen_reg_rtx (V4SImode);
2982 for (i = 2; i < 5; i++)
2983 tmp[i] = gen_reg_rtx (V4DFmode);
2984 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
2985 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
2986 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
2987 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
2988 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
2989 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned int -> double conversion of the low half of V8SI operand 1,
;; emitted explicitly from C:
;;   tmp[0] = V4DF zero, tmp[1] = V4DF constant built from 2**32,
;;   tmp[2] = signed conversion of the low four elements
;;            (gen_avx_cvtdq2pd256_2 applied directly to operand 1),
;;   tmp[3] = (tmp[2] < tmp[0]) mask,
;;   tmp[4] = tmp[3] AND tmp[1],
;;   operand 0 = tmp[2] + tmp[4].
2993 (define_expand "vec_unpacku_float_lo_v8si"
2994 [(match_operand:V4DF 0 "register_operand" "")
2995 (match_operand:V8SI 1 "nonimmediate_operand" "")]
2998 REAL_VALUE_TYPE TWO32r;
3002 real_ldexp (&TWO32r, &dconst1, 32);
3003 x = const_double_from_real_value (TWO32r, DFmode);
3005 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
3006 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
3008 for (i = 2; i < 5; i++)
3009 tmp[i] = gen_reg_rtx (V4DFmode);
3010 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
3011 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
3012 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
3013 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
3014 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Pack two V4DF inputs into one V8SF result.  Operands 1 and 2 are each
;; narrowed with (float_truncate:V4SF ...) into fresh V4SF temporaries
;; (operands[3] and operands[4]), and a final set builds the V8SF
;; destination.
3018 (define_expand "vec_pack_trunc_v4df"
3020 (float_truncate:V4SF
3021 (match_operand:V4DF 1 "nonimmediate_operand" "")))
3023 (float_truncate:V4SF
3024 (match_operand:V4DF 2 "nonimmediate_operand" "")))
3025 (set (match_operand:V8SF 0 "register_operand" "")
3031 operands[3] = gen_reg_rtx (V4SFmode);
3032 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF inputs into one V4SF result.  Each input is narrowed with
;; sse2_cvtpd2ps into its own V4SF temporary (r1, r2), and sse_movlhps
;; then combines r1 and r2 into operand 0.
3035 (define_expand "vec_pack_trunc_v2df"
3036 [(match_operand:V4SF 0 "register_operand" "")
3037 (match_operand:V2DF 1 "nonimmediate_operand" "")
3038 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3043 r1 = gen_reg_rtx (V4SFmode);
3044 r2 = gen_reg_rtx (V4SFmode);
3046 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
3047 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
3048 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V4DF inputs into one V8SI result using truncating conversion.
;; Each input goes through avx_cvttpd2dq256_2 into its own V8SI temporary
;; (r1, r2), and avx_vperm2f128v8si3 with immediate 0x20 then combines the
;; two temporaries into operand 0.
3052 (define_expand "vec_pack_sfix_trunc_v4df"
3053 [(match_operand:V8SI 0 "register_operand" "")
3054 (match_operand:V4DF 1 "nonimmediate_operand" "")
3055 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3060 r1 = gen_reg_rtx (V8SImode);
3061 r2 = gen_reg_rtx (V8SImode);
3063 emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1]));
3064 emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2]));
3065 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Pack two V2DF inputs into one V4SI result using truncating conversion.
;; Each input goes through sse2_cvttpd2dq into its own V4SI temporary
;; (r1, r2).  The destination and both temporaries are then viewed as
;; V2DI and combined with vec_interleave_lowv2di.
3069 (define_expand "vec_pack_sfix_trunc_v2df"
3070 [(match_operand:V4SI 0 "register_operand" "")
3071 (match_operand:V2DF 1 "nonimmediate_operand" "")
3072 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3077 r1 = gen_reg_rtx (V4SImode);
3078 r2 = gen_reg_rtx (V4SImode);
3080 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
3081 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
3082 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3083 gen_lowpart (V2DImode, r1),
3084 gen_lowpart (V2DImode, r2)));
;; Maps a double-precision vector mode to the integer vector mode that
;; results from packing two such vectors: V4DF -> V8SI, V2DF -> V4SI.
3088 (define_mode_attr ssepackfltmode
3089 [(V4DF "V8SI") (V2DF "V4SI")])
;; Pack two VF2 (double-precision) inputs into one unsigned integer
;; vector, built on the signed pack:
;;   1. ix86_expand_adjust_ufix_to_sfix_si adjusts each input (tmp[0],
;;      tmp[1]) and returns a per-input correction value (tmp[2], tmp[3]).
;;   2. vec_pack_sfix_trunc_<mode> packs the adjusted inputs into tmp[4].
;;   3. The two correction values are combined into tmp[5] with
;;      ix86_expand_vec_extract_even_odd.  This is done directly in the
;;      integer mode when the result is V4SI or TARGET_AVX2 is set, and
;;      otherwise through V8SF lowparts with the result viewed back as V8SI.
;;   4. tmp[4] XOR tmp[5] is computed, targeting operand 0, and copied
;;      into operand 0 if the binop landed elsewhere.
3091 (define_expand "vec_pack_ufix_trunc_<mode>"
3092 [(match_operand:<ssepackfltmode> 0 "register_operand" "")
3093 (match_operand:VF2 1 "register_operand" "")
3094 (match_operand:VF2 2 "register_operand" "")]
3098 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3099 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
3100 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
3101 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
3102 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
3104 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
3105 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
3109 tmp[5] = gen_reg_rtx (V8SFmode);
3110 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
3111 gen_lowpart (V8SFmode, tmp[3]), 0);
3112 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
3114 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
3115 operands[0], 0, OPTAB_DIRECT);
3116 if (tmp[6] != operands[0])
3117 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF inputs into one V8SI result using the non-truncating
;; conversion.  Each input goes through avx_cvtpd2dq256_2 into its own
;; V8SI temporary (r1, r2), and avx_vperm2f128v8si3 with immediate 0x20
;; then combines the two temporaries into operand 0.
3121 (define_expand "vec_pack_sfix_v4df"
3122 [(match_operand:V8SI 0 "register_operand" "")
3123 (match_operand:V4DF 1 "nonimmediate_operand" "")
3124 (match_operand:V4DF 2 "nonimmediate_operand" "")]
3129 r1 = gen_reg_rtx (V8SImode);
3130 r2 = gen_reg_rtx (V8SImode);
3132 emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1]));
3133 emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2]));
3134 emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20)));
;; Pack two V2DF inputs into one V4SI result using the non-truncating
;; conversion.  Each input goes through sse2_cvtpd2dq into its own V4SI
;; temporary (r1, r2).  The destination and both temporaries are then
;; viewed as V2DI and combined with vec_interleave_lowv2di.
3138 (define_expand "vec_pack_sfix_v2df"
3139 [(match_operand:V4SI 0 "register_operand" "")
3140 (match_operand:V2DF 1 "nonimmediate_operand" "")
3141 (match_operand:V2DF 2 "nonimmediate_operand" "")]
3146 r1 = gen_reg_rtx (V4SImode);
3147 r2 = gen_reg_rtx (V4SImode);
3149 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
3150 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
3151 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
3152 gen_lowpart (V2DImode, r1),
3153 gen_lowpart (V2DImode, r2)));
3157 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3159 ;; Parallel single-precision floating point element swizzling
3161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn for arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use.  The insn is emitted
;; into that destination, which is copied to operand 0 if it differs.
3163 (define_expand "sse_movhlps_exp"
3164 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3167 (match_operand:V4SF 1 "nonimmediate_operand" "")
3168 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3169 (parallel [(const_int 6)
3175 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3177 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
3179 /* Fix up the destination if needed. */
3180 if (dst != operands[0])
3181 emit_move_insn (operands[0], dst);
;; movhlps-style shuffle of two V4SF operands, with five alternatives:
;;   0/1: register-register movhlps (noavx) / vmovhlps (avx),
;;   2/3: operand 2 in offsettable memory, loaded with movlps / vmovlps
;;        using the %H form of the address,
;;   4:   destination in memory, stored with movhps / vmovhps.
;; The insn condition rejects the case where operands 1 and 2 are both
;; memory.
3186 (define_insn "sse_movhlps"
3187 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3190 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3191 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
3192 (parallel [(const_int 6)
3196 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3198 movhlps\t{%2, %0|%0, %2}
3199 vmovhlps\t{%2, %1, %0|%0, %1, %2}
3200 movlps\t{%H2, %0|%0, %H2}
3201 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
3202 %vmovhps\t{%2, %0|%0, %2}"
3203 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3204 (set_attr "type" "ssemov")
3205 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3206 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn for arbitrary
;; nonimmediate operands.  ix86_fixup_binary_operands legitimizes the
;; operands and returns the destination to use.  The insn is emitted
;; into that destination, which is copied to operand 0 if it differs.
3208 (define_expand "sse_movlhps_exp"
3209 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3212 (match_operand:V4SF 1 "nonimmediate_operand" "")
3213 (match_operand:V4SF 2 "nonimmediate_operand" ""))
3214 (parallel [(const_int 0)
3220 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3222 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
3224 /* Fix up the destination if needed. */
3225 if (dst != operands[0])
3226 emit_move_insn (operands[0], dst);
;; movlhps-style shuffle of two V4SF operands, with five alternatives:
;;   0/1: register-register movlhps (noavx) / vmovlhps (avx),
;;   2/3: movhps (noavx) / vmovhps (avx) forms,
;;   4:   destination in offsettable memory, written with movlps /
;;        vmovlps to the %H form of the destination address.
;; The insn condition defers to ix86_binary_operator_ok for operand
;; validity.
3231 (define_insn "sse_movlhps"
3232 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3235 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3236 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
3237 (parallel [(const_int 0)
3241 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
3243 movlhps\t{%2, %0|%0, %2}
3244 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3245 movhps\t{%2, %0|%0, %2}
3246 vmovhps\t{%2, %1, %0|%0, %1, %2}
3247 %vmovlps\t{%2, %H0|%H0, %2}"
3248 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3249 (set_attr "type" "ssemov")
3250 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3251 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
3253 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhps256: V8SF insn emitting vunpckhps.  Operand 1 must be a
;; register, operand 2 may be a register or memory.  The selection vector is
;; 2,10,3,11,6,14,7,15.
3254 (define_insn "avx_unpckhps256"
3255 [(set (match_operand:V8SF 0 "register_operand" "=x")
3258 (match_operand:V8SF 1 "register_operand" "x")
3259 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3260 (parallel [(const_int 2) (const_int 10)
3261 (const_int 3) (const_int 11)
3262 (const_int 6) (const_int 14)
3263 (const_int 7) (const_int 15)])))]
3265 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3266 [(set_attr "type" "sselog")
3267 (set_attr "prefix" "vex")
3268 (set_attr "mode" "V8SF")])
;; Expander vec_interleave_highv8sf.  The template holds three selections:
;; 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15 (the same vectors used by
;; avx_unpcklps256 and avx_unpckhps256), followed by a selection of elements
;; 4-7 and 12-15 whose result is stored in operand 0.  Operands 3 and 4 are
;; fresh V8SF pseudos allocated in the preparation code.
3270 (define_expand "vec_interleave_highv8sf"
3274 (match_operand:V8SF 1 "register_operand" "x")
3275 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3276 (parallel [(const_int 0) (const_int 8)
3277 (const_int 1) (const_int 9)
3278 (const_int 4) (const_int 12)
3279 (const_int 5) (const_int 13)])))
3285 (parallel [(const_int 2) (const_int 10)
3286 (const_int 3) (const_int 11)
3287 (const_int 6) (const_int 14)
3288 (const_int 7) (const_int 15)])))
3289 (set (match_operand:V8SF 0 "register_operand" "")
3294 (parallel [(const_int 4) (const_int 5)
3295 (const_int 6) (const_int 7)
3296 (const_int 12) (const_int 13)
3297 (const_int 14) (const_int 15)])))]
3300 operands[3] = gen_reg_rtx (V8SFmode);
3301 operands[4] = gen_reg_rtx (V8SFmode);
;; vec_interleave_highv4sf: V4SF insn with selection vector 2,6,3,7.
;; Alternative 0 (noavx, operand 1 tied to the destination) emits unpckhps;
;; alternative 1 (avx) emits the three-operand vunpckhps.
3304 (define_insn "vec_interleave_highv4sf"
3305 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3308 (match_operand:V4SF 1 "register_operand" "0,x")
3309 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3310 (parallel [(const_int 2) (const_int 6)
3311 (const_int 3) (const_int 7)])))]
3314 unpckhps\t{%2, %0|%0, %2}
3315 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3316 [(set_attr "isa" "noavx,avx")
3317 (set_attr "type" "sselog")
3318 (set_attr "prefix" "orig,vex")
3319 (set_attr "mode" "V4SF")])
3321 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpcklps256: V8SF insn emitting vunpcklps.  Operand 1 must be a
;; register, operand 2 may be a register or memory.  The selection vector is
;; 0,8,1,9,4,12,5,13.
3322 (define_insn "avx_unpcklps256"
3323 [(set (match_operand:V8SF 0 "register_operand" "=x")
3326 (match_operand:V8SF 1 "register_operand" "x")
3327 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3328 (parallel [(const_int 0) (const_int 8)
3329 (const_int 1) (const_int 9)
3330 (const_int 4) (const_int 12)
3331 (const_int 5) (const_int 13)])))]
3333 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3334 [(set_attr "type" "sselog")
3335 (set_attr "prefix" "vex")
3336 (set_attr "mode" "V8SF")])
;; Expander vec_interleave_lowv8sf.  The template holds three selections:
;; 0,8,1,9,4,12,5,13 and 2,10,3,11,6,14,7,15 (the same vectors used by
;; avx_unpcklps256 and avx_unpckhps256), followed by a selection of elements
;; 0-3 and 8-11 whose result is stored in operand 0.  Operands 3 and 4 are
;; fresh V8SF pseudos allocated in the preparation code.
3338 (define_expand "vec_interleave_lowv8sf"
3342 (match_operand:V8SF 1 "register_operand" "x")
3343 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3344 (parallel [(const_int 0) (const_int 8)
3345 (const_int 1) (const_int 9)
3346 (const_int 4) (const_int 12)
3347 (const_int 5) (const_int 13)])))
3353 (parallel [(const_int 2) (const_int 10)
3354 (const_int 3) (const_int 11)
3355 (const_int 6) (const_int 14)
3356 (const_int 7) (const_int 15)])))
3357 (set (match_operand:V8SF 0 "register_operand" "")
3362 (parallel [(const_int 0) (const_int 1)
3363 (const_int 2) (const_int 3)
3364 (const_int 8) (const_int 9)
3365 (const_int 10) (const_int 11)])))]
3368 operands[3] = gen_reg_rtx (V8SFmode);
3369 operands[4] = gen_reg_rtx (V8SFmode);
;; vec_interleave_lowv4sf: V4SF insn with selection vector 0,4,1,5.
;; Alternative 0 (noavx, operand 1 tied to the destination) emits unpcklps;
;; alternative 1 (avx) emits the three-operand vunpcklps.
3372 (define_insn "vec_interleave_lowv4sf"
3373 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3376 (match_operand:V4SF 1 "register_operand" "0,x")
3377 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
3378 (parallel [(const_int 0) (const_int 4)
3379 (const_int 1) (const_int 5)])))]
3382 unpcklps\t{%2, %0|%0, %2}
3383 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3384 [(set_attr "isa" "noavx,avx")
3385 (set_attr "type" "sselog")
3386 (set_attr "prefix" "orig,vex")
3387 (set_attr "mode" "V4SF")])
3389 ;; These are modeled with the same vec_concat as the others so that we
3390 ;; capture users of shufps that can use the new instructions
;; avx_movshdup256: V8SF insn emitting vmovshdup from a register-or-memory
;; operand 1.  The selection vector repeats each odd index:
;; 1,1,3,3,5,5,7,7.
3391 (define_insn "avx_movshdup256"
3392 [(set (match_operand:V8SF 0 "register_operand" "=x")
3395 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3397 (parallel [(const_int 1) (const_int 1)
3398 (const_int 3) (const_int 3)
3399 (const_int 5) (const_int 5)
3400 (const_int 7) (const_int 7)])))]
3402 "vmovshdup\t{%1, %0|%0, %1}"
3403 [(set_attr "type" "sse")
3404 (set_attr "prefix" "vex")
3405 (set_attr "mode" "V8SF")])
;; sse3_movshdup: V4SF counterpart of avx_movshdup256.  Emits %vmovshdup
;; from a register-or-memory operand 1; the insn is marked prefix_rep = 1
;; with a maybe_vex prefix.
3407 (define_insn "sse3_movshdup"
3408 [(set (match_operand:V4SF 0 "register_operand" "=x")
3411 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3413 (parallel [(const_int 1)
3418 "%vmovshdup\t{%1, %0|%0, %1}"
3419 [(set_attr "type" "sse")
3420 (set_attr "prefix_rep" "1")
3421 (set_attr "prefix" "maybe_vex")
3422 (set_attr "mode" "V4SF")])
;; avx_movsldup256: V8SF insn emitting vmovsldup from a register-or-memory
;; operand 1.  The selection vector repeats each even index:
;; 0,0,2,2,4,4,6,6.
3424 (define_insn "avx_movsldup256"
3425 [(set (match_operand:V8SF 0 "register_operand" "=x")
3428 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3430 (parallel [(const_int 0) (const_int 0)
3431 (const_int 2) (const_int 2)
3432 (const_int 4) (const_int 4)
3433 (const_int 6) (const_int 6)])))]
3435 "vmovsldup\t{%1, %0|%0, %1}"
3436 [(set_attr "type" "sse")
3437 (set_attr "prefix" "vex")
3438 (set_attr "mode" "V8SF")])
;; sse3_movsldup: V4SF counterpart of avx_movsldup256.  Emits %vmovsldup
;; from a register-or-memory operand 1; the insn is marked prefix_rep = 1
;; with a maybe_vex prefix.
3440 (define_insn "sse3_movsldup"
3441 [(set (match_operand:V4SF 0 "register_operand" "=x")
3444 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3446 (parallel [(const_int 0)
3451 "%vmovsldup\t{%1, %0|%0, %1}"
3452 [(set_attr "type" "sse")
3453 (set_attr "prefix_rep" "1")
3454 (set_attr "prefix" "maybe_vex")
3455 (set_attr "mode" "V4SF")])
;; Expander avx_shufps256: turns the immediate in operand 3 into the eight
;; explicit element indices expected by avx_shufps256_1.  The four 2-bit
;; fields of the mask yield indices f0, f1, f2 + 8, f3 + 8, and then the same
;; four fields again offset by a further 4 (f0 + 4, f1 + 4, f2 + 12,
;; f3 + 12).
3457 (define_expand "avx_shufps256"
3458 [(match_operand:V8SF 0 "register_operand" "")
3459 (match_operand:V8SF 1 "register_operand" "")
3460 (match_operand:V8SF 2 "nonimmediate_operand" "")
3461 (match_operand:SI 3 "const_int_operand" "")]
3464 int mask = INTVAL (operands[3]);
3465 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3466 GEN_INT ((mask >> 0) & 3),
3467 GEN_INT ((mask >> 2) & 3),
3468 GEN_INT (((mask >> 4) & 3) + 8),
3469 GEN_INT (((mask >> 6) & 3) + 8),
3470 GEN_INT (((mask >> 0) & 3) + 4),
3471 GEN_INT (((mask >> 2) & 3) + 4),
3472 GEN_INT (((mask >> 4) & 3) + 12),
3473 GEN_INT (((mask >> 6) & 3) + 12)));
3477 ;; Each 2-bit field of the mask selects 2 elements, one in each 4-element half.
;; avx_shufps256_1: V8SF shuffle with eight explicit index operands (3-10),
;; each restricted by a range predicate.  The insn condition additionally
;; requires operands 7-10 to equal operands 3-6 plus 4.  The output code
;; rebuilds the 8-bit immediate from operands 3-6 (two bits each, operands 5
;; and 6 rebased by 8), stores it back in operand 3 and emits vshufps.
3478 (define_insn "avx_shufps256_1"
3479 [(set (match_operand:V8SF 0 "register_operand" "=x")
3482 (match_operand:V8SF 1 "register_operand" "x")
3483 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3484 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3485 (match_operand 4 "const_0_to_3_operand" "")
3486 (match_operand 5 "const_8_to_11_operand" "")
3487 (match_operand 6 "const_8_to_11_operand" "")
3488 (match_operand 7 "const_4_to_7_operand" "")
3489 (match_operand 8 "const_4_to_7_operand" "")
3490 (match_operand 9 "const_12_to_15_operand" "")
3491 (match_operand 10 "const_12_to_15_operand" "")])))]
3493 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3494 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3495 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3496 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3499 mask = INTVAL (operands[3]);
3500 mask |= INTVAL (operands[4]) << 2;
3501 mask |= (INTVAL (operands[5]) - 8) << 4;
3502 mask |= (INTVAL (operands[6]) - 8) << 6;
3503 operands[3] = GEN_INT (mask);
3505 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3507 [(set_attr "type" "sselog")
3508 (set_attr "length_immediate" "1")
3509 (set_attr "prefix" "vex")
3510 (set_attr "mode" "V8SF")])
;; Expander sse_shufps: splits the immediate in operand 3 into four 2-bit
;; fields and hands them to gen_sse_shufps_v4sf as explicit element indices;
;; the upper two fields are offset by 4.
3512 (define_expand "sse_shufps"
3513 [(match_operand:V4SF 0 "register_operand" "")
3514 (match_operand:V4SF 1 "register_operand" "")
3515 (match_operand:V4SF 2 "nonimmediate_operand" "")
3516 (match_operand:SI 3 "const_int_operand" "")]
3519 int mask = INTVAL (operands[3]);
3520 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3521 GEN_INT ((mask >> 0) & 3),
3522 GEN_INT ((mask >> 2) & 3),
3523 GEN_INT (((mask >> 4) & 3) + 4),
3524 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse_shufps_<mode>: shuffle over the VI4F_128 modes, selecting from the
;; concatenation of operands 1 and 2 with four explicit indices (operands 3
;; and 4 in 0..3, operands 5 and 6 in 4..7).  The output code packs the
;; indices back into one immediate (operands 5 and 6 rebased by 4), stores
;; it in operand 3, and returns the shufps or vshufps template according to
;; which_alternative.
3528 (define_insn "sse_shufps_<mode>"
3529 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3530 (vec_select:VI4F_128
3531 (vec_concat:<ssedoublevecmode>
3532 (match_operand:VI4F_128 1 "register_operand" "0,x")
3533 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3534 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3535 (match_operand 4 "const_0_to_3_operand" "")
3536 (match_operand 5 "const_4_to_7_operand" "")
3537 (match_operand 6 "const_4_to_7_operand" "")])))]
3541 mask |= INTVAL (operands[3]) << 0;
3542 mask |= INTVAL (operands[4]) << 2;
3543 mask |= (INTVAL (operands[5]) - 4) << 4;
3544 mask |= (INTVAL (operands[6]) - 4) << 6;
3545 operands[3] = GEN_INT (mask);
3547 switch (which_alternative)
3550 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3552 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3557 [(set_attr "isa" "noavx,avx")
3558 (set_attr "type" "sselog")
3559 (set_attr "length_immediate" "1")
3560 (set_attr "prefix" "orig,vex")
3561 (set_attr "mode" "V4SF")])
;; sse_storehps: produces a V2SF value from elements 2 and 3 of the V4SF
;; operand 1.  Alternative 0 stores to memory with %vmovhps, alternative 1
;; is register to register via %vmovhlps, and alternative 2 loads from %H1
;; of an offsettable memory source with %vmovlps.
3563 (define_insn "sse_storehps"
3564 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3566 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3567 (parallel [(const_int 2) (const_int 3)])))]
3570 %vmovhps\t{%1, %0|%0, %1}
3571 %vmovhlps\t{%1, %d0|%d0, %1}
3572 %vmovlps\t{%H1, %d0|%d0, %H1}"
3573 [(set_attr "type" "ssemov")
3574 (set_attr "prefix" "maybe_vex")
3575 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  The preparation code passes the operands
;; through ix86_fixup_binary_operands, emits gen_sse_loadhps into the
;; destination that helper returns, and copies the result into operand 0
;; when the returned destination differs from it.
3577 (define_expand "sse_loadhps_exp"
3578 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3581 (match_operand:V4SF 1 "nonimmediate_operand" "")
3582 (parallel [(const_int 0) (const_int 1)]))
3583 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3586 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3588 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3590 /* Fix up the destination if needed. */
3591 if (dst != operands[0])
3592 emit_move_insn (operands[0], dst);
;; sse_loadhps: combines elements 0 and 1 of the V4SF operand 1 with the
;; V2SF operand 2.  Alternatives 0/1 emit movhps/vmovhps with operand 2 in
;; memory, alternatives 2/3 emit movlhps/vmovlhps with operand 2 in a
;; register, and alternative 4 emits %vmovlps to %H0 of an offsettable
;; memory destination.
3597 (define_insn "sse_loadhps"
3598 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3601 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3602 (parallel [(const_int 0) (const_int 1)]))
3603 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3606 movhps\t{%2, %0|%0, %2}
3607 vmovhps\t{%2, %1, %0|%0, %1, %2}
3608 movlhps\t{%2, %0|%0, %2}
3609 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3610 %vmovlps\t{%2, %H0|%H0, %2}"
3611 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3612 (set_attr "type" "ssemov")
3613 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3614 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; sse_storelps: produces a V2SF value from elements 0 and 1 of the V4SF
;; operand 1.  Alternative 0 stores to memory with %vmovlps, alternative 1
;; is a register copy via %vmovaps, and alternative 2 loads from memory with
;; %vmovlps.
3616 (define_insn "sse_storelps"
3617 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3619 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3620 (parallel [(const_int 0) (const_int 1)])))]
3623 %vmovlps\t{%1, %0|%0, %1}
3624 %vmovaps\t{%1, %0|%0, %1}
3625 %vmovlps\t{%1, %d0|%d0, %1}"
3626 [(set_attr "type" "ssemov")
3627 (set_attr "prefix" "maybe_vex")
3628 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  The preparation code passes the operands
;; through ix86_fixup_binary_operands, emits gen_sse_loadlps into the
;; destination that helper returns, and copies the result into operand 0
;; when the returned destination differs from it.
3630 (define_expand "sse_loadlps_exp"
3631 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3633 (match_operand:V2SF 2 "nonimmediate_operand" "")
3635 (match_operand:V4SF 1 "nonimmediate_operand" "")
3636 (parallel [(const_int 2) (const_int 3)]))))]
3639 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3641 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3643 /* Fix up the destination if needed. */
3644 if (dst != operands[0])
3645 emit_move_insn (operands[0], dst);
;; sse_loadlps: combines the V2SF operand 2 with elements 2 and 3 of the
;; V4SF operand 1.  Alternatives:
;;   0/1  shufps/vshufps $0xe4 - operand 2 in a register
;;   2/3  movlps/vmovlps       - operand 2 in memory
;;   4    %vmovlps store       - destination is memory
;; Alternative 3 takes operand 2 from memory ("m"), like its non-AVX twin
;; (alternative 2): the vmovlps load template needs a memory source, and an
;; "x" constraint there would merely duplicate alternative 1 and keep a
;; memory operand from ever being folded under AVX.
3650 (define_insn "sse_loadlps"
3651 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3653 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3655 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3656 (parallel [(const_int 2) (const_int 3)]))))]
3659 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3660 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3661 movlps\t{%2, %0|%0, %2}
3662 vmovlps\t{%2, %1, %0|%0, %1, %2}
3663 %vmovlps\t{%2, %0|%0, %2}"
3664 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3665 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3666 (set_attr "length_immediate" "1,1,*,*,*")
3667 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3668 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; sse_movss: V4SF insn over two register operands.  Alternative 0 (noavx,
;; operand 1 tied to the destination) emits movss; alternative 1 (avx) emits
;; the three-operand vmovss.  The insn mode attribute is SF.
3670 (define_insn "sse_movss"
3671 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3673 (match_operand:V4SF 2 "register_operand" " x,x")
3674 (match_operand:V4SF 1 "register_operand" " 0,x")
3678 movss\t{%2, %0|%0, %2}
3679 vmovss\t{%2, %1, %0|%0, %1, %2}"
3680 [(set_attr "isa" "noavx,avx")
3681 (set_attr "type" "ssemov")
3682 (set_attr "prefix" "orig,vex")
3683 (set_attr "mode" "SF")])
;; avx2_vec_dup<mode>: for each VF1 mode, emits vbroadcastss using element 0
;; of a V4SF register source (operand 1).
3685 (define_insn "avx2_vec_dup<mode>"
3686 [(set (match_operand:VF1 0 "register_operand" "=x")
3689 (match_operand:V4SF 1 "register_operand" "x")
3690 (parallel [(const_int 0)]))))]
3692 "vbroadcastss\t{%1, %0|%0, %1}"
3693 [(set_attr "type" "sselog1")
3694 (set_attr "prefix" "vex")
3695 (set_attr "mode" "<MODE>")])
;; vec_dupv4sf: builds a V4SF from the SF operand 1.  Alternative 0 (avx,
;; register source) emits vshufps $0, alternative 1 (avx, memory source)
;; emits vbroadcastss, and alternative 2 (noavx, source tied to the
;; destination) emits shufps $0 on the destination itself.
3697 (define_insn "vec_dupv4sf"
3698 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
3700 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
3703 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3704 vbroadcastss\t{%1, %0|%0, %1}
3705 shufps\t{$0, %0, %0|%0, %0, 0}"
3706 [(set_attr "isa" "avx,avx,noavx")
3707 (set_attr "type" "sselog1,ssemov,sselog1")
3708 (set_attr "length_immediate" "1,0,1")
3709 (set_attr "prefix_extra" "0,1,*")
3710 (set_attr "prefix" "vex,vex,orig")
3711 (set_attr "mode" "V4SF")])
3713 ;; Although insertps takes register source, we prefer
3714 ;; unpcklps with register source since it is shorter.
;; *vec_concatv2sf_sse4_1: builds a V2SF from the two SF operands 1 and 2,
;; with seven alternatives.  Alternatives 0/1 use unpcklps/vunpcklps with a
;; register operand 2, 2/3 use insertps/vinsertps $0x10 with a memory
;; operand 2, 4 uses %vmovss when operand 2 satisfies constraint "C", and
;; 5/6 are the "*y" alternatives using punpckldq and movd.
3715 (define_insn "*vec_concatv2sf_sse4_1"
3716 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3718 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3719 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3722 unpcklps\t{%2, %0|%0, %2}
3723 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3724 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3725 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3726 %vmovss\t{%1, %0|%0, %1}
3727 punpckldq\t{%2, %0|%0, %2}
3728 movd\t{%1, %0|%0, %1}"
3729 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3730 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3731 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3732 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3733 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3734 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3735 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3737 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3738 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3739 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *vec_concatv2sf_sse: builds a V2SF from the two SF operands 1 and 2,
;; where operand 2 must satisfy reg_or_0_operand.  The four alternatives
;; emit unpcklps, movss, punpckldq and movd; the last two are the "*y"
;; alternatives.
3740 (define_insn "*vec_concatv2sf_sse"
3741 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3743 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3744 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3747 unpcklps\t{%2, %0|%0, %2}
3748 movss\t{%1, %0|%0, %1}
3749 punpckldq\t{%2, %0|%0, %2}
3750 movd\t{%1, %0|%0, %1}"
3751 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3752 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: builds a V4SF from the two V2SF operands 1 and 2.
;; Alternatives 0/1 emit movlhps/vmovlhps with operand 2 in a register;
;; alternatives 2/3 emit movhps/vmovhps with operand 2 in memory.
3754 (define_insn "*vec_concatv4sf"
3755 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3757 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3758 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3761 movlhps\t{%2, %0|%0, %2}
3762 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3763 movhps\t{%2, %0|%0, %2}
3764 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3765 [(set_attr "isa" "noavx,avx,noavx,avx")
3766 (set_attr "type" "ssemov")
3767 (set_attr "prefix" "orig,vex,orig,vex")
3768 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander vec_init<mode> for the V_128 modes: hands operand 0 (the
;; destination register) and operand 1 to ix86_expand_vector_init, passing
;; false as the first argument.
3770 (define_expand "vec_init<mode>"
3771 [(match_operand:V_128 0 "register_operand" "")
3772 (match_operand 1 "" "")]
3775 ix86_expand_vector_init (false, operands[0], operands[1]);
3779 ;; Avoid combining registers from different units in a single alternative,
3780 ;; see comment above inline_secondary_memory_needed function in i386.c
;; vec_set<mode>_0: for the VI4F_128 modes, combines a duplicated scalar
;; (operand 2) with the vector operand 1.  The constraint strings and the
;; "isa" attribute each list eleven alternatives: 0-3 take operand 1 as "C",
;; 4-7 take it from a register, and 8-10 write to a memory destination.
;; The "type" attribute is sselog for alternatives 0, 6 and 7, fmov for 9,
;; imov for 10 and ssemov otherwise.
3781 (define_insn "vec_set<mode>_0"
3782 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3783 "=x,x,x ,x,x,x,x ,x ,m,m ,m")
3785 (vec_duplicate:VI4F_128
3786 (match_operand:<ssescalarmode> 2 "general_operand"
3787 " x,m,*r,m,x,x,*rm,*rm,x,fF,*r"))
3788 (match_operand:VI4F_128 1 "vector_move_operand"
3789 " C,C,C ,C,0,x,0 ,x ,0,0 ,0")
3793 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3794 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3795 %vmovd\t{%2, %0|%0, %2}
3796 movss\t{%2, %0|%0, %2}
3797 movss\t{%2, %0|%0, %2}
3798 vmovss\t{%2, %1, %0|%0, %1, %2}
3799 pinsrd\t{$0, %2, %0|%0, %2, 0}
3800 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3804 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
3806 (cond [(eq_attr "alternative" "0,6,7")
3807 (const_string "sselog")
3808 (eq_attr "alternative" "9")
3809 (const_string "fmov")
3810 (eq_attr "alternative" "10")
3811 (const_string "imov")
3813 (const_string "ssemov")))
3814 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3815 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3816 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3817 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3819 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: inserts the SF operand 2 into the V4SF operand 1.
;; Operand 3 is a constant whose exact_log2 must be below the number of V4SF
;; elements (it is checked as an unsigned value).  The output code replaces
;; operand 3 with exact_log2 (operand 3) << 4 and returns the insertps or
;; vinsertps template according to which_alternative.
3820 (define_insn "*vec_setv4sf_sse4_1"
3821 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3824 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3825 (match_operand:V4SF 1 "register_operand" "0,x")
3826 (match_operand:SI 3 "const_int_operand" "")))]
3828 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3829 < GET_MODE_NUNITS (V4SFmode))"
3831 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3832 switch (which_alternative)
3835 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3837 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3842 [(set_attr "isa" "noavx,avx")
3843 (set_attr "type" "sselog")
3844 (set_attr "prefix_data16" "1,*")
3845 (set_attr "prefix_extra" "1")
3846 (set_attr "length_immediate" "1")
3847 (set_attr "prefix" "orig,vex")
3848 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec-based insertps/vinsertps with an 8-bit immediate
;; in operand 3.  When operand 2 is in memory, the output code takes
;; count_s = immediate >> 6, masks the immediate down to its low six bits,
;; and re-addresses operand 2 as an SFmode reference at byte offset
;; count_s * 4 before printing the instruction.
3850 (define_insn "sse4_1_insertps"
3851 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3852 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3853 (match_operand:V4SF 1 "register_operand" "0,x")
3854 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3858 if (MEM_P (operands[2]))
3860 unsigned count_s = INTVAL (operands[3]) >> 6;
3862 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3863 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3865 switch (which_alternative)
3868 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3870 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3875 [(set_attr "isa" "noavx,avx")
3876 (set_attr "type" "sselog")
3877 (set_attr "prefix_data16" "1,*")
3878 (set_attr "prefix_extra" "1")
3879 (set_attr "length_immediate" "1")
3880 (set_attr "prefix" "orig,vex")
3881 (set_attr "mode" "V4SF")])
;; Matches a store to a VI4F_128 memory operand whose source involves a
;; vec_duplicate of the non-memory scalar operand 1.  Active only when
;; TARGET_SSE holds and reload has completed; the replacement code emits a
;; move whose destination is operand 0 re-addressed in <ssescalarmode> at
;; offset 0.
3884 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3886 (vec_duplicate:VI4F_128
3887 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3890 "TARGET_SSE && reload_completed"
3893 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; Expander vec_set<mode> for the modes of iterator V: forwards the vector
;; register (operand 0), the scalar register (operand 1) and the integer
;; value of operand 2 to ix86_expand_vector_set, passing false as the first
;; argument.
3898 (define_expand "vec_set<mode>"
3899 [(match_operand:V 0 "register_operand" "")
3900 (match_operand:<ssescalarmode> 1 "register_operand" "")
3901 (match_operand 2 "const_int_operand" "")]
3904 ix86_expand_vector_set (false, operands[0], operands[1],
3905 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extracts element 0 of the V4SF operand 1 into an SF
;; destination; operands 0 and 1 may not both be memory.  After reload the
;; insn is split: operand 1 is re-expressed in SFmode (via gen_rtx_REG on
;; the same register number, or via gen_lowpart) and a plain move to
;; operand 0 is emitted.
3909 (define_insn_and_split "*vec_extractv4sf_0"
3910 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3912 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3913 (parallel [(const_int 0)])))]
3914 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3916 "&& reload_completed"
3919 rtx op1 = operands[1];
3921 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3923 op1 = gen_lowpart (SFmode, op1);
3924 emit_move_insn (operands[0], op1);
;; *sse4_1_extractps: extracts element operand 2 (0..3) of the V4SF register
;; operand 1 into an SF destination.  Alternative 0 (destination "rm") emits
;; %vextractps.  The split applies after reload when operand 0 satisfies
;; SSE_REG_P: the destination is viewed as a V4SF register and, depending
;; on operand 2, the split emits either gen_sse_shufps_v4sf with operand 1
;; as both inputs or gen_vec_interleave_highv4sf.
3928 (define_insn_and_split "*sse4_1_extractps"
3929 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
3931 (match_operand:V4SF 1 "register_operand" "x,0,x")
3932 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
3935 %vextractps\t{%2, %1, %0|%0, %1, %2}
3938 "&& reload_completed && SSE_REG_P (operands[0])"
3941 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
3942 switch (INTVAL (operands[2]))
3946 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
3947 operands[2], operands[2],
3948 GEN_INT (INTVAL (operands[2]) + 4),
3949 GEN_INT (INTVAL (operands[2]) + 4)));
3952 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
3955 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
3960 [(set_attr "isa" "*,noavx,avx")
3961 (set_attr "type" "sselog,*,*")
3962 (set_attr "prefix_data16" "1,*,*")
3963 (set_attr "prefix_extra" "1,*,*")
3964 (set_attr "length_immediate" "1,*,*")
3965 (set_attr "prefix" "maybe_vex,*,*")
3966 (set_attr "mode" "V4SF,*,*")])
;; *vec_extract_v4sf_mem: extracts element operand 2 (0..3) from a V4SF in
;; offsettable memory into an SF register.  After reload it is split into a
;; plain move from operand 1 re-addressed as SFmode at byte offset i * 4,
;; where i is the value of operand 2.
3968 (define_insn_and_split "*vec_extract_v4sf_mem"
3969 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
3971 (match_operand:V4SF 1 "memory_operand" "o,o,o")
3972 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
3975 "&& reload_completed"
3978 int i = INTVAL (operands[2]);
3980 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
;; Expander avx_vextractf128<mode> for the V_256 modes: switches on the
;; value of operand 2 (0 or 1) to pick gen_vec_extract_lo_<mode> or
;; gen_vec_extract_hi_<mode>, then emits the chosen insn on operands 0
;; and 1.
3984 (define_expand "avx_vextractf128<mode>"
3985 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3986 (match_operand:V_256 1 "register_operand" "")
3987 (match_operand:SI 2 "const_0_to_1_operand" "")]
3990 rtx (*insn)(rtx, rtx);
3992 switch (INTVAL (operands[2]))
3995 insn = gen_vec_extract_lo_<mode>;
3998 insn = gen_vec_extract_hi_<mode>;
4004 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (VI8F_256 modes): selects elements 0 and 1 into a
;; half-width destination; requires TARGET_AVX and that operands 0 and 1 are
;; not both memory.  After reload it is split into a plain move whose source
;; is operand 1 re-expressed in <ssehalfvecmode> (via gen_rtx_REG on the same
;; register number, or via gen_lowpart).
4008 (define_insn_and_split "vec_extract_lo_<mode>"
4009 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4010 (vec_select:<ssehalfvecmode>
4011 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
4012 (parallel [(const_int 0) (const_int 1)])))]
4013 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4015 "&& reload_completed"
4018 rtx op1 = operands[1];
4020 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4022 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4023 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI8F_256 modes): selects elements 2 and 3 of the
;; register operand 1 into a half-width register or memory destination by
;; emitting vextract<i128> with immediate 1.  The "memory" attribute is none
;; for the register alternative and store for the memory alternative.
4027 (define_insn "vec_extract_hi_<mode>"
4028 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4029 (vec_select:<ssehalfvecmode>
4030 (match_operand:VI8F_256 1 "register_operand" "x,x")
4031 (parallel [(const_int 2) (const_int 3)])))]
4033 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4034 [(set_attr "type" "sselog")
4035 (set_attr "prefix_extra" "1")
4036 (set_attr "length_immediate" "1")
4037 (set_attr "memory" "none,store")
4038 (set_attr "prefix" "vex")
4039 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_<mode> (VI4F_256 modes): selects elements 0-3 into a
;; half-width destination; requires TARGET_AVX and that operands 0 and 1 are
;; not both memory.  After reload it is split into a plain move whose source
;; is operand 1 re-expressed in <ssehalfvecmode> (via gen_rtx_REG on the same
;; register number, or via gen_lowpart).
4041 (define_insn_and_split "vec_extract_lo_<mode>"
4042 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4043 (vec_select:<ssehalfvecmode>
4044 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
4045 (parallel [(const_int 0) (const_int 1)
4046 (const_int 2) (const_int 3)])))]
4047 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4049 "&& reload_completed"
4052 rtx op1 = operands[1];
4054 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
4056 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
4057 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI4F_256 modes): selects elements 4-7 of the
;; register operand 1 into a half-width register or memory destination by
;; emitting vextract<i128> with immediate 1.  The "memory" attribute is none
;; for the register alternative and store for the memory alternative.
4061 (define_insn "vec_extract_hi_<mode>"
4062 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
4063 (vec_select:<ssehalfvecmode>
4064 (match_operand:VI4F_256 1 "register_operand" "x,x")
4065 (parallel [(const_int 4) (const_int 5)
4066 (const_int 6) (const_int 7)])))]
4068 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
4069 [(set_attr "type" "sselog")
4070 (set_attr "prefix_extra" "1")
4071 (set_attr "length_immediate" "1")
4072 (set_attr "memory" "none,store")
4073 (set_attr "prefix" "vex")
4074 (set_attr "mode" "<sseinsnmode>")])
;; vec_extract_lo_v16hi: selects elements 0-7 of a V16HI into a V8HI
;; destination; requires TARGET_AVX and that operands 0 and 1 are not both
;; memory.  After reload it is split into a plain move whose source is
;; operand 1 re-expressed in V8HImode (via gen_rtx_REG on the same register
;; number, or via gen_lowpart).
4076 (define_insn_and_split "vec_extract_lo_v16hi"
4077 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4079 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
4080 (parallel [(const_int 0) (const_int 1)
4081 (const_int 2) (const_int 3)
4082 (const_int 4) (const_int 5)
4083 (const_int 6) (const_int 7)])))]
4084 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4086 "&& reload_completed"
4089 rtx op1 = operands[1];
4091 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
4093 op1 = gen_lowpart (V8HImode, op1);
4094 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: selects elements 8-15 of the V16HI register
;; operand 1 into a V8HI register or memory destination by emitting
;; vextract%~128 with immediate 1.  The insn mode attribute is OI.
4098 (define_insn "vec_extract_hi_v16hi"
4099 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
4101 (match_operand:V16HI 1 "register_operand" "x,x")
4102 (parallel [(const_int 8) (const_int 9)
4103 (const_int 10) (const_int 11)
4104 (const_int 12) (const_int 13)
4105 (const_int 14) (const_int 15)])))]
4107 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4108 [(set_attr "type" "sselog")
4109 (set_attr "prefix_extra" "1")
4110 (set_attr "length_immediate" "1")
4111 (set_attr "memory" "none,store")
4112 (set_attr "prefix" "vex")
4113 (set_attr "mode" "OI")])
;; vec_extract_lo_v32qi: selects elements 0-15 of a V32QI into a V16QI
;; destination; requires TARGET_AVX and that operands 0 and 1 are not both
;; memory.  After reload it is split into a plain move whose source is
;; operand 1 re-expressed in V16QImode (via gen_rtx_REG on the same register
;; number, or via gen_lowpart).
4115 (define_insn_and_split "vec_extract_lo_v32qi"
4116 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4118 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
4119 (parallel [(const_int 0) (const_int 1)
4120 (const_int 2) (const_int 3)
4121 (const_int 4) (const_int 5)
4122 (const_int 6) (const_int 7)
4123 (const_int 8) (const_int 9)
4124 (const_int 10) (const_int 11)
4125 (const_int 12) (const_int 13)
4126 (const_int 14) (const_int 15)])))]
4127 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4129 "&& reload_completed"
4132 rtx op1 = operands[1];
4134 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
4136 op1 = gen_lowpart (V16QImode, op1);
4137 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: selects elements 16-31 of the V32QI register
;; operand 1 into a V16QI register or memory destination by emitting
;; vextract%~128 with immediate 1.  The insn mode attribute is OI.
4141 (define_insn "vec_extract_hi_v32qi"
4142 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
4144 (match_operand:V32QI 1 "register_operand" "x,x")
4145 (parallel [(const_int 16) (const_int 17)
4146 (const_int 18) (const_int 19)
4147 (const_int 20) (const_int 21)
4148 (const_int 22) (const_int 23)
4149 (const_int 24) (const_int 25)
4150 (const_int 26) (const_int 27)
4151 (const_int 28) (const_int 29)
4152 (const_int 30) (const_int 31)])))]
4154 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
4155 [(set_attr "type" "sselog")
4156 (set_attr "prefix_extra" "1")
4157 (set_attr "length_immediate" "1")
4158 (set_attr "memory" "none,store")
4159 (set_attr "prefix" "vex")
4160 (set_attr "mode" "OI")])
4162 ;; Modes handled by vec_extract patterns.
;; The six 256-bit entries (V32QI, V16HI, V8SI, V4DI, V8SF, V4DF) are
;; conditional on TARGET_AVX; the 128-bit entries are unconditional.
4163 (define_mode_iterator VEC_EXTRACT_MODE
4164 [(V32QI "TARGET_AVX") V16QI
4165 (V16HI "TARGET_AVX") V8HI
4166 (V8SI "TARGET_AVX") V4SI
4167 (V4DI "TARGET_AVX") V2DI
4168 (V8SF "TARGET_AVX") V4SF
4169 (V4DF "TARGET_AVX") V2DF])
;; Expander vec_extract<mode> for the VEC_EXTRACT_MODE modes: forwards the
;; scalar destination (operand 0), the vector register (operand 1) and the
;; integer value of operand 2 to ix86_expand_vector_extract, passing false
;; as the first argument.
4171 (define_expand "vec_extract<mode>"
4172 [(match_operand:<ssescalarmode> 0 "register_operand" "")
4173 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
4174 (match_operand 2 "const_int_operand" "")]
4177 ix86_expand_vector_extract (false, operands[0], operands[1],
4178 INTVAL (operands[2]));
4182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4184 ;; Parallel double-precision floating point element swizzling
4186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4188 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: V4DF insn emitting vunpckhpd.  Operand 1 must be a
;; register, operand 2 may be a register or memory.  The selection vector is
;; 1,5,3,7.
4189 (define_insn "avx_unpckhpd256"
4190 [(set (match_operand:V4DF 0 "register_operand" "=x")
4193 (match_operand:V4DF 1 "register_operand" "x")
4194 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4195 (parallel [(const_int 1) (const_int 5)
4196 (const_int 3) (const_int 7)])))]
4198 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
4199 [(set_attr "type" "sselog")
4200 (set_attr "prefix" "vex")
4201 (set_attr "mode" "V4DF")])
;; Expander vec_interleave_highv4df.  The template holds three selections:
;; 0,4,2,6 and 1,5,3,7, followed by a selection of elements 2,3,6,7 whose
;; result is stored in operand 0.  Operands 3 and 4 are fresh V4DF pseudos
;; allocated in the preparation code.
4203 (define_expand "vec_interleave_highv4df"
4207 (match_operand:V4DF 1 "register_operand" "x")
4208 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4209 (parallel [(const_int 0) (const_int 4)
4210 (const_int 2) (const_int 6)])))
4216 (parallel [(const_int 1) (const_int 5)
4217 (const_int 3) (const_int 7)])))
4218 (set (match_operand:V4DF 0 "register_operand" "")
4223 (parallel [(const_int 2) (const_int 3)
4224 (const_int 6) (const_int 7)])))]
4227 operands[3] = gen_reg_rtx (V4DFmode);
4228 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander vec_interleave_highv2df.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operands
;; as given, operand 2 is forced into a V2DF register.
4232 (define_expand "vec_interleave_highv2df"
4233 [(set (match_operand:V2DF 0 "register_operand" "")
4236 (match_operand:V2DF 1 "nonimmediate_operand" "")
4237 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4238 (parallel [(const_int 1)
4242 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
4243 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: V2DF insn, enabled when TARGET_SSE2 holds and
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) accepts the operands.
;; Six alternatives: unpckhpd/vunpckhpd on registers, %vmovddup from %H1
;; when operand 2 matches operand 1 (sse3), movlpd/vmovlpd from %H1 of an
;; offsettable memory operand 1, and %vmovhpd into a memory destination.
4246 (define_insn "*vec_interleave_highv2df"
4247 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
4250 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
4251 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
4252 (parallel [(const_int 1)
4254 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4256 unpckhpd\t{%2, %0|%0, %2}
4257 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4258 %vmovddup\t{%H1, %0|%0, %H1}
4259 movlpd\t{%H1, %0|%0, %H1}
4260 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
4261 %vmovhpd\t{%1, %0|%0, %1}"
4262 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4263 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4264 (set_attr "prefix_data16" "*,*,*,1,*,1")
4265 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4266 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
4268 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander avx_movddup256: V4DF destination register, a single
;; register-or-memory V4DF source (operand 1), selection vector 0,4,2,6.
4269 (define_expand "avx_movddup256"
4270 [(set (match_operand:V4DF 0 "register_operand" "")
4273 (match_operand:V4DF 1 "nonimmediate_operand" "")
4275 (parallel [(const_int 0) (const_int 4)
4276 (const_int 2) (const_int 6)])))]
;; Expander avx_unpcklpd256: V4DF destination register, register operand 1,
;; register-or-memory operand 2, selection vector 0,4,2,6.
4279 (define_expand "avx_unpcklpd256"
4280 [(set (match_operand:V4DF 0 "register_operand" "")
4283 (match_operand:V4DF 1 "register_operand" "")
4284 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4285 (parallel [(const_int 0) (const_int 4)
4286 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: V4DF insn with selection vector 0,4,2,6.  Alternative 0
;; emits vunpcklpd with a register operand 1; alternative 1 applies when
;; operand 1 is in memory and operand 2 matches it (constraint "1") and
;; emits vmovddup from operand 1.
4289 (define_insn "*avx_unpcklpd256"
4290 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4293 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
4294 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
4295 (parallel [(const_int 0) (const_int 4)
4296 (const_int 2) (const_int 6)])))]
4299 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4300 vmovddup\t{%1, %0|%0, %1}"
4301 [(set_attr "type" "sselog")
4302 (set_attr "prefix" "vex")
4303 (set_attr "mode" "V4DF")])
;; Expander vec_interleave_lowv4df.  The template holds three selections:
;; 0,4,2,6 and 1,5,3,7, followed by a selection of elements 0,1,4,5 whose
;; result is stored in operand 0.  Operands 3 and 4 are fresh V4DF pseudos
;; allocated in the preparation code.
4305 (define_expand "vec_interleave_lowv4df"
4309 (match_operand:V4DF 1 "register_operand" "x")
4310 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4311 (parallel [(const_int 0) (const_int 4)
4312 (const_int 2) (const_int 6)])))
4318 (parallel [(const_int 1) (const_int 5)
4319 (const_int 3) (const_int 7)])))
4320 (set (match_operand:V4DF 0 "register_operand" "")
4325 (parallel [(const_int 0) (const_int 1)
4326 (const_int 4) (const_int 5)])))]
4329 operands[3] = gen_reg_rtx (V4DFmode);
4330 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander vec_interleave_lowv2df.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operands
;; as given, operand 1 is forced into a V2DF register.
4333 (define_expand "vec_interleave_lowv2df"
4334 [(set (match_operand:V2DF 0 "register_operand" "")
4337 (match_operand:V2DF 1 "nonimmediate_operand" "")
4338 (match_operand:V2DF 2 "nonimmediate_operand" ""))
4339 (parallel [(const_int 0)
4343 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
4344 operands[1] = force_reg (V2DFmode, operands[1]);
;; *vec_interleave_lowv2df: V2DF insn, enabled when TARGET_SSE2 holds and
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) accepts the operands.
;; Six alternatives: unpcklpd/vunpcklpd on registers, %vmovddup from a
;; memory operand 1 that operand 2 matches (sse3), movhpd/vmovhpd with
;; operand 2 in memory, and %vmovlpd to %H0 of an offsettable memory
;; destination.
4347 (define_insn "*vec_interleave_lowv2df"
4348 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
4351 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
4352 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
4353 (parallel [(const_int 0)
4355 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4357 unpcklpd\t{%2, %0|%0, %2}
4358 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4359 %vmovddup\t{%1, %0|%0, %1}
4360 movhpd\t{%2, %0|%0, %2}
4361 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4362 %vmovlpd\t{%2, %H0|%H0, %2}"
4363 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
4364 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
4365 (set_attr "prefix_data16" "*,*,*,1,*,1")
4366 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
4367 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a memory destination fed from
;; register operand 1: the register is viewed as a DFmode register with
;; the same register number and stored twice, at byte offsets 0 and 8 of
;; the destination.
4370 [(set (match_operand:V2DF 0 "memory_operand" "")
4373 (match_operand:V2DF 1 "register_operand" "")
4375 (parallel [(const_int 0)
4377 "TARGET_SSE3 && reload_completed"
4380 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4381 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4382 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a register destination and memory source whose
;; two selector operands satisfy operands[3] == operands[2] + 2.  The
;; result is a vec_duplicate of the DFmode element found at byte offset
;; operands[2] * 8 of the memory operand.
4387 [(set (match_operand:V2DF 0 "register_operand" "")
4390 (match_operand:V2DF 1 "memory_operand" "")
4392 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4393 (match_operand:SI 3 "const_int_operand" "")])))]
4394 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4395 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4397 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander taking an SImode immediate in operand 3.  The immediate is
;; decoded bit by bit into explicit element indices that are handed to
;; gen_avx_shufpd256_1; the visible arguments map bit 1 to index 5 or 4,
;; bit 2 to index 3 or 2, and bit 3 to index 7 or 6.
4400 (define_expand "avx_shufpd256"
4401 [(match_operand:V4DF 0 "register_operand" "")
4402 (match_operand:V4DF 1 "register_operand" "")
4403 (match_operand:V4DF 2 "nonimmediate_operand" "")
4404 (match_operand:SI 3 "const_int_operand" "")]
4407 int mask = INTVAL (operands[3]);
4408 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4410 GEN_INT (mask & 2 ? 5 : 4),
4411 GEN_INT (mask & 4 ? 3 : 2),
4412 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with explicit selector operands 3..6 (ranges 0-1, 4-5,
;; 2-3 and 6-7).  The output code folds the four selectors back into a
;; 4-bit immediate (one bit per selector, after subtracting each range's
;; base) and overwrites operands[3] with it before returning the template.
4416 (define_insn "avx_shufpd256_1"
4417 [(set (match_operand:V4DF 0 "register_operand" "=x")
4420 (match_operand:V4DF 1 "register_operand" "x")
4421 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4422 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4423 (match_operand 4 "const_4_to_5_operand" "")
4424 (match_operand 5 "const_2_to_3_operand" "")
4425 (match_operand 6 "const_6_to_7_operand" "")])))]
4429 mask = INTVAL (operands[3]);
4430 mask |= (INTVAL (operands[4]) - 4) << 1;
4431 mask |= (INTVAL (operands[5]) - 2) << 2;
4432 mask |= (INTVAL (operands[6]) - 6) << 3;
4433 operands[3] = GEN_INT (mask);
4435 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4437 [(set_attr "type" "sselog")
4438 (set_attr "length_immediate" "1")
4439 (set_attr "prefix" "vex")
4440 (set_attr "mode" "V4DF")])
;; Expander taking an SImode immediate in operand 3.  The immediate is
;; decoded into element indices passed to gen_sse2_shufpd_v2df; the
;; visible argument maps bit 1 to index 3 or 2.
4442 (define_expand "sse2_shufpd"
4443 [(match_operand:V2DF 0 "register_operand" "")
4444 (match_operand:V2DF 1 "register_operand" "")
4445 (match_operand:V2DF 2 "nonimmediate_operand" "")
4446 (match_operand:SI 3 "const_int_operand" "")]
4449 int mask = INTVAL (operands[3]);
4450 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4452 GEN_INT (mask & 2 ? 3 : 2)));
4456 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI high interleave, emitted as the three-operand vpunpckhqdq
;; (VEX prefix, insn mode OI).
4457 (define_insn "avx2_interleave_highv4di"
4458 [(set (match_operand:V4DI 0 "register_operand" "=x")
4461 (match_operand:V4DI 1 "register_operand" "x")
4462 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4463 (parallel [(const_int 1)
4468 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4469 [(set_attr "type" "sselog")
4470 (set_attr "prefix" "vex")
4471 (set_attr "mode" "OI")])
;; V2DI high interleave.  Alternative 0 (noavx) ties operand 1 to the
;; destination and emits punpckhqdq; alternative 1 (avx) emits the
;; three-operand vpunpckhqdq.
4473 (define_insn "vec_interleave_highv2di"
4474 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4477 (match_operand:V2DI 1 "register_operand" "0,x")
4478 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4479 (parallel [(const_int 1)
4483 punpckhqdq\t{%2, %0|%0, %2}
4484 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4485 [(set_attr "isa" "noavx,avx")
4486 (set_attr "type" "sselog")
4487 (set_attr "prefix_data16" "1,*")
4488 (set_attr "prefix" "orig,vex")
4489 (set_attr "mode" "TI")])
;; V4DI low interleave, emitted as the three-operand vpunpcklqdq
;; (VEX prefix, insn mode OI).
4491 (define_insn "avx2_interleave_lowv4di"
4492 [(set (match_operand:V4DI 0 "register_operand" "=x")
4495 (match_operand:V4DI 1 "register_operand" "x")
4496 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
4497 (parallel [(const_int 0)
4502 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4503 [(set_attr "type" "sselog")
4504 (set_attr "prefix" "vex")
4505 (set_attr "mode" "OI")])
;; V2DI low interleave.  Alternative 0 (noavx) ties operand 1 to the
;; destination and emits punpcklqdq; alternative 1 (avx) emits the
;; three-operand vpunpcklqdq.
4507 (define_insn "vec_interleave_lowv2di"
4508 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4511 (match_operand:V2DI 1 "register_operand" "0,x")
4512 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4513 (parallel [(const_int 0)
4517 punpcklqdq\t{%2, %0|%0, %2}
4518 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4519 [(set_attr "isa" "noavx,avx")
4520 (set_attr "type" "sselog")
4521 (set_attr "prefix_data16" "1,*")
4522 (set_attr "prefix" "orig,vex")
4523 (set_attr "mode" "TI")])
;; shufpd / vshufpd over the VI8F_128 modes: a vec_select from the
;; vec_concat of operands 1 and 2.  Selector operands 3 (0-1) and 4 (2-3)
;; are re-encoded into a two-bit immediate that replaces operands[3]; the
;; template returned depends on which_alternative (noavx vs. avx).
4525 (define_insn "sse2_shufpd_<mode>"
4526 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4527 (vec_select:VI8F_128
4528 (vec_concat:<ssedoublevecmode>
4529 (match_operand:VI8F_128 1 "register_operand" "0,x")
4530 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4531 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4532 (match_operand 4 "const_2_to_3_operand" "")])))]
4536 mask = INTVAL (operands[3]);
4537 mask |= (INTVAL (operands[4]) - 2) << 1;
4538 operands[3] = GEN_INT (mask);
4540 switch (which_alternative)
4543 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4545 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4550 [(set_attr "isa" "noavx,avx")
4551 (set_attr "type" "sselog")
4552 (set_attr "length_immediate" "1")
4553 (set_attr "prefix" "orig,vex")
4554 (set_attr "mode" "V2DF")])
4556 ;; Avoid combining registers from different units in a single alternative,
4557 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of V2DF operand 1 into DF operand 0.  Six
;; alternatives cover memory, SSE-register, x87 (*f) and general-register
;; (r) destinations; memory-to-memory is excluded by the condition.  The
;; prefix_data16 attribute is computed from alternative 0 combined with
;; !TARGET_AVX.
4558 (define_insn "sse2_storehpd"
4559 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4561 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4562 (parallel [(const_int 1)])))]
4563 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4565 %vmovhpd\t{%1, %0|%0, %1}
4567 vunpckhpd\t{%d1, %0|%0, %d1}
4571 [(set_attr "isa" "*,noavx,avx,*,*,*")
4572 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4573 (set (attr "prefix_data16")
4575 (and (eq_attr "alternative" "0")
4576 (not (match_test "TARGET_AVX")))
4578 (const_string "*")))
4579 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4580 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split: extracting element 1 of a V2DF that lives in memory
;; into a register becomes a plain DFmode move from byte offset 8 of that
;; memory operand.
4583 [(set (match_operand:DF 0 "register_operand" "")
4585 (match_operand:V2DF 1 "memory_operand" "")
4586 (parallel [(const_int 1)])))]
4587 "TARGET_SSE2 && reload_completed"
4588 [(set (match_dup 0) (match_dup 1))]
4589 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; SSE-without-SSE2 variant of extracting element 1 of a V2DF: movhps to
;; a memory destination, movhlps between registers, and movlps reading
;; operand 1 through the %H1 modifier for an offsettable memory source.
4591 (define_insn "*vec_extractv2df_1_sse"
4592 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4594 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4595 (parallel [(const_int 1)])))]
4596 "!TARGET_SSE2 && TARGET_SSE
4597 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4599 movhps\t{%1, %0|%0, %1}
4600 movhlps\t{%1, %0|%0, %1}
4601 movlps\t{%H1, %0|%0, %H1}"
4602 [(set_attr "type" "ssemov")
4603 (set_attr "mode" "V2SF,V4SF,V2SF")])
4605 ;; Avoid combining registers from different units in a single alternative,
4606 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of V2DF operand 1 into DF operand 0.  Five
;; alternatives cover memory, SSE-register, x87 (*f) and general-register
;; (r) destinations; memory-to-memory is excluded by the condition.  Only
;; the first alternative's template (movlpd) is visible here.
4607 (define_insn "sse2_storelpd"
4608 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4610 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4611 (parallel [(const_int 0)])))]
4612 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4614 %vmovlpd\t{%1, %0|%0, %1}
4619 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4620 (set_attr "prefix_data16" "1,*,*,*,*")
4621 (set_attr "prefix" "maybe_vex")
4622 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split for extracting element 0 of a V2DF into a register.
;; Operand 1 is re-expressed in DFmode, either as a DFmode register with
;; the same register number or through gen_lowpart, and then moved into
;; operand 0 with an ordinary emit_move_insn.
4625 [(set (match_operand:DF 0 "register_operand" "")
4627 (match_operand:V2DF 1 "nonimmediate_operand" "")
4628 (parallel [(const_int 0)])))]
4629 "TARGET_SSE2 && reload_completed"
4632 rtx op1 = operands[1];
4634 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4636 op1 = gen_lowpart (DFmode, op1);
4637 emit_move_insn (operands[0], op1);
;; SSE-without-SSE2 variant of extracting element 0 of a V2DF: movlps to
;; a memory destination, movaps between registers, and movlps from a
;; memory source.
4641 (define_insn "*vec_extractv2df_0_sse"
4642 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4644 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4645 (parallel [(const_int 0)])))]
4646 "!TARGET_SSE2 && TARGET_SSE
4647 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4649 movlps\t{%1, %0|%0, %1}
4650 movaps\t{%1, %0|%0, %1}
4651 movlps\t{%1, %0|%0, %1}"
4652 [(set_attr "type" "ssemov")
4653 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd: combines element 0 of V2DF
;; operand 1 with DF operand 2.  The operands are legitimized with
;; ix86_fixup_binary_operands; if that returns a destination different
;; from operands[0], the result is copied back afterwards.
4655 (define_expand "sse2_loadhpd_exp"
4656 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4659 (match_operand:V2DF 1 "nonimmediate_operand" "")
4660 (parallel [(const_int 0)]))
4661 (match_operand:DF 2 "nonimmediate_operand" "")))]
4664 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4666 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4668 /* Fix up the destination if needed. */
4669 if (dst != operands[0])
4670 emit_move_insn (operands[0], dst);
4675 ;; Avoid combining registers from different units in a single alternative,
4676 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of V2DF operand 1 with DF operand 2 into V2DF
;; operand 0 (seven alternatives).  Operands 1 and 2 may not both be
;; memory.  Visible templates are movhpd/vmovhpd and unpcklpd/vunpcklpd
;; for the first four alternatives; the last three alternatives are typed
;; ssemov, fmov and imov.
4677 (define_insn "sse2_loadhpd"
4678 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4682 (match_operand:V2DF 1 "nonimmediate_operand"
4684 (parallel [(const_int 0)]))
4685 (match_operand:DF 2 "nonimmediate_operand"
4686 " m,m,x,x,x,*f,r")))]
4687 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4689 movhpd\t{%2, %0|%0, %2}
4690 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4691 unpcklpd\t{%2, %0|%0, %2}
4692 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4696 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4697 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4698 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4699 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4700 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split for a memory destination whose element 0 is kept
;; (it is selected from the destination itself via match_dup 0).  Only the
;; other element changes, so the insn becomes a DFmode store of register
;; operand 1 at byte offset 8 of the destination.
4703 [(set (match_operand:V2DF 0 "memory_operand" "")
4705 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4706 (match_operand:DF 1 "register_operand" "")))]
4707 "TARGET_SSE2 && reload_completed"
4708 [(set (match_dup 0) (match_dup 1))]
4709 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd: combines DF operand 2 with
;; element 1 of V2DF operand 1.  The operands are legitimized with
;; ix86_fixup_binary_operands; if that returns a destination different
;; from operands[0], the result is copied back afterwards.
4711 (define_expand "sse2_loadlpd_exp"
4712 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4714 (match_operand:DF 2 "nonimmediate_operand" "")
4716 (match_operand:V2DF 1 "nonimmediate_operand" "")
4717 (parallel [(const_int 1)]))))]
4720 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4722 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4724 /* Fix up the destination if needed. */
4725 if (dst != operands[0])
4726 emit_move_insn (operands[0], dst);
4731 ;; Avoid combining registers from different units in a single alternative,
4732 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines DF operand 2 with element 1 of V2DF operand 1 into V2DF
;; operand 0 (eleven alternatives; operand 1 may also be the "C"
;; constant).  Operands 1 and 2 may not both be memory.  Eight templates
;; are visible: movsd/movlpd forms, a shufpd with immediate 2, and
;; movhpd forms reading operand 1 through %H1.  The type attribute is
;; sselog for alternative 5, fmov for 9, imov for 10 and ssemov otherwise.
4733 (define_insn "sse2_loadlpd"
4734 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4735 "=x,x,x,x,x,x,x,x,m,m ,m")
4737 (match_operand:DF 2 "nonimmediate_operand"
4738 " m,m,m,x,x,0,0,x,x,*f,r")
4740 (match_operand:V2DF 1 "vector_move_operand"
4741 " C,0,x,0,x,x,o,o,0,0 ,0")
4742 (parallel [(const_int 1)]))))]
4743 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4745 %vmovsd\t{%2, %0|%0, %2}
4746 movlpd\t{%2, %0|%0, %2}
4747 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4748 movsd\t{%2, %0|%0, %2}
4749 vmovsd\t{%2, %1, %0|%0, %1, %2}
4750 shufpd\t{$2, %1, %0|%0, %1, 2}
4751 movhpd\t{%H1, %0|%0, %H1}
4752 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4756 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4758 (cond [(eq_attr "alternative" "5")
4759 (const_string "sselog")
4760 (eq_attr "alternative" "9")
4761 (const_string "fmov")
4762 (eq_attr "alternative" "10")
4763 (const_string "imov")
4765 (const_string "ssemov")))
4766 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4767 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4768 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4769 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split for a memory destination whose element 1 is kept
;; (it is selected from the destination itself via match_dup 0).  DF
;; register operand 1 supplies the other element, i.e. element 0, so the
;; insn becomes a DFmode store at byte offset 0 of the destination.
;; Storing at offset 8 would overwrite the preserved element 1 and leave
;; element 0 stale; offset 8 is only right for the mirror-image split that
;; replaces element 1 and keeps element 0.
4772 [(set (match_operand:V2DF 0 "memory_operand" "")
4774 (match_operand:DF 1 "register_operand" "")
4775 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4776 "TARGET_SSE2 && reload_completed"
4777 [(set (match_dup 0) (match_dup 1))]
4778 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Merge of V2DF operands 2 and 1 into V2DF operand 0 (nine alternatives,
;; including memory destinations).  Templates in alternative order:
;; movsd, vmovsd, movlpd, vmovlpd, movlpd store, shufpd with immediate 2,
;; movhps/vmovhps reading operand 1 through %H1, and movhps storing
;; through %H0.  Alternative 5 is typed sselog, the others ssemov;
;; prefix_data16 is computed from alternatives 2 and 4 combined with
;; !TARGET_AVX.
4780 (define_insn "sse2_movsd"
4781 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4783 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4784 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4788 movsd\t{%2, %0|%0, %2}
4789 vmovsd\t{%2, %1, %0|%0, %1, %2}
4790 movlpd\t{%2, %0|%0, %2}
4791 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4792 %vmovlpd\t{%2, %0|%0, %2}
4793 shufpd\t{$2, %1, %0|%0, %1, 2}
4794 movhps\t{%H1, %0|%0, %H1}
4795 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4796 %vmovhps\t{%1, %H0|%H0, %1}"
4797 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4800 (eq_attr "alternative" "5")
4801 (const_string "sselog")
4802 (const_string "ssemov")))
4803 (set (attr "prefix_data16")
4805 (and (eq_attr "alternative" "2,4")
4806 (not (match_test "TARGET_AVX")))
4808 (const_string "*")))
4809 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4810 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4811 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Builds a V2DF from DF operand 1.  Two alternatives: a noavx form with
;; operand 1 tied to the destination (its template is not visible here),
;; and an sse3 form using movddup from a register or memory source.
4813 (define_insn "vec_dupv2df"
4814 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4816 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
4820 %vmovddup\t{%1, %0|%0, %1}"
4821 [(set_attr "isa" "noavx,sse3")
4822 (set_attr "type" "sselog1")
4823 (set_attr "prefix" "orig,maybe_vex")
4824 (set_attr "mode" "V2DF")])
;; Forms a V2DF from DF operands 1 and 2 (operand 2 may be the "C"
;; constant).  Eight alternatives, in template order: unpcklpd,
;; vunpcklpd, movddup, movhpd, vmovhpd, movsd, and the SSE1-style
;; movlhps / movhps.  Alternatives 0-2 are typed sselog, the rest ssemov.
4826 (define_insn "*vec_concatv2df"
4827 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
4829 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
4830 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
4833 unpcklpd\t{%2, %0|%0, %2}
4834 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4835 %vmovddup\t{%1, %0|%0, %1}
4836 movhpd\t{%2, %0|%0, %2}
4837 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4838 %vmovsd\t{%1, %0|%0, %1}
4839 movlhps\t{%2, %0|%0, %2}
4840 movhps\t{%2, %0|%0, %2}"
4841 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
4844 (eq_attr "alternative" "0,1,2")
4845 (const_string "sselog")
4846 (const_string "ssemov")))
4847 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
4848 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
4849 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
4851 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4853 ;; Parallel integral arithmetic
4855 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate expander over VI_AVX2.  The preparation
;; statement sets operands[2] to a register holding the all-zero vector
;; constant of the mode.
4857 (define_expand "neg<mode>2"
4858 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4861 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")))]
4863 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/subtract expander over VI_AVX2; the operands are
;; legitimized in place by ix86_fixup_binary_operands_no_copy.
4865 (define_expand "<plusminus_insn><mode>3"
4866 [(set (match_operand:VI_AVX2 0 "register_operand" "")
4868 (match_operand:VI_AVX2 1 "nonimmediate_operand" "")
4869 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
4871 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/subtract insn over VI_AVX2 (TARGET_SSE2, operands
;; validated by ix86_binary_operator_ok).  Alternative 0 is the
;; two-operand legacy encoding with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX encoding.
4873 (define_insn "*<plusminus_insn><mode>3"
4874 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x")
4876 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4877 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4878 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4880 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4881 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4882 [(set_attr "isa" "noavx,avx")
4883 (set_attr "type" "sseiadd")
4884 (set_attr "prefix_data16" "1,*")
4885 (set_attr "prefix" "orig,vex")
4886 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus operations over VI12_AVX2; the operands
;; are legitimized in place by ix86_fixup_binary_operands_no_copy.
4888 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
4889 [(set (match_operand:VI12_AVX2 0 "register_operand" "")
4890 (sat_plusminus:VI12_AVX2
4891 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "")
4892 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "")))]
4894 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus operations over VI12_AVX2 (TARGET_SSE2,
;; operands validated by ix86_binary_operator_ok): two-operand legacy
;; encoding or three-operand VEX encoding.
;; NOTE(review): the mode attribute is the fixed string "TI" whereas the
;; neighbouring add/subtract insn uses <sseinsnmode>; confirm this is
;; intended for any 256-bit modes in VI12_AVX2.
4896 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
4897 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
4898 (sat_plusminus:VI12_AVX2
4899 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x")
4900 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4901 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4903 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4904 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4905 [(set_attr "isa" "noavx,avx")
4906 (set_attr "type" "sseiadd")
4907 (set_attr "prefix_data16" "1,*")
4908 (set_attr "prefix" "orig,vex")
4909 (set_attr "mode" "TI")])
;; Byte-element vector multiply over VI1_AVX2, implemented as a split
;; that requires can_create_pseudo_p.  Six temporaries are allocated.
;; Both inputs are interleaved (high and low halves) so each word carries
;; a source byte in its low byte; the halves are multiplied in
;; <sseunpackmode>; then ix86_expand_vec_extract_even_odd gathers the
;; even bytes of the two products into operand 0.  A REG_EQUAL note
;; recording the original MULT is attached to the last emitted insn.
4911 (define_insn_and_split "mul<mode>3"
4912 [(set (match_operand:VI1_AVX2 0 "register_operand" "")
4913 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "")
4914 (match_operand:VI1_AVX2 2 "register_operand" "")))]
4916 && can_create_pseudo_p ()"
4923 enum machine_mode mulmode = <sseunpackmode>mode;
4925 for (i = 0; i < 6; ++i)
4926 t[i] = gen_reg_rtx (<MODE>mode);
4928 /* Unpack data such that we've got a source byte in each low byte of
4929 each word. We don't care what goes into the high byte of each word.
4930 Rather than trying to get zero in there, most convenient is to let
4931 it be a copy of the low byte. */
4932 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
4934 emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
4936 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
4938 emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
4941 /* Multiply words. The end-of-line annotations here give a picture of what
4942 the output of that instruction looks like. Dot means don't care; the
4943 letters are the bytes of the result with A being the most significant. */
4944 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
4945 gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
4946 gen_lowpart (mulmode, t[0]),
4947 gen_lowpart (mulmode, t[1]))));
4948 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
4949 gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
4950 gen_lowpart (mulmode, t[2]),
4951 gen_lowpart (mulmode, t[3]))));
4953 /* Extract the even bytes and merge them back together. */
4954 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4956 set_unique_reg_note (get_last_insn (), REG_EQUAL,
4957 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Word-element vector multiply expander over VI2_AVX2; the operands are
;; legitimized in place by ix86_fixup_binary_operands_no_copy.
4961 (define_expand "mul<mode>3"
4962 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4963 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "")
4964 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))]
4966 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Word-element vector multiply insn over VI2_AVX2, emitted as pmullw
;; (two-operand, noavx) or vpmullw (three-operand, avx).  Operand 1 is
;; marked commutative with "%".
4968 (define_insn "*mul<mode>3"
4969 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4970 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
4971 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
4972 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
4974 pmullw\t{%2, %0|%0, %2}
4975 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4976 [(set_attr "isa" "noavx,avx")
4977 (set_attr "type" "sseimul")
4978 (set_attr "prefix_data16" "1,*")
4979 (set_attr "prefix" "orig,vex")
4980 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander over VI2_AVX2: both operands are extended
;; (any_extend) to <ssedoublemode>, multiplied, and the product is
;; logically shifted right.  The operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
4982 (define_expand "<s>mul<mode>3_highpart"
4983 [(set (match_operand:VI2_AVX2 0 "register_operand" "")
4985 (lshiftrt:<ssedoublemode>
4986 (mult:<ssedoublemode>
4987 (any_extend:<ssedoublemode>
4988 (match_operand:VI2_AVX2 1 "nonimmediate_operand" ""))
4989 (any_extend:<ssedoublemode>
4990 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "")))
4993 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn over VI2_AVX2, emitted as pmulh<u>w
;; (two-operand, noavx) or vpmulh<u>w (three-operand, avx).  Operand 1 is
;; marked commutative with "%".
4995 (define_insn "*<s>mul<mode>3_highpart"
4996 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
4998 (lshiftrt:<ssedoublemode>
4999 (mult:<ssedoublemode>
5000 (any_extend:<ssedoublemode>
5001 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
5002 (any_extend:<ssedoublemode>
5003 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
5005 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5007 pmulh<u>w\t{%2, %0|%0, %2}
5008 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
5009 [(set_attr "isa" "noavx,avx")
5010 (set_attr "type" "sseimul")
5011 (set_attr "prefix_data16" "1,*")
5012 (set_attr "prefix" "orig,vex")
5013 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of elements 0, 2, 4, 6 of two V8SI
;; operands into a V4DI result; the operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5015 (define_expand "avx2_umulv4siv4di3"
5016 [(set (match_operand:V4DI 0 "register_operand" "")
5020 (match_operand:V8SI 1 "nonimmediate_operand" "")
5021 (parallel [(const_int 0) (const_int 2)
5022 (const_int 4) (const_int 6)])))
5025 (match_operand:V8SI 2 "nonimmediate_operand" "")
5026 (parallel [(const_int 0) (const_int 2)
5027 (const_int 4) (const_int 6)])))))]
5029 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Widening multiply of elements 0, 2, 4, 6 of two V8SI operands into a
;; V4DI result, emitted as vpmuludq.  Operand 1 is marked commutative
;; with "%".
;; NOTE(review): the name says "avx" but the condition is TARGET_AVX2.
5031 (define_insn "*avx_umulv4siv4di3"
5032 [(set (match_operand:V4DI 0 "register_operand" "=x")
5036 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5037 (parallel [(const_int 0) (const_int 2)
5038 (const_int 4) (const_int 6)])))
5041 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5042 (parallel [(const_int 0) (const_int 2)
5043 (const_int 4) (const_int 6)])))))]
5044 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5045 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5046 [(set_attr "type" "sseimul")
5047 (set_attr "prefix" "vex")
5048 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result; the operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5050 (define_expand "sse2_umulv2siv2di3"
5051 [(set (match_operand:V2DI 0 "register_operand" "")
5055 (match_operand:V4SI 1 "nonimmediate_operand" "")
5056 (parallel [(const_int 0) (const_int 2)])))
5059 (match_operand:V4SI 2 "nonimmediate_operand" "")
5060 (parallel [(const_int 0) (const_int 2)])))))]
5062 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Widening multiply of elements 0 and 2 of two V4SI operands into a V2DI
;; result, emitted as pmuludq (two-operand, noavx) or vpmuludq
;; (three-operand, avx).  Operand 1 is marked commutative with "%".
5064 (define_insn "*sse2_umulv2siv2di3"
5065 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5069 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5070 (parallel [(const_int 0) (const_int 2)])))
5073 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5074 (parallel [(const_int 0) (const_int 2)])))))]
5075 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5077 pmuludq\t{%2, %0|%0, %2}
5078 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
5079 [(set_attr "isa" "noavx,avx")
5080 (set_attr "type" "sseimul")
5081 (set_attr "prefix_data16" "1,*")
5082 (set_attr "prefix" "orig,vex")
5083 (set_attr "mode" "TI")])
;; Expander for the widening multiply of elements 0, 2, 4, 6 of two V8SI
;; operands into a V4DI result (the non-"u" counterpart of
;; avx2_umulv4siv4di3); the operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5085 (define_expand "avx2_mulv4siv4di3"
5086 [(set (match_operand:V4DI 0 "register_operand" "")
5090 (match_operand:V8SI 1 "nonimmediate_operand" "")
5091 (parallel [(const_int 0) (const_int 2)
5092 (const_int 4) (const_int 6)])))
5095 (match_operand:V8SI 2 "nonimmediate_operand" "")
5096 (parallel [(const_int 0) (const_int 2)
5097 (const_int 4) (const_int 6)])))))]
5099 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; Widening multiply of elements 0, 2, 4, 6 of two V8SI operands into a
;; V4DI result, emitted as vpmuldq (TARGET_AVX2, operands validated by
;; ix86_binary_operator_ok for MULT).
;; Operand 1 carries the "%" commutativity modifier: both inputs use the
;; same element selection and the multiply is symmetric, so operands 1
;; and 2 may be exchanged to satisfy the constraints (for example when the
;; memory operand ends up in position 1) instead of forcing a reload.
;; This matches *avx_umulv4siv4di3 ("%x") and *sse4_1_mulv2siv2di3 ("%0,x").
5101 (define_insn "*avx2_mulv4siv4di3"
5102 [(set (match_operand:V4DI 0 "register_operand" "=x")
5106 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
5107 (parallel [(const_int 0) (const_int 2)
5108 (const_int 4) (const_int 6)])))
5111 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
5112 (parallel [(const_int 0) (const_int 2)
5113 (const_int 4) (const_int 6)])))))]
5114 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
5115 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5116 [(set_attr "isa" "avx")
5117 (set_attr "type" "sseimul")
5118 (set_attr "prefix_extra" "1")
5119 (set_attr "prefix" "vex")
5120 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result (the non-"u" counterpart of
;; sse2_umulv2siv2di3); the operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5122 (define_expand "sse4_1_mulv2siv2di3"
5123 [(set (match_operand:V2DI 0 "register_operand" "")
5127 (match_operand:V4SI 1 "nonimmediate_operand" "")
5128 (parallel [(const_int 0) (const_int 2)])))
5131 (match_operand:V4SI 2 "nonimmediate_operand" "")
5132 (parallel [(const_int 0) (const_int 2)])))))]
5134 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Widening multiply of elements 0 and 2 of two V4SI operands into a V2DI
;; result, emitted as pmuldq (two-operand, noavx) or vpmuldq
;; (three-operand, avx) under TARGET_SSE4_1.  Operand 1 is marked
;; commutative with "%".
5136 (define_insn "*sse4_1_mulv2siv2di3"
5137 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5141 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
5142 (parallel [(const_int 0) (const_int 2)])))
5145 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
5146 (parallel [(const_int 0) (const_int 2)])))))]
5147 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5149 pmuldq\t{%2, %0|%0, %2}
5150 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
5151 [(set_attr "isa" "noavx,avx")
5152 (set_attr "type" "sseimul")
5153 (set_attr "prefix_data16" "1,*")
5154 (set_attr "prefix_extra" "1")
5155 (set_attr "prefix" "orig,vex")
5156 (set_attr "mode" "TI")])
;; Expander for the V16HI -> V8SI multiply-add.  The visible pattern
;; selects V8HI sub-vectors of operands 1 and 2 twice each: once with a
;; selector list starting at index 0 and once with a list starting at 1
;; and ending at 15.  The operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5158 (define_expand "avx2_pmaddwd"
5159 [(set (match_operand:V8SI 0 "register_operand" "")
5164 (match_operand:V16HI 1 "nonimmediate_operand" "")
5165 (parallel [(const_int 0)
5175 (match_operand:V16HI 2 "nonimmediate_operand" "")
5176 (parallel [(const_int 0)
5186 (vec_select:V8HI (match_dup 1)
5187 (parallel [(const_int 1)
5196 (vec_select:V8HI (match_dup 2)
5197 (parallel [(const_int 1)
5204 (const_int 15)]))))))]
5206 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; Expander for the V8HI -> V4SI multiply-add.  The visible pattern
;; selects V4HI sub-vectors of operands 1 and 2 twice each: once with a
;; selector list starting at index 0 and once with a list starting at 1
;; and ending at 7.  The operands are legitimized in place by
;; ix86_fixup_binary_operands_no_copy.
5208 (define_expand "sse2_pmaddwd"
5209 [(set (match_operand:V4SI 0 "register_operand" "")
5214 (match_operand:V8HI 1 "nonimmediate_operand" "")
5215 (parallel [(const_int 0)
5221 (match_operand:V8HI 2 "nonimmediate_operand" "")
5222 (parallel [(const_int 0)
5228 (vec_select:V4HI (match_dup 1)
5229 (parallel [(const_int 1)
5234 (vec_select:V4HI (match_dup 2)
5235 (parallel [(const_int 1)
5238 (const_int 7)]))))))]
5240 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn matching the V16HI -> V8SI multiply-add pattern, emitted as
;; vpmaddwd under TARGET_AVX2.  Operand 1 is marked commutative with "%".
5242 (define_insn "*avx2_pmaddwd"
5243 [(set (match_operand:V8SI 0 "register_operand" "=x")
5248 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
5249 (parallel [(const_int 0)
5259 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
5260 (parallel [(const_int 0)
5270 (vec_select:V8HI (match_dup 1)
5271 (parallel [(const_int 1)
5280 (vec_select:V8HI (match_dup 2)
5281 (parallel [(const_int 1)
5288 (const_int 15)]))))))]
5289 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
5290 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5291 [(set_attr "type" "sseiadd")
5292 (set_attr "prefix" "vex")
5293 (set_attr "mode" "OI")])
;; Insn matching the V8HI -> V4SI multiply-add pattern, emitted as
;; pmaddwd (two-operand, noavx) or vpmaddwd (three-operand, avx) under
;; TARGET_SSE2.  Operand 1 is marked commutative with "%"; the atom_unit
;; attribute is set to "simul".
5295 (define_insn "*sse2_pmaddwd"
5296 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5301 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5302 (parallel [(const_int 0)
5308 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
5309 (parallel [(const_int 0)
5315 (vec_select:V4HI (match_dup 1)
5316 (parallel [(const_int 1)
5321 (vec_select:V4HI (match_dup 2)
5322 (parallel [(const_int 1)
5325 (const_int 7)]))))))]
5326 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5328 pmaddwd\t{%2, %0|%0, %2}
5329 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
5330 [(set_attr "isa" "noavx,avx")
5331 (set_attr "type" "sseiadd")
5332 (set_attr "atom_unit" "simul")
5333 (set_attr "prefix_data16" "1,*")
5334 (set_attr "prefix" "orig,vex")
5335 (set_attr "mode" "TI")])
;; Dword-element vector multiply expander over VI4_AVX2 with register
;; operands.  ix86_fixup_binary_operands_no_copy is applied only when
;; TARGET_SSE4_1 or TARGET_AVX is available.
5337 (define_expand "mul<mode>3"
5338 [(set (match_operand:VI4_AVX2 0 "register_operand" "")
5339 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "register_operand" "")
5340 (match_operand:VI4_AVX2 2 "register_operand" "")))]
5343 if (TARGET_SSE4_1 || TARGET_AVX)
5344 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Dword-element vector multiply insn over VI4_AVX2, emitted as pmulld
;; (two-operand, noavx) or vpmulld (three-operand, avx) under
;; TARGET_SSE4_1.  Operand 1 is marked commutative with "%".
5347 (define_insn "*<sse4_1_avx2>_mul<mode>3"
5348 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x")
5349 (mult:VI4_AVX2 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x")
5350 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))]
5351 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
5353 pmulld\t{%2, %0|%0, %2}
5354 vpmulld\t{%2, %1, %0|%0, %1, %2}"
5355 [(set_attr "isa" "noavx,avx")
5356 (set_attr "type" "sseimul")
5357 (set_attr "prefix_extra" "1")
5358 (set_attr "prefix" "orig,vex")
5359 (set_attr "mode" "<sseinsnmode>")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX,
;; implemented as a split that requires can_create_pseudo_p.  Six V4SI
;; temporaries are allocated.  Elements 0 and 2 are multiplied with
;; sse2_umulv2siv2di3; both inputs are shifted as V1TI values so elements
;; 1 and 3 land in the even slots and are multiplied the same way; the
;; two products are shuffled with sse2_pshufd_1 and merged with
;; vec_interleave_lowv4si.  A REG_EQUAL note recording the original MULT
;; is attached to the last emitted insn.
5361 (define_insn_and_split "*sse2_mulv4si3"
5362 [(set (match_operand:V4SI 0 "register_operand" "")
5363 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5364 (match_operand:V4SI 2 "register_operand" "")))]
5365 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5366 && can_create_pseudo_p ()"
5371 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5377 t1 = gen_reg_rtx (V4SImode);
5378 t2 = gen_reg_rtx (V4SImode);
5379 t3 = gen_reg_rtx (V4SImode);
5380 t4 = gen_reg_rtx (V4SImode);
5381 t5 = gen_reg_rtx (V4SImode);
5382 t6 = gen_reg_rtx (V4SImode);
5383 thirtytwo = GEN_INT (32);
5385 /* Multiply elements 2 and 0. */
5386 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
5389 /* Shift both input vectors down one element, so that elements 3
5390 and 1 are now in the slots for elements 2 and 0. For K8, at
5391 least, this is faster than using a shuffle. */
5392 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5393 gen_lowpart (V1TImode, op1),
5395 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5396 gen_lowpart (V1TImode, op2),
5398 /* Multiply elements 3 and 1. */
5399 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5402 /* Move the results in element 2 down to element 1; we don't care
5403 what goes in elements 2 and 3. */
5404 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
5405 const0_rtx, const0_rtx));
5406 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5407 const0_rtx, const0_rtx));
5409 /* Merge the parts back together. */
5410 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5412 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5413 gen_rtx_MULT (V4SImode, operands[1], operands[2]));
;; Qword-element vector multiply over VI8_AVX2, implemented as a split
;; that requires can_create_pseudo_p.
;; XOP path (TARGET_XOP and V2DImode): the inputs are viewed as V4SI,
;; operand 1 is shuffled, the cross products are formed with mulv4si3,
;; summed pairwise with xop_phadddq, shifted left by 32 and accumulated
;; with the low products by xop_pmacsdql.
;; Generic path: t1 = lo(op1)*lo(op2); t4 = lo(op1)*hi(op2) and
;; t5 = lo(op2)*hi(op1), each shifted left by 32; result = t1 + t4 + t5.
;; A REG_EQUAL note recording the original MULT is attached to the last
;; emitted insn.
;; NOTE(review): in the XOP branch the inline labels "t4:" and "t5:"
;; describe values the code actually places in t3 (xop_phadddq result)
;; and t4 (ashlv2di3 result) respectively.
5417 (define_insn_and_split "mul<mode>3"
5418 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5419 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
5420 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5422 && can_create_pseudo_p ()"
5427 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5434 if (TARGET_XOP && <MODE>mode == V2DImode)
5436 /* op1: A,B,C,D, op2: E,F,G,H */
5437 op1 = gen_lowpart (V4SImode, op1);
5438 op2 = gen_lowpart (V4SImode, op2);
5440 t1 = gen_reg_rtx (V4SImode);
5441 t2 = gen_reg_rtx (V4SImode);
5442 t3 = gen_reg_rtx (V2DImode);
5443 t4 = gen_reg_rtx (V2DImode);
5446 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5452 /* t2: (B*E),(A*F),(D*G),(C*H) */
5453 emit_insn (gen_mulv4si3 (t2, t1, op2));
5455 /* t4: (B*E)+(A*F), (D*G)+(C*H) */
5456 emit_insn (gen_xop_phadddq (t3, t2));
5458 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5459 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5461 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5462 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5466 t1 = gen_reg_rtx (<MODE>mode);
5467 t2 = gen_reg_rtx (<MODE>mode);
5468 t3 = gen_reg_rtx (<MODE>mode);
5469 t4 = gen_reg_rtx (<MODE>mode);
5470 t5 = gen_reg_rtx (<MODE>mode);
5471 t6 = gen_reg_rtx (<MODE>mode);
5472 thirtytwo = GEN_INT (32);
5474 /* Multiply low parts. */
5475 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5476 (t1, gen_lowpart (<ssepackmode>mode, op1),
5477 gen_lowpart (<ssepackmode>mode, op2)));
5479 /* Shift input vectors right 32 bits so we can multiply high parts. */
5480 emit_insn (gen_lshr<mode>3 (t2, op1, thirtytwo));
5481 emit_insn (gen_lshr<mode>3 (t3, op2, thirtytwo));
5483 /* Multiply high parts by low parts. */
5484 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5485 (t4, gen_lowpart (<ssepackmode>mode, op1),
5486 gen_lowpart (<ssepackmode>mode, t3)));
5487 emit_insn (gen_<sse2_avx2>_umulv<ssescalarnum>si<mode>3
5488 (t5, gen_lowpart (<ssepackmode>mode, op2),
5489 gen_lowpart (<ssepackmode>mode, t2)));
5491 /* Shift them back. */
5492 emit_insn (gen_ashl<mode>3 (t4, t4, thirtytwo));
5493 emit_insn (gen_ashl<mode>3 (t5, t5, thirtytwo));
5495 /* Add the three parts together. */
5496 emit_insn (gen_add<mode>3 (t6, t1, t4));
5497 emit_insn (gen_add<mode>3 (op0, t6, t5));
5500 set_unique_reg_note (get_last_insn (), REG_EQUAL,
5501 gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; Expander for the high-half widening multiply of VI2_AVX2 vectors.
;; Emits mul<mode>3 (result in t1) and <s>mul<mode>3_highpart (result
;; in t2) on op1 and op2, then interleaves the high halves of t1 and t2
;; into operand 0 viewed in <MODE>mode.
;; NOTE(review): the lines that set op1/op2 and the insn condition are
;; not visible in this listing -- presumably operands 1 and 2; confirm.
5505 (define_expand "vec_widen_<s>mult_hi_<mode>"
5506 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5507 (any_extend:<sseunpackmode>
5508 (match_operand:VI2_AVX2 1 "register_operand" ""))
5509 (match_operand:VI2_AVX2 2 "register_operand" "")]
5512 rtx op1, op2, t1, t2, dest;
5516 t1 = gen_reg_rtx (<MODE>mode);
5517 t2 = gen_reg_rtx (<MODE>mode);
5518 dest = gen_lowpart (<MODE>mode, operands[0]);
5520 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5521 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5522 emit_insn (gen_vec_interleave_high<mode> (dest, t1, t2));
;; Expander for the low-half widening multiply of VI2_AVX2 vectors.
;; Emits mul<mode>3 (result in t1) and <s>mul<mode>3_highpart (result
;; in t2) on op1 and op2, then interleaves the low halves of t1 and t2
;; into operand 0 viewed in <MODE>mode.
;; NOTE(review): the lines that set op1/op2 and the insn condition are
;; not visible in this listing -- presumably operands 1 and 2; confirm.
5526 (define_expand "vec_widen_<s>mult_lo_<mode>"
5527 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5528 (any_extend:<sseunpackmode>
5529 (match_operand:VI2_AVX2 1 "register_operand" ""))
5530 (match_operand:VI2_AVX2 2 "register_operand" "")]
5533 rtx op1, op2, t1, t2, dest;
5537 t1 = gen_reg_rtx (<MODE>mode);
5538 t2 = gen_reg_rtx (<MODE>mode);
5539 dest = gen_lowpart (<MODE>mode, operands[0]);
5541 emit_insn (gen_mul<mode>3 (t1, op1, op2));
5542 emit_insn (gen_<s>mul<mode>3_highpart (t2, op1, op2));
5543 emit_insn (gen_vec_interleave_low<mode> (dest, t1, t2));
;; Expander for the high-half V8SI -> V4DI widening multiply.
;; Each input is viewed as V4DI and permuted with avx2_permv4di_1 using
;; the selector (0, 2, 1, 3); the result is viewed as V8SI and shuffled
;; with avx2_pshufdv3 using the immediate 2 + (2 << 2) + (3 << 4) + (3 << 6).
;; The two shuffled vectors are then multiplied into operand 0 by
;; avx2_<u>mulv4siv4di3.
;; NOTE(review): the insn condition and the temporaries declaration are
;; not visible in this listing.
5547 (define_expand "vec_widen_<s>mult_hi_v8si"
5548 [(match_operand:V4DI 0 "register_operand" "")
5549 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5550 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5555 t1 = gen_reg_rtx (V4DImode);
5556 t2 = gen_reg_rtx (V4DImode);
5557 t3 = gen_reg_rtx (V8SImode);
5558 t4 = gen_reg_rtx (V8SImode);
5559 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5560 const0_rtx, const2_rtx,
5561 const1_rtx, GEN_INT (3)));
5562 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5563 const0_rtx, const2_rtx,
5564 const1_rtx, GEN_INT (3)));
5565 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5566 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5567 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5568 GEN_INT (2 + (2 << 2) + (3 << 4) + (3 << 6))));
5569 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander for the low-half V8SI -> V4DI widening multiply.
;; Each input is viewed as V4DI and permuted with avx2_permv4di_1 using
;; the selector (0, 2, 1, 3); the result is viewed as V8SI and shuffled
;; with avx2_pshufdv3 using the immediate 0 + (0 << 2) + (1 << 4) + (1 << 6).
;; The two shuffled vectors are then multiplied into operand 0 by
;; avx2_<u>mulv4siv4di3.
;; NOTE(review): the insn condition and the temporaries declaration are
;; not visible in this listing.
5573 (define_expand "vec_widen_<s>mult_lo_v8si"
5574 [(match_operand:V4DI 0 "register_operand" "")
5575 (any_extend:V4DI (match_operand:V8SI 1 "nonimmediate_operand" ""))
5576 (match_operand:V8SI 2 "nonimmediate_operand" "")]
5581 t1 = gen_reg_rtx (V4DImode);
5582 t2 = gen_reg_rtx (V4DImode);
5583 t3 = gen_reg_rtx (V8SImode);
5584 t4 = gen_reg_rtx (V8SImode);
5585 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, operands[1]),
5586 const0_rtx, const2_rtx,
5587 const1_rtx, GEN_INT (3)));
5588 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, operands[2]),
5589 const0_rtx, const2_rtx,
5590 const1_rtx, GEN_INT (3)));
5591 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1),
5592 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5593 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2),
5594 GEN_INT (0 + (0 << 2) + (1 << 4) + (1 << 6))));
5595 emit_insn (gen_avx2_<u>mulv4siv4di3 (operands[0], t3, t4));
;; Expander for the signed high-half V4SI -> V2DI widening multiply.
;; Two instruction sequences are visible:
;;  - sse2_pshufd_1 with selector (0, 2, 1, 3) on each input, followed by
;;    xop_mulv2div2di3_high;
;;  - vec_interleave_highv4si of each input with itself, followed by
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between them and the op1/op2 setup are
;; not visible in this listing -- presumably a TARGET_XOP check; confirm.
5599 (define_expand "vec_widen_smult_hi_v4si"
5600 [(match_operand:V2DI 0 "register_operand" "")
5601 (match_operand:V4SI 1 "register_operand" "")
5602 (match_operand:V4SI 2 "register_operand" "")]
5605 rtx op1, op2, t1, t2;
5609 t1 = gen_reg_rtx (V4SImode);
5610 t2 = gen_reg_rtx (V4SImode);
5614 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5615 GEN_INT (1), GEN_INT (3)));
5616 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5617 GEN_INT (1), GEN_INT (3)));
5618 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5622 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5623 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5624 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander for the signed low-half V4SI -> V2DI widening multiply.
;; Two instruction sequences are visible:
;;  - sse2_pshufd_1 with selector (0, 2, 1, 3) on each input, followed by
;;    xop_mulv2div2di3_low;
;;  - vec_interleave_lowv4si of each input with itself, followed by
;;    sse4_1_mulv2siv2di3.
;; NOTE(review): the test choosing between them and the op1/op2 setup are
;; not visible in this listing -- presumably a TARGET_XOP check; confirm.
5628 (define_expand "vec_widen_smult_lo_v4si"
5629 [(match_operand:V2DI 0 "register_operand" "")
5630 (match_operand:V4SI 1 "register_operand" "")
5631 (match_operand:V4SI 2 "register_operand" "")]
5634 rtx op1, op2, t1, t2;
5638 t1 = gen_reg_rtx (V4SImode);
5639 t2 = gen_reg_rtx (V4SImode);
5643 emit_insn (gen_sse2_pshufd_1 (t1, op1, GEN_INT (0), GEN_INT (2),
5644 GEN_INT (1), GEN_INT (3)));
5645 emit_insn (gen_sse2_pshufd_1 (t2, op2, GEN_INT (0), GEN_INT (2),
5646 GEN_INT (1), GEN_INT (3)));
5647 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5651 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5652 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5653 emit_insn (gen_sse4_1_mulv2siv2di3 (operands[0], t1, t2));
;; Expander for the unsigned high-half V4SI -> V2DI widening multiply.
;; Each input is interleaved with itself by vec_interleave_highv4si and
;; the two results are multiplied into operand 0 by sse2_umulv2siv2di3.
;; NOTE(review): the op1/op2 setup and the insn condition are not
;; visible in this listing -- presumably operands 1 and 2; confirm.
5657 (define_expand "vec_widen_umult_hi_v4si"
5658 [(match_operand:V2DI 0 "register_operand" "")
5659 (match_operand:V4SI 1 "register_operand" "")
5660 (match_operand:V4SI 2 "register_operand" "")]
5663 rtx op1, op2, t1, t2;
5667 t1 = gen_reg_rtx (V4SImode);
5668 t2 = gen_reg_rtx (V4SImode);
5670 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5671 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5672 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Expander for the unsigned low-half V4SI -> V2DI widening multiply.
;; Each input is interleaved with itself by vec_interleave_lowv4si and
;; the two results are multiplied into operand 0 by sse2_umulv2siv2di3.
;; NOTE(review): the op1/op2 setup and the insn condition are not
;; visible in this listing -- presumably operands 1 and 2; confirm.
5676 (define_expand "vec_widen_umult_lo_v4si"
5677 [(match_operand:V2DI 0 "register_operand" "")
5678 (match_operand:V4SI 1 "register_operand" "")
5679 (match_operand:V4SI 2 "register_operand" "")]
5682 rtx op1, op2, t1, t2;
5686 t1 = gen_reg_rtx (V4SImode);
5687 t2 = gen_reg_rtx (V4SImode);
5689 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5690 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5691 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product expander for VI2_AVX2 inputs with an accumulator
;; in <sseunpackmode>.  Emits <sse2_avx2>_pmaddwd on operands 1 and 2
;; into a fresh register t, then sets operand 0 to a PLUS in
;; <sseunpackmode>.
;; NOTE(review): the PLUS operands are not visible in this listing --
;; presumably operand 3 and t; confirm.
5695 (define_expand "sdot_prod<mode>"
5696 [(match_operand:<sseunpackmode> 0 "register_operand" "")
5697 (match_operand:VI2_AVX2 1 "register_operand" "")
5698 (match_operand:VI2_AVX2 2 "register_operand" "")
5699 (match_operand:<sseunpackmode> 3 "register_operand" "")]
5702 rtx t = gen_reg_rtx (<sseunpackmode>mode);
5703 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
5704 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
5705 gen_rtx_PLUS (<sseunpackmode>mode,
;; Maps an extension code to an ISA name prefix: zero_extend gives sse2
;; and sign_extend gives sse4_1.  The dot-product expander that follows
;; uses it to build the name gen_<sse2_sse4_1>_<u>mulv2siv2di3.
5710 (define_code_attr sse2_sse4_1
5711 [(zero_extend "sse2") (sign_extend "sse4_1")])
;; Dot-product expander for V4SI inputs accumulating into V2DI.
;; Enabled for TARGET_SSE2 when the code is ZERO_EXTEND and for
;; TARGET_SSE4_1 otherwise.  Steps visible here:
;;  1. t1 = widening multiply of operands 1 and 2, plus operand 3;
;;  2. t2/t3 = operands 1 and 2 shifted right as V1TI values;
;;  3. t4 = widening multiply of t2 and t3;
;;  4. operand 0 = t1 + t4.
;; NOTE(review): the shift count passed to sse2_lshrv1ti3 is not visible
;; in this listing -- presumably 32 bits, to expose the odd elements.
5713 (define_expand "<s>dot_prodv4si"
5714 [(match_operand:V2DI 0 "register_operand" "")
5715 (any_extend:V2DI (match_operand:V4SI 1 "register_operand" ""))
5716 (match_operand:V4SI 2 "register_operand" "")
5717 (match_operand:V2DI 3 "register_operand" "")]
5718 "<CODE> == ZERO_EXTEND ? TARGET_SSE2 : TARGET_SSE4_1"
5722 t1 = gen_reg_rtx (V2DImode);
5723 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t1, operands[1], operands[2]));
5724 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5726 t2 = gen_reg_rtx (V4SImode);
5727 t3 = gen_reg_rtx (V4SImode);
5728 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5729 gen_lowpart (V1TImode, operands[1]),
5731 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5732 gen_lowpart (V1TImode, operands[2]),
5735 t4 = gen_reg_rtx (V2DImode);
5736 emit_insn (gen_<sse2_sse4_1>_<u>mulv2siv2di3 (t4, t2, t3));
5738 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Dot-product expander for V8SI inputs accumulating into V4DI.
;; Steps visible here:
;;  1. t1 = avx2_<u>mulv4siv4di3 of operands 1 and 2, plus operand 3;
;;  2. t2/t3 = operands 1 and 2 shifted right as V2TI values;
;;  3. t4 = avx2_<u>mulv4siv4di3 of t2 and t3;
;;  4. operand 0 = t1 + t4.
;; NOTE(review): the insn condition and the shift count passed to
;; avx2_lshrv2ti3 are not visible in this listing -- presumably 32 bits.
5742 (define_expand "<s>dot_prodv8si"
5743 [(match_operand:V4DI 0 "register_operand" "")
5744 (any_extend:V4DI (match_operand:V8SI 1 "register_operand" ""))
5745 (match_operand:V8SI 2 "register_operand" "")
5746 (match_operand:V4DI 3 "register_operand" "")]
5751 t1 = gen_reg_rtx (V4DImode);
5752 emit_insn (gen_avx2_<u>mulv4siv4di3 (t1, operands[1], operands[2]));
5753 emit_insn (gen_addv4di3 (t1, t1, operands[3]));
5755 t2 = gen_reg_rtx (V8SImode);
5756 t3 = gen_reg_rtx (V8SImode);
5757 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t2),
5758 gen_lowpart (V2TImode, operands[1]),
5760 emit_insn (gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, t3),
5761 gen_lowpart (V2TImode, operands[2]),
5764 t4 = gen_reg_rtx (V4DImode);
5765 emit_insn (gen_avx2_<u>mulv4siv4di3 (t4, t2, t3));
5767 emit_insn (gen_addv4di3 (operands[0], t1, t4));
;; Shift insn for VI24_AVX2 modes emitting psra / vpsra.
;; Operand 2 is an SImode count with constraint xN.  Alternative 0 is
;; the two-operand form (isa noavx, destination tied to operand 1);
;; alternative 1 is the three-operand VEX form (isa avx).
;; length_immediate depends on whether operand 2 is a const_int.
5771 (define_insn "ashr<mode>3"
5772 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
5774 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
5775 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5778 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5779 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5780 [(set_attr "isa" "noavx,avx")
5781 (set_attr "type" "sseishft")
5782 (set (attr "length_immediate")
5783 (if_then_else (match_operand 2 "const_int_operand" "")
5785 (const_string "0")))
5786 (set_attr "prefix_data16" "1,*")
5787 (set_attr "prefix" "orig,vex")
5788 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn for VI248_AVX2 modes over the any_lshift code iterator,
;; emitting p<vshift> / vp<vshift> with the mode suffix.
;; Operand 2 is an SImode count with constraint xN.  Alternative 0 is
;; the two-operand form (isa noavx, destination tied to operand 1);
;; alternative 1 is the three-operand VEX form (isa avx).
;; length_immediate depends on whether operand 2 is a const_int.
5790 (define_insn "<shift_insn><mode>3"
5791 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
5792 (any_lshift:VI248_AVX2
5793 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
5794 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5797 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
5798 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5799 [(set_attr "isa" "noavx,avx")
5800 (set_attr "type" "sseishft")
5801 (set (attr "length_immediate")
5802 (if_then_else (match_operand 2 "const_int_operand" "")
5804 (const_string "0")))
5805 (set_attr "prefix_data16" "1,*")
5806 (set_attr "prefix" "orig,vex")
5807 (set_attr "mode" "<sseinsnmode>")])
;; vec_shl expander for the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand (by its name, a constant count that is
;; a multiple of 8 -- confirm against the predicate definition).
;; The preparation code re-views operands 0 and 1 in V1TImode, so the
;; shift applies to the whole 128-bit value.
5809 (define_expand "vec_shl_<mode>"
5810 [(set (match_operand:VI_128 0 "register_operand" "")
5812 (match_operand:VI_128 1 "register_operand" "")
5813 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5816 operands[0] = gen_lowpart (V1TImode, operands[0]);
5817 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Shift insn for VIMAX_AVX2 modes emitting pslldq / vpslldq.
;; The output code divides operand 2 by 8 before printing it, so the
;; RTL count is eight times the printed immediate (presumably bits
;; versus bytes, given the mul_8 predicate -- confirm).
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
5820 (define_insn "<sse2_avx2>_ashl<mode>3"
5821 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5823 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5824 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5827 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5829 switch (which_alternative)
5832 return "pslldq\t{%2, %0|%0, %2}";
5834 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5839 [(set_attr "isa" "noavx,avx")
5840 (set_attr "type" "sseishft")
5841 (set_attr "length_immediate" "1")
5842 (set_attr "prefix_data16" "1,*")
5843 (set_attr "prefix" "orig,vex")
5844 (set_attr "mode" "<sseinsnmode>")])
;; vec_shr expander for the VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand (by its name, a constant count that is
;; a multiple of 8 -- confirm against the predicate definition).
;; The preparation code re-views operands 0 and 1 in V1TImode, so the
;; shift applies to the whole 128-bit value.
5846 (define_expand "vec_shr_<mode>"
5847 [(set (match_operand:VI_128 0 "register_operand" "")
5849 (match_operand:VI_128 1 "register_operand" "")
5850 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5853 operands[0] = gen_lowpart (V1TImode, operands[0]);
5854 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; Logical right shift (lshiftrt) insn for VIMAX_AVX2 modes emitting
;; psrldq / vpsrldq.
;; The output code divides operand 2 by 8 before printing it, so the
;; RTL count is eight times the printed immediate (presumably bits
;; versus bytes, given the mul_8 predicate -- confirm).
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
5857 (define_insn "<sse2_avx2>_lshr<mode>3"
5858 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
5859 (lshiftrt:VIMAX_AVX2
5860 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
5861 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5864 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5866 switch (which_alternative)
5869 return "psrldq\t{%2, %0|%0, %2}";
5871 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5876 [(set_attr "isa" "noavx,avx")
5877 (set_attr "type" "sseishft")
5878 (set_attr "length_immediate" "1")
5879 (set_attr "atom_unit" "sishuf")
5880 (set_attr "prefix_data16" "1,*")
5881 (set_attr "prefix" "orig,vex")
5882 (set_attr "mode" "<sseinsnmode>")])
;; Binary-operation expander for the VI124_256 modes.  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and the insn condition are not
;; visible in this listing -- presumably the integer max/min codes
;; matched by the vp<maxmin_int> insn that follows; confirm.
5885 (define_expand "<code><mode>3"
5886 [(set (match_operand:VI124_256 0 "register_operand" "")
5888 (match_operand:VI124_256 1 "nonimmediate_operand" "")
5889 (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
5891 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 insn for the VI124_256 modes emitting vp<maxmin_int> with the
;; mode suffix.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok.  Operand 1 is marked commutative (%) and
;; operand 2 may be a register or memory.
5893 (define_insn "*avx2_<code><mode>3"
5894 [(set (match_operand:VI124_256 0 "register_operand" "=x")
5896 (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
5897 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
5898 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5899 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5900 [(set_attr "type" "sseiadd")
5901 (set_attr "prefix_extra" "1")
5902 (set_attr "prefix" "vex")
5903 (set_attr "mode" "OI")])
;; Max/min expander for the VI8_AVX2 modes, implemented as a vector
;; conditional through ix86_expand_int_vcond.
;; xops[1]/xops[2] are operands 1 and 2 in order for SMAX and UMAX, and
;; swapped otherwise.  The comparison placed in xops[3] is GTU for UMAX
;; and UMIN and GT for the other codes; xops[4] and xops[5] are its
;; operands.
;; NOTE(review): the insn condition and the handling of the returned
;; ok value are not visible in this listing.
5905 (define_expand "<code><mode>3"
5906 [(set (match_operand:VI8_AVX2 0 "register_operand" "")
5908 (match_operand:VI8_AVX2 1 "register_operand" "")
5909 (match_operand:VI8_AVX2 2 "register_operand" "")))]
5916 xops[0] = operands[0];
5918 if (<CODE> == SMAX || <CODE> == UMAX)
5920 xops[1] = operands[1];
5921 xops[2] = operands[2];
5925 xops[1] = operands[2];
5926 xops[2] = operands[1];
5929 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
5931 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
5932 xops[4] = operands[1];
5933 xops[5] = operands[2];
5935 ok = ix86_expand_int_vcond (xops);
;; Expander for the VI124_128 modes that falls back to a GT-based
;; vector conditional.
;; With TARGET_SSE4_1, or for V8HImode, only the operands are fixed up
;; with ix86_fixup_binary_operands_no_copy.  Otherwise operands 1 and 2
;; are forced into registers and ix86_expand_int_vcond is called with a
;; GT comparison of the two operands.
;; NOTE(review): the code iterator, the insn condition and the test
;; that picks the xops[1]/xops[2] order are not visible in this
;; listing -- presumably signed max/min with a <CODE> == SMAX test.
5940 (define_expand "<code><mode>3"
5941 [(set (match_operand:VI124_128 0 "register_operand" "")
5943 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5944 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5947 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
5948 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
5954 xops[0] = operands[0];
5955 operands[1] = force_reg (<MODE>mode, operands[1]);
5956 operands[2] = force_reg (<MODE>mode, operands[2]);
5960 xops[1] = operands[1];
5961 xops[2] = operands[2];
5965 xops[1] = operands[2];
5966 xops[2] = operands[1];
5969 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5970 xops[4] = operands[1];
5971 xops[5] = operands[2];
5973 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI14_128 modes emitting p<maxmin_int> /
;; vp<maxmin_int> with the mode suffix.  Requires TARGET_SSE4_1 and
;; operands accepted by ix86_binary_operator_ok.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
5979 (define_insn "*sse4_1_<code><mode>3"
5980 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5982 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5983 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5984 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5986 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5987 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5988 [(set_attr "isa" "noavx,avx")
5989 (set_attr "type" "sseiadd")
5990 (set_attr "prefix_extra" "1,*")
5991 (set_attr "prefix" "orig,vex")
5992 (set_attr "mode" "TI")])
;; SSE2 insn for V8HI emitting p<maxmin_int>w / vp<maxmin_int>w.
;; Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Alternative 0 is the two-operand form
;; (isa noavx); alternative 1 is the three-operand VEX form (isa avx).
5994 (define_insn "*<code>v8hi3"
5995 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5997 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5998 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5999 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
6001 p<maxmin_int>w\t{%2, %0|%0, %2}
6002 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
6003 [(set_attr "isa" "noavx,avx")
6004 (set_attr "type" "sseiadd")
6005 (set_attr "prefix_data16" "1,*")
6006 (set_attr "prefix_extra" "*,1")
6007 (set_attr "prefix" "orig,vex")
6008 (set_attr "mode" "TI")])
;; Expander for the VI124_128 modes that falls back to a GTU-based
;; vector conditional.  Three cases are visible:
;;  - TARGET_SSE4_1, or V16QImode: only fix up the operands;
;;  - UMAX on V8HImode: emit sse2_ussubv8hi3 (op3 = operand 1, op2)
;;    followed by addv8hi3 (operand 0 = op3 + op2).  op3 is a fresh
;;    register when operand 0 is the same rtx as op2, so op2 is still
;;    intact for the add;
;;  - otherwise: force operands 1 and 2 into registers and call
;;    ix86_expand_int_vcond with a GTU comparison of the two operands.
;; NOTE(review): the code iterator, the insn condition and the test
;; that picks the xops[1]/xops[2] order are not visible in this
;; listing -- presumably unsigned max/min with a <CODE> == UMAX test.
6010 (define_expand "<code><mode>3"
6011 [(set (match_operand:VI124_128 0 "register_operand" "")
6013 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6014 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6017 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
6018 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
6019 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
6021 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
6022 operands[1] = force_reg (<MODE>mode, operands[1]);
6023 if (rtx_equal_p (op3, op2))
6024 op3 = gen_reg_rtx (V8HImode);
6025 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
6026 emit_insn (gen_addv8hi3 (op0, op3, op2));
6034 operands[1] = force_reg (<MODE>mode, operands[1]);
6035 operands[2] = force_reg (<MODE>mode, operands[2]);
6037 xops[0] = operands[0];
6041 xops[1] = operands[1];
6042 xops[2] = operands[2];
6046 xops[1] = operands[2];
6047 xops[2] = operands[1];
6050 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
6051 xops[4] = operands[1];
6052 xops[5] = operands[2];
6054 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 insn for the VI24_128 modes emitting p<maxmin_int> /
;; vp<maxmin_int> with the mode suffix.  Requires TARGET_SSE4_1 and
;; operands accepted by ix86_binary_operator_ok.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
6060 (define_insn "*sse4_1_<code><mode>3"
6061 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
6063 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
6064 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
6065 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6067 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
6068 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6069 [(set_attr "isa" "noavx,avx")
6070 (set_attr "type" "sseiadd")
6071 (set_attr "prefix_extra" "1,*")
6072 (set_attr "prefix" "orig,vex")
6073 (set_attr "mode" "TI")])
;; SSE2 insn for V16QI emitting p<maxmin_int>b / vp<maxmin_int>b.
;; Requires TARGET_SSE2 and operands accepted by
;; ix86_binary_operator_ok.  Alternative 0 is the two-operand form
;; (isa noavx); alternative 1 is the three-operand VEX form (isa avx).
6075 (define_insn "*<code>v16qi3"
6076 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6078 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
6079 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
6080 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
6082 p<maxmin_int>b\t{%2, %0|%0, %2}
6083 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
6084 [(set_attr "isa" "noavx,avx")
6085 (set_attr "type" "sseiadd")
6086 (set_attr "prefix_data16" "1,*")
6087 (set_attr "prefix_extra" "*,1")
6088 (set_attr "prefix" "orig,vex")
6089 (set_attr "mode" "TI")])
6091 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6093 ;; Parallel integral comparisons
6095 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Named expander for equality comparison on the VI_256 modes.  The
;; preparation statement calls ix86_fixup_binary_operands_no_copy with
;; EQ so the operands suit the matching insn.
;; NOTE(review): the insn condition is not visible in this listing.
6097 (define_expand "avx2_eq<mode>3"
6098 [(set (match_operand:VI_256 0 "register_operand" "")
6100 (match_operand:VI_256 1 "nonimmediate_operand" "")
6101 (match_operand:VI_256 2 "nonimmediate_operand" "")))]
6103 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 equality-compare insn for the VI_256 modes emitting vpcmpeq
;; with the mode suffix.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok for EQ.  Operand 1 is marked commutative (%).
6105 (define_insn "*avx2_eq<mode>3"
6106 [(set (match_operand:VI_256 0 "register_operand" "=x")
6108 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
6109 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6110 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6111 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6112 [(set_attr "type" "ssecmp")
6113 (set_attr "prefix_extra" "1")
6114 (set_attr "prefix" "vex")
6115 (set_attr "mode" "OI")])
;; SSE4.1 equality-compare insn for V2DI emitting pcmpeqq / vpcmpeqq.
;; Requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok for EQ.  Alternative 0 is the two-operand
;; form (isa noavx); alternative 1 is the three-operand VEX form.
6117 (define_insn "*sse4_1_eqv2di3"
6118 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6120 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
6121 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6122 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
6124 pcmpeqq\t{%2, %0|%0, %2}
6125 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
6126 [(set_attr "isa" "noavx,avx")
6127 (set_attr "type" "ssecmp")
6128 (set_attr "prefix_extra" "1")
6129 (set_attr "prefix" "orig,vex")
6130 (set_attr "mode" "TI")])
;; SSE2 equality-compare insn for the VI124_128 modes emitting pcmpeq /
;; vpcmpeq with the mode suffix.  Enabled for TARGET_SSE2 only when
;; TARGET_XOP is off, and requires operands accepted by
;; ix86_binary_operator_ok for EQ.  Alternative 0 is the two-operand
;; form (isa noavx); alternative 1 is the three-operand VEX form.
6132 (define_insn "*sse2_eq<mode>3"
6133 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6135 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
6136 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6137 "TARGET_SSE2 && !TARGET_XOP
6138 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
6140 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
6141 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6142 [(set_attr "isa" "noavx,avx")
6143 (set_attr "type" "ssecmp")
6144 (set_attr "prefix_data16" "1,*")
6145 (set_attr "prefix" "orig,vex")
6146 (set_attr "mode" "TI")])
;; Named expander for equality comparison on the VI124_128 modes,
;; enabled for TARGET_SSE2 when TARGET_XOP is off.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with EQ.
6148 (define_expand "sse2_eq<mode>3"
6149 [(set (match_operand:VI124_128 0 "register_operand" "")
6151 (match_operand:VI124_128 1 "nonimmediate_operand" "")
6152 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
6153 "TARGET_SSE2 && !TARGET_XOP "
6154 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for equality comparison on V2DI.  The preparation
;; statement calls ix86_fixup_binary_operands_no_copy with EQ.
;; NOTE(review): the insn condition is not visible in this listing.
6156 (define_expand "sse4_1_eqv2di3"
6157 [(set (match_operand:V2DI 0 "register_operand" "")
6159 (match_operand:V2DI 1 "nonimmediate_operand" "")
6160 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
6162 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; Compare insn for V2DI emitting pcmpgtq / vpcmpgtq.  Operand 1 must
;; be a register and carries no commutative marker; operand 2 may be a
;; register or memory.  Alternative 0 is the two-operand form (isa
;; noavx); alternative 1 is the three-operand VEX form (isa avx).
;; NOTE(review): the insn condition is not visible in this listing.
6164 (define_insn "sse4_2_gtv2di3"
6165 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6167 (match_operand:V2DI 1 "register_operand" "0,x")
6168 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
6171 pcmpgtq\t{%2, %0|%0, %2}
6172 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
6173 [(set_attr "isa" "noavx,avx")
6174 (set_attr "type" "ssecmp")
6175 (set_attr "prefix_extra" "1")
6176 (set_attr "prefix" "orig,vex")
6177 (set_attr "mode" "TI")])
;; Compare insn for the VI_256 modes emitting vpcmpgt with the mode
;; suffix.  Operand 1 must be a register and carries no commutative
;; marker; operand 2 may be a register or memory.
;; NOTE(review): the insn condition is not visible in this listing.
6179 (define_insn "avx2_gt<mode>3"
6180 [(set (match_operand:VI_256 0 "register_operand" "=x")
6182 (match_operand:VI_256 1 "register_operand" "x")
6183 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
6185 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6186 [(set_attr "type" "ssecmp")
6187 (set_attr "prefix_extra" "1")
6188 (set_attr "prefix" "vex")
6189 (set_attr "mode" "OI")])
;; Compare insn for the VI124_128 modes emitting pcmpgt / vpcmpgt with
;; the mode suffix.  Enabled for TARGET_SSE2 when TARGET_XOP is off.
;; Operand 1 must be a register and carries no commutative marker.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
6191 (define_insn "sse2_gt<mode>3"
6192 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
6194 (match_operand:VI124_128 1 "register_operand" "0,x")
6195 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
6196 "TARGET_SSE2 && !TARGET_XOP"
6198 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
6199 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
6200 [(set_attr "isa" "noavx,avx")
6201 (set_attr "type" "ssecmp")
6202 (set_attr "prefix_data16" "1,*")
6203 (set_attr "prefix" "orig,vex")
6204 (set_attr "mode" "TI")])
;; vcond expander selecting between operands 1 and 2 (V_256 modes)
;; under a comparison (operator 3) of operands 4 and 5 (VI_256 modes).
;; The condition requires the data mode and the comparison mode to have
;; the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; the returned ok value are not visible in this listing.
6206 (define_expand "vcond<V_256:mode><VI_256:mode>"
6207 [(set (match_operand:V_256 0 "register_operand" "")
6209 (match_operator 3 ""
6210 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6211 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6212 (match_operand:V_256 1 "general_operand" "")
6213 (match_operand:V_256 2 "general_operand" "")))]
6215 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6216 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6218 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander selecting between operands 1 and 2 (V_128 modes)
;; under a comparison (operator 3) of operands 4 and 5 (VI124_128
;; modes).  The condition requires the data mode and the comparison
;; mode to have the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first part of the condition and the handling of
;; the returned ok value are not visible in this listing.
6223 (define_expand "vcond<V_128:mode><VI124_128:mode>"
6224 [(set (match_operand:V_128 0 "register_operand" "")
6226 (match_operator 3 ""
6227 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6228 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6229 (match_operand:V_128 1 "general_operand" "")
6230 (match_operand:V_128 2 "general_operand" "")))]
6232 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6233 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6235 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander selecting between operands 1 and 2 (VI8F_128 modes)
;; under a comparison (operator 3) of the V2DI operands 4 and 5.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the handling of the returned
;; ok value are not visible in this listing.
6240 (define_expand "vcond<VI8F_128:mode>v2di"
6241 [(set (match_operand:VI8F_128 0 "register_operand" "")
6242 (if_then_else:VI8F_128
6243 (match_operator 3 ""
6244 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6245 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6246 (match_operand:VI8F_128 1 "general_operand" "")
6247 (match_operand:VI8F_128 2 "general_operand" "")))]
6250 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander selecting between operands 1 and 2 (V_256 modes)
;; under a comparison (operator 3) of operands 4 and 5 (VI_256 modes).
;; The condition requires the data mode and the comparison mode to have
;; the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond, the same helper the vcond expanders use.
;; NOTE(review): the first part of the condition and the handling of
;; the returned ok value are not visible in this listing.
6255 (define_expand "vcondu<V_256:mode><VI_256:mode>"
6256 [(set (match_operand:V_256 0 "register_operand" "")
6258 (match_operator 3 ""
6259 [(match_operand:VI_256 4 "nonimmediate_operand" "")
6260 (match_operand:VI_256 5 "nonimmediate_operand" "")])
6261 (match_operand:V_256 1 "general_operand" "")
6262 (match_operand:V_256 2 "general_operand" "")))]
6264 && (GET_MODE_NUNITS (<V_256:MODE>mode)
6265 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
6267 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander selecting between operands 1 and 2 (V_128 modes)
;; under a comparison (operator 3) of operands 4 and 5 (VI124_128
;; modes).  The condition requires the data mode and the comparison
;; mode to have the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond, the same helper the vcond expanders use.
;; NOTE(review): the first part of the condition and the handling of
;; the returned ok value are not visible in this listing.
6272 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
6273 [(set (match_operand:V_128 0 "register_operand" "")
6275 (match_operator 3 ""
6276 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
6277 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
6278 (match_operand:V_128 1 "general_operand" "")
6279 (match_operand:V_128 2 "general_operand" "")))]
6281 && (GET_MODE_NUNITS (<V_128:MODE>mode)
6282 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
6284 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander selecting between operands 1 and 2 (VI8F_128 modes)
;; under a comparison (operator 3) of the V2DI operands 4 and 5.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the insn condition and the handling of the returned
;; ok value are not visible in this listing.
6289 (define_expand "vcondu<VI8F_128:mode>v2di"
6290 [(set (match_operand:VI8F_128 0 "register_operand" "")
6291 (if_then_else:VI8F_128
6292 (match_operator 3 ""
6293 [(match_operand:V2DI 4 "nonimmediate_operand" "")
6294 (match_operand:V2DI 5 "nonimmediate_operand" "")])
6295 (match_operand:VI8F_128 1 "general_operand" "")
6296 (match_operand:VI8F_128 2 "general_operand" "")))]
6299 bool ok = ix86_expand_int_vcond (operands);
;; Modes handled by the variable vec_perm expander: the six listed
;; 128-bit modes unconditionally, and the six 256-bit modes only when
;; TARGET_AVX2 holds.
6304 (define_mode_iterator VEC_PERM_AVX2
6305 [V16QI V8HI V4SI V2DI V4SF V2DF
6306 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
6307 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
6308 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
;; Variable permutation expander.  Operands 1 and 2 are the input
;; vectors and operand 3 is a register in the matching integer vector
;; mode (<sseintvecmode>).  Enabled for TARGET_SSSE3, TARGET_AVX or
;; TARGET_XOP; all the work is done by ix86_expand_vec_perm.
6310 (define_expand "vec_perm<mode>"
6311 [(match_operand:VEC_PERM_AVX2 0 "register_operand" "")
6312 (match_operand:VEC_PERM_AVX2 1 "register_operand" "")
6313 (match_operand:VEC_PERM_AVX2 2 "register_operand" "")
6314 (match_operand:<sseintvecmode> 3 "register_operand" "")]
6315 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
6317 ix86_expand_vec_perm (operands);
;; Modes handled by the constant vec_perm expander, each guarded by
;; the target flag listed next to it: TARGET_SSE for the V4SF, V4SI,
;; V2DF and V2DI entries, TARGET_SSE2 for V16QI and V8HI, TARGET_AVX
;; for V8SF, V4DF, V8SI and V4DI, and TARGET_AVX2 for V32QI and V16HI.
6321 (define_mode_iterator VEC_PERM_CONST
6322 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
6323 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
6324 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
6325 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
6326 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
6327 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
;; Constant permutation expander.  Operand 3 has no predicate and is
;; in the matching integer vector mode (<sseintvecmode>).  The result
;; of ix86_expand_vec_perm_const decides the outcome.
;; NOTE(review): the insn condition and both branches of the test are
;; not visible in this listing -- presumably DONE on success and FAIL
;; otherwise; confirm.
6329 (define_expand "vec_perm_const<mode>"
6330 [(match_operand:VEC_PERM_CONST 0 "register_operand" "")
6331 (match_operand:VEC_PERM_CONST 1 "register_operand" "")
6332 (match_operand:VEC_PERM_CONST 2 "register_operand" "")
6333 (match_operand:<sseintvecmode> 3 "" "")]
6336 if (ix86_expand_vec_perm_const (operands))
6342 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6344 ;; Parallel bitwise logical operations
6346 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Ones-complement expander for the VI modes, expressed as an XOR of
;; operand 1 with operand 2.  The preparation code builds operand 2: a
;; constant vector with constm1_rtx in each of the n units of the mode,
;; forced into a register.
;; NOTE(review): the second XOR operand in the pattern and the insn
;; condition are not visible in this listing.
6348 (define_expand "one_cmpl<mode>2"
6349 [(set (match_operand:VI 0 "register_operand" "")
6350 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
6354 int i, n = GET_MODE_NUNITS (<MODE>mode);
6355 rtvec v = rtvec_alloc (n);
6357 for (i = 0; i < n; ++i)
6358 RTVEC_ELT (v, i) = constm1_rtx;
6360 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named and-not expander for the VI_AVX2 modes.  Operand 1 is the
;; register that appears under NOT; operand 2 may be a register or
;; memory.
;; NOTE(review): the outer operator line and the insn condition are
;; not visible in this listing -- presumably an AND; confirm.
6363 (define_expand "<sse2_avx2>_andnot<mode>3"
6364 [(set (match_operand:VI_AVX2 0 "register_operand" "")
6366 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand" ""))
6367 (match_operand:VI_AVX2 2 "nonimmediate_operand" "")))]
;; And-not insn for the VI modes.  The output code switches on the
;; insn mode attribute, asserting the target feature required in each
;; case, and formats the assembler template into a static 32-byte
;; buffer with snprintf.  Alternative 0 uses the two-operand template
;; and alternative 1 the v-prefixed three-operand template.
;; The mode attribute is V8SF for modes wider than 16 bytes without
;; TARGET_AVX2, V4SF without TARGET_SSE2, and <sseinsnmode> otherwise.
;; NOTE(review): the mnemonic strings assigned to tmp, the insn
;; condition and the return statement are not visible in this listing.
6370 (define_insn "*andnot<mode>3"
6371 [(set (match_operand:VI 0 "register_operand" "=x,x")
6373 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
6374 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6377 static char buf[32];
6381 switch (get_attr_mode (insn))
6384 gcc_assert (TARGET_AVX2);
6386 gcc_assert (TARGET_SSE2);
6392 gcc_assert (TARGET_AVX);
6394 gcc_assert (TARGET_SSE);
6403 switch (which_alternative)
6406 ops = "%s\t{%%2, %%0|%%0, %%2}";
6409 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6415 snprintf (buf, sizeof (buf), ops, tmp);
6418 [(set_attr "isa" "noavx,avx")
6419 (set_attr "type" "sselog")
6420 (set (attr "prefix_data16")
6422 (and (eq_attr "alternative" "0")
6423 (eq_attr "mode" "TI"))
6425 (const_string "*")))
6426 (set_attr "prefix" "orig,vex")
6428 (cond [(and (not (match_test "TARGET_AVX2"))
6429 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6430 (const_string "V8SF")
6431 (not (match_test "TARGET_SSE2"))
6432 (const_string "V4SF")
6434 (const_string "<sseinsnmode>")))])
;; Binary-operation expander for the VI modes.  The preparation
;; statement only calls ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and the insn condition are not
;; visible in this listing -- presumably the bitwise logic codes, given
;; the surrounding section; confirm.
6436 (define_expand "<code><mode>3"
6437 [(set (match_operand:VI 0 "register_operand" "")
6439 (match_operand:VI 1 "nonimmediate_operand" "")
6440 (match_operand:VI 2 "nonimmediate_operand" "")))]
6442 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Binary-operation insn for the VI modes; operand 1 is marked
;; commutative (%) and the condition includes ix86_binary_operator_ok.
;; The output code switches on the insn mode attribute, asserting the
;; target feature required in each case, and formats the assembler
;; template into a static 32-byte buffer with snprintf.  Alternative 0
;; uses the two-operand template and alternative 1 the v-prefixed
;; three-operand template.
;; The mode attribute is V8SF for modes wider than 16 bytes without
;; TARGET_AVX2, V4SF without TARGET_SSE2, and <sseinsnmode> otherwise.
;; NOTE(review): the code iterator, the mnemonic strings assigned to
;; tmp and the return statement are not visible in this listing.
6444 (define_insn "*<code><mode>3"
6445 [(set (match_operand:VI 0 "register_operand" "=x,x")
6447 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
6448 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
6450 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
6452 static char buf[32];
6456 switch (get_attr_mode (insn))
6459 gcc_assert (TARGET_AVX2);
6461 gcc_assert (TARGET_SSE2);
6467 gcc_assert (TARGET_AVX);
6469 gcc_assert (TARGET_SSE);
6478 switch (which_alternative)
6481 ops = "%s\t{%%2, %%0|%%0, %%2}";
6484 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
6490 snprintf (buf, sizeof (buf), ops, tmp);
6493 [(set_attr "isa" "noavx,avx")
6494 (set_attr "type" "sselog")
6495 (set (attr "prefix_data16")
6497 (and (eq_attr "alternative" "0")
6498 (eq_attr "mode" "TI"))
6500 (const_string "*")))
6501 (set_attr "prefix" "orig,vex")
6503 (cond [(and (not (match_test "TARGET_AVX2"))
6504 (match_test "GET_MODE_SIZE (<MODE>mode) > 16"))
6505 (const_string "V8SF")
6506 (not (match_test "TARGET_SSE2"))
6507 (const_string "V4SF")
6509 (const_string "<sseinsnmode>")))])
;; And-not insn on TFmode values held in SSE registers, emitting
;; pandn / vpandn.  Operand 1 is the register that appears under NOT.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
;; NOTE(review): the outer operator line and the insn condition are
;; not visible in this listing.
6511 (define_insn "*andnottf3"
6512 [(set (match_operand:TF 0 "register_operand" "=x,x")
6514 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
6515 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6518 pandn\t{%2, %0|%0, %2}
6519 vpandn\t{%2, %1, %0|%0, %1, %2}"
6520 [(set_attr "isa" "noavx,avx")
6521 (set_attr "type" "sselog")
6522 (set_attr "prefix_data16" "1,*")
6523 (set_attr "prefix" "orig,vex")
6524 (set_attr "mode" "TI")])
;; Binary-operation expander for TFmode.  The preparation statement
;; only calls ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and the insn condition are not
;; visible in this listing -- presumably the bitwise logic codes
;; matched by the p<logic> insn that follows; confirm.
6526 (define_expand "<code>tf3"
6527 [(set (match_operand:TF 0 "register_operand" "")
6529 (match_operand:TF 1 "nonimmediate_operand" "")
6530 (match_operand:TF 2 "nonimmediate_operand" "")))]
6532 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Binary-operation insn on TFmode values held in SSE registers,
;; emitting p<logic> / vp<logic>.  Operand 1 is marked commutative (%)
;; and the condition includes ix86_binary_operator_ok.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
;; NOTE(review): the first part of the condition is not visible in
;; this listing.
6534 (define_insn "*<code>tf3"
6535 [(set (match_operand:TF 0 "register_operand" "=x,x")
6537 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
6538 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
6540 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6542 p<logic>\t{%2, %0|%0, %2}
6543 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
6544 [(set_attr "isa" "noavx,avx")
6545 (set_attr "type" "sselog")
6546 (set_attr "prefix_data16" "1,*")
6547 (set_attr "prefix" "orig,vex")
6548 (set_attr "mode" "TI")])
6550 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6552 ;; Parallel integral element swizzling
6554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack expander for the VI248_AVX2 modes.  Operands 1 and
;; 2 are re-viewed in the narrower-element <ssepackmode> and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
;; NOTE(review): the final argument presumably selects the even
;; elements; the insn condition is not visible in this listing.
6556 (define_expand "vec_pack_trunc_<mode>"
6557 [(match_operand:<ssepackmode> 0 "register_operand" "")
6558 (match_operand:VI248_AVX2 1 "register_operand" "")
6559 (match_operand:VI248_AVX2 2 "register_operand" "")]
6562 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
6563 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
6564 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack insn emitting packsswb / vpacksswb.  The result (VI1_AVX2) is
;; the concatenation of the signed-saturating truncations (ss_truncate)
;; of operands 1 and 2, which are in <sseunpackmode>.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
6568 (define_insn "<sse2_avx2>_packsswb"
6569 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6570 (vec_concat:VI1_AVX2
6571 (ss_truncate:<ssehalfvecmode>
6572 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6573 (ss_truncate:<ssehalfvecmode>
6574 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6577 packsswb\t{%2, %0|%0, %2}
6578 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
6579 [(set_attr "isa" "noavx,avx")
6580 (set_attr "type" "sselog")
6581 (set_attr "prefix_data16" "1,*")
6582 (set_attr "prefix" "orig,vex")
6583 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn emitting packssdw / vpackssdw.  The result (VI2_AVX2) is
;; the concatenation of the signed-saturating truncations (ss_truncate)
;; of operands 1 and 2, which are in <sseunpackmode>.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
6585 (define_insn "<sse2_avx2>_packssdw"
6586 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
6587 (vec_concat:VI2_AVX2
6588 (ss_truncate:<ssehalfvecmode>
6589 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6590 (ss_truncate:<ssehalfvecmode>
6591 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6594 packssdw\t{%2, %0|%0, %2}
6595 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
6596 [(set_attr "isa" "noavx,avx")
6597 (set_attr "type" "sselog")
6598 (set_attr "prefix_data16" "1,*")
6599 (set_attr "prefix" "orig,vex")
6600 (set_attr "mode" "<sseinsnmode>")])
;; Pack insn emitting packuswb / vpackuswb.  The result (VI1_AVX2) is
;; the concatenation of the unsigned-saturating truncations
;; (us_truncate) of operands 1 and 2, which are in <sseunpackmode>.
;; Alternative 0 is the two-operand form (isa noavx); alternative 1 is
;; the three-operand VEX form (isa avx).
6602 (define_insn "<sse2_avx2>_packuswb"
6603 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
6604 (vec_concat:VI1_AVX2
6605 (us_truncate:<ssehalfvecmode>
6606 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
6607 (us_truncate:<ssehalfvecmode>
6608 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
6611 packuswb\t{%2, %0|%0, %2}
6612 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
6613 [(set_attr "isa" "noavx,avx")
6614 (set_attr "type" "sselog")
6615 (set_attr "prefix_data16" "1,*")
6616 (set_attr "prefix" "orig,vex")
6617 (set_attr "mode" "<sseinsnmode>")])
;; V32QI interleave insn emitting vpunpckhbw.  The selection list
;; pairs index i with index i + 32 for i = 8..15 and then i = 24..31,
;; that is, the upper eight bytes of each 16-byte half of operand 1,
;; each followed by the corresponding byte of operand 2.
;; NOTE(review): the vec_select / vec_concat operator lines and the
;; insn condition are not visible in this listing.
6619 (define_insn "avx2_interleave_highv32qi"
6620 [(set (match_operand:V32QI 0 "register_operand" "=x")
6623 (match_operand:V32QI 1 "register_operand" "x")
6624 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6625 (parallel [(const_int 8) (const_int 40)
6626 (const_int 9) (const_int 41)
6627 (const_int 10) (const_int 42)
6628 (const_int 11) (const_int 43)
6629 (const_int 12) (const_int 44)
6630 (const_int 13) (const_int 45)
6631 (const_int 14) (const_int 46)
6632 (const_int 15) (const_int 47)
6633 (const_int 24) (const_int 56)
6634 (const_int 25) (const_int 57)
6635 (const_int 26) (const_int 58)
6636 (const_int 27) (const_int 59)
6637 (const_int 28) (const_int 60)
6638 (const_int 29) (const_int 61)
6639 (const_int 30) (const_int 62)
6640 (const_int 31) (const_int 63)])))]
6642 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6643 [(set_attr "type" "sselog")
6644 (set_attr "prefix" "vex")
6645 (set_attr "mode" "OI")])
;; 128-bit high byte interleave.  The selector pairs index i with
;; index i+16 for i = 8..15.  Alternative 0 emits punpckhbw with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vpunpckhbw.
6647 (define_insn "vec_interleave_highv16qi"
6648 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6651 (match_operand:V16QI 1 "register_operand" "0,x")
6652 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6653 (parallel [(const_int 8) (const_int 24)
6654 (const_int 9) (const_int 25)
6655 (const_int 10) (const_int 26)
6656 (const_int 11) (const_int 27)
6657 (const_int 12) (const_int 28)
6658 (const_int 13) (const_int 29)
6659 (const_int 14) (const_int 30)
6660 (const_int 15) (const_int 31)])))]
6663 punpckhbw\t{%2, %0|%0, %2}
6664 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6665 [(set_attr "isa" "noavx,avx")
6666 (set_attr "type" "sselog")
6667 (set_attr "prefix_data16" "1,*")
6668 (set_attr "prefix" "orig,vex")
6669 (set_attr "mode" "TI")])
;; 256-bit vpunpcklbw.  The selector pairs index i with index i+32 for
;; i = 0..7 and i = 16..23, i.e. the lower eight bytes of each 16-byte
;; half of a V32QI input (indices >= 32 presumably address operand 2
;; through a concatenation of the two inputs -- confirm).
6671 (define_insn "avx2_interleave_lowv32qi"
6672 [(set (match_operand:V32QI 0 "register_operand" "=x")
6675 (match_operand:V32QI 1 "register_operand" "x")
6676 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
6677 (parallel [(const_int 0) (const_int 32)
6678 (const_int 1) (const_int 33)
6679 (const_int 2) (const_int 34)
6680 (const_int 3) (const_int 35)
6681 (const_int 4) (const_int 36)
6682 (const_int 5) (const_int 37)
6683 (const_int 6) (const_int 38)
6684 (const_int 7) (const_int 39)
6685 (const_int 16) (const_int 48)
6686 (const_int 17) (const_int 49)
6687 (const_int 18) (const_int 50)
6688 (const_int 19) (const_int 51)
6689 (const_int 20) (const_int 52)
6690 (const_int 21) (const_int 53)
6691 (const_int 22) (const_int 54)
6692 (const_int 23) (const_int 55)])))]
6694 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6695 [(set_attr "type" "sselog")
6696 (set_attr "prefix" "vex")
6697 (set_attr "mode" "OI")])
;; 128-bit low byte interleave.  The selector pairs index i with
;; index i+16 for i = 0..7.  Alternative 0 emits punpcklbw with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vpunpcklbw.
6699 (define_insn "vec_interleave_lowv16qi"
6700 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6703 (match_operand:V16QI 1 "register_operand" "0,x")
6704 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
6705 (parallel [(const_int 0) (const_int 16)
6706 (const_int 1) (const_int 17)
6707 (const_int 2) (const_int 18)
6708 (const_int 3) (const_int 19)
6709 (const_int 4) (const_int 20)
6710 (const_int 5) (const_int 21)
6711 (const_int 6) (const_int 22)
6712 (const_int 7) (const_int 23)])))]
6715 punpcklbw\t{%2, %0|%0, %2}
6716 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6717 [(set_attr "isa" "noavx,avx")
6718 (set_attr "type" "sselog")
6719 (set_attr "prefix_data16" "1,*")
6720 (set_attr "prefix" "orig,vex")
6721 (set_attr "mode" "TI")])
;; 256-bit vpunpckhwd.  The selector pairs index i with index i+16 for
;; i = 4..7 and i = 12..15, i.e. the upper four words of each
;; eight-word half of a V16HI input.
6723 (define_insn "avx2_interleave_highv16hi"
6724 [(set (match_operand:V16HI 0 "register_operand" "=x")
6727 (match_operand:V16HI 1 "register_operand" "x")
6728 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6729 (parallel [(const_int 4) (const_int 20)
6730 (const_int 5) (const_int 21)
6731 (const_int 6) (const_int 22)
6732 (const_int 7) (const_int 23)
6733 (const_int 12) (const_int 28)
6734 (const_int 13) (const_int 29)
6735 (const_int 14) (const_int 30)
6736 (const_int 15) (const_int 31)])))]
6738 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6739 [(set_attr "type" "sselog")
6740 (set_attr "prefix" "vex")
6741 (set_attr "mode" "OI")])
;; 128-bit high word interleave.  The selector pairs index i with
;; index i+8 for i = 4..7.  Alternative 0 emits punpckhwd with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vpunpckhwd.
6743 (define_insn "vec_interleave_highv8hi"
6744 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6747 (match_operand:V8HI 1 "register_operand" "0,x")
6748 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6749 (parallel [(const_int 4) (const_int 12)
6750 (const_int 5) (const_int 13)
6751 (const_int 6) (const_int 14)
6752 (const_int 7) (const_int 15)])))]
6755 punpckhwd\t{%2, %0|%0, %2}
6756 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6757 [(set_attr "isa" "noavx,avx")
6758 (set_attr "type" "sselog")
6759 (set_attr "prefix_data16" "1,*")
6760 (set_attr "prefix" "orig,vex")
6761 (set_attr "mode" "TI")])
;; 256-bit vpunpcklwd.  The selector pairs index i with index i+16 for
;; i = 0..3 and i = 8..11, i.e. the lower four words of each
;; eight-word half of a V16HI input.
6763 (define_insn "avx2_interleave_lowv16hi"
6764 [(set (match_operand:V16HI 0 "register_operand" "=x")
6767 (match_operand:V16HI 1 "register_operand" "x")
6768 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
6769 (parallel [(const_int 0) (const_int 16)
6770 (const_int 1) (const_int 17)
6771 (const_int 2) (const_int 18)
6772 (const_int 3) (const_int 19)
6773 (const_int 8) (const_int 24)
6774 (const_int 9) (const_int 25)
6775 (const_int 10) (const_int 26)
6776 (const_int 11) (const_int 27)])))]
6778 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6779 [(set_attr "type" "sselog")
6780 (set_attr "prefix" "vex")
6781 (set_attr "mode" "OI")])
;; 128-bit low word interleave.  The selector pairs index i with
;; index i+8 for i = 0..3.  Alternative 0 emits punpcklwd with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vpunpcklwd.
6783 (define_insn "vec_interleave_lowv8hi"
6784 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6787 (match_operand:V8HI 1 "register_operand" "0,x")
6788 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6789 (parallel [(const_int 0) (const_int 8)
6790 (const_int 1) (const_int 9)
6791 (const_int 2) (const_int 10)
6792 (const_int 3) (const_int 11)])))]
6795 punpcklwd\t{%2, %0|%0, %2}
6796 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6797 [(set_attr "isa" "noavx,avx")
6798 (set_attr "type" "sselog")
6799 (set_attr "prefix_data16" "1,*")
6800 (set_attr "prefix" "orig,vex")
6801 (set_attr "mode" "TI")])
;; 256-bit vpunpckhdq.  The selector pairs index i with index i+8 for
;; i = 2, 3, 6, 7, i.e. the upper two dwords of each four-dword half
;; of a V8SI input.
6803 (define_insn "avx2_interleave_highv8si"
6804 [(set (match_operand:V8SI 0 "register_operand" "=x")
6807 (match_operand:V8SI 1 "register_operand" "x")
6808 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6809 (parallel [(const_int 2) (const_int 10)
6810 (const_int 3) (const_int 11)
6811 (const_int 6) (const_int 14)
6812 (const_int 7) (const_int 15)])))]
6814 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6815 [(set_attr "type" "sselog")
6816 (set_attr "prefix" "vex")
6817 (set_attr "mode" "OI")])
;; 128-bit high dword interleave.  The selector pairs index 2 with 6
;; and index 3 with 7.  Alternative 0 emits punpckhdq with operand 1
;; tied to the destination; alternative 1 emits the three-operand
;; vpunpckhdq.
6819 (define_insn "vec_interleave_highv4si"
6820 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6823 (match_operand:V4SI 1 "register_operand" "0,x")
6824 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6825 (parallel [(const_int 2) (const_int 6)
6826 (const_int 3) (const_int 7)])))]
6829 punpckhdq\t{%2, %0|%0, %2}
6830 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6831 [(set_attr "isa" "noavx,avx")
6832 (set_attr "type" "sselog")
6833 (set_attr "prefix_data16" "1,*")
6834 (set_attr "prefix" "orig,vex")
6835 (set_attr "mode" "TI")])
;; 256-bit vpunpckldq.  The selector pairs index i with index i+8 for
;; i = 0, 1, 4, 5, i.e. the lower two dwords of each four-dword half
;; of a V8SI input.
6837 (define_insn "avx2_interleave_lowv8si"
6838 [(set (match_operand:V8SI 0 "register_operand" "=x")
6841 (match_operand:V8SI 1 "register_operand" "x")
6842 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
6843 (parallel [(const_int 0) (const_int 8)
6844 (const_int 1) (const_int 9)
6845 (const_int 4) (const_int 12)
6846 (const_int 5) (const_int 13)])))]
6848 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6849 [(set_attr "type" "sselog")
6850 (set_attr "prefix" "vex")
6851 (set_attr "mode" "OI")])
;; 128-bit low dword interleave.  The selector pairs index 0 with 4
;; and index 1 with 5.  Alternative 0 emits punpckldq with operand 1
;; tied to the destination; alternative 1 emits the three-operand
;; vpunpckldq.
6853 (define_insn "vec_interleave_lowv4si"
6854 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6857 (match_operand:V4SI 1 "register_operand" "0,x")
6858 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6859 (parallel [(const_int 0) (const_int 4)
6860 (const_int 1) (const_int 5)])))]
6863 punpckldq\t{%2, %0|%0, %2}
6864 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6865 [(set_attr "isa" "noavx,avx")
6866 (set_attr "type" "sselog")
6867 (set_attr "prefix_data16" "1,*")
6868 (set_attr "prefix" "orig,vex")
6869 (set_attr "mode" "TI")])
;; Interleave-high expander for the VI_256 modes.  It computes both
;; the avx2_interleave_low result (t1) and the avx2_interleave_high
;; result (t2) of operands 1 and 2, then combines them into operand 0
;; with avx2_permv2ti on V4DImode lowparts.  The immediate
;; 1 + (3 << 4) presumably selects the upper 128 bits of t1 and the
;; upper 128 bits of t2 -- confirm against the vperm2i128 encoding.
6871 (define_expand "vec_interleave_high<mode>"
6872 [(match_operand:VI_256 0 "register_operand" "=x")
6873 (match_operand:VI_256 1 "register_operand" "x")
6874 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6877 rtx t1 = gen_reg_rtx (<MODE>mode);
6878 rtx t2 = gen_reg_rtx (<MODE>mode);
6879 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6880 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6881 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6882 gen_lowpart (V4DImode, t1),
6883 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4))));
;; Interleave-low expander for the VI_256 modes.  It computes both
;; the avx2_interleave_low result (t1) and the avx2_interleave_high
;; result (t2) of operands 1 and 2, then combines them into operand 0
;; with avx2_permv2ti on V4DImode lowparts.  The immediate
;; 0 + (2 << 4) presumably selects the lower 128 bits of t1 and the
;; lower 128 bits of t2 -- confirm against the vperm2i128 encoding.
6887 (define_expand "vec_interleave_low<mode>"
6888 [(match_operand:VI_256 0 "register_operand" "=x")
6889 (match_operand:VI_256 1 "register_operand" "x")
6890 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
6893 rtx t1 = gen_reg_rtx (<MODE>mode);
6894 rtx t2 = gen_reg_rtx (<MODE>mode);
6895 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
6896 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
6897 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, operands[0]),
6898 gen_lowpart (V4DImode, t1),
6899 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4))));
6903 ;; Modes handled by pinsr patterns.
;; V8HI carries no condition of its own; V16QI and V4SI are gated on
;; TARGET_SSE4_1, and V2DI additionally on TARGET_64BIT.
6904 (define_mode_iterator PINSR_MODE
6905 [(V16QI "TARGET_SSE4_1") V8HI
6906 (V4SI "TARGET_SSE4_1")
6907 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern of each PINSR_MODE mode:
;; "sse2" for V8HI, "sse4_1" for V16QI, V4SI and V2DI.
6909 (define_mode_attr sse2p4_1
6910 [(V16QI "sse4_1") (V8HI "sse2")
6911 (V4SI "sse4_1") (V2DI "sse4_1")])
6913 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 (general register or memory) into one
;; element of vector operand 1.  Operand 3 is a vec_merge mask: the
;; condition requires exact_log2 of it to be a valid element index,
;; and the output code replaces operand 3 with that index before
;; printing.  When the scalar mode is narrower than SImode the
;; template prints operand 2 with the %k modifier.  Alternatives 0-1
;; are the two-operand forms (isa noavx), alternatives 2-3 the
;; three-operand vpinsr forms (isa avx).
6914 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6915 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6916 (vec_merge:PINSR_MODE
6917 (vec_duplicate:PINSR_MODE
6918 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6919 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6920 (match_operand:SI 3 "const_int_operand" "")))]
6922 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6923 < GET_MODE_NUNITS (<MODE>mode))"
6925 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6927 switch (which_alternative)
6930 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6931 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6934 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6936 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6937 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6940 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6945 [(set_attr "isa" "noavx,noavx,avx,avx")
6946 (set_attr "type" "sselog")
6947 (set (attr "prefix_rex")
6949 (and (not (match_test "TARGET_AVX"))
6950 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6952 (const_string "*")))
6953 (set (attr "prefix_data16")
6955 (and (not (match_test "TARGET_AVX"))
6956 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6958 (const_string "*")))
6959 (set (attr "prefix_extra")
6961 (and (not (match_test "TARGET_AVX"))
6962 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6964 (const_string "1")))
6965 (set_attr "length_immediate" "1")
6966 (set_attr "prefix" "orig,orig,vex,vex")
6967 (set_attr "mode" "TI")])
;; Extract the byte of V16QI register operand 1 selected by operand 2
;; (0..15) into an SWI48 general register.  The template prints the
;; destination with the %k modifier.
6969 (define_insn "*sse4_1_pextrb_<mode>"
6970 [(set (match_operand:SWI48 0 "register_operand" "=r")
6973 (match_operand:V16QI 1 "register_operand" "x")
6974 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6976 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6977 [(set_attr "type" "sselog")
6978 (set_attr "prefix_extra" "1")
6979 (set_attr "length_immediate" "1")
6980 (set_attr "prefix" "maybe_vex")
6981 (set_attr "mode" "TI")])
;; Store the byte of V16QI register operand 1 selected by operand 2
;; (0..15) directly to a QImode memory destination with pextrb.
6983 (define_insn "*sse4_1_pextrb_memory"
6984 [(set (match_operand:QI 0 "memory_operand" "=m")
6986 (match_operand:V16QI 1 "register_operand" "x")
6987 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6989 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6990 [(set_attr "type" "sselog")
6991 (set_attr "prefix_extra" "1")
6992 (set_attr "length_immediate" "1")
6993 (set_attr "prefix" "maybe_vex")
6994 (set_attr "mode" "TI")])
;; Extract the word of V8HI register operand 1 selected by operand 2
;; (0..7) into an SWI48 general register.  The template prints the
;; destination with the %k modifier.
6996 (define_insn "*sse2_pextrw_<mode>"
6997 [(set (match_operand:SWI48 0 "register_operand" "=r")
7000 (match_operand:V8HI 1 "register_operand" "x")
7001 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
7003 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
7004 [(set_attr "type" "sselog")
7005 (set_attr "prefix_data16" "1")
7006 (set_attr "length_immediate" "1")
7007 (set_attr "prefix" "maybe_vex")
7008 (set_attr "mode" "TI")])
;; Store the word of V8HI register operand 1 selected by operand 2
;; (0..7) directly to an HImode memory destination with pextrw.
7010 (define_insn "*sse4_1_pextrw_memory"
7011 [(set (match_operand:HI 0 "memory_operand" "=m")
7013 (match_operand:V8HI 1 "register_operand" "x")
7014 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
7016 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
7017 [(set_attr "type" "sselog")
7018 (set_attr "prefix_extra" "1")
7019 (set_attr "length_immediate" "1")
7020 (set_attr "prefix" "maybe_vex")
7021 (set_attr "mode" "TI")])
;; Extract the dword of V4SI register operand 1 selected by operand 2
;; (0..3) into an SImode general register or memory destination.
7023 (define_insn "*sse4_1_pextrd"
7024 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
7026 (match_operand:V4SI 1 "register_operand" "x")
7027 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
7029 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
7030 [(set_attr "type" "sselog")
7031 (set_attr "prefix_extra" "1")
7032 (set_attr "length_immediate" "1")
7033 (set_attr "prefix" "maybe_vex")
7034 (set_attr "mode" "TI")])
;; 64-bit-only variant of pextrd whose destination is a DImode general
;; register (requires TARGET_64BIT && TARGET_SSE4_1).  The template
;; prints the destination with the %k modifier; going by the pattern
;; name the result is zero-extended to DImode.
7036 (define_insn "*sse4_1_pextrd_zext"
7037 [(set (match_operand:DI 0 "register_operand" "=r")
7040 (match_operand:V4SI 1 "register_operand" "x")
7041 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
7042 "TARGET_64BIT && TARGET_SSE4_1"
7043 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
7044 [(set_attr "type" "sselog")
7045 (set_attr "prefix_extra" "1")
7046 (set_attr "length_immediate" "1")
7047 (set_attr "prefix" "maybe_vex")
7048 (set_attr "mode" "TI")])
7050 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract the qword of V2DI register operand 1 selected by operand 2
;; (0..1) into a DImode general register or memory destination.
;; Requires TARGET_SSE4_1 && TARGET_64BIT.
7051 (define_insn "*sse4_1_pextrq"
7052 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
7054 (match_operand:V2DI 1 "register_operand" "x")
7055 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
7056 "TARGET_SSE4_1 && TARGET_64BIT"
7057 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
7058 [(set_attr "type" "sselog")
7059 (set_attr "prefix_rex" "1")
7060 (set_attr "prefix_extra" "1")
7061 (set_attr "length_immediate" "1")
7062 (set_attr "prefix" "maybe_vex")
7063 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufd with an 8-bit immediate (operand 2).
;; The immediate is split into four 2-bit selectors, which are passed
;; to avx2_pshufd_1 once as-is (values 0..3) and once with 4 added
;; (values 4..7), so both four-element halves use the same shuffle.
7065 (define_expand "avx2_pshufdv3"
7066 [(match_operand:V8SI 0 "register_operand" "")
7067 (match_operand:V8SI 1 "nonimmediate_operand" "")
7068 (match_operand:SI 2 "const_0_to_255_operand" "")]
7071 int mask = INTVAL (operands[2]);
7072 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
7073 GEN_INT ((mask >> 0) & 3),
7074 GEN_INT ((mask >> 2) & 3),
7075 GEN_INT ((mask >> 4) & 3),
7076 GEN_INT ((mask >> 6) & 3),
7077 GEN_INT (((mask >> 0) & 3) + 4),
7078 GEN_INT (((mask >> 2) & 3) + 4),
7079 GEN_INT (((mask >> 4) & 3) + 4),
7080 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd with the shuffle spelled out as eight element
;; indices.  Operands 2..5 must be 0..3 and operands 6..9 must be
;; 4..7; the condition additionally requires operand N+4 to equal
;; operand N plus 4, so both halves encode the same selectors.  The
;; output code packs operands 2..5 (two bits each) into the immediate
;; and stores it back into operands[2] for the template.
7084 (define_insn "avx2_pshufd_1"
7085 [(set (match_operand:V8SI 0 "register_operand" "=x")
7087 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
7088 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7089 (match_operand 3 "const_0_to_3_operand" "")
7090 (match_operand 4 "const_0_to_3_operand" "")
7091 (match_operand 5 "const_0_to_3_operand" "")
7092 (match_operand 6 "const_4_to_7_operand" "")
7093 (match_operand 7 "const_4_to_7_operand" "")
7094 (match_operand 8 "const_4_to_7_operand" "")
7095 (match_operand 9 "const_4_to_7_operand" "")])))]
7097 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
7098 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
7099 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
7100 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
7103 mask |= INTVAL (operands[2]) << 0;
7104 mask |= INTVAL (operands[3]) << 2;
7105 mask |= INTVAL (operands[4]) << 4;
7106 mask |= INTVAL (operands[5]) << 6;
7107 operands[2] = GEN_INT (mask);
7109 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
7111 [(set_attr "type" "sselog1")
7112 (set_attr "prefix" "vex")
7113 (set_attr "length_immediate" "1")
7114 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufd with an integer immediate
;; (operand 2).  The immediate is split into four 2-bit selectors that
;; are passed as separate operands to sse2_pshufd_1.
7116 (define_expand "sse2_pshufd"
7117 [(match_operand:V4SI 0 "register_operand" "")
7118 (match_operand:V4SI 1 "nonimmediate_operand" "")
7119 (match_operand:SI 2 "const_int_operand" "")]
7122 int mask = INTVAL (operands[2]);
7123 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
7124 GEN_INT ((mask >> 0) & 3),
7125 GEN_INT ((mask >> 2) & 3),
7126 GEN_INT ((mask >> 4) & 3),
7127 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd with the shuffle spelled out as four element
;; indices (operands 2..5, each 0..3).  The output code packs them,
;; two bits each, into the immediate and stores it back into
;; operands[2] for the template.
7131 (define_insn "sse2_pshufd_1"
7132 [(set (match_operand:V4SI 0 "register_operand" "=x")
7134 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
7135 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7136 (match_operand 3 "const_0_to_3_operand" "")
7137 (match_operand 4 "const_0_to_3_operand" "")
7138 (match_operand 5 "const_0_to_3_operand" "")])))]
7142 mask |= INTVAL (operands[2]) << 0;
7143 mask |= INTVAL (operands[3]) << 2;
7144 mask |= INTVAL (operands[4]) << 4;
7145 mask |= INTVAL (operands[5]) << 6;
7146 operands[2] = GEN_INT (mask);
7148 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
7150 [(set_attr "type" "sselog1")
7151 (set_attr "prefix_data16" "1")
7152 (set_attr "prefix" "maybe_vex")
7153 (set_attr "length_immediate" "1")
7154 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshuflw with an 8-bit immediate
;; (operand 2).  The immediate is split into four 2-bit selectors,
;; which are passed to avx2_pshuflw_1 once as-is (values 0..3) and
;; once with 8 added (values 8..11).
7156 (define_expand "avx2_pshuflwv3"
7157 [(match_operand:V16HI 0 "register_operand" "")
7158 (match_operand:V16HI 1 "nonimmediate_operand" "")
7159 (match_operand:SI 2 "const_0_to_255_operand" "")]
7162 int mask = INTVAL (operands[2]);
7163 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
7164 GEN_INT ((mask >> 0) & 3),
7165 GEN_INT ((mask >> 2) & 3),
7166 GEN_INT ((mask >> 4) & 3),
7167 GEN_INT ((mask >> 6) & 3),
7168 GEN_INT (((mask >> 0) & 3) + 8),
7169 GEN_INT (((mask >> 2) & 3) + 8),
7170 GEN_INT (((mask >> 4) & 3) + 8),
7171 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw with explicit element indices.  Operands 2..5 must
;; be 0..3 and operands 6..9 must be 8..11; the condition additionally
;; requires operand N+4 to equal operand N plus 8.  The output code
;; packs operands 2..5 (two bits each) into the immediate and stores
;; it back into operands[2] for the template.
7175 (define_insn "avx2_pshuflw_1"
7176 [(set (match_operand:V16HI 0 "register_operand" "=x")
7178 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7179 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7180 (match_operand 3 "const_0_to_3_operand" "")
7181 (match_operand 4 "const_0_to_3_operand" "")
7182 (match_operand 5 "const_0_to_3_operand" "")
7187 (match_operand 6 "const_8_to_11_operand" "")
7188 (match_operand 7 "const_8_to_11_operand" "")
7189 (match_operand 8 "const_8_to_11_operand" "")
7190 (match_operand 9 "const_8_to_11_operand" "")
7196 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7197 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7198 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7199 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7202 mask |= INTVAL (operands[2]) << 0;
7203 mask |= INTVAL (operands[3]) << 2;
7204 mask |= INTVAL (operands[4]) << 4;
7205 mask |= INTVAL (operands[5]) << 6;
7206 operands[2] = GEN_INT (mask);
7208 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7210 [(set_attr "type" "sselog")
7211 (set_attr "prefix" "vex")
7212 (set_attr "length_immediate" "1")
7213 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshuflw with an integer immediate
;; (operand 2).  The immediate is split into four 2-bit selectors that
;; are passed as separate operands to sse2_pshuflw_1.
7215 (define_expand "sse2_pshuflw"
7216 [(match_operand:V8HI 0 "register_operand" "")
7217 (match_operand:V8HI 1 "nonimmediate_operand" "")
7218 (match_operand:SI 2 "const_int_operand" "")]
7221 int mask = INTVAL (operands[2]);
7222 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
7223 GEN_INT ((mask >> 0) & 3),
7224 GEN_INT ((mask >> 2) & 3),
7225 GEN_INT ((mask >> 4) & 3),
7226 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw with the shuffle of the first four elements spelled
;; out as operands 2..5 (each 0..3).  The output code packs them, two
;; bits each, into the immediate and stores it back into operands[2]
;; for the template.
7230 (define_insn "sse2_pshuflw_1"
7231 [(set (match_operand:V8HI 0 "register_operand" "=x")
7233 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7234 (parallel [(match_operand 2 "const_0_to_3_operand" "")
7235 (match_operand 3 "const_0_to_3_operand" "")
7236 (match_operand 4 "const_0_to_3_operand" "")
7237 (match_operand 5 "const_0_to_3_operand" "")
7245 mask |= INTVAL (operands[2]) << 0;
7246 mask |= INTVAL (operands[3]) << 2;
7247 mask |= INTVAL (operands[4]) << 4;
7248 mask |= INTVAL (operands[5]) << 6;
7249 operands[2] = GEN_INT (mask);
7251 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
7253 [(set_attr "type" "sselog")
7254 (set_attr "prefix_data16" "0")
7255 (set_attr "prefix_rep" "1")
7256 (set_attr "prefix" "maybe_vex")
7257 (set_attr "length_immediate" "1")
7258 (set_attr "mode" "TI")])
;; Expander for the 256-bit pshufhw with an 8-bit immediate
;; (operand 2).  The immediate is split into four 2-bit selectors,
;; which are passed to avx2_pshufhw_1 once with 4 added (values 4..7)
;; and once with 12 added (values 12..15).
7260 (define_expand "avx2_pshufhwv3"
7261 [(match_operand:V16HI 0 "register_operand" "")
7262 (match_operand:V16HI 1 "nonimmediate_operand" "")
7263 (match_operand:SI 2 "const_0_to_255_operand" "")]
7266 int mask = INTVAL (operands[2]);
7267 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
7268 GEN_INT (((mask >> 0) & 3) + 4),
7269 GEN_INT (((mask >> 2) & 3) + 4),
7270 GEN_INT (((mask >> 4) & 3) + 4),
7271 GEN_INT (((mask >> 6) & 3) + 4),
7272 GEN_INT (((mask >> 0) & 3) + 12),
7273 GEN_INT (((mask >> 2) & 3) + 12),
7274 GEN_INT (((mask >> 4) & 3) + 12),
7275 GEN_INT (((mask >> 6) & 3) + 12)));
;; 256-bit vpshufhw with explicit element indices.  Operands 2..5 must
;; be 4..7 and operands 6..9 must be 12..15; the condition
;; additionally requires operand N+4 to equal operand N plus 8.  The
;; output code subtracts 4 from each of operands 2..5 before packing
;; them (two bits each) into the immediate stored in operands[2].
7279 (define_insn "avx2_pshufhw_1"
7280 [(set (match_operand:V16HI 0 "register_operand" "=x")
7282 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
7283 (parallel [(const_int 0)
7287 (match_operand 2 "const_4_to_7_operand" "")
7288 (match_operand 3 "const_4_to_7_operand" "")
7289 (match_operand 4 "const_4_to_7_operand" "")
7290 (match_operand 5 "const_4_to_7_operand" "")
7295 (match_operand 6 "const_12_to_15_operand" "")
7296 (match_operand 7 "const_12_to_15_operand" "")
7297 (match_operand 8 "const_12_to_15_operand" "")
7298 (match_operand 9 "const_12_to_15_operand" "")])))]
7300 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
7301 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
7302 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
7303 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
7306 mask |= (INTVAL (operands[2]) - 4) << 0;
7307 mask |= (INTVAL (operands[3]) - 4) << 2;
7308 mask |= (INTVAL (operands[4]) - 4) << 4;
7309 mask |= (INTVAL (operands[5]) - 4) << 6;
7310 operands[2] = GEN_INT (mask);
7312 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7314 [(set_attr "type" "sselog")
7315 (set_attr "prefix" "vex")
7316 (set_attr "length_immediate" "1")
7317 (set_attr "mode" "OI")])
;; Expander for the 128-bit pshufhw with an integer immediate
;; (operand 2).  The immediate is split into four 2-bit selectors;
;; each has 4 added (giving 4..7) before being passed to
;; sse2_pshufhw_1.
7319 (define_expand "sse2_pshufhw"
7320 [(match_operand:V8HI 0 "register_operand" "")
7321 (match_operand:V8HI 1 "nonimmediate_operand" "")
7322 (match_operand:SI 2 "const_int_operand" "")]
7325 int mask = INTVAL (operands[2]);
7326 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
7327 GEN_INT (((mask >> 0) & 3) + 4),
7328 GEN_INT (((mask >> 2) & 3) + 4),
7329 GEN_INT (((mask >> 4) & 3) + 4),
7330 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit pshufhw with the shuffle of the last four elements spelled
;; out as operands 2..5 (each 4..7).  The output code subtracts 4 from
;; each before packing them (two bits each) into the immediate stored
;; in operands[2].
7334 (define_insn "sse2_pshufhw_1"
7335 [(set (match_operand:V8HI 0 "register_operand" "=x")
7337 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7338 (parallel [(const_int 0)
7342 (match_operand 2 "const_4_to_7_operand" "")
7343 (match_operand 3 "const_4_to_7_operand" "")
7344 (match_operand 4 "const_4_to_7_operand" "")
7345 (match_operand 5 "const_4_to_7_operand" "")])))]
7349 mask |= (INTVAL (operands[2]) - 4) << 0;
7350 mask |= (INTVAL (operands[3]) - 4) << 2;
7351 mask |= (INTVAL (operands[4]) - 4) << 4;
7352 mask |= (INTVAL (operands[5]) - 4) << 6;
7353 operands[2] = GEN_INT (mask);
7355 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
7357 [(set_attr "type" "sselog")
7358 (set_attr "prefix_rep" "1")
7359 (set_attr "prefix_data16" "0")
7360 (set_attr "prefix" "maybe_vex")
7361 (set_attr "length_immediate" "1")
7362 (set_attr "mode" "TI")])
;; Expander taking an SImode operand 1 and producing a V4SI result.
;; The preparation statement sets operands[2] to the V4SImode zero
;; constant.
7364 (define_expand "sse2_loadd"
7365 [(set (match_operand:V4SI 0 "register_operand" "")
7368 (match_operand:SI 1 "nonimmediate_operand" ""))
7372 "operands[2] = CONST0_RTX (V4SImode);")
;; Place SImode operand 2 into a V4SI register, with operand 1
;; supplying the remaining contents (constraint C, or a register).
;; Alternatives 0-1 use movd from memory or a general register,
;; alternatives 2-3 use movss (isa noavx), and alternative 4 is the
;; three-operand vmovss (isa avx).
7374 (define_insn "sse2_loadld"
7375 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
7378 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
7379 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
7383 %vmovd\t{%2, %0|%0, %2}
7384 %vmovd\t{%2, %0|%0, %2}
7385 movss\t{%2, %0|%0, %2}
7386 movss\t{%2, %0|%0, %2}
7387 vmovss\t{%2, %1, %0|%0, %1, %2}"
7388 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
7389 (set_attr "type" "ssemov")
7390 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
7391 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI register operand 1 to an SImode
;; destination.  After reload, when inter-unit moves are enabled, the
;; destination is memory, or it is not a general register, the insn is
;; split into a plain SImode move from the same hard register
;; (operand 1 is rewritten as an SImode REG with the same REGNO).
7393 (define_insn_and_split "sse2_stored"
7394 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
7396 (match_operand:V4SI 1 "register_operand" "x,Yi")
7397 (parallel [(const_int 0)])))]
7400 "&& reload_completed
7401 && (TARGET_INTER_UNIT_MOVES
7402 || MEM_P (operands [0])
7403 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7404 [(set (match_dup 0) (match_dup 1))]
7405 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) of a V4SI value that lives in
;; offsettable memory.  The split code loads the SImode word at byte
;; offset i*4 of the memory operand directly into the destination
;; register.
7407 (define_insn_and_split "*vec_ext_v4si_mem"
7408 [(set (match_operand:SI 0 "register_operand" "=r")
7410 (match_operand:V4SI 1 "memory_operand" "o")
7411 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
7417 int i = INTVAL (operands[2]);
7419 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Expander storing element 0 of V2DI register operand 1 into a
;; DImode register or memory destination.
7423 (define_expand "sse_storeq"
7424 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7426 (match_operand:V2DI 1 "register_operand" "")
7427 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 DImode store from a V2DI operand.
;; Requires TARGET_64BIT and rejects the memory-to-memory case.  The
;; third alternative (offsettable memory source, general register
;; destination) is a plain mov{q}.
7430 (define_insn "*sse2_storeq_rex64"
7431 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
7433 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
7434 (parallel [(const_int 0)])))]
7435 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7439 mov{q}\t{%1, %0|%0, %1}"
7440 [(set_attr "type" "*,*,imov")
7441 (set_attr "mode" "*,*,DI")])
;; Element-0 DImode store from a V2DI register to an SSE register or
;; memory.  The second RTL template below, with the same shape but
;; empty constraints, is rewritten into a plain DImode move when
;; inter-unit moves are enabled, the destination is memory, or it is
;; not a general register; operand 1 is then re-expressed as a DImode
;; REG with the same REGNO.
7443 (define_insn "*sse2_storeq"
7444 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
7446 (match_operand:V2DI 1 "register_operand" "x")
7447 (parallel [(const_int 0)])))]
7452 [(set (match_operand:DI 0 "nonimmediate_operand" "")
7454 (match_operand:V2DI 1 "register_operand" "")
7455 (parallel [(const_int 0)])))]
7458 && (TARGET_INTER_UNIT_MOVES
7459 || MEM_P (operands [0])
7460 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
7461 [(set (match_dup 0) (match_dup 1))]
7462 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI operand on 64-bit targets (the
;; memory-to-memory case is rejected).  Alternatives, in order:
;; movhps store to memory; in-place psrldq by 8 bytes (isa noavx);
;; three-operand vpsrldq by 8 (isa avx); movq from %H1 of an
;; offsettable memory source into an SSE register; mov{q} from %H1
;; into a general register.  %H1 presumably addresses the upper
;; 8 bytes of the memory operand -- confirm against the i386 operand
;; modifiers.
7464 (define_insn "*vec_extractv2di_1_rex64"
7465 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
7467 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
7468 (parallel [(const_int 1)])))]
7469 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7471 %vmovhps\t{%1, %0|%0, %1}
7472 psrldq\t{$8, %0|%0, 8}
7473 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7474 %vmovq\t{%H1, %0|%0, %H1}
7475 mov{q}\t{%H1, %0|%0, %H1}"
7476 [(set_attr "isa" "*,noavx,avx,*,*")
7477 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
7478 (set_attr "length_immediate" "*,1,1,*,*")
7479 (set_attr "memory" "*,none,none,*,*")
7480 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
7481 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of a V2DI operand on 32-bit targets with SSE
;; (the memory-to-memory case is rejected).  Alternatives, in order:
;; movhps store to memory; in-place psrldq by 8 (isa sse2_noavx);
;; three-operand vpsrldq by 8 (isa avx); movq from %H1 of an
;; offsettable memory source (isa sse2); movhlps register to register
;; (isa noavx); movlps from %H1 of memory (isa noavx).
7483 (define_insn "*vec_extractv2di_1"
7484 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x")
7486 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o")
7487 (parallel [(const_int 1)])))]
7488 "!TARGET_64BIT && TARGET_SSE
7489 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7491 %vmovhps\t{%1, %0|%0, %1}
7492 psrldq\t{$8, %0|%0, 8}
7493 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7494 %vmovq\t{%H1, %0|%0, %H1}
7495 movhlps\t{%1, %0|%0, %1}
7496 movlps\t{%H1, %0|%0, %H1}"
7497 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx")
7498 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
7499 (set_attr "length_immediate" "*,1,1,*,*,*")
7500 (set_attr "memory" "*,none,none,*,*,*")
7501 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
7502 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; Build a V4SI value from a single SImode operand.  Alternatives, in
;; order: pshufd with immediate 0 from an SSE register (isa sse2);
;; vbroadcastss from memory (isa avx); in-place shufps with
;; immediate 0 when the source is tied to the destination (isa noavx).
7504 (define_insn "*vec_dupv4si"
7505 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
7507 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
7510 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7511 vbroadcastss\t{%1, %0|%0, %1}
7512 shufps\t{$0, %0, %0|%0, %0, 0}"
7513 [(set_attr "isa" "sse2,avx,noavx")
7514 (set_attr "type" "sselog1,ssemov,sselog1")
7515 (set_attr "length_immediate" "1,0,1")
7516 (set_attr "prefix_extra" "0,1,*")
7517 (set_attr "prefix" "maybe_vex,vex,orig")
7518 (set_attr "mode" "TI,V4SF,V4SF")])
;; Build a V2DI value from a single DImode operand.  Four alternatives
;; are declared (isa sse2_noavx, avx, sse3, noavx); among the output
;; templates are vpunpcklqdq, which prints operand 1 with the %d
;; modifier, and movddup.
7520 (define_insn "*vec_dupv2di"
7521 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
7523 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
7527 vpunpcklqdq\t{%d1, %0|%0, %d1}
7528 %vmovddup\t{%1, %0|%0, %1}
7530 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
7531 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
7532 (set_attr "prefix" "orig,vex,maybe_vex,orig")
7533 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Form a V2SI value from two SImode operands.  Alternatives, in
;; order: pinsrd / vpinsrd inserting operand 2 (register or memory) at
;; index 1; punpckldq / vpunpckldq when operand 2 is an SSE register;
;; movd of operand 1 when operand 2 matches constraint C; and two MMX
;; alternatives (punpckldq, movd) on *y registers.
7535 (define_insn "*vec_concatv2si_sse4_1"
7536 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
7538 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
7539 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
7542 pinsrd\t{$1, %2, %0|%0, %2, 1}
7543 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
7544 punpckldq\t{%2, %0|%0, %2}
7545 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7546 %vmovd\t{%1, %0|%0, %1}
7547 punpckldq\t{%2, %0|%0, %2}
7548 movd\t{%1, %0|%0, %1}"
7549 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7550 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
7551 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
7552 (set_attr "length_immediate" "1,1,*,*,*,*,*")
7553 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
7554 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
7556 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7557 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7558 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Form a V2SI value from two SImode operands.  Alternatives, in
;; order: punpckldq with an SSE register operand 2; movd of operand 1
;; when operand 2 matches constraint C; and the same two shapes on MMX
;; (*y) registers.
7559 (define_insn "*vec_concatv2si_sse2"
7560 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
7562 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
7563 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
7566 punpckldq\t{%2, %0|%0, %2}
7567 movd\t{%1, %0|%0, %1}
7568 punpckldq\t{%2, %0|%0, %2}
7569 movd\t{%1, %0|%0, %1}"
7570 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7571 (set_attr "mode" "TI,TI,DI,DI")])
;; Form a V2SI value from two SImode operands using single-precision
;; float instructions for the SSE alternatives: unpcklps with an SSE
;; register operand 2, or movss from memory when operand 2 matches
;; constraint C.  The last two alternatives are the MMX punpckldq and
;; movd forms on *y registers.
7573 (define_insn "*vec_concatv2si_sse"
7574 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
7576 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
7577 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
7580 unpcklps\t{%2, %0|%0, %2}
7581 movss\t{%1, %0|%0, %1}
7582 punpckldq\t{%2, %0|%0, %2}
7583 movd\t{%1, %0|%0, %1}"
7584 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7585 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Form a V4SI value from two V2SI operands.  Alternatives, in order:
;; punpcklqdq (isa sse2_noavx); vpunpcklqdq (isa avx); movlhps with a
;; register operand 2 (isa noavx); movhps with a memory operand 2
;; (isa noavx); three-operand vmovhps with a memory operand 2
;; (isa avx).
7587 (define_insn "*vec_concatv4si"
7588 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
7590 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
7591 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
7594 punpcklqdq\t{%2, %0|%0, %2}
7595 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7596 movlhps\t{%2, %0|%0, %2}
7597 movhps\t{%2, %0|%0, %2}
7598 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7599 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
7600 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
7601 (set_attr "prefix" "orig,vex,orig,orig,vex")
7602 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
7604 ;; movd instead of movq is required to handle broken assemblers.
;; Form a V2DI value from two DImode operands.  Alternatives, in
;; order: pinsrq / vpinsrq inserting operand 2 at index 1; movq of an
;; SSE register or memory operand 1 and movd of a general register
;; operand 1 when operand 2 matches constraint C; movq2dq from an MMX
;; register; punpcklqdq / vpunpcklqdq with an SSE register operand 2;
;; movhps / vmovhps with a memory operand 2.
7605 (define_insn "*vec_concatv2di_rex64"
7606 [(set (match_operand:V2DI 0 "register_operand"
7607 "=x,x ,x ,Yi,!x,x,x,x,x")
7609 (match_operand:DI 1 "nonimmediate_operand"
7610 " 0,x ,xm,r ,*y,0,x,0,x")
7611 (match_operand:DI 2 "vector_move_operand"
7612 "rm,rm,C ,C ,C ,x,x,m,m")))]
7615 pinsrq\t{$1, %2, %0|%0, %2, 1}
7616 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
7617 %vmovq\t{%1, %0|%0, %1}
7618 %vmovd\t{%1, %0|%0, %1}
7619 movq2dq\t{%1, %0|%0, %1}
7620 punpcklqdq\t{%2, %0|%0, %2}
7621 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7622 movhps\t{%2, %0|%0, %2}
7623 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7624 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx")
7627 (eq_attr "alternative" "0,1,5,6")
7628 (const_string "sselog")
7629 (const_string "ssemov")))
7630 (set (attr "prefix_rex")
7632 (and (eq_attr "alternative" "0,3")
7633 (not (match_test "TARGET_AVX")))
7635 (const_string "*")))
7636 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
7637 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
7638 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
7639 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Build a V2DI SSE register (operand 0) from two DI values on 32-bit
;; targets (condition: !TARGET_64BIT && TARGET_SSE).
;; Seven alternatives, in order (gated by the isa attribute):
;;   0  %vmovq      SSE2, operand 2 matches constraint C and is not referenced
;;   1  movq2dq     SSE2, operand 1 comes from an MMX register (y)
;;   2  punpcklqdq  SSE2 without AVX, operand 1 tied to operand 0
;;   3  vpunpcklqdq AVX
;;   4  movlhps     no AVX, operand 1 tied to operand 0
;;   5  movhps      no AVX, operand 2 in memory
;;   6  vmovhps     AVX, operand 2 in memory
7641 (define_insn "vec_concatv2di"
7642 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x")
7644 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x")
7645 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))]
7646 "!TARGET_64BIT && TARGET_SSE"
7648 %vmovq\t{%1, %0|%0, %1}
7649 movq2dq\t{%1, %0|%0, %1}
7650 punpcklqdq\t{%2, %0|%0, %2}
7651 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7652 movlhps\t{%2, %0|%0, %2}
7653 movhps\t{%2, %0|%0, %2}
7654 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7655 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
7656 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
7657 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
7658 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the vec_unpacks_lo_<mode> standard name.
;; Operand 0: destination register in <sseunpackmode>.
;; Operand 1: source register, iterating over VI124_AVX2.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, false),
;; after which DONE suppresses emission of the pattern itself.
;; Going by the four sibling expanders, the first flag presumably selects
;; unsigned and the second the high half (here: signed, low) -- confirm
;; against ix86_expand_sse_unpack.
7660 (define_expand "vec_unpacks_lo_<mode>"
7661 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7662 (match_operand:VI124_AVX2 1 "register_operand" "")]
7664 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for the vec_unpacks_hi_<mode> standard name.
;; Operand 0: destination register in <sseunpackmode>.
;; Operand 1: source register, iterating over VI124_AVX2.
;; All work is delegated to ix86_expand_sse_unpack (operands, false, true),
;; after which DONE suppresses emission of the pattern itself.
;; Going by the four sibling expanders, the first flag presumably selects
;; unsigned and the second the high half (here: signed, high) -- confirm
;; against ix86_expand_sse_unpack.
7666 (define_expand "vec_unpacks_hi_<mode>"
7667 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7668 (match_operand:VI124_AVX2 1 "register_operand" "")]
7670 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for the vec_unpacku_lo_<mode> standard name.
;; Operand 0: destination register in <sseunpackmode>.
;; Operand 1: source register, iterating over VI124_AVX2.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, false),
;; after which DONE suppresses emission of the pattern itself.
;; Going by the four sibling expanders, the first flag presumably selects
;; unsigned and the second the high half (here: unsigned, low) -- confirm
;; against ix86_expand_sse_unpack.
7672 (define_expand "vec_unpacku_lo_<mode>"
7673 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7674 (match_operand:VI124_AVX2 1 "register_operand" "")]
7676 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for the vec_unpacku_hi_<mode> standard name.
;; Operand 0: destination register in <sseunpackmode>.
;; Operand 1: source register, iterating over VI124_AVX2.
;; All work is delegated to ix86_expand_sse_unpack (operands, true, true),
;; after which DONE suppresses emission of the pattern itself.
;; Going by the four sibling expanders, the first flag presumably selects
;; unsigned and the second the high half (here: unsigned, high) -- confirm
;; against ix86_expand_sse_unpack.
7678 (define_expand "vec_unpacku_hi_<mode>"
7679 [(match_operand:<sseunpackmode> 0 "register_operand" "")
7680 (match_operand:VI124_AVX2 1 "register_operand" "")]
7682 "ix86_expand_sse_unpack (operands, true, true); DONE;")
7684 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
7688 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the AVX2 unsigned byte average on V32QI.
;; Operand 0: V32QI register result.  Operands 1 and 2: V32QI register or
;; memory inputs.
;; The RTL includes a constant vector of 32 ones, presumably the +1
;; rounding term of (a + b + 1) >> 1 -- TODO confirm against the full
;; pattern.
;; Before the pattern is emitted the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands).
7690 (define_expand "avx2_uavgv32qi3"
7691 [(set (match_operand:V32QI 0 "register_operand" "")
7697 (match_operand:V32QI 1 "nonimmediate_operand" ""))
7699 (match_operand:V32QI 2 "nonimmediate_operand" "")))
7700 (const_vector:V32QI [(const_int 1) (const_int 1)
7701 (const_int 1) (const_int 1)
7702 (const_int 1) (const_int 1)
7703 (const_int 1) (const_int 1)
7704 (const_int 1) (const_int 1)
7705 (const_int 1) (const_int 1)
7706 (const_int 1) (const_int 1)
7707 (const_int 1) (const_int 1)
7708 (const_int 1) (const_int 1)
7709 (const_int 1) (const_int 1)
7710 (const_int 1) (const_int 1)
7711 (const_int 1) (const_int 1)
7712 (const_int 1) (const_int 1)
7713 (const_int 1) (const_int 1)
7714 (const_int 1) (const_int 1)
7715 (const_int 1) (const_int 1)]))
7718 "ix86_fixup_binary_operands_no_copy (PLUS, V32QImode, operands);")
;; Expander for the SSE2 unsigned byte average on V16QI.
;; Operand 0: V16QI register result.  Operands 1 and 2: V16QI register or
;; memory inputs.
;; The RTL includes a constant vector of 16 ones, presumably the +1
;; rounding term of (a + b + 1) >> 1 -- TODO confirm against the full
;; pattern.
;; Before the pattern is emitted the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands).
7720 (define_expand "sse2_uavgv16qi3"
7721 [(set (match_operand:V16QI 0 "register_operand" "")
7727 (match_operand:V16QI 1 "nonimmediate_operand" ""))
7729 (match_operand:V16QI 2 "nonimmediate_operand" "")))
7730 (const_vector:V16QI [(const_int 1) (const_int 1)
7731 (const_int 1) (const_int 1)
7732 (const_int 1) (const_int 1)
7733 (const_int 1) (const_int 1)
7734 (const_int 1) (const_int 1)
7735 (const_int 1) (const_int 1)
7736 (const_int 1) (const_int 1)
7737 (const_int 1) (const_int 1)]))
7740 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V32QI unsigned average; emits vpavgb in its
;; three-operand VEX form.
;; Operand 0: V32QI SSE register.  Operand 1: SSE register, marked
;; commutative with operand 2 by the % constraint modifier.  Operand 2: SSE
;; register or memory.
;; Condition: TARGET_AVX2 and ix86_binary_operator_ok (PLUS, V32QImode,
;; operands).
7742 (define_insn "*avx2_uavgv32qi3"
7743 [(set (match_operand:V32QI 0 "register_operand" "=x")
7749 (match_operand:V32QI 1 "nonimmediate_operand" "%x"))
7751 (match_operand:V32QI 2 "nonimmediate_operand" "xm")))
7752 (const_vector:V32QI [(const_int 1) (const_int 1)
7753 (const_int 1) (const_int 1)
7754 (const_int 1) (const_int 1)
7755 (const_int 1) (const_int 1)
7756 (const_int 1) (const_int 1)
7757 (const_int 1) (const_int 1)
7758 (const_int 1) (const_int 1)
7759 (const_int 1) (const_int 1)
7760 (const_int 1) (const_int 1)
7761 (const_int 1) (const_int 1)
7762 (const_int 1) (const_int 1)
7763 (const_int 1) (const_int 1)
7764 (const_int 1) (const_int 1)
7765 (const_int 1) (const_int 1)
7766 (const_int 1) (const_int 1)
7767 (const_int 1) (const_int 1)]))
7769 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V32QImode, operands)"
7770 "vpavgb\t{%2, %1, %0|%0, %1, %2}"
7771 [(set_attr "type" "sseiadd")
7772 (set_attr "prefix" "vex")
7773 (set_attr "mode" "OI")])
;; Matching insn for the V16QI unsigned average.
;; Two alternatives, selected by the isa attribute:
;;   0  pavgb   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vpavgb  AVX three-operand form
;; Operand 1 is marked commutative with operand 2 by the % modifier.
;; Operand 2 may be an SSE register or memory.
;; Condition: TARGET_SSE2 and ix86_binary_operator_ok (PLUS, V16QImode,
;; operands).
7775 (define_insn "*sse2_uavgv16qi3"
7776 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7782 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
7784 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
7785 (const_vector:V16QI [(const_int 1) (const_int 1)
7786 (const_int 1) (const_int 1)
7787 (const_int 1) (const_int 1)
7788 (const_int 1) (const_int 1)
7789 (const_int 1) (const_int 1)
7790 (const_int 1) (const_int 1)
7791 (const_int 1) (const_int 1)
7792 (const_int 1) (const_int 1)]))
7794 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
7796 pavgb\t{%2, %0|%0, %2}
7797 vpavgb\t{%2, %1, %0|%0, %1, %2}"
7798 [(set_attr "isa" "noavx,avx")
7799 (set_attr "type" "sseiadd")
7800 (set_attr "prefix_data16" "1,*")
7801 (set_attr "prefix" "orig,vex")
7802 (set_attr "mode" "TI")])
;; Expander for the AVX2 unsigned word average on V16HI.
;; Operand 0: V16HI register result.  Operands 1 and 2: V16HI register or
;; memory inputs.
;; The RTL includes a constant vector of 16 ones, presumably the +1
;; rounding term of (a + b + 1) >> 1 -- TODO confirm against the full
;; pattern.
;; Before the pattern is emitted the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands).
7804 (define_expand "avx2_uavgv16hi3"
7805 [(set (match_operand:V16HI 0 "register_operand" "")
7811 (match_operand:V16HI 1 "nonimmediate_operand" ""))
7813 (match_operand:V16HI 2 "nonimmediate_operand" "")))
7814 (const_vector:V16HI [(const_int 1) (const_int 1)
7815 (const_int 1) (const_int 1)
7816 (const_int 1) (const_int 1)
7817 (const_int 1) (const_int 1)
7818 (const_int 1) (const_int 1)
7819 (const_int 1) (const_int 1)
7820 (const_int 1) (const_int 1)
7821 (const_int 1) (const_int 1)]))
7824 "ix86_fixup_binary_operands_no_copy (PLUS, V16HImode, operands);")
;; Expander for the SSE2 unsigned word average on V8HI.
;; Operand 0: V8HI register result.  Operands 1 and 2: V8HI register or
;; memory inputs.
;; The RTL includes a constant vector of 8 ones, presumably the +1
;; rounding term of (a + b + 1) >> 1 -- TODO confirm against the full
;; pattern.
;; Before the pattern is emitted the preparation statement calls
;; ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands).
7826 (define_expand "sse2_uavgv8hi3"
7827 [(set (match_operand:V8HI 0 "register_operand" "")
7833 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7835 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7836 (const_vector:V8HI [(const_int 1) (const_int 1)
7837 (const_int 1) (const_int 1)
7838 (const_int 1) (const_int 1)
7839 (const_int 1) (const_int 1)]))
7842 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V16HI unsigned average; emits vpavgw in its
;; three-operand VEX form.
;; Operand 0: V16HI SSE register.  Operand 1: SSE register, marked
;; commutative with operand 2 by the % constraint modifier.  Operand 2: SSE
;; register or memory.
;; Condition: TARGET_AVX2 and ix86_binary_operator_ok (PLUS, V16HImode,
;; operands).
7844 (define_insn "*avx2_uavgv16hi3"
7845 [(set (match_operand:V16HI 0 "register_operand" "=x")
7851 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
7853 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
7854 (const_vector:V16HI [(const_int 1) (const_int 1)
7855 (const_int 1) (const_int 1)
7856 (const_int 1) (const_int 1)
7857 (const_int 1) (const_int 1)
7858 (const_int 1) (const_int 1)
7859 (const_int 1) (const_int 1)
7860 (const_int 1) (const_int 1)
7861 (const_int 1) (const_int 1)]))
7863 "TARGET_AVX2 && ix86_binary_operator_ok (PLUS, V16HImode, operands)"
7864 "vpavgw\t{%2, %1, %0|%0, %1, %2}"
7865 [(set_attr "type" "sseiadd")
7866 (set_attr "prefix" "vex")
7867 (set_attr "mode" "OI")])
;; Matching insn for the V8HI unsigned average.
;; Two alternatives, selected by the isa attribute:
;;   0  pavgw   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vpavgw  AVX three-operand form
;; Operand 1 is marked commutative with operand 2 by the % modifier.
;; Operand 2 may be an SSE register or memory.
;; Condition: TARGET_SSE2 and ix86_binary_operator_ok (PLUS, V8HImode,
;; operands).
7869 (define_insn "*sse2_uavgv8hi3"
7870 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7876 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7878 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7879 (const_vector:V8HI [(const_int 1) (const_int 1)
7880 (const_int 1) (const_int 1)
7881 (const_int 1) (const_int 1)
7882 (const_int 1) (const_int 1)]))
7884 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
7886 pavgw\t{%2, %0|%0, %2}
7887 vpavgw\t{%2, %1, %0|%0, %1, %2}"
7888 [(set_attr "isa" "noavx,avx")
7889 (set_attr "type" "sseiadd")
7890 (set_attr "prefix_data16" "1,*")
7891 (set_attr "prefix" "orig,vex")
7892 (set_attr "mode" "TI")])
;; psadbw / vpsadbw, modelled as an opaque unspec rather than as arithmetic
;; RTL (see the remark below).
;; Operand 0: result register, iterating over VI8_AVX2.
;; Operands 1 and 2: inputs in the matching byte-vector mode <ssebytemode>.
;; Operand 2 may be memory.
;; Two alternatives, selected by the isa attribute:
;;   0  psadbw   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vpsadbw  AVX three-operand form
7894 ;; The correct representation for this is absolutely enormous, and
7895 ;; surely not generally useful.
7896 (define_insn "<sse2_avx2>_psadbw"
7897 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
7898 (unspec:VI8_AVX2 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
7899 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
7903 psadbw\t{%2, %0|%0, %2}
7904 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
7905 [(set_attr "isa" "noavx,avx")
7906 (set_attr "type" "sseiadd")
7907 (set_attr "atom_unit" "simul")
7908 (set_attr "prefix_data16" "1,*")
7909 (set_attr "prefix" "orig,vex")
7910 (set_attr "mode" "<sseinsnmode>")])
;; movmskps / movmskpd family, one insn per mode of the VF iterator.
;; Operand 0: SImode general register receiving the result.
;; Operand 1: vector float SSE register.
;; The mnemonic suffix comes from <ssemodesuffix>.  The %v template prefix
;; together with prefix maybe_vex selects the VEX-encoded spelling when AVX
;; is enabled.
7912 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
7913 [(set (match_operand:SI 0 "register_operand" "=r")
7915 [(match_operand:VF 1 "register_operand" "x")]
7918 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
7919 [(set_attr "type" "ssemov")
7920 (set_attr "prefix" "maybe_vex")
7921 (set_attr "mode" "<MODE>")])
;; 256-bit vpmovmskb, modelled as an unspec.
;; Operand 0: SImode general register receiving the result.
;; Operand 1: V32QI SSE register.
;; NOTE(review): the mode attribute is DI although operand 0 is SImode and
;; the 128-bit sibling sse2_pmovmskb uses SI -- confirm this is intended.
7923 (define_insn "avx2_pmovmskb"
7924 [(set (match_operand:SI 0 "register_operand" "=r")
7925 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
7928 "vpmovmskb\t{%1, %0|%0, %1}"
7929 [(set_attr "type" "ssemov")
7930 (set_attr "prefix" "vex")
7931 (set_attr "mode" "DI")])
;; 128-bit pmovmskb, modelled as an unspec.
;; Operand 0: SImode general register receiving the result.
;; Operand 1: V16QI SSE register.
;; The %v template prefix together with prefix maybe_vex selects vpmovmskb
;; when AVX is enabled.  prefix_data16 accounts for the legacy encoding.
7933 (define_insn "sse2_pmovmskb"
7934 [(set (match_operand:SI 0 "register_operand" "=r")
7935 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7938 "%vpmovmskb\t{%1, %0|%0, %1}"
7939 [(set_attr "type" "ssemov")
7940 (set_attr "prefix_data16" "1")
7941 (set_attr "prefix" "maybe_vex")
7942 (set_attr "mode" "SI")])
;; Named expander for maskmovdqu.
;; Operand 0: V16QI memory destination.
;; Operands 1 and 2: V16QI registers, wrapped in an unspec.
;; Presumably operand 1 is the data and operand 2 the byte mask -- TODO
;; confirm against the builtin that calls this expander.
7944 (define_expand "sse2_maskmovdqu"
7945 [(set (match_operand:V16QI 0 "memory_operand" "")
7946 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
7947 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.
;; Operand 0: the destination ADDRESS (pointer mode P), forced into the
;; register selected by constraint D because the instruction addresses
;; memory implicitly (see the %rdi remark below).
;; Operands 1 and 2: V16QI SSE registers.
;; The unspec also reads the previous memory contents through
;; (mem (match_dup 0)).
;; Only operands 1 and 2 appear in the assembler template.
;; length_vex is computed by hand as 3 plus one when operand 2 is a REX SSE
;; register.
7952 (define_insn "*sse2_maskmovdqu"
7953 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
7954 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
7955 (match_operand:V16QI 2 "register_operand" "x")
7956 (mem:V16QI (match_dup 0))]
7959 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7960 [(set_attr "type" "ssemov")
7961 (set_attr "prefix_data16" "1")
7962 ;; The implicit %rdi operand confuses default length_vex computation.
7963 (set (attr "length_vex")
7964 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
7965 (set_attr "prefix" "maybe_vex")
7966 (set_attr "mode" "TI")])
;; Load of the MXCSR control/status register (per the name and the mxcsr
;; and load attributes).
;; Operand 0: SImode memory source.
;; Modelled as unspec_volatile so it is treated as having side effects.
7968 (define_insn "sse_ldmxcsr"
7969 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7973 [(set_attr "type" "sse")
7974 (set_attr "atom_sse_attr" "mxcsr")
7975 (set_attr "prefix" "maybe_vex")
7976 (set_attr "memory" "load")])
;; Store of the MXCSR control/status register (per the name and the mxcsr
;; and store attributes).
;; Operand 0: SImode memory destination, set from the volatile unspec
;; UNSPECV_STMXCSR, which takes only a dummy (const_int 0) argument.
7978 (define_insn "sse_stmxcsr"
7979 [(set (match_operand:SI 0 "memory_operand" "=m")
7980 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7983 [(set_attr "type" "sse")
7984 (set_attr "atom_sse_attr" "mxcsr")
7985 (set_attr "prefix" "maybe_vex")
7986 (set_attr "memory" "store")])
;; Named expander for sfence, available with SSE or extended 3DNow!.
;; The preparation code creates operands[0] itself: a BLKmode MEM at a
;; scratch address, flagged volatile.  The pattern then sets that MEM to
;; an UNSPEC_SFENCE of itself.
;; Presumably this models the fence as touching all of memory so that
;; memory accesses are not moved across it -- TODO confirm.
7988 (define_expand "sse_sfence"
7990 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7991 "TARGET_SSE || TARGET_3DNOW_A"
7993 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
7994 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse_sfence expander: a BLKmode
;; operand set to an UNSPEC_SFENCE of itself.
;; Operand 0 has an empty predicate and constraint, so any BLKmode rtx
;; matches.
;; length_address is 0 because the instruction encodes no address.
;; memory is marked unknown.
7997 (define_insn "*sse_sfence"
7998 [(set (match_operand:BLK 0 "" "")
7999 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
8000 "TARGET_SSE || TARGET_3DNOW_A"
8002 [(set_attr "type" "sse")
8003 (set_attr "length_address" "0")
8004 (set_attr "atom_sse_attr" "fence")
8005 (set_attr "memory" "unknown")])
;; clflush (per the name), modelled as unspec_volatile.
;; Operand 0: any valid address (predicate address_operand, constraint p).
;; No mode is given on the operand.
;; Scheduled like a fence on Atom (atom_sse_attr fence), with memory marked
;; unknown.
8007 (define_insn "sse2_clflush"
8008 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
8012 [(set_attr "type" "sse")
8013 (set_attr "atom_sse_attr" "fence")
8014 (set_attr "memory" "unknown")])
;; Named expander for mfence.
;; The preparation code creates operands[0] itself: a BLKmode MEM at a
;; scratch address, flagged volatile.  The pattern then sets that MEM to
;; an UNSPEC_MFENCE of itself.
;; Presumably this models the fence as touching all of memory so that
;; memory accesses are not moved across it -- TODO confirm.
8016 (define_expand "sse2_mfence"
8018 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8021 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8022 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse2_mfence expander: a BLKmode
;; operand set to an UNSPEC_MFENCE of itself.
;; Enabled on every 64-bit target, or on 32-bit targets with SSE2.
;; length_address is 0 because the instruction encodes no address.
;; memory is marked unknown.
8025 (define_insn "*sse2_mfence"
8026 [(set (match_operand:BLK 0 "" "")
8027 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
8028 "TARGET_64BIT || TARGET_SSE2"
8030 [(set_attr "type" "sse")
8031 (set_attr "length_address" "0")
8032 (set_attr "atom_sse_attr" "fence")
8033 (set_attr "memory" "unknown")])
;; Named expander for lfence.
;; The preparation code creates operands[0] itself: a BLKmode MEM at a
;; scratch address, flagged volatile.  The pattern then sets that MEM to
;; an UNSPEC_LFENCE of itself.
;; Presumably this models the fence as touching all of memory so that
;; memory accesses are not moved across it -- TODO confirm.
8035 (define_expand "sse2_lfence"
8037 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8040 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
8041 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the RTL produced by the sse2_lfence expander: a BLKmode
;; operand set to an UNSPEC_LFENCE of itself.
;; Unlike the sfence and mfence insns, which use atom_sse_attr fence, this
;; one has its own Atom scheduling class, lfence.
;; length_address is 0 because the instruction encodes no address.
;; memory is marked unknown.
8044 (define_insn "*sse2_lfence"
8045 [(set (match_operand:BLK 0 "" "")
8046 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
8049 [(set_attr "type" "sse")
8050 (set_attr "length_address" "0")
8051 (set_attr "atom_sse_attr" "lfence")
8052 (set_attr "memory" "unknown")])
;; mwait, modelled as unspec_volatile.
;; Operand 0: SImode value pinned by constraint a.
;; Operand 1: SImode value pinned by constraint c.
;; The comment below names the 64-bit forms of these registers as rax and
;; rcx.
;; The encoded length is fixed at 3 bytes.
8054 (define_insn "sse3_mwait"
8055 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8056 (match_operand:SI 1 "register_operand" "c")]
8059 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
8060 ;; Since 32bit register operands are implicitly zero extended to 64bit,
8061 ;; we only need to set up 32bit registers.
8063 [(set_attr "length" "3")])
;; monitor for 32-bit targets (condition: TARGET_SSE3 && !TARGET_64BIT),
;; modelled as unspec_volatile.
;; Operands 0, 1 and 2 are SImode registers pinned by constraints a, c and
;; d respectively.  All three are printed explicitly in the template.
;; The encoded length is fixed at 3 bytes.
8065 (define_insn "sse3_monitor"
8066 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
8067 (match_operand:SI 1 "register_operand" "c")
8068 (match_operand:SI 2 "register_operand" "d")]
8070 "TARGET_SSE3 && !TARGET_64BIT"
8071 "monitor\t%0, %1, %2"
8072 [(set_attr "length" "3")])
;; monitor for 64-bit targets (condition: TARGET_SSE3 && TARGET_64BIT),
;; modelled as unspec_volatile.
;; Operand 0 is a DImode register pinned by constraint a.
;; Operands 1 and 2 stay SImode, pinned by constraints c and d, for the
;; reason given in the comment below.
;; The encoded length is fixed at 3 bytes.
8074 (define_insn "sse3_monitor64"
8075 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
8076 (match_operand:SI 1 "register_operand" "c")
8077 (match_operand:SI 2 "register_operand" "d")]
8079 "TARGET_SSE3 && TARGET_64BIT"
8080 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
8081 ;; RCX and RDX are used. Since 32bit register operands are implicitly
8082 ;; zero extended to 64bit, we only need to set up 32bit registers.
8084 [(set_attr "length" "3")])
8086 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8088 ;; SSSE3 instructions
8090 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 horizontal word add; emits vphaddw in its three-operand VEX form.
;; Operand 0: V16HI SSE register result.
;; Operand 1: V16HI SSE register.  Operand 2: V16HI SSE register or memory.
;; The RTL spells the result out element by element: every pair of
;; vec_selects below picks elements (2k, 2k+1), first across all 16
;; elements of operand 1 and then across all 16 elements of operand 2.
;; NOTE(review): Intel documents the 256-bit VPHADDW as operating on each
;; 128-bit lane separately, whereas this pattern lists every pair of
;; operand 1 before any pair of operand 2 -- confirm the element order
;; against the hardware result layout.
8092 (define_insn "avx2_phaddwv16hi3"
8093 [(set (match_operand:V16HI 0 "register_operand" "=x")
8100 (match_operand:V16HI 1 "register_operand" "x")
8101 (parallel [(const_int 0)]))
8102 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8104 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8105 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8108 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8109 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8111 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8112 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8116 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8117 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8119 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8120 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8123 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8124 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8126 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8127 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8133 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8134 (parallel [(const_int 0)]))
8135 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8137 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8138 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8141 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8142 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8144 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8145 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8149 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8150 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8152 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8156 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8157 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8159 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8160 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8162 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
8163 [(set_attr "type" "sseiadd")
8164 (set_attr "prefix_extra" "1")
8165 (set_attr "prefix" "vex")
8166 (set_attr "mode" "OI")])
;; 128-bit horizontal word add on V8HI.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; Operand 2 may be an SSE register or memory.
;; Two alternatives, selected by the isa attribute:
;;   0  phaddw   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vphaddw  AVX three-operand form
8168 (define_insn "ssse3_phaddwv8hi3"
8169 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8175 (match_operand:V8HI 1 "register_operand" "0,x")
8176 (parallel [(const_int 0)]))
8177 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8179 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8180 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8183 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8184 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8186 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8187 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8192 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8193 (parallel [(const_int 0)]))
8194 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8196 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8197 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8200 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8201 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8203 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8204 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8207 phaddw\t{%2, %0|%0, %2}
8208 vphaddw\t{%2, %1, %0|%0, %1, %2}"
8209 [(set_attr "isa" "noavx,avx")
8210 (set_attr "type" "sseiadd")
8211 (set_attr "atom_unit" "complex")
8212 (set_attr "prefix_data16" "1,*")
8213 (set_attr "prefix_extra" "1")
8214 (set_attr "prefix" "orig,vex")
8215 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal word add on V4HI; emits phaddw.
;; Operand 0 and operand 1 share one MMX register (constraint y, operand 1
;; tied to operand 0).  Operand 2 is an MMX register or memory.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8217 (define_insn "ssse3_phaddwv4hi3"
8218 [(set (match_operand:V4HI 0 "register_operand" "=y")
8223 (match_operand:V4HI 1 "register_operand" "0")
8224 (parallel [(const_int 0)]))
8225 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8227 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8228 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8232 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8233 (parallel [(const_int 0)]))
8234 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8236 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8237 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8239 "phaddw\t{%2, %0|%0, %2}"
8240 [(set_attr "type" "sseiadd")
8241 (set_attr "atom_unit" "complex")
8242 (set_attr "prefix_extra" "1")
8243 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8244 (set_attr "mode" "DI")])
;; AVX2 horizontal doubleword add; emits vphaddd in its three-operand VEX
;; form.
;; Operand 0: V8SI SSE register result.
;; Operand 1: V8SI SSE register.  Operand 2: V8SI SSE register or memory.
;; The RTL pairs elements (2k, 2k+1) across all 8 elements of operand 1
;; and then across all 8 elements of operand 2.
;; NOTE(review): Intel documents the 256-bit VPHADDD as operating on each
;; 128-bit lane separately, whereas this pattern lists every pair of
;; operand 1 before any pair of operand 2 -- confirm the element order
;; against the hardware result layout.
8246 (define_insn "avx2_phadddv8si3"
8247 [(set (match_operand:V8SI 0 "register_operand" "=x")
8253 (match_operand:V8SI 1 "register_operand" "x")
8254 (parallel [(const_int 0)]))
8255 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8257 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8258 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8261 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8262 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8264 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8265 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8270 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8271 (parallel [(const_int 0)]))
8272 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8274 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8275 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8278 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8279 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8281 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8282 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8284 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
8285 [(set_attr "type" "sseiadd")
8286 (set_attr "prefix_extra" "1")
8287 (set_attr "prefix" "vex")
8288 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword add on V4SI.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; Operand 2 may be an SSE register or memory.
;; Two alternatives, selected by the isa attribute:
;;   0  phaddd   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vphaddd  AVX three-operand form
8290 (define_insn "ssse3_phadddv4si3"
8291 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8296 (match_operand:V4SI 1 "register_operand" "0,x")
8297 (parallel [(const_int 0)]))
8298 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8300 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8301 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8305 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8306 (parallel [(const_int 0)]))
8307 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8309 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8310 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8313 phaddd\t{%2, %0|%0, %2}
8314 vphaddd\t{%2, %1, %0|%0, %1, %2}"
8315 [(set_attr "isa" "noavx,avx")
8316 (set_attr "type" "sseiadd")
8317 (set_attr "atom_unit" "complex")
8318 (set_attr "prefix_data16" "1,*")
8319 (set_attr "prefix_extra" "1")
8320 (set_attr "prefix" "orig,vex")
8321 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal doubleword add on V2SI; emits phaddd.
;; Operand 0 and operand 1 share one MMX register (constraint y, operand 1
;; tied to operand 0).  Operand 2 is an MMX register or memory.
;; The RTL pairs elements (0, 1) of operand 1, then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8323 (define_insn "ssse3_phadddv2si3"
8324 [(set (match_operand:V2SI 0 "register_operand" "=y")
8328 (match_operand:V2SI 1 "register_operand" "0")
8329 (parallel [(const_int 0)]))
8330 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8333 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8334 (parallel [(const_int 0)]))
8335 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8337 "phaddd\t{%2, %0|%0, %2}"
8338 [(set_attr "type" "sseiadd")
8339 (set_attr "atom_unit" "complex")
8340 (set_attr "prefix_extra" "1")
8341 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8342 (set_attr "mode" "DI")])
;; AVX2 horizontal word add emitting vphaddsw (the saturating variant, per
;; the mnemonic) in its three-operand VEX form.
;; Operand 0: V16HI SSE register result.
;; Operand 1: V16HI SSE register.  Operand 2: V16HI SSE register or memory.
;; The RTL pairs elements (2k, 2k+1) across all 16 elements of operand 1
;; and then across all 16 elements of operand 2.
;; NOTE(review): Intel documents the 256-bit VPHADDSW as operating on each
;; 128-bit lane separately, whereas this pattern lists every pair of
;; operand 1 before any pair of operand 2 -- confirm the element order
;; against the hardware result layout.
8344 (define_insn "avx2_phaddswv16hi3"
8345 [(set (match_operand:V16HI 0 "register_operand" "=x")
8352 (match_operand:V16HI 1 "register_operand" "x")
8353 (parallel [(const_int 0)]))
8354 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8356 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8357 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8360 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8361 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8363 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8364 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8368 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8369 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8371 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8372 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8375 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8376 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8378 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8379 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8385 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8386 (parallel [(const_int 0)]))
8387 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8389 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8390 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8393 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8394 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8396 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8397 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8401 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8402 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8404 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8405 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8408 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8409 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8412 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8414 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8415 [(set_attr "type" "sseiadd")
8416 (set_attr "prefix_extra" "1")
8417 (set_attr "prefix" "vex")
8418 (set_attr "mode" "OI")])
;; 128-bit horizontal word add on V8HI using the phaddsw mnemonic (the
;; saturating variant).
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; Operand 2 may be an SSE register or memory.
;; Two alternatives, selected by the isa attribute:
;;   0  phaddsw   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vphaddsw  AVX three-operand form
8420 (define_insn "ssse3_phaddswv8hi3"
8421 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8427 (match_operand:V8HI 1 "register_operand" "0,x")
8428 (parallel [(const_int 0)]))
8429 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8431 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8432 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8435 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8436 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8438 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8439 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8444 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8445 (parallel [(const_int 0)]))
8446 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8448 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8449 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8452 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8453 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8455 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8456 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8459 phaddsw\t{%2, %0|%0, %2}
8460 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8461 [(set_attr "isa" "noavx,avx")
8462 (set_attr "type" "sseiadd")
8463 (set_attr "atom_unit" "complex")
8464 (set_attr "prefix_data16" "1,*")
8465 (set_attr "prefix_extra" "1")
8466 (set_attr "prefix" "orig,vex")
8467 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal word add on V4HI; emits phaddsw (the
;; saturating variant, per the mnemonic).
;; Operand 0 and operand 1 share one MMX register (constraint y, operand 1
;; tied to operand 0).  Operand 2 is an MMX register or memory.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8469 (define_insn "ssse3_phaddswv4hi3"
8470 [(set (match_operand:V4HI 0 "register_operand" "=y")
8475 (match_operand:V4HI 1 "register_operand" "0")
8476 (parallel [(const_int 0)]))
8477 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8479 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8480 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8484 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8485 (parallel [(const_int 0)]))
8486 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8488 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8489 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8491 "phaddsw\t{%2, %0|%0, %2}"
8492 [(set_attr "type" "sseiadd")
8493 (set_attr "atom_unit" "complex")
8494 (set_attr "prefix_extra" "1")
8495 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8496 (set_attr "mode" "DI")])
;; AVX2 horizontal word subtract; emits vphsubw in its three-operand VEX
;; form.
;; Operand 0: V16HI SSE register result.
;; Operand 1: V16HI SSE register.  Operand 2: V16HI SSE register or memory.
;; The RTL pairs elements (2k, 2k+1) across all 16 elements of operand 1
;; and then across all 16 elements of operand 2.
;; NOTE(review): Intel documents the 256-bit VPHSUBW as operating on each
;; 128-bit lane separately, whereas this pattern lists every pair of
;; operand 1 before any pair of operand 2 -- confirm the element order
;; against the hardware result layout.
8498 (define_insn "avx2_phsubwv16hi3"
8499 [(set (match_operand:V16HI 0 "register_operand" "=x")
8506 (match_operand:V16HI 1 "register_operand" "x")
8507 (parallel [(const_int 0)]))
8508 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8510 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8511 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8514 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8515 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8517 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8518 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8522 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8523 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8525 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8526 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8529 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8530 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8532 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8533 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8539 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8540 (parallel [(const_int 0)]))
8541 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8543 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8544 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8547 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8548 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8550 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8551 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8555 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8556 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8558 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8559 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8562 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8563 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8565 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8566 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8568 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8569 [(set_attr "type" "sseiadd")
8570 (set_attr "prefix_extra" "1")
8571 (set_attr "prefix" "vex")
8572 (set_attr "mode" "OI")])
;; 128-bit horizontal word subtract on V8HI.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; Operand 2 may be an SSE register or memory.
;; Two alternatives, selected by the isa attribute:
;;   0  phsubw   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vphsubw  AVX three-operand form
8574 (define_insn "ssse3_phsubwv8hi3"
8575 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8581 (match_operand:V8HI 1 "register_operand" "0,x")
8582 (parallel [(const_int 0)]))
8583 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8585 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8586 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8589 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8590 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8592 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8593 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8598 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8599 (parallel [(const_int 0)]))
8600 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8602 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8603 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8606 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8607 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8609 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8610 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8613 phsubw\t{%2, %0|%0, %2}
8614 vphsubw\t{%2, %1, %0|%0, %1, %2}"
8615 [(set_attr "isa" "noavx,avx")
8616 (set_attr "type" "sseiadd")
8617 (set_attr "atom_unit" "complex")
8618 (set_attr "prefix_data16" "1,*")
8619 (set_attr "prefix_extra" "1")
8620 (set_attr "prefix" "orig,vex")
8621 (set_attr "mode" "TI")])
;; 64-bit (MMX register) horizontal word subtract on V4HI; emits phsubw.
;; Operand 0 and operand 1 share one MMX register (constraint y, operand 1
;; tied to operand 0).  Operand 2 is an MMX register or memory.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
8623 (define_insn "ssse3_phsubwv4hi3"
8624 [(set (match_operand:V4HI 0 "register_operand" "=y")
8629 (match_operand:V4HI 1 "register_operand" "0")
8630 (parallel [(const_int 0)]))
8631 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8633 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8634 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8638 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8639 (parallel [(const_int 0)]))
8640 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8642 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8643 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8645 "phsubw\t{%2, %0|%0, %2}"
8646 [(set_attr "type" "sseiadd")
8647 (set_attr "atom_unit" "complex")
8648 (set_attr "prefix_extra" "1")
8649 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8650 (set_attr "mode" "DI")])
;; AVX2 horizontal doubleword subtract; emits vphsubd in its three-operand
;; VEX form.
;; Operand 0: V8SI SSE register result.
;; Operand 1: V8SI SSE register.  Operand 2: V8SI SSE register or memory.
;; The RTL pairs elements (2k, 2k+1) across all 8 elements of operand 1
;; and then across all 8 elements of operand 2.
;; NOTE(review): Intel documents the 256-bit VPHSUBD as operating on each
;; 128-bit lane separately, whereas this pattern lists every pair of
;; operand 1 before any pair of operand 2 -- confirm the element order
;; against the hardware result layout.
8652 (define_insn "avx2_phsubdv8si3"
8653 [(set (match_operand:V8SI 0 "register_operand" "=x")
8659 (match_operand:V8SI 1 "register_operand" "x")
8660 (parallel [(const_int 0)]))
8661 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8663 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8664 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8667 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
8668 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
8670 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
8671 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
8676 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
8677 (parallel [(const_int 0)]))
8678 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8680 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8681 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
8684 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
8685 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
8687 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
8688 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
8690 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8691 [(set_attr "type" "sseiadd")
8692 (set_attr "prefix_extra" "1")
8693 (set_attr "prefix" "vex")
8694 (set_attr "mode" "OI")])
;; 128-bit horizontal doubleword subtract on V4SI.
;; The RTL pairs elements (2k, 2k+1) of operand 1, then of operand 2.
;; Operand 2 may be an SSE register or memory.
;; Two alternatives, selected by the isa attribute:
;;   0  phsubd   no AVX, operand 1 tied to operand 0, data16 prefix counted
;;   1  vphsubd  AVX three-operand form
8696 (define_insn "ssse3_phsubdv4si3"
8697 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8702 (match_operand:V4SI 1 "register_operand" "0,x")
8703 (parallel [(const_int 0)]))
8704 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8706 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
8707 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
8711 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
8712 (parallel [(const_int 0)]))
8713 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
8715 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8716 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8719 phsubd\t{%2, %0|%0, %2}
8720 vphsubd\t{%2, %1, %0|%0, %1, %2}"
8722 [(set_attr "isa" "noavx,avx")
8723 (set_attr "type" "sseiadd")
8724 (set_attr "atom_unit" "complex")
8725 (set_attr "prefix_data16" "1,*")
8726 (set_attr "prefix_extra" "1")
8727 (set_attr "prefix" "orig,vex")
8728 (set_attr "mode" "TI")])
;; 64-bit "phsubd" on V2SI using "y"-class registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  The visible
;; selectors pair SI elements (0,1) of each source.  prefix_rex is computed
;; per insn via x86_extended_reg_mentioned_p.
8730 (define_insn "ssse3_phsubdv2si3"
8731 [(set (match_operand:V2SI 0 "register_operand" "=y")
8735 (match_operand:V2SI 1 "register_operand" "0")
8736 (parallel [(const_int 0)]))
8737 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
8740 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
8741 (parallel [(const_int 0)]))
8742 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8744 "phsubd\t{%2, %0|%0, %2}"
8745 [(set_attr "type" "sseiadd")
8746 (set_attr "atom_unit" "complex")
8747 (set_attr "prefix_extra" "1")
8748 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8749 (set_attr "mode" "DI")])
;; AVX2 256-bit "vphsubsw" on V16HI.  Operand 1 must be a register, operand 2
;; may be a register or memory.  The visible selectors pair adjacent HI
;; elements (0,1) ... (14,15) of each source operand.
;; NOTE(review): the RTL operators combining these selections and the insn
;; condition are not present in this excerpt.
8751 (define_insn "avx2_phsubswv16hi3"
8752 [(set (match_operand:V16HI 0 "register_operand" "=x")
8759 (match_operand:V16HI 1 "register_operand" "x")
8760 (parallel [(const_int 0)]))
8761 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8763 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8764 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8767 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8768 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8770 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8771 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8775 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
8776 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
8778 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
8779 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
8782 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
8783 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
8785 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
8786 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
8792 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8793 (parallel [(const_int 0)]))
8794 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8796 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8797 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8800 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8801 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8803 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8804 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
8808 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
8809 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
8811 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
8812 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
8815 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
8816 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
8818 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
8819 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
8821 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8822 [(set_attr "type" "sseiadd")
8823 (set_attr "prefix_extra" "1")
8824 (set_attr "prefix" "vex")
8825 (set_attr "mode" "OI")])
;; 128-bit "phsubsw" / "vphsubsw" on V8HI.  Two alternatives, tagged
;; noavx/avx by the "isa" attribute: the first ties operand 1 to the
;; destination, the second takes a separate source register.  The visible
;; selectors pair adjacent HI elements (0,1) ... (6,7) of each source.
8827 (define_insn "ssse3_phsubswv8hi3"
8828 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8834 (match_operand:V8HI 1 "register_operand" "0,x")
8835 (parallel [(const_int 0)]))
8836 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8838 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8839 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8842 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
8843 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
8845 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
8846 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
8851 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8852 (parallel [(const_int 0)]))
8853 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8855 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8856 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
8859 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
8860 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
8862 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8863 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8866 phsubsw\t{%2, %0|%0, %2}
8867 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8868 [(set_attr "isa" "noavx,avx")
8869 (set_attr "type" "sseiadd")
8870 (set_attr "atom_unit" "complex")
8871 (set_attr "prefix_data16" "1,*")
8872 (set_attr "prefix_extra" "1")
8873 (set_attr "prefix" "orig,vex")
8874 (set_attr "mode" "TI")])
;; 64-bit "phsubsw" on V4HI using "y"-class registers.  Operand 1 is tied to
;; the destination; operand 2 may be a register or memory.  The visible
;; selectors pair HI elements (0,1) and (2,3) of each source.
8876 (define_insn "ssse3_phsubswv4hi3"
8877 [(set (match_operand:V4HI 0 "register_operand" "=y")
8882 (match_operand:V4HI 1 "register_operand" "0")
8883 (parallel [(const_int 0)]))
8884 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
8886 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
8887 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
8891 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
8892 (parallel [(const_int 0)]))
8893 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
8895 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8896 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8898 "phsubsw\t{%2, %0|%0, %2}"
8899 [(set_attr "type" "sseiadd")
8900 (set_attr "atom_unit" "complex")
8901 (set_attr "prefix_extra" "1")
8902 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8903 (set_attr "mode" "DI")])
;; AVX2 256-bit "vpmaddubsw": V16HI destination computed from two V32QI
;; sources (operand 1 register, operand 2 register or memory).  The pattern
;; takes V16QI selections of each source; the visible index lists begin at
;; element 0 and at element 1 and the last visible index is 31.
;; NOTE(review): the full index lists and the arithmetic operators are not
;; present in this excerpt.
8905 (define_insn "avx2_pmaddubsw256"
8906 [(set (match_operand:V16HI 0 "register_operand" "=x")
8911 (match_operand:V32QI 1 "register_operand" "x")
8912 (parallel [(const_int 0)
8930 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
8931 (parallel [(const_int 0)
8949 (vec_select:V16QI (match_dup 1)
8950 (parallel [(const_int 1)
8967 (vec_select:V16QI (match_dup 2)
8968 (parallel [(const_int 1)
8983 (const_int 31)]))))))]
8985 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8986 [(set_attr "type" "sseiadd")
8987 (set_attr "prefix_extra" "1")
8988 (set_attr "prefix" "vex")
8989 (set_attr "mode" "OI")])
;; 128-bit "pmaddubsw" / "vpmaddubsw": V8HI destination computed from two
;; V16QI sources.  Two alternatives tagged noavx/avx; the first ties
;; operand 1 to the destination.  The pattern takes V8QI selections of each
;; source; the visible index lists begin at element 0 and at element 1 and
;; the last visible index is 15.
8991 (define_insn "ssse3_pmaddubsw128"
8992 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8997 (match_operand:V16QI 1 "register_operand" "0,x")
8998 (parallel [(const_int 0)
9008 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
9009 (parallel [(const_int 0)
9019 (vec_select:V8QI (match_dup 1)
9020 (parallel [(const_int 1)
9029 (vec_select:V8QI (match_dup 2)
9030 (parallel [(const_int 1)
9037 (const_int 15)]))))))]
9040 pmaddubsw\t{%2, %0|%0, %2}
9041 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
9042 [(set_attr "isa" "noavx,avx")
9043 (set_attr "type" "sseiadd")
9044 (set_attr "atom_unit" "simul")
9045 (set_attr "prefix_data16" "1,*")
9046 (set_attr "prefix_extra" "1")
9047 (set_attr "prefix" "orig,vex")
9048 (set_attr "mode" "TI")])
;; 64-bit "pmaddubsw": V4HI destination computed from two V8QI sources in
;; "y"-class registers (operand 2 may also be memory).  Operand 1 is tied to
;; the destination.  The pattern takes V4QI selections of each source; the
;; visible index lists begin at element 0 and at element 1 and the last
;; visible index is 7.
9050 (define_insn "ssse3_pmaddubsw"
9051 [(set (match_operand:V4HI 0 "register_operand" "=y")
9056 (match_operand:V8QI 1 "register_operand" "0")
9057 (parallel [(const_int 0)
9063 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
9064 (parallel [(const_int 0)
9070 (vec_select:V4QI (match_dup 1)
9071 (parallel [(const_int 1)
9076 (vec_select:V4QI (match_dup 2)
9077 (parallel [(const_int 1)
9080 (const_int 7)]))))))]
9082 "pmaddubsw\t{%2, %0|%0, %2}"
9083 [(set_attr "type" "sseiadd")
9084 (set_attr "atom_unit" "simul")
9085 (set_attr "prefix_extra" "1")
9086 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9087 (set_attr "mode" "DI")])
;; Expander for the V16HI pattern matched by "*avx2_umulhrswv16hi3".  Both
;; inputs accept register or memory operands, the RTL includes a V16HI
;; const_vector of sixteen 1s, and the preparation statement passes the
;; operands through ix86_fixup_binary_operands_no_copy for MULT in V16HImode.
9089 (define_expand "avx2_umulhrswv16hi3"
9090 [(set (match_operand:V16HI 0 "register_operand" "")
9097 (match_operand:V16HI 1 "nonimmediate_operand" ""))
9099 (match_operand:V16HI 2 "nonimmediate_operand" "")))
9101 (const_vector:V16HI [(const_int 1) (const_int 1)
9102 (const_int 1) (const_int 1)
9103 (const_int 1) (const_int 1)
9104 (const_int 1) (const_int 1)
9105 (const_int 1) (const_int 1)
9106 (const_int 1) (const_int 1)
9107 (const_int 1) (const_int 1)
9108 (const_int 1) (const_int 1)]))
9111 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; AVX2 256-bit "vpmulhrsw" on V16HI.  Operand 1 is marked commutative with
;; operand 2 ("%x"); operand 2 may be memory.  Enabled when TARGET_AVX2 holds
;; and ix86_binary_operator_ok accepts the operands for MULT in V16HImode.
;; The RTL includes a V16HI const_vector of sixteen 1s.
9113 (define_insn "*avx2_umulhrswv16hi3"
9114 [(set (match_operand:V16HI 0 "register_operand" "=x")
9121 (match_operand:V16HI 1 "nonimmediate_operand" "%x"))
9123 (match_operand:V16HI 2 "nonimmediate_operand" "xm")))
9125 (const_vector:V16HI [(const_int 1) (const_int 1)
9126 (const_int 1) (const_int 1)
9127 (const_int 1) (const_int 1)
9128 (const_int 1) (const_int 1)
9129 (const_int 1) (const_int 1)
9130 (const_int 1) (const_int 1)
9131 (const_int 1) (const_int 1)
9132 (const_int 1) (const_int 1)]))
9134 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9135 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9136 [(set_attr "type" "sseimul")
9137 (set_attr "prefix_extra" "1")
9138 (set_attr "prefix" "vex")
9139 (set_attr "mode" "OI")])
;; Expander for the V8HI pattern matched by "*ssse3_pmulhrswv8hi3".  The RTL
;; includes a V8HI const_vector of eight 1s; the preparation statement passes
;; the operands through ix86_fixup_binary_operands_no_copy for MULT in
;; V8HImode.
9141 (define_expand "ssse3_pmulhrswv8hi3"
9142 [(set (match_operand:V8HI 0 "register_operand" "")
9149 (match_operand:V8HI 1 "nonimmediate_operand" ""))
9151 (match_operand:V8HI 2 "nonimmediate_operand" "")))
9153 (const_vector:V8HI [(const_int 1) (const_int 1)
9154 (const_int 1) (const_int 1)
9155 (const_int 1) (const_int 1)
9156 (const_int 1) (const_int 1)]))
9159 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; 128-bit "pmulhrsw" / "vpmulhrsw" on V8HI.  Two alternatives tagged
;; noavx/avx; operand 1 is marked commutative ("%") and is tied to the
;; destination in the first alternative.  Enabled when TARGET_SSSE3 holds and
;; ix86_binary_operator_ok accepts the operands for MULT in V8HImode.
9161 (define_insn "*ssse3_pmulhrswv8hi3"
9162 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9169 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
9171 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
9173 (const_vector:V8HI [(const_int 1) (const_int 1)
9174 (const_int 1) (const_int 1)
9175 (const_int 1) (const_int 1)
9176 (const_int 1) (const_int 1)]))
9178 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9180 pmulhrsw\t{%2, %0|%0, %2}
9181 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
9182 [(set_attr "isa" "noavx,avx")
9183 (set_attr "type" "sseimul")
9184 (set_attr "prefix_data16" "1,*")
9185 (set_attr "prefix_extra" "1")
9186 (set_attr "prefix" "orig,vex")
9187 (set_attr "mode" "TI")])
;; Expander for the V4HI pattern matched by "*ssse3_pmulhrswv4hi3".  The RTL
;; includes a V4HI const_vector of four 1s; the preparation statement passes
;; the operands through ix86_fixup_binary_operands_no_copy for MULT in
;; V4HImode.
9189 (define_expand "ssse3_pmulhrswv4hi3"
9190 [(set (match_operand:V4HI 0 "register_operand" "")
9197 (match_operand:V4HI 1 "nonimmediate_operand" ""))
9199 (match_operand:V4HI 2 "nonimmediate_operand" "")))
9201 (const_vector:V4HI [(const_int 1) (const_int 1)
9202 (const_int 1) (const_int 1)]))
9205 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; 64-bit "pmulhrsw" on V4HI using "y"-class registers.  Operand 1 is marked
;; commutative ("%") and tied to the destination; operand 2 may be memory.
;; Enabled when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands for MULT in V4HImode.
9207 (define_insn "*ssse3_pmulhrswv4hi3"
9208 [(set (match_operand:V4HI 0 "register_operand" "=y")
9215 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
9217 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
9219 (const_vector:V4HI [(const_int 1) (const_int 1)
9220 (const_int 1) (const_int 1)]))
9222 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
9223 "pmulhrsw\t{%2, %0|%0, %2}"
9224 [(set_attr "type" "sseimul")
9225 (set_attr "prefix_extra" "1")
9226 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9227 (set_attr "mode" "DI")])
;; "pshufb" / "vpshufb" over the VI1_AVX2 mode iterator, modelled as an
;; unspec of operands 1 and 2.  Two alternatives tagged noavx/avx; the first
;; ties operand 1 to the destination.  The "mode" attribute follows
;; <sseinsnmode>.
9229 (define_insn "<ssse3_avx2>_pshufb<mode>3"
9230 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9231 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9232 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
9236 pshufb\t{%2, %0|%0, %2}
9237 vpshufb\t{%2, %1, %0|%0, %1, %2}"
9238 [(set_attr "isa" "noavx,avx")
9239 (set_attr "type" "sselog1")
9240 (set_attr "prefix_data16" "1,*")
9241 (set_attr "prefix_extra" "1")
9242 (set_attr "prefix" "orig,vex")
9243 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit "pshufb" on V8QI using "y"-class registers, modelled as an unspec
;; of operands 1 and 2.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
9245 (define_insn "ssse3_pshufbv8qi3"
9246 [(set (match_operand:V8QI 0 "register_operand" "=y")
9247 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
9248 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
9251 "pshufb\t{%2, %0|%0, %2}"
9252 [(set_attr "type" "sselog1")
9253 (set_attr "prefix_extra" "1")
9254 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9255 (set_attr "mode" "DI")])
;; "psign<ssemodesuffix>" / "vpsign<ssemodesuffix>" over the VI124_AVX2 mode
;; iterator.  Two alternatives tagged noavx/avx; the first ties operand 1 to
;; the destination.  Operand 2 may be a register or memory.
9257 (define_insn "<ssse3_avx2>_psign<mode>3"
9258 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
9260 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
9261 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
9265 psign<ssemodesuffix>\t{%2, %0|%0, %2}
9266 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9267 [(set_attr "isa" "noavx,avx")
9268 (set_attr "type" "sselog1")
9269 (set_attr "prefix_data16" "1,*")
9270 (set_attr "prefix_extra" "1")
9271 (set_attr "prefix" "orig,vex")
9272 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit "psign<mmxvecsize>" over the MMXMODEI mode iterator using "y"-class
;; registers.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.  prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
9274 (define_insn "ssse3_psign<mode>3"
9275 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9277 [(match_operand:MMXMODEI 1 "register_operand" "0")
9278 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
9281 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
9282 [(set_attr "type" "sselog1")
9283 (set_attr "prefix_extra" "1")
9284 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9285 (set_attr "mode" "DI")])
;; "palignr" / "vpalignr" over the SSESCALARMODE iterator, modelled as an
;; unspec.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing and then picks the two-operand or
;; three-operand mnemonic according to which_alternative.
9287 (define_insn "<ssse3_avx2>_palignr<mode>"
9288 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
9289 (unspec:SSESCALARMODE [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
9290 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
9291 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
9295 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9297 switch (which_alternative)
9300 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9302 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9307 [(set_attr "isa" "noavx,avx")
9308 (set_attr "type" "sseishft")
9309 (set_attr "atom_unit" "sishuf")
9310 (set_attr "prefix_data16" "1,*")
9311 (set_attr "prefix_extra" "1")
9312 (set_attr "length_immediate" "1")
9313 (set_attr "prefix" "orig,vex")
9314 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit "palignr" on DI values in "y"-class registers, modelled as an
;; unspec.  Operand 1 is tied to the destination.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the immediate.
9316 (define_insn "ssse3_palignrdi"
9317 [(set (match_operand:DI 0 "register_operand" "=y")
9318 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
9319 (match_operand:DI 2 "nonimmediate_operand" "ym")
9320 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
9324 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
9325 return "palignr\t{%3, %2, %0|%0, %2, %3}";
9327 [(set_attr "type" "sseishft")
9328 (set_attr "atom_unit" "sishuf")
9329 (set_attr "prefix_extra" "1")
9330 (set_attr "length_immediate" "1")
9331 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9332 (set_attr "mode" "DI")])
;; "abs<mode>2" over the VI124_AVX2 mode iterator, printed as
;; "pabs<ssemodesuffix>" with the "%v" template prefix (prefix attribute
;; "maybe_vex").  Single source operand, register or memory.
9334 (define_insn "abs<mode>2"
9335 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x")
9337 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))]
9339 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
9340 [(set_attr "type" "sselog1")
9341 (set_attr "prefix_data16" "1")
9342 (set_attr "prefix_extra" "1")
9343 (set_attr "prefix" "maybe_vex")
9344 (set_attr "mode" "<sseinsnmode>")])
;; "abs<mode>2" over the MMXMODEI mode iterator using "y"-class registers,
;; printed as "pabs<mmxvecsize>".  Single source operand, register or memory.
;; prefix_rep is forced to 0 and prefix_rex is computed per insn via
;; x86_extended_reg_mentioned_p.
9346 (define_insn "abs<mode>2"
9347 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
9349 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
9351 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
9352 [(set_attr "type" "sselog1")
9353 (set_attr "prefix_rep" "0")
9354 (set_attr "prefix_extra" "1")
9355 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
9356 (set_attr "mode" "DI")])
9358 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9360 ;; AMD SSE4A instructions
9362 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "movnt<ssemodesuffix>" over the MODEF mode iterator: the destination must
;; be memory ("=m") and the source an "x"-class register.
9364 (define_insn "sse4a_movnt<mode>"
9365 [(set (match_operand:MODEF 0 "memory_operand" "=m")
9367 [(match_operand:MODEF 1 "register_operand" "x")]
9370 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
9371 [(set_attr "type" "ssemov")
9372 (set_attr "mode" "<MODE>")])
;; "movnt<ssescalarmodesuffix>" taking a VF_128 vector register: the unspec
;; wraps a vec_select of element 0 of operand 1, and the destination is a
;; memory operand in the corresponding <ssescalarmode>.
9374 (define_insn "sse4a_vmmovnt<mode>"
9375 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
9376 (unspec:<ssescalarmode>
9377 [(vec_select:<ssescalarmode>
9378 (match_operand:VF_128 1 "register_operand" "x")
9379 (parallel [(const_int 0)]))]
9382 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9383 [(set_attr "type" "ssemov")
9384 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of "extrq" on V2DI, modelled as an unspec.  Operand 1 is
;; tied to the destination; operands 2 and 3 must each satisfy
;; const_0_to_255_operand, hence length_immediate "2".
9386 (define_insn "sse4a_extrqi"
9387 [(set (match_operand:V2DI 0 "register_operand" "=x")
9388 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9389 (match_operand 2 "const_0_to_255_operand" "")
9390 (match_operand 3 "const_0_to_255_operand" "")]
9393 "extrq\t{%3, %2, %0|%0, %2, %3}"
9394 [(set_attr "type" "sse")
9395 (set_attr "prefix_data16" "1")
9396 (set_attr "length_immediate" "2")
9397 (set_attr "mode" "TI")])
;; Register form of "extrq" on V2DI, modelled as an unspec.  Operand 1 is
;; tied to the destination; operand 2 is a V16QI register.
9399 (define_insn "sse4a_extrq"
9400 [(set (match_operand:V2DI 0 "register_operand" "=x")
9401 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9402 (match_operand:V16QI 2 "register_operand" "x")]
9405 "extrq\t{%2, %0|%0, %2}"
9406 [(set_attr "type" "sse")
9407 (set_attr "prefix_data16" "1")
9408 (set_attr "mode" "TI")])
;; Immediate form of "insertq" on V2DI, modelled as an unspec.  Operand 1 is
;; tied to the destination, operand 2 is a V2DI register, and operands 3 and
;; 4 must each satisfy const_0_to_255_operand (length_immediate "2").
9410 (define_insn "sse4a_insertqi"
9411 [(set (match_operand:V2DI 0 "register_operand" "=x")
9412 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9413 (match_operand:V2DI 2 "register_operand" "x")
9414 (match_operand 3 "const_0_to_255_operand" "")
9415 (match_operand 4 "const_0_to_255_operand" "")]
9418 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
9419 [(set_attr "type" "sseins")
9420 (set_attr "prefix_data16" "0")
9421 (set_attr "prefix_rep" "1")
9422 (set_attr "length_immediate" "2")
9423 (set_attr "mode" "TI")])
;; Register form of "insertq" on V2DI, modelled as an unspec.  Operand 1 is
;; tied to the destination; operand 2 is a V2DI register.
9425 (define_insn "sse4a_insertq"
9426 [(set (match_operand:V2DI 0 "register_operand" "=x")
9427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9428 (match_operand:V2DI 2 "register_operand" "x")]
9431 "insertq\t{%2, %0|%0, %2}"
9432 [(set_attr "type" "sseins")
9433 (set_attr "prefix_data16" "0")
9434 (set_attr "prefix_rep" "1")
9435 (set_attr "mode" "TI")])
9437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9439 ;; Intel SSE4.1 instructions
9441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "blend<ssemodesuffix>" / "vblend<ssemodesuffix>" over the VF mode
;; iterator.  Note that operand 2 is listed before operand 1 in the RTL.
;; Operand 3 is an immediate limited by const_0_to_<blendbits>_operand.
;; Two alternatives tagged noavx/avx; the first ties operand 1 to the
;; destination.
9443 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
9444 [(set (match_operand:VF 0 "register_operand" "=x,x")
9446 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9447 (match_operand:VF 1 "register_operand" "0,x")
9448 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
9451 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9452 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9453 [(set_attr "isa" "noavx,avx")
9454 (set_attr "type" "ssemov")
9455 (set_attr "length_immediate" "1")
9456 (set_attr "prefix_data16" "1,*")
9457 (set_attr "prefix_extra" "1")
9458 (set_attr "prefix" "orig,vex")
9459 (set_attr "mode" "<MODE>")])
;; "blendv<ssemodesuffix>" / "vblendv<ssemodesuffix>" over the VF mode
;; iterator.  Operands 0-2 use the *_not_xmm0_*_maybe_avx predicates.  In the
;; noavx alternative operand 3 carries constraint "Yz" (presumably the
;; implicit xmm0 selector -- confirm against constraints.md); in the avx
;; alternative it may be any "x" register.
9461 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
9462 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
9464 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9465 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9466 (match_operand:VF 3 "register_operand" "Yz,x")]
9470 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9471 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9472 [(set_attr "isa" "noavx,avx")
9473 (set_attr "type" "ssemov")
9474 (set_attr "length_immediate" "1")
9475 (set_attr "prefix_data16" "1,*")
9476 (set_attr "prefix_extra" "1")
9477 (set_attr "prefix" "orig,vex")
9478 (set_attr "mode" "<MODE>")])
;; "dp<ssemodesuffix>" / "vdp<ssemodesuffix>" over the VF mode iterator.
;; Operand 1 is marked commutative ("%") and tied to the destination in the
;; noavx alternative; operand 3 is an immediate in 0..255.
9480 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
9481 [(set (match_operand:VF 0 "register_operand" "=x,x")
9483 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
9484 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
9485 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9489 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
9490 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9491 [(set_attr "isa" "noavx,avx")
9492 (set_attr "type" "ssemul")
9493 (set_attr "length_immediate" "1")
9494 (set_attr "prefix_data16" "1,*")
9495 (set_attr "prefix_extra" "1")
9496 (set_attr "prefix" "orig,vex")
9497 (set_attr "mode" "<MODE>")])
;; "movntdqa" over the VI8_AVX2 mode iterator, modelled as an unspec: the
;; source must be memory ("m") and the destination a register.  Printed with
;; the "%v" template prefix (prefix attribute "maybe_vex").
9499 (define_insn "<sse4_1_avx2>_movntdqa"
9500 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x")
9501 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")]
9504 "%vmovntdqa\t{%1, %0|%0, %1}"
9505 [(set_attr "type" "ssemov")
9506 (set_attr "prefix_extra" "1")
9507 (set_attr "prefix" "maybe_vex")
9508 (set_attr "mode" "<sseinsnmode>")])
;; "mpsadbw" / "vmpsadbw" over the VI1_AVX2 mode iterator, modelled as an
;; unspec.  Two alternatives tagged noavx/avx; the first ties operand 1 to
;; the destination.  Operand 3 is an immediate in 0..255.
9510 (define_insn "<sse4_1_avx2>_mpsadbw"
9511 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9512 (unspec:VI1_AVX2 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
9513 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
9514 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9518 mpsadbw\t{%3, %2, %0|%0, %2, %3}
9519 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9520 [(set_attr "isa" "noavx,avx")
9521 (set_attr "type" "sselog1")
9522 (set_attr "length_immediate" "1")
9523 (set_attr "prefix_extra" "1")
9524 (set_attr "prefix" "orig,vex")
9525 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 256-bit "vpackusdw": V16HI destination from two V8SI sources
;; (operand 1 register, operand 2 register or memory).
9527 (define_insn "avx2_packusdw"
9528 [(set (match_operand:V16HI 0 "register_operand" "=x")
9531 (match_operand:V8SI 1 "register_operand" "x"))
9533 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
9535 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9536 [(set_attr "type" "sselog")
9537 (set_attr "prefix_extra" "1")
9538 (set_attr "prefix" "vex")
9539 (set_attr "mode" "OI")])
;; 128-bit "packusdw" / "vpackusdw": V8HI destination from two V4SI sources.
;; Two alternatives tagged noavx/avx; the first ties operand 1 to the
;; destination.
9541 (define_insn "sse4_1_packusdw"
9542 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9545 (match_operand:V4SI 1 "register_operand" "0,x"))
9547 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
9550 packusdw\t{%2, %0|%0, %2}
9551 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9552 [(set_attr "isa" "noavx,avx")
9553 (set_attr "type" "sselog")
9554 (set_attr "prefix_extra" "1")
9555 (set_attr "prefix" "orig,vex")
9556 (set_attr "mode" "TI")])
;; "pblendvb" / "vpblendvb" over the VI1_AVX2 mode iterator.  In the noavx
;; alternative operand 3 carries constraint "Yz"; in the avx alternative it
;; may be any "x" register, and only that alternative counts an immediate
;; byte (length_immediate "*,1").
;; NOTE(review): operand 0 uses "reg_not_xmm0_operand" while operands 1 and 2
;; use the "_maybe_avx" predicate variants -- confirm this asymmetry is
;; intentional.
9558 (define_insn "<sse4_1_avx2>_pblendvb"
9559 [(set (match_operand:VI1_AVX2 0 "reg_not_xmm0_operand" "=x,x")
9561 [(match_operand:VI1_AVX2 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
9562 (match_operand:VI1_AVX2 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
9563 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
9567 pblendvb\t{%3, %2, %0|%0, %2, %3}
9568 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9569 [(set_attr "isa" "noavx,avx")
9570 (set_attr "type" "ssemov")
9571 (set_attr "prefix_extra" "1")
9572 (set_attr "length_immediate" "*,1")
9573 (set_attr "prefix" "orig,vex")
9574 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit "pblendw" / "vpblendw" on V8HI.  Operand 2 is listed before
;; operand 1 in the RTL; operand 3 is an immediate in 0..255.  Two
;; alternatives tagged noavx/avx; the first ties operand 1 to the destination.
9576 (define_insn "sse4_1_pblendw"
9577 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9579 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9580 (match_operand:V8HI 1 "register_operand" "0,x")
9581 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
9584 pblendw\t{%3, %2, %0|%0, %2, %3}
9585 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9586 [(set_attr "isa" "noavx,avx")
9587 (set_attr "type" "ssemov")
9588 (set_attr "prefix_extra" "1")
9589 (set_attr "length_immediate" "1")
9590 (set_attr "prefix" "orig,vex")
9591 (set_attr "mode" "TI")])
9593 ;; The builtin uses an 8-bit immediate. Expand that.
;; The preparation code masks operand 3 to its low 8 bits and replaces it
;; with (val << 8 | val), i.e. the same 8-bit value in both bytes of a
;; 16-bit constant, which is what "*avx2_pblendw" expects via
;; avx2_pblendw_operand.
9594 (define_expand "avx2_pblendw"
9595 [(set (match_operand:V16HI 0 "register_operand" "")
9597 (match_operand:V16HI 2 "nonimmediate_operand" "")
9598 (match_operand:V16HI 1 "register_operand" "")
9599 (match_operand:SI 3 "const_0_to_255_operand" "")))]
9602 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
9603 operands[3] = GEN_INT (val << 8 | val);
;; AVX2 256-bit "vpblendw" on V16HI.  Operand 3 must satisfy
;; avx2_pblendw_operand; the output code masks it back to its low 8 bits
;; before printing the immediate.
9606 (define_insn "*avx2_pblendw"
9607 [(set (match_operand:V16HI 0 "register_operand" "=x")
9609 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9610 (match_operand:V16HI 1 "register_operand" "x")
9611 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
9614 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
9615 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9617 [(set_attr "type" "ssemov")
9618 (set_attr "prefix_extra" "1")
9619 (set_attr "length_immediate" "1")
9620 (set_attr "prefix" "vex")
9621 (set_attr "mode" "OI")])
;; "vpblendd" over the VI4_AVX2 mode iterator.  Operand 2 is listed before
;; operand 1 in the RTL; operand 3 is an immediate in 0..255.
9623 (define_insn "avx2_pblendd<mode>"
9624 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
9626 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
9627 (match_operand:VI4_AVX2 1 "register_operand" "x")
9628 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9630 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9631 [(set_attr "type" "ssemov")
9632 (set_attr "prefix_extra" "1")
9633 (set_attr "length_immediate" "1")
9634 (set_attr "prefix" "vex")
9635 (set_attr "mode" "<sseinsnmode>")])
;; "phminposuw" on V8HI, modelled as UNSPEC_PHMINPOSUW of a single register
;; or memory source.  Printed with the "%v" template prefix (prefix attribute
;; "maybe_vex").
9637 (define_insn "sse4_1_phminposuw"
9638 [(set (match_operand:V8HI 0 "register_operand" "=x")
9639 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9640 UNSPEC_PHMINPOSUW))]
9642 "%vphminposuw\t{%1, %0|%0, %1}"
9643 [(set_attr "type" "sselog1")
9644 (set_attr "prefix_extra" "1")
9645 (set_attr "prefix" "maybe_vex")
9646 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>bw": V16HI destination from a whole V16QI source
;; (register or memory).  <code>/<extsuffix> come from a code iterator whose
;; definition is not shown in this excerpt.
9648 (define_insn "avx2_<code>v16qiv16hi2"
9649 [(set (match_operand:V16HI 0 "register_operand" "=x")
9651 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
9653 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
9654 [(set_attr "type" "ssemov")
9655 (set_attr "prefix_extra" "1")
9656 (set_attr "prefix" "vex")
9657 (set_attr "mode" "OI")])
;; "pmov<extsuffix>bw" ("%v"-prefixed): V8HI destination from a selection of
;; elements of a V16QI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "q" modifier.
9659 (define_insn "sse4_1_<code>v8qiv8hi2"
9660 [(set (match_operand:V8HI 0 "register_operand" "=x")
9663 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9664 (parallel [(const_int 0)
9673 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
9674 [(set_attr "type" "ssemov")
9675 (set_attr "prefix_extra" "1")
9676 (set_attr "prefix" "maybe_vex")
9677 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>bd": V8SI destination from a selection of elements
;; of a V16QI source whose index list starts at 0.  The Intel-syntax template
;; prints operand 1 with the "q" modifier.
9679 (define_insn "avx2_<code>v8qiv8si2"
9680 [(set (match_operand:V8SI 0 "register_operand" "=x")
9683 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9684 (parallel [(const_int 0)
9693 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
9694 [(set_attr "type" "ssemov")
9695 (set_attr "prefix_extra" "1")
9696 (set_attr "prefix" "vex")
9697 (set_attr "mode" "OI")])
;; "pmov<extsuffix>bd" ("%v"-prefixed): V4SI destination from a selection of
;; elements of a V16QI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "k" modifier.
9699 (define_insn "sse4_1_<code>v4qiv4si2"
9700 [(set (match_operand:V4SI 0 "register_operand" "=x")
9703 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9704 (parallel [(const_int 0)
9709 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
9710 [(set_attr "type" "ssemov")
9711 (set_attr "prefix_extra" "1")
9712 (set_attr "prefix" "maybe_vex")
9713 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>wd": V8SI destination from a whole V8HI source
;; (register or memory).
9715 (define_insn "avx2_<code>v8hiv8si2"
9716 [(set (match_operand:V8SI 0 "register_operand" "=x")
9718 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
9720 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
9721 [(set_attr "type" "ssemov")
9722 (set_attr "prefix_extra" "1")
9723 (set_attr "prefix" "vex")
9724 (set_attr "mode" "OI")])
;; "pmov<extsuffix>wd" ("%v"-prefixed): V4SI destination from a selection of
;; elements of a V8HI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "q" modifier.
9726 (define_insn "sse4_1_<code>v4hiv4si2"
9727 [(set (match_operand:V4SI 0 "register_operand" "=x")
9730 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9731 (parallel [(const_int 0)
9736 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
9737 [(set_attr "type" "ssemov")
9738 (set_attr "prefix_extra" "1")
9739 (set_attr "prefix" "maybe_vex")
9740 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>bq": V4DI destination from a selection of elements
;; of a V16QI source whose index list starts at 0.  The Intel-syntax template
;; prints operand 1 with the "k" modifier.
9742 (define_insn "avx2_<code>v4qiv4di2"
9743 [(set (match_operand:V4DI 0 "register_operand" "=x")
9746 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9747 (parallel [(const_int 0)
9752 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
9753 [(set_attr "type" "ssemov")
9754 (set_attr "prefix_extra" "1")
9755 (set_attr "prefix" "vex")
9756 (set_attr "mode" "OI")])
;; "pmov<extsuffix>bq" ("%v"-prefixed): V2DI destination from a selection of
;; elements of a V16QI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "w" modifier.
9758 (define_insn "sse4_1_<code>v2qiv2di2"
9759 [(set (match_operand:V2DI 0 "register_operand" "=x")
9762 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9763 (parallel [(const_int 0)
9766 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
9767 [(set_attr "type" "ssemov")
9768 (set_attr "prefix_extra" "1")
9769 (set_attr "prefix" "maybe_vex")
9770 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>wq": V4DI destination from a selection of elements
;; of a V8HI source whose index list starts at 0.  The Intel-syntax template
;; prints operand 1 with the "q" modifier.
9772 (define_insn "avx2_<code>v4hiv4di2"
9773 [(set (match_operand:V4DI 0 "register_operand" "=x")
9776 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9777 (parallel [(const_int 0)
9782 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
9783 [(set_attr "type" "ssemov")
9784 (set_attr "prefix_extra" "1")
9785 (set_attr "prefix" "vex")
9786 (set_attr "mode" "OI")])
;; "pmov<extsuffix>wq" ("%v"-prefixed): V2DI destination from a selection of
;; elements of a V8HI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "k" modifier.
9788 (define_insn "sse4_1_<code>v2hiv2di2"
9789 [(set (match_operand:V2DI 0 "register_operand" "=x")
9792 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9793 (parallel [(const_int 0)
9796 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
9797 [(set_attr "type" "ssemov")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "maybe_vex")
9800 (set_attr "mode" "TI")])
;; AVX2 "vpmov<extsuffix>dq": V4DI destination from a whole V4SI source
;; (register or memory).  The template is always the v-prefixed mnemonic, so
;; the "prefix" attribute is set to "vex" like the other avx2_<code>* extension
;; patterns; without it the attribute falls back to its default value.
9802 (define_insn "avx2_<code>v4siv4di2"
9803 [(set (match_operand:V4DI 0 "register_operand" "=x")
9805 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
9807 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
9808 [(set_attr "type" "ssemov")
9809 (set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
9810 (set_attr "mode" "OI")])
;; "pmov<extsuffix>dq" ("%v"-prefixed): V2DI destination from a selection of
;; elements of a V4SI source whose index list starts at 0.  The Intel-syntax
;; template prints operand 1 with the "q" modifier.
9812 (define_insn "sse4_1_<code>v2siv2di2"
9813 [(set (match_operand:V2DI 0 "register_operand" "=x")
9816 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9817 (parallel [(const_int 0)
9820 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
9821 [(set_attr "type" "ssemov")
9822 (set_attr "prefix_extra" "1")
9823 (set_attr "prefix" "maybe_vex")
9824 (set_attr "mode" "TI")])
9826 ;; vtest<ssemodesuffix> (vtestps/vtestpd) is very similar to comiss and
9827 ;; ucomiss when setting FLAGS_REG, but it is not really a compare instruction.
;; The pattern sets (reg:CC FLAGS_REG) from an unspec of two VF operands;
;; operand 0 must be a register, operand 1 may be a register or memory.
9828 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
9829 [(set (reg:CC FLAGS_REG)
9830 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
9831 (match_operand:VF 1 "nonimmediate_operand" "xm")]
9834 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
9835 [(set_attr "type" "ssecomi")
9836 (set_attr "prefix_extra" "1")
9837 (set_attr "prefix" "vex")
9838 (set_attr "mode" "<MODE>")])
9840 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
9841 ;; but it is not really a compare instruction.
;; 256-bit form: sets (reg:CC FLAGS_REG) from an unspec of two V4DI operands
;; and prints "vptest".
9842 (define_insn "avx_ptest256"
9843 [(set (reg:CC FLAGS_REG)
9844 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
9845 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9848 "vptest\t{%1, %0|%0, %1}"
9849 [(set_attr "type" "ssecomi")
9850 (set_attr "prefix_extra" "1")
9851 (set_attr "prefix" "vex")
9852 (set_attr "mode" "OI")])
;; 128-bit "ptest": sets (reg:CC FLAGS_REG) from an unspec of two V2DI
;; operands (operand 0 register, operand 1 register or memory).  Printed with
;; the "%v" template prefix (prefix attribute "maybe_vex").
9854 (define_insn "sse4_1_ptest"
9855 [(set (reg:CC FLAGS_REG)
9856 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
9857 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9860 "%vptest\t{%1, %0|%0, %1}"
9861 [(set_attr "type" "ssecomi")
9862 (set_attr "prefix_extra" "1")
9863 (set_attr "prefix" "maybe_vex")
9864 (set_attr "mode" "TI")])
;; "round<ssemodesuffix>" over the VF mode iterator ("%v"-prefixed, prefix
;; attribute "maybe_vex").  Operand 1 may be a register or memory; operand 2
;; is an immediate in 0..15.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test rather than given as a fixed string.
9866 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
9867 [(set (match_operand:VF 0 "register_operand" "=x")
9869 [(match_operand:VF 1 "nonimmediate_operand" "xm")
9870 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9873 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9874 [(set_attr "type" "ssecvt")
9875 (set (attr "prefix_data16")
9877 (match_test "TARGET_AVX")
9879 (const_string "1")))
9880 (set_attr "prefix_extra" "1")
9881 (set_attr "length_immediate" "1")
9882 (set_attr "prefix" "maybe_vex")
9883 (set_attr "mode" "<MODE>")])
;; "round<ssescalarmodesuffix>" / "vround<ssescalarmodesuffix>" over the
;; VF_128 mode iterator.  Operand 2 is the register being rounded with
;; immediate operand 3 (0..15); operand 1 is a second vector register that is
;; tied to the destination in the noavx alternative.
;; NOTE(review): the operator combining operand 1 with the rounded value is
;; not present in this excerpt.
9885 (define_insn "sse4_1_round<ssescalarmodesuffix>"
9886 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
9889 [(match_operand:VF_128 2 "register_operand" "x,x")
9890 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
9892 (match_operand:VF_128 1 "register_operand" "0,x")
9896 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
9897 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9898 [(set_attr "isa" "noavx,avx")
9899 (set_attr "type" "ssecvt")
9900 (set_attr "length_immediate" "1")
9901 (set_attr "prefix_data16" "1,*")
9902 (set_attr "prefix_extra" "1")
9903 (set_attr "prefix" "orig,vex")
9904 (set_attr "mode" "<MODE>")])
;; Vector round expander, enabled only for TARGET_ROUND without trapping
;; math.  The preparation code:
;;   - builds the scalar constant 0.5 - 2**(-p-1), where p is the format's
;;     precision (the value the body calls nextafter (0.5, 0.0));
;;   - broadcasts it into a vector constant and forces it into a register;
;;   - emits copysign so operands[3] holds that constant with the sign of
;;     operand 1;
;;   - allocates operands[4] as a fresh register and sets operands[5] to
;;     the ROUND_TRUNC immediate consumed by the final unspec.
9906 (define_expand "round<mode>2"
9909 (match_operand:VF 1 "nonimmediate_operand" "")
9911 (set (match_operand:VF 0 "register_operand" "")
9913 [(match_dup 4) (match_dup 5)]
9915 "TARGET_ROUND && !flag_trapping_math"
9917 enum machine_mode scalar_mode;
9918 const struct real_format *fmt;
9919 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
9922 scalar_mode = GET_MODE_INNER (<MODE>mode);
9924 /* load nextafter (0.5, 0.0) */
9925 fmt = REAL_MODE_FORMAT (scalar_mode);
9926 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
9927 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
9928 half = const_double_from_real_value (pred_half, scalar_mode);
9930 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
9931 vec_half = force_reg (<MODE>mode, vec_half);
9933 operands[3] = gen_reg_rtx (<MODE>mode);
9934 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
9936 operands[4] = gen_reg_rtx (<MODE>mode);
9937 operands[5] = GEN_INT (ROUND_TRUNC);
9940 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9942 ;; Intel SSE4.2 string/text processing instructions
9944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare with three results: an SI
;; index (operand 0, constraint "c"), a V16QI mask (operand 1, constraint
;; "Yz") and FLAGS_REG.  It is split while pseudos can still be created.
;; The split code reads the REG_UNUSED notes on curr_insn to learn which
;; results are live, then emits the pcmpestri and/or pcmpestrm patterns,
;; the flags-only pattern when FLAGS_REG is live but neither register
;; result is, and a NOTE_INSN_DELETED when nothing is live.
;; Operands 2 and 4 use predicates that exclude xmm0.
9946 (define_insn_and_split "sse4_2_pcmpestr"
9947 [(set (match_operand:SI 0 "register_operand" "=c,c")
9949 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
9950 (match_operand:SI 3 "register_operand" "a,a")
9951 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
9952 (match_operand:SI 5 "register_operand" "d,d")
9953 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
9955 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
9963 (set (reg:CC FLAGS_REG)
9972 && can_create_pseudo_p ()"
9977 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9978 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
9979 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
9982 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
9983 operands[3], operands[4],
9984 operands[5], operands[6]));
9986 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
9987 operands[3], operands[4],
9988 operands[5], operands[6]));
9989 if (flags && !(ecx || xmm0))
9990 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
9991 operands[2], operands[3],
9992 operands[4], operands[5],
9994 if (!(flags || ecx || xmm0))
9995 emit_note (NOTE_INSN_DELETED);
9999 [(set_attr "type" "sselog")
10000 (set_attr "prefix_data16" "1")
10001 (set_attr "prefix_extra" "1")
10002 (set_attr "length_immediate" "1")
10003 (set_attr "memory" "none,load")
10004 (set_attr "mode" "TI")])
;; Index-producing form.  The SI result is constrained to "c"; the two SI
;; inputs are constrained to "a" and "d"; operand 3 may be register or
;; memory (second alternative, memory attribute "load"); operand 5 is an
;; 8-bit immediate.  The pattern also sets FLAGS_REG.
10006 (define_insn "sse4_2_pcmpestri"
10007 [(set (match_operand:SI 0 "register_operand" "=c,c")
10009 [(match_operand:V16QI 1 "register_operand" "x,x")
10010 (match_operand:SI 2 "register_operand" "a,a")
10011 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10012 (match_operand:SI 4 "register_operand" "d,d")
10013 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10015 (set (reg:CC FLAGS_REG)
10024 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
10025 [(set_attr "type" "sselog")
10026 (set_attr "prefix_data16" "1")
10027 (set_attr "prefix_extra" "1")
10028 (set_attr "prefix" "maybe_vex")
10029 (set_attr "length_immediate" "1")
10030 (set_attr "memory" "none,load")
10031 (set_attr "mode" "TI")])
;; Mask-producing form.  Same inputs as sse4_2_pcmpestri, but the result
;; is a V16QI value constrained to "Yz".  The pattern also sets FLAGS_REG.
10033 (define_insn "sse4_2_pcmpestrm"
10034 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10036 [(match_operand:V16QI 1 "register_operand" "x,x")
10037 (match_operand:SI 2 "register_operand" "a,a")
10038 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
10039 (match_operand:SI 4 "register_operand" "d,d")
10040 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
10042 (set (reg:CC FLAGS_REG)
10051 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
10052 [(set_attr "type" "sselog")
10053 (set_attr "prefix_data16" "1")
10054 (set_attr "prefix_extra" "1")
10055 (set_attr "length_immediate" "1")
10056 (set_attr "prefix" "maybe_vex")
10057 (set_attr "memory" "none,load")
10058 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set, and the register results are
;; modelled as clobbered scratches.  There are four alternatives:
;;   0,1 - V16QI scratch in "Yz", SI scratch unused ("X"); emits pcmpestrm
;;   2,3 - SI scratch in "c", V16QI scratch unused ("X"); emits pcmpestri
;; Odd alternatives take operand 4 from memory.
10060 (define_insn "sse4_2_pcmpestr_cconly"
10061 [(set (reg:CC FLAGS_REG)
10063 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10064 (match_operand:SI 3 "register_operand" "a,a,a,a")
10065 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
10066 (match_operand:SI 5 "register_operand" "d,d,d,d")
10067 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
10069 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10070 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10073 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10074 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
10075 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
10076 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
10077 [(set_attr "type" "sselog")
10078 (set_attr "prefix_data16" "1")
10079 (set_attr "prefix_extra" "1")
10080 (set_attr "length_immediate" "1")
10081 (set_attr "memory" "none,load,none,load")
10082 (set_attr "prefix" "maybe_vex")
10083 (set_attr "mode" "TI")])
;; Combined string compare without the explicit-length SI operands.  It
;; has the same three results as sse4_2_pcmpestr: an SI index ("c"), a
;; V16QI mask ("Yz") and FLAGS_REG.  The split code reads the REG_UNUSED
;; notes on curr_insn and emits the pcmpistri and/or pcmpistrm patterns,
;; the flags-only pattern when only FLAGS_REG is live, and a
;; NOTE_INSN_DELETED when no result is live.
10085 (define_insn_and_split "sse4_2_pcmpistr"
10086 [(set (match_operand:SI 0 "register_operand" "=c,c")
10088 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
10089 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
10090 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
10092 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
10098 (set (reg:CC FLAGS_REG)
10105 && can_create_pseudo_p ()"
10110 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
10111 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
10112 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
10115 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
10116 operands[3], operands[4]));
10118 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
10119 operands[3], operands[4]));
10120 if (flags && !(ecx || xmm0))
10121 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
10122 operands[2], operands[3],
10124 if (!(flags || ecx || xmm0))
10125 emit_note (NOTE_INSN_DELETED);
10129 [(set_attr "type" "sselog")
10130 (set_attr "prefix_data16" "1")
10131 (set_attr "prefix_extra" "1")
10132 (set_attr "length_immediate" "1")
10133 (set_attr "memory" "none,load")
10134 (set_attr "mode" "TI")])
;; Index-producing form.  The SI result is constrained to "c"; operand 1
;; is a register, operand 2 is register or memory, operand 3 is an 8-bit
;; immediate.  The pattern also sets FLAGS_REG.
10136 (define_insn "sse4_2_pcmpistri"
10137 [(set (match_operand:SI 0 "register_operand" "=c,c")
10139 [(match_operand:V16QI 1 "register_operand" "x,x")
10140 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10141 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10143 (set (reg:CC FLAGS_REG)
10150 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
10151 [(set_attr "type" "sselog")
10152 (set_attr "prefix_data16" "1")
10153 (set_attr "prefix_extra" "1")
10154 (set_attr "length_immediate" "1")
10155 (set_attr "prefix" "maybe_vex")
10156 (set_attr "memory" "none,load")
10157 (set_attr "mode" "TI")])
;; Mask-producing form.  Same inputs as sse4_2_pcmpistri, but the result
;; is a V16QI value constrained to "Yz".  The pattern also sets FLAGS_REG.
10159 (define_insn "sse4_2_pcmpistrm"
10160 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
10162 [(match_operand:V16QI 1 "register_operand" "x,x")
10163 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10164 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
10166 (set (reg:CC FLAGS_REG)
10173 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
10174 [(set_attr "type" "sselog")
10175 (set_attr "prefix_data16" "1")
10176 (set_attr "prefix_extra" "1")
10177 (set_attr "length_immediate" "1")
10178 (set_attr "prefix" "maybe_vex")
10179 (set_attr "memory" "none,load")
10180 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is set, and the register results are
;; clobbered scratches.  Alternatives 0,1 use a "Yz" V16QI scratch and
;; emit pcmpistrm; alternatives 2,3 use a "c" SI scratch and emit
;; pcmpistri.  Odd alternatives take operand 3 from memory.
10182 (define_insn "sse4_2_pcmpistr_cconly"
10183 [(set (reg:CC FLAGS_REG)
10185 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
10186 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
10187 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
10189 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
10190 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
10193 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10194 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
10195 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
10196 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
10197 [(set_attr "type" "sselog")
10198 (set_attr "prefix_data16" "1")
10199 (set_attr "prefix_extra" "1")
10200 (set_attr "length_immediate" "1")
10201 (set_attr "memory" "none,load,none,load")
10202 (set_attr "prefix" "maybe_vex")
10203 (set_attr "mode" "TI")])
10205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10207 ;; XOP instructions
10209 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10211 ;; XOP parallel integer multiply/add instructions.
10212 ;; Note the XOP multiply/add instructions
10213 ;; a[i] = b[i] * c[i] + d[i];
10214 ;; do not allow the value being added to be a memory operation.
;; V8HI multiply/add emitting vpmacsww.  Operands 1 and 2 are the factors
;; ("%" marks them commutative; only operand 2 may be memory) and
;; operand 3 is the addend, constrained to a register.
10215 (define_insn "xop_pmacsww"
10216 [(set (match_operand:V8HI 0 "register_operand" "=x")
10219 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10220 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10221 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10223 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10224 [(set_attr "type" "ssemuladd")
10225 (set_attr "mode" "TI")])
;; V8HI multiply/add emitting vpmacssww.  The operand layout matches
;; xop_pmacsww: factors in operands 1 and 2, register addend in
;; operand 3.
10227 (define_insn "xop_pmacssww"
10228 [(set (match_operand:V8HI 0 "register_operand" "=x")
10230 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10231 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
10232 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
10234 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10235 [(set_attr "type" "ssemuladd")
10236 (set_attr "mode" "TI")])
;; V4SI multiply/add emitting vpmacsdd.  Factors are in operands 1 and 2
;; (operand 2 may be memory); the addend in operand 3 is constrained to
;; a register.
10238 (define_insn "xop_pmacsdd"
10239 [(set (match_operand:V4SI 0 "register_operand" "=x")
10242 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10243 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10244 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10246 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10247 [(set_attr "type" "ssemuladd")
10248 (set_attr "mode" "TI")])
;; V4SI multiply/add emitting vpmacssdd.  The operand layout matches
;; xop_pmacsdd.
10250 (define_insn "xop_pmacssdd"
10251 [(set (match_operand:V4SI 0 "register_operand" "=x")
10253 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10254 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
10255 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10257 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10258 [(set_attr "type" "ssemuladd")
10259 (set_attr "mode" "TI")])
;; Emits vpmacssdql.  The factors are V4SI operands 1 and 2, each taken
;; through an element-selecting parallel whose first index is 1.  The
;; addend (operand 3) and the result are V2DI.
10261 (define_insn "xop_pmacssdql"
10262 [(set (match_operand:V2DI 0 "register_operand" "=x")
10267 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10268 (parallel [(const_int 1)
10271 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10272 (parallel [(const_int 1)
10274 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10276 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10277 [(set_attr "type" "ssemuladd")
10278 (set_attr "mode" "TI")])
;; Emits vpmacssdqh.  This mirrors xop_pmacssdql, except that the
;; element-selecting parallels on the V4SI factors start at index 0.
10280 (define_insn "xop_pmacssdqh"
10281 [(set (match_operand:V2DI 0 "register_operand" "=x")
10286 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10287 (parallel [(const_int 0)
10291 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10292 (parallel [(const_int 0)
10294 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10296 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10297 [(set_attr "type" "ssemuladd")
10298 (set_attr "mode" "TI")])
;; Emits vpmacsdql.  The factors are V4SI operands 1 and 2, selected
;; through parallels starting at index 1; the V2DI addend is operand 3
;; and the result is V2DI.
10300 (define_insn "xop_pmacsdql"
10301 [(set (match_operand:V2DI 0 "register_operand" "=x")
10306 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10307 (parallel [(const_int 1)
10311 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10312 (parallel [(const_int 1)
10314 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10316 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10317 [(set_attr "type" "ssemuladd")
10318 (set_attr "mode" "TI")])
10320 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10321 ;; fake it with a multiply/add. In general, we expect the define_split to
10322 ;; occur before register allocation, so we have to handle the corner case where
10323 ;; the target is the same as operands 1/2
;; Widening multiply of the V4SI elements selected by indices 1 and 3.
;; The destination is early-clobber ("=&x").  The split condition is
;; "&& reload_completed", and the preparation code sets operands[3] to
;; the zero V2DI constant for use by the replacement pattern.
10324 (define_insn_and_split "xop_mulv2div2di3_low"
10325 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10329 (match_operand:V4SI 1 "register_operand" "%x")
10330 (parallel [(const_int 1)
10334 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10335 (parallel [(const_int 1)
10336 (const_int 3)])))))]
10339 "&& reload_completed"
10340 [(set (match_dup 0)
10348 (parallel [(const_int 1)
10353 (parallel [(const_int 1)
10357 operands[3] = CONST0_RTX (V2DImode);
10359 [(set_attr "type" "ssemul")
10360 (set_attr "mode" "TI")])
;; Emits vpmacsdqh.  This mirrors xop_pmacsdql, except that the
;; element-selecting parallels on the V4SI factors start at index 0.
10362 (define_insn "xop_pmacsdqh"
10363 [(set (match_operand:V2DI 0 "register_operand" "=x")
10368 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10369 (parallel [(const_int 0)
10373 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10374 (parallel [(const_int 0)
10376 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10378 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10379 [(set_attr "type" "ssemuladd")
10380 (set_attr "mode" "TI")])
10382 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10383 ;; fake it with a multiply/add. In general, we expect the define_split to
10384 ;; occur before register allocation, so we have to handle the corner case where
10385 ;; the target is the same as either operands[1] or operands[2]
;; Widening multiply of the V4SI elements selected by indices 0 and 2.
;; The destination is early-clobber ("=&x").  The split condition is
;; "&& reload_completed", and the preparation code sets operands[3] to
;; the zero V2DI constant for use by the replacement pattern.
10386 (define_insn_and_split "xop_mulv2div2di3_high"
10387 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10391 (match_operand:V4SI 1 "register_operand" "%x")
10392 (parallel [(const_int 0)
10396 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10397 (parallel [(const_int 0)
10398 (const_int 2)])))))]
10401 "&& reload_completed"
10402 [(set (match_dup 0)
10410 (parallel [(const_int 0)
10415 (parallel [(const_int 0)
10419 operands[3] = CONST0_RTX (V2DImode);
10421 [(set_attr "type" "ssemul")
10422 (set_attr "mode" "TI")])
10424 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Emits vpmacsswd.  The factors are V8HI operands 1 and 2, selected
;; through parallels starting at index 1; the V4SI addend is operand 3
;; and the result is V4SI.
10425 (define_insn "xop_pmacsswd"
10426 [(set (match_operand:V4SI 0 "register_operand" "=x")
10431 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10432 (parallel [(const_int 1)
10438 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10439 (parallel [(const_int 1)
10443 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10445 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10446 [(set_attr "type" "ssemuladd")
10447 (set_attr "mode" "TI")])
;; Emits vpmacswd.  The operand layout matches xop_pmacsswd: V8HI
;; factors selected from index 1, with a V4SI addend and result.
10449 (define_insn "xop_pmacswd"
10450 [(set (match_operand:V4SI 0 "register_operand" "=x")
10455 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10456 (parallel [(const_int 1)
10462 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10463 (parallel [(const_int 1)
10467 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10469 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10470 [(set_attr "type" "ssemuladd")
10471 (set_attr "mode" "TI")])
;; Emits vpmadcsswd.  The V8HI operands 1 and 2 are each referenced
;; through two element selections, one starting at index 0 and one
;; starting at index 1.  Operand 3 is the V4SI addend and the result is
;; V4SI.
10473 (define_insn "xop_pmadcsswd"
10474 [(set (match_operand:V4SI 0 "register_operand" "=x")
10480 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10481 (parallel [(const_int 0)
10487 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10488 (parallel [(const_int 0)
10496 (parallel [(const_int 1)
10503 (parallel [(const_int 1)
10506 (const_int 7)])))))
10507 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10509 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10510 [(set_attr "type" "ssemuladd")
10511 (set_attr "mode" "TI")])
;; Emits vpmadcswd.  The operand layout matches xop_pmadcsswd: element
;; selections starting at index 0 and at index 1 on V8HI operands 1 and
;; 2, plus a V4SI addend in operand 3.
10513 (define_insn "xop_pmadcswd"
10514 [(set (match_operand:V4SI 0 "register_operand" "=x")
10520 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10521 (parallel [(const_int 0)
10527 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10528 (parallel [(const_int 0)
10536 (parallel [(const_int 1)
10543 (parallel [(const_int 1)
10546 (const_int 7)])))))
10547 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10549 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10550 [(set_attr "type" "ssemuladd")
10551 (set_attr "mode" "TI")])
10553 ;; XOP parallel XMM conditional moves
;; Emits vpcmov for every mode in the V iterator.  Operand 1 must be a
;; register.  The two alternatives allow a memory operand in either
;; operand 2 or operand 3, but not in both.
10554 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
10555 [(set (match_operand:V 0 "register_operand" "=x,x")
10557 (match_operand:V 3 "nonimmediate_operand" "x,m")
10558 (match_operand:V 1 "register_operand" "x,x")
10559 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
10561 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10562 [(set_attr "type" "sse4arg")])
10564 ;; XOP horizontal add/subtract instructions
;; Emits vphaddbw: a V16QI source (register or memory) produces a V8HI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10565 (define_insn "xop_phaddbw"
10566 [(set (match_operand:V8HI 0 "register_operand" "=x")
10570 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10571 (parallel [(const_int 0)
10582 (parallel [(const_int 1)
10589 (const_int 15)])))))]
10591 "vphaddbw\t{%1, %0|%0, %1}"
10592 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbd: a V16QI source (register or memory) produces a V4SI
;; result.  The RTL uses four element selections, starting at indices
;; 0, 1, 2 and 3.
10594 (define_insn "xop_phaddbd"
10595 [(set (match_operand:V4SI 0 "register_operand" "=x")
10600 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10601 (parallel [(const_int 0)
10608 (parallel [(const_int 1)
10611 (const_int 13)]))))
10616 (parallel [(const_int 2)
10623 (parallel [(const_int 3)
10626 (const_int 15)]))))))]
10628 "vphaddbd\t{%1, %0|%0, %1}"
10629 [(set_attr "type" "sseiadd1")])
;; Emits vphaddbq: a V16QI source (register or memory) produces a V2DI
;; result.  The RTL uses eight element selections, starting at indices
;; 0-3 and 8-11.
10631 (define_insn "xop_phaddbq"
10632 [(set (match_operand:V2DI 0 "register_operand" "=x")
10638 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10639 (parallel [(const_int 0)
10644 (parallel [(const_int 1)
10650 (parallel [(const_int 2)
10655 (parallel [(const_int 3)
10656 (const_int 7)])))))
10662 (parallel [(const_int 8)
10667 (parallel [(const_int 9)
10668 (const_int 13)]))))
10673 (parallel [(const_int 10)
10678 (parallel [(const_int 11)
10679 (const_int 15)])))))))]
10681 "vphaddbq\t{%1, %0|%0, %1}"
10682 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwd: a V8HI source (register or memory) produces a V4SI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10684 (define_insn "xop_phaddwd"
10685 [(set (match_operand:V4SI 0 "register_operand" "=x")
10689 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10690 (parallel [(const_int 0)
10697 (parallel [(const_int 1)
10700 (const_int 7)])))))]
10702 "vphaddwd\t{%1, %0|%0, %1}"
10703 [(set_attr "type" "sseiadd1")])
;; Emits vphaddwq: a V8HI source (register or memory) produces a V2DI
;; result.  The RTL uses four element selections, starting at indices
;; 0, 1, 2 and 3.
10705 (define_insn "xop_phaddwq"
10706 [(set (match_operand:V2DI 0 "register_operand" "=x")
10711 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10712 (parallel [(const_int 0)
10717 (parallel [(const_int 1)
10723 (parallel [(const_int 2)
10728 (parallel [(const_int 3)
10729 (const_int 7)]))))))]
10731 "vphaddwq\t{%1, %0|%0, %1}"
10732 [(set_attr "type" "sseiadd1")])
;; Emits vphadddq: a V4SI source (register or memory) produces a V2DI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10734 (define_insn "xop_phadddq"
10735 [(set (match_operand:V2DI 0 "register_operand" "=x")
10739 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10740 (parallel [(const_int 0)
10745 (parallel [(const_int 1)
10746 (const_int 3)])))))]
10748 "vphadddq\t{%1, %0|%0, %1}"
10749 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubw: same operand shape as xop_phaddbw (V16QI source,
;; V8HI result) with the "u" mnemonic variant.
10751 (define_insn "xop_phaddubw"
10752 [(set (match_operand:V8HI 0 "register_operand" "=x")
10756 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10757 (parallel [(const_int 0)
10768 (parallel [(const_int 1)
10775 (const_int 15)])))))]
10777 "vphaddubw\t{%1, %0|%0, %1}"
10778 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubd: same operand shape as xop_phaddbd (V16QI source,
;; V4SI result) with the "u" mnemonic variant.
10780 (define_insn "xop_phaddubd"
10781 [(set (match_operand:V4SI 0 "register_operand" "=x")
10786 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10787 (parallel [(const_int 0)
10794 (parallel [(const_int 1)
10797 (const_int 13)]))))
10802 (parallel [(const_int 2)
10809 (parallel [(const_int 3)
10812 (const_int 15)]))))))]
10814 "vphaddubd\t{%1, %0|%0, %1}"
10815 [(set_attr "type" "sseiadd1")])
;; Emits vphaddubq: same operand shape as xop_phaddbq (V16QI source,
;; V2DI result) with the "u" mnemonic variant.
10817 (define_insn "xop_phaddubq"
10818 [(set (match_operand:V2DI 0 "register_operand" "=x")
10824 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10825 (parallel [(const_int 0)
10830 (parallel [(const_int 1)
10836 (parallel [(const_int 2)
10841 (parallel [(const_int 3)
10842 (const_int 7)])))))
10848 (parallel [(const_int 8)
10853 (parallel [(const_int 9)
10854 (const_int 13)]))))
10859 (parallel [(const_int 10)
10864 (parallel [(const_int 11)
10865 (const_int 15)])))))))]
10867 "vphaddubq\t{%1, %0|%0, %1}"
10868 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwd: same operand shape as xop_phaddwd (V8HI source,
;; V4SI result) with the "u" mnemonic variant.
10870 (define_insn "xop_phadduwd"
10871 [(set (match_operand:V4SI 0 "register_operand" "=x")
10875 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10876 (parallel [(const_int 0)
10883 (parallel [(const_int 1)
10886 (const_int 7)])))))]
10888 "vphadduwd\t{%1, %0|%0, %1}"
10889 [(set_attr "type" "sseiadd1")])
;; Emits vphadduwq: same operand shape as xop_phaddwq (V8HI source,
;; V2DI result) with the "u" mnemonic variant.
10891 (define_insn "xop_phadduwq"
10892 [(set (match_operand:V2DI 0 "register_operand" "=x")
10897 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10898 (parallel [(const_int 0)
10903 (parallel [(const_int 1)
10909 (parallel [(const_int 2)
10914 (parallel [(const_int 3)
10915 (const_int 7)]))))))]
10917 "vphadduwq\t{%1, %0|%0, %1}"
10918 [(set_attr "type" "sseiadd1")])
;; Emits vphaddudq: same operand shape as xop_phadddq (V4SI source,
;; V2DI result) with the "u" mnemonic variant.
10920 (define_insn "xop_phaddudq"
10921 [(set (match_operand:V2DI 0 "register_operand" "=x")
10925 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10926 (parallel [(const_int 0)
10931 (parallel [(const_int 1)
10932 (const_int 3)])))))]
10934 "vphaddudq\t{%1, %0|%0, %1}"
10935 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw: a V16QI source (register or memory) produces a V8HI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10937 (define_insn "xop_phsubbw"
10938 [(set (match_operand:V8HI 0 "register_operand" "=x")
10942 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10943 (parallel [(const_int 0)
10954 (parallel [(const_int 1)
10961 (const_int 15)])))))]
10963 "vphsubbw\t{%1, %0|%0, %1}"
10964 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd: a V8HI source (register or memory) produces a V4SI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10966 (define_insn "xop_phsubwd"
10967 [(set (match_operand:V4SI 0 "register_operand" "=x")
10971 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10972 (parallel [(const_int 0)
10979 (parallel [(const_int 1)
10982 (const_int 7)])))))]
10984 "vphsubwd\t{%1, %0|%0, %1}"
10985 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq: a V4SI source (register or memory) produces a V2DI
;; result.  The RTL selects source elements starting at index 0 and at
;; index 1.
10987 (define_insn "xop_phsubdq"
10988 [(set (match_operand:V2DI 0 "register_operand" "=x")
10992 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10993 (parallel [(const_int 0)
10998 (parallel [(const_int 1)
10999 (const_int 3)])))))]
11001 "vphsubdq\t{%1, %0|%0, %1}"
11002 [(set_attr "type" "sseiadd1")])
11004 ;; XOP permute instructions
;; Emits vpperm, modelled as UNSPEC_XOP_PERMUTE over three V16QI inputs.
;; Operand 1 must be a register.  Either operand 2 or operand 3 may be
;; memory, and the insn condition rejects the case where both are.
11005 (define_insn "xop_pperm"
11006 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11008 [(match_operand:V16QI 1 "register_operand" "x,x")
11009 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
11010 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
11011 UNSPEC_XOP_PERMUTE))]
11012 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11013 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11014 [(set_attr "type" "sse4arg")
11015 (set_attr "mode" "TI")])
11017 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Emits vpperm to combine two V2DI inputs (operands 1 and 2) into a
;; V4SI result.  V16QI operand 3 appears only inside a (use ...).  The
;; insn condition rejects the case where operands 2 and 3 are both
;; memory.
11018 (define_insn "xop_pperm_pack_v2di_v4si"
11019 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11022 (match_operand:V2DI 1 "register_operand" "x,x"))
11024 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
11025 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11026 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11027 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11028 [(set_attr "type" "sse4arg")
11029 (set_attr "mode" "TI")])
;; Emits vpperm to combine two V4SI inputs (operands 1 and 2) into a
;; V8HI result.  V16QI operand 3 appears only inside a (use ...).  The
;; insn condition rejects the case where operands 2 and 3 are both
;; memory.
11031 (define_insn "xop_pperm_pack_v4si_v8hi"
11032 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11035 (match_operand:V4SI 1 "register_operand" "x,x"))
11037 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
11038 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11039 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11040 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11041 [(set_attr "type" "sse4arg")
11042 (set_attr "mode" "TI")])
;; Emits vpperm to combine two V8HI inputs (operands 1 and 2) into a
;; V16QI result.  V16QI operand 3 appears only inside a (use ...).  The
;; insn condition rejects the case where operands 2 and 3 are both
;; memory.
11044 (define_insn "xop_pperm_pack_v8hi_v16qi"
11045 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
11048 (match_operand:V8HI 1 "register_operand" "x,x"))
11050 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
11051 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
11052 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
11053 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11054 [(set_attr "type" "sse4arg")
11055 (set_attr "mode" "TI")])
11057 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; If the count does not satisfy const_0_to_<sserotatemax>_operand, the
;; preparation code:
;;   - converts it to <ssescalarmode> when its mode differs;
;;   - replicates it into every slot of a PARALLEL;
;;   - materialises that PARALLEL in a register with vec_init;
;;   - emits the vector-count pattern xop_vrotl<mode>3.
11058 (define_expand "rotl<mode>3"
11059 [(set (match_operand:VI_128 0 "register_operand" "")
11061 (match_operand:VI_128 1 "nonimmediate_operand" "")
11062 (match_operand:SI 2 "general_operand")))]
11065 /* If we were given a scalar, convert it to parallel */
11066 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11068 rtvec vs = rtvec_alloc (<ssescalarnum>);
11069 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11070 rtx reg = gen_reg_rtx (<MODE>mode);
11071 rtx op2 = operands[2];
11074 if (GET_MODE (op2) != <ssescalarmode>mode)
11076 op2 = gen_reg_rtx (<ssescalarmode>mode);
11077 convert_move (op2, operands[2], false);
11080 for (i = 0; i < <ssescalarnum>; i++)
11081 RTVEC_ELT (vs, i) = op2;
11083 emit_insn (gen_vec_init<mode> (reg, par));
11084 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; A count that does not satisfy const_0_to_<sserotatemax>_operand is
;; broadcast into a vector register the same way rotl<mode>3 does it.
;; That vector is then negated, and the negated vector is passed to
;; xop_vrotl<mode>3.
11089 (define_expand "rotr<mode>3"
11090 [(set (match_operand:VI_128 0 "register_operand" "")
11092 (match_operand:VI_128 1 "nonimmediate_operand" "")
11093 (match_operand:SI 2 "general_operand")))]
11096 /* If we were given a scalar, convert it to parallel */
11097 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
11099 rtvec vs = rtvec_alloc (<ssescalarnum>);
11100 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
11101 rtx neg = gen_reg_rtx (<MODE>mode);
11102 rtx reg = gen_reg_rtx (<MODE>mode);
11103 rtx op2 = operands[2];
11106 if (GET_MODE (op2) != <ssescalarmode>mode)
11108 op2 = gen_reg_rtx (<ssescalarmode>mode);
11109 convert_move (op2, operands[2], false);
11112 for (i = 0; i < <ssescalarnum>; i++)
11113 RTVEC_ELT (vs, i) = op2;
11115 emit_insn (gen_vec_init<mode> (reg, par));
11116 emit_insn (gen_neg<mode>2 (neg, reg));
11117 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: emits vprot<ssemodesuffix> with operand 2
;; (an immediate accepted by const_0_to_<sserotatemax>_operand) as the
;; count.  The source (operand 1) may be register or memory.
11122 (define_insn "xop_rotl<mode>3"
11123 [(set (match_operand:VI_128 0 "register_operand" "=x")
11125 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11126 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11128 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11129 [(set_attr "type" "sseishft")
11130 (set_attr "length_immediate" "1")
11131 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The output code rewrites the rotate as
;; a vprot<ssemodesuffix> (the mnemonic xop_rotl<mode>3 also emits) whose
;; count is the complement of operand 2 with respect to the element width,
;; and stores that count in operands[3] for the template.
11133 (define_insn "xop_rotr<mode>3"
11134 [(set (match_operand:VI_128 0 "register_operand" "=x")
11136 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
11137 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
/* The complement must be taken against the element width in bits.
   <ssescalarnum> is the number of elements, so <ssescalarnum> * 8 gives
   128/64/32/16 for V16QI/V8HI/V4SI/V2DI, which is only right for V4SI.  */
11140 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
11141 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
11143 [(set_attr "type" "sseishft")
11144 (set_attr "length_immediate" "1")
11145 (set_attr "mode" "TI")])
;; Rotate right by a per-element vector count: negates operand 2 into a
;; fresh register and emits xop_vrotl<mode>3 with the negated count.
11147 (define_expand "vrotr<mode>3"
11148 [(match_operand:VI_128 0 "register_operand" "")
11149 (match_operand:VI_128 1 "register_operand" "")
11150 (match_operand:VI_128 2 "register_operand" "")]
11153 rtx reg = gen_reg_rtx (<MODE>mode);
11154 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11155 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left by a per-element vector count: forwards all three
;; operands unchanged to xop_vrotl<mode>3.
11159 (define_expand "vrotl<mode>3"
11160 [(match_operand:VI_128 0 "register_operand" "")
11161 (match_operand:VI_128 1 "register_operand" "")
11162 (match_operand:VI_128 2 "register_operand" "")]
11165 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Rotate with a vector count: emits vprot<ssemodesuffix> with operand 2
;; as the per-element count.  The RTL is an if_then_else on operand 2
;; whose other arm uses (neg operand 2).  Either operand 1 or operand 2
;; may be memory, and the insn condition rejects the case where both are.
11169 (define_insn "xop_vrotl<mode>3"
11170 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11171 (if_then_else:VI_128
11173 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11176 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11180 (neg:VI_128 (match_dup 2)))))]
11181 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11182 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11183 [(set_attr "type" "sseishft")
11184 (set_attr "prefix_data16" "0")
11185 (set_attr "prefix_extra" "2")
11186 (set_attr "mode" "TI")])
11188 ;; XOP packed shift instructions.
;; Vector-count expander named vlshr for the VI12_128 modes: negates the
;; count into a fresh register and emits xop_shl<mode>3 with it.
11189 (define_expand "vlshr<mode>3"
11190 [(set (match_operand:VI12_128 0 "register_operand" "")
11192 (match_operand:VI12_128 1 "register_operand" "")
11193 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11196 rtx neg = gen_reg_rtx (<MODE>mode);
11197 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11198 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Vector-count expander named vlshr for the VI48_128 modes, enabled for
;; AVX2 or XOP.  The visible preparation code negates the count and emits
;; xop_shl<mode>3.
;; NOTE(review): presumably that sequence is only taken on the non-AVX2
;; path - confirm against the guard around it.
11202 (define_expand "vlshr<mode>3"
11203 [(set (match_operand:VI48_128 0 "register_operand" "")
11205 (match_operand:VI48_128 1 "register_operand" "")
11206 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11207 "TARGET_AVX2 || TARGET_XOP"
11211 rtx neg = gen_reg_rtx (<MODE>mode);
11212 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11213 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Vector-count expander named vlshr for the 256-bit VI48_256 modes.
;; Operands 0 and 1 are registers; the count (operand 2) may be register
;; or memory.
11218 (define_expand "vlshr<mode>3"
11219 [(set (match_operand:VI48_256 0 "register_operand" "")
11221 (match_operand:VI48_256 1 "register_operand" "")
11222 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Arithmetic right shift by a vector count for the VI128_128 modes:
;; negates the count into a fresh register and emits xop_sha<mode>3.
11225 (define_expand "vashr<mode>3"
11226 [(set (match_operand:VI128_128 0 "register_operand" "")
11227 (ashiftrt:VI128_128
11228 (match_operand:VI128_128 1 "register_operand" "")
11229 (match_operand:VI128_128 2 "nonimmediate_operand" "")))]
11232 rtx neg = gen_reg_rtx (<MODE>mode);
11233 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11234 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V4SI by a vector count, enabled for AVX2 or
;; XOP.  The visible preparation code negates the count and emits
;; xop_shav4si3.
;; NOTE(review): presumably that sequence is only taken on the non-AVX2
;; path - confirm against the guard around it.
11238 (define_expand "vashrv4si3"
11239 [(set (match_operand:V4SI 0 "register_operand" "")
11240 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "")
11241 (match_operand:V4SI 2 "nonimmediate_operand" "")))]
11242 "TARGET_AVX2 || TARGET_XOP"
11246 rtx neg = gen_reg_rtx (V4SImode);
11247 emit_insn (gen_negv4si2 (neg, operands[2]));
11248 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V8SI by a vector count.  Operands 0 and 1
;; are registers; the count (operand 2) may be register or memory.
11253 (define_expand "vashrv8si3"
11254 [(set (match_operand:V8SI 0 "register_operand" "")
11255 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand" "")
11256 (match_operand:V8SI 2 "nonimmediate_operand" "")))]
;; Vector-count expander named vashl for the VI12_128 modes: forwards the
;; operands unchanged to xop_sha<mode>3.
11259 (define_expand "vashl<mode>3"
11260 [(set (match_operand:VI12_128 0 "register_operand" "")
11262 (match_operand:VI12_128 1 "register_operand" "")
11263 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
11266 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Vector-count expander named vashl for the VI48_128 modes, enabled for
;; AVX2 or XOP.  The visible preparation code forces the count into a
;; register and emits xop_sha<mode>3.
;; NOTE(review): presumably that sequence is only taken on the non-AVX2
;; path - confirm against the guard around it.
11270 (define_expand "vashl<mode>3"
11271 [(set (match_operand:VI48_128 0 "register_operand" "")
11273 (match_operand:VI48_128 1 "register_operand" "")
11274 (match_operand:VI48_128 2 "nonimmediate_operand" "")))]
11275 "TARGET_AVX2 || TARGET_XOP"
11279 operands[2] = force_reg (<MODE>mode, operands[2]);
11280 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Vector-count expander named vashl for the 256-bit VI48_256 modes.
;; Operands 0 and 1 are registers; the count (operand 2) may be register
;; or memory.
11285 (define_expand "vashl<mode>3"
11286 [(set (match_operand:VI48_256 0 "register_operand" "")
11288 (match_operand:VI48_256 1 "register_operand" "")
11289 (match_operand:VI48_256 2 "nonimmediate_operand" "")))]
;; Emits vpsha<ssemodesuffix> with a per-element vector count in
;; operand 2.  The RTL is an if_then_else on operand 2 whose other arm
;; uses (neg operand 2).  Either operand 1 or operand 2 may be memory,
;; and the insn condition rejects the case where both are.
11292 (define_insn "xop_sha<mode>3"
11293 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11294 (if_then_else:VI_128
11296 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11299 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11303 (neg:VI_128 (match_dup 2)))))]
11304 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11305 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11306 [(set_attr "type" "sseishft")
11307 (set_attr "prefix_data16" "0")
11308 (set_attr "prefix_extra" "2")
11309 (set_attr "mode" "TI")])
;; Emits vpshl<ssemodesuffix> with a per-element vector count in
;; operand 2.  The RTL shape and the both-memory restriction mirror
;; xop_sha<mode>3.
11311 (define_insn "xop_shl<mode>3"
11312 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
11313 (if_then_else:VI_128
11315 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
11318 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
11322 (neg:VI_128 (match_dup 2)))))]
11323 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11324 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11325 [(set_attr "type" "sseishft")
11326 (set_attr "prefix_data16" "0")
11327 (set_attr "prefix_extra" "2")
11328 (set_attr "mode" "TI")])
11330 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI left shift by a scalar count.  The preparation code replicates
;; operand 2 into all 16 slots of a PARALLEL, loads it into a vector
;; register with vec_initv16qi, and emits xop_shav16qi3 with that vector
;; as the count.
11331 (define_expand "ashlv16qi3"
11332 [(set (match_operand:V16QI 0 "register_operand" "")
11334 (match_operand:V16QI 1 "register_operand" "")
11335 (match_operand:SI 2 "nonmemory_operand" "")))]
11338 rtx reg = gen_reg_rtx (V16QImode);
11342 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11343 for (i = 0; i < 16; i++)
11344 XVECEXP (par, 0, i) = operands[2];
11346 emit_insn (gen_vec_initv16qi (reg, par));
11347 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
;; Expander "<shift_insn>v16qi3": V16QI shift by an SImode count.
;; A constant count is negated at expand time; the count is then broadcast
;; into a V16QI register with vec_initv16qi.  gen_negv16qi2 is emitted on
;; that register; its guard is not visible in this excerpt -- presumably
;; only when `negate' was set for a non-constant count (TODO confirm).
;; LSHIFTRT goes through xop_shlv16qi3, any other code through xop_shav16qi3.
11351 (define_expand "<shift_insn>v16qi3"
11352 [(set (match_operand:V16QI 0 "register_operand" "")
11354 (match_operand:V16QI 1 "register_operand" "")
11355 (match_operand:SI 2 "nonmemory_operand" "")))]
11358 rtx reg = gen_reg_rtx (V16QImode);
11360 bool negate = false;
11361 rtx (*shift_insn)(rtx, rtx, rtx);
11364 if (CONST_INT_P (operands[2]))
11365 operands[2] = GEN_INT (-INTVAL (operands[2]));
11369 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
11370 for (i = 0; i < 16; i++)
11371 XVECEXP (par, 0, i) = operands[2];
11373 emit_insn (gen_vec_initv16qi (reg, par));
11376 emit_insn (gen_negv16qi2 (reg, reg));
11378 if (<CODE> == LSHIFTRT)
11379 shift_insn = gen_xop_shlv16qi3;
11381 shift_insn = gen_xop_shav16qi3;
11383 emit_insn (shift_insn (operands[0], operands[1], reg));
;; Expander "ashrv2di3": V2DI shift by a DImode count (operand 2).
;; A constant count is negated at expand time; the count is broadcast into
;; both elements of a V2DI register with vec_initv2di and handed to
;; xop_shav2di3.  gen_negv2di2 is emitted on the count register; its guard is
;; not visible in this excerpt -- presumably the non-constant case (TODO
;; confirm).
11387 (define_expand "ashrv2di3"
11388 [(set (match_operand:V2DI 0 "register_operand" "")
11390 (match_operand:V2DI 1 "register_operand" "")
11391 (match_operand:DI 2 "nonmemory_operand" "")))]
11394 rtx reg = gen_reg_rtx (V2DImode);
11396 bool negate = false;
11399 if (CONST_INT_P (operands[2]))
11400 operands[2] = GEN_INT (-INTVAL (operands[2]));
11404 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
11405 for (i = 0; i < 2; i++)
11406 XVECEXP (par, 0, i) = operands[2];
11408 emit_insn (gen_vec_initv2di (reg, par));
11411 emit_insn (gen_negv2di2 (reg, reg));
11413 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
11417 ;; XOP FRCZ support
;; Emits vfrcz<ssemodesuffix> for the FMAMODE modes: register destination,
;; register-or-memory source.
11418 (define_insn "xop_frcz<mode>2"
11419 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
11421 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
11424 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
11425 [(set_attr "type" "ssecvt1")
11426 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 form of frcz.  The preparation code supplies
;; operands[3] as the all-zero constant of the vector mode.
11429 (define_expand "xop_vmfrcz<mode>2"
11430 [(set (match_operand:VF_128 0 "register_operand")
11433 [(match_operand:VF_128 1 "nonimmediate_operand")]
11439 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn emitting vfrcz<ssescalarmodesuffix> for the VF_128 modes.
;; Operand 2 has to satisfy const0_operand; it is not printed in the
;; assembler template.
11442 (define_insn "*xop_vmfrcz_<mode>"
11443 [(set (match_operand:VF_128 0 "register_operand" "=x")
11446 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
11448 (match_operand:VF_128 2 "const0_operand")
11451 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11452 [(set_attr "type" "ssecvt1")
11453 (set_attr "mode" "<MODE>")])
;; XOP vpcom comparison on the VI_128 modes.  Operand 1 is the comparison
;; operator itself (accepted by ix86_comparison_int_operator) and is spliced
;; into the mnemonic through %Y1; operands 2 and 3 are the values compared.
11455 (define_insn "xop_maskcmp<mode>3"
11456 [(set (match_operand:VI_128 0 "register_operand" "=x")
11457 (match_operator:VI_128 1 "ix86_comparison_int_operator"
11458 [(match_operand:VI_128 2 "register_operand" "x")
11459 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11461 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11462 [(set_attr "type" "sse4arg")
11463 (set_attr "prefix_data16" "0")
11464 (set_attr "prefix_rep" "0")
11465 (set_attr "prefix_extra" "2")
11466 (set_attr "length_immediate" "1")
11467 (set_attr "mode" "TI")])
;; Counterpart of xop_maskcmp<mode>3 for operators accepted by
;; ix86_comparison_uns_operator; the mnemonic carries an extra "u" after the
;; condition printed by %Y1.
11469 (define_insn "xop_maskcmp_uns<mode>3"
11470 [(set (match_operand:VI_128 0 "register_operand" "=x")
11471 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
11472 [(match_operand:VI_128 2 "register_operand" "x")
11473 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
11475 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11476 [(set_attr "type" "ssecmp")
11477 (set_attr "prefix_data16" "0")
11478 (set_attr "prefix_rep" "0")
11479 (set_attr "prefix_extra" "2")
11480 (set_attr "length_immediate" "1")
11481 (set_attr "mode" "TI")])
11483 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11484 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11485 ;; the exact instruction generated for the intrinsic.
;; Same assembler output as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP.
11486 (define_insn "xop_maskcmp_uns2<mode>3"
11487 [(set (match_operand:VI_128 0 "register_operand" "=x")
11489 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
11490 [(match_operand:VI_128 2 "register_operand" "x")
11491 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
11492 UNSPEC_XOP_UNSIGNED_CMP))]
11494 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
11495 [(set_attr "type" "ssecmp")
11496 (set_attr "prefix_data16" "0")
11497 (set_attr "prefix_extra" "2")
11498 (set_attr "length_immediate" "1")
11499 (set_attr "mode" "TI")])
11501 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11502 ;; being added here to be complete.
;; UNSPEC_XOP_TRUEFALSE insn on the VI_128 modes.  Operand 3 is a constant
;; selector: non-zero emits vpcomtrue<ssemodesuffix>, zero emits
;; vpcomfalse<ssemodesuffix>.
11503 (define_insn "xop_pcom_tf<mode>3"
11504 [(set (match_operand:VI_128 0 "register_operand" "=x")
11506 [(match_operand:VI_128 1 "register_operand" "x")
11507 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
11508 (match_operand:SI 3 "const_int_operand" "n")]
11509 UNSPEC_XOP_TRUEFALSE))]
11512 return ((INTVAL (operands[3]) != 0)
11513 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11514 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
11516 [(set_attr "type" "ssecmp")
11517 (set_attr "prefix_data16" "0")
11518 (set_attr "prefix_extra" "2")
11519 (set_attr "length_immediate" "1")
11520 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix> for the VF modes: two float vector inputs
;; (operands 1 and 2), an integer-vector operand 3 of mode <sseintvecmode>
;; that may be in memory, and a constant operand 4 in the range 0..3.
11522 (define_insn "xop_vpermil2<mode>3"
11523 [(set (match_operand:VF 0 "register_operand" "=x")
11525 [(match_operand:VF 1 "register_operand" "x")
11526 (match_operand:VF 2 "nonimmediate_operand" "%x")
11527 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
11528 (match_operand:SI 4 "const_0_to_3_operand" "n")]
11531 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
11532 [(set_attr "type" "sse4arg")
11533 (set_attr "length_immediate" "1")
11534 (set_attr "mode" "<MODE>")])
11536 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Two alternatives selected by the "isa"
;; attribute: the noavx one ties operand 1 to the destination (constraint
;; "0") and prints the two-operand form; the avx one prints the
;; three-operand VEX form.
11538 (define_insn "aesenc"
11539 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11540 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11541 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11545 aesenc\t{%2, %0|%0, %2}
11546 vaesenc\t{%2, %1, %0|%0, %1, %2}"
11547 [(set_attr "isa" "noavx,avx")
11548 (set_attr "type" "sselog1")
11549 (set_attr "prefix_extra" "1")
11550 (set_attr "prefix" "orig,vex")
11551 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI (UNSPEC_AESENCLAST).  Alternative 0 is
;; the noavx two-operand form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
11553 (define_insn "aesenclast"
11554 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11555 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11556 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11557 UNSPEC_AESENCLAST))]
11560 aesenclast\t{%2, %0|%0, %2}
11561 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11562 [(set_attr "isa" "noavx,avx")
11563 (set_attr "type" "sselog1")
11564 (set_attr "prefix_extra" "1")
11565 (set_attr "prefix" "orig,vex")
11566 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternative 0 is the noavx two-operand form
;; with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.
11568 (define_insn "aesdec"
11569 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11570 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11571 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11575 aesdec\t{%2, %0|%0, %2}
11576 vaesdec\t{%2, %1, %0|%0, %1, %2}"
11577 [(set_attr "isa" "noavx,avx")
11578 (set_attr "type" "sselog1")
11579 (set_attr "prefix_extra" "1")
11580 (set_attr "prefix" "orig,vex")
11581 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI (UNSPEC_AESDECLAST).  Alternative 0 is
;; the noavx two-operand form with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX form.
11583 (define_insn "aesdeclast"
11584 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11585 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11586 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
11587 UNSPEC_AESDECLAST))]
11590 aesdeclast\t{%2, %0|%0, %2}
11591 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11592 [(set_attr "isa" "noavx,avx")
11593 (set_attr "type" "sselog1")
11594 (set_attr "prefix_extra" "1")
11595 (set_attr "prefix" "orig,vex")
11596 (set_attr "mode" "TI")])
;; aesimc on V2DI: single register-or-memory source.  One template with a
;; %v prefix and "prefix" attribute maybe_vex covers both encodings.
11598 (define_insn "aesimc"
11599 [(set (match_operand:V2DI 0 "register_operand" "=x")
11600 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
11603 "%vaesimc\t{%1, %0|%0, %1}"
11604 [(set_attr "type" "sselog1")
11605 (set_attr "prefix_extra" "1")
11606 (set_attr "prefix" "maybe_vex")
11607 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI (UNSPEC_AESKEYGENASSIST): register-or-memory
;; source plus a constant operand 2 in the range 0..255, printed as the
;; immediate.  Uses the %v / maybe_vex scheme for both encodings.
11609 (define_insn "aeskeygenassist"
11610 [(set (match_operand:V2DI 0 "register_operand" "=x")
11611 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
11612 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11613 UNSPEC_AESKEYGENASSIST))]
11615 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11616 [(set_attr "type" "sselog1")
11617 (set_attr "prefix_extra" "1")
11618 (set_attr "length_immediate" "1")
11619 (set_attr "prefix" "maybe_vex")
11620 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI with a constant operand 3 (0..255) as the
;; immediate.  Alternative 0 (noavx) ties operand 1 to the destination;
;; alternative 1 (avx) prints the four-operand VEX form.
11622 (define_insn "pclmulqdq"
11623 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
11624 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
11625 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
11626 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11630 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
11631 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11632 [(set_attr "isa" "noavx,avx")
11633 (set_attr "type" "sselog1")
11634 (set_attr "prefix_extra" "1")
11635 (set_attr "length_immediate" "1")
11636 (set_attr "prefix" "orig,vex")
11637 (set_attr "mode" "TI")])
;; Expander that builds the PARALLEL for operand 0 by hand.
;; nregs is 16 when TARGET_64BIT and 8 otherwise.  Element 0 of the PARALLEL
;; is an UNSPEC_VOLATILE; elements 1..nregs each set one SSE register, viewed
;; in V8SImode, to the V8SImode zero constant.
11639 (define_expand "avx_vzeroall"
11640 [(match_par_dup 0 [(const_int 0)])]
11643 int nregs = TARGET_64BIT ? 16 : 8;
11646 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11648 XVECEXP (operands[0], 0, 0)
11649 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11652 for (regno = 0; regno < nregs; regno++)
11653 XVECEXP (operands[0], 0, regno + 1)
11654 = gen_rtx_SET (VOIDmode,
11655 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
11656 CONST0_RTX (V8SImode));
;; Matches a parallel accepted by vzeroall_operation whose first element is
;; the UNSPECV_VZEROALL unspec_volatile.  Attributes mark it as having no
;; modrm byte and no memory access.
11659 (define_insn "*avx_vzeroall"
11660 [(match_parallel 0 "vzeroall_operation"
11661 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11664 [(set_attr "type" "sse")
11665 (set_attr "modrm" "0")
11666 (set_attr "memory" "none")
11667 (set_attr "prefix" "vex")
11668 (set_attr "mode" "OI")])
11670 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
11671 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying one const_int operand.
;; Attributes mark it as having no modrm byte and no memory access.
11672 (define_insn "avx_vzeroupper"
11673 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
11674 UNSPECV_VZEROUPPER)]
11677 [(set_attr "type" "sse")
11678 (set_attr "modrm" "0")
11679 (set_attr "memory" "none")
11680 (set_attr "prefix" "vex")
11681 (set_attr "mode" "OI")])
;; Mode attribute mapping V4DI/V8SI/V16HI/V32QI to V2DI/V4SI/V8HI/V16QI
;; respectively; each of the latter four modes maps to itself.
11683 (define_mode_attr AVXTOSSEMODE
11684 [(V4DI "V2DI") (V2DI "V2DI")
11685 (V8SI "V4SI") (V4SI "V4SI")
11686 (V16HI "V8HI") (V8HI "V8HI")
11687 (V32QI "V16QI") (V16QI "V16QI")])
;; Emits vpbroadcast<ssemodesuffix> for the VI modes.  The source (operand 1)
;; has mode <AVXTOSSEMODE> and may be in memory; the RTL selects element 0
;; of it as a <ssescalarmode> value.
11689 (define_insn "avx2_pbroadcast<mode>"
11690 [(set (match_operand:VI 0 "register_operand" "=x")
11692 (vec_select:<ssescalarmode>
11693 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
11694 (parallel [(const_int 0)]))))]
11696 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}"
11697 [(set_attr "type" "ssemov")
11698 (set_attr "prefix_extra" "1")
11699 (set_attr "prefix" "vex")
11700 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpermd on V8SI: operand 1 is a register, operand 2 may be in memory.
11702 (define_insn "avx2_permvarv8si"
11703 [(set (match_operand:V8SI 0 "register_operand" "=x")
11705 [(match_operand:V8SI 1 "register_operand" "x")
11706 (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
11709 "vpermd\t{%2, %1, %0|%0, %1, %2}"
11710 [(set_attr "type" "sselog")
11711 (set_attr "prefix" "vex")
11712 (set_attr "mode" "OI")])
;; Emits vpermpd on V4DF with a constant operand 2 (0..255) as the immediate.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with its
;; "xm" constraint (and with avx2_permv4di_1): a memory source can then be
;; matched directly instead of only being reachable through reload.
11714 (define_insn "avx2_permv4df"
11715 [(set (match_operand:V4DF 0 "register_operand" "=x")
11717 [(match_operand:V4DF 1 "nonimmediate_operand" "xm")
11718 (match_operand:SI 2 "const_0_to_255_operand" "n")]
11721 "vpermpd\t{%2, %1, %0|%0, %1, %2}"
11722 [(set_attr "type" "sselog")
11723 (set_attr "prefix_extra" "1")
11724 (set_attr "prefix" "vex")
11725 (set_attr "mode" "OI")])
;; Emits vpermps on V8SF: operand 1 is a register, operand 2 may be in memory.
11727 (define_insn "avx2_permvarv8sf"
11728 [(set (match_operand:V8SF 0 "register_operand" "=x")
11730 [(match_operand:V8SF 1 "register_operand" "x")
11731 (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
11734 "vpermps\t{%2, %1, %0|%0, %1, %2}"
11735 [(set_attr "type" "sselog")
11736 (set_attr "prefix" "vex")
11737 (set_attr "mode" "OI")])
;; Expander taking an 8-bit immediate (operand 2).  The immediate is split
;; into four 2-bit fields, lowest field first, and each is passed as a
;; separate constant to avx2_permv4di_1.
11739 (define_expand "avx2_permv4di"
11740 [(match_operand:V4DI 0 "register_operand" "")
11741 (match_operand:V4DI 1 "nonimmediate_operand" "")
11742 (match_operand:SI 2 "const_0_to_255_operand" "")]
11745 int mask = INTVAL (operands[2]);
11746 emit_insn (gen_avx2_permv4di_1 (operands[0], operands[1],
11747 GEN_INT ((mask >> 0) & 3),
11748 GEN_INT ((mask >> 2) & 3),
11749 GEN_INT ((mask >> 4) & 3),
11750 GEN_INT ((mask >> 6) & 3)));
;; Operands 2..5 are four selectors, each in the range 0..3.  The output
;; code packs them back into one immediate (two bits each, operand 2 in the
;; lowest bits), overwrites operands[2] with it and emits vpermq.
11754 (define_insn "avx2_permv4di_1"
11755 [(set (match_operand:V4DI 0 "register_operand" "=x")
11757 (match_operand:V4DI 1 "nonimmediate_operand" "xm")
11758 (parallel [(match_operand 2 "const_0_to_3_operand" "")
11759 (match_operand 3 "const_0_to_3_operand" "")
11760 (match_operand 4 "const_0_to_3_operand" "")
11761 (match_operand 5 "const_0_to_3_operand" "")])))]
11765 mask |= INTVAL (operands[2]) << 0;
11766 mask |= INTVAL (operands[3]) << 2;
11767 mask |= INTVAL (operands[4]) << 4;
11768 mask |= INTVAL (operands[5]) << 6;
11769 operands[2] = GEN_INT (mask);
11770 return "vpermq\t{%2, %1, %0|%0, %1, %2}";
11772 [(set_attr "type" "sselog")
11773 (set_attr "prefix" "vex")
11774 (set_attr "mode" "OI")])
;; Emits vperm2i128 on V4DI: two vector inputs (operand 2 may be in memory)
;; and a constant operand 3 (0..255) printed as the immediate.
11776 (define_insn "avx2_permv2ti"
11777 [(set (match_operand:V4DI 0 "register_operand" "=x")
11779 [(match_operand:V4DI 1 "register_operand" "x")
11780 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
11781 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11784 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11785 [(set_attr "type" "sselog")
11786 (set_attr "prefix" "vex")
11787 (set_attr "mode" "OI")])
;; Emits vbroadcastsd: a V4DF vec_duplicate whose source is element 0 of a
;; V2DF register (operand 1).
11789 (define_insn "avx2_vec_dupv4df"
11790 [(set (match_operand:V4DF 0 "register_operand" "=x")
11791 (vec_duplicate:V4DF
11793 (match_operand:V2DF 1 "register_operand" "x")
11794 (parallel [(const_int 0)]))))]
11796 "vbroadcastsd\t{%1, %0|%0, %1}"
11797 [(set_attr "type" "sselog1")
11798 (set_attr "prefix" "vex")
11799 (set_attr "mode" "V4DF")])
11801 ;; Modes handled by AVX vec_dup patterns.
;; These are the eight-element SI/SF and four-element DI/DF vector modes.
11802 (define_mode_iterator AVX_VEC_DUP_MODE
11803 [V8SI V8SF V4DI V4DF])
;; vec_duplicate of a scalar into an AVX_VEC_DUP_MODE register.
;; Alternative 0 takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>.  Alternative 1 takes it from a register
;; (discouraged by "?"); its template line is not present in this excerpt.
11805 (define_insn "vec_dup<mode>"
11806 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
11807 (vec_duplicate:AVX_VEC_DUP_MODE
11808 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
11811 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
11813 [(set_attr "type" "ssemov")
11814 (set_attr "prefix_extra" "1")
11815 (set_attr "prefix" "vex")
11816 (set_attr "mode" "V8SF")])
;; Emits vbroadcasti128 for the VI_256 modes.  The source (operand 1) has the
;; half-width vector mode and must be a memory operand.
11818 (define_insn "avx2_vbroadcasti128_<mode>"
11819 [(set (match_operand:VI_256 0 "register_operand" "=x")
11821 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
11824 "vbroadcasti128\t{%1, %0|%0, %1}"
11825 [(set_attr "type" "ssemov")
11826 (set_attr "prefix_extra" "1")
11827 (set_attr "prefix" "vex")
11828 (set_attr "mode" "OI")])
;; Post-reload rewrite (TARGET_AVX) of a vec_duplicate from a register scalar
;; into an AVX_VEC_DUP_MODE register.  Operand 2 is created as the
;; <ssehalfvecmode> view of the destination's hard register; the scalar is
;; first duplicated into that half, then the half is vec_concat'ed with
;; itself to fill the full destination.
11831 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
11832 (vec_duplicate:AVX_VEC_DUP_MODE
11833 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
11834 "TARGET_AVX && reload_completed"
11835 [(set (match_dup 2)
11836 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
11838 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
11839 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast of a half-width vector (operand 1) into a V_256 register.
;; Three alternatives, by where operand 1 lives:
;;   memory                     -> vbroadcast<i128>
;;   same register as operand 0 -> vinsert<i128> with immediate 1
;;   another register ("?x")    -> vperm2<i128> with immediate 0
11841 (define_insn "avx_vbroadcastf128_<mode>"
11842 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
11844 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11848 vbroadcast<i128>\t{%1, %0|%0, %1}
11849 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
11850 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11851 [(set_attr "type" "ssemov,sselog1,sselog1")
11852 (set_attr "prefix_extra" "1")
11853 (set_attr "length_immediate" "0,1,1")
11854 (set_attr "prefix" "vex")
11855 (set_attr "mode" "<sseinsnmode>")])
11857 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11858 ;; If it so happens that the input is in memory, use vbroadcast.
11859 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast expressed through a parallel accepted by
;; avx_vbroadcast_operand; operand 3 is the index of the broadcast element.
;; The output code switches on which_alternative (the case labels are not
;; present in this excerpt).  One arm re-addresses the memory operand as an
;; SFmode reference at byte offset elt * 4 and emits vbroadcastss; the other
;; builds the immediate elt * 0x55 and emits vpermilps.
11860 (define_insn "*avx_vperm_broadcast_v4sf"
11861 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11863 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11864 (match_parallel 2 "avx_vbroadcast_operand"
11865 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11868 int elt = INTVAL (operands[3]);
11869 switch (which_alternative)
11873 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11874 return "vbroadcastss\t{%1, %0|%0, %1}";
11876 operands[2] = GEN_INT (elt * 0x55);
11877 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11879 gcc_unreachable ();
11882 [(set_attr "type" "ssemov,ssemov,sselog1")
11883 (set_attr "prefix_extra" "1")
11884 (set_attr "length_immediate" "0,0,1")
11885 (set_attr "prefix" "vex")
11886 (set_attr "mode" "SF,SF,V4SF")])
;; VF_256 counterpart of the V4SF broadcast pattern, split after reload.
;; The split code has two paths; the test that chooses between them is not
;; present in this excerpt (presumably register vs. memory source -- TODO
;; confirm).  One path emits avx_vpermil<mode> to replicate the wanted
;; element within each 128-bit lane, then avx_vperm2f128<mode>3 to copy the
;; wanted lane into both lanes of the destination.  The other path
;; re-addresses operand 1 as a scalar at byte offset elt * element size, so
;; that the replacement pattern is a plain vec_duplicate of that scalar.
11888 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11889 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
11891 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
11892 (match_parallel 2 "avx_vbroadcast_operand"
11893 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11896 "&& reload_completed"
11897 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
11899 rtx op0 = operands[0], op1 = operands[1];
11900 int elt = INTVAL (operands[3]);
11906 /* Shuffle element we care about into all elements of the 128-bit lane.
11907 The other lane gets shuffled too, but we don't care. */
11908 if (<MODE>mode == V4DFmode)
11909 mask = (elt & 1 ? 15 : 0);
11911 mask = (elt & 3) * 0x55;
11912 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11914 /* Shuffle the lane we care about into both lanes of the dest. */
11915 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11916 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11920 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
11921 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the VF2 modes.  Converts the immediate (operand 2) into a
;; PARALLEL of explicit element indices, one mask bit per element.  For
;; V4DFmode, bits 2 and 3 pick within the upper pair, hence the "+ 2".
11924 (define_expand "avx_vpermil<mode>"
11925 [(set (match_operand:VF2 0 "register_operand" "")
11927 (match_operand:VF2 1 "nonimmediate_operand" "")
11928 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11931 int mask = INTVAL (operands[2]);
11932 rtx perm[<ssescalarnum>];
11934 perm[0] = GEN_INT (mask & 1);
11935 perm[1] = GEN_INT ((mask >> 1) & 1);
11936 if (<MODE>mode == V4DFmode)
11938 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11939 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11943 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the VF1 modes.  Converts the immediate (operand 2) into a
;; PARALLEL of explicit element indices, two mask bits per element.  For
;; V8SFmode, elements 4..7 reuse the same four 2-bit fields, offset by 4.
11946 (define_expand "avx_vpermil<mode>"
11947 [(set (match_operand:VF1 0 "register_operand" "")
11949 (match_operand:VF1 1 "nonimmediate_operand" "")
11950 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11953 int mask = INTVAL (operands[2]);
11954 rtx perm[<ssescalarnum>];
11956 perm[0] = GEN_INT (mask & 3);
11957 perm[1] = GEN_INT ((mask >> 2) & 3);
11958 perm[2] = GEN_INT ((mask >> 4) & 3);
11959 perm[3] = GEN_INT ((mask >> 6) & 3);
11960 if (<MODE>mode == V8SFmode)
11962 perm[4] = GEN_INT ((mask & 3) + 4);
11963 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11964 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11965 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11969 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a selection parallel (operand 2) for the VF modes, provided
;; avx_vpermilp_parallel accepts it.  The output code calls the same helper
;; again, uses its return value minus 1 as the immediate, and emits
;; vpermil<ssemodesuffix>.
11972 (define_insn "*avx_vpermilp<mode>"
11973 [(set (match_operand:VF 0 "register_operand" "=x")
11975 (match_operand:VF 1 "nonimmediate_operand" "xm")
11976 (match_parallel 2 ""
11977 [(match_operand 3 "const_int_operand" "")])))]
11979 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
11981 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11982 operands[2] = GEN_INT (mask);
11983 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
11985 [(set_attr "type" "sselog")
11986 (set_attr "prefix_extra" "1")
11987 (set_attr "length_immediate" "1")
11988 (set_attr "prefix" "vex")
11989 (set_attr "mode" "<MODE>")])
;; Emits vpermil<ssemodesuffix> with a vector second source: operand 2 has
;; the integer vector mode <sseintvecmode> and may be in memory.  There is no
;; immediate (no length_immediate attribute).
11991 (define_insn "avx_vpermilvar<mode>3"
11992 [(set (match_operand:VF 0 "register_operand" "=x")
11994 [(match_operand:VF 1 "register_operand" "x")
11995 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
11998 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11999 [(set_attr "type" "sselog")
12000 (set_attr "prefix_extra" "1")
12001 (set_attr "prefix" "vex")
12002 (set_attr "mode" "<MODE>")])
;; Expander for the AVX256MODE2P modes; the default RTL is the
;; UNSPEC_VPERMIL2F128 form.  When neither bit 3 nor bit 7 of the immediate
;; is set ((mask & 0x88) == 0) the preparation code instead builds an
;; explicit vec_select from the vec_concat of operands 1 and 2.  The low half
;; of the result takes nelt2 consecutive elements starting at
;; (mask & 3) * nelt2; the high half starts at ((mask >> 4) & 3) * nelt2.
12004 (define_expand "avx_vperm2f128<mode>3"
12005 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
12006 (unspec:AVX256MODE2P
12007 [(match_operand:AVX256MODE2P 1 "register_operand" "")
12008 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
12009 (match_operand:SI 3 "const_0_to_255_operand" "")]
12010 UNSPEC_VPERMIL2F128))]
12013 int mask = INTVAL (operands[3]);
12014 if ((mask & 0x88) == 0)
12016 rtx perm[<ssescalarnum>], t1, t2;
12017 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
12019 base = (mask & 3) * nelt2;
12020 for (i = 0; i < nelt2; ++i)
12021 perm[i] = GEN_INT (base + i);
12023 base = ((mask >> 4) & 3) * nelt2;
12024 for (i = 0; i < nelt2; ++i)
12025 perm[i + nelt2] = GEN_INT (base + i);
12027 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
12028 operands[1], operands[2]);
12029 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
12030 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
12031 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
12037 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
12038 ;; means that in order to represent this properly in rtl we'd have to
12039 ;; nest *another* vec_concat with a zero operand and do the select from
12040 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form: emits vperm2<i128> with operand 3 (0..255)
;; printed unchanged as the immediate.
12041 (define_insn "*avx_vperm2f128<mode>_full"
12042 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12043 (unspec:AVX256MODE2P
12044 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
12045 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
12046 (match_operand:SI 3 "const_0_to_255_operand" "n")]
12047 UNSPEC_VPERMIL2F128))]
12049 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12050 [(set_attr "type" "sselog")
12051 (set_attr "prefix_extra" "1")
12052 (set_attr "length_immediate" "1")
12053 (set_attr "prefix" "vex")
12054 (set_attr "mode" "<sseinsnmode>")])
;; vec_select-of-vec_concat form: matches when avx_vperm2f128_parallel
;; accepts the selection parallel (operand 3).  The output code calls the
;; helper again, uses its return value minus 1 as the immediate, and emits
;; vperm2<i128>.
12056 (define_insn "*avx_vperm2f128<mode>_nozero"
12057 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
12058 (vec_select:AVX256MODE2P
12059 (vec_concat:<ssedoublevecmode>
12060 (match_operand:AVX256MODE2P 1 "register_operand" "x")
12061 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
12062 (match_parallel 3 ""
12063 [(match_operand 4 "const_int_operand" "")])))]
12065 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
12067 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
12068 operands[3] = GEN_INT (mask);
12069 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12071 [(set_attr "type" "sselog")
12072 (set_attr "prefix_extra" "1")
12073 (set_attr "length_immediate" "1")
12074 (set_attr "prefix" "vex")
12075 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the V_256 modes: inserts the half-width vector operand 2 into
;; operand 1 under control of operand 3 (0 or 1).  It dispatches to
;; vec_set_lo_<mode> or vec_set_hi_<mode>; the case labels are not present in
;; this excerpt -- presumably 0 selects lo and 1 selects hi (TODO confirm).
12077 (define_expand "avx_vinsertf128<mode>"
12078 [(match_operand:V_256 0 "register_operand" "")
12079 (match_operand:V_256 1 "register_operand" "")
12080 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
12081 (match_operand:SI 3 "const_0_to_1_operand" "")]
12084 rtx (*insn)(rtx, rtx, rtx);
12086 switch (INTVAL (operands[3]))
12089 insn = gen_vec_set_lo_<mode>;
12092 insn = gen_vec_set_hi_<mode>;
12095 gcc_unreachable ();
12098 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Emits vinserti128 with immediate 0x0: the V2DI operand 2 is combined with
;; elements 2 and 3 selected from the V4DI operand 1.
12102 (define_insn "avx2_vec_set_lo_v4di"
12103 [(set (match_operand:V4DI 0 "register_operand" "=x")
12105 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
12107 (match_operand:V4DI 1 "register_operand" "x")
12108 (parallel [(const_int 2) (const_int 3)]))))]
12110 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12111 [(set_attr "type" "sselog")
12112 (set_attr "prefix_extra" "1")
12113 (set_attr "length_immediate" "1")
12114 (set_attr "prefix" "vex")
12115 (set_attr "mode" "OI")])
;; Emits vinserti128 with immediate 0x1: elements 0 and 1 selected from the
;; V4DI operand 1 are combined with the V2DI operand 2.
12117 (define_insn "avx2_vec_set_hi_v4di"
12118 [(set (match_operand:V4DI 0 "register_operand" "=x")
12121 (match_operand:V4DI 1 "register_operand" "x")
12122 (parallel [(const_int 0) (const_int 1)]))
12123 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
12125 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12126 [(set_attr "type" "sselog")
12127 (set_attr "prefix_extra" "1")
12128 (set_attr "length_immediate" "1")
12129 (set_attr "prefix" "vex")
12130 (set_attr "mode" "OI")])
;; VI8F_256 modes: result = vec_concat (operand 2, elements 2..3 of
;; operand 1), i.e. operand 2 replaces the low half.  Emits vinsert<i128>
;; with immediate 0x0.
12132 (define_insn "vec_set_lo_<mode>"
12133 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12134 (vec_concat:VI8F_256
12135 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12136 (vec_select:<ssehalfvecmode>
12137 (match_operand:VI8F_256 1 "register_operand" "x")
12138 (parallel [(const_int 2) (const_int 3)]))))]
12140 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12141 [(set_attr "type" "sselog")
12142 (set_attr "prefix_extra" "1")
12143 (set_attr "length_immediate" "1")
12144 (set_attr "prefix" "vex")
12145 (set_attr "mode" "<sseinsnmode>")])
;; VI8F_256 modes: result = vec_concat (elements 0..1 of operand 1,
;; operand 2), i.e. operand 2 replaces the high half.  Emits vinsert<i128>
;; with immediate 0x1.
12147 (define_insn "vec_set_hi_<mode>"
12148 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
12149 (vec_concat:VI8F_256
12150 (vec_select:<ssehalfvecmode>
12151 (match_operand:VI8F_256 1 "register_operand" "x")
12152 (parallel [(const_int 0) (const_int 1)]))
12153 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12155 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12156 [(set_attr "type" "sselog")
12157 (set_attr "prefix_extra" "1")
12158 (set_attr "length_immediate" "1")
12159 (set_attr "prefix" "vex")
12160 (set_attr "mode" "<sseinsnmode>")])
;; VI4F_256 modes: result = vec_concat (operand 2, elements 4..7 of
;; operand 1), i.e. operand 2 replaces the low half.  Emits vinsert<i128>
;; with immediate 0x0.
12162 (define_insn "vec_set_lo_<mode>"
12163 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12164 (vec_concat:VI4F_256
12165 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
12166 (vec_select:<ssehalfvecmode>
12167 (match_operand:VI4F_256 1 "register_operand" "x")
12168 (parallel [(const_int 4) (const_int 5)
12169 (const_int 6) (const_int 7)]))))]
12171 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12172 [(set_attr "type" "sselog")
12173 (set_attr "prefix_extra" "1")
12174 (set_attr "length_immediate" "1")
12175 (set_attr "prefix" "vex")
12176 (set_attr "mode" "<sseinsnmode>")])
;; VI4F_256 modes: result = vec_concat (elements 0..3 of operand 1,
;; operand 2), i.e. operand 2 replaces the high half.  Emits vinsert<i128>
;; with immediate 0x1.
12178 (define_insn "vec_set_hi_<mode>"
12179 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
12180 (vec_concat:VI4F_256
12181 (vec_select:<ssehalfvecmode>
12182 (match_operand:VI4F_256 1 "register_operand" "x")
12183 (parallel [(const_int 0) (const_int 1)
12184 (const_int 2) (const_int 3)]))
12185 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
12187 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12188 [(set_attr "type" "sselog")
12189 (set_attr "prefix_extra" "1")
12190 (set_attr "length_immediate" "1")
12191 (set_attr "prefix" "vex")
12192 (set_attr "mode" "<sseinsnmode>")])
;; V16HI: the V8HI operand 2 is combined with elements 8..15 selected from
;; operand 1.  Emits vinsert%~128 with immediate 0x0.
12194 (define_insn "vec_set_lo_v16hi"
12195 [(set (match_operand:V16HI 0 "register_operand" "=x")
12197 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
12199 (match_operand:V16HI 1 "register_operand" "x")
12200 (parallel [(const_int 8) (const_int 9)
12201 (const_int 10) (const_int 11)
12202 (const_int 12) (const_int 13)
12203 (const_int 14) (const_int 15)]))))]
12205 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12206 [(set_attr "type" "sselog")
12207 (set_attr "prefix_extra" "1")
12208 (set_attr "length_immediate" "1")
12209 (set_attr "prefix" "vex")
12210 (set_attr "mode" "OI")])
;; V16HI: elements 0..7 selected from operand 1 are combined with the V8HI
;; operand 2.  Emits vinsert%~128 with immediate 0x1.
12212 (define_insn "vec_set_hi_v16hi"
12213 [(set (match_operand:V16HI 0 "register_operand" "=x")
12216 (match_operand:V16HI 1 "register_operand" "x")
12217 (parallel [(const_int 0) (const_int 1)
12218 (const_int 2) (const_int 3)
12219 (const_int 4) (const_int 5)
12220 (const_int 6) (const_int 7)]))
12221 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
12223 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12224 [(set_attr "type" "sselog")
12225 (set_attr "prefix_extra" "1")
12226 (set_attr "length_immediate" "1")
12227 (set_attr "prefix" "vex")
12228 (set_attr "mode" "OI")])
;; V32QI: the V16QI operand 2 is combined with elements 16..31 selected from
;; operand 1.  Emits vinsert%~128 with immediate 0x0.
12230 (define_insn "vec_set_lo_v32qi"
12231 [(set (match_operand:V32QI 0 "register_operand" "=x")
12233 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
12235 (match_operand:V32QI 1 "register_operand" "x")
12236 (parallel [(const_int 16) (const_int 17)
12237 (const_int 18) (const_int 19)
12238 (const_int 20) (const_int 21)
12239 (const_int 22) (const_int 23)
12240 (const_int 24) (const_int 25)
12241 (const_int 26) (const_int 27)
12242 (const_int 28) (const_int 29)
12243 (const_int 30) (const_int 31)]))))]
12245 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
12246 [(set_attr "type" "sselog")
12247 (set_attr "prefix_extra" "1")
12248 (set_attr "length_immediate" "1")
12249 (set_attr "prefix" "vex")
12250 (set_attr "mode" "OI")])
;; V32QI: elements 0..15 selected from operand 1 are combined with the V16QI
;; operand 2.  Emits vinsert%~128 with immediate 0x1.
12252 (define_insn "vec_set_hi_v32qi"
12253 [(set (match_operand:V32QI 0 "register_operand" "=x")
12256 (match_operand:V32QI 1 "register_operand" "x")
12257 (parallel [(const_int 0) (const_int 1)
12258 (const_int 2) (const_int 3)
12259 (const_int 4) (const_int 5)
12260 (const_int 6) (const_int 7)
12261 (const_int 8) (const_int 9)
12262 (const_int 10) (const_int 11)
12263 (const_int 12) (const_int 13)
12264 (const_int 14) (const_int 15)]))
12265 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
12267 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
12268 [(set_attr "type" "sselog")
12269 (set_attr "prefix_extra" "1")
12270 (set_attr "length_immediate" "1")
12271 (set_attr "prefix" "vex")
12272 (set_attr "mode" "<sseinsnmode>")])
;; Emits v<sseintprefix>maskmov<ssemodesuffix> as a load for the V48_AVX2
;; modes: operand 1 is the memory source and operand 2 is an integer-vector
;; register of mode <sseintvecmode> (presumably the mask -- confirm).
12274 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
12275 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
12277 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
12278 (match_operand:V48_AVX2 1 "memory_operand" "m")]
12281 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
12282 [(set_attr "type" "sselog1")
12283 (set_attr "prefix_extra" "1")
12284 (set_attr "prefix" "vex")
12285 (set_attr "mode" "<sseinsnmode>")])
;; Emits v<sseintprefix>maskmov<ssemodesuffix> as a store for the V48_AVX2
;; modes: operand 0 is the memory destination, operand 2 the data register,
;; and operand 1 an integer-vector register of mode <sseintvecmode>
;; (presumably the mask -- confirm).
12287 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
12288 [(set (match_operand:V48_AVX2 0 "memory_operand" "=m")
12290 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
12291 (match_operand:V48_AVX2 2 "register_operand" "x")
12295 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12296 [(set_attr "type" "sselog1")
12297 (set_attr "prefix_extra" "1")
12298 (set_attr "prefix" "vex")
12299 (set_attr "mode" "<sseinsnmode>")])
;; Cast from the half-width vector mode to an AVX256MODE2P mode, kept as an
;; unspec until reload has completed and then split into an ordinary move.
;; The split code re-expresses one side so both have the same mode: either
;; operand 0 is rebuilt as a <ssehalfvecmode> register, or operand 1 as a
;; <MODE> register.  The test that chooses between the two is not present in
;; this excerpt.
12301 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
12302 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
12303 (unspec:AVX256MODE2P
12304 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
12308 "&& reload_completed"
12311 rtx op0 = operands[0];
12312 rtx op1 = operands[1];
12314 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
12316 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
12317 emit_move_insn (op0, op1);
;; vec_init expander for the V_256 modes; all the work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
12321 (define_expand "vec_init<mode>"
12322 [(match_operand:V_256 0 "register_operand" "")
12323 (match_operand 1 "" "")]
12326 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Expander extracting a V2DI half of the V4DI operand 1 under control of
;; operand 2 (0 or 1).  It dispatches to vec_extract_lo_v4di or
;; vec_extract_hi_v4di; the case labels are not present in this excerpt --
;; presumably 0 selects lo and 1 selects hi (TODO confirm).
12330 (define_expand "avx2_extracti128"
12331 [(match_operand:V2DI 0 "nonimmediate_operand" "")
12332 (match_operand:V4DI 1 "register_operand" "")
12333 (match_operand:SI 2 "const_0_to_1_operand" "")]
12336 rtx (*insn)(rtx, rtx);
12338 switch (INTVAL (operands[2]))
12341 insn = gen_vec_extract_lo_v4di;
12344 insn = gen_vec_extract_hi_v4di;
12347 gcc_unreachable ();
12350 emit_insn (insn (operands[0], operands[1]));
;; Expander inserting the V2DI operand 2 into the V4DI operand 1 under
;; control of operand 3 (0 or 1).  It dispatches to avx2_vec_set_lo_v4di or
;; avx2_vec_set_hi_v4di; the case labels are not present in this excerpt --
;; presumably 0 selects lo and 1 selects hi (TODO confirm).
12354 (define_expand "avx2_inserti128"
12355 [(match_operand:V4DI 0 "register_operand" "")
12356 (match_operand:V4DI 1 "register_operand" "")
12357 (match_operand:V2DI 2 "nonimmediate_operand" "")
12358 (match_operand:SI 3 "const_0_to_1_operand" "")]
12361 rtx (*insn)(rtx, rtx, rtx);
12363 switch (INTVAL (operands[3]))
12366 insn = gen_avx2_vec_set_lo_v4di;
12369 insn = gen_avx2_vec_set_hi_v4di;
12372 gcc_unreachable ();
12375 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Emits vpsravd for the VI4_AVX2 modes; operand 2 is a full vector (register
;; or memory) in the same mode as the data operand.
12379 (define_insn "avx2_ashrv<mode>"
12380 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
12382 (match_operand:VI4_AVX2 1 "register_operand" "x")
12383 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))]
12385 "vpsravd\t{%2, %1, %0|%0, %1, %2}"
12386 [(set_attr "type" "sseishft")
12387 (set_attr "prefix" "vex")
12388 (set_attr "mode" "<sseinsnmode>")])
;; Shift insns generated from the any_lshift code iterator over the
;; VI48_AVX2 modes.  Operand 1 must be a register; operand 2 may be a
;; register or memory.  The mnemonic is assembled from the <vshift> code
;; attribute and the <ssemodesuffix> mode attribute: vp<vshift>v<suffix>.
;; NOTE(review): the insn condition is not visible in this listing.
12390 (define_insn "avx2_<shift_insn>v<mode>"
12391 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x")
12392 (any_lshift:VI48_AVX2
12393 (match_operand:VI48_AVX2 1 "register_operand" "x")
12394 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))]
12396 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12397 [(set_attr "type" "sseishft")
12398 (set_attr "prefix" "vex")
12399 (set_attr "mode" "<sseinsnmode>")])
;; Build a 256-bit vector (operand 0, any V_256 mode) from two operands of
;; the half-width mode <ssehalfvecmode>.
;; Alternative 0 (operand 2 in a register or memory) emits vinsert<i128>
;; with immediate 0x1, using %t1 as the first source.
;; Alternative 1 (operand 2 satisfies constraint "C") emits a plain move of
;; operand 1 into %x0; the move mnemonic (vmovaps, vmovapd or vmovdqa) is
;; chosen from get_attr_mode (insn), and any other mode reaches
;; gcc_unreachable.
;; The "type", "prefix_extra" and "length_immediate" attributes are given
;; per alternative to match these two encodings.
;; NOTE(review): the wrapping rtx code, the insn condition and the case
;; labels of both switches are not visible in this listing.  Constraint
;; "C" is presumably the all-zero vector constant -- confirm in
;; constraints.md.
12401 (define_insn "avx_vec_concat<mode>"
12402 [(set (match_operand:V_256 0 "register_operand" "=x,x")
12404 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
12405 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
12408 switch (which_alternative)
12411 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
12413 switch (get_attr_mode (insn))
12416 return "vmovaps\t{%1, %x0|%x0, %1}";
12418 return "vmovapd\t{%1, %x0|%x0, %1}";
12420 return "vmovdqa\t{%1, %x0|%x0, %1}";
12423 gcc_unreachable ();
12426 [(set_attr "type" "sselog,ssemov")
12427 (set_attr "prefix_extra" "1,*")
12428 (set_attr "length_immediate" "1,*")
12429 (set_attr "prefix" "vex")
12430 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps with a 128-bit result.  The V8HI input
;; (operand 1) is wrapped in a V8SF unspec and the V4SF result (operand 0)
;; is taken from it through the selector `parallel' below.
;; The selector must name elements 0, 1, 2 and 3 so that each of the four
;; result lanes comes from a distinct element.  The earlier selector
;; (0 1 1 2) duplicated element 1 and never referenced element 3, which
;; misdescribes the instruction to the RTL optimizers.
;; NOTE(review): the rtx code that applies the selector, the unspec code
;; and the insn condition are not visible in this listing.
12432 (define_insn "vcvtph2ps"
12433 [(set (match_operand:V4SF 0 "register_operand" "=x")
12435 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
12437 (parallel [(const_int 0) (const_int 1)
12438 (const_int 2) (const_int 3)])))]
12440 "vcvtph2ps\t{%1, %0|%0, %1}"
12441 [(set_attr "type" "ssecvt")
12442 (set_attr "prefix" "vex")
12443 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps with a 128-bit result: operand 1 is a V4HI
;; memory operand and the V4SF result (operand 0) is expressed directly as
;; a V4SF UNSPEC_VCVTPH2PS of it, with no element selection.
;; The leading `*' in the name means no generator function is created for
;; this pattern; it is only reachable through pattern matching.
;; NOTE(review): the "mode" attribute is V8SF here, whereas the register
;; form of the 128-bit conversion declares V4SF -- confirm this is
;; intended.  The insn condition is not visible in this listing.
12445 (define_insn "*vcvtph2ps_load"
12446 [(set (match_operand:V4SF 0 "register_operand" "=x")
12447 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
12448 UNSPEC_VCVTPH2PS))]
12450 "vcvtph2ps\t{%1, %0|%0, %1}"
12451 [(set_attr "type" "ssecvt")
12452 (set_attr "prefix" "vex")
12453 (set_attr "mode" "V8SF")])
;; 256-bit form of vcvtph2ps: the V8SF result (operand 0) is a V8SF
;; UNSPEC_VCVTPH2PS of a V8HI operand that may live in a register or in
;; memory.
;; NOTE(review): the insn condition is not visible in this listing.
12455 (define_insn "vcvtph2ps256"
12456 [(set (match_operand:V8SF 0 "register_operand" "=x")
12457 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
12458 UNSPEC_VCVTPH2PS))]
12460 "vcvtph2ps\t{%1, %0|%0, %1}"
12461 [(set_attr "type" "ssecvt")
12462 (set_attr "prefix" "vex")
12463 (set_attr "mode" "V8SF")])
;; Expander for the register form of vcvtps2ph with a V8HI destination.
;; The conversion itself is a V4HI unspec of the V4SF source (operand 1)
;; and an SImode immediate in 0..255 (operand 2).
;; The preparation statement sets operands[3] to the V4HI zero constant so
;; the caller only supplies operands 0-2.
;; NOTE(review): the rtx code combining the unspec with operand 3, the
;; unspec code and the expander condition are not visible in this listing;
;; presumably the zero fills the other half of the V8HI result -- confirm.
12465 (define_expand "vcvtps2ph"
12466 [(set (match_operand:V8HI 0 "register_operand" "")
12468 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
12469 (match_operand:SI 2 "const_0_to_255_operand" "")]
12473 "operands[3] = CONST0_RTX (V4HImode);")
;; Insn for the register form of vcvtps2ph with a V8HI destination
;; register.  Operand 1 is the V4SF source register, operand 2 an SImode
;; immediate in 0..255 (constraint "N"), and operand 3 must be a V4HI
;; constant accepted by const0_operand; it has no constraint and does not
;; appear in the output template.
;; NOTE(review): the rtx code combining the unspec with operand 3, the
;; unspec code and the insn condition are not visible in this listing.
12475 (define_insn "*vcvtps2ph"
12476 [(set (match_operand:V8HI 0 "register_operand" "=x")
12478 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12479 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12481 (match_operand:V4HI 3 "const0_operand" "")))]
12483 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12484 [(set_attr "type" "ssecvt")
12485 (set_attr "prefix" "vex")
12486 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph: the destination (operand 0) is a V4HI memory
;; operand written with the V4HI UNSPEC_VCVTPS2PH of the V4SF source
;; register (operand 1) and the SImode immediate in 0..255 (operand 2).
;; Unlike the register-destination pattern, no zero operand is involved.
;; NOTE(review): the insn condition is not visible in this listing.
12488 (define_insn "*vcvtps2ph_store"
12489 [(set (match_operand:V4HI 0 "memory_operand" "=m")
12490 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
12491 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12492 UNSPEC_VCVTPS2PH))]
12494 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12495 [(set_attr "type" "ssecvt")
12496 (set_attr "prefix" "vex")
12497 (set_attr "mode" "V4SF")])
;; 256-bit form of vcvtps2ph: the V8HI destination (operand 0, register or
;; memory) receives the V8HI UNSPEC_VCVTPS2PH of the V8SF source register
;; (operand 1) and the SImode immediate in 0..255 (operand 2).
;; NOTE(review): the insn condition is not visible in this listing.
12499 (define_insn "vcvtps2ph256"
12500 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
12501 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
12502 (match_operand:SI 2 "const_0_to_255_operand" "N")]
12503 UNSPEC_VCVTPS2PH))]
12505 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
12506 [(set_attr "type" "ssecvt")
12507 (set_attr "prefix" "vex")
12508 (set_attr "mode" "V8SF")])
;; VEC_GATHER_MODE lists the data vector modes the gather patterns are
;; instantiated for: 128-bit and 256-bit vectors with 32-bit or 64-bit
;; integer or floating-point elements.
12510 ;; For gather* insn patterns
12511 (define_mode_iterator VEC_GATHER_MODE
12512 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Maps each VEC_GATHER_MODE data mode to the index-vector mode used by the
;; avx2_gathersi patterns.  Indices are SImode elements: every data mode
;; maps to V4SI except V8SI and V8SF, which map to V8SI.
12513 (define_mode_attr VEC_GATHER_IDXSI
12514 [(V2DI "V4SI") (V2DF "V4SI")
12515 (V4DI "V4SI") (V4DF "V4SI")
12516 (V4SI "V4SI") (V4SF "V4SI")
12517 (V8SI "V8SI") (V8SF "V8SI")])
;; Maps each VEC_GATHER_MODE data mode to the index-vector mode used by the
;; avx2_gatherdi patterns.  Indices are DImode elements: the 128-bit data
;; modes map to V2DI and the 256-bit data modes map to V4DI.
12518 (define_mode_attr VEC_GATHER_IDXDI
12519 [(V2DI "V2DI") (V2DF "V2DI")
12520 (V4DI "V4DI") (V4DF "V4DI")
12521 (V4SI "V2DI") (V4SF "V2DI")
12522 (V8SI "V4DI") (V8SF "V4DI")])
;; Maps each VEC_GATHER_MODE data mode to the mode of the two extra
;; register inputs of the avx2_gatherdi patterns.  It is the identity for
;; every mode except V8SI and V8SF, which map to the 128-bit V4SI and V4SF.
12523 (define_mode_attr VEC_GATHER_SRCDI
12524 [(V2DI "V2DI") (V2DF "V2DF")
12525 (V4DI "V4DI") (V4DF "V4DF")
12526 (V4SI "V4SI") (V4SF "V4SF")
12527 (V8SI "V4SI") (V8SF "V4SF")])
;; Expander for gathers indexed by SImode elements.
;; Operands:
;;   0  destination register, mode VEC_GATHER_MODE
;;   1  register input in the same mode
;;   2  address part, checked by vsib_address_operand
;;   3  index register, mode <VEC_GATHER_IDXSI>
;;   4  second register input in the same mode as operand 0
;;   5  SImode constant checked by const1248_operand
;;   6  scratch register clobbered by the insn
;; The memory reference has the scalar element mode and is followed by a
;; (mem:BLK (scratch)) element.  The preparation code wraps operands 2, 3
;; and 5 into a Pmode UNSPEC_VSIBADDR.
;; The predicate name of operand 5 is written without the stray trailing
;; blank it used to carry, so it reads the same as in the matching insns.
;; NOTE(review): the rtx wrapping operands 2/3/5 inside the mem, the outer
;; unspec code, the expander condition and the operand slot that receives
;; the UNSPEC_VSIBADDR are not visible in this listing.
12529 (define_expand "avx2_gathersi<mode>"
12530 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12531 (unspec:VEC_GATHER_MODE
12532 [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
12533 (mem:<ssescalarmode>
12535 [(match_operand 2 "vsib_address_operand" "")
12536 (match_operand:<VEC_GATHER_IDXSI>
12537 3 "register_operand" "")
12538 (match_operand:SI 5 "const1248_operand" "")]))
12539 (mem:BLK (scratch))
12540 (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
12542 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12546 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12547 operands[5]), UNSPEC_VSIBADDR);
;; Insn matching the SImode-indexed gather.
;; Operand 0 (result) and operand 1 (scratch) are both early-clobber
;; registers ("=&x").  Operand 2 is tied to operand 0 and operand 5 is tied
;; to the scratch operand 1 through matching constraints.  Operand 7 is the
;; memory reference, matched by vsib_mem_operator, whose address is built
;; from operands 3 (address), 4 (index register) and 6 (constant accepted
;; by const1248_operand).
;; The output template prints the scratch (%1), the memory operand (%7) and
;; the destination (%0).
;; NOTE(review): the rtx wrapping the address operands, the outer unspec
;; code and the insn condition are not visible in this listing.
12550 (define_insn "*avx2_gathersi<mode>"
12551 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12552 (unspec:VEC_GATHER_MODE
12553 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
12554 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12556 [(match_operand:P 3 "vsib_address_operand" "p")
12557 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
12558 (match_operand:SI 6 "const1248_operand" "n")]
12560 (mem:BLK (scratch))
12561 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
12563 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12565 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
12566 [(set_attr "type" "ssemov")
12567 (set_attr "prefix" "vex")
12568 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SImode-indexed gather insn with one operand fewer: there
;; is no register input tied to operand 0, so the remaining operands are
;; numbered one lower (address 2, index 3, tied input 4, constant 5, memory
;; operator 6).  Operand 4 is still tied to the early-clobber scratch
;; operand 1.  The template prints %1, %6 and %0.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing; presumably it is a placeholder standing in for the absent
;; input -- confirm.  The rtx wrapping the address operands, the outer
;; unspec code and the insn condition are not visible either.
12570 (define_insn "*avx2_gathersi<mode>_2"
12571 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12572 (unspec:VEC_GATHER_MODE
12574 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12576 [(match_operand:P 2 "vsib_address_operand" "p")
12577 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
12578 (match_operand:SI 5 "const1248_operand" "n")]
12580 (mem:BLK (scratch))
12581 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
12583 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12585 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
12586 [(set_attr "type" "ssemov")
12587 (set_attr "prefix" "vex")
12588 (set_attr "mode" "<sseinsnmode>")])
;; Expander for gathers indexed by DImode elements.
;; Operands:
;;   0  destination register, mode VEC_GATHER_MODE
;;   1  register input, mode <VEC_GATHER_SRCDI>
;;   2  address part, checked by vsib_address_operand
;;   3  index register, mode <VEC_GATHER_IDXDI>
;;   4  second register input, mode <VEC_GATHER_SRCDI>
;;   5  SImode constant checked by const1248_operand
;;   6  scratch register clobbered by the insn
;; Operands 1 and 4 use <VEC_GATHER_SRCDI>, which differs from the
;; destination mode only for V8SI and V8SF.  The preparation code wraps
;; operands 2, 3 and 5 into a Pmode UNSPEC_VSIBADDR.
;; The predicate name of operand 5 is written without the stray trailing
;; blank it used to carry, so it reads the same as in the matching insns.
;; NOTE(review): the rtx wrapping operands 2/3/5 inside the mem, the outer
;; unspec code, the expander condition and the operand slot that receives
;; the UNSPEC_VSIBADDR are not visible in this listing.
12590 (define_expand "avx2_gatherdi<mode>"
12591 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
12592 (unspec:VEC_GATHER_MODE
12593 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
12594 (mem:<ssescalarmode>
12596 [(match_operand 2 "vsib_address_operand" "")
12597 (match_operand:<VEC_GATHER_IDXDI>
12598 3 "register_operand" "")
12599 (match_operand:SI 5 "const1248_operand" "")]))
12600 (mem:BLK (scratch))
12601 (match_operand:<VEC_GATHER_SRCDI>
12602 4 "register_operand" "")]
12604 (clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
12608 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
12609 operands[5]), UNSPEC_VSIBADDR);
;; Insn matching the DImode-indexed gather.
;; Operand 0 (result) and operand 1 (scratch) are early-clobber registers
;; in VEC_GATHER_MODE.  Operands 2 and 5 have mode <VEC_GATHER_SRCDI> and
;; are tied to operands 0 and 1 respectively through matching constraints.
;; Operand 7 is the memory reference (vsib_mem_operator) built from
;; operands 3 (address), 4 (index register, <VEC_GATHER_IDXDI>) and 6
;; (constant accepted by const1248_operand).
;; The output template prints operands 5 and 2 rather than 1 and 0.
;; NOTE(review): presumably this is deliberate so the registers are printed
;; in the <VEC_GATHER_SRCDI> mode, which is narrower than <MODE> for V8SI
;; and V8SF -- confirm.  The rtx wrapping the address operands, the outer
;; unspec code and the insn condition are not visible in this listing.
12612 (define_insn "*avx2_gatherdi<mode>"
12613 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12614 (unspec:VEC_GATHER_MODE
12615 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
12616 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
12618 [(match_operand:P 3 "vsib_address_operand" "p")
12619 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
12620 (match_operand:SI 6 "const1248_operand" "n")]
12622 (mem:BLK (scratch))
12623 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
12625 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12627 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
12628 [(set_attr "type" "ssemov")
12629 (set_attr "prefix" "vex")
12630 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DImode-indexed gather insn with one operand fewer: there
;; is no register input tied to operand 0, so the remaining operands are
;; numbered one lower (address 2, index 3, tied input 4, constant 5, memory
;; operator 6).  Operand 4 has mode <VEC_GATHER_SRCDI> and is tied to the
;; early-clobber scratch operand 1.
;; The output code compares <MODE>mode with <VEC_GATHER_SRCDI>mode: when
;; they differ the destination is printed with the %x0 modifier, otherwise
;; as plain %0.  Both templates print operand 4 and the memory operand %6.
;; NOTE(review): the first element of the unspec vector is not visible in
;; this listing; presumably it is a placeholder standing in for the absent
;; input -- confirm.  The rtx wrapping the address operands, the outer
;; unspec code and the insn condition are not visible either.
12632 (define_insn "*avx2_gatherdi<mode>_2"
12633 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
12634 (unspec:VEC_GATHER_MODE
12636 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
12638 [(match_operand:P 2 "vsib_address_operand" "p")
12639 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
12640 (match_operand:SI 5 "const1248_operand" "n")]
12642 (mem:BLK (scratch))
12643 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
12645 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
12648 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
12649 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
12650 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
12652 [(set_attr "type" "ssemov")
12653 (set_attr "prefix" "vex")
12654 (set_attr "mode" "<sseinsnmode>")])