1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; V16: mode iterator used by the move, push and movmisalign patterns below.
;; Each 256-bit mode is listed with the condition "TARGET_AVX"; each 128-bit
;; mode is listed unconditionally.
;; NOTE(review): the comment above mentions V1TImode, but no V1TI entry is
;; visible in this list -- confirm against the complete definition.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; V: mode iterator over the integer and float vector modes.  The 256-bit
;; modes are conditional on "TARGET_AVX"; the 128-bit modes are unconditional
;; except V2DF, which is conditional on "TARGET_SSE2".
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V_128: the four integer modes and V4SF are unconditional; V2DF is
;; conditional on "TARGET_SSE2".
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; V_256: six modes, none of which carries a per-mode condition here.
;; NOTE(review): unlike V16/V/VF there is no "TARGET_AVX" guard attached to
;; the entries; presumably every pattern using V_256 tests TARGET_AVX in its
;; own condition -- confirm at the use sites.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; VF: V8SF and V4DF are conditional on "TARGET_AVX", V2DF on "TARGET_SSE2";
;; V4SF is unconditional.  This is the iterator used by most of the
;; arithmetic patterns in this section.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; VF1: V8SF (conditional on "TARGET_AVX") and V4SF (unconditional).
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; VF2: V4DF (conditional on "TARGET_AVX") and V2DF.
;; NOTE(review): V2DF is unconditional here, whereas V, V_128, VF and VF_128
;; attach "TARGET_SSE2" to it; presumably the patterns using VF2 test
;; TARGET_SSE2 themselves -- confirm.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; VF_128: V4SF (unconditional) and V2DF (conditional on "TARGET_SSE2").
;; Used by the scalar "vm" patterns below.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; NOTE(review): the mode list of VF_256 is not visible after this line in
;; this view, so the definition appears unterminated -- confirm against the
;; complete file before relying on it.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; VI: integer vector modes only.  The 256-bit modes are conditional on
;; "TARGET_AVX"; the 128-bit modes are unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 ;; All QImode vector integer modes
;; VI1: V32QI (conditional on "TARGET_AVX") and V16QI (unconditional).
;; Used by the movdqu and lddqu patterns below.
77 (define_mode_iterator VI1
78 [(V32QI "TARGET_AVX") V16QI])
80 ;; All DImode vector integer modes
;; VI8: V4DI (conditional on "TARGET_AVX") and V2DI (unconditional).
;; Used by the integer non-temporal store pattern below.
81 (define_mode_iterator VI8
82 [(V4DI "TARGET_AVX") V2DI])
84 ;; All 128bit vector integer modes
;; VI_128: the four 128-bit integer modes, all unconditional.
85 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
87 ;; Random 128bit vector integer mode combinations
;; In these names each digit after "VI" stands for one listed mode:
;; 1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI.  All entries are unconditional.
88 (define_mode_iterator VI12_128 [V16QI V8HI])
89 (define_mode_iterator VI14_128 [V16QI V4SI])
90 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
91 (define_mode_iterator VI24_128 [V8HI V4SI])
92 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
94 ;; Int-float size matches
;; Each iterator pairs one integer vector mode with the float vector mode
;; that has the same element count (e.g. V4SI with V4SF).  The trailing
;; _128 / _256 matches the grouping used by V_128 and V_256.
95 (define_mode_iterator VI4F_128 [V4SI V4SF])
96 (define_mode_iterator VI8F_128 [V2DI V2DF])
97 (define_mode_iterator VI4F_256 [V8SI V8SF])
98 (define_mode_iterator VI8F_256 [V4DI V4DF])
100 ;; Mapping from float mode to required SSE level
;; <sse>: "sse" for SF/V4SF, "sse2" for DF/V2DF, "avx" for V8SF/V4DF.
;; It forms the leading part of pattern names such as "<sse>_movu..." and
;; "<sse>_div<mode>3" below.
101 (define_mode_attr sse
102 [(SF "sse") (DF "sse2")
103 (V4SF "sse") (V2DF "sse2")
104 (V8SF "avx") (V4DF "avx")])
;; <sse2>: "sse2" for V16QI/V2DI, "avx" for V32QI/V4DI.  It forms the leading
;; part of the "<sse2>_movdqu<avxsizesuffix>" and "<sse2>_movnt<mode>" names.
106 (define_mode_attr sse2
107 [(V16QI "sse2") (V32QI "avx")
108 (V2DI "sse2") (V4DI "avx")])
;; <sse3>: "sse3" for V16QI, "avx" for V32QI.  It forms the leading part of
;; the "<sse3>_lddqu<avxsizesuffix>" name.
110 (define_mode_attr sse3
111 [(V16QI "sse3") (V32QI "avx")])
;; <sse4_1>: "sse4_1" for the 128-bit float modes, "avx" for the 256-bit
;; float modes.  No use of this attribute is visible in this part of the file.
113 (define_mode_attr sse4_1
114 [(V4SF "sse4_1") (V2DF "sse4_1")
115 (V8SF "avx") (V4DF "avx")])
;; <avxsizesuffix>: "256" for every 256-bit mode and the empty string for
;; every 128-bit mode.  It is appended to pattern names, e.g.
;; "<sse2>_movdqu<avxsizesuffix>".
117 (define_mode_attr avxsizesuffix
118 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
119 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
120 (V8SF "256") (V4DF "256")
121 (V4SF "") (V2DF "")])
123 ;; SSE instruction mode
;; <sseinsnmode>: "OI" for the 256-bit integer modes, "TI" for the 128-bit
;; integer modes (including V1TI); each float mode maps to its own name.
;; The patterns below use it as the value of the insn "mode" attribute.
124 (define_mode_attr sseinsnmode
125 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
126 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
127 (V8SF "V8SF") (V4DF "V4DF")
128 (V4SF "V4SF") (V2DF "V2DF")])
130 ;; Mapping of vector float modes to an integer mode of the same size
;; <sseintvecmode>: each float vector mode maps to the integer vector mode
;; with the same element count (V8SF -> V8SI, V2DF -> V2DI, ...).
131 (define_mode_attr sseintvecmode
132 [(V8SF "V8SI") (V4DF "V4DI")
133 (V4SF "V4SI") (V2DF "V2DI")])
135 ;; Mapping of vector modes to a vector mode of double size
;; <ssedoublevecmode>: same element type, twice the element count
;; (e.g. V16QI -> V32QI, V8SF -> V16SF).
136 (define_mode_attr ssedoublevecmode
137 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
138 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
139 (V8SF "V16SF") (V4DF "V8DF")
140 (V4SF "V8SF") (V2DF "V4DF")])
142 ;; Mapping of vector modes to a vector mode of half size
;; <ssehalfvecmode>: same element type, half the element count
;; (e.g. V32QI -> V16QI, V8SF -> V4SF).
;; NOTE(review): the closing "])" is not visible in this view, and there is
;; no visible entry for V2DI, V4SF or V2DF -- confirm against the complete
;; definition.
143 (define_mode_attr ssehalfvecmode
144 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
145 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
146 (V8SF "V4SF") (V4DF "V2DF")
149 ;; Mapping of vector modes back to the scalar modes
;; <ssescalarmode>: the element mode of each vector mode (QI, HI, SI, DI,
;; SF or DF).  The scalar "vm" patterns below use it as their insn "mode"
;; attribute.
150 (define_mode_attr ssescalarmode
151 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
152 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
153 (V8SF "SF") (V4DF "DF")
154 (V4SF "SF") (V2DF "DF")])
156 ;; Number of scalar elements in each vector type
;; <ssescalarnum>: the element count of each vector mode, as a decimal
;; string (it matches the number embedded in the mode name).
157 (define_mode_attr ssescalarnum
158 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
159 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
160 (V8SF "8") (V4DF "4")
161 (V4SF "4") (V2DF "2")])
163 ;; SSE scalar suffix for vector modes
;; <ssescalarmodesuffix>: "ss" or "sd", appended to mnemonics by the scalar
;; "vm" patterns below (e.g. mul<ssescalarmodesuffix>).
;; NOTE(review): the closing "])" and any further entries are not visible in
;; this view -- confirm against the complete definition.
164 (define_mode_attr ssescalarmodesuffix
165 [(V8SF "ss") (V4DF "sd")
166 (V4SF "ss") (V2DF "sd")
167 (V8SI "ss") (V4DI "sd")
170 ;; Pack/unpack vector modes
;; <sseunpackmode>: 128-bit integer mode -> the 128-bit integer mode with
;; half as many elements (V16QI -> V8HI -> V4SI -> V2DI).
171 (define_mode_attr sseunpackmode
172 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
;; <ssepackmode>: the reverse of the sseunpackmode mapping -- each mode maps
;; to the 128-bit integer mode with twice as many elements.
174 (define_mode_attr ssepackmode
175 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
177 ;; Mapping of the max integer size for xop rotate immediate constraint
;; <sserotatemax>: per-mode maximum immediate, as a decimal string.
;; The values 7/15/31/63 are presumably the element width in bits minus
;; one -- confirm.
178 (define_mode_attr sserotatemax
179 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
181 ;; Mapping of mode to cast intrinsic name
;; <castmode>: "si" for V8SI, "ps" for V8SF, "pd" for V4DF.  Only these three
;; 256-bit modes have an entry.
182 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
184 ;; Instruction suffix for sign and zero extensions.
;; <extsuffix>: a code attribute (keyed on the RTL code, not the mode):
;; "sx" for sign_extend, "zx" for zero_extend.
185 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; AVX256MODE2P: the same three 256-bit modes that have a castmode entry
;; (V8SI, V8SF, V4DF).  No use of this iterator is visible in this part of
;; the file.
190 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; FMAMODE: the scalar modes SF and DF plus the four vector float modes,
;; all unconditional.  No use of this iterator is visible in this part of
;; the file.
192 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
194 ;; Mapping of immediate bits for blend instructions
;; <blendbits>: each value equals 2^N - 1 where N is the element count that
;; ssescalarnum lists for the mode (8 -> 255, 4 -> 15, 2 -> 3), i.e. one
;; immediate bit per element.
195 (define_mode_attr blendbits
196 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
198 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
206 ;; All of these patterns are enabled for SSE1 as well as SSE2.
207 ;; This is essential for maintaining stable calling conventions.
;; Expander "mov<mode>", instantiated for every mode in V16.  Both operands
;; use the nonimmediate_operand predicate.  The preparation code visible here
;; hands the mode and the operands array to ix86_expand_vector_move.
;; NOTE(review): the enabling condition and the end of the preparation
;; statements are not visible in this view.
209 (define_expand "mov<mode>"
210 [(set (match_operand:V16 0 "nonimmediate_operand" "")
211 (match_operand:V16 1 "nonimmediate_operand" ""))]
214 ix86_expand_vector_move (<MODE>mode, operands);
;; "*mov<mode>_internal": the move insn for every mode in V16.
;; Alternatives (destination <- source): x <- C, x <- xm, m <- x.
;; The visible part of the condition requires at least one of the two
;; operands to be a register.
;; Output code: the first visible return yields standard_sse_constant_opcode;
;; the remaining returns are selected by get_attr_mode and give an aligned
;; move template (%vmovaps, %vmovapd, %vmovdqa) or, in a branch whose
;; condition includes a misaligned_operand test on either operand, the
;; unaligned template (vmovups, vmovupd, vmovdqu).  In the movapd and movdqa
;; groups, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL selects %vmovaps instead.
;; NOTE(review): the case labels and the first half of every misalignment
;; condition are not visible in this view, so which insn mode selects which
;; group is presumed from the mnemonics -- confirm.
;; Attributes: "type" is sselog1 for alternative 0 and ssemov otherwise.
;; "mode" is "<sseinsnmode>" when TARGET_AVX is nonzero; otherwise the cond
;; below yields V4SF, V2DF or TI.
218 (define_insn "*mov<mode>_internal"
219 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
220 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
222 && (register_operand (operands[0], <MODE>mode)
223 || register_operand (operands[1], <MODE>mode))"
225 switch (which_alternative)
228 return standard_sse_constant_opcode (insn, operands[1]);
231 switch (get_attr_mode (insn))
236 && (misaligned_operand (operands[0], <MODE>mode)
237 || misaligned_operand (operands[1], <MODE>mode)))
238 return "vmovups\t{%1, %0|%0, %1}";
240 return "%vmovaps\t{%1, %0|%0, %1}";
245 && (misaligned_operand (operands[0], <MODE>mode)
246 || misaligned_operand (operands[1], <MODE>mode)))
247 return "vmovupd\t{%1, %0|%0, %1}";
248 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
249 return "%vmovaps\t{%1, %0|%0, %1}";
251 return "%vmovapd\t{%1, %0|%0, %1}";
256 && (misaligned_operand (operands[0], <MODE>mode)
257 || misaligned_operand (operands[1], <MODE>mode)))
258 return "vmovdqu\t{%1, %0|%0, %1}";
259 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
260 return "%vmovaps\t{%1, %0|%0, %1}";
262 return "%vmovdqa\t{%1, %0|%0, %1}";
271 [(set_attr "type" "sselog1,ssemov,ssemov")
272 (set_attr "prefix" "maybe_vex")
274 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
275 (const_string "<sseinsnmode>")
277 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
279 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
280 (and (eq_attr "alternative" "2")
281 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
283 (const_string "V4SF")
284 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
285 (const_string "V4SF")
286 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
287 (const_string "V2DF")
289 (const_string "TI")))])
;; "sse2_movq128": emits %vmovq from operand 1 (V2DI, register or memory)
;; into operand 0 (V2DI register).  Operand 1 is followed by the selector
;; (parallel [(const_int 0)]), i.e. element 0 is picked.
;; NOTE(review): the RTL operators enclosing the selection and the enabling
;; condition are not visible in this view.
291 (define_insn "sse2_movq128"
292 [(set (match_operand:V2DI 0 "register_operand" "=x")
295 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
296 (parallel [(const_int 0)]))
299 "%vmovq\t{%1, %0|%0, %1}"
300 [(set_attr "type" "ssemov")
301 (set_attr "prefix" "maybe_vex")
302 (set_attr "mode" "TI")])
304 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
305 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
306 ;; from memory, we'd prefer to load the memory directly into the %xmm
307 ;; register. To facilitate this happy circumstance, this pattern won't
308 ;; split until after register allocation. If the 64-bit value didn't
309 ;; come from memory, this is the best we can do. This is much better
310 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register from that temporary.
;; "movdi_to_sse": operand 0 is a V4SI register, operand 1 a DI value in a
;; general register (alternative 0) or in memory (alternative 1), operand 2
;; a V4SI scratch that is an early-clobber register in alternative 0 and
;; unused ("X") in alternative 1.
;; Enabled for !TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES and
;; split once reload_completed holds.
;; Split code, register source: load the SImode subreg at byte offset 0 into
;; operands[0] and the one at byte offset 4 into operands[2] (both via
;; sse2_loadld with a zero V4SI vector), then combine them with
;; vec_interleave_lowv4si.
;; Split code, memory source: a single vec_concatv2di of operands[1] with
;; const0_rtx into the V2DI lowpart of operands[0].
;; NOTE(review): the remaining arguments of the interleave call and the
;; handling of any other operand kind are not visible in this view.
313 (define_insn_and_split "movdi_to_sse"
315 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
316 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
317 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
318 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
320 "&& reload_completed"
323 if (register_operand (operands[1], DImode))
325 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
326 Assemble the 64-bit DImode value in an xmm register. */
327 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
328 gen_rtx_SUBREG (SImode, operands[1], 0)));
329 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
330 gen_rtx_SUBREG (SImode, operands[1], 4)));
331 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
334 else if (memory_operand (operands[1], DImode))
335 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
336 operands[1], const0_rtx));
;; NOTE(review): the form keyword opening this pattern is not visible in this
;; view; the shape (match template, condition, replacement, preparation code)
;; suggests a define_split -- confirm.
;; Matches a V4SF register set from a zero_extended_scalar_load_operand once
;; TARGET_SSE && reload_completed holds.  The replacement uses
;; vec_duplicate:V4SF of operand 1; the preparation code first narrows
;; operands[1] to its SFmode subreg at offset 0 and sets operands[2] to the
;; zero V4SF vector.
342 [(set (match_operand:V4SF 0 "register_operand" "")
343 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
344 "TARGET_SSE && reload_completed"
347 (vec_duplicate:V4SF (match_dup 1))
351 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
352 operands[2] = CONST0_RTX (V4SFmode);
;; NOTE(review): the form keyword opening this pattern is not visible in this
;; view; presumably a define_split -- confirm.
;; Matches a V2DF register set from a zero_extended_scalar_load_operand once
;; TARGET_SSE2 && reload_completed holds, and replaces it with a vec_concat
;; of operand 1 (narrowed to its DFmode subreg at offset 0) and operand 2
;; (the DFmode zero constant).
356 [(set (match_operand:V2DF 0 "register_operand" "")
357 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
358 "TARGET_SSE2 && reload_completed"
359 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
361 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
362 operands[2] = CONST0_RTX (DFmode);
;; Expander "push<mode>1" for every mode in V16: takes one register operand
;; and passes it, with the mode, to ix86_expand_push.
;; NOTE(review): the enabling condition is not visible in this view.
365 (define_expand "push<mode>1"
366 [(match_operand:V16 0 "register_operand" "")]
369 ix86_expand_push (<MODE>mode, operands[0]);
;; Expander "movmisalign<mode>" for every mode in V16: passes the mode and
;; the operands array to ix86_expand_vector_move_misalign.
;; NOTE(review): the enabling condition is not visible in this view.
373 (define_expand "movmisalign<mode>"
374 [(set (match_operand:V16 0 "nonimmediate_operand" "")
375 (match_operand:V16 1 "nonimmediate_operand" ""))]
378 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Named expander for the unaligned float move, for every mode in VF.
;; When both operands are MEM, operand 1 is first forced into a register so
;; the insn below (which rejects MEM-to-MEM) can match.
;; NOTE(review): <ssemodesuffix> is defined outside this view, and the unspec
;; wrapping operand 1 is not visible here.
382 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
383 [(set (match_operand:VF 0 "nonimmediate_operand" "")
385 [(match_operand:VF 1 "nonimmediate_operand" "")]
389 if (MEM_P (operands[0]) && MEM_P (operands[1]))
390 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float move insn for every mode in VF.
;; Alternatives (destination <- source): x <- xm, m <- x.
;; Enabled for TARGET_SSE unless both operands are MEM.
;; Emits %vmovu<ssemodesuffix>; the "movu" attribute is set to "1".
393 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
394 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
396 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
398 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
399 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
400 [(set_attr "type" "ssemov")
401 (set_attr "movu" "1")
402 (set_attr "prefix" "maybe_vex")
403 (set_attr "mode" "<MODE>")])
;; Named expander for the unaligned integer move, for every mode in VI1.
;; When both operands are MEM, operand 1 is first forced into a register so
;; the insn below (which rejects MEM-to-MEM) can match.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this view.
405 (define_expand "<sse2>_movdqu<avxsizesuffix>"
406 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
407 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
411 if (MEM_P (operands[0]) && MEM_P (operands[1]))
412 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned integer move insn for every mode in VI1.
;; Alternatives (destination <- source): x <- xm, m <- x.
;; Enabled for TARGET_SSE2 unless both operands are MEM.  Emits %vmovdqu.
;; The "prefix_data16" attribute is computed from a TARGET_AVX test.
;; NOTE(review): the two values that test chooses between are not visible in
;; this view.
415 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
416 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
417 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
419 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
420 "%vmovdqu\t{%1, %0|%0, %1}"
421 [(set_attr "type" "ssemov")
422 (set_attr "movu" "1")
423 (set (attr "prefix_data16")
425 (ne (symbol_ref "TARGET_AVX") (const_int 0))
428 (set_attr "prefix" "maybe_vex")
429 (set_attr "mode" "<sseinsnmode>")])
;; Load-only pattern for every mode in VI1: operand 0 must be a register and
;; operand 1 must be memory.  Emits %vlddqu.
;; Both "prefix_data16" and "prefix_rep" are computed from a TARGET_AVX test.
;; NOTE(review): the values those tests choose between, the unspec name and
;; the enabling condition are not visible in this view.
431 (define_insn "<sse3>_lddqu<avxsizesuffix>"
432 [(set (match_operand:VI1 0 "register_operand" "=x")
433 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
436 "%vlddqu\t{%1, %0|%0, %1}"
437 [(set_attr "type" "ssemov")
438 (set_attr "movu" "1")
439 (set (attr "prefix_data16")
441 (ne (symbol_ref "TARGET_AVX") (const_int 0))
444 (set (attr "prefix_rep")
446 (ne (symbol_ref "TARGET_AVX") (const_int 0))
449 (set_attr "prefix" "maybe_vex")
450 (set_attr "mode" "<sseinsnmode>")])
;; "sse2_movntsi": stores an SImode general register (constraint "r") to
;; memory with movnti.  There is no "prefix" attribute and no %v in the
;; template, unlike the vector store patterns around it.
;; NOTE(review): the "mode" attribute is "V2DF" although both operands are
;; SImode -- looks inconsistent; confirm whether "SI" was intended.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this view.
452 (define_insn "sse2_movntsi"
453 [(set (match_operand:SI 0 "memory_operand" "=m")
454 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
457 "movnti\t{%1, %0|%0, %1}"
458 [(set_attr "type" "ssemov")
459 (set_attr "prefix_data16" "0")
460 (set_attr "mode" "V2DF")])
;; Float-vector store pattern for every mode in VF: operand 0 must be memory,
;; operand 1 an "x" register.  Emits %vmovnt<ssemodesuffix>.
;; NOTE(review): the unspec name and the enabling condition are not visible
;; in this view.
462 (define_insn "<sse>_movnt<mode>"
463 [(set (match_operand:VF 0 "memory_operand" "=m")
464 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
467 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
468 [(set_attr "type" "ssemov")
469 (set_attr "prefix" "maybe_vex")
470 (set_attr "mode" "<MODE>")])
;; Integer-vector store pattern for every mode in VI8: operand 0 must be
;; memory, operand 1 an "x" register.  Emits %vmovntdq.
;; NOTE(review): "type" is "ssecvt" here, while the other store patterns in
;; this group (sse2_movntsi, <sse>_movnt<mode>) use "ssemov" -- confirm this
;; is intentional, since the type attribute feeds scheduling descriptions.
;; NOTE(review): the unspec name, the enabling condition and the values
;; chosen for "prefix_data16" are not visible in this view.
472 (define_insn "<sse2>_movnt<mode>"
473 [(set (match_operand:VI8 0 "memory_operand" "=m")
474 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
477 "%vmovntdq\t{%1, %0|%0, %1}"
478 [(set_attr "type" "ssecvt")
479 (set (attr "prefix_data16")
481 (ne (symbol_ref "TARGET_AVX") (const_int 0))
484 (set_attr "prefix" "maybe_vex")
485 (set_attr "mode" "<sseinsnmode>")])
487 ; Expand patterns for non-temporal stores. At the moment, only those
488 ; that directly map to insns are defined; it would be possible to
489 ; define patterns for other modes that would expand to several insns.
491 ;; Modes handled by storent patterns.
;; STORENT_MODE: SI is conditional on "TARGET_SSE2", SF and DF on
;; "TARGET_SSE4A", V8SF and V4DF on "TARGET_AVX", V2DF on "TARGET_SSE2";
;; V4SF is unconditional.
;; NOTE(review): no integer vector mode is visible in this list even though
;; an integer movnt pattern exists above; an entry may be missing from this
;; view -- confirm.
492 (define_mode_iterator STORENT_MODE
493 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
495 (V8SF "TARGET_AVX") V4SF
496 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Expander "storent<mode>" for every mode in STORENT_MODE: a set of a memory
;; operand from a register operand.  No preparation code is visible, so the
;; template itself is what gets emitted.
;; NOTE(review): the wrapper around operand 1 and the closing of the form are
;; not visible in this view.
498 (define_expand "storent<mode>"
499 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
501 [(match_operand:STORENT_MODE 1 "register_operand" "")]
505 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
507 ;; Parallel floating point arithmetic
509 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "<code><mode>2" for every mode in VF.  All work is delegated to
;; ix86_expand_fp_absneg_operator, called with the RTL code, the mode and the
;; operands array; the DONE that follows means the template above is not
;; emitted.
;; NOTE(review): the code iterator supplying <code>/<CODE> is not visible in
;; this view; the helper's name suggests the abs and neg codes -- confirm.
511 (define_expand "<code><mode>2"
512 [(set (match_operand:VF 0 "register_operand" "")
514 (match_operand:VF 1 "register_operand" "")))]
516 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; "*absneg<mode>2": insn-and-split for every mode in VF.  Operand 3 is the
;; operator (predicate absneg_operator) applied to operand 1; operand 2 is
;; only referenced through a (use ...) -- presumably the constant mask the
;; helper expander supplies; confirm.
;; Four alternatives; per the "isa" attribute the first two are noavx and
;; the last two avx.
;; Split code visible here:
;;   - if operands[1] is MEM it becomes the second source (op2) and
;;     operands[2] the first (op1); otherwise the order is kept;
;;   - there is an rtx_equal_p test of operands[0] against operands[1]
;;     whose consequence is not visible in this view;
;;   - the replacement operation is XOR when operand 3 is NEG, AND otherwise,
;;     wrapped in a SET of operands[0].
518 (define_insn_and_split "*absneg<mode>2"
519 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
520 (match_operator:VF 3 "absneg_operator"
521 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
522 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
528 enum rtx_code absneg_op;
534 if (MEM_P (operands[1]))
535 op1 = operands[2], op2 = operands[1];
537 op1 = operands[1], op2 = operands[2];
542 if (rtx_equal_p (operands[0], operands[1]))
548 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
549 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
550 t = gen_rtx_SET (VOIDmode, operands[0], t);
554 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander "<plusminus_insn><mode>3" for every mode in VF.  Both sources
;; may be register or memory; ix86_fixup_binary_operands_no_copy is called
;; with the RTL code, the mode and the operands before the template is
;; emitted (there is no DONE).
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this view.
556 (define_expand "<plusminus_insn><mode>3"
557 [(set (match_operand:VF 0 "register_operand" "")
559 (match_operand:VF 1 "nonimmediate_operand" "")
560 (match_operand:VF 2 "nonimmediate_operand" "")))]
562 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; "*<plusminus_insn><mode>3": packed add/sub insn for every mode in VF.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints the
;; two-operand form; alternative 1 (avx) prints the three-operand v-prefixed
;; form.  <comm> in the operand 1 constraint is an attribute defined outside
;; this view.
;; Enabled for TARGET_SSE when ix86_binary_operator_ok accepts the operands.
564 (define_insn "*<plusminus_insn><mode>3"
565 [(set (match_operand:VF 0 "register_operand" "=x,x")
567 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
568 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
569 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
571 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
572 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
573 [(set_attr "isa" "noavx,avx")
574 (set_attr "type" "sseadd")
575 (set_attr "prefix" "orig,vex")
576 (set_attr "mode" "<MODE>")])
;; "<sse>_vm<plusminus_insn><mode>3": scalar-suffix add/sub for the VF_128
;; modes.  The mnemonic takes <ssescalarmodesuffix> and the insn "mode"
;; attribute is the element mode <ssescalarmode>.  Operand 1 must be a
;; register (tied to the destination in the noavx alternative); operand 2
;; may be memory.
;; NOTE(review): the enclosing RTL operators and the enabling condition are
;; not visible in this view.
578 (define_insn "<sse>_vm<plusminus_insn><mode>3"
579 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
582 (match_operand:VF_128 1 "register_operand" "0,x")
583 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
588 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
589 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
590 [(set_attr "isa" "noavx,avx")
591 (set_attr "type" "sseadd")
592 (set_attr "prefix" "orig,vex")
593 (set_attr "mode" "<ssescalarmode>")])
;; Expander "mul<mode>3" for every mode in VF.  Both sources may be register
;; or memory; ix86_fixup_binary_operands_no_copy is called with MULT before
;; the template is emitted (there is no DONE).
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this view.
595 (define_expand "mul<mode>3"
596 [(set (match_operand:VF 0 "register_operand" "")
598 (match_operand:VF 1 "nonimmediate_operand" "")
599 (match_operand:VF 2 "nonimmediate_operand" "")))]
601 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; "*mul<mode>3": packed multiply insn for every mode in VF.  Operand 1 is
;; marked commutative ("%") in the noavx alternative, where it is tied to the
;; destination.  Enabled for TARGET_SSE when ix86_binary_operator_ok accepts
;; the operands.  Prints mul<ssemodesuffix> (two operands) or
;; vmul<ssemodesuffix> (three operands).
603 (define_insn "*mul<mode>3"
604 [(set (match_operand:VF 0 "register_operand" "=x,x")
606 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
607 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
608 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
610 mul<ssemodesuffix>\t{%2, %0|%0, %2}
611 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
612 [(set_attr "isa" "noavx,avx")
613 (set_attr "type" "ssemul")
614 (set_attr "prefix" "orig,vex")
615 (set_attr "mode" "<MODE>")])
;; "<sse>_vmmul<mode>3": scalar-suffix multiply for the VF_128 modes.
;; Operand 1 must be a register (no "%" commutativity marker here, unlike
;; "*mul<mode>3"); operand 2 may be memory.  The insn "mode" attribute is the
;; element mode.
;; NOTE(review): the enclosing RTL operators and the enabling condition are
;; not visible in this view.
617 (define_insn "<sse>_vmmul<mode>3"
618 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
621 (match_operand:VF_128 1 "register_operand" "0,x")
622 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
627 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
628 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
629 [(set_attr "isa" "noavx,avx")
630 (set_attr "type" "ssemul")
631 (set_attr "prefix" "orig,vex")
632 (set_attr "mode" "<ssescalarmode>")])
;; Expander "div<mode>3" for the VF2 modes (V4DF, V2DF).  It only fixes up
;; the operands via ix86_fixup_binary_operands_no_copy and then emits the
;; div template.  A second expander with the same name template follows for
;; the VF1 modes; the two mode sets do not overlap.
;; NOTE(review): the enabling condition is not visible in this view.
634 (define_expand "div<mode>3"
635 [(set (match_operand:VF2 0 "register_operand" "")
636 (div:VF2 (match_operand:VF2 1 "register_operand" "")
637 (match_operand:VF2 2 "nonimmediate_operand" "")))]
639 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Expander "div<mode>3" for the VF1 modes (V8SF, V4SF).  After the operand
;; fix-up, ix86_emit_swdivsf is called with the three operands and the mode
;; when all of the following hold: TARGET_SSE_MATH, TARGET_RECIP, not
;; optimizing the insn for size, flag_finite_math_only, !flag_trapping_math
;; and flag_unsafe_math_optimizations.
;; NOTE(review): the enabling condition and whatever follows the
;; ix86_emit_swdivsf call are not visible in this view.
641 (define_expand "div<mode>3"
642 [(set (match_operand:VF1 0 "register_operand" "")
643 (div:VF1 (match_operand:VF1 1 "register_operand" "")
644 (match_operand:VF1 2 "nonimmediate_operand" "")))]
647 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
649 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
650 && flag_finite_math_only && !flag_trapping_math
651 && flag_unsafe_math_optimizations)
653 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; "<sse>_div<mode>3": packed divide insn for every mode in VF.  Operand 1
;; must be a register (tied to the destination in the noavx alternative);
;; operand 2 may be memory.  Prints div<ssemodesuffix> or
;; vdiv<ssemodesuffix>.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this view.
658 (define_insn "<sse>_div<mode>3"
659 [(set (match_operand:VF 0 "register_operand" "=x,x")
661 (match_operand:VF 1 "register_operand" "0,x")
662 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
665 div<ssemodesuffix>\t{%2, %0|%0, %2}
666 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
667 [(set_attr "isa" "noavx,avx")
668 (set_attr "type" "ssediv")
669 (set_attr "prefix" "orig,vex")
670 (set_attr "mode" "<MODE>")])
;; "<sse>_vmdiv<mode>3": scalar-suffix divide for the VF_128 modes.  Same
;; operand layout as the packed divide above, but the mnemonic takes
;; <ssescalarmodesuffix> and the insn "mode" attribute is the element mode.
;; NOTE(review): the enclosing RTL operators and the enabling condition are
;; not visible in this view.
672 (define_insn "<sse>_vmdiv<mode>3"
673 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
676 (match_operand:VF_128 1 "register_operand" "0,x")
677 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
682 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
683 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
684 [(set_attr "isa" "noavx,avx")
685 (set_attr "type" "ssediv")
686 (set_attr "prefix" "orig,vex")
687 (set_attr "mode" "<ssescalarmode>")])
;; "<sse>_rcp<mode>2": single-alternative insn for the VF1 modes, expressed
;; as UNSPEC_RCP of operand 1 (register or memory).  Emits %vrcpps and tags
;; the insn with atom_sse_attr "rcp".
;; NOTE(review): the enabling condition is not visible in this view.
689 (define_insn "<sse>_rcp<mode>2"
690 [(set (match_operand:VF1 0 "register_operand" "=x")
692 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
694 "%vrcpps\t{%1, %0|%0, %1}"
695 [(set_attr "type" "sse")
696 (set_attr "atom_sse_attr" "rcp")
697 (set_attr "prefix" "maybe_vex")
698 (set_attr "mode" "SF")])
;; "sse_vmrcpv4sf2": scalar-suffix reciprocal for V4SF.  Operand 1 (register
;; or memory) is the unspec input; operand 2 is a register that is tied to
;; the destination in the noavx alternative and printed as the middle
;; operand of vrcpss in the avx alternative.
;; NOTE(review): the unspec name, the enclosing merge and the enabling
;; condition are not visible in this view.
700 (define_insn "sse_vmrcpv4sf2"
701 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
703 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
705 (match_operand:V4SF 2 "register_operand" "0,x")
709 rcpss\t{%1, %0|%0, %1}
710 vrcpss\t{%1, %2, %0|%0, %2, %1}"
711 [(set_attr "isa" "noavx,avx")
712 (set_attr "type" "sse")
713 (set_attr "atom_sse_attr" "rcp")
714 (set_attr "prefix" "orig,vex")
715 (set_attr "mode" "SF")])
;; Expander "sqrt<mode>2" for the VF2 modes (V4DF, V2DF).  No preparation
;; code is visible, so the sqrt template itself is emitted.  A second
;; expander with the same name template follows for the VF1 modes.
;; NOTE(review): the enabling condition and the closing of the form are not
;; visible in this view.
717 (define_expand "sqrt<mode>2"
718 [(set (match_operand:VF2 0 "register_operand" "")
719 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Expander "sqrt<mode>2" for the VF1 modes (V8SF, V4SF).
;; ix86_emit_swsqrtsf is called with a final argument of false when all of
;; the following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the insn
;; for size, flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations.  The rsqrt expander below passes true to
;; the same helper.
;; NOTE(review): the enabling condition and whatever follows the helper call
;; are not visible in this view.
722 (define_expand "sqrt<mode>2"
723 [(set (match_operand:VF1 0 "register_operand" "")
724 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
727 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
728 && flag_finite_math_only && !flag_trapping_math
729 && flag_unsafe_math_optimizations)
731 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; "<sse>_sqrt<mode>2": packed square-root insn for every mode in VF, with a
;; single alternative (register destination, register-or-memory source).
;; Emits %vsqrt<ssemodesuffix> and tags the insn with atom_sse_attr "sqrt".
;; NOTE(review): the enabling condition is not visible in this view.
736 (define_insn "<sse>_sqrt<mode>2"
737 [(set (match_operand:VF 0 "register_operand" "=x")
738 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
740 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
741 [(set_attr "type" "sse")
742 (set_attr "atom_sse_attr" "sqrt")
743 (set_attr "prefix" "maybe_vex")
744 (set_attr "mode" "<MODE>")])
;; "<sse>_vmsqrt<mode>2": scalar-suffix square root for the VF_128 modes.
;; Operand 1 (register or memory) is the value operated on; operand 2 is a
;; register tied to the destination in the noavx alternative and printed as
;; the middle operand of the avx form.
;; NOTE(review): the enclosing RTL operators and the enabling condition are
;; not visible in this view.
746 (define_insn "<sse>_vmsqrt<mode>2"
747 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
750 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
751 (match_operand:VF_128 2 "register_operand" "0,x")
755 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
756 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
757 [(set_attr "isa" "noavx,avx")
758 (set_attr "type" "sse")
759 (set_attr "atom_sse_attr" "sqrt")
760 (set_attr "prefix" "orig,vex")
761 (set_attr "mode" "<ssescalarmode>")])
;; Expander "rsqrt<mode>2" for the VF1 modes, expressed as UNSPEC_RSQRT of
;; operand 1.  The visible preparation code calls ix86_emit_swsqrtsf with a
;; final argument of true (the sqrt expander above passes false).
;; NOTE(review): the enabling condition and whatever follows the helper call
;; are not visible in this view.
763 (define_expand "rsqrt<mode>2"
764 [(set (match_operand:VF1 0 "register_operand" "")
766 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
769 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; "<sse>_rsqrt<mode>2": single-alternative insn for the VF1 modes, expressed
;; as UNSPEC_RSQRT of operand 1 (register or memory).  Emits %vrsqrtps.
;; Unlike the rcp and sqrt insns above, no atom_sse_attr is set here.
;; NOTE(review): the enabling condition is not visible in this view.
773 (define_insn "<sse>_rsqrt<mode>2"
774 [(set (match_operand:VF1 0 "register_operand" "=x")
776 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
778 "%vrsqrtps\t{%1, %0|%0, %1}"
779 [(set_attr "type" "sse")
780 (set_attr "prefix" "maybe_vex")
781 (set_attr "mode" "<MODE>")])
;; "sse_vmrsqrtv4sf2": scalar-suffix reciprocal square root for V4SF.
;; Operand 1 (register or memory) is the unspec input; operand 2 is a
;; register tied to the destination in the noavx alternative and printed as
;; the middle operand of vrsqrtss in the avx alternative.
;; NOTE(review): the unspec name, the enclosing merge and the enabling
;; condition are not visible in this view.
783 (define_insn "sse_vmrsqrtv4sf2"
784 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
786 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
788 (match_operand:V4SF 2 "register_operand" "0,x")
792 rsqrtss\t{%1, %0|%0, %1}
793 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
794 [(set_attr "isa" "noavx,avx")
795 (set_attr "type" "sse")
796 (set_attr "prefix" "orig,vex")
797 (set_attr "mode" "SF")])
799 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
800 ;; isn't really correct, as those rtl operators aren't defined when
801 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander "<code><mode>3" for every mode in VF (the min/max codes, per the
;; comment above).  When flag_finite_math_only is off, operand 1 is forced
;; into a register first, matching the register_operand predicate of the
;; non-finite insn below.  The operands are then fixed up via
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this view.
803 (define_expand "<code><mode>3"
804 [(set (match_operand:VF 0 "register_operand" "")
806 (match_operand:VF 1 "nonimmediate_operand" "")
807 (match_operand:VF 2 "nonimmediate_operand" "")))]
810 if (!flag_finite_math_only)
811 operands[1] = force_reg (<MODE>mode, operands[1]);
812 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; "*<code><mode>3_finite": the min/max insn used when flag_finite_math_only
;; is set.  Operand 1 may be memory and is marked commutative ("%") in the
;; noavx alternative.  Also requires TARGET_SSE and that
;; ix86_binary_operator_ok accepts the operands.
815 (define_insn "*<code><mode>3_finite"
816 [(set (match_operand:VF 0 "register_operand" "=x,x")
818 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
819 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
820 "TARGET_SSE && flag_finite_math_only
821 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
823 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
824 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
825 [(set_attr "isa" "noavx,avx")
826 (set_attr "type" "sseadd")
827 (set_attr "prefix" "orig,vex")
828 (set_attr "mode" "<MODE>")])
;; "*<code><mode>3": the min/max insn used when flag_finite_math_only is NOT
;; set.  In contrast to the _finite variant, operand 1 must be a register
;; and carries no "%" commutativity marker, so the operand order is kept.
830 (define_insn "*<code><mode>3"
831 [(set (match_operand:VF 0 "register_operand" "=x,x")
833 (match_operand:VF 1 "register_operand" "0,x")
834 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
835 "TARGET_SSE && !flag_finite_math_only"
837 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
838 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
839 [(set_attr "isa" "noavx,avx")
840 (set_attr "type" "sseadd")
841 (set_attr "prefix" "orig,vex")
842 (set_attr "mode" "<MODE>")])
;; "<sse>_vm<code><mode>3": scalar-suffix min/max for the VF_128 modes.
;; Operand 1 must be a register; the insn "mode" attribute is the element
;; mode.
;; NOTE(review): "type" is "sse" here while the packed min/max insns above
;; use "sseadd" -- confirm the difference is intentional.
;; NOTE(review): the enclosing RTL operators and the enabling condition are
;; not visible in this view.
844 (define_insn "<sse>_vm<code><mode>3"
845 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
848 (match_operand:VF_128 1 "register_operand" "0,x")
849 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
854 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
855 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
856 [(set_attr "isa" "noavx,avx")
857 (set_attr "type" "sse")
858 (set_attr "prefix" "orig,vex")
859 (set_attr "mode" "<ssescalarmode>")])
861 ;; These versions of the min/max patterns implement exactly the operations
862 ;; min = (op1 < op2 ? op1 : op2)
863 ;; max = (!(op1 < op2) ? op1 : op2)
864 ;; Their operands are not commutative, and thus they may be used in the
865 ;; presence of -0.0 and NaN.
;; "*ieee_smin<mode>3": min expressed through a two-operand wrapper rather
;; than an RTL arithmetic code, so the operand order is fixed (no "%"
;; marker; operand 1 must be a register).  Prints min<ssemodesuffix> or
;; vmin<ssemodesuffix>.
;; NOTE(review): the wrapper's opening line, its name and the enabling
;; condition are not visible in this view.
867 (define_insn "*ieee_smin<mode>3"
868 [(set (match_operand:VF 0 "register_operand" "=x,x")
870 [(match_operand:VF 1 "register_operand" "0,x")
871 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
875 min<ssemodesuffix>\t{%2, %0|%0, %2}
876 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
877 [(set_attr "isa" "noavx,avx")
878 (set_attr "type" "sseadd")
879 (set_attr "prefix" "orig,vex")
880 (set_attr "mode" "<MODE>")])
;; "*ieee_smax<mode>3": max counterpart of "*ieee_smin<mode>3", with the same
;; operand layout and fixed operand order.  Prints max<ssemodesuffix> or
;; vmax<ssemodesuffix>.
;; NOTE(review): the wrapper's opening line, its name and the enabling
;; condition are not visible in this view.
882 (define_insn "*ieee_smax<mode>3"
883 [(set (match_operand:VF 0 "register_operand" "=x,x")
885 [(match_operand:VF 1 "register_operand" "0,x")
886 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
890 max<ssemodesuffix>\t{%2, %0|%0, %2}
891 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
892 [(set_attr "isa" "noavx,avx")
893 (set_attr "type" "sseadd")
894 (set_attr "prefix" "orig,vex")
895 (set_attr "mode" "<MODE>")])
;; "avx_addsubv4df3": AVX-only (prefix "vex") pattern emitting vaddsubpd on
;; V4DF.  The visible RTL combines an expression over operands 1 and 2 with
;; (minus:V4DF op1 op2) of the same two operands.
;; NOTE(review): the combining operator, the first arithmetic code, the lane
;; selector and the enabling condition are not visible in this view.
897 (define_insn "avx_addsubv4df3"
898 [(set (match_operand:V4DF 0 "register_operand" "=x")
901 (match_operand:V4DF 1 "register_operand" "x")
902 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
903 (minus:V4DF (match_dup 1) (match_dup 2))
906 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
907 [(set_attr "type" "sseadd")
908 (set_attr "prefix" "vex")
909 (set_attr "mode" "V4DF")])
;; "sse3_addsubv2df3": V2DF counterpart of avx_addsubv4df3 with a noavx
;; alternative (addsubpd, operand 1 tied to the destination) and an avx
;; alternative (vaddsubpd).  Sets atom_unit to "complex".
;; NOTE(review): the combining operator, the first arithmetic code, the lane
;; selector and the enabling condition are not visible in this view.
911 (define_insn "sse3_addsubv2df3"
912 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
915 (match_operand:V2DF 1 "register_operand" "0,x")
916 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
917 (minus:V2DF (match_dup 1) (match_dup 2))
921 addsubpd\t{%2, %0|%0, %2}
922 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
923 [(set_attr "isa" "noavx,avx")
924 (set_attr "type" "sseadd")
925 (set_attr "atom_unit" "complex")
926 (set_attr "prefix" "orig,vex")
927 (set_attr "mode" "V2DF")])
;; "avx_addsubv8sf3": AVX-only (prefix "vex") pattern emitting vaddsubps on
;; V8SF; same RTL shape as avx_addsubv4df3.
;; NOTE(review): the combining operator, the first arithmetic code, the lane
;; selector and the enabling condition are not visible in this view.
929 (define_insn "avx_addsubv8sf3"
930 [(set (match_operand:V8SF 0 "register_operand" "=x")
933 (match_operand:V8SF 1 "register_operand" "x")
934 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
935 (minus:V8SF (match_dup 1) (match_dup 2))
938 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
939 [(set_attr "type" "sseadd")
940 (set_attr "prefix" "vex")
941 (set_attr "mode" "V8SF")])
;; "sse3_addsubv4sf3": V4SF counterpart with a noavx alternative (addsubps)
;; and an avx alternative (vaddsubps).  "prefix_rep" is "1" for the noavx
;; alternative and left at its default ("*") for the avx one.
;; NOTE(review): sse3_addsubv2df3 sets atom_unit "complex" but this pattern
;; does not -- confirm whether that is intentional.
;; NOTE(review): the combining operator, the first arithmetic code, the lane
;; selector and the enabling condition are not visible in this view.
943 (define_insn "sse3_addsubv4sf3"
944 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
947 (match_operand:V4SF 1 "register_operand" "0,x")
948 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
949 (minus:V4SF (match_dup 1) (match_dup 2))
953 addsubps\t{%2, %0|%0, %2}
954 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
955 [(set_attr "isa" "noavx,avx")
956 (set_attr "type" "sseadd")
957 (set_attr "prefix" "orig,vex")
958 (set_attr "prefix_rep" "1,*")
959 (set_attr "mode" "V4SF")])
;; "avx_h<plusminus_insn>v4df3": AVX-only horizontal add/sub on V4DF,
;; emitting vh<plusminus_mnemonic>pd.  The visible vec_selects take DF
;; elements in adjacent pairs: (0,1) and (2,3) of operand 1, then (0,1) and
;; (2,3) of operand 2.
;; NOTE(review): the operators that combine each pair and assemble the result
;; vector, and the enabling condition, are not visible in this view, so the
;; result lane order cannot be confirmed from here.
961 (define_insn "avx_h<plusminus_insn>v4df3"
962 [(set (match_operand:V4DF 0 "register_operand" "=x")
967 (match_operand:V4DF 1 "register_operand" "x")
968 (parallel [(const_int 0)]))
969 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
971 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
972 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
976 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
977 (parallel [(const_int 0)]))
978 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
980 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
981 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
983 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
984 [(set_attr "type" "sseadd")
985 (set_attr "prefix" "vex")
986 (set_attr "mode" "V4DF")])
;; "sse3_h<plusminus_insn>v2df3": horizontal add/sub on V2DF with a noavx
;; alternative (operand 1 tied to the destination) and an avx alternative.
;; The visible vec_selects pair elements (0,1) of operand 1 and elements
;; (0,1) of operand 2.
;; NOTE(review): the operators combining each pair and the enabling
;; condition are not visible in this view.
988 (define_insn "sse3_h<plusminus_insn>v2df3"
989 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
993 (match_operand:V2DF 1 "register_operand" "0,x")
994 (parallel [(const_int 0)]))
995 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
998 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
999 (parallel [(const_int 0)]))
1000 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1003 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
1004 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1005 [(set_attr "isa" "noavx,avx")
1006 (set_attr "type" "sseadd")
1007 (set_attr "prefix" "orig,vex")
1008 (set_attr "mode" "V2DF")])
;; "avx_h<plusminus_insn>v8sf3": AVX-only horizontal add/sub on V8SF,
;; emitting vh<plusminus_mnemonic>ps.  The visible vec_selects take SF
;; elements in adjacent pairs, in this order:
;;   operand 1: (0,1) (2,3)   operand 2: (0,1) (2,3)
;;   operand 1: (4,5) (6,7)   operand 2: (4,5) (6,7)
;; i.e. the low four elements of both sources come before the high four.
;; NOTE(review): the operators that combine each pair and assemble the result
;; vector, and the enabling condition, are not visible in this view.
1010 (define_insn "avx_h<plusminus_insn>v8sf3"
1011 [(set (match_operand:V8SF 0 "register_operand" "=x")
1017 (match_operand:V8SF 1 "register_operand" "x")
1018 (parallel [(const_int 0)]))
1019 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1021 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1022 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1026 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1027 (parallel [(const_int 0)]))
1028 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1030 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1031 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1035 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1036 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1038 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1039 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1042 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1043 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1045 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1046 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1048 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1049 [(set_attr "type" "sseadd")
1050 (set_attr "prefix" "vex")
1051 (set_attr "mode" "V8SF")])
;; "sse3_h<plusminus_insn>v4sf3": horizontal add/sub on V4SF with a noavx
;; alternative (operand 1 tied to the destination) and an avx alternative.
;; The visible vec_selects pair elements (0,1) and (2,3) of operand 1, then
;; (0,1) and (2,3) of operand 2.  Sets atom_unit "complex"; "prefix_rep" is
;; "1" for the noavx alternative only.
;; NOTE(review): the operators combining each pair and the enabling
;; condition are not visible in this view.
1053 (define_insn "sse3_h<plusminus_insn>v4sf3"
1054 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1059 (match_operand:V4SF 1 "register_operand" "0,x")
1060 (parallel [(const_int 0)]))
1061 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1063 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1064 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1068 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1069 (parallel [(const_int 0)]))
1070 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1072 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1073 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1076 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1077 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1078 [(set_attr "isa" "noavx,avx")
1079 (set_attr "type" "sseadd")
1080 (set_attr "atom_unit" "complex")
1081 (set_attr "prefix" "orig,vex")
1082 (set_attr "prefix_rep" "1,*")
1083 (set_attr "mode" "V4SF")])
;; Expander "reduc_splus_v4df": sum-reduction of operand 1 into operand 0,
;; emitted as three insns:
;;   tmp  = avx_haddv4df3 (op1, op1)
;;   tmp2 = avx_vperm2f128v4df3 (tmp, tmp, imm 1)
;;   op0  = addv4df3 (tmp, tmp2)
;; NOTE(review): imm 1 presumably swaps the two 128-bit halves of tmp, so the
;; add combines the per-half sums -- confirm against the vperm2f128 pattern.
;; The enabling condition is not visible in this view.
1085 (define_expand "reduc_splus_v4df"
1086 [(match_operand:V4DF 0 "register_operand" "")
1087 (match_operand:V4DF 1 "register_operand" "")]
1090 rtx tmp = gen_reg_rtx (V4DFmode);
1091 rtx tmp2 = gen_reg_rtx (V4DFmode);
1092 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1093 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1094 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V2DF: a single sse3_haddv2df3 of operand 1
;; with itself, written directly to operand 0.
1098 (define_expand "reduc_splus_v2df"
1099 [(match_operand:V2DF 0 "register_operand" "")
1100 (match_operand:V2DF 1 "register_operand" "")]
1103 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction expander for V8SF (operand 0 = result, operand 1 = input).
;; Emits two avx_haddv8sf3 insns (operand 1 with itself into tmp, then tmp
;; with itself into tmp2), then avx_vperm2f128v8sf3 of tmp2 with itself and
;; immediate 1 back into tmp, and finally addv8sf3 of tmp and tmp2 into
;; operand 0.  Note that tmp is reused as the destination of the permute.
1107 (define_expand "reduc_splus_v8sf"
1108 [(match_operand:V8SF 0 "register_operand" "")
1109 (match_operand:V8SF 1 "register_operand" "")]
1112 rtx tmp = gen_reg_rtx (V8SFmode);
1113 rtx tmp2 = gen_reg_rtx (V8SFmode);
1114 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1115 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1116 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1117 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction expander for V4SF (operand 0 = result, operand 1 = input).
;; Two code sequences appear in the body: a pair of sse3_haddv4sf3 insns
;; (operand 1 with itself into tmp, then tmp with itself into operand 0),
;; and a call to ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): presumably the hadd sequence is used only when SSE3 is
;; available and the helper call is the fallback -- confirm against the
;; selecting condition.
1121 (define_expand "reduc_splus_v4sf"
1122 [(match_operand:V4SF 0 "register_operand" "")
1123 (match_operand:V4SF 1 "register_operand" "")]
1128 rtx tmp = gen_reg_rtx (V4SFmode);
1129 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1130 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1133 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Signed-max reduction expander for V4SF.  Delegates to
;; ix86_expand_reduc_v4sf, passing gen_smaxv4sf3 as the combining insn
;; generator, operand 0 as the destination and operand 1 as the input.
1138 (define_expand "reduc_smax_v4sf"
1139 [(match_operand:V4SF 0 "register_operand" "")
1140 (match_operand:V4SF 1 "register_operand" "")]
1143 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Signed-min reduction expander for V4SF.  Delegates to
;; ix86_expand_reduc_v4sf, passing gen_sminv4sf3 as the combining insn
;; generator, operand 0 as the destination and operand 1 as the input.
1147 (define_expand "reduc_smin_v4sf"
1148 [(match_operand:V4SF 0 "register_operand" "")
1149 (match_operand:V4SF 1 "register_operand" "")]
1152 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1156 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1158 ;; Parallel floating point comparisons
1160 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed compare with an immediate predicate, for every mode in VF.
;; Operand 1 must be a register, operand 2 may be register or memory, and
;; operand 3 is an SI constant accepted by const_0_to_31_operand; it is
;; printed as the immediate of vcmp<ssemodesuffix> (length_immediate 1).
1162 (define_insn "avx_cmp<mode>3"
1163 [(set (match_operand:VF 0 "register_operand" "=x")
1165 [(match_operand:VF 1 "register_operand" "x")
1166 (match_operand:VF 2 "nonimmediate_operand" "xm")
1167 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1170 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1171 [(set_attr "type" "ssecmp")
1172 (set_attr "length_immediate" "1")
1173 (set_attr "prefix" "vex")
1174 (set_attr "mode" "<MODE>")])
;; Variant of the AVX immediate-predicate compare restricted to the 128-bit
;; float modes (VF_128).  It prints vcmp with <ssescalarmodesuffix> and
;; reports <ssescalarmode> as its mode attribute; operand 3 is again a
;; const_0_to_31_operand immediate.
1176 (define_insn "avx_vmcmp<mode>3"
1177 [(set (match_operand:VF_128 0 "register_operand" "=x")
1180 [(match_operand:VF_128 1 "register_operand" "x")
1181 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1182 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1187 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1188 [(set_attr "type" "ssecmp")
1189 (set_attr "length_immediate" "1")
1190 (set_attr "prefix" "vex")
1191 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare for commutative comparison codes.
;; Operand 3 is any sse_comparison_operator, but the insn condition also
;; requires the code's RTX class to be RTX_COMM_COMPARE, and operand 1 is
;; marked commutative ("%") so the operands may be swapped.
;; Alternative 0 is the two-operand (noavx) form, alternative 1 the
;; three-operand (avx) form.
1193 (define_insn "*<sse>_maskcmp<mode>3_comm"
1194 [(set (match_operand:VF 0 "register_operand" "=x,x")
1195 (match_operator:VF 3 "sse_comparison_operator"
1196 [(match_operand:VF 1 "register_operand" "%0,x")
1197 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1199 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1201 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1202 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1203 [(set_attr "isa" "noavx,avx")
1204 (set_attr "type" "ssecmp")
1205 (set_attr "length_immediate" "1")
1206 (set_attr "prefix" "orig,vex")
1207 (set_attr "mode" "<MODE>")])
;; Mask-producing packed compare for any sse_comparison_operator
;; (operand 3) over the VF modes.  Unlike the _comm variant, operand 1 is
;; not marked commutative.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) prints the three-operand form.
1209 (define_insn "<sse>_maskcmp<mode>3"
1210 [(set (match_operand:VF 0 "register_operand" "=x,x")
1211 (match_operator:VF 3 "sse_comparison_operator"
1212 [(match_operand:VF 1 "register_operand" "0,x")
1213 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1216 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1217 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1218 [(set_attr "isa" "noavx,avx")
1219 (set_attr "type" "ssecmp")
1220 (set_attr "length_immediate" "1")
1221 (set_attr "prefix" "orig,vex")
1222 (set_attr "mode" "<MODE>")])
;; Mask-producing compare over the 128-bit float modes (VF_128) that prints
;; the <ssescalarmodesuffix> form of cmp/vcmp and reports <ssescalarmode>
;; as its mode.  Alternative 0 (noavx) ties operand 1 to the destination;
;; alternative 1 (avx) is three-operand.  length_immediate is given
;; per alternative ("1,*").
1224 (define_insn "<sse>_vmmaskcmp<mode>3"
1225 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1227 (match_operator:VF_128 3 "sse_comparison_operator"
1228 [(match_operand:VF_128 1 "register_operand" "0,x")
1229 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1234 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1235 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1236 [(set_attr "isa" "noavx,avx")
1237 (set_attr "type" "ssecmp")
1238 (set_attr "length_immediate" "1,*")
1239 (set_attr "prefix" "orig,vex")
1240 (set_attr "mode" "<ssescalarmode>")])
;; comi: sets FLAGS_REG in CCFP mode from element 0 of operand 0 (register)
;; and element 0 of operand 1 (register or memory).  Enabled when
;; SSE_FLOAT_MODE_P holds for the scalar mode.  prefix_rep is forced to 0
;; and prefix_data16 is computed from whether the mode attribute is DF.
1242 (define_insn "<sse>_comi"
1243 [(set (reg:CCFP FLAGS_REG)
1246 (match_operand:<ssevecmode> 0 "register_operand" "x")
1247 (parallel [(const_int 0)]))
1249 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1250 (parallel [(const_int 0)]))))]
1251 "SSE_FLOAT_MODE_P (<MODE>mode)"
1252 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1253 [(set_attr "type" "ssecomi")
1254 (set_attr "prefix" "maybe_vex")
1255 (set_attr "prefix_rep" "0")
1256 (set (attr "prefix_data16")
1257 (if_then_else (eq_attr "mode" "DF")
1259 (const_string "0")))
1260 (set_attr "mode" "<MODE>")])
;; ucomi: same operand shape as the comi pattern (element 0 of operand 0
;; against element 0 of operand 1), but FLAGS_REG is set in CCFPU mode and
;; the mnemonic is ucomi.  Attributes mirror the comi pattern.
1262 (define_insn "<sse>_ucomi"
1263 [(set (reg:CCFPU FLAGS_REG)
1266 (match_operand:<ssevecmode> 0 "register_operand" "x")
1267 (parallel [(const_int 0)]))
1269 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1270 (parallel [(const_int 0)]))))]
1271 "SSE_FLOAT_MODE_P (<MODE>mode)"
1272 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1273 [(set_attr "type" "ssecomi")
1274 (set_attr "prefix" "maybe_vex")
1275 (set_attr "prefix_rep" "0")
1276 (set (attr "prefix_data16")
1277 (if_then_else (eq_attr "mode" "DF")
1279 (const_string "0")))
1280 (set_attr "mode" "<MODE>")])
;; Vector conditional expander for the VF modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the two value operands and operand 0 the destination.  The body hands
;; the whole operand array to ix86_expand_fp_vcond and records its
;; boolean result in `ok'.
1282 (define_expand "vcond<mode>"
1283 [(set (match_operand:VF 0 "register_operand" "")
1285 (match_operator 3 ""
1286 [(match_operand:VF 4 "nonimmediate_operand" "")
1287 (match_operand:VF 5 "nonimmediate_operand" "")])
1288 (match_operand:VF 1 "general_operand" "")
1289 (match_operand:VF 2 "general_operand" "")))]
1292 bool ok = ix86_expand_fp_vcond (operands);
1297 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1299 ;; Parallel floating point logical operations
1301 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not for the VF modes.  The assembler text is built at output
;; time into a static buffer with snprintf: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and the format string (andn%s two-operand vs. vandn%s
;; three-operand) is chosen by which_alternative.
1303 (define_insn "<sse>_andnot<mode>3"
1304 [(set (match_operand:VF 0 "register_operand" "=x,x")
1307 (match_operand:VF 1 "register_operand" "0,x"))
1308 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1311 static char buf[32];
1314 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1316 switch (which_alternative)
1319 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1322 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1328 snprintf (buf, sizeof (buf), insn, suffix);
1331 [(set_attr "isa" "noavx,avx")
1332 (set_attr "type" "sselog")
1333 (set_attr "prefix" "orig,vex")
1334 (set_attr "mode" "<MODE>")])
;; Expander for the iterated logical codes (<code>) on the VF modes.
;; Both inputs may be register or memory; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with the code, mode and operands.
1336 (define_expand "<code><mode>3"
1337 [(set (match_operand:VF 0 "register_operand" "")
1339 (match_operand:VF 1 "nonimmediate_operand" "")
1340 (match_operand:VF 2 "nonimmediate_operand" "")))]
1342 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the iterated logical codes on the VF modes.  Requires
;; TARGET_SSE and ix86_binary_operator_ok; operand 1 is commutative ("%").
;; As with the and-not pattern, the output is formatted at run time into a
;; static buffer: "ps" is used as suffix when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <ssemodesuffix>, and
;; which_alternative picks <logic>%s (two-operand) or v<logic>%s
;; (three-operand).
1344 (define_insn "*<code><mode>3"
1345 [(set (match_operand:VF 0 "register_operand" "=x,x")
1347 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1348 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1349 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1351 static char buf[32];
1354 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1356 switch (which_alternative)
1359 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1362 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1368 snprintf (buf, sizeof (buf), insn, suffix);
1371 [(set_attr "isa" "noavx,avx")
1372 (set_attr "type" "sselog")
1373 (set_attr "prefix" "orig,vex")
1374 (set_attr "mode" "<MODE>")])
;; copysign expander for the VF modes.  Operand 3 is a mask obtained from
;; ix86_build_signbit_mask; the pattern combines (not mask) with operand 1
;; and mask with operand 2, and the final set ORs the two intermediate
;; results (operands 4 and 5, fresh pseudos) into operand 0.
1376 (define_expand "copysign<mode>3"
1379 (not:VF (match_dup 3))
1380 (match_operand:VF 1 "nonimmediate_operand" "")))
1382 (and:VF (match_dup 3)
1383 (match_operand:VF 2 "nonimmediate_operand" "")))
1384 (set (match_operand:VF 0 "register_operand" "")
1385 (ior:VF (match_dup 4) (match_dup 5)))]
1388 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1390 operands[4] = gen_reg_rtx (<MODE>mode);
1391 operands[5] = gen_reg_rtx (<MODE>mode);
1394 ;; Also define scalar versions. These are used for abs, neg, and
1395 ;; conditional move. Using subregs into vector modes causes register
1396 ;; allocation lossage. These patterns do not allow memory operands
1397 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All three operands must be registers.  The
;; instruction printed is still the packed form: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, otherwise <ssevecmodesuffix>,
;; and the mode attribute is the vector mode <ssevecmode>.
1399 (define_insn "*andnot<mode>3"
1400 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1403 (match_operand:MODEF 1 "register_operand" "0,x"))
1404 (match_operand:MODEF 2 "register_operand" "x,x")))]
1405 "SSE_FLOAT_MODE_P (<MODE>mode)"
1407 static char buf[32];
1410 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1412 switch (which_alternative)
1415 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1418 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1424 snprintf (buf, sizeof (buf), insn, suffix);
1427 [(set_attr "isa" "noavx,avx")
1428 (set_attr "type" "sselog")
1429 (set_attr "prefix" "orig,vex")
1430 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) version of the iterated logical codes.  Register-only
;; operands, operand 1 commutative ("%").  Output is formatted at run time
;; exactly like the scalar and-not pattern: packed suffix "ps" or
;; <ssevecmodesuffix>, two-operand or v-prefixed three-operand form
;; depending on which_alternative.
1432 (define_insn "*<code><mode>3"
1433 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1435 (match_operand:MODEF 1 "register_operand" "%0,x")
1436 (match_operand:MODEF 2 "register_operand" "x,x")))]
1437 "SSE_FLOAT_MODE_P (<MODE>mode)"
1439 static char buf[32];
1442 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1444 switch (which_alternative)
1447 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1450 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1456 snprintf (buf, sizeof (buf), insn, suffix);
1459 [(set_attr "isa" "noavx,avx")
1460 (set_attr "type" "sselog")
1461 (set_attr "prefix" "orig,vex")
1462 (set_attr "mode" "<ssevecmode>")])
1464 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1466 ;; FMA4 floating point multiply/accumulate instructions. This
1467 ;; includes the scalar version of the instructions as well as the
1470 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1472 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1473 ;; combine to generate a multiply/add with two memory references. We then
1474 ;; split this insn, into loading up the destination register with one of the
1475 ;; memory operations. If we don't manage to split the insn, reload will
1476 ;; generate the appropriate moves. The reason this is needed, is that combine
1477 ;; has already folded one of the memory references into both the multiply and
1478 ;; add insns, and it can't generate a new pseudo. I.e.:
1479 ;; (set (reg1) (mem (addr1)))
1480 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1481 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1483 ;; ??? This is historic, pre-dating the gimple fma transformation.
1484 ;; We could now properly represent that only one memory operand is
1485 ;; allowed and not be penalized during optimization.
1487 ;; Intrinsic FMA operations.
1489 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander fma<mode>4 over FMAMODE.  All three inputs may be
;; register or memory.  Available when either TARGET_FMA or TARGET_FMA4 is
;; set and TARGET_SSE_MATH is enabled.
1490 (define_expand "fma<mode>4"
1491 [(set (match_operand:FMAMODE 0 "register_operand")
1493 (match_operand:FMAMODE 1 "nonimmediate_operand")
1494 (match_operand:FMAMODE 2 "nonimmediate_operand")
1495 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1496 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fms<mode>4 over FMAMODE.  Same shape and enabling
;; condition as fma<mode>4, except that operand 3 is wrapped in neg.
1498 (define_expand "fms<mode>4"
1499 [(set (match_operand:FMAMODE 0 "register_operand")
1501 (match_operand:FMAMODE 1 "nonimmediate_operand")
1502 (match_operand:FMAMODE 2 "nonimmediate_operand")
1503 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1504 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnma<mode>4 over FMAMODE.  Same shape and
;; enabling condition as fma<mode>4, except that operand 1 is wrapped in
;; neg.
1506 (define_expand "fnma<mode>4"
1507 [(set (match_operand:FMAMODE 0 "register_operand")
1509 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1510 (match_operand:FMAMODE 2 "nonimmediate_operand")
1511 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1512 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnms<mode>4 over FMAMODE.  Same shape and
;; enabling condition as fma<mode>4, except that both operand 1 and
;; operand 3 are wrapped in neg.
1514 (define_expand "fnms<mode>4"
1515 [(set (match_operand:FMAMODE 0 "register_operand")
1517 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1518 (match_operand:FMAMODE 2 "nonimmediate_operand")
1519 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1520 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1522 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing multiply-add expander over FMAMODE.  Unlike the standard
;; fma<mode>4 name, its condition is only TARGET_FMA || TARGET_FMA4, with
;; no TARGET_SSE_MATH requirement.
1523 (define_expand "fma4i_fmadd_<mode>"
1524 [(set (match_operand:FMAMODE 0 "register_operand")
1526 (match_operand:FMAMODE 1 "nonimmediate_operand")
1527 (match_operand:FMAMODE 2 "nonimmediate_operand")
1528 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1529 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd insn over FMAMODE.  Operand 1 is commutative ("%").
;; The two alternatives allow a memory operand in exactly one source
;; position: operand 3 ("xm") in alternative 0, operand 2 ("m") in
;; alternative 1.
1531 (define_insn "*fma4i_fmadd_<mode>"
1532 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1534 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1535 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1536 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1538 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1539 [(set_attr "type" "ssemuladd")
1540 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub insn over FMAMODE.  Constraint layout matches the
;; vfmadd pattern: operand 1 commutative, memory allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1542 (define_insn "*fma4i_fmsub_<mode>"
1543 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1545 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1546 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1548 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1550 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1551 [(set_attr "type" "ssemuladd")
1552 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd insn over FMAMODE.  Constraint layout matches the
;; vfmadd pattern: operand 1 commutative, memory allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1554 (define_insn "*fma4i_fnmadd_<mode>"
1555 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1558 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1559 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1560 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1562 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1563 [(set_attr "type" "ssemuladd")
1564 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub insn over FMAMODE.  Constraint layout matches the
;; vfmadd pattern: operand 1 commutative, memory allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1).
1566 (define_insn "*fma4i_fnmsub_<mode>"
1567 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1570 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1571 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1573 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1575 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1576 [(set_attr "type" "ssemuladd")
1577 (set_attr "mode" "<MODE>")])
1579 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1580 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar-suffixed multiply-add on the 128-bit float modes
;; (VF_128).  The preparation code supplies operand 4 as the all-zero
;; constant of the vector mode (CONST0_RTX), which the matching insn
;; requires via const0_operand.
1582 (define_expand "fma4i_vmfmadd_<mode>"
1583 [(set (match_operand:VF_128 0 "register_operand")
1586 (match_operand:VF_128 1 "nonimmediate_operand")
1587 (match_operand:VF_128 2 "nonimmediate_operand")
1588 (match_operand:VF_128 3 "nonimmediate_operand"))
1593 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar-suffixed four-operand vfmadd on VF_128.  Operand 4 must satisfy
;; const0_operand and is not printed.  Memory is allowed in operand 3
;; (alternative 0) or operand 2 (alternative 1); operand 1 is commutative.
1596 (define_insn "*fma4i_vmfmadd_<mode>"
1597 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1600 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1601 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1602 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1603 (match_operand:VF_128 4 "const0_operand" "")
1606 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1607 [(set_attr "type" "ssemuladd")
1608 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfmsub on VF_128.  Operand 4 must satisfy
;; const0_operand and is not printed.  Constraint layout matches the
;; scalar vfmadd pattern.
1610 (define_insn "*fma4i_vmfmsub_<mode>"
1611 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1614 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1615 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1617 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1618 (match_operand:VF_128 4 "const0_operand" "")
1621 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1622 [(set_attr "type" "ssemuladd")
1623 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfnmadd on VF_128.  Operand 4 must satisfy
;; const0_operand and is not printed.  Constraint layout matches the
;; scalar vfmadd pattern.
1625 (define_insn "*fma4i_vmfnmadd_<mode>"
1626 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1630 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1631 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1632 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1633 (match_operand:VF_128 4 "const0_operand" "")
1636 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1637 [(set_attr "type" "ssemuladd")
1638 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfnmsub on VF_128.  Operand 4 must satisfy
;; const0_operand and is not printed.  Constraint layout matches the
;; scalar vfmadd pattern.
1640 (define_insn "*fma4i_vmfnmsub_<mode>"
1641 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1645 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1646 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1648 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1649 (match_operand:VF_128 4 "const0_operand" "")
1652 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1653 [(set_attr "type" "ssemuladd")
1654 (set_attr "mode" "<MODE>")])
1656 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1658 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1660 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1662 ;; It would be possible to represent these without the UNSPEC as
1665 ;; (fma op1 op2 op3)
1666 ;; (fma op1 op2 (neg op3))
1669 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation on the VF modes.  Takes
;; three source operands (register or memory) as a bracketed operand
;; vector and is available when TARGET_FMA or TARGET_FMA4 is set.
1671 (define_expand "fmaddsub_<mode>"
1672 [(set (match_operand:VF 0 "register_operand")
1674 [(match_operand:VF 1 "nonimmediate_operand")
1675 (match_operand:VF 2 "nonimmediate_operand")
1676 (match_operand:VF 3 "nonimmediate_operand")]
1678 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub insn on the VF modes.  Operand 1 is commutative;
;; memory is allowed in operand 3 (alternative 0) or operand 2
;; (alternative 1).
1680 (define_insn "*fma4_fmaddsub_<mode>"
1681 [(set (match_operand:VF 0 "register_operand" "=x,x")
1683 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1684 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1685 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1688 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1689 [(set_attr "type" "ssemuladd")
1690 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd insn on the VF modes.  Constraint layout matches
;; the four-operand vfmaddsub pattern.
1692 (define_insn "*fma4_fmsubadd_<mode>"
1693 [(set (match_operand:VF 0 "register_operand" "=x,x")
1695 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1696 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1698 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1701 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1702 [(set_attr "type" "ssemuladd")
1703 (set_attr "mode" "<MODE>")])
1705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707 ;; FMA3 floating point multiply/accumulate instructions.
1709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-operand (destructive) vfmadd insn over FMAMODE, with one
;; alternative per encoding:
;;   alt 0: operand 1 tied to dest, prints vfmadd132 with %3 and %2;
;;   alt 1: operand 1 tied to dest, prints vfmadd213 with %2 and %3;
;;   alt 2: operand 3 tied to dest, prints vfmadd231 with %1 and %2.
;; In each alternative at most one source may be memory.
1711 (define_insn "*fma_fmadd_<mode>"
1712 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1714 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1715 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1716 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1719 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1720 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1721 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1722 [(set_attr "type" "ssemuladd")
1723 (set_attr "mode" "<MODE>")])
;; Three-operand (destructive) vfmsub insn over FMAMODE.  The three
;; alternatives print the 132, 213 and 231 encodings in that order, with
;; the same constraint layout as the three-operand vfmadd pattern
;; (operand 1 tied to the destination for 132/213, operand 3 for 231).
1725 (define_insn "*fma_fmsub_<mode>"
1726 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1728 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1729 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1731 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1734 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1735 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1736 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1737 [(set_attr "type" "ssemuladd")
1738 (set_attr "mode" "<MODE>")])
;; Three-operand (destructive) vfnmadd insn over FMAMODE.  The three
;; alternatives print the 132, 213 and 231 encodings in that order, with
;; the same constraint layout as the three-operand vfmadd pattern
;; (operand 1 tied to the destination for 132/213, operand 3 for 231).
1740 (define_insn "*fma_fnmadd_<mode>"
1741 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1744 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1745 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1746 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1749 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1750 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1751 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1752 [(set_attr "type" "ssemuladd")
1753 (set_attr "mode" "<MODE>")])
;; Three-operand (destructive) vfnmsub insn over FMAMODE.  The three
;; alternatives print the 132, 213 and 231 encodings in that order, with
;; the same constraint layout as the three-operand vfmadd pattern:
;;   alt 0: operand 1 tied to dest, prints vfnmsub132 with %3 and %2;
;;   alt 1: operand 1 tied to dest, prints vfnmsub213 with %2 and %3;
;;   alt 2: operand 3 tied to dest, prints vfnmsub231 with %1 and %2.
;; The second alternative must be the 213 form: with operand 1 in the
;; destination and %2/%3 as the printed sources, emitting the 231 mnemonic
;; would assign the operands to the wrong roles.
1755 (define_insn "*fma_fnmsub_<mode>"
1756 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1759 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1760 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1762 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1765 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1766 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1767 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1768 [(set_attr "type" "ssemuladd")
1769 (set_attr "mode" "<MODE>")])
;; Three-operand (destructive) vfmaddsub insn on the VF modes.  The three
;; alternatives print the 132, 213 and 231 encodings in that order, with
;; the same constraint layout as the three-operand vfmadd pattern
;; (operand 1 tied to the destination for 132/213, operand 3 for 231).
1771 (define_insn "*fma_fmaddsub_<mode>"
1772 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1774 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1775 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1776 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1780 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1781 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1782 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1783 [(set_attr "type" "ssemuladd")
1784 (set_attr "mode" "<MODE>")])
;; Three-operand (destructive) vfmsubadd insn on the VF modes.  The three
;; alternatives print the 132, 213 and 231 encodings in that order, with
;; the same constraint layout as the three-operand vfmaddsub pattern.
1786 (define_insn "*fma_fmsubadd_<mode>"
1787 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1789 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1790 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1792 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1796 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1797 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1798 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1799 [(set_attr "type" "ssemuladd")
1800 (set_attr "mode" "<MODE>")])
1802 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1804 ;; Parallel single-precision floating point conversion operations
1806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (constraint "ym": class y register or
;; memory) with float:V2SF into V4SF operand 0.  Operand 1 is tied to the
;; destination ("0") and is not printed.
1808 (define_insn "sse_cvtpi2ps"
1809 [(set (match_operand:V4SF 0 "register_operand" "=x")
1812 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1813 (match_operand:V4SF 1 "register_operand" "0")
1816 "cvtpi2ps\t{%2, %0|%0, %2}"
1817 [(set_attr "type" "ssecvt")
1818 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI result (class y register) is elements 0 and 1 of a
;; V4SI unspec applied to V4SF operand 1 (register or memory).
1820 (define_insn "sse_cvtps2pi"
1821 [(set (match_operand:V2SI 0 "register_operand" "=y")
1823 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1825 (parallel [(const_int 0) (const_int 1)])))]
1827 "cvtps2pi\t{%1, %0|%0, %1}"
1828 [(set_attr "type" "ssecvt")
1829 (set_attr "unit" "mmx")
1830 (set_attr "mode" "DI")])
;; cvttps2pi: the V2SI result (class y register) is elements 0 and 1 of
;; fix:V4SI applied to V4SF operand 1 (register or memory).  prefix_rep is
;; forced to 0.
1832 (define_insn "sse_cvttps2pi"
1833 [(set (match_operand:V2SI 0 "register_operand" "=y")
1835 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1836 (parallel [(const_int 0) (const_int 1)])))]
1838 "cvttps2pi\t{%1, %0|%0, %1}"
1839 [(set_attr "type" "ssecvt")
1840 (set_attr "unit" "mmx")
1841 (set_attr "prefix_rep" "0")
1842 (set_attr "mode" "SF")])
;; cvtsi2ss: float:SF of SI operand 2 into V4SF operand 0, with operand 1
;; supplying the V4SF register input.  Alternatives 0 and 1 (noavx) tie
;; operand 1 to the destination and differ only in whether operand 2 is a
;; general register or memory, which lets the per-CPU decode attributes
;; differ; alternative 2 (avx) prints the three-operand vcvtsi2ss form.
1844 (define_insn "sse_cvtsi2ss"
1845 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1848 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1849 (match_operand:V4SF 1 "register_operand" "0,0,x")
1853 cvtsi2ss\t{%2, %0|%0, %2}
1854 cvtsi2ss\t{%2, %0|%0, %2}
1855 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1856 [(set_attr "isa" "noavx,noavx,avx")
1857 (set_attr "type" "sseicvt")
1858 (set_attr "athlon_decode" "vector,double,*")
1859 (set_attr "amdfam10_decode" "vector,double,*")
1860 (set_attr "bdver1_decode" "double,direct,*")
1861 (set_attr "prefix" "orig,orig,vex")
1862 (set_attr "mode" "SF")])
;; 64-bit variant of cvtsi2ss: operand 2 is DImode and the pattern is only
;; enabled under TARGET_SSE && TARGET_64BIT.  Alternative layout matches
;; the SImode pattern; in addition length_vex is 4 for the avx alternative
;; and prefix_rex is 1 for the two noavx alternatives.
1864 (define_insn "sse_cvtsi2ssq"
1865 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1868 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1869 (match_operand:V4SF 1 "register_operand" "0,0,x")
1871 "TARGET_SSE && TARGET_64BIT"
1873 cvtsi2ssq\t{%2, %0|%0, %2}
1874 cvtsi2ssq\t{%2, %0|%0, %2}
1875 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1876 [(set_attr "isa" "noavx,noavx,avx")
1877 (set_attr "type" "sseicvt")
1878 (set_attr "athlon_decode" "vector,double,*")
1879 (set_attr "amdfam10_decode" "vector,double,*")
1880 (set_attr "bdver1_decode" "double,direct,*")
1881 (set_attr "length_vex" "*,*,4")
1882 (set_attr "prefix_rex" "1,1,*")
1883 (set_attr "prefix" "orig,orig,vex")
1884 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: UNSPEC_FIX_NOTRUNC applied to element 0 of
;; V4SF operand 1 (register or memory, one alternative each), producing an
;; SI result in a general register.
1886 (define_insn "sse_cvtss2si"
1887 [(set (match_operand:SI 0 "register_operand" "=r,r")
1890 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1891 (parallel [(const_int 0)]))]
1892 UNSPEC_FIX_NOTRUNC))]
1894 "%vcvtss2si\t{%1, %0|%0, %1}"
1895 [(set_attr "type" "sseicvt")
1896 (set_attr "athlon_decode" "double,vector")
1897 (set_attr "bdver1_decode" "double,double")
1898 (set_attr "prefix_rep" "1")
1899 (set_attr "prefix" "maybe_vex")
1900 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: UNSPEC_FIX_NOTRUNC applied directly to SF
;; operand 1 (register or memory), producing an SI result in a general
;; register.  Prints the same mnemonic as the vector-source pattern.
1902 (define_insn "sse_cvtss2si_2"
1903 [(set (match_operand:SI 0 "register_operand" "=r,r")
1904 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1905 UNSPEC_FIX_NOTRUNC))]
1907 "%vcvtss2si\t{%1, %0|%0, %1}"
1908 [(set_attr "type" "sseicvt")
1909 (set_attr "athlon_decode" "double,vector")
1910 (set_attr "amdfam10_decode" "double,double")
1911 (set_attr "bdver1_decode" "double,double")
1912 (set_attr "prefix_rep" "1")
1913 (set_attr "prefix" "maybe_vex")
1914 (set_attr "mode" "SI")])
;; 64-bit cvtss2si on a vector source: UNSPEC_FIX_NOTRUNC applied to
;; element 0 of V4SF operand 1, producing a DI result.  Enabled only under
;; TARGET_SSE && TARGET_64BIT; the template carries a {q} suffix group.
1916 (define_insn "sse_cvtss2siq"
1917 [(set (match_operand:DI 0 "register_operand" "=r,r")
1920 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1921 (parallel [(const_int 0)]))]
1922 UNSPEC_FIX_NOTRUNC))]
1923 "TARGET_SSE && TARGET_64BIT"
1924 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1925 [(set_attr "type" "sseicvt")
1926 (set_attr "athlon_decode" "double,vector")
1927 (set_attr "bdver1_decode" "double,double")
1928 (set_attr "prefix_rep" "1")
1929 (set_attr "prefix" "maybe_vex")
1930 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a scalar source: UNSPEC_FIX_NOTRUNC applied directly
;; to SF operand 1, producing a DI result.  Enabled only under
;; TARGET_SSE && TARGET_64BIT.
1932 (define_insn "sse_cvtss2siq_2"
1933 [(set (match_operand:DI 0 "register_operand" "=r,r")
1934 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1935 UNSPEC_FIX_NOTRUNC))]
1936 "TARGET_SSE && TARGET_64BIT"
1937 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1938 [(set_attr "type" "sseicvt")
1939 (set_attr "athlon_decode" "double,vector")
1940 (set_attr "amdfam10_decode" "double,double")
1941 (set_attr "bdver1_decode" "double,double")
1942 (set_attr "prefix_rep" "1")
1943 (set_attr "prefix" "maybe_vex")
1944 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 (register or memory)
;; to an SI result in a general register.  Unlike the cvtss2si patterns,
;; no UNSPEC_FIX_NOTRUNC appears in this pattern.
1946 (define_insn "sse_cvttss2si"
1947 [(set (match_operand:SI 0 "register_operand" "=r,r")
1950 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1951 (parallel [(const_int 0)]))))]
1953 "%vcvttss2si\t{%1, %0|%0, %1}"
1954 [(set_attr "type" "sseicvt")
1955 (set_attr "athlon_decode" "double,vector")
1956 (set_attr "amdfam10_decode" "double,double")
1957 (set_attr "bdver1_decode" "double,double")
1958 (set_attr "prefix_rep" "1")
1959 (set_attr "prefix" "maybe_vex")
1960 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: converts element 0 of V4SF operand 1 to a DI result.
;; Enabled only under TARGET_SSE && TARGET_64BIT; the template carries a
;; {q} suffix group.
1962 (define_insn "sse_cvttss2siq"
1963 [(set (match_operand:DI 0 "register_operand" "=r,r")
1966 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1967 (parallel [(const_int 0)]))))]
1968 "TARGET_SSE && TARGET_64BIT"
1969 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1970 [(set_attr "type" "sseicvt")
1971 (set_attr "athlon_decode" "double,vector")
1972 (set_attr "amdfam10_decode" "double,double")
1973 (set_attr "bdver1_decode" "double,double")
1974 (set_attr "prefix_rep" "1")
1975 (set_attr "prefix" "maybe_vex")
1976 (set_attr "mode" "DI")])
;; 256-bit vcvtdq2ps: float:V8SF of V8SI operand 1 (register or memory).
;; Always VEX-prefixed.
1978 (define_insn "avx_cvtdq2ps256"
1979 [(set (match_operand:V8SF 0 "register_operand" "=x")
1980 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1982 "vcvtdq2ps\t{%1, %0|%0, %1}"
1983 [(set_attr "type" "ssecvt")
1984 (set_attr "prefix" "vex")
1985 (set_attr "mode" "V8SF")])
;; 128-bit cvtdq2ps: float:V4SF of V4SI operand 1 (register or memory).
;; Uses the %v template prefix with prefix attribute maybe_vex.
1987 (define_insn "sse2_cvtdq2ps"
1988 [(set (match_operand:V4SF 0 "register_operand" "=x")
1989 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1991 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1992 [(set_attr "type" "ssecvt")
1993 (set_attr "prefix" "maybe_vex")
1994 (set_attr "mode" "V4SF")])
;; Expander for converting V4SI operand 1 to V4SF operand 0 as an unsigned
;; conversion built from signed pieces.  The pattern contains, in order:
;; float of operand 1, lt (operand 5, operand 3), and (operand 6,
;; operand 4), and a final plus (operand 5, operand 7) into operand 0.
;; The preparation code sets operand 3 to a zero vector, operand 4 to a
;; vector built by ix86_build_const_vector from the SFmode constant
;; produced by real_ldexp (&TWO32r, &dconst1, 32) -- presumably 2**32 in
;; every element; confirm -- and operands 5..7 to fresh V4SF pseudos.
1996 (define_expand "sse2_cvtudq2ps"
1998 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2000 (lt:V4SF (match_dup 5) (match_dup 3)))
2002 (and:V4SF (match_dup 6) (match_dup 4)))
2003 (set (match_operand:V4SF 0 "register_operand" "")
2004 (plus:V4SF (match_dup 5) (match_dup 7)))]
2007 REAL_VALUE_TYPE TWO32r;
2011 real_ldexp (&TWO32r, &dconst1, 32);
2012 x = const_double_from_real_value (TWO32r, SFmode);
2014 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2015 operands[4] = force_reg (V4SFmode,
2016 ix86_build_const_vector (V4SFmode, 1, x));
2018 for (i = 5; i < 8; i++)
2019 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC of V8SF operand 1 (register or
;; memory) into V8SI operand 0.  Always VEX-prefixed; mode attribute OI.
2022 (define_insn "avx_cvtps2dq256"
2023 [(set (match_operand:V8SI 0 "register_operand" "=x")
2024 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2025 UNSPEC_FIX_NOTRUNC))]
2027 "vcvtps2dq\t{%1, %0|%0, %1}"
2028 [(set_attr "type" "ssecvt")
2029 (set_attr "prefix" "vex")
2030 (set_attr "mode" "OI")])
;; 128-bit cvtps2dq: UNSPEC_FIX_NOTRUNC of V4SF operand 1 (register or
;; memory) into V4SI operand 0.  The prefix_data16 attribute is computed
;; from TARGET_AVX, with "1" as the value of the else branch.
2032 (define_insn "sse2_cvtps2dq"
2033 [(set (match_operand:V4SI 0 "register_operand" "=x")
2034 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2035 UNSPEC_FIX_NOTRUNC))]
2037 "%vcvtps2dq\t{%1, %0|%0, %1}"
2038 [(set_attr "type" "ssecvt")
2039 (set (attr "prefix_data16")
2041 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2043 (const_string "1")))
2044 (set_attr "prefix" "maybe_vex")
2045 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq: fix:V8SI of V8SF operand 1 (register or memory).
;; Always VEX-prefixed; mode attribute OI.
2047 (define_insn "avx_cvttps2dq256"
2048 [(set (match_operand:V8SI 0 "register_operand" "=x")
2049 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2051 "vcvttps2dq\t{%1, %0|%0, %1}"
2052 [(set_attr "type" "ssecvt")
2053 (set_attr "prefix" "vex")
2054 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: fix:V4SI of V4SF operand 1 (register or memory).
;; prefix_rep and prefix_data16 are both computed from TARGET_AVX, with
;; else-branch values "1" and "0" respectively.
;; NOTE(review): prefix_data16 is specified twice in the attribute list
;; below -- once as the TARGET_AVX conditional and once as a plain "0".
;; Confirm which setting is intended to take effect and drop the other.
2056 (define_insn "sse2_cvttps2dq"
2057 [(set (match_operand:V4SI 0 "register_operand" "=x")
2058 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2060 "%vcvttps2dq\t{%1, %0|%0, %1}"
2061 [(set_attr "type" "ssecvt")
2062 (set (attr "prefix_rep")
2064 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2066 (const_string "1")))
2067 (set (attr "prefix_data16")
2069 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2071 (const_string "0")))
2072 (set_attr "prefix_data16" "0")
2073 (set_attr "prefix" "maybe_vex")
2074 (set_attr "mode" "TI")])
2076 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2078 ;; Parallel double-precision floating point conversion operations
2080 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of V2SI operand 1.  Alternative 0 takes the source
;; in a class y register (unit mmx, prefix_data16 1); alternative 1 takes
;; it from memory, with both of those attributes left at "*".
2082 (define_insn "sse2_cvtpi2pd"
2083 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2084 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2086 "cvtpi2pd\t{%1, %0|%0, %1}"
2087 [(set_attr "type" "ssecvt")
2088 (set_attr "unit" "mmx,*")
2089 (set_attr "prefix_data16" "1,*")
2090 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC of V2DF operand 1 (register or memory)
;; into a V2SI class y register.
2092 (define_insn "sse2_cvtpd2pi"
2093 [(set (match_operand:V2SI 0 "register_operand" "=y")
2094 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2095 UNSPEC_FIX_NOTRUNC))]
2097 "cvtpd2pi\t{%1, %0|%0, %1}"
2098 [(set_attr "type" "ssecvt")
2099 (set_attr "unit" "mmx")
2100 (set_attr "bdver1_decode" "double")
2101 (set_attr "prefix_data16" "1")
2102 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI of V2DF operand 1 (register or memory) into a V2SI
;; class y register.
2104 (define_insn "sse2_cvttpd2pi"
2105 [(set (match_operand:V2SI 0 "register_operand" "=y")
2106 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2108 "cvttpd2pi\t{%1, %0|%0, %1}"
2109 [(set_attr "type" "ssecvt")
2110 (set_attr "unit" "mmx")
2111 (set_attr "bdver1_decode" "double")
2112 (set_attr "prefix_data16" "1")
2113 (set_attr "mode" "TI")])
;; cvtsi2sd: float:DF of SI operand 2 into V2DF operand 0, with operand 1
;; supplying the V2DF register input.  Alternatives 0 and 1 (noavx) tie
;; operand 1 to the destination and split register vs. memory sources so
;; the per-CPU decode attributes can differ; alternative 2 (avx) prints
;; the three-operand vcvtsi2sd form.
2115 (define_insn "sse2_cvtsi2sd"
2116 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2119 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2120 (match_operand:V2DF 1 "register_operand" "0,0,x")
2124 cvtsi2sd\t{%2, %0|%0, %2}
2125 cvtsi2sd\t{%2, %0|%0, %2}
2126 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2127 [(set_attr "isa" "noavx,noavx,avx")
2128 (set_attr "type" "sseicvt")
2129 (set_attr "athlon_decode" "double,direct,*")
2130 (set_attr "amdfam10_decode" "vector,double,*")
2131 (set_attr "bdver1_decode" "double,direct,*")
2132 (set_attr "prefix" "orig,orig,vex")
2133 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source counterpart of the SImode cvtsi2sd pattern;
;; only enabled for TARGET_SSE2 && TARGET_64BIT.  The non-AVX alternatives
;; are marked as carrying a REX prefix; the AVX alternative sets length_vex
;; to 4.
2135 (define_insn "sse2_cvtsi2sdq"
2136 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2139 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2140 (match_operand:V2DF 1 "register_operand" "0,0,x")
2142 "TARGET_SSE2 && TARGET_64BIT"
2144 cvtsi2sdq\t{%2, %0|%0, %2}
2145 cvtsi2sdq\t{%2, %0|%0, %2}
2146 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2147 [(set_attr "isa" "noavx,noavx,avx")
2148 (set_attr "type" "sseicvt")
2149 (set_attr "athlon_decode" "double,direct,*")
2150 (set_attr "amdfam10_decode" "vector,double,*")
2151 (set_attr "bdver1_decode" "double,direct,*")
2152 (set_attr "length_vex" "*,*,4")
2153 (set_attr "prefix_rex" "1,1,*")
2154 (set_attr "prefix" "orig,orig,vex")
2155 (set_attr "mode" "DF")])
;; cvtsd2si: convert element 0 of V2DF operand 1 (register or memory) to an
;; SImode general register, expressed as UNSPEC_FIX_NOTRUNC.
2157 (define_insn "sse2_cvtsd2si"
2158 [(set (match_operand:SI 0 "register_operand" "=r,r")
2161 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2162 (parallel [(const_int 0)]))]
2163 UNSPEC_FIX_NOTRUNC))]
2165 "%vcvtsd2si\t{%1, %0|%0, %1}"
2166 [(set_attr "type" "sseicvt")
2167 (set_attr "athlon_decode" "double,vector")
2168 (set_attr "bdver1_decode" "double,double")
2169 (set_attr "prefix_rep" "1")
2170 (set_attr "prefix" "maybe_vex")
2171 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose source is a scalar DFmode operand instead of
;; an element selected from a V2DF vector; same output template.
2173 (define_insn "sse2_cvtsd2si_2"
2174 [(set (match_operand:SI 0 "register_operand" "=r,r")
2175 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2176 UNSPEC_FIX_NOTRUNC))]
2178 "%vcvtsd2si\t{%1, %0|%0, %1}"
2179 [(set_attr "type" "sseicvt")
2180 (set_attr "athlon_decode" "double,vector")
2181 (set_attr "amdfam10_decode" "double,double")
2182 (set_attr "bdver1_decode" "double,double")
2183 (set_attr "prefix_rep" "1")
2184 (set_attr "prefix" "maybe_vex")
2185 (set_attr "mode" "SI")])
;; 64-bit form of cvtsd2si: element 0 of V2DF operand 1 converted to a
;; DImode general register via UNSPEC_FIX_NOTRUNC.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2187 (define_insn "sse2_cvtsd2siq"
2188 [(set (match_operand:DI 0 "register_operand" "=r,r")
2191 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2192 (parallel [(const_int 0)]))]
2193 UNSPEC_FIX_NOTRUNC))]
2194 "TARGET_SSE2 && TARGET_64BIT"
2195 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2196 [(set_attr "type" "sseicvt")
2197 (set_attr "athlon_decode" "double,vector")
2198 (set_attr "bdver1_decode" "double,double")
2199 (set_attr "prefix_rep" "1")
2200 (set_attr "prefix" "maybe_vex")
2201 (set_attr "mode" "DI")])
;; 64-bit cvtsd2si variant taking a scalar DFmode source operand directly.
;; Requires TARGET_SSE2 && TARGET_64BIT.
2203 (define_insn "sse2_cvtsd2siq_2"
2204 [(set (match_operand:DI 0 "register_operand" "=r,r")
2205 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2206 UNSPEC_FIX_NOTRUNC))]
2207 "TARGET_SSE2 && TARGET_64BIT"
2208 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2209 [(set_attr "type" "sseicvt")
2210 (set_attr "athlon_decode" "double,vector")
2211 (set_attr "amdfam10_decode" "double,double")
2212 (set_attr "bdver1_decode" "double,double")
2213 (set_attr "prefix_rep" "1")
2214 (set_attr "prefix" "maybe_vex")
2215 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 (register or memory) converted to
;; an SImode general register using the cvttsd2si mnemonic.
2217 (define_insn "sse2_cvttsd2si"
2218 [(set (match_operand:SI 0 "register_operand" "=r,r")
2221 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2222 (parallel [(const_int 0)]))))]
2224 "%vcvttsd2si\t{%1, %0|%0, %1}"
2225 [(set_attr "type" "sseicvt")
2226 (set_attr "athlon_decode" "double,vector")
2227 (set_attr "amdfam10_decode" "double,double")
2228 (set_attr "bdver1_decode" "double,double")
2229 (set_attr "prefix_rep" "1")
2230 (set_attr "prefix" "maybe_vex")
2231 (set_attr "mode" "SI")])
;; 64-bit form of cvttsd2si: element 0 of V2DF operand 1 converted to a
;; DImode general register.  Requires TARGET_SSE2 && TARGET_64BIT.
2233 (define_insn "sse2_cvttsd2siq"
2234 [(set (match_operand:DI 0 "register_operand" "=r,r")
2237 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2238 (parallel [(const_int 0)]))))]
2239 "TARGET_SSE2 && TARGET_64BIT"
2240 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2241 [(set_attr "type" "sseicvt")
2242 (set_attr "athlon_decode" "double,vector")
2243 (set_attr "amdfam10_decode" "double,double")
2244 (set_attr "bdver1_decode" "double,double")
2245 (set_attr "prefix_rep" "1")
2246 (set_attr "prefix" "maybe_vex")
2247 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): convert all four elements of a V4SI operand
;; to a V4DF register.  Always VEX-encoded.
2249 (define_insn "avx_cvtdq2pd256"
2250 [(set (match_operand:V4DF 0 "register_operand" "=x")
2251 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2253 "vcvtdq2pd\t{%1, %0|%0, %1}"
2254 [(set_attr "type" "ssecvt")
2255 (set_attr "prefix" "vex")
2256 (set_attr "mode" "V4DF")])
;; Unnamed variant of vcvtdq2pd whose source is elements 0..3 of a V8SI
;; operand.  The operand is printed with the %x modifier in the template.
2258 (define_insn "*avx_cvtdq2pd256_2"
2259 [(set (match_operand:V4DF 0 "register_operand" "=x")
2262 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2263 (parallel [(const_int 0) (const_int 1)
2264 (const_int 2) (const_int 3)]))))]
2266 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2267 [(set_attr "type" "ssecvt")
2268 (set_attr "prefix" "vex")
2269 (set_attr "mode" "V4DF")])
;; cvtdq2pd: V2DF result computed from elements 0 and 1 of V4SI operand 1.
;; The %v template prefix and "maybe_vex" attribute cover both encodings.
2271 (define_insn "sse2_cvtdq2pd"
2272 [(set (match_operand:V2DF 0 "register_operand" "=x")
2275 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2276 (parallel [(const_int 0) (const_int 1)]))))]
2278 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2279 [(set_attr "type" "ssecvt")
2280 (set_attr "prefix" "maybe_vex")
2281 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: V4DF operand converted to V4SI via
;; UNSPEC_FIX_NOTRUNC.  The {y} suffix is emitted in AT&T syntax only.
2283 (define_insn "avx_cvtpd2dq256"
2284 [(set (match_operand:V4SI 0 "register_operand" "=x")
2285 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2286 UNSPEC_FIX_NOTRUNC))]
2288 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2289 [(set_attr "type" "ssecvt")
2290 (set_attr "prefix" "vex")
2291 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V4SI destination built from the V2SI conversion
;; of V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SImode zero constant.
2293 (define_expand "sse2_cvtpd2dq"
2294 [(set (match_operand:V4SI 0 "register_operand" "")
2296 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2300 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the cvtpd2dq expander: the V2SI conversion result of
;; V2DF operand 1 is paired with a zero V2SI (operand 2, const0_operand) to
;; form the V4SI destination.  The output code returns either the
;; "vcvtpd2dq{x}" or the plain "cvtpd2dq" template.
2302 (define_insn "*sse2_cvtpd2dq"
2303 [(set (match_operand:V4SI 0 "register_operand" "=x")
2305 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2307 (match_operand:V2SI 2 "const0_operand" "")))]
2311 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2313 return "cvtpd2dq\t{%1, %0|%0, %1}";
2315 [(set_attr "type" "ssecvt")
2316 (set_attr "prefix_rep" "1")
2317 (set_attr "prefix_data16" "0")
2318 (set_attr "prefix" "maybe_vex")
2319 (set_attr "mode" "TI")
2320 (set_attr "amdfam10_decode" "double")
2321 (set_attr "athlon_decode" "vector")
2322 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq with a 256-bit source: V4DF operand converted to V4SI with
;; the RTL "fix" operator.  The {y} suffix is emitted in AT&T syntax only.
2324 (define_insn "avx_cvttpd2dq256"
2325 [(set (match_operand:V4SI 0 "register_operand" "=x")
2326 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2328 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2329 [(set_attr "type" "ssecvt")
2330 (set_attr "prefix" "vex")
2331 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: V4SI destination built from (fix:V2SI) of V2DF
;; operand 1.  The preparation statement supplies operand 2 as the
;; V2SImode zero constant.
2333 (define_expand "sse2_cvttpd2dq"
2334 [(set (match_operand:V4SI 0 "register_operand" "")
2336 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2339 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matched by the cvttpd2dq expander: (fix:V2SI) of V2DF operand 1 is
;; paired with a zero V2SI (operand 2, const0_operand) to form the V4SI
;; destination.  The output code returns either the "vcvttpd2dq{x}" or the
;; plain "cvttpd2dq" template.
2341 (define_insn "*sse2_cvttpd2dq"
2342 [(set (match_operand:V4SI 0 "register_operand" "=x")
2344 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2345 (match_operand:V2SI 2 "const0_operand" "")))]
2349 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2351 return "cvttpd2dq\t{%1, %0|%0, %1}";
2353 [(set_attr "type" "ssecvt")
2354 (set_attr "amdfam10_decode" "double")
2355 (set_attr "athlon_decode" "vector")
2356 (set_attr "bdver1_decode" "double")
2357 (set_attr "prefix" "maybe_vex")
2358 (set_attr "mode" "TI")])
;; cvtsd2ss: V2DF operand 2 is narrowed with float_truncate:V2SF for V4SF
;; destination operand 0.  Operand 1 is the V4SF source: tied to the
;; destination in the non-AVX alternatives, a separate register for AVX.
2360 (define_insn "sse2_cvtsd2ss"
2361 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2364 (float_truncate:V2SF
2365 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2366 (match_operand:V4SF 1 "register_operand" "0,0,x")
2370 cvtsd2ss\t{%2, %0|%0, %2}
2371 cvtsd2ss\t{%2, %0|%0, %2}
2372 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2373 [(set_attr "isa" "noavx,noavx,avx")
2374 (set_attr "type" "ssecvt")
2375 (set_attr "athlon_decode" "vector,double,*")
2376 (set_attr "amdfam10_decode" "vector,double,*")
2377 (set_attr "bdver1_decode" "direct,direct,*")
2378 (set_attr "prefix" "orig,orig,vex")
2379 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 for V2DF destination
;; operand 0.  Operand 1 is the V2DF source: tied to the destination in the
;; non-AVX alternatives, a separate register for AVX.
2381 (define_insn "sse2_cvtss2sd"
2382 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2386 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2387 (parallel [(const_int 0) (const_int 1)])))
2388 (match_operand:V2DF 1 "register_operand" "0,0,x")
2392 cvtss2sd\t{%2, %0|%0, %2}
2393 cvtss2sd\t{%2, %0|%0, %2}
2394 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2395 [(set_attr "isa" "noavx,noavx,avx")
2396 (set_attr "type" "ssecvt")
2397 (set_attr "amdfam10_decode" "vector,double,*")
2398 (set_attr "athlon_decode" "direct,direct,*")
2399 (set_attr "bdver1_decode" "direct,direct,*")
2400 (set_attr "prefix" "orig,orig,vex")
2401 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of a V4DF operand into a
;; V4SF register.  The {y} suffix is emitted in AT&T syntax only.
2403 (define_insn "avx_cvtpd2ps256"
2404 [(set (match_operand:V4SF 0 "register_operand" "=x")
2405 (float_truncate:V4SF
2406 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2408 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2409 [(set_attr "type" "ssecvt")
2410 (set_attr "prefix" "vex")
2411 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: V4SF destination built from float_truncate:V2SF
;; of V2DF operand 1.  The preparation statement supplies operand 2 as the
;; V2SFmode zero constant.
2413 (define_expand "sse2_cvtpd2ps"
2414 [(set (match_operand:V4SF 0 "register_operand" "")
2416 (float_truncate:V2SF
2417 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2420 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matched by the cvtpd2ps expander: float_truncate:V2SF of V2DF
;; operand 1 is paired with a zero V2SF (operand 2, const0_operand) to form
;; the V4SF destination.  The output code returns either the
;; "vcvtpd2ps{x}" or the plain "cvtpd2ps" template.
2422 (define_insn "*sse2_cvtpd2ps"
2423 [(set (match_operand:V4SF 0 "register_operand" "=x")
2425 (float_truncate:V2SF
2426 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2427 (match_operand:V2SF 2 "const0_operand" "")))]
2431 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2433 return "cvtpd2ps\t{%1, %0|%0, %1}";
2435 [(set_attr "type" "ssecvt")
2436 (set_attr "amdfam10_decode" "double")
2437 (set_attr "athlon_decode" "vector")
2438 (set_attr "bdver1_decode" "double")
2439 (set_attr "prefix_data16" "1")
2440 (set_attr "prefix" "maybe_vex")
2441 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): V4SF operand 1 (register or memory)
;; converted into a V4DF register.  Always VEX-encoded.
2443 (define_insn "avx_cvtps2pd256"
2444 [(set (match_operand:V4DF 0 "register_operand" "=x")
2446 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2448 "vcvtps2pd\t{%1, %0|%0, %1}"
2449 [(set_attr "type" "ssecvt")
2450 (set_attr "prefix" "vex")
2451 (set_attr "mode" "V4DF")])
;; Unnamed variant of vcvtps2pd whose source is elements 0..3 of a V8SF
;; operand.  The operand is printed with the %x modifier in the template.
2453 (define_insn "*avx_cvtps2pd256_2"
2454 [(set (match_operand:V4DF 0 "register_operand" "=x")
2457 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2458 (parallel [(const_int 0) (const_int 1)
2459 (const_int 2) (const_int 3)]))))]
2461 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2462 [(set_attr "type" "ssecvt")
2463 (set_attr "prefix" "vex")
2464 (set_attr "mode" "V4DF")])
;; cvtps2pd: V2DF result computed from elements 0 and 1 of V4SF operand 1.
;; The %v template prefix and "maybe_vex" attribute cover both encodings.
2466 (define_insn "sse2_cvtps2pd"
2467 [(set (match_operand:V2DF 0 "register_operand" "=x")
2470 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2471 (parallel [(const_int 0) (const_int 1)]))))]
2473 "%vcvtps2pd\t{%1, %0|%0, %1}"
2474 [(set_attr "type" "ssecvt")
2475 (set_attr "amdfam10_decode" "direct")
2476 (set_attr "athlon_decode" "double")
2477 (set_attr "bdver1_decode" "double")
2478 (set_attr "prefix_data16" "0")
2479 (set_attr "prefix" "maybe_vex")
2480 (set_attr "mode" "V2DF")])
;; Two-step expander for the high half of a V4SF: a first set applies the
;; selection (6 7 2 3) involving operand 1, and a second set produces the
;; V2DF destination from elements 0 and 1.  Operand 2 is a fresh V4SF
;; pseudo created in the preparation statement.
2482 (define_expand "vec_unpacks_hi_v4sf"
2487 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2488 (parallel [(const_int 6) (const_int 7)
2489 (const_int 2) (const_int 3)])))
2490 (set (match_operand:V2DF 0 "register_operand" "")
2494 (parallel [(const_int 0) (const_int 1)]))))]
2496 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V8SF: elements 4..7 of operand
;; 1 are selected first, then the V4DF destination is set.  Operand 2 is a
;; fresh V4SF pseudo created in the preparation statement.
2498 (define_expand "vec_unpacks_hi_v8sf"
2501 (match_operand:V8SF 1 "nonimmediate_operand" "")
2502 (parallel [(const_int 4) (const_int 5)
2503 (const_int 6) (const_int 7)])))
2504 (set (match_operand:V4DF 0 "register_operand" "")
2508 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Low-half unpack of a V4SF: the V2DF destination is computed from
;; elements 0 and 1 of operand 1.
2510 (define_expand "vec_unpacks_lo_v4sf"
2511 [(set (match_operand:V2DF 0 "register_operand" "")
2514 (match_operand:V4SF 1 "nonimmediate_operand" "")
2515 (parallel [(const_int 0) (const_int 1)]))))]
;; Low-half unpack of a V8SF: the V4DF destination is computed from
;; elements 0..3 of operand 1.
2518 (define_expand "vec_unpacks_lo_v8sf"
2519 [(set (match_operand:V4DF 0 "register_operand" "")
2522 (match_operand:V8SF 1 "nonimmediate_operand" "")
2523 (parallel [(const_int 0) (const_int 1)
2524 (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF, high half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2527 (define_expand "vec_unpacks_float_hi_v8hi"
2528 [(match_operand:V4SF 0 "register_operand" "")
2529 (match_operand:V8HI 1 "register_operand" "")]
2532 rtx tmp = gen_reg_rtx (V4SImode);
2534 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2535 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, signed: unpack into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2539 (define_expand "vec_unpacks_float_lo_v8hi"
2540 [(match_operand:V4SF 0 "register_operand" "")
2541 (match_operand:V8HI 1 "register_operand" "")]
2544 rtx tmp = gen_reg_rtx (V4SImode);
2546 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2547 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, high half, unsigned: unpack into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2551 (define_expand "vec_unpacku_float_hi_v8hi"
2552 [(match_operand:V4SF 0 "register_operand" "")
2553 (match_operand:V8HI 1 "register_operand" "")]
2556 rtx tmp = gen_reg_rtx (V4SImode);
2558 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2559 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF, low half, unsigned: unpack into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2563 (define_expand "vec_unpacku_float_lo_v8hi"
2564 [(match_operand:V4SF 0 "register_operand" "")
2565 (match_operand:V8HI 1 "register_operand" "")]
2568 rtx tmp = gen_reg_rtx (V4SImode);
2570 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2571 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander for the high half of a V4SI: elements (2 3 2 3) of
;; operand 1 are selected first, then the V2DF destination is produced from
;; elements 0 and 1.  Operand 2 is a fresh V4SI pseudo created in the
;; preparation statement.
2575 (define_expand "vec_unpacks_float_hi_v4si"
2578 (match_operand:V4SI 1 "nonimmediate_operand" "")
2579 (parallel [(const_int 2) (const_int 3)
2580 (const_int 2) (const_int 3)])))
2581 (set (match_operand:V2DF 0 "register_operand" "")
2585 (parallel [(const_int 0) (const_int 1)]))))]
2587 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low-half V4SI -> V2DF: the destination is computed from elements 0 and 1
;; of operand 1.
2589 (define_expand "vec_unpacks_float_lo_v4si"
2590 [(set (match_operand:V2DF 0 "register_operand" "")
2593 (match_operand:V4SI 1 "nonimmediate_operand" "")
2594 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander for the high half of a V8SI: elements 4..7 of operand
;; 1 are selected first, then the V4DF destination is set.  Operand 2 is a
;; fresh V4SI pseudo created in the preparation statement.
2597 (define_expand "vec_unpacks_float_hi_v8si"
2600 (match_operand:V8SI 1 "nonimmediate_operand" "")
2601 (parallel [(const_int 4) (const_int 5)
2602 (const_int 6) (const_int 7)])))
2603 (set (match_operand:V4DF 0 "register_operand" "")
2607 "operands[2] = gen_reg_rtx (V4SImode);")
;; Low-half V8SI -> V4DF: the destination is computed from elements 0..3 of
;; operand 1.
2609 (define_expand "vec_unpacks_float_lo_v8si"
2610 [(set (match_operand:V4DF 0 "register_operand" "")
2613 (match_operand:V8SI 1 "nonimmediate_operand" "")
2614 (parallel [(const_int 0) (const_int 1)
2615 (const_int 2) (const_int 3)]))))]
;; Unsigned high-half V4SI -> V2DF expander.  The preparation code sets
;; operand 3 to a zero V2DF register, operand 4 to a V2DF constant built
;; from 2^32 (real_ldexp of dconst1 by 32), operand 5 to a V4SI pseudo and
;; operands 6..8 to V2DF pseudos.  The RTL selects elements (2 3 2 3) of
;; operand 1, compares operand 6 against the zero vector (lt), masks
;; operand 7 with the 2^32 vector (and), and sets the destination to
;; operand 6 plus operand 8.
;; NOTE(review): presumably this adds 2^32 back to lanes whose signed
;; conversion came out negative -- confirm.
2618 (define_expand "vec_unpacku_float_hi_v4si"
2621 (match_operand:V4SI 1 "nonimmediate_operand" "")
2622 (parallel [(const_int 2) (const_int 3)
2623 (const_int 2) (const_int 3)])))
2628 (parallel [(const_int 0) (const_int 1)]))))
2630 (lt:V2DF (match_dup 6) (match_dup 3)))
2632 (and:V2DF (match_dup 7) (match_dup 4)))
2633 (set (match_operand:V2DF 0 "register_operand" "")
2634 (plus:V2DF (match_dup 6) (match_dup 8)))]
2637 REAL_VALUE_TYPE TWO32r;
2641 real_ldexp (&TWO32r, &dconst1, 32);
2642 x = const_double_from_real_value (TWO32r, DFmode);
2644 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2645 operands[4] = force_reg (V2DFmode,
2646 ix86_build_const_vector (V2DFmode, 1, x));
2648 operands[5] = gen_reg_rtx (V4SImode);
2650 for (i = 6; i < 9; i++)
2651 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned low-half V4SI -> V2DF expander.  The preparation code sets
;; operand 3 to a zero V2DF register, operand 4 to a V2DF constant built
;; from 2^32 (real_ldexp of dconst1 by 32) and operands 5..7 to V2DF
;; pseudos.  The RTL uses elements 0 and 1 of operand 1, compares operand 5
;; against the zero vector (lt), masks operand 6 with the 2^32 vector
;; (and), and sets the destination to operand 5 plus operand 7.
;; NOTE(review): presumably this adds 2^32 back to lanes whose signed
;; conversion came out negative -- confirm.
2654 (define_expand "vec_unpacku_float_lo_v4si"
2658 (match_operand:V4SI 1 "nonimmediate_operand" "")
2659 (parallel [(const_int 0) (const_int 1)]))))
2661 (lt:V2DF (match_dup 5) (match_dup 3)))
2663 (and:V2DF (match_dup 6) (match_dup 4)))
2664 (set (match_operand:V2DF 0 "register_operand" "")
2665 (plus:V2DF (match_dup 5) (match_dup 7)))]
2668 REAL_VALUE_TYPE TWO32r;
2672 real_ldexp (&TWO32r, &dconst1, 32);
2673 x = const_double_from_real_value (TWO32r, DFmode);
2675 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2676 operands[4] = force_reg (V2DFmode,
2677 ix86_build_const_vector (V2DFmode, 1, x));
2679 for (i = 5; i < 8; i++)
2680 operands[i] = gen_reg_rtx (V2DFmode);
;; Pack two V4DF operands into one V8SF: each of operands 1 and 2 is
;; narrowed with float_truncate:V4SF, and the V8SF destination is then set.
;; Operands 3 and 4 are fresh V4SF pseudos created in the preparation code.
2683 (define_expand "vec_pack_trunc_v4df"
2685 (float_truncate:V4SF
2686 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2688 (float_truncate:V4SF
2689 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2690 (set (match_operand:V8SF 0 "register_operand" "")
2696 operands[3] = gen_reg_rtx (V4SFmode);
2697 operands[4] = gen_reg_rtx (V4SFmode);
;; Pack two V2DF operands into one V4SF: each input is converted into its
;; own V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries are
;; combined into operand 0 with gen_sse_movlhps.
2700 (define_expand "vec_pack_trunc_v2df"
2701 [(match_operand:V4SF 0 "register_operand" "")
2702 (match_operand:V2DF 1 "nonimmediate_operand" "")
2703 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2708 r1 = gen_reg_rtx (V4SFmode);
2709 r2 = gen_reg_rtx (V4SFmode);
2711 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2712 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2713 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI using gen_sse2_cvttpd2dq: each
;; input is converted into its own V4SI temporary, then the temporaries are
;; combined with gen_vec_interleave_lowv2di, viewing all three registers
;; as V2DImode via gen_lowpart.
2717 (define_expand "vec_pack_sfix_trunc_v2df"
2718 [(match_operand:V4SI 0 "register_operand" "")
2719 (match_operand:V2DF 1 "nonimmediate_operand" "")
2720 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2725 r1 = gen_reg_rtx (V4SImode);
2726 r2 = gen_reg_rtx (V4SImode);
2728 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2729 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2730 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2731 gen_lowpart (V2DImode, r1),
2732 gen_lowpart (V2DImode, r2)));
;; Pack two V2DF operands into one V4SI using gen_sse2_cvtpd2dq: each input
;; is converted into its own V4SI temporary, then the temporaries are
;; combined with gen_vec_interleave_lowv2di, viewing all three registers
;; as V2DImode via gen_lowpart.
2736 (define_expand "vec_pack_sfix_v2df"
2737 [(match_operand:V4SI 0 "register_operand" "")
2738 (match_operand:V2DF 1 "nonimmediate_operand" "")
2739 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2744 r1 = gen_reg_rtx (V4SImode);
2745 r2 = gen_reg_rtx (V4SImode);
2747 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2748 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2749 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2750 gen_lowpart (V2DImode, r1),
2751 gen_lowpart (V2DImode, r2)));
2755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2757 ;; Parallel single-precision floating point element swizzling
2759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around the sse_movhlps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; the insn is emitted into that destination and the result is
;; copied to operand 0 if the two differ.
2761 (define_expand "sse_movhlps_exp"
2762 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2765 (match_operand:V4SF 1 "nonimmediate_operand" "")
2766 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2767 (parallel [(const_int 6)
2773 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2775 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2777 /* Fix up the destination if needed. */
2778 if (dst != operands[0])
2779 emit_move_insn (operands[0], dst);
;; movhlps family, five alternatives:
;;   0/1  register-register movhlps / vmovhlps;
;;   2/3  movlps / vmovlps reading offsettable memory operand 2 through the
;;        %H operand modifier;
;;   4    movhps with a memory destination (operand 1 tied to it).
;; The insn condition rejects operands 1 and 2 both being memory.
2784 (define_insn "sse_movhlps"
2785 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2788 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2789 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2790 (parallel [(const_int 6)
2794 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2796 movhlps\t{%2, %0|%0, %2}
2797 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2798 movlps\t{%H2, %0|%0, %H2}
2799 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2800 %vmovhps\t{%2, %0|%0, %2}"
2801 [(set_attr "isa" "noavx,avx,noavx,avx,*")
2802 (set_attr "type" "ssemov")
2803 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2804 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around the sse_movlhps insn.  The operands are first
;; passed through ix86_fixup_binary_operands, which returns the destination
;; to use; the insn is emitted into that destination and the result is
;; copied to operand 0 if the two differ.
2806 (define_expand "sse_movlhps_exp"
2807 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2810 (match_operand:V4SF 1 "nonimmediate_operand" "")
2811 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2812 (parallel [(const_int 0)
2818 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2820 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2822 /* Fix up the destination if needed. */
2823 if (dst != operands[0])
2824 emit_move_insn (operands[0], dst);
;; movlhps family, five alternatives:
;;   0/1  register-register movlhps / vmovlhps;
;;   2/3  movhps / vmovhps loading operand 2 from memory;
;;   4    movlps storing register operand 2 into the %H part of an
;;        offsettable memory destination (operand 1 tied to it).
;; Alternatives 2 and 3 emit movhps/vmovhps, whose source must be a memory
;; operand, so operand 2 uses the "m" constraint in both of them.  The
;; register-register case is handled by alternatives 0 and 1.
2829 (define_insn "sse_movlhps"
2830 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2833 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2834 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2835 (parallel [(const_int 0)
2839 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2841 movlhps\t{%2, %0|%0, %2}
2842 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2843 movhps\t{%2, %0|%0, %2}
2844 vmovhps\t{%2, %1, %0|%0, %1, %2}
2845 %vmovlps\t{%2, %H0|%H0, %2}"
2846 [(set_attr "isa" "noavx,avx,noavx,avx,*")
2847 (set_attr "type" "ssemov")
2848 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2849 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2851 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: the selection vector (2 10 3 11 6 14 7 15) indexes
;; the pairing of operand 1 with operand 2, so indices 8..15 refer to
;; operand 2.
2852 (define_insn "avx_unpckhps256"
2853 [(set (match_operand:V8SF 0 "register_operand" "=x")
2856 (match_operand:V8SF 1 "register_operand" "x")
2857 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2858 (parallel [(const_int 2) (const_int 10)
2859 (const_int 3) (const_int 11)
2860 (const_int 6) (const_int 14)
2861 (const_int 7) (const_int 15)])))]
2863 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2864 [(set_attr "type" "sselog")
2865 (set_attr "prefix" "vex")
2866 (set_attr "mode" "V8SF")])
;; Interleave-high for V8SF built from three steps: one selection with the
;; (0 8 1 9 4 12 5 13) pattern, one with the (2 10 3 11 6 14 7 15) pattern,
;; and a final selection (4 5 6 7 12 13 14 15) that sets operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation code.
2868 (define_expand "vec_interleave_highv8sf"
2872 (match_operand:V8SF 1 "register_operand" "x")
2873 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2874 (parallel [(const_int 0) (const_int 8)
2875 (const_int 1) (const_int 9)
2876 (const_int 4) (const_int 12)
2877 (const_int 5) (const_int 13)])))
2883 (parallel [(const_int 2) (const_int 10)
2884 (const_int 3) (const_int 11)
2885 (const_int 6) (const_int 14)
2886 (const_int 7) (const_int 15)])))
2887 (set (match_operand:V8SF 0 "register_operand" "")
2892 (parallel [(const_int 4) (const_int 5)
2893 (const_int 6) (const_int 7)
2894 (const_int 12) (const_int 13)
2895 (const_int 14) (const_int 15)])))]
2898 operands[3] = gen_reg_rtx (V8SFmode);
2899 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF: selection (2 6 3 7) over operand 1 paired
;; with operand 2.  Operand 1 is tied to the destination in the non-AVX
;; alternative and is a separate register in the AVX one.
2902 (define_insn "vec_interleave_highv4sf"
2903 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2906 (match_operand:V4SF 1 "register_operand" "0,x")
2907 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2908 (parallel [(const_int 2) (const_int 6)
2909 (const_int 3) (const_int 7)])))]
2912 unpckhps\t{%2, %0|%0, %2}
2913 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2914 [(set_attr "isa" "noavx,avx")
2915 (set_attr "type" "sselog")
2916 (set_attr "prefix" "orig,vex")
2917 (set_attr "mode" "V4SF")])
2919 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: the selection vector (0 8 1 9 4 12 5 13) indexes the
;; pairing of operand 1 with operand 2, so indices 8..15 refer to
;; operand 2.
2920 (define_insn "avx_unpcklps256"
2921 [(set (match_operand:V8SF 0 "register_operand" "=x")
2924 (match_operand:V8SF 1 "register_operand" "x")
2925 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2926 (parallel [(const_int 0) (const_int 8)
2927 (const_int 1) (const_int 9)
2928 (const_int 4) (const_int 12)
2929 (const_int 5) (const_int 13)])))]
2931 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2932 [(set_attr "type" "sselog")
2933 (set_attr "prefix" "vex")
2934 (set_attr "mode" "V8SF")])
;; Interleave-low for V8SF built from three steps: one selection with the
;; (0 8 1 9 4 12 5 13) pattern, one with the (2 10 3 11 6 14 7 15) pattern,
;; and a final selection (0 1 2 3 8 9 10 11) that sets operand 0.
;; Operands 3 and 4 are fresh V8SF pseudos created in the preparation code.
2936 (define_expand "vec_interleave_lowv8sf"
2940 (match_operand:V8SF 1 "register_operand" "x")
2941 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2942 (parallel [(const_int 0) (const_int 8)
2943 (const_int 1) (const_int 9)
2944 (const_int 4) (const_int 12)
2945 (const_int 5) (const_int 13)])))
2951 (parallel [(const_int 2) (const_int 10)
2952 (const_int 3) (const_int 11)
2953 (const_int 6) (const_int 14)
2954 (const_int 7) (const_int 15)])))
2955 (set (match_operand:V8SF 0 "register_operand" "")
2960 (parallel [(const_int 0) (const_int 1)
2961 (const_int 2) (const_int 3)
2962 (const_int 8) (const_int 9)
2963 (const_int 10) (const_int 11)])))]
2966 operands[3] = gen_reg_rtx (V8SFmode);
2967 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF: selection (0 4 1 5) over operand 1 paired
;; with operand 2.  Operand 1 is tied to the destination in the non-AVX
;; alternative and is a separate register in the AVX one.
2970 (define_insn "vec_interleave_lowv4sf"
2971 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2974 (match_operand:V4SF 1 "register_operand" "0,x")
2975 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2976 (parallel [(const_int 0) (const_int 4)
2977 (const_int 1) (const_int 5)])))]
2980 unpcklps\t{%2, %0|%0, %2}
2981 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2982 [(set_attr "isa" "noavx,avx")
2983 (set_attr "type" "sselog")
2984 (set_attr "prefix" "orig,vex")
2985 (set_attr "mode" "V4SF")])
2987 ;; These are modeled with the same vec_concat as the others so that we
2988 ;; capture users of shufps that can use the new instructions
;; vmovshdup on V8SF: the selection (1 1 3 3 5 5 7 7) duplicates each
;; odd-indexed element of operand 1.
2989 (define_insn "avx_movshdup256"
2990 [(set (match_operand:V8SF 0 "register_operand" "=x")
2993 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2995 (parallel [(const_int 1) (const_int 1)
2996 (const_int 3) (const_int 3)
2997 (const_int 5) (const_int 5)
2998 (const_int 7) (const_int 7)])))]
3000 "vmovshdup\t{%1, %0|%0, %1}"
3001 [(set_attr "type" "sse")
3002 (set_attr "prefix" "vex")
3003 (set_attr "mode" "V8SF")])
;; movshdup on V4SF (128-bit counterpart of avx_movshdup256); the selection
;; vector begins with element 1.  Emitted with the %v prefix so the same
;; pattern serves both encodings.
3005 (define_insn "sse3_movshdup"
3006 [(set (match_operand:V4SF 0 "register_operand" "=x")
3009 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3011 (parallel [(const_int 1)
3016 "%vmovshdup\t{%1, %0|%0, %1}"
3017 [(set_attr "type" "sse")
3018 (set_attr "prefix_rep" "1")
3019 (set_attr "prefix" "maybe_vex")
3020 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the selection (0 0 2 2 4 4 6 6) duplicates each
;; even-indexed element of operand 1.
3022 (define_insn "avx_movsldup256"
3023 [(set (match_operand:V8SF 0 "register_operand" "=x")
3026 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3028 (parallel [(const_int 0) (const_int 0)
3029 (const_int 2) (const_int 2)
3030 (const_int 4) (const_int 4)
3031 (const_int 6) (const_int 6)])))]
3033 "vmovsldup\t{%1, %0|%0, %1}"
3034 [(set_attr "type" "sse")
3035 (set_attr "prefix" "vex")
3036 (set_attr "mode" "V8SF")])
;; movsldup on V4SF (128-bit counterpart of avx_movsldup256); the selection
;; vector begins with element 0.  Emitted with the %v prefix so the same
;; pattern serves both encodings.
3038 (define_insn "sse3_movsldup"
3039 [(set (match_operand:V4SF 0 "register_operand" "=x")
3042 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3044 (parallel [(const_int 0)
3049 "%vmovsldup\t{%1, %0|%0, %1}"
3050 [(set_attr "type" "sse")
3051 (set_attr "prefix_rep" "1")
3052 (set_attr "prefix" "maybe_vex")
3053 (set_attr "mode" "V4SF")])
;; Expander for vshufps on V8SF taking the 8-bit immediate as operand 3.
;; The immediate is split into four 2-bit fields and turned into the eight
;; explicit element indices expected by avx_shufps256_1: fields 0 and 1
;; index 0..3 (and the same +4), fields 2 and 3 index 8..11 (and the same
;; +4, i.e. 12..15).
3055 (define_expand "avx_shufps256"
3056 [(match_operand:V8SF 0 "register_operand" "")
3057 (match_operand:V8SF 1 "register_operand" "")
3058 (match_operand:V8SF 2 "nonimmediate_operand" "")
3059 (match_operand:SI 3 "const_int_operand" "")]
3062 int mask = INTVAL (operands[3]);
3063 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3064 GEN_INT ((mask >> 0) & 3),
3065 GEN_INT ((mask >> 2) & 3),
3066 GEN_INT (((mask >> 4) & 3) + 8),
3067 GEN_INT (((mask >> 6) & 3) + 8),
3068 GEN_INT (((mask >> 0) & 3) + 4),
3069 GEN_INT (((mask >> 2) & 3) + 4),
3070 GEN_INT (((mask >> 4) & 3) + 12),
3071 GEN_INT (((mask >> 6) & 3) + 12)));
3075 ;; One bit in mask selects 2 elements.
;; vshufps on V8SF with the eight element indices given as operands 3..10.
;; The insn condition requires operands 7..10 to equal operands 3..6 plus
;; 4, so the output code rebuilds the 8-bit immediate from operands 3..6
;; only (two bits each, after subtracting 8 from operands 5 and 6) and
;; stores it back into operand 3 for printing.
3076 (define_insn "avx_shufps256_1"
3077 [(set (match_operand:V8SF 0 "register_operand" "=x")
3080 (match_operand:V8SF 1 "register_operand" "x")
3081 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3082 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3083 (match_operand 4 "const_0_to_3_operand" "")
3084 (match_operand 5 "const_8_to_11_operand" "")
3085 (match_operand 6 "const_8_to_11_operand" "")
3086 (match_operand 7 "const_4_to_7_operand" "")
3087 (match_operand 8 "const_4_to_7_operand" "")
3088 (match_operand 9 "const_12_to_15_operand" "")
3089 (match_operand 10 "const_12_to_15_operand" "")])))]
3091 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3092 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3093 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3094 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3097 mask = INTVAL (operands[3]);
3098 mask |= INTVAL (operands[4]) << 2;
3099 mask |= (INTVAL (operands[5]) - 8) << 4;
3100 mask |= (INTVAL (operands[6]) - 8) << 6;
3101 operands[3] = GEN_INT (mask);
3103 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3105 [(set_attr "type" "sselog")
3106 (set_attr "length_immediate" "1")
3107 (set_attr "prefix" "vex")
3108 (set_attr "mode" "V8SF")])
;; Expander for shufps on V4SF taking the 8-bit immediate as operand 3.
;; The immediate is split into four 2-bit fields: the first two become
;; indices 0..3, the last two become indices 4..7, and all four are passed
;; to gen_sse_shufps_v4sf.
3110 (define_expand "sse_shufps"
3111 [(match_operand:V4SF 0 "register_operand" "")
3112 (match_operand:V4SF 1 "register_operand" "")
3113 (match_operand:V4SF 2 "nonimmediate_operand" "")
3114 (match_operand:SI 3 "const_int_operand" "")]
3117 int mask = INTVAL (operands[3]);
3118 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3119 GEN_INT ((mask >> 0) & 3),
3120 GEN_INT ((mask >> 2) & 3),
3121 GEN_INT (((mask >> 4) & 3) + 4),
3122 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps over the VI4F_128 mode iterator.  Operands 3 and 4
;; index into operand 1 (0..3), operands 5 and 6 into operand 2 (4..7 of
;; the concatenation).  The output code folds the four indices back into
;; one 8-bit immediate (two bits each, after subtracting 4 from operands 5
;; and 6), stores it in operand 3, and picks the two- or three-operand
;; template according to which_alternative.
3126 (define_insn "sse_shufps_<mode>"
3127 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3128 (vec_select:VI4F_128
3129 (vec_concat:<ssedoublevecmode>
3130 (match_operand:VI4F_128 1 "register_operand" "0,x")
3131 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3132 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3133 (match_operand 4 "const_0_to_3_operand" "")
3134 (match_operand 5 "const_4_to_7_operand" "")
3135 (match_operand 6 "const_4_to_7_operand" "")])))]
3139 mask |= INTVAL (operands[3]) << 0;
3140 mask |= INTVAL (operands[4]) << 2;
3141 mask |= (INTVAL (operands[5]) - 4) << 4;
3142 mask |= (INTVAL (operands[6]) - 4) << 6;
3143 operands[3] = GEN_INT (mask);
3145 switch (which_alternative)
3148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3150 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3155 [(set_attr "isa" "noavx,avx")
3156 (set_attr "type" "sselog")
3157 (set_attr "length_immediate" "1")
3158 (set_attr "prefix" "orig,vex")
3159 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0:
;;   0  movhps store to memory;
;;   1  register-register movhlps;
;;   2  movlps load from offsettable memory through the %H modifier.
3161 (define_insn "sse_storehps"
3162 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3164 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3165 (parallel [(const_int 2) (const_int 3)])))]
3168 %vmovhps\t{%1, %0|%0, %1}
3169 %vmovhlps\t{%1, %d0|%d0, %1}
3170 %vmovlps\t{%H1, %d0|%d0, %H1}"
3171 [(set_attr "type" "ssemov")
3172 (set_attr "prefix" "maybe_vex")
3173 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadhps insn (elements 0 and 1 of V4SF
;; operand 1 combined with V2SF operand 2).  The operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the destination it
;; returns, and the result is copied to operand 0 if the two differ.
3175 (define_expand "sse_loadhps_exp"
3176 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3179 (match_operand:V4SF 1 "nonimmediate_operand" "")
3180 (parallel [(const_int 0) (const_int 1)]))
3181 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3184 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3186 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3188 /* Fix up the destination if needed. */
3189 if (dst != operands[0])
3190 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2:
;;   0/1  movhps / vmovhps with operand 2 in memory;
;;   2/3  movlhps / vmovlhps with operand 2 in a register;
;;   4    movlps storing register operand 2 into the %H part of an
;;        offsettable memory destination (operand 1 tied to it).
3195 (define_insn "sse_loadhps"
3196 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3199 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3200 (parallel [(const_int 0) (const_int 1)]))
3201 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3204 movhps\t{%2, %0|%0, %2}
3205 vmovhps\t{%2, %1, %0|%0, %1, %2}
3206 movlhps\t{%2, %0|%0, %2}
3207 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3208 %vmovlps\t{%2, %H0|%H0, %2}"
3209 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3210 (set_attr "type" "ssemov")
3211 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3212 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0:
;;   0  movlps store to memory;
;;   1  register-register copy using movaps;
;;   2  movlps load from memory.
3214 (define_insn "sse_storelps"
3215 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3217 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3218 (parallel [(const_int 0) (const_int 1)])))]
3221 %vmovlps\t{%1, %0|%0, %1}
3222 %vmovaps\t{%1, %0|%0, %1}
3223 %vmovlps\t{%1, %d0|%d0, %1}"
3224 [(set_attr "type" "ssemov")
3225 (set_attr "prefix" "maybe_vex")
3226 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around the sse_loadlps insn (V2SF operand 2 combined
;; with elements 2 and 3 of V4SF operand 1).  The operands go through
;; ix86_fixup_binary_operands, the insn is emitted into the destination it
;; returns, and the result is copied to operand 0 if the two differ.
3228 (define_expand "sse_loadlps_exp"
3229 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3231 (match_operand:V2SF 2 "nonimmediate_operand" "")
3233 (match_operand:V4SF 1 "nonimmediate_operand" "")
3234 (parallel [(const_int 2) (const_int 3)]))))]
3237 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3239 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3241 /* Fix up the destination if needed. */
3242 if (dst != operands[0])
3243 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1:
;;   0/1  shufps / vshufps with immediate 0xe4 when operand 2 is in a
;;        register (operand 2 tied to the destination in the non-AVX form);
;;   2/3  movlps / vmovlps with operand 2 in memory;
;;   4    movlps store of register operand 2 into a memory destination
;;        (operand 1 tied to it).
3248 (define_insn "sse_loadlps"
3249 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3251 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3253 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3254 (parallel [(const_int 2) (const_int 3)]))))]
3257 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3258 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3259 movlps\t{%2, %0|%0, %2}
3260 vmovlps\t{%2, %1, %0|%0, %1, %2}
3261 %vmovlps\t{%2, %0|%0, %2}"
3262 [(set_attr "isa" "noavx,avx,noavx,avx,*")
3263 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3264 (set_attr "length_immediate" "1,1,*,*,*")
3265 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3266 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  Operand 1 is tied to the
;; destination in the non-AVX alternative and is a separate register in the
;; three-operand AVX alternative; operand 2 is always a register.
3268 (define_insn "sse_movss"
3269 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3271 (match_operand:V4SF 2 "register_operand" " x,x")
3272 (match_operand:V4SF 1 "register_operand" " 0,x")
3276 movss\t{%2, %0|%0, %2}
3277 vmovss\t{%2, %1, %0|%0, %1, %2}"
3278 [(set_attr "isa" "noavx,avx")
3279 (set_attr "type" "ssemov")
3280 (set_attr "prefix" "orig,vex")
3281 (set_attr "mode" "SF")])
;; vec_dupv4sf: expander producing V4SF operand 0 from SF operand 1.
;; The preparation statement shown loads operand 1 into an SFmode
;; register with force_reg.
;; NOTE(review): any guard around that statement is not visible in this
;; listing -- confirm against the full source.
3283 (define_expand "vec_dupv4sf"
3284 [(set (match_operand:V4SF 0 "register_operand" "")
3286 (match_operand:SF 1 "nonimmediate_operand" "")))]
3290 operands[1] = force_reg (SFmode, operands[1]);
;; *vec_dupv4sf_avx: VEX-encoded V4SF duplicate of SF operand 1.
;; Alternative 0 takes a register source and uses vshufps with
;; immediate 0; alternative 1 takes a memory source and uses
;; vbroadcastss.
3293 (define_insn "*vec_dupv4sf_avx"
3294 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3296 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3299 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3300 vbroadcastss\t{%1, %0|%0, %1}"
3301 [(set_attr "type" "sselog1,ssemov")
3302 (set_attr "length_immediate" "1,0")
3303 (set_attr "prefix_extra" "0,1")
3304 (set_attr "prefix" "vex")
3305 (set_attr "mode" "V4SF")])
;; *vec_dupv4sf: V4SF duplicate of SF operand 1 where the source register
;; is tied to the destination (constraint "0"); emits shufps with
;; immediate 0 of the destination with itself.
3307 (define_insn "*vec_dupv4sf"
3308 [(set (match_operand:V4SF 0 "register_operand" "=x")
3310 (match_operand:SF 1 "register_operand" "0")))]
3312 "shufps\t{$0, %0, %0|%0, %0, 0}"
3313 [(set_attr "type" "sselog1")
3314 (set_attr "length_immediate" "1")
3315 (set_attr "mode" "V4SF")])
3317 ;; Although insertps takes register source, we prefer
3318 ;; unpcklps with register source since it is shorter.
;; Builds V2SF operand 0 from SF operands 1 and 2.  Alternatives 0-4 use
;; SSE registers: unpcklps/vunpcklps for a register operand 2,
;; insertps/vinsertps for a memory operand 2, and a plain movss load of
;; operand 1 when operand 2 satisfies the "C" constraint.  Alternatives
;; 5-6 use MMX registers (punpckldq and movd respectively).
3319 (define_insn "*vec_concatv2sf_sse4_1"
3320 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3322 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3323 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3326 unpcklps\t{%2, %0|%0, %2}
3327 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3328 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3329 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3330 %vmovss\t{%1, %0|%0, %1}
3331 punpckldq\t{%2, %0|%0, %2}
3332 movd\t{%1, %0|%0, %1}"
3333 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
3334 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3335 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3336 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3337 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3338 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3339 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3341 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3342 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3343 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds V2SF operand 0 from SF operands 1 and 2 without the insertps
;; forms.  Alternatives 0-1 use SSE registers (unpcklps, or a movss load
;; when operand 2 satisfies "C"); alternatives 2-3 use MMX registers
;; (punpckldq, or a movd load when operand 2 satisfies "C").
3344 (define_insn "*vec_concatv2sf_sse"
3345 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3347 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3348 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3351 unpcklps\t{%2, %0|%0, %2}
3352 movss\t{%1, %0|%0, %1}
3353 punpckldq\t{%2, %0|%0, %2}
3354 movd\t{%1, %0|%0, %1}"
3355 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3356 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: builds V4SF operand 0 from V2SF operands 1 and 2.
;; A register operand 2 uses movlhps/vmovlhps; a memory operand 2 uses
;; movhps/vmovhps.  The non-AVX alternatives tie operand 1 to the
;; destination.
3358 (define_insn "*vec_concatv4sf"
3359 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3361 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3362 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3365 movlhps\t{%2, %0|%0, %2}
3366 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3367 movhps\t{%2, %0|%0, %2}
3368 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3369 [(set_attr "isa" "noavx,avx,noavx,avx")
3370 (set_attr "type" "ssemov")
3371 (set_attr "prefix" "orig,vex,orig,vex")
3372 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander for the V_128 modes.  All work is delegated
;; to ix86_expand_vector_init, called with false as its first argument,
;; operand 0 as the target and operand 1 (no predicate, no mode) as the
;; initializer.
3374 (define_expand "vec_init<mode>"
3375 [(match_operand:V_128 0 "register_operand" "")
3376 (match_operand 1 "" "")]
3379 ix86_expand_vector_init (false, operands[0], operands[1]);
3383 ;; Avoid combining registers from different units in a single alternative,
3384 ;; see comment above inline_secondary_memory_needed function in i386.c
;; vec_set<mode>_0: combines a vec_duplicate of scalar operand 2 with
;; VI4F_128 operand 1 into operand 0.  Alternatives 0-3 take an operand 1
;; that satisfies "C"; alternatives 4-7 take a register operand 1 (tied
;; to the destination in the non-AVX forms); alternatives 8-10 have a
;; memory destination tied to operand 1.  The "type" attribute marks
;; alternatives 0, 6 and 7 as sselog, 9 as fmov and 10 as imov.
;; NOTE(review): the output templates for alternatives 8-10 are not
;; visible in this listing.
3385 (define_insn "vec_set<mode>_0"
3386 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3387 "=Y4,Y2,Y2,x,x,x,Y4 ,x ,m,m ,m")
3389 (vec_duplicate:VI4F_128
3390 (match_operand:<ssescalarmode> 2 "general_operand"
3391 " Y4,m ,*r,m,x,x,*rm,*rm,x,fF,*r"))
3392 (match_operand:VI4F_128 1 "vector_move_operand"
3393 " C ,C ,C ,C,0,x,0 ,x ,0,0 ,0")
3397 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3398 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3399 %vmovd\t{%2, %0|%0, %2}
3400 movss\t{%2, %0|%0, %2}
3401 movss\t{%2, %0|%0, %2}
3402 vmovss\t{%2, %1, %0|%0, %1, %2}
3403 pinsrd\t{$0, %2, %0|%0, %2, 0}
3404 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3408 [(set_attr "isa" "*,*,*,noavx,noavx,avx,noavx,avx,*,*,*")
3410 (cond [(eq_attr "alternative" "0,6,7")
3411 (const_string "sselog")
3412 (eq_attr "alternative" "9")
3413 (const_string "fmov")
3414 (eq_attr "alternative" "10")
3415 (const_string "imov")
3417 (const_string "ssemov")))
3418 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
3419 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
3420 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
3421 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
3423 ;; A subset is vec_setv4sf.
;; Inserts SF operand 2 into V4SF operand 1 at the element chosen by
;; operand 3.  The insn condition requires exact_log2 of operand 3 to be
;; a valid V4SF element index; the output code then rewrites operand 3
;; to that index shifted left by 4 and emits insertps (non-AVX) or
;; vinsertps (AVX) with it as the immediate.
3424 (define_insn "*vec_setv4sf_sse4_1"
3425 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3428 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3429 (match_operand:V4SF 1 "register_operand" "0,x")
3430 (match_operand:SI 3 "const_int_operand" "")))]
3432 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3433 < GET_MODE_NUNITS (V4SFmode))"
3435 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3436 switch (which_alternative)
3439 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3441 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3446 [(set_attr "isa" "noavx,avx")
3447 (set_attr "type" "sselog")
3448 (set_attr "prefix_data16" "1,*")
3449 (set_attr "prefix_extra" "1")
3450 (set_attr "length_immediate" "1")
3451 (set_attr "prefix" "orig,vex")
3452 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec pattern for insertps/vinsertps with an 8-bit
;; immediate (operand 3).  When operand 2 is in memory, the output code
;; takes the top two bits of the immediate (imm >> 6) as count_s, clears
;; them from the immediate (imm & 0x3f), and re-addresses operand 2 as an
;; SFmode reference at byte offset count_s * 4 before printing.
3454 (define_insn "sse4_1_insertps"
3455 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3456 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3457 (match_operand:V4SF 1 "register_operand" "0,x")
3458 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3462 if (MEM_P (operands[2]))
3464 unsigned count_s = INTVAL (operands[3]) >> 6;
3466 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3467 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3469 switch (which_alternative)
3472 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3474 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3479 [(set_attr "isa" "noavx,avx")
3480 (set_attr "type" "sselog")
3481 (set_attr "prefix_data16" "1,*")
3482 (set_attr "prefix_extra" "1")
3483 (set_attr "length_immediate" "1")
3484 (set_attr "prefix" "orig,vex")
3485 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a VI4F_128
;; memory destination combined with a vec_duplicate of non-memory
;; scalar operand 1: it emits a scalar-mode move into byte offset 0 of
;; operand 0.
;; NOTE(review): the source argument of that move is not visible in
;; this listing.
3488 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3490 (vec_duplicate:VI4F_128
3491 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3494 "TARGET_SSE && reload_completed"
3497 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set<mode>: expander for the V_128 modes.  Delegates to
;; ix86_expand_vector_set with false as the first argument, the vector
;; (operand 0), the scalar register (operand 1) and the integer value of
;; constant operand 2.
3502 (define_expand "vec_set<mode>"
3503 [(match_operand:V_128 0 "register_operand" "")
3504 (match_operand:<ssescalarmode> 1 "register_operand" "")
3505 (match_operand 2 "const_int_operand" "")]
3508 ix86_expand_vector_set (false, operands[0], operands[1],
3509 INTVAL (operands[2]));
;; *vec_extractv4sf_0: extract element 0 of V4SF operand 1 into SF
;; operand 0 (SSE, x87 or integer register, or memory), rejecting the
;; memory-to-memory case.  After reload it is split into a plain SF
;; move: operand 1 is re-expressed in SFmode, either as a REG with the
;; same register number or through gen_lowpart.
;; NOTE(review): the condition selecting between those two forms is not
;; visible in this listing.
3513 (define_insn_and_split "*vec_extractv4sf_0"
3514 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3516 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3517 (parallel [(const_int 0)])))]
3518 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3520 "&& reload_completed"
3523 rtx op1 = operands[1];
3525 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3527 op1 = gen_lowpart (SFmode, op1);
3528 emit_move_insn (operands[0], op1);
;; avx_vextractf128<mode>: expander for the V_256 modes.  Operand 2 is a
;; constant 0 or 1; the switch on its value selects the generator
;; gen_vec_extract_lo_<mode> or gen_vec_extract_hi_<mode>, which is then
;; emitted with operands 0 and 1.
3532 (define_expand "avx_vextractf128<mode>"
3533 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3534 (match_operand:V_256 1 "register_operand" "")
3535 (match_operand:SI 2 "const_0_to_1_operand" "")]
3538 rtx (*insn)(rtx, rtx);
3540 switch (INTVAL (operands[2]))
3543 insn = gen_vec_extract_lo_<mode>;
3546 insn = gen_vec_extract_hi_<mode>;
3552 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (VI8F_256): select elements 0 and 1 of operand 1
;; into the half-width operand 0.  After reload it is split into an
;; ordinary move of operand 1 re-expressed in the half-vector mode,
;; either as a REG with the same register number or via gen_lowpart.
;; NOTE(review): the condition choosing between the two is not visible
;; in this listing.
3556 (define_insn_and_split "vec_extract_lo_<mode>"
3557 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3558 (vec_select:<ssehalfvecmode>
3559 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3560 (parallel [(const_int 0) (const_int 1)])))]
3563 "&& reload_completed"
3566 rtx op1 = operands[1];
3568 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3570 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3571 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI8F_256): select elements 2 and 3 of register
;; operand 1 into the half-width operand 0 (register or memory) using
;; vextractf128 with immediate 1.  The "memory" attribute marks the
;; second alternative as a store.
3575 (define_insn "vec_extract_hi_<mode>"
3576 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3577 (vec_select:<ssehalfvecmode>
3578 (match_operand:VI8F_256 1 "register_operand" "x,x")
3579 (parallel [(const_int 2) (const_int 3)])))]
3581 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3582 [(set_attr "type" "sselog")
3583 (set_attr "prefix_extra" "1")
3584 (set_attr "length_immediate" "1")
3585 (set_attr "memory" "none,store")
3586 (set_attr "prefix" "vex")
3587 (set_attr "mode" "V8SF")])
;; vec_extract_lo_<mode> (VI4F_256): select elements 0-3 of operand 1
;; into the half-width operand 0.  After reload it is split into an
;; ordinary move of operand 1 re-expressed in the half-vector mode,
;; either as a REG with the same register number or via gen_lowpart.
;; NOTE(review): the condition choosing between the two is not visible
;; in this listing.
3589 (define_insn_and_split "vec_extract_lo_<mode>"
3590 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3591 (vec_select:<ssehalfvecmode>
3592 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3593 (parallel [(const_int 0) (const_int 1)
3594 (const_int 2) (const_int 3)])))]
3597 "&& reload_completed"
3600 rtx op1 = operands[1];
3602 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3604 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3605 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI4F_256): select elements 4-7 of register
;; operand 1 into the half-width operand 0 (register or memory) using
;; vextractf128 with immediate 1.  The "memory" attribute marks the
;; second alternative as a store.
3609 (define_insn "vec_extract_hi_<mode>"
3610 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3611 (vec_select:<ssehalfvecmode>
3612 (match_operand:VI4F_256 1 "register_operand" "x,x")
3613 (parallel [(const_int 4) (const_int 5)
3614 (const_int 6) (const_int 7)])))]
3616 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3617 [(set_attr "type" "sselog")
3618 (set_attr "prefix_extra" "1")
3619 (set_attr "length_immediate" "1")
3620 (set_attr "memory" "none,store")
3621 (set_attr "prefix" "vex")
3622 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v16hi: select elements 0-7 of V16HI operand 1 into
;; V8HI operand 0.  After reload it is split into an ordinary V8HI move
;; of operand 1, re-expressed either as a REG with the same register
;; number or via gen_lowpart.
;; NOTE(review): the condition choosing between the two is not visible
;; in this listing.
3624 (define_insn_and_split "vec_extract_lo_v16hi"
3625 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3627 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3628 (parallel [(const_int 0) (const_int 1)
3629 (const_int 2) (const_int 3)
3630 (const_int 4) (const_int 5)
3631 (const_int 6) (const_int 7)])))]
3634 "&& reload_completed"
3637 rtx op1 = operands[1];
3639 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3641 op1 = gen_lowpart (V8HImode, op1);
3642 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: select elements 8-15 of V16HI register operand 1
;; into V8HI operand 0 (register or memory) using vextractf128 with
;; immediate 1.  The "memory" attribute marks the second alternative as
;; a store.
3646 (define_insn "vec_extract_hi_v16hi"
3647 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3649 (match_operand:V16HI 1 "register_operand" "x,x")
3650 (parallel [(const_int 8) (const_int 9)
3651 (const_int 10) (const_int 11)
3652 (const_int 12) (const_int 13)
3653 (const_int 14) (const_int 15)])))]
3655 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3656 [(set_attr "type" "sselog")
3657 (set_attr "prefix_extra" "1")
3658 (set_attr "length_immediate" "1")
3659 (set_attr "memory" "none,store")
3660 (set_attr "prefix" "vex")
3661 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v32qi: select elements 0-15 of V32QI operand 1 into
;; V16QI operand 0.  After reload it is split into an ordinary V16QI
;; move of operand 1, re-expressed either as a REG with the same
;; register number or via gen_lowpart.
;; NOTE(review): the condition choosing between the two is not visible
;; in this listing.
3663 (define_insn_and_split "vec_extract_lo_v32qi"
3664 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3666 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3667 (parallel [(const_int 0) (const_int 1)
3668 (const_int 2) (const_int 3)
3669 (const_int 4) (const_int 5)
3670 (const_int 6) (const_int 7)
3671 (const_int 8) (const_int 9)
3672 (const_int 10) (const_int 11)
3673 (const_int 12) (const_int 13)
3674 (const_int 14) (const_int 15)])))]
3677 "&& reload_completed"
3680 rtx op1 = operands[1];
3682 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3684 op1 = gen_lowpart (V16QImode, op1);
3685 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: select elements 16-31 of V32QI register
;; operand 1 into V16QI operand 0 (register or memory) using
;; vextractf128 with immediate 1.  The "memory" attribute marks the
;; second alternative as a store.
3689 (define_insn "vec_extract_hi_v32qi"
3690 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3692 (match_operand:V32QI 1 "register_operand" "x,x")
3693 (parallel [(const_int 16) (const_int 17)
3694 (const_int 18) (const_int 19)
3695 (const_int 20) (const_int 21)
3696 (const_int 22) (const_int 23)
3697 (const_int 24) (const_int 25)
3698 (const_int 26) (const_int 27)
3699 (const_int 28) (const_int 29)
3700 (const_int 30) (const_int 31)])))]
3702 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3703 [(set_attr "type" "sselog")
3704 (set_attr "prefix_extra" "1")
3705 (set_attr "length_immediate" "1")
3706 (set_attr "memory" "none,store")
3707 (set_attr "prefix" "vex")
3708 (set_attr "mode" "V8SF")])
;; *sse4_1_extractps: extract the element of V4SF register operand 1
;; chosen by constant operand 2 (0..3) into SF operand 0, which may be a
;; general register or memory ("rm"), using extractps/vextractps with
;; operand 2 as the immediate.
3710 (define_insn "*sse4_1_extractps"
3711 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3713 (match_operand:V4SF 1 "register_operand" "x")
3714 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3716 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3717 [(set_attr "type" "sselog")
3718 (set_attr "prefix_data16" "1")
3719 (set_attr "prefix_extra" "1")
3720 (set_attr "length_immediate" "1")
3721 (set_attr "prefix" "maybe_vex")
3722 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: extract element operand 2 (0..3) of an
;; offsettable V4SF memory operand into an SF register.  The split code
;; turns it into a scalar SF load from operand 1 at byte offset i*4,
;; where i is the value of operand 2.
3724 (define_insn_and_split "*vec_extract_v4sf_mem"
3725 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3727 (match_operand:V4SF 1 "memory_operand" "o")
3728 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3734 int i = INTVAL (operands[2]);
3736 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3740 ;; Modes handled by vec_extract patterns.
;; The 128-bit entries are unconditional; the 256-bit float entries
;; (V8SF, V4DF) are enabled only when TARGET_AVX holds.
3741 (define_mode_iterator VEC_EXTRACT_MODE
3742 [V16QI V8HI V4SI V2DI
3743 (V8SF "TARGET_AVX") V4SF
3744 (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander over VEC_EXTRACT_MODE.  Delegates to
;; ix86_expand_vector_extract with false as the first argument, the
;; scalar target (operand 0), the vector (operand 1) and the integer
;; value of constant operand 2.
3746 (define_expand "vec_extract<mode>"
3747 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3748 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3749 (match_operand 2 "const_int_operand" "")]
3752 ix86_expand_vector_extract (false, operands[0], operands[1],
3753 INTVAL (operands[2]));
3757 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3759 ;; Parallel double-precision floating point element swizzling
3761 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3763 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: selects indices 1, 5, 3, 7 from the combination of
;; V4DF operands 1 and 2 and emits vunpckhpd.  Operand 2 may be a
;; register or memory.
3764 (define_insn "avx_unpckhpd256"
3765 [(set (match_operand:V4DF 0 "register_operand" "=x")
3768 (match_operand:V4DF 1 "register_operand" "x")
3769 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3770 (parallel [(const_int 1) (const_int 5)
3771 (const_int 3) (const_int 7)])))]
3773 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3774 [(set_attr "type" "sselog")
3775 (set_attr "prefix" "vex")
3776 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: three-step expansion.  Two intermediate
;; selections with index sets {0,4,2,6} and {1,5,3,7} are computed into
;; fresh V4DF pseudo registers (operands 3 and 4, allocated in the
;; preparation code), and the final set into operand 0 selects indices
;; {2,3,6,7}.
;; NOTE(review): the destinations and sources of the intermediate sets
;; are not fully visible in this listing.
3778 (define_expand "vec_interleave_highv4df"
3782 (match_operand:V4DF 1 "register_operand" "x")
3783 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3784 (parallel [(const_int 0) (const_int 4)
3785 (const_int 2) (const_int 6)])))
3791 (parallel [(const_int 1) (const_int 5)
3792 (const_int 3) (const_int 7)])))
3793 (set (match_operand:V4DF 0 "register_operand" "")
3798 (parallel [(const_int 2) (const_int 3)
3799 (const_int 6) (const_int 7)])))]
3802 operands[3] = gen_reg_rtx (V4DFmode);
3803 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a V2DF register.
3807 (define_expand "vec_interleave_highv2df"
3808 [(set (match_operand:V2DF 0 "register_operand" "")
3811 (match_operand:V2DF 1 "nonimmediate_operand" "")
3812 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3813 (parallel [(const_int 1)
3817 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3818 operands[2] = force_reg (V2DFmode, operands[2]);
;; *vec_interleave_highv2df: V2DF high interleave of operands 1 and 2,
;; valid when ix86_vec_interleave_v2df_operator_ok (operands, 1) holds.
;; Alternatives:
;;   0/1  register operands: unpckhpd / vunpckhpd
;;   2    operand 1 in memory, operand 2 matching it: movddup of the
;;        high half (%H1)
;;   3/4  operand 1 in memory: movlpd / vmovlpd from its high half
;;   5    memory destination tied to operand 2: movhpd store of operand 1
3821 (define_insn "*vec_interleave_highv2df"
3822 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,Y3,x,x,m")
3825 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o ,o,o,x")
3826 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1 ,0,x,0"))
3827 (parallel [(const_int 1)
3829 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3831 unpckhpd\t{%2, %0|%0, %2}
3832 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3833 %vmovddup\t{%H1, %0|%0, %H1}
3834 movlpd\t{%H1, %0|%0, %H1}
3835 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3836 %vmovhpd\t{%1, %0|%0, %1}"
3837 [(set_attr "isa" "noavx,avx,*,noavx,avx,*")
3838 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3839 (set_attr "prefix_data16" "*,*,*,1,*,1")
3840 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3841 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
3843 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_movddup256: expander with a single V4DF source (operand 1) and
;; the index set {0,4,2,6}.  It has no preparation code in view, so it
;; only generates the RTL for an insn pattern to match.
3844 (define_expand "avx_movddup256"
3845 [(set (match_operand:V4DF 0 "register_operand" "")
3848 (match_operand:V4DF 1 "nonimmediate_operand" "")
3850 (parallel [(const_int 0) (const_int 4)
3851 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander selecting indices {0,4,2,6} from the
;; combination of V4DF operands 1 and 2.  It has no preparation code in
;; view, so it only generates the RTL for an insn pattern to match.
3854 (define_expand "avx_unpcklpd256"
3855 [(set (match_operand:V4DF 0 "register_operand" "")
3858 (match_operand:V4DF 1 "register_operand" "")
3859 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3860 (parallel [(const_int 0) (const_int 4)
3861 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: insn for the {0,4,2,6} selection of V4DF operands 1
;; and 2.  Alternative 0 requires operand 2 to match operand 1 and emits
;; vmovddup; alternative 1 emits vunpcklpd.  The condition allows a
;; memory operand 1 only when it is rtx_equal_p to operand 2.
3864 (define_insn "*avx_unpcklpd256"
3865 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3868 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3869 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3870 (parallel [(const_int 0) (const_int 4)
3871 (const_int 2) (const_int 6)])))]
3873 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3875 vmovddup\t{%1, %0|%0, %1}
3876 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3877 [(set_attr "type" "sselog")
3878 (set_attr "prefix" "vex")
3879 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: three-step expansion.  Two intermediate
;; selections with index sets {0,4,2,6} and {1,5,3,7} are computed into
;; fresh V4DF pseudo registers (operands 3 and 4, allocated in the
;; preparation code), and the final set into operand 0 selects indices
;; {0,1,4,5}.
;; NOTE(review): the destinations and sources of the intermediate sets
;; are not fully visible in this listing.
3881 (define_expand "vec_interleave_lowv4df"
3885 (match_operand:V4DF 1 "register_operand" "x")
3886 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3887 (parallel [(const_int 0) (const_int 4)
3888 (const_int 2) (const_int 6)])))
3894 (parallel [(const_int 1) (const_int 5)
3895 (const_int 3) (const_int 7)])))
3896 (set (match_operand:V4DF 0 "register_operand" "")
3901 (parallel [(const_int 0) (const_int 1)
3902 (const_int 4) (const_int 5)])))]
3905 operands[3] = gen_reg_rtx (V4DFmode);
3906 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander for the V2DF low interleave.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a V2DF register.
3909 (define_expand "vec_interleave_lowv2df"
3910 [(set (match_operand:V2DF 0 "register_operand" "")
3913 (match_operand:V2DF 1 "nonimmediate_operand" "")
3914 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3915 (parallel [(const_int 0)
3919 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3920 operands[1] = force_reg (V2DFmode, operands[1]);
;; *vec_interleave_lowv2df: V2DF low interleave of operands 1 and 2,
;; valid when ix86_vec_interleave_v2df_operator_ok (operands, 0) holds.
;; Alternatives:
;;   0/1  register operands: unpcklpd / vunpcklpd
;;   2    operand 1 in memory, operand 2 matching it: movddup
;;   3/4  operand 2 in memory: movhpd / vmovhpd load
;;   5    memory destination tied to operand 1: movlpd store of operand 2
;;        into the high half (%H0)
3923 (define_insn "*vec_interleave_lowv2df"
3924 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,Y3,x,x,o")
3927 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m ,0,x,0")
3928 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1 ,m,m,x"))
3929 (parallel [(const_int 0)
3931 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3933 unpcklpd\t{%2, %0|%0, %2}
3934 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3935 %vmovddup\t{%1, %0|%0, %1}
3936 movhpd\t{%2, %0|%0, %2}
3937 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3938 %vmovlpd\t{%2, %H0|%H0, %2}"
3939 [(set_attr "isa" "noavx,avx,*,noavx,avx,*")
3940 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3941 (set_attr "prefix_data16" "*,*,*,1,*,1")
3942 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3943 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination fed from V2DF register operand 1: the register is viewed
;; as DFmode (same register number) and stored twice, to byte offsets 0
;; and 8 of operand 0.
3946 [(set (match_operand:V2DF 0 "memory_operand" "")
3949 (match_operand:V2DF 1 "register_operand" "")
3951 (parallel [(const_int 0)
3953 "TARGET_SSE3 && reload_completed"
3956 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
3957 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
3958 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split for a V2DF register destination fed from V2DF memory operand 1
;; with selectors operand 2 (0 or 1) and operand 3, applicable under
;; TARGET_SSE3 when operand 3 equals operand 2 + 2.  It is rewritten as
;; a vec_duplicate of the DF element at byte offset operand 2 * 8 of the
;; memory operand.
3963 [(set (match_operand:V2DF 0 "register_operand" "")
3966 (match_operand:V2DF 1 "memory_operand" "")
3968 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
3969 (match_operand:SI 3 "const_int_operand" "")])))]
3970 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
3971 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
3973 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; avx_shufpd256: expander taking an integer mask in operand 3 and
;; decoding its bits into explicit selector indices for
;; gen_avx_shufpd256_1: bit 1 chooses 5 or 4, bit 2 chooses 3 or 2,
;; bit 3 chooses 7 or 6.
;; NOTE(review): the argument derived from bit 0 is not visible in this
;; listing.
3976 (define_expand "avx_shufpd256"
3977 [(match_operand:V4DF 0 "register_operand" "")
3978 (match_operand:V4DF 1 "register_operand" "")
3979 (match_operand:V4DF 2 "nonimmediate_operand" "")
3980 (match_operand:SI 3 "const_int_operand" "")]
3983 int mask = INTVAL (operands[3]);
3984 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
3986 GEN_INT (mask & 2 ? 5 : 4),
3987 GEN_INT (mask & 4 ? 3 : 2),
3988 GEN_INT (mask & 8 ? 7 : 6)));
;; avx_shufpd256_1: vshufpd on V4DF operands with the selection spelled
;; out as four index operands (3..6, constrained to 0-1, 4-5, 2-3 and
;; 6-7).  The output code folds them back into the 4-bit immediate:
;; bit 0 = op3, bit 1 = op4 - 4, bit 2 = op5 - 2, bit 3 = op6 - 6, and
;; stores it in operand 3 for printing.
3992 (define_insn "avx_shufpd256_1"
3993 [(set (match_operand:V4DF 0 "register_operand" "=x")
3996 (match_operand:V4DF 1 "register_operand" "x")
3997 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3998 (parallel [(match_operand 3 "const_0_to_1_operand" "")
3999 (match_operand 4 "const_4_to_5_operand" "")
4000 (match_operand 5 "const_2_to_3_operand" "")
4001 (match_operand 6 "const_6_to_7_operand" "")])))]
4005 mask = INTVAL (operands[3]);
4006 mask |= (INTVAL (operands[4]) - 4) << 1;
4007 mask |= (INTVAL (operands[5]) - 2) << 2;
4008 mask |= (INTVAL (operands[6]) - 6) << 3;
4009 operands[3] = GEN_INT (mask);
4011 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4013 [(set_attr "type" "sselog")
4014 (set_attr "length_immediate" "1")
4015 (set_attr "prefix" "vex")
4016 (set_attr "mode" "V4DF")])
;; sse2_shufpd: expander taking an integer mask in operand 3 and decoding
;; it into explicit selector indices for gen_sse2_shufpd_v2df; bit 1
;; chooses index 3 or 2.
;; NOTE(review): the argument derived from bit 0 is not visible in this
;; listing.
4018 (define_expand "sse2_shufpd"
4019 [(match_operand:V2DF 0 "register_operand" "")
4020 (match_operand:V2DF 1 "register_operand" "")
4021 (match_operand:V2DF 2 "nonimmediate_operand" "")
4022 (match_operand:SI 3 "const_int_operand" "")]
4025 int mask = INTVAL (operands[3]);
4026 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4028 GEN_INT (mask & 2 ? 3 : 2)));
4032 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is unconditional; the integer modes and V2DF require
;; TARGET_SSE2, and the 256-bit float modes require TARGET_AVX.
4033 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4034 [(V16QI "TARGET_SSE2")
4035 (V8HI "TARGET_SSE2")
4036 (V4SI "TARGET_SSE2")
4037 (V2DI "TARGET_SSE2")
4038 (V8SF "TARGET_AVX") V4SF
4039 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even<mode>: expander over VEC_EXTRACT_EVENODD_MODE with
;; three register operands.  Delegates to
;; ix86_expand_vec_extract_even_odd with 0 as the final argument.
4041 (define_expand "vec_extract_even<mode>"
4042 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4043 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4044 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4047 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd<mode>: expander over VEC_EXTRACT_EVENODD_MODE with
;; three register operands.  Delegates to
;; ix86_expand_vec_extract_even_odd with 1 as the final argument.
4051 (define_expand "vec_extract_odd<mode>"
4052 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4053 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4054 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4057 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4061 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vec_interleave_highv2di: V2DI high interleave of operands 1 and 2,
;; emitted as punpckhqdq (non-AVX, operand 1 tied to the destination) or
;; vpunpckhqdq (AVX).  Operand 2 may be a register or memory.
4063 (define_insn "vec_interleave_highv2di"
4064 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4067 (match_operand:V2DI 1 "register_operand" "0,x")
4068 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4069 (parallel [(const_int 1)
4073 punpckhqdq\t{%2, %0|%0, %2}
4074 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4075 [(set_attr "isa" "noavx,avx")
4076 (set_attr "type" "sselog")
4077 (set_attr "prefix_data16" "1,*")
4078 (set_attr "prefix" "orig,vex")
4079 (set_attr "mode" "TI")])
;; vec_interleave_lowv2di: V2DI low interleave of operands 1 and 2,
;; emitted as punpcklqdq (non-AVX, operand 1 tied to the destination) or
;; vpunpcklqdq (AVX).  Operand 2 may be a register or memory.
4081 (define_insn "vec_interleave_lowv2di"
4082 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4085 (match_operand:V2DI 1 "register_operand" "0,x")
4086 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4087 (parallel [(const_int 0)
4091 punpcklqdq\t{%2, %0|%0, %2}
4092 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4093 [(set_attr "isa" "noavx,avx")
4094 (set_attr "type" "sselog")
4095 (set_attr "prefix_data16" "1,*")
4096 (set_attr "prefix" "orig,vex")
4097 (set_attr "mode" "TI")])
;; sse2_shufpd_<mode>: shufpd/vshufpd over the VI8F_128 modes.  The
;; selection is a vec_select from the vec_concat of operands 1 and 2,
;; with index operand 3 in 0..1 and index operand 4 in 2..3.  The output
;; code rebuilds the 2-bit immediate (bit 0 = op3, bit 1 = op4 - 2) and
;; stores it in operand 3 before printing.
4099 (define_insn "sse2_shufpd_<mode>"
4100 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4101 (vec_select:VI8F_128
4102 (vec_concat:<ssedoublevecmode>
4103 (match_operand:VI8F_128 1 "register_operand" "0,x")
4104 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4105 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4106 (match_operand 4 "const_2_to_3_operand" "")])))]
4110 mask = INTVAL (operands[3]);
4111 mask |= (INTVAL (operands[4]) - 2) << 1;
4112 operands[3] = GEN_INT (mask);
4114 switch (which_alternative)
4117 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4119 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4124 [(set_attr "isa" "noavx,avx")
4125 (set_attr "type" "sselog")
4126 (set_attr "length_immediate" "1")
4127 (set_attr "prefix" "orig,vex")
4128 (set_attr "mode" "V2DF")])
4130 ;; Avoid combining registers from different units in a single alternative,
4131 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_storehpd: extract element 1 of V2DF operand 1 into DF operand 0,
;; rejecting the memory-to-memory case.  Alternative 0 stores to memory
;; with movhpd; alternatives 1-2 are register forms (non-AVX / AVX);
;; alternatives 3-5 take an offsettable memory source and target an SSE,
;; x87 or integer register respectively (types ssemov, fmov, imov).
;; NOTE(review): only two of the six output templates are visible in
;; this listing.
4132 (define_insn "sse2_storehpd"
4133 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4135 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4136 (parallel [(const_int 1)])))]
4137 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4139 %vmovhpd\t{%1, %0|%0, %1}
4141 vunpckhpd\t{%d1, %0|%0, %d1}
4145 [(set_attr "isa" "*,noavx,avx,*,*,*")
4146 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4147 (set (attr "prefix_data16")
4149 (and (eq_attr "alternative" "0")
4150 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4152 (const_string "*")))
4153 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4154 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed): extracting
;; element 1 of a V2DF memory operand into a DF register becomes a plain
;; DF load from byte offset 8 of that memory operand.
4157 [(set (match_operand:DF 0 "register_operand" "")
4159 (match_operand:V2DF 1 "memory_operand" "")
4160 (parallel [(const_int 1)])))]
4161 "TARGET_SSE2 && reload_completed"
4162 [(set (match_dup 0) (match_dup 1))]
4163 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; *vec_extractv2df_1_sse: SSE1-only (TARGET_SSE without TARGET_SSE2)
;; extraction of element 1 of V2DF operand 1 into DF operand 0, rejecting
;; the memory-to-memory case.  Alternative 0 stores with movhps,
;; alternative 1 is a register movhlps, and alternative 2 loads the high
;; half of an offsettable memory operand (%H1) with movlps.
4165 (define_insn "*vec_extractv2df_1_sse"
4166 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4168 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4169 (parallel [(const_int 1)])))]
4170 "!TARGET_SSE2 && TARGET_SSE
4171 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4173 movhps\t{%1, %0|%0, %1}
4174 movhlps\t{%1, %0|%0, %1}
4175 movlps\t{%H1, %0|%0, %H1}"
4176 [(set_attr "type" "ssemov")
4177 (set_attr "mode" "V2SF,V4SF,V2SF")])
4179 ;; Avoid combining registers from different units in a single alternative,
4180 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_storelpd: extract element 0 of V2DF operand 1 into DF operand 0,
;; rejecting the memory-to-memory case.  Alternative 0 stores to memory
;; with movlpd; the remaining alternatives target an SSE register (from
;; a register or memory), an x87 register or an integer register.
;; NOTE(review): the output templates for alternatives 1-4 are not
;; visible in this listing.
4181 (define_insn "sse2_storelpd"
4182 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4184 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4185 (parallel [(const_int 0)])))]
4186 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4188 %vmovlpd\t{%1, %0|%0, %1}
4193 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4194 (set_attr "prefix_data16" "1,*,*,*,*")
4195 (set_attr "prefix" "maybe_vex")
4196 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed): extracting
;; element 0 of V2DF operand 1 into a DF register becomes a plain DF
;; move, with operand 1 re-expressed in DFmode either as a REG with the
;; same register number or via gen_lowpart.
;; NOTE(review): the condition choosing between the two is not visible
;; in this listing.
4199 [(set (match_operand:DF 0 "register_operand" "")
4201 (match_operand:V2DF 1 "nonimmediate_operand" "")
4202 (parallel [(const_int 0)])))]
4203 "TARGET_SSE2 && reload_completed"
4206 rtx op1 = operands[1];
4208 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4210 op1 = gen_lowpart (DFmode, op1);
4211 emit_move_insn (operands[0], op1);
;; *vec_extractv2df_0_sse: SSE1-only (TARGET_SSE without TARGET_SSE2)
;; extraction of element 0 of V2DF operand 1 into DF operand 0, rejecting
;; the memory-to-memory case.  Alternative 0 stores with movlps,
;; alternative 1 is a register movaps, and alternative 2 loads from
;; memory with movlps.
4215 (define_insn "*vec_extractv2df_0_sse"
4216 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4218 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4219 (parallel [(const_int 0)])))]
4220 "!TARGET_SSE2 && TARGET_SSE
4221 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4223 movlps\t{%1, %0|%0, %1}
4224 movaps\t{%1, %0|%0, %1}
4225 movlps\t{%1, %0|%0, %1}"
4226 [(set_attr "type" "ssemov")
4227 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_loadhpd_exp: expander wrapper around the sse2_loadhpd insn.  The
;; operands are first legitimized with ix86_fixup_binary_operands, which
;; returns the destination actually used; if that differs from operand 0
;; the result is copied back with an ordinary move.
4229 (define_expand "sse2_loadhpd_exp"
4230 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4233 (match_operand:V2DF 1 "nonimmediate_operand" "")
4234 (parallel [(const_int 0)]))
4235 (match_operand:DF 2 "nonimmediate_operand" "")))]
4238 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4240 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4242 /* Fix up the destination if needed. */
4243 if (dst != operands[0])
4244 emit_move_insn (operands[0], dst);
4249 ;; Avoid combining registers from different units in a single alternative,
4250 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadhpd: combine element 0 of V2DF operand 1 with DF operand 2
;; into V2DF operand 0, rejecting the case where operands 1 and 2 are
;; both in memory.  A memory operand 2 uses movhpd/vmovhpd and a
;; register operand 2 uses unpcklpd/vunpcklpd; the last three
;; alternatives take operand 2 from an SSE, x87 or integer register
;; (types ssemov, fmov, imov).
;; NOTE(review): the constraint strings for operands 0 and 1 and the
;; templates for alternatives 4-6 are not visible in this listing.
4251 (define_insn "sse2_loadhpd"
4252 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4256 (match_operand:V2DF 1 "nonimmediate_operand"
4258 (parallel [(const_int 0)]))
4259 (match_operand:DF 2 "nonimmediate_operand"
4260 " m,m,x,x,x,*f,r")))]
4261 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4263 movhpd\t{%2, %0|%0, %2}
4264 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4265 unpcklpd\t{%2, %0|%0, %2}
4266 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4270 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
4271 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4272 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4273 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4274 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed) for a V2DF memory
;; destination whose element 0 is re-read from the destination itself
;; (match_dup 0) and whose other element comes from DF register
;; operand 1: it becomes a plain DF store of operand 1 at byte offset 8
;; of the memory operand.
4277 [(set (match_operand:V2DF 0 "memory_operand" "")
4279 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4280 (match_operand:DF 1 "register_operand" "")))]
4281 "TARGET_SSE2 && reload_completed"
4282 [(set (match_dup 0) (match_dup 1))]
4283 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; sse2_loadlpd_exp: expander wrapper around the sse2_loadlpd insn.  The
;; operands are first legitimized with ix86_fixup_binary_operands, which
;; returns the destination actually used; if that differs from operand 0
;; the result is copied back with an ordinary move.
4285 (define_expand "sse2_loadlpd_exp"
4286 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4288 (match_operand:DF 2 "nonimmediate_operand" "")
4290 (match_operand:V2DF 1 "nonimmediate_operand" "")
4291 (parallel [(const_int 1)]))))]
4294 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4296 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4298 /* Fix up the destination if needed. */
4299 if (dst != operands[0])
4300 emit_move_insn (operands[0], dst);
4305 ;; Avoid combining registers from different units in a single alternative,
4306 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadlpd: combine DF operand 2 with element 1 of V2DF operand 1
;; into V2DF operand 0, rejecting the case where operands 1 and 2 are
;; both in memory.  Alternatives:
;;   0     operand 1 satisfies "C", operand 2 in memory: movsd load
;;   1/2   operand 2 in memory: movlpd / vmovlpd
;;   3/4   operand 2 in a register: movsd / vmovsd
;;   5     operand 2 tied to the destination: shufpd $2 with operand 1
;;   6/7   operand 1 in memory: movhpd / vmovhpd from its high half
;;   8-10  memory destination tied to operand 1; operand 2 in an SSE,
;;         x87 or integer register
;; NOTE(review): the templates for alternatives 8-10 are not visible in
;; this listing.
4307 (define_insn "sse2_loadlpd"
4308 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4309 "=x,x,x,x,x,x,x,x,m,m ,m")
4311 (match_operand:DF 2 "nonimmediate_operand"
4312 " m,m,m,x,x,0,0,x,x,*f,r")
4314 (match_operand:V2DF 1 "vector_move_operand"
4315 " C,0,x,0,x,x,o,o,0,0 ,0")
4316 (parallel [(const_int 1)]))))]
4317 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4319 %vmovsd\t{%2, %0|%0, %2}
4320 movlpd\t{%2, %0|%0, %2}
4321 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4322 movsd\t{%2, %0|%0, %2}
4323 vmovsd\t{%2, %1, %0|%0, %1, %2}
4324 shufpd\t{$2, %1, %0|%0, %1, 2}
4325 movhpd\t{%H1, %0|%0, %H1}
4326 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4330 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
4332 (cond [(eq_attr "alternative" "5")
4333 (const_string "sselog")
4334 (eq_attr "alternative" "9")
4335 (const_string "fmov")
4336 (eq_attr "alternative" "10")
4337 (const_string "imov")
4339 (const_string "ssemov")))
4340 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4341 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4342 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4343 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2 && reload_completed) for a V2DF memory
;; destination whose new first element is DF register operand 1 and
;; whose element 1 is re-read from the destination itself (match_dup 0).
;; Only the low element changes, so this becomes a plain DF store of
;; operand 1 to the low half of the memory operand, i.e. byte offset 0.
;; Storing at offset 8 would overwrite element 1, which must be kept.
4346 [(set (match_operand:V2DF 0 "memory_operand" "")
4348 (match_operand:DF 1 "register_operand" "")
4349 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4350 "TARGET_SSE2 && reload_completed"
4351 [(set (match_dup 0) (match_dup 1))]
4352 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; sse2_movsd: V2DF pattern combining operands 2 and 1.  Alternatives:
;;   0/1  register operand 2: movsd / vmovsd
;;   2/3  memory operand 2: movlpd / vmovlpd
;;   4    memory destination tied to operand 1: movlpd store of operand 2
;;   5    operand 2 tied to the destination: shufpd $2 with operand 1
;;   6/7  operand 1 in offsettable memory: movhps / vmovhps from its
;;        high half (%H1)
;;   8    memory destination tied to operand 2: movhps store of operand 1
;;        into the high half (%H0)
4354 (define_insn "sse2_movsd"
4355 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4357 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4358 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4362 movsd\t{%2, %0|%0, %2}
4363 vmovsd\t{%2, %1, %0|%0, %1, %2}
4364 movlpd\t{%2, %0|%0, %2}
4365 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4366 %vmovlpd\t{%2, %0|%0, %2}
4367 shufpd\t{$2, %1, %0|%0, %1, 2}
4368 movhps\t{%H1, %0|%0, %H1}
4369 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4370 %vmovhps\t{%1, %H0|%H0, %1}"
4371 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
4374 (eq_attr "alternative" "5")
4375 (const_string "sselog")
4376 (const_string "ssemov")))
4377 (set (attr "prefix_data16")
4379 (and (eq_attr "alternative" "2,4")
4380 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4382 (const_string "*")))
4383 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4384 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4385 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Expander producing a V2DF register from a single DF operand.  The visible
;; preparation statement forces operand 1 into a DFmode register.
;; NOTE(review): the wrapping rtx, the expander condition and any guard
;; around the force_reg call are not visible in this excerpt.
4387 (define_expand "vec_dupv2df"
4388 [(set (match_operand:V2DF 0 "register_operand" "")
4390 (match_operand:DF 1 "nonimmediate_operand" "")))]
4394 operands[1] = force_reg (DFmode, operands[1]);
;; Takes DF operand 1 from an SSE register or memory ("xm") and emits
;; (v)movddup into the destination register.
;; NOTE(review): the insn condition is not visible in this excerpt; the
;; pattern name suggests it is gated on SSE3 -- confirm.
4397 (define_insn "*vec_dupv2df_sse3"
4398 [(set (match_operand:V2DF 0 "register_operand" "=x")
4400 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4402 "%vmovddup\t{%1, %0|%0, %1}"
4403 [(set_attr "type" "sselog1")
4404 (set_attr "prefix" "maybe_vex")
4405 (set_attr "mode" "DF")])
;; Register-only counterpart of the pattern above: DF operand 1 must already
;; live in the destination register (constraint "0").
;; NOTE(review): the condition and the output template are not visible in
;; this excerpt.
4407 (define_insn "*vec_dupv2df"
4408 [(set (match_operand:V2DF 0 "register_operand" "=x")
4410 (match_operand:DF 1 "register_operand" "0")))]
4413 [(set_attr "type" "sselog1")
4414 (set_attr "mode" "V2DF")])
;; Emits (v)movddup of DF operand 1 (SSE register or memory) into a V2DF
;; register.
;; NOTE(review): the second arm of the pattern and the insn condition are not
;; visible in this excerpt; presumably the second arm is (match_dup 1), i.e.
;; operand 1 concatenated with itself -- confirm.
4416 (define_insn "*vec_concatv2df_sse3"
4417 [(set (match_operand:V2DF 0 "register_operand" "=x")
4419 (match_operand:DF 1 "nonimmediate_operand" "xm")
4422 "%vmovddup\t{%1, %0|%0, %1}"
4423 [(set_attr "type" "sselog1")
4424 (set_attr "prefix" "maybe_vex")
4425 (set_attr "mode" "DF")])
;; Build a V2DF register from two DF operands (operand 1 and operand 2).
;; Alternatives, in order:
;;   0/1  (v)unpcklpd  -- both halves in registers
;;   2/3  (v)movhpd    -- operand 2 comes from memory
;;   4    (v)movsd     -- operand 1 loaded from memory, operand 2 is "C"
;;   5/6  movlhps / movhps -- non-AVX forms using V4SF / V2SF mode attributes
;; NOTE(review): "C" is presumably the i386 constant-zero constraint, making
;; alternative 4 a load with the upper half cleared; the insn condition is
;; not visible in this excerpt.
4427 (define_insn "*vec_concatv2df"
4428 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4430 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4431 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4434 unpcklpd\t{%2, %0|%0, %2}
4435 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4436 movhpd\t{%2, %0|%0, %2}
4437 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4438 %vmovsd\t{%1, %0|%0, %1}
4439 movlhps\t{%2, %0|%0, %2}
4440 movhps\t{%2, %0|%0, %2}"
4441 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx")
4444 (eq_attr "alternative" "0,1")
4445 (const_string "sselog")
4446 (const_string "ssemov")))
4447 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4448 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4449 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4451 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4453 ;; Parallel integral arithmetic
4455 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_128 integer vector modes.  The preparation
;; statement creates operands[2] as a register holding the all-zero vector of
;; the current mode.
;; NOTE(review): the rtx combining operand 2 with operand 1 and the condition
;; are not visible in this excerpt; presumably (minus operand2 operand1).
4457 (define_expand "neg<mode>2"
4458 [(set (match_operand:VI_128 0 "register_operand" "")
4461 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4463 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for integer vector add/subtract on VI_128 modes.  Operands
;; are legitimized with ix86_fixup_binary_operands_no_copy before the pattern
;; is emitted.
;; NOTE(review): the operator rtx and the condition are not visible here.
4465 (define_expand "<plusminus_insn><mode>3"
4466 [(set (match_operand:VI_128 0 "register_operand" "")
4468 (match_operand:VI_128 1 "nonimmediate_operand" "")
4469 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4471 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the expander above; requires SSE2 and operands accepted
;; by ix86_binary_operator_ok.  Alternative 0 is the legacy two-operand form
;; (operand 1 tied to the destination); alternative 1 is the AVX
;; three-operand form.
;; NOTE(review): <comm> in operand 1's constraint presumably expands to "%"
;; only for the commutative code -- confirm against the code attribute.
4473 (define_insn "*<plusminus_insn><mode>3"
4474 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4476 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4477 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4478 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4480 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4481 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4482 [(set_attr "isa" "noavx,avx")
4483 (set_attr "type" "sseiadd")
4484 (set_attr "prefix_data16" "1,*")
4485 (set_attr "prefix" "orig,vex")
4486 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus codes on the VI12_128 modes.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the expander condition is not visible in this excerpt.
4488 (define_expand "sse2_<plusminus_insn><mode>3"
4489 [(set (match_operand:VI12_128 0 "register_operand" "")
4490 (sat_plusminus:VI12_128
4491 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4492 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4494 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus expander above (SSE2, VI12_128 modes).
;; Alternative 0 is the legacy two-operand encoding, alternative 1 the AVX
;; three-operand encoding.
4496 (define_insn "*sse2_<plusminus_insn><mode>3"
4497 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4498 (sat_plusminus:VI12_128
4499 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4500 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4501 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4503 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4504 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4505 [(set_attr "isa" "noavx,avx")
4506 (set_attr "type" "sseiadd")
4507 (set_attr "prefix_data16" "1,*")
4508 (set_attr "prefix" "orig,vex")
4509 (set_attr "mode" "TI")])
;; V16QI multiply, implemented as a split while new pseudos can still be
;; created.  Each input is byte-interleaved with itself (high and low halves
;; separately), the two halves are multiplied as V8HI, and the even bytes of
;; the two products are gathered into operand 0.
;; NOTE(review): the first part of the condition and the split boilerplate
;; are not visible in this excerpt.
4511 (define_insn_and_split "mulv16qi3"
4512 [(set (match_operand:V16QI 0 "register_operand" "")
4513 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4514 (match_operand:V16QI 2 "register_operand" "")))]
4516 && can_create_pseudo_p ()"
4524 for (i = 0; i < 6; ++i)
4525 t[i] = gen_reg_rtx (V16QImode);
4527 /* Unpack data such that we've got a source byte in each low byte of
4528 each word. We don't care what goes into the high byte of each word.
4529 Rather than trying to get zero in there, most convenient is to let
4530 it be a copy of the low byte. */
4531 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4532 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4533 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4534 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4536 /* Multiply words. The end-of-line annotations here give a picture of what
4537 the output of that instruction looks like. Dot means don't care; the
4538 letters are the bytes of the result with A being the most significant. */
4539 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4540 gen_lowpart (V8HImode, t[0]),
4541 gen_lowpart (V8HImode, t[1])));
4542 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4543 gen_lowpart (V8HImode, t[2]),
4544 gen_lowpart (V8HImode, t[3])));
4546 /* Extract the even bytes and merge them back together. */
4547 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Named expander for V8HI multiplication; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the expander condition is not visible in this excerpt.
4551 (define_expand "mulv8hi3"
4552 [(set (match_operand:V8HI 0 "register_operand" "")
4553 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4554 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4556 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply via (v)pmullw.  Requires SSE2; "%" marks operands 1 and 2 as
;; commutative.  Alternative 0 is the legacy form with operand 1 tied to the
;; destination, alternative 1 the AVX three-operand form.
4558 (define_insn "*mulv8hi3"
4559 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4560 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4561 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4562 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4564 pmullw\t{%2, %0|%0, %2}
4565 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4566 [(set_attr "isa" "noavx,avx")
4567 (set_attr "type" "sseimul")
4568 (set_attr "prefix_data16" "1,*")
4569 (set_attr "prefix" "orig,vex")
4570 (set_attr "mode" "TI")])
;; Expander for the signed/unsigned V8HI high-part multiply; operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): most of the pattern (extensions, widened multiply, shift and
;; truncation) and the condition are not visible in this excerpt.
4572 (define_expand "<s>mulv8hi3_highpart"
4573 [(set (match_operand:V8HI 0 "register_operand" "")
4578 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4580 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4583 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the high-part multiply expander above; emits
;; (v)pmulh<u>w.  Requires SSE2 and operands accepted by
;; ix86_binary_operator_ok.
;; NOTE(review): the body of the pattern between the operands is not visible
;; in this excerpt.
4585 (define_insn "*<s>mulv8hi3_highpart"
4586 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4591 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4593 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4595 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4597 pmulh<u>w\t{%2, %0|%0, %2}
4598 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4599 [(set_attr "isa" "noavx,avx")
4600 (set_attr "type" "sseimul")
4601 (set_attr "prefix_data16" "1,*")
4602 (set_attr "prefix" "orig,vex")
4603 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each V4SI input
;; (see the two parallel selectors).  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the multiply/extension rtx and the condition are not visible
;; in this excerpt; the name indicates an unsigned widening multiply.
4605 (define_expand "sse2_umulv2siv2di3"
4606 [(set (match_operand:V2DI 0 "register_operand" "")
4610 (match_operand:V4SI 1 "nonimmediate_operand" "")
4611 (parallel [(const_int 0) (const_int 2)])))
4614 (match_operand:V4SI 2 "nonimmediate_operand" "")
4615 (parallel [(const_int 0) (const_int 2)])))))]
4617 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the expander above; emits (v)pmuludq on elements 0 and 2
;; of each V4SI input, yielding V2DI.  Requires SSE2.
;; NOTE(review): the extension and multiply rtx are not visible here.
4619 (define_insn "*sse2_umulv2siv2di3"
4620 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4624 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4625 (parallel [(const_int 0) (const_int 2)])))
4628 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4629 (parallel [(const_int 0) (const_int 2)])))))]
4630 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4632 pmuludq\t{%2, %0|%0, %2}
4633 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4634 [(set_attr "isa" "noavx,avx")
4635 (set_attr "type" "sseimul")
4636 (set_attr "prefix_data16" "1,*")
4637 (set_attr "prefix" "orig,vex")
4638 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from elements 0 and 2 of each V4SI input;
;; counterpart of sse2_umulv2siv2di3 for the SSE4.1 instruction.
;; NOTE(review): the multiply/extension rtx and the condition are not visible
;; in this excerpt; presumably the signed widening multiply.
4640 (define_expand "sse4_1_mulv2siv2di3"
4641 [(set (match_operand:V2DI 0 "register_operand" "")
4645 (match_operand:V4SI 1 "nonimmediate_operand" "")
4646 (parallel [(const_int 0) (const_int 2)])))
4649 (match_operand:V4SI 2 "nonimmediate_operand" "")
4650 (parallel [(const_int 0) (const_int 2)])))))]
4652 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn for the expander above; emits (v)pmuldq on elements 0 and 2
;; of each V4SI input, yielding V2DI.  Requires SSE4.1.
;; NOTE(review): the extension and multiply rtx are not visible here.
4654 (define_insn "*sse4_1_mulv2siv2di3"
4655 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4659 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4660 (parallel [(const_int 0) (const_int 2)])))
4663 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4664 (parallel [(const_int 0) (const_int 2)])))))]
4665 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4667 pmuldq\t{%2, %0|%0, %2}
4668 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4669 [(set_attr "isa" "noavx,avx")
4670 (set_attr "type" "sseimul")
4671 (set_attr "prefix_data16" "1,*")
4672 (set_attr "prefix_extra" "1")
4673 (set_attr "prefix" "orig,vex")
4674 (set_attr "mode" "TI")])
;; Expander for the V8HI -> V4SI multiply-add.  Each input is sampled twice
;; as V4HI: once with a selector starting at index 0 and once with a selector
;; starting at index 1 and ending at index 7.
;; NOTE(review): the remaining selector indices, the extension/mult/plus rtx
;; and the condition are not visible in this excerpt; presumably even and odd
;; elements are multiplied pairwise and the two products added.
4676 (define_expand "sse2_pmaddwd"
4677 [(set (match_operand:V4SI 0 "register_operand" "")
4682 (match_operand:V8HI 1 "nonimmediate_operand" "")
4683 (parallel [(const_int 0)
4689 (match_operand:V8HI 2 "nonimmediate_operand" "")
4690 (parallel [(const_int 0)
4696 (vec_select:V4HI (match_dup 1)
4697 (parallel [(const_int 1)
4702 (vec_select:V4HI (match_dup 2)
4703 (parallel [(const_int 1)
4706 (const_int 7)]))))))]
4708 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the expander above; emits (v)pmaddwd.  Requires SSE2 and
;; operands accepted by ix86_binary_operator_ok.
;; NOTE(review): most of the selector indices and the arithmetic rtx are not
;; visible in this excerpt.
4710 (define_insn "*sse2_pmaddwd"
4711 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4716 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4717 (parallel [(const_int 0)
4723 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4724 (parallel [(const_int 0)
4730 (vec_select:V4HI (match_dup 1)
4731 (parallel [(const_int 1)
4736 (vec_select:V4HI (match_dup 2)
4737 (parallel [(const_int 1)
4740 (const_int 7)]))))))]
4741 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4743 pmaddwd\t{%2, %0|%0, %2}
4744 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4745 [(set_attr "isa" "noavx,avx")
4746 (set_attr "type" "sseiadd")
4747 (set_attr "atom_unit" "simul")
4748 (set_attr "prefix_data16" "1,*")
4749 (set_attr "prefix" "orig,vex")
4750 (set_attr "mode" "TI")])
;; Named expander for V4SI multiplication.  Operand fix-up via
;; ix86_fixup_binary_operands_no_copy is applied only when SSE4.1 or AVX is
;; enabled; otherwise the operands are left as they are.
;; NOTE(review): the expander condition is not visible in this excerpt.
4752 (define_expand "mulv4si3"
4753 [(set (match_operand:V4SI 0 "register_operand" "")
4754 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4755 (match_operand:V4SI 2 "register_operand" "")))]
4758 if (TARGET_SSE4_1 || TARGET_AVX)
4759 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; Direct V4SI multiply via (v)pmulld.  Requires SSE4.1; "%" marks the
;; operands as commutative.  Alternative 0 is the legacy two-operand form,
;; alternative 1 the AVX three-operand form.
4762 (define_insn "*sse4_1_mulv4si3"
4763 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4764 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4765 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4766 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4768 pmulld\t{%2, %0|%0, %2}
4769 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4770 [(set_attr "isa" "noavx,avx")
4771 (set_attr "type" "sseimul")
4772 (set_attr "prefix_extra" "1")
4773 (set_attr "prefix" "orig,vex")
4774 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets that have neither SSE4.1 nor AVX.  Split,
;; while pseudos can still be created, into two widening multiplies of
;; alternate elements, with the inputs shifted as V1TI in between, followed
;; by two pshufd shuffles and a low interleave to assemble operand 0.
;; NOTE(review): the assignments of op0/op1/op2, the multiply arguments and
;; the shift counts are not visible in this excerpt.
4776 (define_insn_and_split "*sse2_mulv4si3"
4777 [(set (match_operand:V4SI 0 "register_operand" "")
4778 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4779 (match_operand:V4SI 2 "register_operand" "")))]
4780 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4781 && can_create_pseudo_p ()"
4786 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4792 t1 = gen_reg_rtx (V4SImode);
4793 t2 = gen_reg_rtx (V4SImode);
4794 t3 = gen_reg_rtx (V4SImode);
4795 t4 = gen_reg_rtx (V4SImode);
4796 t5 = gen_reg_rtx (V4SImode);
4797 t6 = gen_reg_rtx (V4SImode);
4798 thirtytwo = GEN_INT (32);
4800 /* Multiply elements 2 and 0. */
4801 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4804 /* Shift both input vectors down one element, so that elements 3
4805 and 1 are now in the slots for elements 2 and 0. For K8, at
4806 least, this is faster than using a shuffle. */
4807 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4808 gen_lowpart (V1TImode, op1),
4810 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4811 gen_lowpart (V1TImode, op2),
4813 /* Multiply elements 3 and 1. */
4814 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4817 /* Move the results in element 2 down to element 1; we don't care
4818 what goes in elements 2 and 3. */
4819 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4820 const0_rtx, const0_rtx));
4821 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4822 const0_rtx, const0_rtx));
4824 /* Merge the parts back together. */
4825 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, implemented as a split while pseudos can still be created.
;; Two code paths are visible:
;;   * an XOP sequence: shuffle op1, V4SI multiply, horizontal add
;;     (phadddq), shift left by 32, then multiply-accumulate (pmacsdql);
;;   * a generic SSE2 sequence: low*low plus the two cross products
;;     (low*high), each shifted left by 32, summed into operand 0.
;; NOTE(review): the first part of the condition, the test selecting between
;; the two paths (presumably TARGET_XOP) and the op0/op1/op2 assignments are
;; not visible in this excerpt.
4829 (define_insn_and_split "mulv2di3"
4830 [(set (match_operand:V2DI 0 "register_operand" "")
4831 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4832 (match_operand:V2DI 2 "register_operand" "")))]
4834 && can_create_pseudo_p ()"
4839 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4848 /* op1: A,B,C,D, op2: E,F,G,H */
4849 op1 = gen_lowpart (V4SImode, op1);
4850 op2 = gen_lowpart (V4SImode, op2);
4852 t1 = gen_reg_rtx (V4SImode);
4853 t2 = gen_reg_rtx (V4SImode);
4854 t3 = gen_reg_rtx (V2DImode);
4855 t4 = gen_reg_rtx (V2DImode);
4858 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4864 /* t2: (B*E),(A*F),(D*G),(C*H) */
4865 emit_insn (gen_mulv4si3 (t2, t1, op2));
4867 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4868 emit_insn (gen_xop_phadddq (t3, t2));
4870 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4871 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4873 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4874 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4878 t1 = gen_reg_rtx (V2DImode);
4879 t2 = gen_reg_rtx (V2DImode);
4880 t3 = gen_reg_rtx (V2DImode);
4881 t4 = gen_reg_rtx (V2DImode);
4882 t5 = gen_reg_rtx (V2DImode);
4883 t6 = gen_reg_rtx (V2DImode);
4884 thirtytwo = GEN_INT (32);
4886 /* Multiply low parts. */
4887 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4888 gen_lowpart (V4SImode, op2)));
4890 /* Shift input vectors right 32 bits so we can multiply high parts. */
4891 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4892 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4894 /* Multiply high parts by low parts. */
4895 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4896 gen_lowpart (V4SImode, t3)));
4897 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4898 gen_lowpart (V4SImode, t2)));
4900 /* Shift them back. */
4901 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
4902 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
4904 /* Add the three parts together. */
4905 emit_insn (gen_addv2di3 (t6, t1, t4));
4906 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Signed widening multiply, high half: V8HI x V8HI -> V4SI.  Computes the
;; low 16 bits (mulv8hi3) and the signed high 16 bits (smulv8hi3_highpart) of
;; every product, then interleaves the high halves of those two vectors into
;; operand 0 viewed as V8HI.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
4911 (define_expand "vec_widen_smult_hi_v8hi"
4912 [(match_operand:V4SI 0 "register_operand" "")
4913 (match_operand:V8HI 1 "register_operand" "")
4914 (match_operand:V8HI 2 "register_operand" "")]
4917 rtx op1, op2, t1, t2, dest;
4921 t1 = gen_reg_rtx (V8HImode);
4922 t2 = gen_reg_rtx (V8HImode);
4923 dest = gen_lowpart (V8HImode, operands[0]);
4925 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4926 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4927 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Signed widening multiply, low half: V8HI x V8HI -> V4SI.  Same scheme as
;; the _hi_ variant, but the low halves of the low-part and high-part product
;; vectors are interleaved into operand 0.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
4931 (define_expand "vec_widen_smult_lo_v8hi"
4932 [(match_operand:V4SI 0 "register_operand" "")
4933 (match_operand:V8HI 1 "register_operand" "")
4934 (match_operand:V8HI 2 "register_operand" "")]
4937 rtx op1, op2, t1, t2, dest;
4941 t1 = gen_reg_rtx (V8HImode);
4942 t2 = gen_reg_rtx (V8HImode);
4943 dest = gen_lowpart (V8HImode, operands[0]);
4945 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4946 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4947 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Unsigned widening multiply, high half: V8HI x V8HI -> V4SI.  Uses
;; mulv8hi3 for the low 16 bits and umulv8hi3_highpart for the unsigned high
;; 16 bits of each product, then interleaves the high halves of the two
;; vectors into operand 0.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
4951 (define_expand "vec_widen_umult_hi_v8hi"
4952 [(match_operand:V4SI 0 "register_operand" "")
4953 (match_operand:V8HI 1 "register_operand" "")
4954 (match_operand:V8HI 2 "register_operand" "")]
4957 rtx op1, op2, t1, t2, dest;
4961 t1 = gen_reg_rtx (V8HImode);
4962 t2 = gen_reg_rtx (V8HImode);
4963 dest = gen_lowpart (V8HImode, operands[0]);
4965 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4966 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
4967 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Unsigned widening multiply, low half: V8HI x V8HI -> V4SI.  Same scheme as
;; the _hi_ variant, but the low halves of the low-part and high-part product
;; vectors are interleaved into operand 0.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
4971 (define_expand "vec_widen_umult_lo_v8hi"
4972 [(match_operand:V4SI 0 "register_operand" "")
4973 (match_operand:V8HI 1 "register_operand" "")
4974 (match_operand:V8HI 2 "register_operand" "")]
4977 rtx op1, op2, t1, t2, dest;
4981 t1 = gen_reg_rtx (V8HImode);
4982 t2 = gen_reg_rtx (V8HImode);
4983 dest = gen_lowpart (V8HImode, operands[0]);
4985 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4986 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
4987 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Signed widening multiply, high half: V4SI x V4SI -> V2DI.  Both inputs are
;; shuffled with pshufd into temporaries and then multiplied with the XOP
;; generator gen_xop_mulv2div2di3_high.
;; NOTE(review): the condition (presumably XOP-only) and the shuffle
;; selectors are not visible in this excerpt.
4991 (define_expand "vec_widen_smult_hi_v4si"
4992 [(match_operand:V2DI 0 "register_operand" "")
4993 (match_operand:V4SI 1 "register_operand" "")
4994 (match_operand:V4SI 2 "register_operand" "")]
4999 t1 = gen_reg_rtx (V4SImode);
5000 t2 = gen_reg_rtx (V4SImode);
5002 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5007 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5012 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Signed widening multiply, low half: V4SI x V4SI -> V2DI.  Both inputs are
;; shuffled with pshufd into temporaries and then multiplied with the XOP
;; generator gen_xop_mulv2div2di3_low.
;; NOTE(review): the condition (presumably XOP-only) and the shuffle
;; selectors are not visible in this excerpt.
5016 (define_expand "vec_widen_smult_lo_v4si"
5017 [(match_operand:V2DI 0 "register_operand" "")
5018 (match_operand:V4SI 1 "register_operand" "")
5019 (match_operand:V4SI 2 "register_operand" "")]
5024 t1 = gen_reg_rtx (V4SImode);
5025 t2 = gen_reg_rtx (V4SImode);
5027 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5032 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5037 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Unsigned widening multiply, high half: V4SI x V4SI -> V2DI.  Each input is
;; high-interleaved with itself, which places the upper two source elements
;; in the element slots read by sse2_umulv2siv2di3; the products land in
;; operand 0.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
5041 (define_expand "vec_widen_umult_hi_v4si"
5042 [(match_operand:V2DI 0 "register_operand" "")
5043 (match_operand:V4SI 1 "register_operand" "")
5044 (match_operand:V4SI 2 "register_operand" "")]
5047 rtx op1, op2, t1, t2;
5051 t1 = gen_reg_rtx (V4SImode);
5052 t2 = gen_reg_rtx (V4SImode);
5054 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5055 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5056 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Unsigned widening multiply, low half: V4SI x V4SI -> V2DI.  Each input is
;; low-interleaved with itself, which places the lower two source elements in
;; the element slots read by sse2_umulv2siv2di3; the products land in
;; operand 0.
;; NOTE(review): the condition and the op1/op2 assignments are not visible.
5060 (define_expand "vec_widen_umult_lo_v4si"
5061 [(match_operand:V2DI 0 "register_operand" "")
5062 (match_operand:V4SI 1 "register_operand" "")
5063 (match_operand:V4SI 2 "register_operand" "")]
5066 rtx op1, op2, t1, t2;
5070 t1 = gen_reg_rtx (V4SImode);
5071 t2 = gen_reg_rtx (V4SImode);
5073 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5074 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5075 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Signed dot-product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The multiply-add result goes through a fresh V4SI temporary
;; before being added to the accumulator operand 3.
;; NOTE(review): the expander condition is not visible in this excerpt.
5079 (define_expand "sdot_prodv8hi"
5080 [(match_operand:V4SI 0 "register_operand" "")
5081 (match_operand:V8HI 1 "register_operand" "")
5082 (match_operand:V8HI 2 "register_operand" "")
5083 (match_operand:V4SI 3 "register_operand" "")]
5086 rtx t = gen_reg_rtx (V4SImode);
5087 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5088 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Unsigned dot-product step for V4SI inputs with a V2DI accumulator:
;;   t1 = umul (operand 1, operand 2) + operand 3
;;   t2, t3 = operand 1 / operand 2 shifted right as whole V1TI values
;;   t4 = umul (t2, t3)
;;   operand 0 = t1 + t4
;; NOTE(review): the condition and the shift counts are not visible in this
;; excerpt; presumably the shift moves the odd elements into the slots read
;; by sse2_umulv2siv2di3.
5092 (define_expand "udot_prodv4si"
5093 [(match_operand:V2DI 0 "register_operand" "")
5094 (match_operand:V4SI 1 "register_operand" "")
5095 (match_operand:V4SI 2 "register_operand" "")
5096 (match_operand:V2DI 3 "register_operand" "")]
5101 t1 = gen_reg_rtx (V2DImode);
5102 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5103 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5105 t2 = gen_reg_rtx (V4SImode);
5106 t3 = gen_reg_rtx (V4SImode);
5107 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5108 gen_lowpart (V1TImode, operands[1]),
5110 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5111 gen_lowpart (V1TImode, operands[2]),
5114 t4 = gen_reg_rtx (V2DImode);
5115 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5117 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Right shift of VI24_128 vectors by an SImode count, emitted as
;; (v)psra<suffix>.  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the shift rtx, the condition and the immediate length value
;; are not visible in this excerpt; "xN" presumably allows an SSE register or
;; a small immediate.
5121 (define_insn "ashr<mode>3"
5122 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5124 (match_operand:VI24_128 1 "register_operand" "0,x")
5125 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5128 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5129 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5130 [(set_attr "isa" "noavx,avx")
5131 (set_attr "type" "sseishft")
5132 (set (attr "length_immediate")
5133 (if_then_else (match_operand 2 "const_int_operand" "")
5135 (const_string "0")))
5136 (set_attr "prefix_data16" "1,*")
5137 (set_attr "prefix" "orig,vex")
5138 (set_attr "mode" "TI")])
;; Right shift of VI248_128 vectors by an SImode count, emitted as
;; (v)psrl<suffix>.  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the shift rtx, the condition and the immediate length value
;; are not visible in this excerpt.
5140 (define_insn "lshr<mode>3"
5141 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5143 (match_operand:VI248_128 1 "register_operand" "0,x")
5144 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5147 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5148 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5149 [(set_attr "isa" "noavx,avx")
5150 (set_attr "type" "sseishft")
5151 (set (attr "length_immediate")
5152 (if_then_else (match_operand 2 "const_int_operand" "")
5154 (const_string "0")))
5155 (set_attr "prefix_data16" "1,*")
5156 (set_attr "prefix" "orig,vex")
5157 (set_attr "mode" "TI")])
;; Left shift of VI248_128 vectors by an SImode count, emitted as
;; (v)psll<suffix>.  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the shift rtx, the condition and the immediate length value
;; are not visible in this excerpt.
5159 (define_insn "ashl<mode>3"
5160 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5162 (match_operand:VI248_128 1 "register_operand" "0,x")
5163 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5166 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5167 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5168 [(set_attr "isa" "noavx,avx")
5169 (set_attr "type" "sseishft")
5170 (set (attr "length_immediate")
5171 (if_then_else (match_operand 2 "const_int_operand" "")
5173 (const_string "0")))
5174 (set_attr "prefix_data16" "1,*")
5175 (set_attr "prefix" "orig,vex")
5176 (set_attr "mode" "TI")])
;; Whole-vector left shift expander for VI_128 modes.  Operands 0 and 1 are
;; re-viewed as V1TI so the V1TI shift insn can match.  The count must
;; satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the shift rtx and the condition are not visible here.
5178 (define_expand "vec_shl_<mode>"
5179 [(set (match_operand:VI_128 0 "register_operand" "")
5181 (match_operand:VI_128 1 "register_operand" "")
5182 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5185 operands[0] = gen_lowpart (V1TImode, operands[0]);
5186 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI left shift, emitted as (v)pslldq.  Operand 2 is divided by 8 just
;; before printing, so the immediate that reaches the assembler is a byte
;; count.
;; NOTE(review): the shift rtx, the condition and the case labels of the
;; switch are not visible in this excerpt.
5189 (define_insn "sse2_ashlv1ti3"
5190 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5192 (match_operand:V1TI 1 "register_operand" "0,x")
5193 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5196 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5198 switch (which_alternative)
5201 return "pslldq\t{%2, %0|%0, %2}";
5203 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5208 [(set_attr "isa" "noavx,avx")
5209 (set_attr "type" "sseishft")
5210 (set_attr "length_immediate" "1")
5211 (set_attr "prefix_data16" "1,*")
5212 (set_attr "prefix" "orig,vex")
5213 (set_attr "mode" "TI")])
;; Whole-vector right shift expander for VI_128 modes.  Operands 0 and 1 are
;; re-viewed as V1TI so the V1TI shift insn can match.  The count must
;; satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the shift rtx and the condition are not visible here.
5215 (define_expand "vec_shr_<mode>"
5216 [(set (match_operand:VI_128 0 "register_operand" "")
5218 (match_operand:VI_128 1 "register_operand" "")
5219 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5222 operands[0] = gen_lowpart (V1TImode, operands[0]);
5223 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI right shift, emitted as (v)psrldq.  Operand 2 is divided by 8 just
;; before printing, so the immediate that reaches the assembler is a byte
;; count.
;; NOTE(review): the shift rtx, the condition and the case labels of the
;; switch are not visible in this excerpt.
5226 (define_insn "sse2_lshrv1ti3"
5227 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5229 (match_operand:V1TI 1 "register_operand" "0,x")
5230 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5233 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5235 switch (which_alternative)
5238 return "psrldq\t{%2, %0|%0, %2}";
5240 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5245 [(set_attr "isa" "noavx,avx")
5246 (set_attr "type" "sseishft")
5247 (set_attr "length_immediate" "1")
5248 (set_attr "atom_unit" "sishuf")
5249 (set_attr "prefix_data16" "1,*")
5250 (set_attr "prefix" "orig,vex")
5251 (set_attr "mode" "TI")])
;; SSE4.1 max/min on VI14_128 modes, emitted as
;; (v)p<maxmin_int><ssemodesuffix>.  "%" marks the operands as commutative.
;; NOTE(review): the code iterator is not visible in this excerpt;
;; presumably the signed max/min pair, given the smax/smin expanders that
;; use the same mode iterator.
5253 (define_insn "*sse4_1_<code><mode>3"
5254 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5256 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5257 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5258 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5260 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5261 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5262 [(set_attr "isa" "noavx,avx")
5263 (set_attr "type" "sseiadd")
5264 (set_attr "prefix_extra" "1,*")
5265 (set_attr "prefix" "orig,vex")
5266 (set_attr "mode" "TI")])
;; SSE2 max/min on V8HI, emitted as (v)p<maxmin_int>w.
;; NOTE(review): the code iterator is not visible in this excerpt;
;; presumably the signed max/min pair (pmaxsw/pminsw).
5268 (define_insn "*<code>v8hi3"
5269 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5271 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5272 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5273 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5275 p<maxmin_int>w\t{%2, %0|%0, %2}
5276 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5277 [(set_attr "isa" "noavx,avx")
5278 (set_attr "type" "sseiadd")
5279 (set_attr "prefix_data16" "1,*")
5280 (set_attr "prefix_extra" "*,1")
5281 (set_attr "prefix" "orig,vex")
5282 (set_attr "mode" "TI")])
;; Signed maximum expander for VI14_128 modes.  Two paths are visible: one
;; only fixes up the operands for the direct SMAX pattern; the other builds a
;; conditional-select request for ix86_expand_int_vcond that picks operand 1
;; where operand 1 > operand 2 (signed GT) and operand 2 otherwise.
;; NOTE(review): the expander condition and the test choosing between the two
;; paths are not visible in this excerpt; the xops layout (dest, true value,
;; false value, comparison, comparison operands) is assumed from usage.
5284 (define_expand "smax<mode>3"
5285 [(set (match_operand:VI14_128 0 "register_operand" "")
5286 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5287 (match_operand:VI14_128 2 "register_operand" "")))]
5291 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5297 xops[0] = operands[0];
5298 xops[1] = operands[1];
5299 xops[2] = operands[2];
5300 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5301 xops[4] = operands[1];
5302 xops[5] = operands[2];
5303 ok = ix86_expand_int_vcond (xops);
;; Signed minimum expander for VI14_128 modes.  Mirrors smax<mode>3: the
;; fallback uses the same signed GT comparison but swaps the two selected
;; values, so operand 2 is picked where operand 1 > operand 2 and operand 1
;; otherwise.
;; NOTE(review): the expander condition and the test choosing between the two
;; paths are not visible in this excerpt; the xops layout is assumed from
;; usage.
5309 (define_expand "smin<mode>3"
5310 [(set (match_operand:VI14_128 0 "register_operand" "")
5311 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5312 (match_operand:VI14_128 2 "register_operand" "")))]
5316 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5322 xops[0] = operands[0];
5323 xops[1] = operands[2];
5324 xops[2] = operands[1];
5325 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5326 xops[4] = operands[1];
5327 xops[5] = operands[2];
5328 ok = ix86_expand_int_vcond (xops);
;; Named expander for the V8HI max/min codes; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and the condition are not visible here.
5334 (define_expand "<code>v8hi3"
5335 [(set (match_operand:V8HI 0 "register_operand" "")
5337 (match_operand:V8HI 1 "nonimmediate_operand" "")
5338 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5340 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Signed V2DI maximum, expanded through ix86_expand_int_vcond: operand 1 is
;; selected where operand 1 > operand 2 (signed GT), operand 2 otherwise.
;; NOTE(review): the expander condition is not visible in this excerpt; the
;; xops layout is assumed from usage.
5342 (define_expand "smaxv2di3"
5343 [(set (match_operand:V2DI 0 "register_operand" "")
5344 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5345 (match_operand:V2DI 2 "register_operand" "")))]
5351 xops[0] = operands[0];
5352 xops[1] = operands[1];
5353 xops[2] = operands[2];
5354 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5355 xops[4] = operands[1];
5356 xops[5] = operands[2];
5357 ok = ix86_expand_int_vcond (xops);
;; Signed V2DI minimum, expanded through ix86_expand_int_vcond with the
;; selected values swapped relative to smaxv2di3: operand 2 is selected where
;; operand 1 > operand 2 (signed GT), operand 1 otherwise.
;; NOTE(review): the expander condition is not visible in this excerpt; the
;; xops layout is assumed from usage.
5362 (define_expand "sminv2di3"
5363 [(set (match_operand:V2DI 0 "register_operand" "")
5364 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5365 (match_operand:V2DI 2 "register_operand" "")))]
5371 xops[0] = operands[0];
5372 xops[1] = operands[2];
5373 xops[2] = operands[1];
5374 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5375 xops[4] = operands[1];
5376 xops[5] = operands[2];
5377 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 max/min on VI24_128 modes, emitted as
;; (v)p<maxmin_int><ssemodesuffix>.  "%" marks the operands as commutative.
;; NOTE(review): the code iterator is not visible in this excerpt;
;; presumably the unsigned max/min pair, given the umin/umax expanders that
;; follow.
5382 (define_insn "*sse4_1_<code><mode>3"
5383 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5385 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5386 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5387 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5389 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5390 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5391 [(set_attr "isa" "noavx,avx")
5392 (set_attr "type" "sseiadd")
5393 (set_attr "prefix_extra" "1,*")
5394 (set_attr "prefix" "orig,vex")
5395 (set_attr "mode" "TI")])
;; SSE2 max/min on V16QI, emitted as (v)p<maxmin_int>b.
;; NOTE(review): the code iterator is not visible in this excerpt;
;; presumably the unsigned max/min pair (pmaxub/pminub).
5397 (define_insn "*<code>v16qi3"
5398 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5400 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5401 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5402 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5404 p<maxmin_int>b\t{%2, %0|%0, %2}
5405 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5406 [(set_attr "isa" "noavx,avx")
5407 (set_attr "type" "sseiadd")
5408 (set_attr "prefix_data16" "1,*")
5409 (set_attr "prefix_extra" "*,1")
5410 (set_attr "prefix" "orig,vex")
5411 (set_attr "mode" "TI")])
;; Named expander for the V16QI max/min codes; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the code iterator and the condition are not visible here.
5413 (define_expand "<code>v16qi3"
5414 [(set (match_operand:V16QI 0 "register_operand" "")
5416 (match_operand:V16QI 1 "nonimmediate_operand" "")
5417 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5419 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Unsigned V8HI maximum.  Two paths are visible: one only fixes up the
;; operands for the direct UMAX pattern; the other computes
;;   op3 = saturating-unsigned-subtract (operand 1, operand 2)
;;   operand 0 = op3 + operand 2
;; A fresh temporary is used for op3 when the destination equals operand 2,
;; so operand 2 is still intact for the final addition.
;; NOTE(review): the expander condition and the test choosing between the two
;; paths are not visible in this excerpt.
5421 (define_expand "umaxv8hi3"
5422 [(set (match_operand:V8HI 0 "register_operand" "")
5423 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5424 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5428 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5431 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5432 if (rtx_equal_p (op3, op2))
5433 op3 = gen_reg_rtx (V8HImode);
5434 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5435 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Unsigned V4SI maximum.  Two paths are visible: one only fixes up the
;; operands for the direct UMAX pattern; the other goes through
;; ix86_expand_int_vcond, selecting operand 1 where operand 1 > operand 2
;; (unsigned GTU) and operand 2 otherwise.
;; NOTE(review): the expander condition and the test choosing between the two
;; paths are not visible in this excerpt; the xops layout is assumed from
;; usage.
5440 (define_expand "umaxv4si3"
5441 [(set (match_operand:V4SI 0 "register_operand" "")
5442 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5443 (match_operand:V4SI 2 "register_operand" "")))]
5447 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5453 xops[0] = operands[0];
5454 xops[1] = operands[1];
5455 xops[2] = operands[2];
5456 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5457 xops[4] = operands[1];
5458 xops[5] = operands[2];
5459 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum expander for VI24_128 modes.  Two paths are visible: one
;; only fixes up the operands for the direct UMIN pattern; the other goes
;; through ix86_expand_int_vcond with the selected values swapped, so operand
;; 2 is picked where operand 1 > operand 2 (unsigned GTU) and operand 1
;; otherwise.
;; NOTE(review): the expander condition and the test choosing between the two
;; paths are not visible in this excerpt; the xops layout is assumed from
;; usage.
5465 (define_expand "umin<mode>3"
5466 [(set (match_operand:VI24_128 0 "register_operand" "")
5467 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5468 (match_operand:VI24_128 2 "register_operand" "")))]
5472 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5478 xops[0] = operands[0];
5479 xops[1] = operands[2];
5480 xops[2] = operands[1];
5481 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5482 xops[4] = operands[1];
5483 xops[5] = operands[2];
5484 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI maximum, expanded through ix86_expand_int_vcond: operand 1
;; is selected where operand 1 > operand 2 (unsigned GTU), operand 2
;; otherwise.
;; NOTE(review): the expander condition is not visible in this excerpt; the
;; xops layout is assumed from usage.
5490 (define_expand "umaxv2di3"
5491 [(set (match_operand:V2DI 0 "register_operand" "")
5492 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5493 (match_operand:V2DI 2 "register_operand" "")))]
5499 xops[0] = operands[0];
5500 xops[1] = operands[1];
5501 xops[2] = operands[2];
5502 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5503 xops[4] = operands[1];
5504 xops[5] = operands[2];
5505 ok = ix86_expand_int_vcond (xops);
;; Unsigned V2DI minimum, expanded through ix86_expand_int_vcond with the
;; selected values swapped relative to umaxv2di3: operand 2 is selected where
;; operand 1 > operand 2 (unsigned GTU), operand 1 otherwise.
;; NOTE(review): the expander condition is not visible in this excerpt; the
;; xops layout is assumed from usage.
5510 (define_expand "uminv2di3"
5511 [(set (match_operand:V2DI 0 "register_operand" "")
5512 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5513 (match_operand:V2DI 2 "register_operand" "")))]
5519 xops[0] = operands[0];
5520 xops[1] = operands[2];
5521 xops[2] = operands[1];
5522 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5523 xops[4] = operands[1];
5524 xops[5] = operands[2];
5525 ok = ix86_expand_int_vcond (xops);
5530 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5532 ;; Parallel integral comparisons
5534 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI equality compare, emitted as (v)pcmpeqq.  Requires SSE4.1; "%" marks
;; the operands as commutative.
;; NOTE(review): the comparison rtx itself is not visible in this excerpt.
5536 (define_insn "*sse4_1_eqv2di3"
5537 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5539 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5540 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5541 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5543 pcmpeqq\t{%2, %0|%0, %2}
5544 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5545 [(set_attr "isa" "noavx,avx")
5546 (set_attr "type" "ssecmp")
5547 (set_attr "prefix_extra" "1")
5548 (set_attr "prefix" "orig,vex")
5549 (set_attr "mode" "TI")])
;; Equality-compare insn for the VI124_128 modes.  Enabled for
;; TARGET_SSE2 && !TARGET_XOP when ix86_binary_operator_ok accepts the
;; operands.  Emits pcmpeq<ssemodesuffix> (noavx, destination tied to
;; operand 1) or vpcmpeq<ssemodesuffix> (avx, three operands).
;; prefix_data16 is set only for the non-AVX alternative.
5551 (define_insn "*sse2_eq<mode>3"
5552 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5554 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5555 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5556 "TARGET_SSE2 && !TARGET_XOP
5557 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5559 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
5560 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5561 [(set_attr "isa" "noavx,avx")
5562 (set_attr "type" "ssecmp")
5563 (set_attr "prefix_data16" "1,*")
5564 (set_attr "prefix" "orig,vex")
5565 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare, enabled for
;; TARGET_SSE2 && !TARGET_XOP.  Its only preparation step canonicalizes the
;; operands with ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
5567 (define_expand "sse2_eq<mode>3"
5568 [(set (match_operand:VI124_128 0 "register_operand" "")
5570 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5571 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5572 "TARGET_SSE2 && !TARGET_XOP "
5573 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare.  The preparation statement
;; canonicalizes the operands with
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands).
;; NOTE(review): the enabling condition is not visible in this excerpt.
5575 (define_expand "sse4_1_eqv2di3"
5576 [(set (match_operand:V2DI 0 "register_operand" "")
5578 (match_operand:V2DI 1 "nonimmediate_operand" "")
5579 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5581 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI compare insn emitting pcmpgtq (noavx) or vpcmpgtq (avx).
;; Operand 1 must be a register and is not marked commutative; in the
;; non-AVX alternative it is tied to the destination.  Operand 2 may be a
;; register or memory.
5583 (define_insn "sse4_2_gtv2di3"
5584 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5586 (match_operand:V2DI 1 "register_operand" "0,x")
5587 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5590 pcmpgtq\t{%2, %0|%0, %2}
5591 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5592 [(set_attr "isa" "noavx,avx")
5593 (set_attr "type" "ssecmp")
5594 (set_attr "prefix_extra" "1")
5595 (set_attr "prefix" "orig,vex")
5596 (set_attr "mode" "TI")])
;; Compare insn for the VI124_128 modes, enabled for
;; TARGET_SSE2 && !TARGET_XOP.  Emits pcmpgt<ssemodesuffix> (noavx,
;; destination tied to operand 1) or vpcmpgt<ssemodesuffix> (avx).
;; Operand 1 must be a register; operand 2 may be register or memory.
5598 (define_insn "sse2_gt<mode>3"
5599 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5601 (match_operand:VI124_128 1 "register_operand" "0,x")
5602 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5603 "TARGET_SSE2 && !TARGET_XOP"
5605 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
5606 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5607 [(set_attr "isa" "noavx,avx")
5608 (set_attr "type" "ssecmp")
5609 (set_attr "prefix_data16" "1,*")
5610 (set_attr "prefix" "orig,vex")
5611 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The whole expansion is delegated to ix86_expand_int_vcond, whose boolean
;; result is captured in 'ok'.
5613 (define_expand "vcond<mode>"
5614 [(set (match_operand:VI124_128 0 "register_operand" "")
5615 (if_then_else:VI124_128
5616 (match_operator 3 ""
5617 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5618 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5619 (match_operand:VI124_128 1 "general_operand" "")
5620 (match_operand:VI124_128 2 "general_operand" "")))]
5623 bool ok = ix86_expand_int_vcond (operands);
;; V2DI conditional select: operator 3 compares operands 4 and 5, and
;; operands 1 and 2 are the two candidate values for operand 0.  Expansion
;; is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5628 (define_expand "vcondv2di"
5629 [(set (match_operand:V2DI 0 "register_operand" "")
5631 (match_operator 3 ""
5632 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5633 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5634 (match_operand:V2DI 1 "general_operand" "")
5635 (match_operand:V2DI 2 "general_operand" "")))]
5638 bool ok = ix86_expand_int_vcond (operands);
;; Conditional select for the VI124_128 modes under the "vcondu" name
;; (presumably the unsigned-comparison variant of vcond -- confirm against
;; the GCC standard pattern names).  Operand layout:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; Expansion is delegated to ix86_expand_int_vcond.
5643 (define_expand "vcondu<mode>"
5644 [(set (match_operand:VI124_128 0 "register_operand" "")
5645 (if_then_else:VI124_128
5646 (match_operator 3 ""
5647 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5648 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5649 (match_operand:VI124_128 1 "general_operand" "")
5650 (match_operand:VI124_128 2 "general_operand" "")))]
5653 bool ok = ix86_expand_int_vcond (operands);
;; V2DI conditional select under the "vcondu" name (presumably the
;; unsigned-comparison variant -- confirm).  Operator 3 compares operands 4
;; and 5; operands 1 and 2 are the candidate values for operand 0.
;; Expansion is delegated to ix86_expand_int_vcond.
5658 (define_expand "vconduv2di"
5659 [(set (match_operand:V2DI 0 "register_operand" "")
5661 (match_operator 3 ""
5662 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5663 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5664 (match_operand:V2DI 1 "general_operand" "")
5665 (match_operand:V2DI 2 "general_operand" "")))]
5668 bool ok = ix86_expand_int_vcond (operands);
5673 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5675 ;; Parallel bitwise logical operations
5677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as XOR with an all-ones
;; vector.  The preparation code builds a CONST_VECTOR holding
;; GET_MODE_NUNITS (<MODE>mode) copies of constm1_rtx, forces it into a
;; register and stores it in operands[2] for the xor.
5679 (define_expand "one_cmpl<mode>2"
5680 [(set (match_operand:VI 0 "register_operand" "")
5681 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5685 int i, n = GET_MODE_NUNITS (<MODE>mode);
5686 rtvec v = rtvec_alloc (n);
5688 for (i = 0; i < n; ++i)
5689 RTVEC_ELT (v, i) = constm1_rtx;
5691 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for and-not on the VI_128 modes: the complement of
;; register operand 1 is combined with operand 2 (register or memory).
;; NOTE(review): the outer RTL code and the enabling condition are not
;; visible in this excerpt.
5694 (define_expand "sse2_andnot<mode>3"
5695 [(set (match_operand:VI_128 0 "register_operand" "")
5697 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5698 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
;; And-not insn for the VI modes: (not operand 1) combined with operand 2.
;; The mnemonic is chosen at output time from the insn's "mode" attribute:
;; "pandn" when it is TI, "andnps" otherwise.  Alternative 0 (noavx) uses
;; the two-operand form with operand 1 tied to the destination;
;; alternative 1 (avx) prepends "v" and uses the three-operand form.  The
;; text is formatted into a static 32-byte buffer.
;; The "mode" attribute yields V8SF for modes wider than 16 bytes,
;; otherwise TI is selected under TARGET_SSE2 and V4SF is the fallback.
5701 (define_insn "*andnot<mode>3"
5702 [(set (match_operand:VI 0 "register_operand" "=x,x")
5704 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5705 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5708 static char buf[32];
5711 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5713 switch (which_alternative)
5716 ops = "%s\t{%%2, %%0|%%0, %%2}";
5719 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5725 snprintf (buf, sizeof (buf), ops, tmp);
5728 [(set_attr "isa" "noavx,avx")
5729 (set_attr "type" "sselog")
5730 (set (attr "prefix_data16")
5732 (and (eq_attr "alternative" "0")
5733 (eq_attr "mode" "TI"))
5735 (const_string "*")))
5736 (set_attr "prefix" "orig,vex")
;; GET_MODE_SIZE is measured in bytes, so the test for a 256-bit mode is
;; "> 16".  The former "> 128" could never hold for a 32-byte vector mode,
;; which left such modes with the TI ("pandn") choice instead of V8SF.
5738 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5739 (const_string "V8SF")
5740 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5743 (const_string "V4SF")))])
;; Named expander for the VI-mode binary operations generated from the
;; <code> iterator.  Both inputs may be register or memory; the preparation
;; statement canonicalizes them with ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the RTL code line and the enabling condition are not
;; visible in this excerpt.
5745 (define_expand "<code><mode>3"
5746 [(set (match_operand:VI 0 "register_operand" "")
5748 (match_operand:VI 1 "nonimmediate_operand" "")
5749 (match_operand:VI 2 "nonimmediate_operand" "")))]
5751 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the VI-mode binary logic operations generated from the <code>
;; iterator; operand 1 is commutative ("%") and the operands must satisfy
;; ix86_binary_operator_ok.  The mnemonic is chosen at output time from the
;; insn's "mode" attribute: "p<logic>" when it is TI, "<logic>ps"
;; otherwise.  Alternative 0 (noavx) uses the two-operand form;
;; alternative 1 (avx) prepends "v" and uses the three-operand form.  The
;; text is formatted into a static 32-byte buffer.
;; The "mode" attribute yields V8SF for modes wider than 16 bytes,
;; otherwise TI is selected under TARGET_SSE2 and V4SF is the fallback.
5753 (define_insn "*<code><mode>3"
5754 [(set (match_operand:VI 0 "register_operand" "=x,x")
5756 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5757 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5759 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5761 static char buf[32];
5764 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5766 switch (which_alternative)
5769 ops = "%s\t{%%2, %%0|%%0, %%2}";
5772 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5778 snprintf (buf, sizeof (buf), ops, tmp);
5781 [(set_attr "isa" "noavx,avx")
5782 (set_attr "type" "sselog")
5783 (set (attr "prefix_data16")
5785 (and (eq_attr "alternative" "0")
5786 (eq_attr "mode" "TI"))
5788 (const_string "*")))
5789 (set_attr "prefix" "orig,vex")
;; GET_MODE_SIZE is measured in bytes, so the test for a 256-bit mode is
;; "> 16".  The former "> 128" could never hold for a 32-byte vector mode,
;; which left such modes with the TI ("p<logic>") choice instead of V8SF.
5791 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5792 (const_string "V8SF")
5793 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5796 (const_string "V4SF")))])
;; And-not insn on a TFmode value held in an SSE register: the complement
;; of register operand 1 is combined with operand 2.  Emits pandn (noavx,
;; destination tied to operand 1) or vpandn (avx, three operands).
5798 (define_insn "*andnottf3"
5799 [(set (match_operand:TF 0 "register_operand" "=x,x")
5801 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5802 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5805 pandn\t{%2, %0|%0, %2}
5806 vpandn\t{%2, %1, %0|%0, %1, %2}"
5807 [(set_attr "isa" "noavx,avx")
5808 (set_attr "type" "sselog")
5809 (set_attr "prefix_data16" "1,*")
5810 (set_attr "prefix" "orig,vex")
5811 (set_attr "mode" "TI")])
;; Named expander for the TFmode binary operations generated from the
;; <code> iterator; the preparation statement canonicalizes the operands
;; with ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands).
;; NOTE(review): the enabling condition is not visible in this excerpt.
5813 (define_expand "<code>tf3"
5814 [(set (match_operand:TF 0 "register_operand" "")
5816 (match_operand:TF 1 "nonimmediate_operand" "")
5817 (match_operand:TF 2 "nonimmediate_operand" "")))]
5819 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Insn for the TFmode binary logic operations generated from the <code>
;; iterator.  Operand 1 is commutative ("%"); the operands must satisfy
;; ix86_binary_operator_ok.  Emits p<logic> (noavx, two operands) or
;; vp<logic> (avx, three operands).
5821 (define_insn "*<code>tf3"
5822 [(set (match_operand:TF 0 "register_operand" "=x,x")
5824 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5825 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5827 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5829 p<logic>\t{%2, %0|%0, %2}
5830 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5831 [(set_attr "isa" "noavx,avx")
5832 (set_attr "type" "sselog")
5833 (set_attr "prefix_data16" "1,*")
5834 (set_attr "prefix" "orig,vex")
5835 (set_attr "mode" "TI")])
5837 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5839 ;; Parallel integral element swizzling
5841 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two VI248_128 vectors into one <ssepackmode> vector.
;; Both inputs are reinterpreted in <ssepackmode> with gen_lowpart and the
;; result is produced by ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; elements -- confirm against that helper.
5843 (define_expand "vec_pack_trunc_<mode>"
5844 [(match_operand:<ssepackmode> 0 "register_operand" "")
5845 (match_operand:VI248_128 1 "register_operand" "")
5846 (match_operand:VI248_128 2 "register_operand" "")]
5849 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5850 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5851 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V8HI inputs into one V16QI result with packsswb (noavx,
;; destination tied to operand 1) or vpacksswb (avx, three operands).
;; Operand 2 may be a register or memory.
5855 (define_insn "sse2_packsswb"
5856 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5859 (match_operand:V8HI 1 "register_operand" "0,x"))
5861 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5864 packsswb\t{%2, %0|%0, %2}
5865 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5866 [(set_attr "isa" "noavx,avx")
5867 (set_attr "type" "sselog")
5868 (set_attr "prefix_data16" "1,*")
5869 (set_attr "prefix" "orig,vex")
5870 (set_attr "mode" "TI")])
;; Pack two V4SI inputs into one V8HI result with packssdw (noavx,
;; destination tied to operand 1) or vpackssdw (avx, three operands).
;; Operand 2 may be a register or memory.
5872 (define_insn "sse2_packssdw"
5873 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5876 (match_operand:V4SI 1 "register_operand" "0,x"))
5878 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5881 packssdw\t{%2, %0|%0, %2}
5882 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5883 [(set_attr "isa" "noavx,avx")
5884 (set_attr "type" "sselog")
5885 (set_attr "prefix_data16" "1,*")
5886 (set_attr "prefix" "orig,vex")
5887 (set_attr "mode" "TI")])
;; Pack two V8HI inputs into one V16QI result with packuswb (noavx,
;; destination tied to operand 1) or vpackuswb (avx, three operands).
;; Operand 2 may be a register or memory.
5889 (define_insn "sse2_packuswb"
5890 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5893 (match_operand:V8HI 1 "register_operand" "0,x"))
5895 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5898 packuswb\t{%2, %0|%0, %2}
5899 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5900 [(set_attr "isa" "noavx,avx")
5901 (set_attr "type" "sselog")
5902 (set_attr "prefix_data16" "1,*")
5903 (set_attr "prefix" "orig,vex")
5904 (set_attr "mode" "TI")])
;; Interleave the high bytes of two V16QI vectors (punpckhbw / vpunpckhbw).
;; The selection list alternates indices 8..15 with 24..31; the latter
;; presumably address operand 2 within the combined 32-element input (the
;; combining RTL line is not visible in this excerpt).
5906 (define_insn "vec_interleave_highv16qi"
5907 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5910 (match_operand:V16QI 1 "register_operand" "0,x")
5911 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5912 (parallel [(const_int 8) (const_int 24)
5913 (const_int 9) (const_int 25)
5914 (const_int 10) (const_int 26)
5915 (const_int 11) (const_int 27)
5916 (const_int 12) (const_int 28)
5917 (const_int 13) (const_int 29)
5918 (const_int 14) (const_int 30)
5919 (const_int 15) (const_int 31)])))]
5922 punpckhbw\t{%2, %0|%0, %2}
5923 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5924 [(set_attr "isa" "noavx,avx")
5925 (set_attr "type" "sselog")
5926 (set_attr "prefix_data16" "1,*")
5927 (set_attr "prefix" "orig,vex")
5928 (set_attr "mode" "TI")])
;; Interleave the low bytes of two V16QI vectors (punpcklbw / vpunpcklbw).
;; The selection list alternates indices 0..7 with 16..23; the latter
;; presumably address operand 2 within the combined 32-element input (the
;; combining RTL line is not visible in this excerpt).
5930 (define_insn "vec_interleave_lowv16qi"
5931 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5934 (match_operand:V16QI 1 "register_operand" "0,x")
5935 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5936 (parallel [(const_int 0) (const_int 16)
5937 (const_int 1) (const_int 17)
5938 (const_int 2) (const_int 18)
5939 (const_int 3) (const_int 19)
5940 (const_int 4) (const_int 20)
5941 (const_int 5) (const_int 21)
5942 (const_int 6) (const_int 22)
5943 (const_int 7) (const_int 23)])))]
5946 punpcklbw\t{%2, %0|%0, %2}
5947 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5948 [(set_attr "isa" "noavx,avx")
5949 (set_attr "type" "sselog")
5950 (set_attr "prefix_data16" "1,*")
5951 (set_attr "prefix" "orig,vex")
5952 (set_attr "mode" "TI")])
;; Interleave the high halfwords of two V8HI vectors
;; (punpckhwd / vpunpckhwd).  The selection list alternates indices 4..7
;; with 12..15; the latter presumably address operand 2 within the combined
;; 16-element input (the combining RTL line is not visible here).
5954 (define_insn "vec_interleave_highv8hi"
5955 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5958 (match_operand:V8HI 1 "register_operand" "0,x")
5959 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5960 (parallel [(const_int 4) (const_int 12)
5961 (const_int 5) (const_int 13)
5962 (const_int 6) (const_int 14)
5963 (const_int 7) (const_int 15)])))]
5966 punpckhwd\t{%2, %0|%0, %2}
5967 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
5968 [(set_attr "isa" "noavx,avx")
5969 (set_attr "type" "sselog")
5970 (set_attr "prefix_data16" "1,*")
5971 (set_attr "prefix" "orig,vex")
5972 (set_attr "mode" "TI")])
;; Interleave the low halfwords of two V8HI vectors
;; (punpcklwd / vpunpcklwd).  The selection list alternates indices 0..3
;; with 8..11; the latter presumably address operand 2 within the combined
;; 16-element input (the combining RTL line is not visible here).
5974 (define_insn "vec_interleave_lowv8hi"
5975 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5978 (match_operand:V8HI 1 "register_operand" "0,x")
5979 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5980 (parallel [(const_int 0) (const_int 8)
5981 (const_int 1) (const_int 9)
5982 (const_int 2) (const_int 10)
5983 (const_int 3) (const_int 11)])))]
5986 punpcklwd\t{%2, %0|%0, %2}
5987 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
5988 [(set_attr "isa" "noavx,avx")
5989 (set_attr "type" "sselog")
5990 (set_attr "prefix_data16" "1,*")
5991 (set_attr "prefix" "orig,vex")
5992 (set_attr "mode" "TI")])
;; Interleave the high doublewords of two V4SI vectors
;; (punpckhdq / vpunpckhdq).  The selection list is 2, 6, 3, 7; indices 6
;; and 7 presumably address operand 2 within the combined 8-element input
;; (the combining RTL line is not visible here).
5994 (define_insn "vec_interleave_highv4si"
5995 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
5998 (match_operand:V4SI 1 "register_operand" "0,x")
5999 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6000 (parallel [(const_int 2) (const_int 6)
6001 (const_int 3) (const_int 7)])))]
6004 punpckhdq\t{%2, %0|%0, %2}
6005 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6006 [(set_attr "isa" "noavx,avx")
6007 (set_attr "type" "sselog")
6008 (set_attr "prefix_data16" "1,*")
6009 (set_attr "prefix" "orig,vex")
6010 (set_attr "mode" "TI")])
;; Interleave the low doublewords of two V4SI vectors
;; (punpckldq / vpunpckldq).  The selection list is 0, 4, 1, 5; indices 4
;; and 5 presumably address operand 2 within the combined 8-element input
;; (the combining RTL line is not visible here).
6012 (define_insn "vec_interleave_lowv4si"
6013 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6016 (match_operand:V4SI 1 "register_operand" "0,x")
6017 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6018 (parallel [(const_int 0) (const_int 4)
6019 (const_int 1) (const_int 5)])))]
6022 punpckldq\t{%2, %0|%0, %2}
6023 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6024 [(set_attr "isa" "noavx,avx")
6025 (set_attr "type" "sselog")
6026 (set_attr "prefix_data16" "1,*")
6027 (set_attr "prefix" "orig,vex")
6028 (set_attr "mode" "TI")])
6030 ;; Modes handled by pinsr patterns.
;; V8HI is available unconditionally within this iterator; V16QI and V4SI
;; require TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
6031 (define_mode_iterator PINSR_MODE
6032 [(V16QI "TARGET_SSE4_1") V8HI
6033 (V4SI "TARGET_SSE4_1")
6034 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Per-mode name prefix for the pinsr pattern: "sse2" for V8HI and
;; "sse4_1" for V16QI, V4SI and V2DI.
6036 (define_mode_attr sse2p4_1
6037 [(V16QI "sse4_1") (V8HI "sse2")
6038 (V4SI "sse4_1") (V2DI "sse4_1")])
6040 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.
;; Operand 3 is a vec_merge bit mask: the condition requires
;; exact_log2 (operand 3) to be below the element count, and the unsigned
;; cast also rejects a negative exact_log2 result.  The output code turns
;; the mask into an element index before printing it as the immediate.
;; Alternatives 0/1 are non-AVX (register / memory source, destination
;; tied to operand 1); alternatives 2/3 are the AVX three-operand forms.
;; When the scalar mode is narrower than SImode the source is printed with
;; the %k modifier.
;; NOTE(review): the switch case labels pairing each return with its
;; alternative are not visible in this excerpt.
6041 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6042 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6043 (vec_merge:PINSR_MODE
6044 (vec_duplicate:PINSR_MODE
6045 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6046 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6047 (match_operand:SI 3 "const_int_operand" "")))]
6049 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6050 < GET_MODE_NUNITS (<MODE>mode))"
6052 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6054 switch (which_alternative)
6057 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6058 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6061 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6063 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6064 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6067 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6072 [(set_attr "isa" "noavx,noavx,avx,avx")
6073 (set_attr "type" "sselog")
6074 (set (attr "prefix_rex")
6076 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6077 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6079 (const_string "*")))
6080 (set (attr "prefix_data16")
6082 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6083 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6085 (const_string "*")))
6086 (set (attr "prefix_extra")
6088 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6089 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6091 (const_string "1")))
6092 (set_attr "length_immediate" "1")
6093 (set_attr "prefix" "orig,orig,vex,vex")
6094 (set_attr "mode" "TI")])
;; Extract byte element operand 2 (0..15) of V16QI register operand 1 into
;; an SWI48 general register with %vpextrb.  The destination is printed
;; with the %k modifier.
6096 (define_insn "*sse4_1_pextrb_<mode>"
6097 [(set (match_operand:SWI48 0 "register_operand" "=r")
6100 (match_operand:V16QI 1 "register_operand" "x")
6101 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6103 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6104 [(set_attr "type" "sselog")
6105 (set_attr "prefix_extra" "1")
6106 (set_attr "length_immediate" "1")
6107 (set_attr "prefix" "maybe_vex")
6108 (set_attr "mode" "TI")])
;; Store byte element operand 2 (0..15) of V16QI register operand 1
;; directly to a QImode memory destination with %vpextrb.
6110 (define_insn "*sse4_1_pextrb_memory"
6111 [(set (match_operand:QI 0 "memory_operand" "=m")
6113 (match_operand:V16QI 1 "register_operand" "x")
6114 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6116 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6117 [(set_attr "type" "sselog")
6118 (set_attr "prefix_extra" "1")
6119 (set_attr "length_immediate" "1")
6120 (set_attr "prefix" "maybe_vex")
6121 (set_attr "mode" "TI")])
;; Extract halfword element operand 2 (0..7) of V8HI register operand 1
;; into an SWI48 general register with %vpextrw.  The destination is
;; printed with the %k modifier.
6123 (define_insn "*sse2_pextrw_<mode>"
6124 [(set (match_operand:SWI48 0 "register_operand" "=r")
6127 (match_operand:V8HI 1 "register_operand" "x")
6128 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6130 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6131 [(set_attr "type" "sselog")
6132 (set_attr "prefix_data16" "1")
6133 (set_attr "length_immediate" "1")
6134 (set_attr "prefix" "maybe_vex")
6135 (set_attr "mode" "TI")])
;; Store halfword element operand 2 (0..7) of V8HI register operand 1
;; directly to an HImode memory destination with %vpextrw.
6137 (define_insn "*sse4_1_pextrw_memory"
6138 [(set (match_operand:HI 0 "memory_operand" "=m")
6140 (match_operand:V8HI 1 "register_operand" "x")
6141 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6143 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6144 [(set_attr "type" "sselog")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "length_immediate" "1")
6147 (set_attr "prefix" "maybe_vex")
6148 (set_attr "mode" "TI")])
;; Extract doubleword element operand 2 (0..3) of V4SI register operand 1
;; into an SImode general register or memory ("=rm") with %vpextrd.
6150 (define_insn "*sse4_1_pextrd"
6151 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6153 (match_operand:V4SI 1 "register_operand" "x")
6154 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6156 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6157 [(set_attr "type" "sselog")
6158 (set_attr "prefix_extra" "1")
6159 (set_attr "length_immediate" "1")
6160 (set_attr "prefix" "maybe_vex")
6161 (set_attr "mode" "TI")])
;; 64-bit variant of the doubleword extract with a DImode register
;; destination, enabled for TARGET_64BIT && TARGET_SSE4_1.  %vpextrd is
;; printed with the %k form of the destination register.
6163 (define_insn "*sse4_1_pextrd_zext"
6164 [(set (match_operand:DI 0 "register_operand" "=r")
6167 (match_operand:V4SI 1 "register_operand" "x")
6168 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6169 "TARGET_64BIT && TARGET_SSE4_1"
6170 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6171 [(set_attr "type" "sselog")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "length_immediate" "1")
6174 (set_attr "prefix" "maybe_vex")
6175 (set_attr "mode" "TI")])
6177 ;; It must come before *vec_extractv2di_1_rex64 since it is preferred.
;; Extract quadword element operand 2 (0..1) of V2DI register operand 1
;; into a DImode general register or memory with %vpextrq.  Enabled for
;; TARGET_SSE4_1 && TARGET_64BIT; prefix_rex is set to "1".
6178 (define_insn "*sse4_1_pextrq"
6179 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6181 (match_operand:V2DI 1 "register_operand" "x")
6182 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6183 "TARGET_SSE4_1 && TARGET_64BIT"
6184 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6185 [(set_attr "type" "sselog")
6186 (set_attr "prefix_rex" "1")
6187 (set_attr "prefix_extra" "1")
6188 (set_attr "length_immediate" "1")
6189 (set_attr "prefix" "maybe_vex")
6190 (set_attr "mode" "TI")])
;; Expander for pshufd with a combined immediate.  Operand 2 is split into
;; four 2-bit element selectors (bits 0-1, 2-3, 4-5, 6-7), which are passed
;; as separate operands to gen_sse2_pshufd_1.
6192 (define_expand "sse2_pshufd"
6193 [(match_operand:V4SI 0 "register_operand" "")
6194 (match_operand:V4SI 1 "nonimmediate_operand" "")
6195 (match_operand:SI 2 "const_int_operand" "")]
6198 int mask = INTVAL (operands[2]);
6199 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6200 GEN_INT ((mask >> 0) & 3),
6201 GEN_INT ((mask >> 2) & 3),
6202 GEN_INT ((mask >> 4) & 3),
6203 GEN_INT ((mask >> 6) & 3)));
;; pshufd insn with the shuffle given as four explicit element selectors
;; (operands 2..5, each 0..3).  The output code ORs them back into one
;; immediate (2 bits each, operand 2 lowest), stores it in operands[2] and
;; prints %vpshufd.
6207 (define_insn "sse2_pshufd_1"
6208 [(set (match_operand:V4SI 0 "register_operand" "=x")
6210 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6211 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6212 (match_operand 3 "const_0_to_3_operand" "")
6213 (match_operand 4 "const_0_to_3_operand" "")
6214 (match_operand 5 "const_0_to_3_operand" "")])))]
6218 mask |= INTVAL (operands[2]) << 0;
6219 mask |= INTVAL (operands[3]) << 2;
6220 mask |= INTVAL (operands[4]) << 4;
6221 mask |= INTVAL (operands[5]) << 6;
6222 operands[2] = GEN_INT (mask);
6224 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6226 [(set_attr "type" "sselog1")
6227 (set_attr "prefix_data16" "1")
6228 (set_attr "prefix" "maybe_vex")
6229 (set_attr "length_immediate" "1")
6230 (set_attr "mode" "TI")])
;; Expander for pshuflw with a combined immediate.  Operand 2 is split into
;; four 2-bit element selectors (bits 0-1, 2-3, 4-5, 6-7), which are passed
;; as separate operands to gen_sse2_pshuflw_1.
6232 (define_expand "sse2_pshuflw"
6233 [(match_operand:V8HI 0 "register_operand" "")
6234 (match_operand:V8HI 1 "nonimmediate_operand" "")
6235 (match_operand:SI 2 "const_int_operand" "")]
6238 int mask = INTVAL (operands[2]);
6239 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6240 GEN_INT ((mask >> 0) & 3),
6241 GEN_INT ((mask >> 2) & 3),
6242 GEN_INT ((mask >> 4) & 3),
6243 GEN_INT ((mask >> 6) & 3)));
;; pshuflw insn with the shuffle given as four explicit selectors
;; (operands 2..5, each 0..3).  The output code ORs them back into one
;; immediate (2 bits each, operand 2 lowest), stores it in operands[2] and
;; prints %vpshuflw.  prefix_rep is set and prefix_data16 is cleared.
;; NOTE(review): the remaining entries of the selection list are not
;; visible in this excerpt.
6247 (define_insn "sse2_pshuflw_1"
6248 [(set (match_operand:V8HI 0 "register_operand" "=x")
6250 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6251 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6252 (match_operand 3 "const_0_to_3_operand" "")
6253 (match_operand 4 "const_0_to_3_operand" "")
6254 (match_operand 5 "const_0_to_3_operand" "")
6262 mask |= INTVAL (operands[2]) << 0;
6263 mask |= INTVAL (operands[3]) << 2;
6264 mask |= INTVAL (operands[4]) << 4;
6265 mask |= INTVAL (operands[5]) << 6;
6266 operands[2] = GEN_INT (mask);
6268 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6270 [(set_attr "type" "sselog")
6271 (set_attr "prefix_data16" "0")
6272 (set_attr "prefix_rep" "1")
6273 (set_attr "prefix" "maybe_vex")
6274 (set_attr "length_immediate" "1")
6275 (set_attr "mode" "TI")])
;; Expander for pshufhw with a combined immediate.  Operand 2 is split into
;; four 2-bit selectors and each is biased by +4 before being passed to
;; gen_sse2_pshufhw_1, whose selector operands use the 4..7 range.
6277 (define_expand "sse2_pshufhw"
6278 [(match_operand:V8HI 0 "register_operand" "")
6279 (match_operand:V8HI 1 "nonimmediate_operand" "")
6280 (match_operand:SI 2 "const_int_operand" "")]
6283 int mask = INTVAL (operands[2]);
6284 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6285 GEN_INT (((mask >> 0) & 3) + 4),
6286 GEN_INT (((mask >> 2) & 3) + 4),
6287 GEN_INT (((mask >> 4) & 3) + 4),
6288 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw insn with the shuffle given as four explicit selectors
;; (operands 2..5, each 4..7) following fixed leading entries in the
;; selection list.  The output code subtracts 4 from each selector, ORs
;; them into one immediate (2 bits each, operand 2 lowest), stores it in
;; operands[2] and prints %vpshufhw.
;; NOTE(review): only the first fixed entry (const_int 0) of the selection
;; list is visible in this excerpt.
6292 (define_insn "sse2_pshufhw_1"
6293 [(set (match_operand:V8HI 0 "register_operand" "=x")
6295 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6296 (parallel [(const_int 0)
6300 (match_operand 2 "const_4_to_7_operand" "")
6301 (match_operand 3 "const_4_to_7_operand" "")
6302 (match_operand 4 "const_4_to_7_operand" "")
6303 (match_operand 5 "const_4_to_7_operand" "")])))]
6307 mask |= (INTVAL (operands[2]) - 4) << 0;
6308 mask |= (INTVAL (operands[3]) - 4) << 2;
6309 mask |= (INTVAL (operands[4]) - 4) << 4;
6310 mask |= (INTVAL (operands[5]) - 4) << 6;
6311 operands[2] = GEN_INT (mask);
6313 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6315 [(set_attr "type" "sselog")
6316 (set_attr "prefix_rep" "1")
6317 (set_attr "prefix_data16" "0")
6318 (set_attr "prefix" "maybe_vex")
6319 (set_attr "length_immediate" "1")
6320 (set_attr "mode" "TI")])
;; Expander that places SImode operand 1 into a V4SI destination.  The
;; preparation statement sets operands[2] to the V4SImode zero constant,
;; which the pattern uses as its other input.
;; NOTE(review): the RTL lines that combine the operands are not visible in
;; this excerpt.
6322 (define_expand "sse2_loadd"
6323 [(set (match_operand:V4SI 0 "register_operand" "")
6326 (match_operand:SI 1 "nonimmediate_operand" ""))
6330 "operands[2] = CONST0_RTX (V4SImode);")
;; Insert SImode operand 2 into a V4SI register whose other input is
;; operand 1 (a register or zero, per reg_or_0_operand).  Five
;; alternatives: two %vmovd forms (memory or general-register source),
;; two movss forms (non-AVX) and one three-operand vmovss form (AVX).
;; NOTE(review): the RTL codes wrapping the operands and the enabling
;; condition are not visible in this excerpt.
6332 (define_insn "sse2_loadld"
6333 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6336 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6337 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6341 %vmovd\t{%2, %0|%0, %2}
6342 %vmovd\t{%2, %0|%0, %2}
6343 movss\t{%2, %0|%0, %2}
6344 movss\t{%2, %0|%0, %2}
6345 vmovss\t{%2, %1, %0|%0, %1, %2}"
6346 [(set_attr "isa" "*,*,noavx,noavx,avx")
6347 (set_attr "type" "ssemov")
6348 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6349 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of V4SI register operand 1 into an SImode destination.
;; After reload the insn is split into a plain SImode move when
;; TARGET_INTER_UNIT_MOVES holds, the destination is memory, or the
;; destination is not a general register.  The split rewrites operand 1 as
;; an SImode register with the same register number.
6351 (define_insn_and_split "sse2_stored"
6352 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6354 (match_operand:V4SI 1 "register_operand" "x,Yi")
6355 (parallel [(const_int 0)])))]
6358 "&& reload_completed
6359 && (TARGET_INTER_UNIT_MOVES
6360 || MEM_P (operands [0])
6361 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6362 [(set (match_dup 0) (match_dup 1))]
6363 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI value in offsettable
;; memory into an SImode general register.  The split code emits a plain
;; SImode move from the source address adjusted by i*4.
;; NOTE(review): the insn and split conditions are not visible in this
;; excerpt.
6365 (define_insn_and_split "*vec_ext_v4si_mem"
6366 [(set (match_operand:SI 0 "register_operand" "=r")
6368 (match_operand:V4SI 1 "memory_operand" "o")
6369 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6375 int i = INTVAL (operands[2]);
6377 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of V2DI register operand 1 into a
;; DImode register-or-memory destination.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6381 (define_expand "sse_storeq"
6382 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6384 (match_operand:V2DI 1 "register_operand" "")
6385 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI store, rejected when both operands
;; are memory.  Three alternatives; the only template visible here is the
;; last one, mov{q}, which has type imov and mode DI.
;; NOTE(review): the templates for the first two alternatives are not
;; visible in this excerpt.
6388 (define_insn "*sse2_storeq_rex64"
6389 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6391 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6392 (parallel [(const_int 0)])))]
6393 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6397 mov{q}\t{%1, %0|%0, %1}"
6398 [(set_attr "type" "*,*,imov")
6399 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI store into a DImode SSE register or memory ("=xm").
;; NOTE(review): this insn's condition and template are not visible in
;; this excerpt.  The second operand list below appears to belong to a
;; following split whose header line is also not visible; it rewrites
;; operand 1 as a DImode register with the same register number and emits
;; a plain move when TARGET_INTER_UNIT_MOVES holds, the destination is
;; memory, or the destination is not a general register.
6401 (define_insn "*sse2_storeq"
6402 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6404 (match_operand:V2DI 1 "register_operand" "x")
6405 (parallel [(const_int 0)])))]
6410 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6412 (match_operand:V2DI 1 "register_operand" "")
6413 (parallel [(const_int 0)])))]
6416 && (TARGET_INTER_UNIT_MOVES
6417 || MEM_P (operands [0])
6418 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6419 [(set (match_dup 0) (match_dup 1))]
6420 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI value on 64-bit targets; rejected when both
;; operands are memory.  Five alternatives:
;;   0: %vmovhps store to memory
;;   1: psrldq by 8 in place (noavx)
;;   2: vpsrldq by 8 (avx)
;;   3: %vmovq from memory into an SSE register, using %H1
;;   4: mov{q} from memory into a general register, using %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-printing code.
6422 (define_insn "*vec_extractv2di_1_rex64"
6423 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6425 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6426 (parallel [(const_int 1)])))]
6427 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6429 %vmovhps\t{%1, %0|%0, %1}
6430 psrldq\t{$8, %0|%0, 8}
6431 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6432 %vmovq\t{%H1, %0|%0, %H1}
6433 mov{q}\t{%H1, %0|%0, %H1}"
6434 [(set_attr "isa" "*,noavx,avx,*,*")
6435 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6436 (set_attr "length_immediate" "*,1,1,*,*")
6437 (set_attr "memory" "*,none,none,*,*")
6438 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
6439 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of a V2DI value on 32-bit targets
;; (!TARGET_64BIT && TARGET_SSE); rejected when both operands are memory.
;; Six alternatives:
;;   0: %vmovhps store to memory
;;   1: psrldq by 8 in place (noavx)
;;   2: vpsrldq by 8 (avx)
;;   3: %vmovq from memory, using %H1
;;   4: movhlps register to register (noavx)
;;   5: movlps from memory, using %H1 (noavx)
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the i386 operand-printing code.
6441 (define_insn "*vec_extractv2di_1"
6442 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,Y2,Y2,Y2,x,x")
6444 (match_operand:V2DI 1 "nonimmediate_operand" " x,0 ,Y2,o ,x,o")
6445 (parallel [(const_int 1)])))]
6446 "!TARGET_64BIT && TARGET_SSE
6447 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6449 %vmovhps\t{%1, %0|%0, %1}
6450 psrldq\t{$8, %0|%0, 8}
6451 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6452 %vmovq\t{%H1, %0|%0, %H1}
6453 movhlps\t{%1, %0|%0, %1}
6454 movlps\t{%H1, %0|%0, %H1}"
6455 [(set_attr "isa" "*,noavx,avx,*,noavx,noavx")
6456 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov")
6457 (set_attr "length_immediate" "*,1,1,*,*,*")
6458 (set_attr "memory" "*,none,none,*,*,*")
6459 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
6460 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
;; VEX-encoded broadcast of an SImode value into a V4SI register:
;; vpshufd with immediate 0 when the source is a register, vbroadcastss
;; when it is in memory.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6462 (define_insn "*vec_dupv4si_avx"
6463 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6465 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6468 vpshufd\t{$0, %1, %0|%0, %1, 0}
6469 vbroadcastss\t{%1, %0|%0, %1}"
6470 [(set_attr "type" "sselog1,ssemov")
6471 (set_attr "length_immediate" "1,0")
6472 (set_attr "prefix_extra" "0,1")
6473 (set_attr "prefix" "vex")
6474 (set_attr "mode" "TI,V4SF")])
;; Broadcast of an SImode register into a V4SI register: pshufd with
;; immediate 0 for the Y2 alternative, or an in-place shufps with
;; immediate 0 when the source is tied to the destination.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6476 (define_insn "*vec_dupv4si"
6477 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6479 (match_operand:SI 1 "register_operand" " Y2,0")))]
6482 pshufd\t{$0, %1, %0|%0, %1, 0}
6483 shufps\t{$0, %0, %0|%0, %0, 0}"
6484 [(set_attr "type" "sselog1")
6485 (set_attr "length_immediate" "1")
6486 (set_attr "mode" "TI,V4SF")])
;; Broadcast of a DImode value into a V2DI register, three alternatives
;; (noavx, avx, any).  The visible templates are vpunpcklqdq (printed with
;; %d1) for the AVX register alternative and %vmovddup for the memory
;; source.
;; NOTE(review): the enabling condition and the template of the first
;; alternative are not visible in this excerpt.
6488 (define_insn "*vec_dupv2di_sse3"
6489 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6491 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6495 vpunpcklqdq\t{%d1, %0|%0, %d1}
6496 %vmovddup\t{%1, %0|%0, %1}"
6497 [(set_attr "isa" "noavx,avx,*")
6498 (set_attr "type" "sselog1")
6499 (set_attr "prefix" "orig,vex,maybe_vex")
6500 (set_attr "mode" "TI,TI,DF")])
;; Broadcast of a DImode register into a V2DI register.  In both
;; alternatives the source is tied to the destination; the alternatives
;; have types sselog1 and ssemov with modes TI and V4SF.
;; NOTE(review): the enabling condition and the output templates are not
;; visible in this excerpt.
6502 (define_insn "*vec_dupv2di"
6503 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6505 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6510 [(set_attr "type" "sselog1,ssemov")
6511 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from SImode operands 1 and 2.  Seven alternatives:
;;   0/1: pinsrd / vpinsrd inserting operand 2 at index 1
;;   2/3: punpckldq / vpunpckldq of two SSE registers
;;   4:   %vmovd of operand 1 (operand 2 uses constraint "C")
;;   5:   MMX punpckldq
;;   6:   MMX movd of operand 1 (operand 2 uses constraint "C")
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
6513 (define_insn "*vec_concatv2si_sse4_1"
6514 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6516 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6517 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6520 pinsrd\t{$1, %2, %0|%0, %2, 1}
6521 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
6522 punpckldq\t{%2, %0|%0, %2}
6523 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6524 %vmovd\t{%1, %0|%0, %1}
6525 punpckldq\t{%2, %0|%0, %2}
6526 movd\t{%1, %0|%0, %1}"
6527 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
6528 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6529 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6530 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6531 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6532 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6534 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6536 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from SImode operand 1 and operand 2 (a register or
;; zero, per reg_or_0_operand).  Four alternatives: SSE punpckldq, SSE movd
;; of operand 1, MMX punpckldq, MMX movd of operand 1.
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
6537 (define_insn "*vec_concatv2si_sse2"
6538 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6540 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6541 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6544 punpckldq\t{%2, %0|%0, %2}
6545 movd\t{%1, %0|%0, %1}
6546 punpckldq\t{%2, %0|%0, %2}
6547 movd\t{%1, %0|%0, %1}"
6548 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6549 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from SImode operand 1 and operand 2 (a register or
;; zero, per reg_or_0_operand).  Four alternatives: unpcklps, movss from
;; memory, MMX punpckldq, MMX movd of operand 1.
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
6551 (define_insn "*vec_concatv2si_sse"
6552 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6554 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6555 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6558 unpcklps\t{%2, %0|%0, %2}
6559 movss\t{%1, %0|%0, %1}
6560 punpckldq\t{%2, %0|%0, %2}
6561 movd\t{%1, %0|%0, %1}"
6562 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6563 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI halves.  Five alternatives:
;; punpcklqdq (noavx), vpunpcklqdq (avx), movlhps (noavx, register
;; source), movhps (noavx, memory source) and vmovhps (avx, memory
;; source).
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
6565 (define_insn "*vec_concatv4si"
6566 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x,x,x")
6568 (match_operand:V2SI 1 "register_operand" " 0 ,x,0,0,x")
6569 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,x,m,m")))]
6572 punpcklqdq\t{%2, %0|%0, %2}
6573 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6574 movlhps\t{%2, %0|%0, %2}
6575 movhps\t{%2, %0|%0, %2}
6576 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6577 [(set_attr "isa" "noavx,avx,noavx,noavx,avx")
6578 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
6579 (set_attr "prefix" "orig,vex,orig,orig,vex")
6580 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
6582 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI value from DImode operands 1 and 2.  Nine alternatives:
;;   0/1: pinsrq / vpinsrq inserting operand 2 at index 1
;;   2:   %vmovq of operand 1 (operand 2 uses constraint "C")
;;   3:   %vmovd from a general register (operand 2 uses constraint "C")
;;   4:   movq2dq from an MMX register
;;   5/6: punpcklqdq / vpunpcklqdq of two SSE registers
;;   7/8: movhps / vmovhps with operand 2 in memory
;; The "type" is sselog for alternatives 0, 1, 5 and 6 and ssemov
;; otherwise; prefix_rex depends on alternatives 0 and 3 without AVX.
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
6583 (define_insn "*vec_concatv2di_rex64"
6584 [(set (match_operand:V2DI 0 "register_operand"
6585 "=Y4,x ,x ,Yi,!x,x,x,x,x")
6587 (match_operand:DI 1 "nonimmediate_operand"
6588 " 0 ,x ,xm,r ,*y,0,x,0,x")
6589 (match_operand:DI 2 "vector_move_operand"
6590 " rm,rm,C ,C ,C ,x,x,m,m")))]
6593 pinsrq\t{$1, %2, %0|%0, %2, 1}
6594 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
6595 %vmovq\t{%1, %0|%0, %1}
6596 %vmovd\t{%1, %0|%0, %1}
6597 movq2dq\t{%1, %0|%0, %1}
6598 punpcklqdq\t{%2, %0|%0, %2}
6599 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6600 movhps\t{%2, %0|%0, %2}
6601 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6602 [(set_attr "isa" "noavx,avx,*,*,*,noavx,avx,noavx,avx")
6605 (eq_attr "alternative" "0,1,5,6")
6606 (const_string "sselog")
6607 (const_string "ssemov")))
6608 (set (attr "prefix_rex")
6610 (and (eq_attr "alternative" "0,3")
6611 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6613 (const_string "*")))
6614 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6615 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6616 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6617 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; Build a V2DI value in operand 0 from the two DImode operands 1 and 2
;; on non-64-bit targets with SSE (see the insn condition).  Seven
;; alternatives, in order:
;;   0/1: operand 2 matches "C"; operand 1 is moved in with %vmovq, or
;;        with movq2dq when it lives in an MMX register.
;;   2/3: both halves in registers -> punpcklqdq / vpunpcklqdq.
;;   4:   both halves in SSE registers, operand 1 tied -> movlhps.
;;   5/6: operand 2 in memory -> movhps / vmovhps.
;; Per-alternative attributes below follow the same order.
6619 (define_insn "vec_concatv2di"
6620 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6622 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6623 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6624 "!TARGET_64BIT && TARGET_SSE"
6626 %vmovq\t{%1, %0|%0, %1}
6627 movq2dq\t{%1, %0|%0, %1}
6628 punpcklqdq\t{%2, %0|%0, %2}
6629 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6630 movlhps\t{%2, %0|%0, %2}
6631 movhps\t{%2, %0|%0, %2}
6632 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6633 [(set_attr "isa" "*,*,noavx,avx,noavx,noavx,avx")
6634 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6635 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6636 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for the signed low-half unpack: operand 1 (a VI124_128 mode)
;; is widened into operand 0 (<sseunpackmode>).  All work is delegated
;; to ix86_expand_sse_unpack; comparing the four sibling expanders, its
;; second argument appears to select unsigned and the third the high
;; half -- both false here.
6638 (define_expand "vec_unpacks_lo_<mode>"
6639 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6640 (match_operand:VI124_128 1 "register_operand" "")]
6642 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for the signed high-half unpack: operand 1 (a VI124_128
;; mode) is widened into operand 0 (<sseunpackmode>).  Delegates to
;; ix86_expand_sse_unpack with (false, true); comparing the sibling
;; expanders, these appear to mean "not unsigned" and "high half".
6644 (define_expand "vec_unpacks_hi_<mode>"
6645 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6646 (match_operand:VI124_128 1 "register_operand" "")]
6648 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for the unsigned low-half unpack: operand 1 (a VI124_128
;; mode) is widened into operand 0 (<sseunpackmode>).  Delegates to
;; ix86_expand_sse_unpack with (true, false); comparing the sibling
;; expanders, these appear to mean "unsigned" and "low half".
6650 (define_expand "vec_unpacku_lo_<mode>"
6651 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6652 (match_operand:VI124_128 1 "register_operand" "")]
6654 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for the unsigned high-half unpack: operand 1 (a VI124_128
;; mode) is widened into operand 0 (<sseunpackmode>).  Delegates to
;; ix86_expand_sse_unpack with (true, true); comparing the sibling
;; expanders, these appear to mean "unsigned" and "high half".
6656 (define_expand "vec_unpacku_hi_<mode>"
6657 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6658 (match_operand:VI124_128 1 "register_operand" "")]
6660 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6662 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6666 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned byte average of V16QI operands 1 and 2 into
;; operand 0.  The all-ones const_vector is part of the RTL expression;
;; presumably it is the rounding term of (a + b + 1) >> 1 -- confirm
;; against the full pattern.  Before the insn is generated the operands
;; are legitimized for a PLUS by ix86_fixup_binary_operands_no_copy.
6668 (define_expand "sse2_uavgv16qi3"
6669 [(set (match_operand:V16QI 0 "register_operand" "")
6675 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6677 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6678 (const_vector:V16QI [(const_int 1) (const_int 1)
6679 (const_int 1) (const_int 1)
6680 (const_int 1) (const_int 1)
6681 (const_int 1) (const_int 1)
6682 (const_int 1) (const_int 1)
6683 (const_int 1) (const_int 1)
6684 (const_int 1) (const_int 1)
6685 (const_int 1) (const_int 1)]))
6688 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matching insn for the V16QI unsigned average: emits pavgb, or the
;; three-operand vpavgb for the AVX alternative.  Operand 1 carries the
;; "%" commutativity marker, and the condition requires SSE2 plus
;; operands acceptable to ix86_binary_operator_ok for a PLUS.
;; The data16 prefix is counted only for the non-AVX alternative.
6690 (define_insn "*sse2_uavgv16qi3"
6691 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6697 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6699 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6700 (const_vector:V16QI [(const_int 1) (const_int 1)
6701 (const_int 1) (const_int 1)
6702 (const_int 1) (const_int 1)
6703 (const_int 1) (const_int 1)
6704 (const_int 1) (const_int 1)
6705 (const_int 1) (const_int 1)
6706 (const_int 1) (const_int 1)
6707 (const_int 1) (const_int 1)]))
6709 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6711 pavgb\t{%2, %0|%0, %2}
6712 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6713 [(set_attr "isa" "noavx,avx")
6714 (set_attr "type" "sseiadd")
6715 (set_attr "prefix_data16" "1,*")
6716 (set_attr "prefix" "orig,vex")
6717 (set_attr "mode" "TI")])
;; Expander for the unsigned word average of V8HI operands 1 and 2 into
;; operand 0.  The all-ones const_vector is part of the RTL expression;
;; presumably it is the rounding term of (a + b + 1) >> 1 -- confirm
;; against the full pattern.  Before the insn is generated the operands
;; are legitimized for a PLUS by ix86_fixup_binary_operands_no_copy.
6719 (define_expand "sse2_uavgv8hi3"
6720 [(set (match_operand:V8HI 0 "register_operand" "")
6726 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6728 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6729 (const_vector:V8HI [(const_int 1) (const_int 1)
6730 (const_int 1) (const_int 1)
6731 (const_int 1) (const_int 1)
6732 (const_int 1) (const_int 1)]))
6735 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matching insn for the V8HI unsigned average: emits pavgw, or the
;; three-operand vpavgw for the AVX alternative.  Operand 1 carries the
;; "%" commutativity marker, and the condition requires SSE2 plus
;; operands acceptable to ix86_binary_operator_ok for a PLUS.
;; The data16 prefix is counted only for the non-AVX alternative.
6737 (define_insn "*sse2_uavgv8hi3"
6738 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6744 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6746 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6747 (const_vector:V8HI [(const_int 1) (const_int 1)
6748 (const_int 1) (const_int 1)
6749 (const_int 1) (const_int 1)
6750 (const_int 1) (const_int 1)]))
6752 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6754 pavgw\t{%2, %0|%0, %2}
6755 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6756 [(set_attr "isa" "noavx,avx")
6757 (set_attr "type" "sseiadd")
6758 (set_attr "prefix_data16" "1,*")
6759 (set_attr "prefix" "orig,vex")
6760 (set_attr "mode" "TI")])
6762 ;; The correct representation for this is absolutely enormous, and
6763 ;; surely not generally useful.
;; The operation is therefore modelled as an opaque unspec taking two
;; V16QI inputs and yielding a V2DI result.  Alternative 0 is the
;; two-operand psadbw (operand 1 tied to the destination); alternative
;; 1 is the three-operand AVX vpsadbw.
6764 (define_insn "sse2_psadbw"
6765 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6766 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6767 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6771 psadbw\t{%2, %0|%0, %2}
6772 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6773 [(set_attr "isa" "noavx,avx")
6774 (set_attr "type" "sseiadd")
6775 (set_attr "atom_unit" "simul")
6776 (set_attr "prefix_data16" "1,*")
6777 (set_attr "prefix" "orig,vex")
6778 (set_attr "mode" "TI")])
;; movmskps/movmskpd family, one insn per mode of the VF iterator.  The
;; vector float register in operand 1 is reduced to an SImode value in
;; the general register operand 0.  "%v" in the template together with
;; prefix "maybe_vex" lets the same pattern serve SSE and AVX encodings.
6780 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
6781 [(set (match_operand:SI 0 "register_operand" "=r")
6783 [(match_operand:VF 1 "register_operand" "x")]
6786 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6787 [(set_attr "type" "ssemov")
6788 (set_attr "prefix" "maybe_vex")
6789 (set_attr "mode" "<MODE>")])
;; pmovmskb: the V16QI register in operand 1 is reduced, via an unspec,
;; to an SImode value in the general register operand 0.  "%v" in the
;; template together with prefix "maybe_vex" covers both the legacy and
;; the VEX encoding.
6791 (define_insn "sse2_pmovmskb"
6792 [(set (match_operand:SI 0 "register_operand" "=r")
6793 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6796 "%vpmovmskb\t{%1, %0|%0, %1}"
6797 [(set_attr "type" "ssemov")
6798 (set_attr "prefix_data16" "1")
6799 (set_attr "prefix" "maybe_vex")
6800 (set_attr "mode" "SI")])
;; Expander for the masked byte store maskmovdqu.  Operand 0 is the
;; V16QI memory destination; operands 1 and 2 are V16QI registers fed to
;; the unspec (presumably the data and the byte mask, in that order --
;; confirm against the insn's operand usage).
6802 (define_expand "sse2_maskmovdqu"
6803 [(set (match_operand:V16QI 0 "memory_operand" "")
6804 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6805 (match_operand:V16QI 2 "register_operand" "")
;; Matching insn for maskmovdqu.  The store address is operand 0, a
;; Pmode register pinned by the "D" constraint; it never appears in the
;; assembler template because the instruction addresses memory through
;; it implicitly.  The old memory contents are an input of the unspec
;; (mem ... (match_dup 0)), so untouched bytes are seen as preserved.
;; length_vex is computed by hand from operand 2 for the reason given
;; in the comment inside the attribute list.
6810 (define_insn "*sse2_maskmovdqu"
6811 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6812 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6813 (match_operand:V16QI 2 "register_operand" "x")
6814 (mem:V16QI (match_dup 0))]
6817 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6818 [(set_attr "type" "ssemov")
6819 (set_attr "prefix_data16" "1")
6820 ;; The implicit %rdi operand confuses default length_vex computation.
6821 (set (attr "length_vex")
6822 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6823 (set_attr "prefix" "maybe_vex")
6824 (set_attr "mode" "TI")])
;; Load the MXCSR register from the SImode memory operand 0.  Modelled
;; as an unspec_volatile with no destination, and tagged as a memory
;; load for scheduling purposes.
6826 (define_insn "sse_ldmxcsr"
6827 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6831 [(set_attr "type" "sse")
6832 (set_attr "atom_sse_attr" "mxcsr")
6833 (set_attr "prefix" "maybe_vex")
6834 (set_attr "memory" "load")])
;; Store the MXCSR register to the SImode memory operand 0.  The source
;; is an unspec_volatile (UNSPECV_STMXCSR) with a dummy const_int 0
;; argument; the insn is tagged as a memory store.
6836 (define_insn "sse_stmxcsr"
6837 [(set (match_operand:SI 0 "memory_operand" "=m")
6838 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6841 [(set_attr "type" "sse")
6842 (set_attr "atom_sse_attr" "mxcsr")
6843 (set_attr "prefix" "maybe_vex")
6844 (set_attr "memory" "store")])
;; Expander for sfence, available with SSE or the 3DNow!-A extensions.
;; The preparation code fabricates operand 0 as a volatile BLKmode MEM
;; whose address is a SCRATCH, so the fence is expressed as a set of
;; that memory from an UNSPEC_SFENCE of itself.
6846 (define_expand "sse_sfence"
6848 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6849 "TARGET_SSE || TARGET_3DNOW_A"
6851 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6852 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for sfence.  Operand 0 is the BLKmode memory built by
;; the expander and is matched with an empty predicate.  length_address
;; is forced to 0 because the memory operand is not encoded, and the
;; "memory" attribute is "unknown".
6855 (define_insn "*sse_sfence"
6856 [(set (match_operand:BLK 0 "" "")
6857 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6858 "TARGET_SSE || TARGET_3DNOW_A"
6860 [(set_attr "type" "sse")
6861 (set_attr "length_address" "0")
6862 (set_attr "atom_sse_attr" "fence")
6863 (set_attr "memory" "unknown")])
;; clflush: operand 0 is an address (predicate address_operand,
;; constraint "p"), not a memory reference.  Modelled as an
;; unspec_volatile; scheduled like a fence on Atom, with memory effect
;; "unknown".
6865 (define_insn "sse2_clflush"
6866 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6870 [(set_attr "type" "sse")
6871 (set_attr "atom_sse_attr" "fence")
6872 (set_attr "memory" "unknown")])
;; Expander for mfence.  The preparation code fabricates operand 0 as a
;; volatile BLKmode MEM whose address is a SCRATCH, so the fence is
;; expressed as a set of that memory from an UNSPEC_MFENCE of itself.
6874 (define_expand "sse2_mfence"
6876 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6879 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6880 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for mfence; enabled on any 64-bit target or whenever
;; SSE2 is available.  Operand 0 is the BLKmode memory built by the
;; expander; length_address is forced to 0 because it is not encoded.
6883 (define_insn "*sse2_mfence"
6884 [(set (match_operand:BLK 0 "" "")
6885 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6886 "TARGET_64BIT || TARGET_SSE2"
6888 [(set_attr "type" "sse")
6889 (set_attr "length_address" "0")
6890 (set_attr "atom_sse_attr" "fence")
6891 (set_attr "memory" "unknown")])
;; Expander for lfence.  The preparation code fabricates operand 0 as a
;; volatile BLKmode MEM whose address is a SCRATCH, so the fence is
;; expressed as a set of that memory from an UNSPEC_LFENCE of itself.
6893 (define_expand "sse2_lfence"
6895 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6898 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6899 MEM_VOLATILE_P (operands[0]) = 1;
;; Matching insn for lfence.  Operand 0 is the BLKmode memory built by
;; the expander; length_address is forced to 0 because it is not
;; encoded.  Unlike sfence/mfence, the Atom attribute is the dedicated
;; "lfence" value rather than "fence".
6902 (define_insn "*sse2_lfence"
6903 [(set (match_operand:BLK 0 "" "")
6904 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6907 [(set_attr "type" "sse")
6908 (set_attr "length_address" "0")
6909 (set_attr "atom_sse_attr" "lfence")
6910 (set_attr "memory" "unknown")])
;; mwait: an unspec_volatile taking two SImode inputs pinned to fixed
;; registers by the "a" and "c" constraints.  A single pattern serves
;; both 32-bit and 64-bit targets, for the reason given in the comment
;; inside the pattern.  The insn length is fixed at 3 bytes.
6912 (define_insn "sse3_mwait"
6913 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6914 (match_operand:SI 1 "register_operand" "c")]
6917 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
6918 ;; Since 32bit register operands are implicitly zero extended to 64bit,
6919 ;; we only need to set up 32bit registers.
6921 [(set_attr "length" "3")])
;; monitor for 32-bit targets with SSE3: an unspec_volatile taking three
;; SImode inputs pinned to fixed registers by the "a", "c" and "d"
;; constraints, printed explicitly as the three instruction operands.
;; The insn length is fixed at 3 bytes.
6923 (define_insn "sse3_monitor"
6924 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6925 (match_operand:SI 1 "register_operand" "c")
6926 (match_operand:SI 2 "register_operand" "d")]
6928 "TARGET_SSE3 && !TARGET_64BIT"
6929 "monitor\t%0, %1, %2"
6930 [(set_attr "length" "3")])
;; monitor for 64-bit targets with SSE3.  Only operand 0 (constraint
;; "a") is DImode; operands 1 and 2 (constraints "c" and "d") stay
;; SImode for the reason given in the comment inside the pattern.
;; The insn length is fixed at 3 bytes.
6932 (define_insn "sse3_monitor64"
6933 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
6934 (match_operand:SI 1 "register_operand" "c")
6935 (match_operand:SI 2 "register_operand" "d")]
6937 "TARGET_SSE3 && TARGET_64BIT"
6938 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
6939 ;; RCX and RDX are used. Since 32bit register operands are implicitly
6940 ;; zero extended to 64bit, we only need to set up 32bit registers.
6942 [(set_attr "length" "3")])
6944 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6946 ;; SSSE3 instructions
6948 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 128-bit horizontal add of words (phaddw / vphaddw).  The RTL selects
;; the HImode element pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and
;; then the same pairs of operand 2; each pair is combined into one
;; result element, operand 1's pairs appearing first in the pattern.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
6950 (define_insn "ssse3_phaddwv8hi3"
6951 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6957 (match_operand:V8HI 1 "register_operand" "0,x")
6958 (parallel [(const_int 0)]))
6959 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
6961 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
6962 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
6965 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
6966 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
6968 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
6969 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
6974 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
6975 (parallel [(const_int 0)]))
6976 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
6978 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
6979 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
6982 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
6983 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
6985 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
6986 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
6989 phaddw\t{%2, %0|%0, %2}
6990 vphaddw\t{%2, %1, %0|%0, %1, %2}"
6991 [(set_attr "isa" "noavx,avx")
6992 (set_attr "type" "sseiadd")
6993 (set_attr "atom_unit" "complex")
6994 (set_attr "prefix_data16" "1,*")
6995 (set_attr "prefix_extra" "1")
6996 (set_attr "prefix" "orig,vex")
6997 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal add of words.  The
;; RTL selects the HImode element pairs (0,1) and (2,3) of operand 1 and
;; then of operand 2; each pair yields one result element.  Only the
;; two-operand form exists, so operand 1 is tied to operand 0.
;; prefix_rex is computed from whether the insn mentions an extended
;; register.
6999 (define_insn "ssse3_phaddwv4hi3"
7000 [(set (match_operand:V4HI 0 "register_operand" "=y")
7005 (match_operand:V4HI 1 "register_operand" "0")
7006 (parallel [(const_int 0)]))
7007 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7009 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7010 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7014 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7015 (parallel [(const_int 0)]))
7016 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7018 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7019 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7021 "phaddw\t{%2, %0|%0, %2}"
7022 [(set_attr "type" "sseiadd")
7023 (set_attr "atom_unit" "complex")
7024 (set_attr "prefix_extra" "1")
7025 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7026 (set_attr "mode" "DI")])
;; 128-bit horizontal add of doublewords (phaddd / vphaddd).  The RTL
;; selects the SImode element pairs (0,1) and (2,3) of operand 1 and
;; then of operand 2; each pair yields one result element.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7028 (define_insn "ssse3_phadddv4si3"
7029 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7034 (match_operand:V4SI 1 "register_operand" "0,x")
7035 (parallel [(const_int 0)]))
7036 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7038 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7039 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7043 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7044 (parallel [(const_int 0)]))
7045 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7047 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7048 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7051 phaddd\t{%2, %0|%0, %2}
7052 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7053 [(set_attr "isa" "noavx,avx")
7054 (set_attr "type" "sseiadd")
7055 (set_attr "atom_unit" "complex")
7056 (set_attr "prefix_data16" "1,*")
7057 (set_attr "prefix_extra" "1")
7058 (set_attr "prefix" "orig,vex")
7059 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal add of doublewords.
;; The RTL selects elements 0 and 1 of operand 1 and then of operand 2;
;; each pair yields one result element.  Only the two-operand form
;; exists, so operand 1 is tied to operand 0.
7061 (define_insn "ssse3_phadddv2si3"
7062 [(set (match_operand:V2SI 0 "register_operand" "=y")
7066 (match_operand:V2SI 1 "register_operand" "0")
7067 (parallel [(const_int 0)]))
7068 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7071 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7072 (parallel [(const_int 0)]))
7073 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7075 "phaddd\t{%2, %0|%0, %2}"
7076 [(set_attr "type" "sseiadd")
7077 (set_attr "atom_unit" "complex")
7078 (set_attr "prefix_extra" "1")
7079 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7080 (set_attr "mode" "DI")])
;; 128-bit horizontal add of words emitting phaddsw / vphaddsw (the
;; signed-saturating variant, going by the mnemonic).  The RTL selects
;; the HImode element pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and
;; then of operand 2; each pair yields one result element.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7082 (define_insn "ssse3_phaddswv8hi3"
7083 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7089 (match_operand:V8HI 1 "register_operand" "0,x")
7090 (parallel [(const_int 0)]))
7091 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7093 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7094 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7097 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7098 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7100 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7101 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7106 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7107 (parallel [(const_int 0)]))
7108 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7110 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7111 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7114 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7115 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7117 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7118 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7121 phaddsw\t{%2, %0|%0, %2}
7122 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7123 [(set_attr "isa" "noavx,avx")
7124 (set_attr "type" "sseiadd")
7125 (set_attr "atom_unit" "complex")
7126 (set_attr "prefix_data16" "1,*")
7127 (set_attr "prefix_extra" "1")
7128 (set_attr "prefix" "orig,vex")
7129 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal add of words
;; emitting phaddsw (the signed-saturating variant, going by the
;; mnemonic).  The RTL selects the HImode element pairs (0,1) and (2,3)
;; of operand 1 and then of operand 2.  Only the two-operand form
;; exists, so operand 1 is tied to operand 0.
7131 (define_insn "ssse3_phaddswv4hi3"
7132 [(set (match_operand:V4HI 0 "register_operand" "=y")
7137 (match_operand:V4HI 1 "register_operand" "0")
7138 (parallel [(const_int 0)]))
7139 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7141 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7142 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7146 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7147 (parallel [(const_int 0)]))
7148 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7150 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7151 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7153 "phaddsw\t{%2, %0|%0, %2}"
7154 [(set_attr "type" "sseiadd")
7155 (set_attr "atom_unit" "complex")
7156 (set_attr "prefix_extra" "1")
7157 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7158 (set_attr "mode" "DI")])
;; 128-bit horizontal subtract of words (phsubw / vphsubw).  The RTL
;; selects the HImode element pairs (0,1), (2,3), (4,5), (6,7) of
;; operand 1 and then of operand 2; each pair yields one result
;; element, the even-indexed element being listed first in each pair.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7160 (define_insn "ssse3_phsubwv8hi3"
7161 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7167 (match_operand:V8HI 1 "register_operand" "0,x")
7168 (parallel [(const_int 0)]))
7169 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7171 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7172 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7175 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7176 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7178 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7179 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7184 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7185 (parallel [(const_int 0)]))
7186 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7188 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7189 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7192 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7193 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7195 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7196 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7199 phsubw\t{%2, %0|%0, %2}
7200 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7201 [(set_attr "isa" "noavx,avx")
7202 (set_attr "type" "sseiadd")
7203 (set_attr "atom_unit" "complex")
7204 (set_attr "prefix_data16" "1,*")
7205 (set_attr "prefix_extra" "1")
7206 (set_attr "prefix" "orig,vex")
7207 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal subtract of words.
;; The RTL selects the HImode element pairs (0,1) and (2,3) of operand 1
;; and then of operand 2; each pair yields one result element.  Only
;; the two-operand form exists, so operand 1 is tied to operand 0.
7209 (define_insn "ssse3_phsubwv4hi3"
7210 [(set (match_operand:V4HI 0 "register_operand" "=y")
7215 (match_operand:V4HI 1 "register_operand" "0")
7216 (parallel [(const_int 0)]))
7217 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7219 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7220 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7224 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7225 (parallel [(const_int 0)]))
7226 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7228 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7229 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7231 "phsubw\t{%2, %0|%0, %2}"
7232 [(set_attr "type" "sseiadd")
7233 (set_attr "atom_unit" "complex")
7234 (set_attr "prefix_extra" "1")
7235 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7236 (set_attr "mode" "DI")])
;; 128-bit horizontal subtract of doublewords (phsubd / vphsubd).  The
;; RTL selects the SImode element pairs (0,1) and (2,3) of operand 1 and
;; then of operand 2; each pair yields one result element.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7238 (define_insn "ssse3_phsubdv4si3"
7239 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7244 (match_operand:V4SI 1 "register_operand" "0,x")
7245 (parallel [(const_int 0)]))
7246 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7248 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7249 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7253 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7254 (parallel [(const_int 0)]))
7255 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7257 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7258 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7261 phsubd\t{%2, %0|%0, %2}
7262 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7264 [(set_attr "isa" "noavx,avx")
7265 (set_attr "type" "sseiadd")
7266 (set_attr "atom_unit" "complex")
7267 (set_attr "prefix_data16" "1,*")
7268 (set_attr "prefix_extra" "1")
7269 (set_attr "prefix" "orig,vex")
7270 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal subtract of
;; doublewords.  The RTL selects elements 0 and 1 of operand 1 and then
;; of operand 2; each pair yields one result element.  Only the
;; two-operand form exists, so operand 1 is tied to operand 0.
7272 (define_insn "ssse3_phsubdv2si3"
7273 [(set (match_operand:V2SI 0 "register_operand" "=y")
7277 (match_operand:V2SI 1 "register_operand" "0")
7278 (parallel [(const_int 0)]))
7279 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7282 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7283 (parallel [(const_int 0)]))
7284 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7286 "phsubd\t{%2, %0|%0, %2}"
7287 [(set_attr "type" "sseiadd")
7288 (set_attr "atom_unit" "complex")
7289 (set_attr "prefix_extra" "1")
7290 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7291 (set_attr "mode" "DI")])
;; 128-bit horizontal subtract of words emitting phsubsw / vphsubsw
;; (the signed-saturating variant, going by the mnemonic).  The RTL
;; selects the HImode element pairs (0,1), (2,3), (4,5), (6,7) of
;; operand 1 and then of operand 2; each pair yields one result element.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7293 (define_insn "ssse3_phsubswv8hi3"
7294 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7300 (match_operand:V8HI 1 "register_operand" "0,x")
7301 (parallel [(const_int 0)]))
7302 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7304 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7305 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7308 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7309 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7311 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7312 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7317 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7318 (parallel [(const_int 0)]))
7319 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7321 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7322 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7325 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7326 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7328 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7329 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7332 phsubsw\t{%2, %0|%0, %2}
7333 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7334 [(set_attr "isa" "noavx,avx")
7335 (set_attr "type" "sseiadd")
7336 (set_attr "atom_unit" "complex")
7337 (set_attr "prefix_data16" "1,*")
7338 (set_attr "prefix_extra" "1")
7339 (set_attr "prefix" "orig,vex")
7340 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal subtract of words
;; emitting phsubsw (the signed-saturating variant, going by the
;; mnemonic).  The RTL selects the HImode element pairs (0,1) and (2,3)
;; of operand 1 and then of operand 2.  Only the two-operand form
;; exists, so operand 1 is tied to operand 0.
7342 (define_insn "ssse3_phsubswv4hi3"
7343 [(set (match_operand:V4HI 0 "register_operand" "=y")
7348 (match_operand:V4HI 1 "register_operand" "0")
7349 (parallel [(const_int 0)]))
7350 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7352 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7353 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7357 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7358 (parallel [(const_int 0)]))
7359 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7361 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7362 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7364 "phsubsw\t{%2, %0|%0, %2}"
7365 [(set_attr "type" "sseiadd")
7366 (set_attr "atom_unit" "complex")
7367 (set_attr "prefix_extra" "1")
7368 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7369 (set_attr "mode" "DI")])
;; 128-bit pmaddubsw / vpmaddubsw: two V16QI inputs produce a V8HI
;; result.  The RTL takes two V8QI selections from each input, one
;; starting at element 0 and one starting at element 1 (so presumably
;; the even and the odd bytes -- the index lists are long; confirm
;; against the full pattern), and combines them.
;; Alternative 0 is the two-operand form (operand 1 tied to operand 0),
;; alternative 1 the three-operand AVX form.
7371 (define_insn "ssse3_pmaddubsw128"
7372 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7377 (match_operand:V16QI 1 "register_operand" "0,x")
7378 (parallel [(const_int 0)
7388 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7389 (parallel [(const_int 0)
7399 (vec_select:V8QI (match_dup 1)
7400 (parallel [(const_int 1)
7409 (vec_select:V8QI (match_dup 2)
7410 (parallel [(const_int 1)
7417 (const_int 15)]))))))]
7420 pmaddubsw\t{%2, %0|%0, %2}
7421 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
7422 [(set_attr "isa" "noavx,avx")
7423 (set_attr "type" "sseiadd")
7424 (set_attr "atom_unit" "simul")
7425 (set_attr "prefix_data16" "1,*")
7426 (set_attr "prefix_extra" "1")
7427 (set_attr "prefix" "orig,vex")
7428 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") pmaddubsw: two V8QI inputs
;; produce a V4HI result.  The RTL takes two V4QI selections from each
;; input, one starting at element 0 and one starting at element 1
;; (presumably the even and the odd bytes -- confirm against the full
;; pattern).  Only the two-operand form exists, so operand 1 is tied to
;; operand 0.
7430 (define_insn "ssse3_pmaddubsw"
7431 [(set (match_operand:V4HI 0 "register_operand" "=y")
7436 (match_operand:V8QI 1 "register_operand" "0")
7437 (parallel [(const_int 0)
7443 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
7444 (parallel [(const_int 0)
7450 (vec_select:V4QI (match_dup 1)
7451 (parallel [(const_int 1)
7456 (vec_select:V4QI (match_dup 2)
7457 (parallel [(const_int 1)
7460 (const_int 7)]))))))]
7462 "pmaddubsw\t{%2, %0|%0, %2}"
7463 [(set_attr "type" "sseiadd")
7464 (set_attr "atom_unit" "simul")
7465 (set_attr "prefix_extra" "1")
7466 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7467 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw operation on operands 1 and 2.  The
;; all-ones const_vector is part of the RTL expression (presumably the
;; rounding increment -- confirm against the full pattern).  Before the
;; insn is generated the operands are legitimized for a MULT by
;; ix86_fixup_binary_operands_no_copy.
7469 (define_expand "ssse3_pmulhrswv8hi3"
7470 [(set (match_operand:V8HI 0 "register_operand" "")
7477 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7479 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7481 (const_vector:V8HI [(const_int 1) (const_int 1)
7482 (const_int 1) (const_int 1)
7483 (const_int 1) (const_int 1)
7484 (const_int 1) (const_int 1)]))
7487 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI pmulhrsw: emits pmulhrsw, or the
;; three-operand vpmulhrsw for the AVX alternative.  Operand 1 carries
;; the "%" commutativity marker, and the condition requires SSSE3 plus
;; operands acceptable to ix86_binary_operator_ok for a MULT.
7489 (define_insn "*ssse3_pmulhrswv8hi3"
7490 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7497 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7499 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7501 (const_vector:V8HI [(const_int 1) (const_int 1)
7502 (const_int 1) (const_int 1)
7503 (const_int 1) (const_int 1)
7504 (const_int 1) (const_int 1)]))
7506 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7508 pmulhrsw\t{%2, %0|%0, %2}
7509 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
7510 [(set_attr "isa" "noavx,avx")
7511 (set_attr "type" "sseimul")
7512 (set_attr "prefix_data16" "1,*")
7513 (set_attr "prefix_extra" "1")
7514 (set_attr "prefix" "orig,vex")
7515 (set_attr "mode" "TI")])
;; Expander for the V4HI (64-bit) pmulhrsw operation on operands 1 and
;; 2.  The all-ones const_vector is part of the RTL expression
;; (presumably the rounding increment -- confirm against the full
;; pattern).  Before the insn is generated the operands are legitimized
;; for a MULT by ix86_fixup_binary_operands_no_copy.
7517 (define_expand "ssse3_pmulhrswv4hi3"
7518 [(set (match_operand:V4HI 0 "register_operand" "")
7525 (match_operand:V4HI 1 "nonimmediate_operand" ""))
7527 (match_operand:V4HI 2 "nonimmediate_operand" "")))
7529 (const_vector:V4HI [(const_int 1) (const_int 1)
7530 (const_int 1) (const_int 1)]))
7533 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matching insn for the V4HI pmulhrsw in MMX registers (constraint
;; "y").  Operand 1 carries the "%" commutativity marker and is tied to
;; operand 0; the condition requires SSSE3 plus operands acceptable to
;; ix86_binary_operator_ok for a MULT.
7535 (define_insn "*ssse3_pmulhrswv4hi3"
7536 [(set (match_operand:V4HI 0 "register_operand" "=y")
7543 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
7545 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
7547 (const_vector:V4HI [(const_int 1) (const_int 1)
7548 (const_int 1) (const_int 1)]))
7550 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
7551 "pmulhrsw\t{%2, %0|%0, %2}"
7552 [(set_attr "type" "sseimul")
7553 (set_attr "prefix_extra" "1")
7554 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7555 (set_attr "mode" "DI")])
;; 128-bit pshufb / vpshufb, modelled as an opaque unspec of two V16QI
;; inputs.  Alternative 0 is the two-operand form (operand 1 tied to
;; operand 0), alternative 1 the three-operand AVX form.  Operand 2 may
;; be a register or memory.
7557 (define_insn "ssse3_pshufbv16qi3"
7558 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7559 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7560 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
7564 pshufb\t{%2, %0|%0, %2}
7565 vpshufb\t{%2, %1, %0|%0, %1, %2}"
7566 [(set_attr "isa" "noavx,avx")
7567 (set_attr "type" "sselog1")
7568 (set_attr "prefix_data16" "1,*")
7569 (set_attr "prefix_extra" "1")
7570 (set_attr "prefix" "orig,vex")
7571 (set_attr "mode" "TI")])
;; 64-bit pshufb in MMX registers (constraint "y"), modelled as an
;; opaque unspec of two V8QI inputs.  Only the two-operand form exists,
;; so operand 1 is tied to operand 0; operand 2 may be a register or
;; memory.  prefix_rex is computed from whether the insn mentions an
;; extended register.
7573 (define_insn "ssse3_pshufbv8qi3"
7574 [(set (match_operand:V8QI 0 "register_operand" "=y")
7575 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
7576 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
7579 "pshufb\t{%2, %0|%0, %2}"
7580 [(set_attr "type" "sselog1")
7581 (set_attr "prefix_extra" "1")
7582 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7583 (set_attr "mode" "DI")])
;; 128-bit psign family, one insn per mode of the VI124_128 iterator;
;; the element-size suffix of the mnemonic comes from <ssemodesuffix>.
;; Modelled as an opaque unspec of two same-mode inputs.  Alternative 0
;; is the two-operand form (operand 1 tied to operand 0), alternative 1
;; the three-operand AVX form.
7585 (define_insn "ssse3_psign<mode>3"
7586 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
7588 [(match_operand:VI124_128 1 "register_operand" "0,x")
7589 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
7593 psign<ssemodesuffix>\t{%2, %0|%0, %2}
7594 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7595 [(set_attr "isa" "noavx,avx")
7596 (set_attr "type" "sselog1")
7597 (set_attr "prefix_data16" "1,*")
7598 (set_attr "prefix_extra" "1")
7599 (set_attr "prefix" "orig,vex")
7600 (set_attr "mode" "TI")])
;; 64-bit psign family in MMX registers (constraint "y"), one insn per
;; mode of the MMXMODEI iterator; the element-size suffix of the
;; mnemonic comes from <mmxvecsize>.  Modelled as an opaque unspec of
;; two same-mode inputs.  Only the two-operand form exists, so operand
;; 1 is tied to operand 0.
7602 (define_insn "ssse3_psign<mode>3"
7603 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7605 [(match_operand:MMXMODEI 1 "register_operand" "0")
7606 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
7609 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
7610 [(set_attr "type" "sselog1")
7611 (set_attr "prefix_extra" "1")
7612 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7613 (set_attr "mode" "DI")])
;; 128-bit palignr / vpalignr on TImode values, modelled as an unspec.
;; Operand 3 arrives as a multiple of 8 (see its predicate) and the
;; output code divides it by 8 before printing, so the RTL carries the
;; shift in bits while the assembler immediate is eight times smaller.
;; Note that the output code overwrites operands[3] in place.
;; which_alternative selects between the two-operand and the
;; three-operand AVX template.
7615 (define_insn "ssse3_palignrti"
7616 [(set (match_operand:TI 0 "register_operand" "=x,x")
7617 (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
7618 (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
7619 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
7623 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7625 switch (which_alternative)
7628 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7630 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7635 [(set_attr "isa" "noavx,avx")
7636 (set_attr "type" "sseishft")
7637 (set_attr "atom_unit" "sishuf")
7638 (set_attr "prefix_data16" "1,*")
7639 (set_attr "prefix_extra" "1")
7640 (set_attr "length_immediate" "1")
7641 (set_attr "prefix" "orig,vex")
7642 (set_attr "mode" "TI")])
;; 64-bit palignr on DImode values in MMX registers (constraint "y"),
;; modelled as an unspec.  Operand 3 arrives as a multiple of 8 (see
;; its predicate) and the output code divides it by 8 before printing,
;; overwriting operands[3] in place.  Only the two-operand form exists,
;; so operand 1 is tied to operand 0.
7644 (define_insn "ssse3_palignrdi"
7645 [(set (match_operand:DI 0 "register_operand" "=y")
7646 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
7647 (match_operand:DI 2 "nonimmediate_operand" "ym")
7648 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
7652 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7653 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7655 [(set_attr "type" "sseishft")
7656 (set_attr "atom_unit" "sishuf")
7657 (set_attr "prefix_extra" "1")
7658 (set_attr "length_immediate" "1")
7659 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7660 (set_attr "mode" "DI")])
;; 128-bit integer absolute value, one insn per mode of the VI124_128
;; iterator, emitted as pabs with the <ssemodesuffix> element suffix.
;; A single alternative serves SSE and AVX through "%v" in the template
;; and prefix "maybe_vex"; the source may be a register or memory.
7662 (define_insn "abs<mode>2"
7663 [(set (match_operand:VI124_128 0 "register_operand" "=x")
7665 (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
7667 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
7668 [(set_attr "type" "sselog1")
7669 (set_attr "prefix_data16" "1")
7670 (set_attr "prefix_extra" "1")
7671 (set_attr "prefix" "maybe_vex")
7672 (set_attr "mode" "TI")])
;; 64-bit integer absolute value in MMX registers (constraint "y"), one
;; insn per mode of the MMXMODEI iterator, emitted as pabs with the
;; <mmxvecsize> element suffix.  The source may be a register or
;; memory.  prefix_rep is explicitly set to 0 and prefix_rex is computed
;; from whether the insn mentions an extended register.
7674 (define_insn "abs<mode>2"
7675 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7677 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
7679 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
7680 [(set_attr "type" "sselog1")
7681 (set_attr "prefix_rep" "0")
7682 (set_attr "prefix_extra" "1")
7683 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7684 (set_attr "mode" "DI")])
7686 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7688 ;; AMD SSE4A instructions
7690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar movnt store, one insn per mode of the MODEF iterator:
;; the scalar float in SSE register operand 1 is written to memory
;; operand 0 through an unspec.  The mnemonic suffix comes from
;; <ssemodesuffix>.
7692 (define_insn "sse4a_movnt<mode>"
7693 [(set (match_operand:MODEF 0 "memory_operand" "=m")
7695 [(match_operand:MODEF 1 "register_operand" "x")]
7698 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
7699 [(set_attr "type" "ssemov")
7700 (set_attr "mode" "<MODE>")])
;; SSE4A movnt store of element 0 of a 128-bit float vector (VF_128
;; iterator): the vec_select picks element 0 of operand 1 and the
;; unspec result, in the vector's scalar mode, is written to memory
;; operand 0.  The mnemonic suffix comes from <ssescalarmodesuffix>.
7702 (define_insn "sse4a_vmmovnt<mode>"
7703 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
7704 (unspec:<ssescalarmode>
7705 [(vec_select:<ssescalarmode>
7706 (match_operand:VF_128 1 "register_operand" "x")
7707 (parallel [(const_int 0)]))]
7710 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
7711 [(set_attr "type" "ssemov")
7712 (set_attr "mode" "<ssescalarmode>")])
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are constants in the range 0..255 (see their
;; predicates) and are printed as two immediates, hence
;; length_immediate "2".  Modelled as an opaque unspec on V2DI.
7714 (define_insn "sse4a_extrqi"
7715 [(set (match_operand:V2DI 0 "register_operand" "=x")
7716 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7717 (match_operand 2 "const_0_to_255_operand" "")
7718 (match_operand 3 "const_0_to_255_operand" "")]
7721 "extrq\t{%3, %2, %0|%0, %2, %3}"
7722 [(set_attr "type" "sse")
7723 (set_attr "prefix_data16" "1")
7724 (set_attr "length_immediate" "2")
7725 (set_attr "mode" "TI")])
;; SSE4A extrq, register form.  Operand 1 (V2DI) is tied to the
;; destination and operand 2 is a V16QI SSE register supplying the
;; control information.  Modelled as an opaque unspec.
7727 (define_insn "sse4a_extrq"
7728 [(set (match_operand:V2DI 0 "register_operand" "=x")
7729 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7730 (match_operand:V16QI 2 "register_operand" "x")]
7733 "extrq\t{%2, %0|%0, %2}"
7734 [(set_attr "type" "sse")
7735 (set_attr "prefix_data16" "1")
7736 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq.  Operand 1 is tied to the destination,
;; operand 2 is a V2DI register, and operands 3 and 4 are immediates in
;; 0..255 (length_immediate 2).  The encoding attributes select a rep
;; prefix and no data16 prefix.
7738 (define_insn "sse4a_insertqi"
7739 [(set (match_operand:V2DI 0 "register_operand" "=x")
7740 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7741 (match_operand:V2DI 2 "register_operand" "x")
7742 (match_operand 3 "const_0_to_255_operand" "")
7743 (match_operand 4 "const_0_to_255_operand" "")]
7746 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
7747 [(set_attr "type" "sseins")
7748 (set_attr "prefix_data16" "0")
7749 (set_attr "prefix_rep" "1")
7750 (set_attr "length_immediate" "2")
7751 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq.  Operand 1 is tied to the destination
;; and operand 2 is a V2DI register; same prefix attributes as the
;; immediate form, but no immediate bytes.
7753 (define_insn "sse4a_insertq"
7754 [(set (match_operand:V2DI 0 "register_operand" "=x")
7755 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7756 (match_operand:V2DI 2 "register_operand" "x")]
7759 "insertq\t{%2, %0|%0, %2}"
7760 [(set_attr "type" "sseins")
7761 (set_attr "prefix_data16" "0")
7762 (set_attr "prefix_rep" "1")
7763 (set_attr "mode" "TI")])
7765 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7767 ;; Intel SSE4.1 instructions
7769 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point vector blend with an immediate selector (operand 3, in
;; 0..<blendbits>).  Alternative 0 ("noavx") is the two-operand form with
;; operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand VEX form.  Note that operand 2 precedes operand 1 in the
;; RTL template.
7771 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
7772 [(set (match_operand:VF 0 "register_operand" "=x,x")
7774 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7775 (match_operand:VF 1 "register_operand" "0,x")
7776 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "")))]
7779 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7780 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7781 [(set_attr "isa" "noavx,avx")
7782 (set_attr "type" "ssemov")
7783 (set_attr "length_immediate" "1")
7784 (set_attr "prefix_data16" "1,*")
7785 (set_attr "prefix_extra" "1")
7786 (set_attr "prefix" "orig,vex")
7787 (set_attr "mode" "<MODE>")])
;; Floating-point vector blend with a register selector (operand 3).  In
;; the "noavx" alternative the selector must satisfy "Yz" (presumably the
;; xmm0-only class, given the *_not_xmm0_* predicates on the other
;; operands -- confirm in constraints.md) and operand 1 is tied to the
;; destination; in the "avx" alternative any "x" register is accepted.
;; NOTE(review): length_immediate is "1" for both alternatives here,
;; whereas sse4_1_pblendvb uses "*,1"; confirm whether the non-AVX form
;; really carries an immediate byte.
7789 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
7790 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7792 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7793 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7794 (match_operand:VF 3 "register_operand" "Yz,x")]
7798 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7799 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7800 [(set_attr "isa" "noavx,avx")
7801 (set_attr "type" "ssemov")
7802 (set_attr "length_immediate" "1")
7803 (set_attr "prefix_data16" "1,*")
7804 (set_attr "prefix_extra" "1")
7805 (set_attr "prefix" "orig,vex")
7806 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> / vdp<ssemodesuffix> with an 8-bit immediate control
;; (operand 3).  Operands 1 and 2 are marked commutative ("%"); the
;; "noavx" alternative ties operand 1 to the destination, the "avx"
;; alternative uses the three-operand VEX form.
7808 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
7809 [(set (match_operand:VF 0 "register_operand" "=x,x")
7811 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
7812 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7813 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7817 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7818 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7819 [(set_attr "isa" "noavx,avx")
7820 (set_attr "type" "ssemul")
7821 (set_attr "length_immediate" "1")
7822 (set_attr "prefix_data16" "1,*")
7823 (set_attr "prefix_extra" "1")
7824 (set_attr "prefix" "orig,vex")
7825 (set_attr "mode" "<MODE>")])
;; movntdqa load: reads the V2DI memory operand 1 into register operand 0.
;; The "%v" template prefix together with prefix "maybe_vex" lets the same
;; pattern serve the legacy and VEX encodings.
7827 (define_insn "sse4_1_movntdqa"
7828 [(set (match_operand:V2DI 0 "register_operand" "=x")
7829 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
7832 "%vmovntdqa\t{%1, %0|%0, %1}"
7833 [(set_attr "type" "ssemov")
7834 (set_attr "prefix_extra" "1")
7835 (set_attr "prefix" "maybe_vex")
7836 (set_attr "mode" "TI")])
;; mpsadbw / vmpsadbw on V16QI operands with an 8-bit immediate (operand
;; 3).  Alternative 0 ("noavx") ties operand 1 to the destination;
;; alternative 1 ("avx") is the three-operand VEX form.
7838 (define_insn "sse4_1_mpsadbw"
7839 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7840 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7841 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7842 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7846 mpsadbw\t{%3, %2, %0|%0, %2, %3}
7847 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7848 [(set_attr "isa" "noavx,avx")
7849 (set_attr "type" "sselog1")
7850 (set_attr "length_immediate" "1")
7851 (set_attr "prefix_extra" "1")
7852 (set_attr "prefix" "orig,vex")
7853 (set_attr "mode" "TI")])
;; packusdw / vpackusdw: combines two V4SI sources (operand 1 register,
;; operand 2 register or memory) into one V8HI destination.  Alternative 0
;; ties operand 1 to the destination; alternative 1 is the VEX form.
7855 (define_insn "sse4_1_packusdw"
7856 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7859 (match_operand:V4SI 1 "register_operand" "0,x"))
7861 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
7864 packusdw\t{%2, %0|%0, %2}
7865 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
7866 [(set_attr "isa" "noavx,avx")
7867 (set_attr "type" "sselog")
7868 (set_attr "prefix_extra" "1")
7869 (set_attr "prefix" "orig,vex")
7870 (set_attr "mode" "TI")])
;; Byte blend with a register selector (operand 3): "Yz" in the "noavx"
;; alternative, any "x" register in the "avx" alternative.  Only the AVX
;; alternative is given an immediate byte (length_immediate "*,1").
;; NOTE(review): operand 0 uses "reg_not_xmm0_operand" while operand 1
;; uses "reg_not_xmm0_operand_maybe_avx" (and the float blendv pattern
;; uses the _maybe_avx predicate for its destination); confirm whether the
;; stricter destination predicate is intentional.
7872 (define_insn "sse4_1_pblendvb"
7873 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x,x")
7875 [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7876 (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7877 (match_operand:V16QI 3 "register_operand" "Yz,x")]
7881 pblendvb\t{%3, %2, %0|%0, %2, %3}
7882 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7883 [(set_attr "isa" "noavx,avx")
7884 (set_attr "type" "ssemov")
7885 (set_attr "prefix_extra" "1")
7886 (set_attr "length_immediate" "*,1")
7887 (set_attr "prefix" "orig,vex")
7888 (set_attr "mode" "TI")])
;; Word blend with an 8-bit immediate selector (operand 3).  Operand 2
;; precedes operand 1 in the RTL template; alternative 0 ties operand 1 to
;; the destination, alternative 1 is the three-operand VEX form.
7890 (define_insn "sse4_1_pblendw"
7891 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7893 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7894 (match_operand:V8HI 1 "register_operand" "0,x")
7895 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
7898 pblendw\t{%3, %2, %0|%0, %2, %3}
7899 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7900 [(set_attr "isa" "noavx,avx")
7901 (set_attr "type" "ssemov")
7902 (set_attr "prefix_extra" "1")
7903 (set_attr "length_immediate" "1")
7904 (set_attr "prefix" "orig,vex")
7905 (set_attr "mode" "TI")])
;; phminposuw, modelled as UNSPEC_PHMINPOSUW on a V8HI source (register or
;; memory) producing a V8HI register result.  One pattern covers both the
;; legacy and VEX encodings ("%v" / "maybe_vex").
7907 (define_insn "sse4_1_phminposuw"
7908 [(set (match_operand:V8HI 0 "register_operand" "=x")
7909 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
7910 UNSPEC_PHMINPOSUW))]
7912 "%vphminposuw\t{%1, %0|%0, %1}"
7913 [(set_attr "type" "sselog1")
7914 (set_attr "prefix_extra" "1")
7915 (set_attr "prefix" "maybe_vex")
7916 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>bw: V16QI source (register or memory),
;; V8HI destination.  The vec_select parallel starts at element 0; <code>
;; and <extsuffix> pick the extension variant.
7918 (define_insn "sse4_1_<code>v8qiv8hi2"
7919 [(set (match_operand:V8HI 0 "register_operand" "=x")
7922 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7923 (parallel [(const_int 0)
7932 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
7933 [(set_attr "type" "ssemov")
7934 (set_attr "prefix_extra" "1")
7935 (set_attr "prefix" "maybe_vex")
7936 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>bd: V16QI source (register or memory),
;; V4SI destination; the vec_select parallel starts at element 0.
7938 (define_insn "sse4_1_<code>v4qiv4si2"
7939 [(set (match_operand:V4SI 0 "register_operand" "=x")
7942 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7943 (parallel [(const_int 0)
7948 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
7949 [(set_attr "type" "ssemov")
7950 (set_attr "prefix_extra" "1")
7951 (set_attr "prefix" "maybe_vex")
7952 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>wd: V8HI source (register or memory),
;; V4SI destination; the vec_select parallel starts at element 0.
7954 (define_insn "sse4_1_<code>v4hiv4si2"
7955 [(set (match_operand:V4SI 0 "register_operand" "=x")
7958 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7959 (parallel [(const_int 0)
7964 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
7965 [(set_attr "type" "ssemov")
7966 (set_attr "prefix_extra" "1")
7967 (set_attr "prefix" "maybe_vex")
7968 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>bq: V16QI source (register or memory),
;; V2DI destination; the vec_select parallel starts at element 0.
7970 (define_insn "sse4_1_<code>v2qiv2di2"
7971 [(set (match_operand:V2DI 0 "register_operand" "=x")
7974 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7975 (parallel [(const_int 0)
7978 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
7979 [(set_attr "type" "ssemov")
7980 (set_attr "prefix_extra" "1")
7981 (set_attr "prefix" "maybe_vex")
7982 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>wq: V8HI source (register or memory),
;; V2DI destination; the vec_select parallel starts at element 0.
7984 (define_insn "sse4_1_<code>v2hiv2di2"
7985 [(set (match_operand:V2DI 0 "register_operand" "=x")
7988 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
7989 (parallel [(const_int 0)
7992 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
7993 [(set_attr "type" "ssemov")
7994 (set_attr "prefix_extra" "1")
7995 (set_attr "prefix" "maybe_vex")
7996 (set_attr "mode" "TI")])
;; Widening move pmov<extsuffix>dq: V4SI source (register or memory),
;; V2DI destination; the vec_select parallel starts at element 0.
7998 (define_insn "sse4_1_<code>v2siv2di2"
7999 [(set (match_operand:V2DI 0 "register_operand" "=x")
8002 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8003 (parallel [(const_int 0)
8006 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8007 [(set_attr "type" "ssemov")
8008 (set_attr "prefix_extra" "1")
8009 (set_attr "prefix" "maybe_vex")
8010 (set_attr "mode" "TI")])
8012 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8013 ;; setting FLAGS_REG, but they are not really compare instructions.
;; AVX-only test pattern (prefix "vex"): sets FLAGS_REG from an unspec of
;; two VF operands, operand 0 a register and operand 1 a register or
;; memory.  No general or vector register is written.
8014 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8015 [(set (reg:CC FLAGS_REG)
8016 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8017 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8020 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8021 [(set_attr "type" "ssecomi")
8022 (set_attr "prefix_extra" "1")
8023 (set_attr "prefix" "vex")
8024 (set_attr "mode" "<MODE>")])
8026 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
8027 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG from an unspec of two V4DI operands
;; (operand 0 register, operand 1 register or memory).  VEX-only, insn
;; mode OI.
8028 (define_insn "avx_ptest256"
8029 [(set (reg:CC FLAGS_REG)
8030 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8031 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8034 "vptest\t{%1, %0|%0, %1}"
8035 [(set_attr "type" "ssecomi")
8036 (set_attr "prefix_extra" "1")
8037 (set_attr "prefix" "vex")
8038 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG from an unspec of two V2DI operands
;; (operand 0 register, operand 1 register or memory).  "%v" together with
;; prefix "maybe_vex" covers both the legacy and VEX encodings.
8040 (define_insn "sse4_1_ptest"
8041 [(set (reg:CC FLAGS_REG)
8042 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8043 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8046 "%vptest\t{%1, %0|%0, %1}"
8047 [(set_attr "type" "ssecomi")
8048 (set_attr "prefix_extra" "1")
8049 (set_attr "prefix" "maybe_vex")
8050 (set_attr "mode" "TI")])
;; Packed round<ssemodesuffix>: operand 1 is the VF source (register or
;; memory) and operand 2 an immediate in 0..15.  The prefix_data16
;; attribute is computed from a test on TARGET_AVX rather than being a
;; fixed string.
8052 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8053 [(set (match_operand:VF 0 "register_operand" "=x")
8055 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8056 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8059 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8060 [(set_attr "type" "ssecvt")
8061 (set (attr "prefix_data16")
8063 (ne (symbol_ref "TARGET_AVX") (const_int 0))
8065 (const_string "1")))
8066 (set_attr "prefix_extra" "1")
8067 (set_attr "length_immediate" "1")
8068 (set_attr "prefix" "maybe_vex")
8069 (set_attr "mode" "<MODE>")])
;; Scalar round<ssescalarmodesuffix> on VF_128 registers.  Operand 2 is
;; the value being rounded, operand 3 an immediate in 0..15, and operand 1
;; a further vector register input that is tied to the destination in the
;; "noavx" alternative and free in the "avx" alternative.
8071 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8072 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8075 [(match_operand:VF_128 2 "register_operand" "x,x")
8076 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8078 (match_operand:VF_128 1 "register_operand" "0,x")
8082 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8083 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8084 [(set_attr "isa" "noavx,avx")
8085 (set_attr "type" "ssecvt")
8086 (set_attr "length_immediate" "1")
8087 (set_attr "prefix_data16" "1,*")
8088 (set_attr "prefix_extra" "1")
8089 (set_attr "prefix" "orig,vex")
8090 (set_attr "mode" "<MODE>")])
8092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8094 ;; Intel SSE4.2 string/text processing instructions
8096 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in "c"
;; (operand 0), a V16QI value in "Yz" (operand 1) and FLAGS_REG.  The
;; split is only attempted while pseudos can still be created.  Its C
;; body looks up REG_UNUSED notes on curr_insn to learn which results are
;; live (ecx, xmm0, flags) and emits sse4_2_pcmpestri, sse4_2_pcmpestrm
;; and/or, when only the flags are live, sse4_2_pcmpestr_cconly.
8098 (define_insn_and_split "sse4_2_pcmpestr"
8099 [(set (match_operand:SI 0 "register_operand" "=c,c")
8101 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8102 (match_operand:SI 3 "register_operand" "a,a")
8103 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
8104 (match_operand:SI 5 "register_operand" "d,d")
8105 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
8107 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8115 (set (reg:CC FLAGS_REG)
8124 && can_create_pseudo_p ()"
8129 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8130 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8131 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8134 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
8135 operands[3], operands[4],
8136 operands[5], operands[6]));
8138 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
8139 operands[3], operands[4],
8140 operands[5], operands[6]));
8141 if (flags && !(ecx || xmm0))
8142 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
8143 operands[2], operands[3],
8144 operands[4], operands[5],
8148 [(set_attr "type" "sselog")
8149 (set_attr "prefix_data16" "1")
8150 (set_attr "prefix_extra" "1")
8151 (set_attr "length_immediate" "1")
8152 (set_attr "memory" "none,load")
8153 (set_attr "mode" "TI")])
;; pcmpestri: SI result in "c" (operand 0) plus FLAGS_REG.  Inputs are a
;; V16QI register (1), SI in "a" (2), a V16QI register or memory (3), SI
;; in "d" (4) and an 8-bit immediate (5).  The two alternatives differ
;; only in whether operand 3 is a register or memory.
8155 (define_insn "sse4_2_pcmpestri"
8156 [(set (match_operand:SI 0 "register_operand" "=c,c")
8158 [(match_operand:V16QI 1 "register_operand" "x,x")
8159 (match_operand:SI 2 "register_operand" "a,a")
8160 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8161 (match_operand:SI 4 "register_operand" "d,d")
8162 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8164 (set (reg:CC FLAGS_REG)
8173 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
8174 [(set_attr "type" "sselog")
8175 (set_attr "prefix_data16" "1")
8176 (set_attr "prefix_extra" "1")
8177 (set_attr "prefix" "maybe_vex")
8178 (set_attr "length_immediate" "1")
8179 (set_attr "memory" "none,load")
8180 (set_attr "mode" "TI")])
;; pcmpestrm: V16QI result in "Yz" (operand 0) plus FLAGS_REG.  The
;; inputs mirror sse4_2_pcmpestri: V16QI register (1), SI in "a" (2),
;; V16QI register or memory (3), SI in "d" (4), 8-bit immediate (5).
8182 (define_insn "sse4_2_pcmpestrm"
8183 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8185 [(match_operand:V16QI 1 "register_operand" "x,x")
8186 (match_operand:SI 2 "register_operand" "a,a")
8187 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8188 (match_operand:SI 4 "register_operand" "d,d")
8189 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8191 (set (reg:CC FLAGS_REG)
8200 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
8201 [(set_attr "type" "sselog")
8202 (set_attr "prefix_data16" "1")
8203 (set_attr "prefix_extra" "1")
8204 (set_attr "length_immediate" "1")
8205 (set_attr "prefix" "maybe_vex")
8206 (set_attr "memory" "none,load")
8207 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: only FLAGS_REG is set; the other outputs are
;; scratch clobbers.  Alternatives 0-1 emit pcmpestrm and clobber a V16QI
;; scratch in "Yz"; alternatives 2-3 emit pcmpestri and clobber an SI
;; scratch in "c".  Within each pair, operand 4 is a register or memory.
8209 (define_insn "sse4_2_pcmpestr_cconly"
8210 [(set (reg:CC FLAGS_REG)
8212 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8213 (match_operand:SI 3 "register_operand" "a,a,a,a")
8214 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
8215 (match_operand:SI 5 "register_operand" "d,d,d,d")
8216 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
8218 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8219 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8222 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8223 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8224 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
8225 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
8226 [(set_attr "type" "sselog")
8227 (set_attr "prefix_data16" "1")
8228 (set_attr "prefix_extra" "1")
8229 (set_attr "length_immediate" "1")
8230 (set_attr "memory" "none,load,none,load")
8231 (set_attr "prefix" "maybe_vex")
8232 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: an SI value in "c"
;; (operand 0), a V16QI value in "Yz" (operand 1) and FLAGS_REG.  As with
;; sse4_2_pcmpestr, the split runs while pseudos can still be created and
;; uses REG_UNUSED notes on curr_insn to decide which of
;; sse4_2_pcmpistri, sse4_2_pcmpistrm and sse4_2_pcmpistr_cconly to emit.
8234 (define_insn_and_split "sse4_2_pcmpistr"
8235 [(set (match_operand:SI 0 "register_operand" "=c,c")
8237 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8238 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
8239 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
8241 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8247 (set (reg:CC FLAGS_REG)
8254 && can_create_pseudo_p ()"
8259 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8260 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8261 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8264 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
8265 operands[3], operands[4]));
8267 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
8268 operands[3], operands[4]));
8269 if (flags && !(ecx || xmm0))
8270 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
8271 operands[2], operands[3],
8275 [(set_attr "type" "sselog")
8276 (set_attr "prefix_data16" "1")
8277 (set_attr "prefix_extra" "1")
8278 (set_attr "length_immediate" "1")
8279 (set_attr "memory" "none,load")
8280 (set_attr "mode" "TI")])
;; pcmpistri: SI result in "c" (operand 0) plus FLAGS_REG.  Inputs are a
;; V16QI register (1), a V16QI register or memory (2) and an 8-bit
;; immediate (3).
8282 (define_insn "sse4_2_pcmpistri"
8283 [(set (match_operand:SI 0 "register_operand" "=c,c")
8285 [(match_operand:V16QI 1 "register_operand" "x,x")
8286 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8287 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8289 (set (reg:CC FLAGS_REG)
8296 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
8297 [(set_attr "type" "sselog")
8298 (set_attr "prefix_data16" "1")
8299 (set_attr "prefix_extra" "1")
8300 (set_attr "length_immediate" "1")
8301 (set_attr "prefix" "maybe_vex")
8302 (set_attr "memory" "none,load")
8303 (set_attr "mode" "TI")])
;; pcmpistrm: V16QI result in "Yz" (operand 0) plus FLAGS_REG.  Inputs are
;; a V16QI register (1), a V16QI register or memory (2) and an 8-bit
;; immediate (3).
8305 (define_insn "sse4_2_pcmpistrm"
8306 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8308 [(match_operand:V16QI 1 "register_operand" "x,x")
8309 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8310 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8312 (set (reg:CC FLAGS_REG)
8319 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
8320 [(set_attr "type" "sselog")
8321 (set_attr "prefix_data16" "1")
8322 (set_attr "prefix_extra" "1")
8323 (set_attr "length_immediate" "1")
8324 (set_attr "prefix" "maybe_vex")
8325 (set_attr "memory" "none,load")
8326 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: only FLAGS_REG is set; the other outputs are
;; scratch clobbers.  Alternatives 0-1 emit pcmpistrm and clobber a V16QI
;; scratch in "Yz"; alternatives 2-3 emit pcmpistri and clobber an SI
;; scratch in "c".  Within each pair, operand 3 is a register or memory.
8328 (define_insn "sse4_2_pcmpistr_cconly"
8329 [(set (reg:CC FLAGS_REG)
8331 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8332 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
8333 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
8335 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8336 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8339 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8340 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8341 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
8342 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
8343 [(set_attr "type" "sselog")
8344 (set_attr "prefix_data16" "1")
8345 (set_attr "prefix_extra" "1")
8346 (set_attr "length_immediate" "1")
8347 (set_attr "memory" "none,load,none,load")
8348 (set_attr "prefix" "maybe_vex")
8349 (set_attr "mode" "TI")])
8351 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8355 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8357 ;; XOP parallel integer multiply/add instructions.
8358 ;; Note that the XOP multiply/add instructions, which compute
8359 ;; a[i] = b[i] * c[i] + d[i];
8360 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI operands.  Operands 1 and 2 are commutative ("%");
;; only operand 2 may be in memory.  Operand 3 is restricted to a
;; register by its "x" constraint despite the nonimmediate_operand
;; predicate.
8361 (define_insn "xop_pmacsww"
8362 [(set (match_operand:V8HI 0 "register_operand" "=x")
8365 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8366 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8367 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8369 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8370 [(set_attr "type" "ssemuladd")
8371 (set_attr "mode" "TI")])
;; vpmacssww on V8HI operands: a V8HI mult of operands 1 and 2
;; (commutative, operand 2 may be memory) combined with register operand 3.
8373 (define_insn "xop_pmacssww"
8374 [(set (match_operand:V8HI 0 "register_operand" "=x")
8376 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8377 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8378 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8380 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8381 [(set_attr "type" "ssemuladd")
8382 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI operands.  Operands 1 and 2 are commutative; only
;; operand 2 may be in memory, and operand 3 must be a register ("x").
8384 (define_insn "xop_pmacsdd"
8385 [(set (match_operand:V4SI 0 "register_operand" "=x")
8388 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8389 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8390 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8392 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8393 [(set_attr "type" "ssemuladd")
8394 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI operands: a V4SI mult of operands 1 and 2
;; (commutative, operand 2 may be memory) combined with register operand 3.
8396 (define_insn "xop_pmacssdd"
8397 [(set (match_operand:V4SI 0 "register_operand" "=x")
8399 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8400 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8401 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8403 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8404 [(set_attr "type" "ssemuladd")
8405 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on operands 1 and 2 begin at
;; element 1.
8407 (define_insn "xop_pmacssdql"
8408 [(set (match_operand:V2DI 0 "register_operand" "=x")
8413 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8414 (parallel [(const_int 1)
8417 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8418 (parallel [(const_int 1)
8420 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8422 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8423 [(set_attr "type" "ssemuladd")
8424 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI sources (operands 1 and 2), V2DI addend (operand 3)
;; and V2DI result.  The element selections on operands 1 and 2 begin at
;; element 0.
8426 (define_insn "xop_pmacssdqh"
8427 [(set (match_operand:V2DI 0 "register_operand" "=x")
8432 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8433 (parallel [(const_int 0)
8437 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8438 (parallel [(const_int 0)
8440 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8442 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8443 [(set_attr "type" "ssemuladd")
8444 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI sources (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on operands 1 and 2 begin at
;; element 1.
8446 (define_insn "xop_pmacsdql"
8447 [(set (match_operand:V2DI 0 "register_operand" "=x")
8452 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8453 (parallel [(const_int 1)
8457 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8458 (parallel [(const_int 1)
8460 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8462 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8463 [(set_attr "type" "ssemuladd")
8464 (set_attr "mode" "TI")])
8466 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8467 ;; fake it with a multiply/add. In general, we expect the define_split to
8468 ;; occur before register allocation, so we have to handle the corner case where
8469 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply over elements 1 and 3 of each source.  The
;; destination is earlyclobber ("=&x").  The split's preparation code
;; sets operands[3] to the V2DI zero constant, i.e. the multiply is
;; rewritten with a zero addend.
;; NOTE(review): the comment preceding this pattern expects the split to
;; happen before register allocation, but the visible split condition is
;; "&& reload_completed"; confirm which is intended.
8470 (define_insn_and_split "xop_mulv2div2di3_low"
8471 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8475 (match_operand:V4SI 1 "register_operand" "%x")
8476 (parallel [(const_int 1)
8480 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8481 (parallel [(const_int 1)
8482 (const_int 3)])))))]
8485 "&& reload_completed"
8494 (parallel [(const_int 1)
8499 (parallel [(const_int 1)
8503 operands[3] = CONST0_RTX (V2DImode);
8505 [(set_attr "type" "ssemul")
8506 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI sources (operands 1 and 2), V2DI addend (operand 3) and
;; V2DI result.  The element selections on operands 1 and 2 begin at
;; element 0.
8508 (define_insn "xop_pmacsdqh"
8509 [(set (match_operand:V2DI 0 "register_operand" "=x")
8514 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8515 (parallel [(const_int 0)
8519 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8520 (parallel [(const_int 0)
8522 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8524 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8525 [(set_attr "type" "ssemuladd")
8526 (set_attr "mode" "TI")])
8528 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8529 ;; fake it with a multiply/add. In general, we expect the define_split to
8530 ;; occur before register allocation, so we have to handle the corner case where
8531 ;; the target is the same as either operands[1] or operands[2]
;; V4SI x V4SI -> V2DI multiply over elements 0 and 2 of each source.  The
;; destination is earlyclobber ("=&x").  The split's preparation code
;; sets operands[3] to the V2DI zero constant, i.e. the multiply is
;; rewritten with a zero addend.
;; NOTE(review): the comment preceding this pattern expects the split to
;; happen before register allocation, but the visible split condition is
;; "&& reload_completed"; confirm which is intended.
8532 (define_insn_and_split "xop_mulv2div2di3_high"
8533 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8537 (match_operand:V4SI 1 "register_operand" "%x")
8538 (parallel [(const_int 0)
8542 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8543 (parallel [(const_int 0)
8544 (const_int 2)])))))]
8547 "&& reload_completed"
8556 (parallel [(const_int 0)
8561 (parallel [(const_int 0)
8565 operands[3] = CONST0_RTX (V2DImode);
8567 [(set_attr "type" "ssemul")
8568 (set_attr "mode" "TI")])
8570 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The element selections on operands 1 and 2 begin at
;; element 1.
8571 (define_insn "xop_pmacsswd"
8572 [(set (match_operand:V4SI 0 "register_operand" "=x")
8577 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8578 (parallel [(const_int 1)
8584 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8585 (parallel [(const_int 1)
8589 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8591 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8592 [(set_attr "type" "ssemuladd")
8593 (set_attr "mode" "TI")])
;; vpmacswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The element selections on operands 1 and 2 begin at
;; element 1.
8595 (define_insn "xop_pmacswd"
8596 [(set (match_operand:V4SI 0 "register_operand" "=x")
8601 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8602 (parallel [(const_int 1)
8608 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8609 (parallel [(const_int 1)
8613 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8615 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8616 [(set_attr "type" "ssemuladd")
8617 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3)
;; and V4SI result.  The template contains two groups of element
;; selections, one beginning at element 0 and one at element 1.
8619 (define_insn "xop_pmadcsswd"
8620 [(set (match_operand:V4SI 0 "register_operand" "=x")
8626 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8627 (parallel [(const_int 0)
8633 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8634 (parallel [(const_int 0)
8642 (parallel [(const_int 1)
8649 (parallel [(const_int 1)
8653 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8655 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8656 [(set_attr "type" "ssemuladd")
8657 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI sources (operands 1 and 2), V4SI addend (operand 3) and
;; V4SI result.  The template contains two groups of element selections,
;; one beginning at element 0 and one at element 1.
8659 (define_insn "xop_pmadcswd"
8660 [(set (match_operand:V4SI 0 "register_operand" "=x")
8666 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8667 (parallel [(const_int 0)
8673 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8674 (parallel [(const_int 0)
8682 (parallel [(const_int 1)
8689 (parallel [(const_int 1)
8693 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8695 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8696 [(set_attr "type" "ssemuladd")
8697 (set_attr "mode" "TI")])
8699 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in iterator V.  Operand 3 appears first in the
;; RTL template.  Each alternative allows at most one memory operand:
;; operand 2 in alternative 0, operand 3 in alternative 1.  Only the type
;; attribute is set.
8700 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
8701 [(set (match_operand:V 0 "register_operand" "=x,x")
8703 (match_operand:V 3 "nonimmediate_operand" "x,m")
8704 (match_operand:V 1 "vector_move_operand" "x,x")
8705 (match_operand:V 2 "vector_move_operand" "xm,x")))]
8707 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8708 [(set_attr "type" "sse4arg")])
8710 ;; XOP horizontal add/subtract instructions
;; vphaddbw: V16QI source (register or memory) -> V8HI register result.
;; The two visible element selections begin at elements 0 and 1.
8711 (define_insn "xop_phaddbw"
8712 [(set (match_operand:V8HI 0 "register_operand" "=x")
8716 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8717 (parallel [(const_int 0)
8728 (parallel [(const_int 1)
8735 (const_int 15)])))))]
8737 "vphaddbw\t{%1, %0|%0, %1}"
8738 [(set_attr "type" "sseiadd1")])
;; vphaddbd: V16QI source (register or memory) -> V4SI register result.
;; Four element selections are visible, beginning at elements 0, 1, 2, 3.
8740 (define_insn "xop_phaddbd"
8741 [(set (match_operand:V4SI 0 "register_operand" "=x")
8746 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8747 (parallel [(const_int 0)
8754 (parallel [(const_int 1)
8762 (parallel [(const_int 2)
8769 (parallel [(const_int 3)
8772 (const_int 15)]))))))]
8774 "vphaddbd\t{%1, %0|%0, %1}"
8775 [(set_attr "type" "sseiadd1")])
;; vphaddbq: V16QI source (register or memory) -> V2DI register result.
;; Eight element selections are visible, beginning at elements 0-3 and
;; 8-11.
8777 (define_insn "xop_phaddbq"
8778 [(set (match_operand:V2DI 0 "register_operand" "=x")
8784 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8785 (parallel [(const_int 0)
8790 (parallel [(const_int 1)
8796 (parallel [(const_int 2)
8801 (parallel [(const_int 3)
8808 (parallel [(const_int 8)
8813 (parallel [(const_int 9)
8819 (parallel [(const_int 10)
8824 (parallel [(const_int 11)
8825 (const_int 15)])))))))]
8827 "vphaddbq\t{%1, %0|%0, %1}"
8828 [(set_attr "type" "sseiadd1")])
;; vphaddwd: V8HI source (register or memory) -> V4SI register result.
;; The two visible element selections begin at elements 0 and 1.
8830 (define_insn "xop_phaddwd"
8831 [(set (match_operand:V4SI 0 "register_operand" "=x")
8835 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8836 (parallel [(const_int 0)
8843 (parallel [(const_int 1)
8846 (const_int 7)])))))]
8848 "vphaddwd\t{%1, %0|%0, %1}"
8849 [(set_attr "type" "sseiadd1")])
;; vphaddwq: V8HI source (register or memory) -> V2DI register result.
;; Four element selections are visible, beginning at elements 0, 1, 2, 3.
8851 (define_insn "xop_phaddwq"
8852 [(set (match_operand:V2DI 0 "register_operand" "=x")
8857 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8858 (parallel [(const_int 0)
8863 (parallel [(const_int 1)
8869 (parallel [(const_int 2)
8874 (parallel [(const_int 3)
8875 (const_int 7)]))))))]
8877 "vphaddwq\t{%1, %0|%0, %1}"
8878 [(set_attr "type" "sseiadd1")])
;; vphadddq: V4SI source (register or memory) -> V2DI register result.
;; The two visible element selections begin at elements 0 and 1.
8880 (define_insn "xop_phadddq"
8881 [(set (match_operand:V2DI 0 "register_operand" "=x")
8885 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8886 (parallel [(const_int 0)
8891 (parallel [(const_int 1)
8892 (const_int 3)])))))]
8894 "vphadddq\t{%1, %0|%0, %1}"
8895 [(set_attr "type" "sseiadd1")])
;; vphaddubw: V16QI source (register or memory) -> V8HI register result;
;; same operand layout as xop_phaddbw with a different mnemonic.
8897 (define_insn "xop_phaddubw"
8898 [(set (match_operand:V8HI 0 "register_operand" "=x")
8902 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8903 (parallel [(const_int 0)
8914 (parallel [(const_int 1)
8921 (const_int 15)])))))]
8923 "vphaddubw\t{%1, %0|%0, %1}"
8924 [(set_attr "type" "sseiadd1")])
;; vphaddubd: V16QI source (register or memory) -> V4SI register result;
;; same operand layout as xop_phaddbd with a different mnemonic.
8926 (define_insn "xop_phaddubd"
8927 [(set (match_operand:V4SI 0 "register_operand" "=x")
8932 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8933 (parallel [(const_int 0)
8940 (parallel [(const_int 1)
8948 (parallel [(const_int 2)
8955 (parallel [(const_int 3)
8958 (const_int 15)]))))))]
8960 "vphaddubd\t{%1, %0|%0, %1}"
8961 [(set_attr "type" "sseiadd1")])
;; vphaddubq: V16QI source (register or memory) -> V2DI register result;
;; same operand layout as xop_phaddbq with a different mnemonic.
8963 (define_insn "xop_phaddubq"
8964 [(set (match_operand:V2DI 0 "register_operand" "=x")
8970 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8971 (parallel [(const_int 0)
8976 (parallel [(const_int 1)
8982 (parallel [(const_int 2)
8987 (parallel [(const_int 3)
8994 (parallel [(const_int 8)
8999 (parallel [(const_int 9)
9005 (parallel [(const_int 10)
9010 (parallel [(const_int 11)
9011 (const_int 15)])))))))]
9013 "vphaddubq\t{%1, %0|%0, %1}"
9014 [(set_attr "type" "sseiadd1")])
;; vphadduwd: V8HI source (register or memory) -> V4SI register result;
;; same operand layout as xop_phaddwd with a different mnemonic.
9016 (define_insn "xop_phadduwd"
9017 [(set (match_operand:V4SI 0 "register_operand" "=x")
9021 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9022 (parallel [(const_int 0)
9029 (parallel [(const_int 1)
9032 (const_int 7)])))))]
9034 "vphadduwd\t{%1, %0|%0, %1}"
9035 [(set_attr "type" "sseiadd1")])
;; vphadduwq: V8HI source (register or memory) -> V2DI register result;
;; same operand layout as xop_phaddwq with a different mnemonic.
9037 (define_insn "xop_phadduwq"
9038 [(set (match_operand:V2DI 0 "register_operand" "=x")
9043 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9044 (parallel [(const_int 0)
9049 (parallel [(const_int 1)
9055 (parallel [(const_int 2)
9060 (parallel [(const_int 3)
9061 (const_int 7)]))))))]
9063 "vphadduwq\t{%1, %0|%0, %1}"
9064 [(set_attr "type" "sseiadd1")])
;; vphaddudq: V4SI source (register or memory) -> V2DI register result;
;; same operand layout as xop_phadddq with a different mnemonic.
9066 (define_insn "xop_phaddudq"
9067 [(set (match_operand:V2DI 0 "register_operand" "=x")
9071 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9072 (parallel [(const_int 0)
9077 (parallel [(const_int 1)
9078 (const_int 3)])))))]
9080 "vphaddudq\t{%1, %0|%0, %1}"
9081 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source (register or memory) -> V8HI register result.
;; The two visible element selections begin at elements 0 and 1.
9083 (define_insn "xop_phsubbw"
9084 [(set (match_operand:V8HI 0 "register_operand" "=x")
9088 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9089 (parallel [(const_int 0)
9100 (parallel [(const_int 1)
9107 (const_int 15)])))))]
9109 "vphsubbw\t{%1, %0|%0, %1}"
9110 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (register or memory) -> V4SI register result.
;; The two visible element selections begin at elements 0 and 1.
9112 (define_insn "xop_phsubwd"
9113 [(set (match_operand:V4SI 0 "register_operand" "=x")
9117 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9118 (parallel [(const_int 0)
9125 (parallel [(const_int 1)
9128 (const_int 7)])))))]
9130 "vphsubwd\t{%1, %0|%0, %1}"
9131 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (register or memory) -> V2DI register result.
;; The two visible element selections begin at elements 0 and 1.
9133 (define_insn "xop_phsubdq"
9134 [(set (match_operand:V2DI 0 "register_operand" "=x")
9138 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9139 (parallel [(const_int 0)
9144 (parallel [(const_int 1)
9145 (const_int 3)])))))]
9147 "vphsubdq\t{%1, %0|%0, %1}"
9148 [(set_attr "type" "sseiadd1")])
9150 ;; XOP permute instructions
;; vpperm on V16QI operands, modelled as UNSPEC_XOP_PERMUTE.  Operand 1
;; must be a register; the insn condition rejects the case where operands
;; 2 and 3 are both in memory, matching the two alternatives (memory in
;; operand 3 or in operand 2).
9151 (define_insn "xop_pperm"
9152 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9154 [(match_operand:V16QI 1 "register_operand" "x,x")
9155 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9156 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9157 UNSPEC_XOP_PERMUTE))]
9158 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9159 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9160 [(set_attr "type" "sse4arg")
9161 (set_attr "mode" "TI")])
9163 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used as a pack: two V2DI sources (operands 1 and 2) produce one
;; V4SI result.  Operand 3 is a V16QI input recorded with (use ...); as in
;; xop_pperm, operands 2 and 3 may not both be in memory.
9164 (define_insn "xop_pperm_pack_v2di_v4si"
9165 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9168 (match_operand:V2DI 1 "register_operand" "x,x"))
9170 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9171 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9172 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9173 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9174 [(set_attr "type" "sse4arg")
9175 (set_attr "mode" "TI")])
;; vpperm used as a pack: two V4SI sources (operands 1 and 2) produce one
;; V8HI result.  Operand 3 is a V16QI input recorded with (use ...);
;; operands 2 and 3 may not both be in memory.
9177 (define_insn "xop_pperm_pack_v4si_v8hi"
9178 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9181 (match_operand:V4SI 1 "register_operand" "x,x"))
9183 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9184 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9185 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9186 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9187 [(set_attr "type" "sse4arg")
9188 (set_attr "mode" "TI")])
;; Combine two V8HI inputs (operands 1 and 2) into one V16QI result with
;; vpperm.  Operand 3 (V16QI) is attached with (use ...) and printed as %3;
;; at most one of operands 2 and 3 may be in memory.
9190 (define_insn "xop_pperm_pack_v8hi_v16qi"
9191 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9194 (match_operand:V8HI 1 "register_operand" "x,x"))
9196 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9197 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9198 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9199 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9200 [(set_attr "type" "sse4arg")
9201 (set_attr "mode" "TI")])
9203 ;; XOP packed rotate instructions
;; Vector rotate-left by an SImode count.  When operand 2 is not accepted by
;; const_0_to_<sserotatemax>_operand, the count is converted to the element
;; mode if its mode differs, copied into every element of a PARALLEL,
;; materialised with vec_init<mode>, and used as the count vector of
;; xop_vrotl<mode>3.
9204 (define_expand "rotl<mode>3"
9205 [(set (match_operand:VI_128 0 "register_operand" "")
9207 (match_operand:VI_128 1 "nonimmediate_operand" "")
9208 (match_operand:SI 2 "general_operand")))]
9211 /* If we were given a scalar, convert it to parallel */
9212 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9214 rtvec vs = rtvec_alloc (<ssescalarnum>);
9215 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9216 rtx reg = gen_reg_rtx (<MODE>mode);
9217 rtx op2 = operands[2];
9220 if (GET_MODE (op2) != <ssescalarmode>mode)
9222 op2 = gen_reg_rtx (<ssescalarmode>mode);
9223 convert_move (op2, operands[2], false);
9226 for (i = 0; i < <ssescalarnum>; i++)
9227 RTVEC_ELT (vs, i) = op2;
9229 emit_insn (gen_vec_init<mode> (reg, par));
9230 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Vector rotate-right by an SImode count.  When operand 2 is not accepted by
;; const_0_to_<sserotatemax>_operand, the count is converted to the element
;; mode if its mode differs, broadcast with vec_init<mode>, negated with
;; neg<mode>2, and the negated vector is used as the count of
;; xop_vrotl<mode>3.
9235 (define_expand "rotr<mode>3"
9236 [(set (match_operand:VI_128 0 "register_operand" "")
9238 (match_operand:VI_128 1 "nonimmediate_operand" "")
9239 (match_operand:SI 2 "general_operand")))]
9242 /* If we were given a scalar, convert it to parallel */
9243 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9245 rtvec vs = rtvec_alloc (<ssescalarnum>);
9246 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9247 rtx neg = gen_reg_rtx (<MODE>mode);
9248 rtx reg = gen_reg_rtx (<MODE>mode);
9249 rtx op2 = operands[2];
9252 if (GET_MODE (op2) != <ssescalarmode>mode)
9254 op2 = gen_reg_rtx (<ssescalarmode>mode);
9255 convert_move (op2, operands[2], false);
9258 for (i = 0; i < <ssescalarnum>; i++)
9259 RTVEC_ELT (vs, i) = op2;
9261 emit_insn (gen_vec_init<mode> (reg, par));
9262 emit_insn (gen_neg<mode>2 (neg, reg));
9263 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the vprot
;; count.
9268 (define_insn "xop_rotl<mode>3"
9269 [(set (match_operand:VI_128 0 "register_operand" "=x")
9271 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9272 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9274 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9275 [(set_attr "type" "sseishft")
9276 (set_attr "length_immediate" "1")
9277 (set_attr "mode" "TI")])
;; Rotate right by an immediate, emitted as vprot with the complementary
;; count: operands[3] = (element width in bits) - operands[2].  The element
;; width must come from the scalar mode; <ssescalarnum> * 8 only happens to
;; equal it for V4SI (4 * 8 == 32) and gives a wrong count for V16QI, V8HI
;; and V2DI.
9279 (define_insn "xop_rotr<mode>3"
9280 [(set (match_operand:VI_128 0 "register_operand" "=x")
9282 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9283 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9286 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9287 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9289 [(set_attr "type" "sseishft")
9290 (set_attr "length_immediate" "1")
9291 (set_attr "mode" "TI")])
;; Rotate right with a per-element count vector: negate operand 2 into a
;; fresh register and emit xop_vrotl<mode>3 with the negated counts.
9293 (define_expand "vrotr<mode>3"
9294 [(match_operand:VI_128 0 "register_operand" "")
9295 (match_operand:VI_128 1 "register_operand" "")
9296 (match_operand:VI_128 2 "register_operand" "")]
9299 rtx reg = gen_reg_rtx (<MODE>mode);
9300 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9301 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate left with a per-element count vector: forwards the three operands
;; unchanged to xop_vrotl<mode>3.
9305 (define_expand "vrotl<mode>3"
9306 [(match_operand:VI_128 0 "register_operand" "")
9307 (match_operand:VI_128 1 "register_operand" "")
9308 (match_operand:VI_128 2 "register_operand" "")]
9311 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a vector count in operand 2.  The RTL is an if_then_else
;; involving the count; one arm is expressed with (neg (match_dup 2)).
;; Operand 1 or operand 2 may be in memory, but not both.
9315 (define_insn "xop_vrotl<mode>3"
9316 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9317 (if_then_else:VI_128
9319 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9322 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9326 (neg:VI_128 (match_dup 2)))))]
9327 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9328 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9329 [(set_attr "type" "sseishft")
9330 (set_attr "prefix_data16" "0")
9331 (set_attr "prefix_extra" "2")
9332 (set_attr "mode" "TI")])
9334 ;; XOP packed shift instructions.
9335 ;; FIXME: add V2DI back in
;; Logical shift right with a per-element count vector: negate operand 2 and
;; emit xop_lshl<mode>3 with the negated counts.
9336 (define_expand "vlshr<mode>3"
9337 [(match_operand:VI124_128 0 "register_operand" "")
9338 (match_operand:VI124_128 1 "register_operand" "")
9339 (match_operand:VI124_128 2 "register_operand" "")]
9342 rtx neg = gen_reg_rtx (<MODE>mode);
9343 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9344 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Arithmetic shift right with a per-element count vector: negate operand 2
;; and emit xop_ashl<mode>3 with the negated counts.
9348 (define_expand "vashr<mode>3"
9349 [(match_operand:VI124_128 0 "register_operand" "")
9350 (match_operand:VI124_128 1 "register_operand" "")
9351 (match_operand:VI124_128 2 "register_operand" "")]
9354 rtx neg = gen_reg_rtx (<MODE>mode);
9355 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9356 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Shift left with a per-element count vector: forwards the three operands
;; unchanged to xop_ashl<mode>3.
9360 (define_expand "vashl<mode>3"
9361 [(match_operand:VI124_128 0 "register_operand" "")
9362 (match_operand:VI124_128 1 "register_operand" "")
9363 (match_operand:VI124_128 2 "register_operand" "")]
9366 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha with a vector count in operand 2.  The RTL is an if_then_else
;; involving the count; one arm is expressed with (neg (match_dup 2)).
;; Operand 1 or operand 2 may be in memory, but not both.
9370 (define_insn "xop_ashl<mode>3"
9371 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9372 (if_then_else:VI_128
9374 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9377 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9381 (neg:VI_128 (match_dup 2)))))]
9382 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9383 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9384 [(set_attr "type" "sseishft")
9385 (set_attr "prefix_data16" "0")
9386 (set_attr "prefix_extra" "2")
9387 (set_attr "mode" "TI")])
;; vpshl with a vector count in operand 2.  The RTL is an if_then_else
;; involving the count; one arm is expressed with (neg (match_dup 2)).
;; Operand 1 or operand 2 may be in memory, but not both.
9389 (define_insn "xop_lshl<mode>3"
9390 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9391 (if_then_else:VI_128
9393 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9396 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9400 (neg:VI_128 (match_dup 2)))))]
9401 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9402 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9403 [(set_attr "type" "sseishft")
9404 (set_attr "prefix_data16" "0")
9405 (set_attr "prefix_extra" "2")
9406 (set_attr "mode" "TI")])
9408 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count: operand 2 is placed in all 16
;; slots of a PARALLEL, materialised with vec_initv16qi, and the resulting
;; vector is used as the count operand of xop_ashlv16qi3.
9409 (define_expand "ashlv16qi3"
9410 [(match_operand:V16QI 0 "register_operand" "")
9411 (match_operand:V16QI 1 "register_operand" "")
9412 (match_operand:SI 2 "nonmemory_operand" "")]
9415 rtvec vs = rtvec_alloc (16);
9416 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9417 rtx reg = gen_reg_rtx (V16QImode);
9419 for (i = 0; i < 16; i++)
9420 RTVEC_ELT (vs, i) = operands[2];
9422 emit_insn (gen_vec_initv16qi (reg, par));
9423 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count: operand 2 is placed in all 16
;; slots of a PARALLEL, materialised with vec_initv16qi, and the resulting
;; vector is used as the count operand of xop_lshlv16qi3.
9427 (define_expand "lshlv16qi3"
9428 [(match_operand:V16QI 0 "register_operand" "")
9429 (match_operand:V16QI 1 "register_operand" "")
9430 (match_operand:SI 2 "nonmemory_operand" "")]
9433 rtvec vs = rtvec_alloc (16);
9434 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9435 rtx reg = gen_reg_rtx (V16QImode);
9437 for (i = 0; i < 16; i++)
9438 RTVEC_ELT (vs, i) = operands[2];
9440 emit_insn (gen_vec_initv16qi (reg, par));
9441 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar count, built on xop_ashlv16qi3
;; with a negated count.  A CONST_INT count is negated at expand time before
;; being broadcast; for a non-constant count the broadcast vector is negated
;; with negv16qi2 and that negated vector is used instead.
9445 (define_expand "ashrv16qi3"
9446 [(match_operand:V16QI 0 "register_operand" "")
9447 (match_operand:V16QI 1 "register_operand" "")
9448 (match_operand:SI 2 "nonmemory_operand" "")]
9451 rtvec vs = rtvec_alloc (16);
9452 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9453 rtx reg = gen_reg_rtx (V16QImode);
9455 rtx ele = ((CONST_INT_P (operands[2]))
9456 ? GEN_INT (- INTVAL (operands[2]))
9459 for (i = 0; i < 16; i++)
9460 RTVEC_ELT (vs, i) = ele;
9462 emit_insn (gen_vec_initv16qi (reg, par));
9464 if (!CONST_INT_P (operands[2]))
9466 rtx neg = gen_reg_rtx (V16QImode);
9467 emit_insn (gen_negv16qi2 (neg, reg));
9468 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
9471 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, built on
;; xop_ashlv2di3 with a negated count.  The count is negated at expand time
;; when it is a CONST_INT; otherwise it is negated with negdi2, after a
;; convert_move into a DImode register when its mode is not DImode.  The
;; negated scalar fills both elements of the count vector.
9476 (define_expand "ashrv2di3"
9477 [(match_operand:V2DI 0 "register_operand" "")
9478 (match_operand:V2DI 1 "register_operand" "")
9479 (match_operand:DI 2 "nonmemory_operand" "")]
9482 rtvec vs = rtvec_alloc (2);
9483 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
9484 rtx reg = gen_reg_rtx (V2DImode);
9487 if (CONST_INT_P (operands[2]))
9488 ele = GEN_INT (- INTVAL (operands[2]));
9489 else if (GET_MODE (operands[2]) != DImode)
9491 rtx move = gen_reg_rtx (DImode);
9492 ele = gen_reg_rtx (DImode);
9493 convert_move (move, operands[2], false);
9494 emit_insn (gen_negdi2 (ele, move));
9498 ele = gen_reg_rtx (DImode);
9499 emit_insn (gen_negdi2 (ele, operands[2]));
9502 RTVEC_ELT (vs, 0) = ele;
9503 RTVEC_ELT (vs, 1) = ele;
9504 emit_insn (gen_vec_initv2di (reg, par));
9505 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
;; vfrcz over the FMAMODE iterator; the source (operand 1) may be a register
;; or memory, the destination is a register.
9510 (define_insn "xop_frcz<mode>2"
9511 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9513 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9516 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9517 [(set_attr "type" "ssecvt1")
9518 (set_attr "mode" "<MODE>")])
;; Expander for the xop_vmfrcz patterns on 128-bit float vectors; its
;; preparation code sets operands[3] to the zero constant of <MODE>mode.
9521 (define_expand "xop_vmfrcz<mode>2"
9522 [(set (match_operand:VF_128 0 "register_operand")
9525 [(match_operand:VF_128 1 "nonimmediate_operand")]
9531 operands[3] = CONST0_RTX (<MODE>mode);
;; Matches the frcz form that also carries a zero-constant operand
;; (operand 2, const0_operand) and emits vfrcz with the scalar-mode suffix.
9534 (define_insn "*xop_vmfrcz_<mode>"
9535 [(set (match_operand:VF_128 0 "register_operand" "=x")
9538 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
9540 (match_operand:VF_128 2 "const0_operand")
9543 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9544 [(set_attr "type" "ssecvt1")
9545 (set_attr "mode" "<MODE>")])
;; Integer vector compare.  The comparison is the operator matched as
;; operand 1 (ix86_comparison_int_operator); %Y1 inserts its condition into
;; the vpcom mnemonic.  Operand 2 must be a register, operand 3 may be memory.
9547 (define_insn "xop_maskcmp<mode>3"
9548 [(set (match_operand:VI_128 0 "register_operand" "=x")
9549 (match_operator:VI_128 1 "ix86_comparison_int_operator"
9550 [(match_operand:VI_128 2 "register_operand" "x")
9551 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9553 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9554 [(set_attr "type" "sse4arg")
9555 (set_attr "prefix_data16" "0")
9556 (set_attr "prefix_rep" "0")
9557 (set_attr "prefix_extra" "2")
9558 (set_attr "length_immediate" "1")
9559 (set_attr "mode" "TI")])
;; Unsigned integer vector compare.  The comparison is the operator matched
;; as operand 1 (ix86_comparison_uns_operator); the mnemonic is vpcom, the
;; condition from %Y1, then "u" and the mode suffix.
9561 (define_insn "xop_maskcmp_uns<mode>3"
9562 [(set (match_operand:VI_128 0 "register_operand" "=x")
9563 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
9564 [(match_operand:VI_128 2 "register_operand" "x")
9565 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9567 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9568 [(set_attr "type" "ssecmp")
9569 (set_attr "prefix_data16" "0")
9570 (set_attr "prefix_rep" "0")
9571 (set_attr "prefix_extra" "2")
9572 (set_attr "length_immediate" "1")
9573 (set_attr "mode" "TI")])
9575 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9576 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9577 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison
;; operator is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
9578 (define_insn "xop_maskcmp_uns2<mode>3"
9579 [(set (match_operand:VI_128 0 "register_operand" "=x")
9581 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
9582 [(match_operand:VI_128 2 "register_operand" "x")
9583 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
9584 UNSPEC_XOP_UNSIGNED_CMP))]
9586 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9587 [(set_attr "type" "ssecmp")
9588 (set_attr "prefix_data16" "0")
9589 (set_attr "prefix_extra" "2")
9590 (set_attr "length_immediate" "1")
9591 (set_attr "mode" "TI")])
9593 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9594 ;; being added here to be complete.
;; Operand 3 is a const_int selector: a non-zero value emits vpcomtrue, zero
;; emits vpcomfalse.  Operands 1 and 2 are passed through as the sources.
9595 (define_insn "xop_pcom_tf<mode>3"
9596 [(set (match_operand:VI_128 0 "register_operand" "=x")
9598 [(match_operand:VI_128 1 "register_operand" "x")
9599 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
9600 (match_operand:SI 3 "const_int_operand" "n")]
9601 UNSPEC_XOP_TRUEFALSE))]
9604 return ((INTVAL (operands[3]) != 0)
9605 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9606 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
9608 [(set_attr "type" "ssecmp")
9609 (set_attr "prefix_data16" "0")
9610 (set_attr "prefix_extra" "2")
9611 (set_attr "length_immediate" "1")
9612 (set_attr "mode" "TI")])
;; vpermil2 with two float-vector inputs (operands 1 and 2), an
;; integer-vector operand 3 of mode <sseintvecmode>, and an immediate
;; operand 4 that must satisfy const_0_to_3_operand.
9614 (define_insn "xop_vpermil2<mode>3"
9615 [(set (match_operand:VF 0 "register_operand" "=x")
9617 [(match_operand:VF 1 "register_operand" "x")
9618 (match_operand:VF 2 "nonimmediate_operand" "%x")
9619 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
9620 (match_operand:SI 4 "const_0_to_3_operand" "n")]
9623 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
9624 [(set_attr "type" "sse4arg")
9625 (set_attr "length_immediate" "1")
9626 (set_attr "mode" "<MODE>")])
9628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc with two alternatives selected by the "isa" attribute: the noavx
;; form ties operand 1 to the destination (constraint "0") and prints
;; two operands; the avx form prints the three-operand vaesenc.
9630 (define_insn "aesenc"
9631 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9632 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9633 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9637 aesenc\t{%2, %0|%0, %2}
9638 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9639 [(set_attr "isa" "noavx,avx")
9640 (set_attr "type" "sselog1")
9641 (set_attr "prefix_extra" "1")
9642 (set_attr "prefix" "orig,vex")
9643 (set_attr "mode" "TI")])
;; aesenclast (UNSPEC_AESENCLAST) with two alternatives selected by the "isa"
;; attribute: the noavx form ties operand 1 to the destination; the avx form
;; prints the three-operand vaesenclast.
9645 (define_insn "aesenclast"
9646 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9647 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9648 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9649 UNSPEC_AESENCLAST))]
9652 aesenclast\t{%2, %0|%0, %2}
9653 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9654 [(set_attr "isa" "noavx,avx")
9655 (set_attr "type" "sselog1")
9656 (set_attr "prefix_extra" "1")
9657 (set_attr "prefix" "orig,vex")
9658 (set_attr "mode" "TI")])
;; aesdec with two alternatives selected by the "isa" attribute: the noavx
;; form ties operand 1 to the destination (constraint "0"); the avx form
;; prints the three-operand vaesdec.
9660 (define_insn "aesdec"
9661 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9662 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9663 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9667 aesdec\t{%2, %0|%0, %2}
9668 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9669 [(set_attr "isa" "noavx,avx")
9670 (set_attr "type" "sselog1")
9671 (set_attr "prefix_extra" "1")
9672 (set_attr "prefix" "orig,vex")
9673 (set_attr "mode" "TI")])
;; aesdeclast (UNSPEC_AESDECLAST) with two alternatives selected by the "isa"
;; attribute: the noavx form ties operand 1 to the destination; the avx form
;; prints the three-operand vaesdeclast.
9675 (define_insn "aesdeclast"
9676 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9677 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9678 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9679 UNSPEC_AESDECLAST))]
9682 aesdeclast\t{%2, %0|%0, %2}
9683 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9684 [(set_attr "isa" "noavx,avx")
9685 (set_attr "type" "sselog1")
9686 (set_attr "prefix_extra" "1")
9687 (set_attr "prefix" "orig,vex")
9688 (set_attr "mode" "TI")])
;; Single-source aesimc; operand 1 may be a register or memory.  The template
;; uses the %v prefix together with the "maybe_vex" prefix attribute.
9690 (define_insn "aesimc"
9691 [(set (match_operand:V2DI 0 "register_operand" "=x")
9692 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9695 "%vaesimc\t{%1, %0|%0, %1}"
9696 [(set_attr "type" "sselog1")
9697 (set_attr "prefix_extra" "1")
9698 (set_attr "prefix" "maybe_vex")
9699 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): operand 1 is the register or
;; memory source, operand 2 an immediate accepted by const_0_to_255_operand.
9701 (define_insn "aeskeygenassist"
9702 [(set (match_operand:V2DI 0 "register_operand" "=x")
9703 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9704 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9705 UNSPEC_AESKEYGENASSIST))]
9707 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9708 [(set_attr "type" "sselog1")
9709 (set_attr "prefix_extra" "1")
9710 (set_attr "length_immediate" "1")
9711 (set_attr "prefix" "maybe_vex")
9712 (set_attr "mode" "TI")])
;; pclmulqdq with an immediate operand 3 (const_0_to_255_operand).  Two
;; alternatives selected by the "isa" attribute: the noavx form ties
;; operand 1 to the destination; the avx form prints vpclmulqdq with
;; separate source and destination registers.
9714 (define_insn "pclmulqdq"
9715 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9716 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9717 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9718 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9722 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9723 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9724 [(set_attr "isa" "noavx,avx")
9725 (set_attr "type" "sselog1")
9726 (set_attr "prefix_extra" "1")
9727 (set_attr "length_immediate" "1")
9728 (set_attr "prefix" "orig,vex")
9729 (set_attr "mode" "TI")])
;; Build the PARALLEL used for vzeroall.  Element 0 is an unspec_volatile;
;; it is followed by one SET per SSE register (16 when TARGET_64BIT,
;; otherwise 8) that assigns CONST0_RTX (V8SImode) to that register.
9731 (define_expand "avx_vzeroall"
9732 [(match_par_dup 0 [(const_int 0)])]
9735 int nregs = TARGET_64BIT ? 16 : 8;
9738 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
9740 XVECEXP (operands[0], 0, 0)
9741 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
9744 for (regno = 0; regno < nregs; regno++)
9745 XVECEXP (operands[0], 0, regno + 1)
9746 = gen_rtx_SET (VOIDmode,
9747 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
9748 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by vzeroall_operation whose first element is
;; the UNSPECV_VZEROALL unspec_volatile.
9751 (define_insn "*avx_vzeroall"
9752 [(match_parallel 0 "vzeroall_operation"
9753 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9756 [(set_attr "type" "sse")
9757 (set_attr "modrm" "0")
9758 (set_attr "memory" "none")
9759 (set_attr "prefix" "vex")
9760 (set_attr "mode" "OI")])
9762 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9763 ;; if the upper 128bits are unused.
;; Modelled as an unspec_volatile (UNSPECV_VZEROUPPER) that carries a single
;; const_int operand.
9764 (define_insn "avx_vzeroupper"
9765 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9766 UNSPECV_VZEROUPPER)]
9769 [(set_attr "type" "sse")
9770 (set_attr "modrm" "0")
9771 (set_attr "memory" "none")
9772 (set_attr "prefix" "vex")
9773 (set_attr "mode" "OI")])
9775 ;; Modes handled by AVX vec_dup patterns: the SI/SF and DI/DF element
;; vectors V8SI, V8SF, V4DI and V4DF.
9776 (define_mode_iterator AVX_VEC_DUP_MODE
9777 [V8SI V8SF V4DI V4DF])
;; Duplicate the scalar operand 1 into every element of operand 0.  The first
;; alternative takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>; the second takes it from a register.
9779 (define_insn "vec_dup<mode>"
9780 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x")
9781 (vec_duplicate:AVX_VEC_DUP_MODE
9782 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
9785 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9787 [(set_attr "type" "ssemov")
9788 (set_attr "prefix_extra" "1")
9789 (set_attr "prefix" "vex")
9790 (set_attr "mode" "V8SF")])
;; After reload, a vec_duplicate from a register source is rewritten as a
;; half-width vec_duplicate followed by a vec_concat of that half with
;; itself.  Operand 2 is the destination hard register re-expressed in the
;; half-vector mode.
;; NOTE(review): the opening line of this definition is not present in this
;; listing -- confirm it is a define_split.
9793 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "")
9794 (vec_duplicate:AVX_VEC_DUP_MODE
9795 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
9796 "TARGET_AVX && reload_completed"
9798 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
9800 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
9801 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast a half-width operand 1 into a 256-bit register.  Alternatives:
;;   memory source                    -> vbroadcastf128
;;   source tied to destination ("0") -> vinsertf128 with immediate 1
;;   other register source            -> vperm2f128 with immediate 0
9803 (define_insn "avx_vbroadcastf128_<mode>"
9804 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
9806 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9810 vbroadcastf128\t{%1, %0|%0, %1}
9811 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9812 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9813 [(set_attr "type" "ssemov,sselog1,sselog1")
9814 (set_attr "prefix_extra" "1")
9815 (set_attr "length_immediate" "0,1,1")
9816 (set_attr "prefix" "vex")
9817 (set_attr "mode" "V4SF,V8SF,V8SF")])
9819 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9820 ;; If it so happens that the input is in memory, use vbroadcast.
9821 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element "elt" (operand 3).  For a memory source the
;; address is advanced by elt * 4 bytes, retyped as SFmode, and loaded with
;; vbroadcastss; otherwise vpermilps is emitted with immediate elt * 0x55.
9822 (define_insn "*avx_vperm_broadcast_v4sf"
9823 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
9825 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
9826 (match_parallel 2 "avx_vbroadcast_operand"
9827 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9830 int elt = INTVAL (operands[3]);
9831 switch (which_alternative)
9835 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
9836 return "vbroadcastss\t{%1, %0|%0, %1}";
9838 operands[2] = GEN_INT (elt * 0x55);
9839 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
9844 [(set_attr "type" "ssemov,ssemov,sselog1")
9845 (set_attr "prefix_extra" "1")
9846 (set_attr "length_immediate" "0,0,1")
9847 (set_attr "prefix" "vex")
9848 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast of element "elt" (operand 3), split after reload.
;; One path emits avx_vpermil<mode> to replicate the wanted element within
;; each 128-bit lane, then avx_vperm2f128<mode>3 to copy the wanted lane into
;; both lanes.  The other path re-addresses operand 1 to the selected scalar
;; so the vec_duplicate in the split template applies.
9850 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
9851 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
9853 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
9854 (match_parallel 2 "avx_vbroadcast_operand"
9855 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9858 "&& reload_completed"
9859 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
9861 rtx op0 = operands[0], op1 = operands[1];
9862 int elt = INTVAL (operands[3]);
9868 /* Shuffle element we care about into all elements of the 128-bit lane.
9869 The other lane gets shuffled too, but we don't care. */
9870 if (<MODE>mode == V4DFmode)
9871 mask = (elt & 1 ? 15 : 0);
9873 mask = (elt & 3) * 0x55;
9874 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
9876 /* Shuffle the lane we care about into both lanes of the dest. */
9877 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
9878 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
9882 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
9883 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate vpermil for DFmode vectors (VF2).  The immediate is decoded one
;; bit per element into an explicit PARALLEL of source indices; for V4DF the
;; upper two elements select within indices 2..3 (bit value + 2).
9886 (define_expand "avx_vpermil<mode>"
9887 [(set (match_operand:VF2 0 "register_operand" "")
9889 (match_operand:VF2 1 "nonimmediate_operand" "")
9890 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9893 int mask = INTVAL (operands[2]);
9894 rtx perm[<ssescalarnum>];
9896 perm[0] = GEN_INT (mask & 1);
9897 perm[1] = GEN_INT ((mask >> 1) & 1);
9898 if (<MODE>mode == V4DFmode)
9900 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
9901 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
9905 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate vpermil for SFmode vectors (VF1).  The immediate is decoded two
;; bits per element into an explicit PARALLEL of source indices; for V8SF the
;; same four 2-bit fields are reused for elements 4..7 with 4 added.
9908 (define_expand "avx_vpermil<mode>"
9909 [(set (match_operand:VF1 0 "register_operand" "")
9911 (match_operand:VF1 1 "nonimmediate_operand" "")
9912 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9915 int mask = INTVAL (operands[2]);
9916 rtx perm[<ssescalarnum>];
9918 perm[0] = GEN_INT (mask & 3);
9919 perm[1] = GEN_INT ((mask >> 2) & 3);
9920 perm[2] = GEN_INT ((mask >> 4) & 3);
9921 perm[3] = GEN_INT ((mask >> 6) & 3);
9922 if (<MODE>mode == V8SFmode)
9924 perm[4] = GEN_INT ((mask & 3) + 4);
9925 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
9926 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
9927 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
9931 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a selection PARALLEL (operand 2) that avx_vpermilp_parallel
;; accepts.  At output time the helper's return value minus one replaces
;; operand 2 as the immediate of vpermil.
9934 (define_insn "*avx_vpermilp<mode>"
9935 [(set (match_operand:VF 0 "register_operand" "=x")
9937 (match_operand:VF 1 "nonimmediate_operand" "xm")
9938 (match_parallel 2 ""
9939 [(match_operand 3 "const_int_operand" "")])))]
9941 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
9943 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
9944 operands[2] = GEN_INT (mask);
9945 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
9947 [(set_attr "type" "sselog")
9948 (set_attr "prefix_extra" "1")
9949 (set_attr "length_immediate" "1")
9950 (set_attr "prefix" "vex")
9951 (set_attr "mode" "<MODE>")])
;; vpermil whose second source (operand 2) is an integer vector of mode
;; <sseintvecmode>, in a register or memory, rather than an immediate.
9953 (define_insn "avx_vpermilvar<mode>3"
9954 [(set (match_operand:VF 0 "register_operand" "=x")
9956 [(match_operand:VF 1 "register_operand" "x")
9957 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
9960 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9961 [(set_attr "type" "sselog")
9962 (set_attr "prefix_extra" "1")
9963 (set_attr "prefix" "vex")
9964 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128.  When neither bit 7 nor bit 3 of the immediate is
;; set ((mask & 0x88) == 0), the operation is rebuilt as a vec_select from
;; the vec_concat of operands 1 and 2: bits 1:0 of the immediate pick the
;; half that fills the low half of the result and bits 5:4 pick the half
;; that fills the high half.
9966 (define_expand "avx_vperm2f128<mode>3"
9967 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
9968 (unspec:AVX256MODE2P
9969 [(match_operand:AVX256MODE2P 1 "register_operand" "")
9970 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
9971 (match_operand:SI 3 "const_0_to_255_operand" "")]
9972 UNSPEC_VPERMIL2F128))]
9975 int mask = INTVAL (operands[3]);
9976 if ((mask & 0x88) == 0)
9978 rtx perm[<ssescalarnum>], t1, t2;
9979 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
9981 base = (mask & 3) * nelt2;
9982 for (i = 0; i < nelt2; ++i)
9983 perm[i] = GEN_INT (base + i);
9985 base = ((mask >> 4) & 3) * nelt2;
9986 for (i = 0; i < nelt2; ++i)
9987 perm[i + nelt2] = GEN_INT (base + i);
9989 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
9990 operands[1], operands[2]);
9991 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
9992 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
9993 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
9999 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10000 ;; means that in order to represent this properly in rtl we'd have to
10001 ;; nest *another* vec_concat with a zero operand and do the select from
10002 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form (UNSPEC_VPERMIL2F128) of vperm2f128 that prints the immediate
;; operand 3 unchanged.
10003 (define_insn "*avx_vperm2f128<mode>_full"
10004 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10005 (unspec:AVX256MODE2P
10006 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10007 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10008 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10009 UNSPEC_VPERMIL2F128))]
10011 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10012 [(set_attr "type" "sselog")
10013 (set_attr "prefix_extra" "1")
10014 (set_attr "length_immediate" "1")
10015 (set_attr "prefix" "vex")
10016 (set_attr "mode" "V8SF")])
;; vec_select-over-vec_concat form of vperm2f128.  It matches only when
;; avx_vperm2f128_parallel accepts the selection PARALLEL (operand 3); the
;; helper's return value minus one is printed as the immediate.
10018 (define_insn "*avx_vperm2f128<mode>_nozero"
10019 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10020 (vec_select:AVX256MODE2P
10021 (vec_concat:<ssedoublevecmode>
10022 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10023 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10024 (match_parallel 3 ""
10025 [(match_operand 4 "const_int_operand" "")])))]
10027 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10029 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10030 operands[3] = GEN_INT (mask);
10031 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10033 [(set_attr "type" "sselog")
10034 (set_attr "prefix_extra" "1")
10035 (set_attr "length_immediate" "1")
10036 (set_attr "prefix" "vex")
10037 (set_attr "mode" "V8SF")])
;; Insert the half-width operand 2 into the 256-bit operand 1.  The value of
;; the immediate operand 3 (0 or 1) selects between gen_vec_set_lo_<mode> and
;; gen_vec_set_hi_<mode>; any other value is unreachable.
10039 (define_expand "avx_vinsertf128<mode>"
10040 [(match_operand:V_256 0 "register_operand" "")
10041 (match_operand:V_256 1 "register_operand" "")
10042 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
10043 (match_operand:SI 3 "const_0_to_1_operand" "")]
10046 rtx (*insn)(rtx, rtx, rtx);
10048 switch (INTVAL (operands[3]))
10051 insn = gen_vec_set_lo_<mode>;
10054 insn = gen_vec_set_hi_<mode>;
10057 gcc_unreachable ();
10060 emit_insn (insn (operands[0], operands[1], operands[2]));
;; VI8F_256: the result is operand 2 concatenated with elements 2..3 of
;; operand 1, i.e. operand 2 replaces the low half.  Emits vinsertf128 with
;; immediate 0.
10064 (define_insn "vec_set_lo_<mode>"
10065 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10066 (vec_concat:VI8F_256
10067 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10068 (vec_select:<ssehalfvecmode>
10069 (match_operand:VI8F_256 1 "register_operand" "x")
10070 (parallel [(const_int 2) (const_int 3)]))))]
10072 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10073 [(set_attr "type" "sselog")
10074 (set_attr "prefix_extra" "1")
10075 (set_attr "length_immediate" "1")
10076 (set_attr "prefix" "vex")
10077 (set_attr "mode" "V8SF")])
;; VI8F_256: the result is elements 0..1 of operand 1 concatenated with
;; operand 2, i.e. operand 2 replaces the high half.  Emits vinsertf128 with
;; immediate 1.
10079 (define_insn "vec_set_hi_<mode>"
10080 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10081 (vec_concat:VI8F_256
10082 (vec_select:<ssehalfvecmode>
10083 (match_operand:VI8F_256 1 "register_operand" "x")
10084 (parallel [(const_int 0) (const_int 1)]))
10085 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10087 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10088 [(set_attr "type" "sselog")
10089 (set_attr "prefix_extra" "1")
10090 (set_attr "length_immediate" "1")
10091 (set_attr "prefix" "vex")
10092 (set_attr "mode" "V8SF")])
;; VI4F_256: the result is operand 2 concatenated with elements 4..7 of
;; operand 1, i.e. operand 2 replaces the low half.  Emits vinsertf128 with
;; immediate 0.
10094 (define_insn "vec_set_lo_<mode>"
10095 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10096 (vec_concat:VI4F_256
10097 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10098 (vec_select:<ssehalfvecmode>
10099 (match_operand:VI4F_256 1 "register_operand" "x")
10100 (parallel [(const_int 4) (const_int 5)
10101 (const_int 6) (const_int 7)]))))]
10103 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10104 [(set_attr "type" "sselog")
10105 (set_attr "prefix_extra" "1")
10106 (set_attr "length_immediate" "1")
10107 (set_attr "prefix" "vex")
10108 (set_attr "mode" "V8SF")])
;; VI4F_256: the result is elements 0..3 of operand 1 concatenated with
;; operand 2, i.e. operand 2 replaces the high half.  Emits vinsertf128 with
;; immediate 1.
10110 (define_insn "vec_set_hi_<mode>"
10111 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10112 (vec_concat:VI4F_256
10113 (vec_select:<ssehalfvecmode>
10114 (match_operand:VI4F_256 1 "register_operand" "x")
10115 (parallel [(const_int 0) (const_int 1)
10116 (const_int 2) (const_int 3)]))
10117 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10119 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10120 [(set_attr "type" "sselog")
10121 (set_attr "prefix_extra" "1")
10122 (set_attr "length_immediate" "1")
10123 (set_attr "prefix" "vex")
10124 (set_attr "mode" "V8SF")])
;; V16HI: the V8HI operand 2 is combined with elements 8..15 of operand 1,
;; i.e. operand 2 supplies the low half.  Emits vinsertf128 with immediate 0.
10126 (define_insn "vec_set_lo_v16hi"
10127 [(set (match_operand:V16HI 0 "register_operand" "=x")
10129 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10131 (match_operand:V16HI 1 "register_operand" "x")
10132 (parallel [(const_int 8) (const_int 9)
10133 (const_int 10) (const_int 11)
10134 (const_int 12) (const_int 13)
10135 (const_int 14) (const_int 15)]))))]
10137 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10138 [(set_attr "type" "sselog")
10139 (set_attr "prefix_extra" "1")
10140 (set_attr "length_immediate" "1")
10141 (set_attr "prefix" "vex")
10142 (set_attr "mode" "V8SF")])
;; V16HI: elements 0..7 of operand 1 are combined with the V8HI operand 2,
;; i.e. operand 2 supplies the high half.  Emits vinsertf128 with immediate 1.
10144 (define_insn "vec_set_hi_v16hi"
10145 [(set (match_operand:V16HI 0 "register_operand" "=x")
10148 (match_operand:V16HI 1 "register_operand" "x")
10149 (parallel [(const_int 0) (const_int 1)
10150 (const_int 2) (const_int 3)
10151 (const_int 4) (const_int 5)
10152 (const_int 6) (const_int 7)]))
10153 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10155 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10156 [(set_attr "type" "sselog")
10157 (set_attr "prefix_extra" "1")
10158 (set_attr "length_immediate" "1")
10159 (set_attr "prefix" "vex")
10160 (set_attr "mode" "V8SF")])
;; V32QI: the V16QI operand 2 is combined with elements 16..31 of operand 1,
;; i.e. operand 2 supplies the low half.  Emits vinsertf128 with immediate 0.
10162 (define_insn "vec_set_lo_v32qi"
10163 [(set (match_operand:V32QI 0 "register_operand" "=x")
10165 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10167 (match_operand:V32QI 1 "register_operand" "x")
10168 (parallel [(const_int 16) (const_int 17)
10169 (const_int 18) (const_int 19)
10170 (const_int 20) (const_int 21)
10171 (const_int 22) (const_int 23)
10172 (const_int 24) (const_int 25)
10173 (const_int 26) (const_int 27)
10174 (const_int 28) (const_int 29)
10175 (const_int 30) (const_int 31)]))))]
10177 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10178 [(set_attr "type" "sselog")
10179 (set_attr "prefix_extra" "1")
10180 (set_attr "length_immediate" "1")
10181 (set_attr "prefix" "vex")
10182 (set_attr "mode" "V8SF")])
;; V32QI: elements 0..15 of operand 1 are combined with the V16QI operand 2,
;; i.e. operand 2 supplies the high half.  Emits vinsertf128 with immediate 1.
10184 (define_insn "vec_set_hi_v32qi"
10185 [(set (match_operand:V32QI 0 "register_operand" "=x")
10188 (match_operand:V32QI 1 "register_operand" "x")
10189 (parallel [(const_int 0) (const_int 1)
10190 (const_int 2) (const_int 3)
10191 (const_int 4) (const_int 5)
10192 (const_int 6) (const_int 7)
10193 (const_int 8) (const_int 9)
10194 (const_int 10) (const_int 11)
10195 (const_int 12) (const_int 13)
10196 (const_int 14) (const_int 15)]))
10197 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10199 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10200 [(set_attr "type" "sselog")
10201 (set_attr "prefix_extra" "1")
10202 (set_attr "length_immediate" "1")
10203 (set_attr "prefix" "vex")
10204 (set_attr "mode" "V8SF")])
;; Masked-load expander: operand 0 is the register destination, operand 1
;; the memory source, operand 2 a register of mode <sseintvecmode>.
10206 (define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
10207 [(set (match_operand:VF 0 "register_operand" "")
10209 [(match_operand:<sseintvecmode> 2 "register_operand" "")
10210 (match_operand:VF 1 "memory_operand" "")
;; Masked-store expander: operand 0 is the memory destination, operand 1 a
;; register of mode <sseintvecmode>, operand 2 the register source.
10215 (define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
10216 [(set (match_operand:VF 0 "memory_operand" "")
10218 [(match_operand:<sseintvecmode> 1 "register_operand" "")
10219 (match_operand:VF 2 "register_operand" "")
;; vmaskmov insn with a register-destination and a memory-destination
;; alternative.  The condition REG_P (operands[0]) == MEM_P (operands[2])
;; requires exactly one of destination and source to be memory.
10224 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
10225 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10227 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
10228 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10232 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10233 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10234 [(set_attr "type" "sselog1")
10235 (set_attr "prefix_extra" "1")
10236 (set_attr "prefix" "vex")
10237 (set_attr "mode" "<MODE>")])
;; avx_<castmode><avxsizesuffix>_<castmode>: cast of a half-width vector
;; (operand 1, mode <ssehalfvecmode>, register or memory) to a 256-bit
;; AVX256MODE2P value (operand 0, register or memory), expressed as an
;; UNSPEC.  The split condition adds reload_completed, so the pattern is
;; only split once hard registers are known.  The preparation statements
;; re-create op0 as a <ssehalfvecmode> REG, or op1 as a <MODE> REG, on the
;; same register number, and then emit an ordinary move between op0 and op1.
10239 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
10240 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
10241 (unspec:AVX256MODE2P
10242 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
10246 "&& reload_completed"
10249 rtx op0 = operands[0];
10250 rtx op1 = operands[1];
10252 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
10254 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
10255 emit_move_insn (op0, op1);
;; vec_init<mode> expander for the 256-bit modes of the V_256 iterator.
;; Operand 0 is the vector register to initialize; operand 1 carries no
;; mode, predicate or constraint and is passed through unchanged.  All the
;; work is delegated to ix86_expand_vector_init, called with `false' as its
;; first argument.
10259 (define_expand "vec_init<mode>"
10260 [(match_operand:V_256 0 "register_operand" "")
10261 (match_operand 1 "" "")]
10264 ix86_expand_vector_init (false, operands[0], operands[1]);
;; *vec_concat<mode>_avx: form a V_256 register (operand 0) from two
;; <ssehalfvecmode> halves.  Operand 1 is always a register.  There are two
;; alternatives, distinguished by operand 2:
;;   - "xm": register or memory; emitted as vinsertf128 with immediate 0x1
;;     (type sselog, with an immediate byte);
;;   - "C":  a constant accepted by vector_move_operand (presumably the zero
;;     vector -- confirm against constraints.md); emitted as a plain move of
;;     operand 1 into %x0 (type ssemov).  The mnemonic (vmovaps, vmovapd or
;;     vmovdqa) is picked from the insn's "mode" attribute.
10268 (define_insn "*vec_concat<mode>_avx"
10269 [(set (match_operand:V_256 0 "register_operand" "=x,x")
10271 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
10272 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
10275 switch (which_alternative)
10278 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
10280 switch (get_attr_mode (insn))
10283 return "vmovaps\t{%1, %x0|%x0, %1}";
10285 return "vmovapd\t{%1, %x0|%x0, %1}";
10287 return "vmovdqa\t{%1, %x0|%x0, %1}";
10290 gcc_unreachable ();
10293 [(set_attr "type" "sselog,ssemov")
10294 (set_attr "prefix_extra" "1,*")
10295 (set_attr "length_immediate" "1,*")
10296 (set_attr "prefix" "vex")
10297 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps: convert half-precision values held in the V8HI register
;; operand 1 into the V4SF register operand 0.  The conversion is modelled
;; as a V8SF UNSPEC from which four lanes are selected.  The selector must
;; name lanes 0..3 (the four low converted values), so it lists 0 1 2 3;
;; listing lane 1 twice would describe a result that duplicates one element
;; and omits lane 3, which is not what the instruction computes.
10299 (define_insn "vcvtph2ps"
10300 [(set (match_operand:V4SF 0 "register_operand" "=x")
10302 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10304 (parallel [(const_int 0) (const_int 1)
10305 (const_int 2) (const_int 3)])))]
10307 "vcvtph2ps\t{%1, %0|%0, %1}"
10308 [(set_attr "type" "ssecvt")
10309 (set_attr "prefix" "vex")
10310 (set_attr "mode" "V4SF")])
;; *vcvtph2ps_load: memory-source form of the 128-bit half-to-single
;; conversion.  Operand 1 is a V4HI memory operand and the V4SF result in
;; register operand 0 is expressed directly as an UNSPEC_VCVTPH2PS.
;; NOTE(review): the "mode" attribute is V8SF here although the result is
;; V4SF -- confirm whether that is intentional.
10312 (define_insn "*vcvtph2ps_load"
10313 [(set (match_operand:V4SF 0 "register_operand" "=x")
10314 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10315 UNSPEC_VCVTPH2PS))]
10317 "vcvtph2ps\t{%1, %0|%0, %1}"
10318 [(set_attr "type" "ssecvt")
10319 (set_attr "prefix" "vex")
10320 (set_attr "mode" "V8SF")])
;; vcvtph2ps256: 256-bit half-to-single conversion.  The V8HI operand 1
;; (register or memory) is converted into the V8SF register operand 0,
;; expressed as an UNSPEC_VCVTPH2PS.
10322 (define_insn "vcvtph2ps256"
10323 [(set (match_operand:V8SF 0 "register_operand" "=x")
10324 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10325 UNSPEC_VCVTPH2PS))]
10327 "vcvtph2ps\t{%1, %0|%0, %1}"
10328 [(set_attr "type" "ssecvt")
10329 (set_attr "prefix" "vex")
10330 (set_attr "mode" "V8SF")])
;; vcvtps2ph expander: single-to-half conversion of the V4SF register
;; operand 1 into the V8HI register operand 0.  Operand 2 is an SImode
;; constant in the range 0..255 that is passed to the UNSPEC alongside
;; operand 1.  The preparation statement sets operands[3] to the V4HImode
;; zero constant, so callers supply only operands 0..2.
10332 (define_expand "vcvtps2ph"
10333 [(set (match_operand:V8HI 0 "register_operand" "")
10335 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10336 (match_operand:SI 2 "const_0_to_255_operand" "")]
10340 "operands[3] = CONST0_RTX (V4HImode);")
;; *vcvtps2ph: register-destination form of the 128-bit single-to-half
;; conversion.  The V4HI UNSPEC of the V4SF register operand 1 and the
;; 0..255 immediate operand 2 appears together with operand 3, which must
;; satisfy const0_operand in V4HImode; the V8HI result goes to register
;; operand 0.
10342 (define_insn "*vcvtps2ph"
10343 [(set (match_operand:V8HI 0 "register_operand" "=x")
10345 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10346 (match_operand:SI 2 "const_0_to_255_operand" "N")]
10348 (match_operand:V4HI 3 "const0_operand" "")))]
10350 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10351 [(set_attr "type" "ssecvt")
10352 (set_attr "prefix" "vex")
10353 (set_attr "mode" "V4SF")])
;; *vcvtps2ph_store: memory-destination form of the 128-bit single-to-half
;; conversion.  The V4SF register operand 1 is converted, with the 0..255
;; immediate operand 2, and the V4HI result is written to the memory
;; operand 0.
10355 (define_insn "*vcvtps2ph_store"
10356 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10357 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10358 (match_operand:SI 2 "const_0_to_255_operand" "N")]
10359 UNSPEC_VCVTPS2PH))]
10361 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10362 [(set_attr "type" "ssecvt")
10363 (set_attr "prefix" "vex")
10364 (set_attr "mode" "V4SF")])
;; vcvtps2ph256: 256-bit single-to-half conversion.  The V8SF register
;; operand 1 is converted, with the 0..255 immediate operand 2, into the
;; V8HI operand 0, which may be a register or memory.
10366 (define_insn "vcvtps2ph256"
10367 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10368 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10369 (match_operand:SI 2 "const_0_to_255_operand" "N")]
10370 UNSPEC_VCVTPS2PH))]
10372 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10373 [(set_attr "type" "ssecvt")
10374 (set_attr "prefix" "vex")
10375 (set_attr "mode" "V8SF")])