1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; The 256-bit modes are enabled only under TARGET_AVX; every 128-bit mode is
;; unconditional.  The V1TI row (listing line 27) was absent from this copy
;; and is restored: the comment above promises V1TImode, and sseinsnmode
;; carries a V1TI entry for the move pattern that iterates over V16.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
27 V1TI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; General vector mode iterator.  The 256-bit modes are enabled only under
;; TARGET_AVX and V2DF only under TARGET_SSE2; the remaining 128-bit modes
;; carry no condition.  It has no V1TI entry.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V2DF is the only entry with a per-mode condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; No per-mode conditions are attached; patterns using this iterator have to
;; supply their own target check.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; V8SF/V4DF require TARGET_AVX and V2DF requires TARGET_SSE2; V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; Unlike VF, V2DF carries no TARGET_SSE2 condition in this iterator.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; The mode list (listing line 67) was absent from this copy, leaving the
;; definition unterminated.  It is restored with the two 256-bit float modes
;; that V_256 and VF list; like V_256 it carries no per-mode condition.
66 (define_mode_iterator VF_256
67 [V8SF V4DF])
69 ;; All vector integer modes
;; The 256-bit entries are enabled only under TARGET_AVX.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 ;; All QImode vector integer modes
;; Used by the movdqu and lddqu patterns in this file.
77 (define_mode_iterator VI1
78 [(V32QI "TARGET_AVX") V16QI])
80 ;; All DImode vector integer modes
;; Used by the integer non-temporal store pattern in this file.
81 (define_mode_iterator VI8
82 [(V4DI "TARGET_AVX") V2DI])
84 ;; All 128bit vector integer modes
85 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
87 ;; Random 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that the iterator
;; covers (1 = QI, 2 = HI, 4 = SI, 8 = DI), as the mode lists show.
88 (define_mode_iterator VI12_128 [V16QI V8HI])
89 (define_mode_iterator VI14_128 [V16QI V4SI])
90 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
91 (define_mode_iterator VI24_128 [V8HI V4SI])
92 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
94 ;; Int-float size matches
;; Each iterator pairs the integer and float vector mode with the same
;; element size (4 or 8 bytes) and the same total width (128 or 256 bits).
95 (define_mode_iterator VI4F_128 [V4SI V4SF])
96 (define_mode_iterator VI8F_128 [V2DI V2DF])
97 (define_mode_iterator VI4F_256 [V8SI V8SF])
98 (define_mode_iterator VI8F_256 [V4DI V4DF])
100 ;; Mapping from float mode to required SSE level
;; The result is spliced into pattern names such as "<sse>_movnt<mode>".
101 (define_mode_attr sse
102 [(SF "sse") (DF "sse2")
103 (V4SF "sse") (V2DF "sse2")
104 (V8SF "avx") (V4DF "avx")])
;; Name prefix for the QImode/DImode integer vector patterns written
;; "<sse2>_...": "sse2" for the 128-bit mode, "avx" for the 256-bit one.
106 (define_mode_attr sse2
107 [(V16QI "sse2") (V32QI "avx")
108 (V2DI "sse2") (V4DI "avx")])
;; Name prefix for "<sse3>_..." patterns; only the QImode vector modes are
;; mapped.
110 (define_mode_attr sse3
111 [(V16QI "sse3") (V32QI "avx")])
;; Maps the float vector modes to "sse4_1" (128-bit) or "avx" (256-bit).
113 (define_mode_attr sse4_1
114 [(V4SF "sse4_1") (V2DF "sse4_1")
115 (V8SF "avx") (V4DF "avx")])
;; Pattern-name suffix: "256" for every 256-bit mode, empty for 128-bit modes.
117 (define_mode_attr avxsizesuffix
118 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
119 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
120 (V8SF "256") (V4DF "256")
121 (V4SF "") (V2DF "")])
123 ;; SSE instruction mode
;; Value for the "mode" insn attribute: integer vectors map to OI (256-bit)
;; or TI (128-bit, including V1TI); float vectors map to their own mode name.
124 (define_mode_attr sseinsnmode
125 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
126 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
127 (V8SF "V8SF") (V4DF "V4DF")
128 (V4SF "V4SF") (V2DF "V2DF")])
130 ;; Mapping of vector float modes to an integer mode of the same size
;; The element count is preserved as well (e.g. V8SF -> V8SI).
131 (define_mode_attr sseintvecmode
132 [(V8SF "V8SI") (V4DF "V4DI")
133 (V4SF "V4SI") (V2DF "V2DI")])
135 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count doubled.
136 (define_mode_attr ssedoublevecmode
137 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
138 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
139 (V8SF "V16SF") (V4DF "V8DF")
140 (V4SF "V8SF") (V2DF "V4DF")])
142 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count halved.  V2DI and V2DF
;; have no entry.  The final row with the closing brackets (listing line 147)
;; was absent from this copy, leaving the list unterminated; it is restored
;; following the pattern of the rows above.
;; NOTE(review): confirm the restored V4SF row against the upstream file.
143 (define_mode_attr ssehalfvecmode
144 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
145 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
146 (V8SF "V4SF") (V4DF "V2DF")
147 (V4SF "V2SF")])
149 ;; Mapping of vector modes back to the scalar modes
;; Used for the "mode" attribute of the scalar (vm*) patterns.
150 (define_mode_attr ssescalarmode
151 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
152 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
153 (V8SF "SF") (V4DF "DF")
154 (V4SF "SF") (V2DF "DF")])
156 ;; Number of scalar elements in each vector type
157 (define_mode_attr ssescalarnum
158 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
159 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
160 (V8SF "8") (V4DF "4")
161 (V4SF "4") (V2DF "2")])
163 ;; SSE scalar suffix for vector modes
;; Supplies the "ss"/"sd" mnemonic suffix used by the scalar (vm*) output
;; templates.
;; NOTE(review): the embedded listing numbers jump from 167 to 170, and the
;; list below is never closed -- at least one trailing row plus the closing
;; "])" is missing from this copy.  Restore it from the upstream file.
164 (define_mode_attr ssescalarmodesuffix
165 [(V8SF "ss") (V4DF "sd")
166 (V4SF "ss") (V2DF "sd")
167 (V8SI "ss") (V4DI "sd")
170 ;; Pack/unpack vector modes
;; sseunpackmode gives the mode with elements twice as wide and half as many;
;; ssepackmode is the inverse mapping.
171 (define_mode_attr sseunpackmode
172 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
174 (define_mode_attr ssepackmode
175 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
177 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
178 (define_mode_attr sserotatemax
179 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
181 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute (keyed on the rtx code), not a mode attribute.
182 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; NOTE(review): the embedded listing numbers skip 183-186 and 189 here, so
;; any comments or definitions on those lines are not visible in this copy.
;; AVX256MODE2P lists 256-bit modes with 4- and 8-byte... see the mode lists:
;; it omits V4DI, which AVX256MODE24P includes.
187 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
188 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
;; Scalar and vector float modes; presumably for the FMA patterns -- no user
;; is visible in this part of the file.
190 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
192 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
194 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
200 ;; All of these patterns are enabled for SSE1 as well as SSE2.
201 ;; This is essential for maintaining stable calling conventions.
;; Standard-named move expander for every V16 mode.  The visible preparation
;; statement hands both operands to ix86_expand_vector_move.
;; NOTE(review): the embedded listing numbers skip 206-207 and 209-211, so
;; the enabling condition, the braces and the end of the preparation block
;; are not visible in this copy.
203 (define_expand "mov<mode>"
204 [(set (match_operand:V16 0 "nonimmediate_operand" "")
205 (match_operand:V16 1 "nonimmediate_operand" ""))]
208 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every V16 mode, with three alternatives:
;;   0: operand 1 matches constraint "C", destination is an SSE register
;;   1: SSE register or memory -> SSE register
;;   2: SSE register -> memory
;; The visible part of the condition requires at least one operand to be a
;; register.  The output code switches on which_alternative and then on the
;; insn's "mode" attribute; the visible returns are:
;;   - standard_sse_constant_opcode for (presumably) the constant alternative,
;;   - the unaligned vmovups/vmovupd/vmovdqu forms when either operand is
;;     misaligned (the first half of each test is not visible),
;;   - %vmovaps in place of the PD/integer form when
;;     TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;;   - otherwise the aligned %vmovaps/%vmovapd/%vmovdqa form.
;; The "mode" attribute is <sseinsnmode> under TARGET_AVX, otherwise V4SF,
;; V2DF or TI according to the remaining visible tests.
;; NOTE(review): many lines of this definition (case labels, braces, parts of
;; the condition and of the attribute expression) are missing from this copy,
;; as the gaps in the embedded listing numbers show.
212 (define_insn "*mov<mode>_internal"
213 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
214 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
216 && (register_operand (operands[0], <MODE>mode)
217 || register_operand (operands[1], <MODE>mode))"
219 switch (which_alternative)
222 return standard_sse_constant_opcode (insn, operands[1]);
225 switch (get_attr_mode (insn))
230 && (misaligned_operand (operands[0], <MODE>mode)
231 || misaligned_operand (operands[1], <MODE>mode)))
232 return "vmovups\t{%1, %0|%0, %1}";
234 return "%vmovaps\t{%1, %0|%0, %1}";
239 && (misaligned_operand (operands[0], <MODE>mode)
240 || misaligned_operand (operands[1], <MODE>mode)))
241 return "vmovupd\t{%1, %0|%0, %1}";
242 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
243 return "%vmovaps\t{%1, %0|%0, %1}";
245 return "%vmovapd\t{%1, %0|%0, %1}";
250 && (misaligned_operand (operands[0], <MODE>mode)
251 || misaligned_operand (operands[1], <MODE>mode)))
252 return "vmovdqu\t{%1, %0|%0, %1}";
253 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
254 return "%vmovaps\t{%1, %0|%0, %1}";
256 return "%vmovdqa\t{%1, %0|%0, %1}";
265 [(set_attr "type" "sselog1,ssemov,ssemov")
266 (set_attr "prefix" "maybe_vex")
268 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
269 (const_string "<sseinsnmode>")
271 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
273 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
274 (and (eq_attr "alternative" "2")
275 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
277 (const_string "V4SF")
278 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
279 (const_string "V4SF")
280 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
281 (const_string "V2DF")
283 (const_string "TI")))])
;; Emits %vmovq from a V2DI register-or-memory operand into a V2DI SSE
;; register.  The visible RTL selects element 0 of operand 1.
;; NOTE(review): listing lines 287-288 and 291-292 (the wrapper around the
;; element selection and the insn condition) are missing from this copy.
285 (define_insn "sse2_movq128"
286 [(set (match_operand:V2DI 0 "register_operand" "=x")
289 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
290 (parallel [(const_int 0)]))
293 "%vmovq\t{%1, %0|%0, %1}"
294 [(set_attr "type" "ssemov")
295 (set_attr "prefix" "maybe_vex")
296 (set_attr "mode" "TI")])
298 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
299 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
300 ;; from memory, we'd prefer to load the memory directly into the %xmm
301 ;; register. To facilitate this happy circumstance, this pattern won't
302 ;; split until after register allocation. If the 64-bit value didn't
303 ;; come from memory, this is the best we can do. This is much better
304 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; register.
;;
;; Enabled only for 32-bit SSE2 targets with TARGET_INTER_UNIT_MOVES, and
;; split once reload_completed holds.  Alternative 0 takes the DImode value in
;; integer registers and needs an early-clobbered V4SI scratch (operand 2);
;; alternative 1 takes it from memory and needs no scratch.
;; Register case: each 32-bit half (SUBREG byte offsets 0 and 4) is loaded
;; with gen_sse2_loadld, the low half into operand 0 and the high half into
;; the scratch, and the two are then combined with
;; gen_vec_interleave_lowv4si.  Memory case: one gen_vec_concatv2di of the
;; memory operand with const0_rtx, writing operand 0 viewed as V2DI.
;; NOTE(review): several lines (braces, the last interleave argument, the
;; final else branch and the end of the definition) are missing from this
;; copy, as the gaps in the embedded listing numbers show.
307 (define_insn_and_split "movdi_to_sse"
309 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
310 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
311 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
312 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
314 "&& reload_completed"
317 if (register_operand (operands[1], DImode))
319 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
320 Assemble the 64-bit DImode value in an xmm register. */
321 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
322 gen_rtx_SUBREG (SImode, operands[1], 0)));
323 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
324 gen_rtx_SUBREG (SImode, operands[1], 4)));
325 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
328 else if (memory_operand (operands[1], DImode))
329 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
330 operands[1], const0_rtx));
;; Two anonymous post-reload rewrites of a vector load whose source satisfies
;; zero_extended_scalar_load_operand.  In both, operand 1 is narrowed to the
;; scalar mode with simplify_gen_subreg and operand 2 is set to a zero
;; constant.
;; NOTE(review): the opening line of each definition (listing lines 335 and
;; 349, presumably "(define_split") is missing from this copy, along with
;; parts of the V4SF replacement pattern and the closing lines.
;;
;; V4SF variant: the visible replacement duplicates the SFmode scalar with
;; vec_duplicate; operand 2 is the V4SF zero vector.
336 [(set (match_operand:V4SF 0 "register_operand" "")
337 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
338 "TARGET_SSE && reload_completed"
341 (vec_duplicate:V4SF (match_dup 1))
345 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
346 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF variant: the replacement is a vec_concat of the DFmode scalar with a
;; DFmode zero.
350 [(set (match_operand:V2DF 0 "register_operand" "")
351 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
352 "TARGET_SSE2 && reload_completed"
353 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
355 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
356 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every V16 mode; the visible statement passes the mode
;; and the register operand to ix86_expand_push.
;; NOTE(review): the condition, braces and end of the preparation block
;; (listing lines 361-362 and 364-365) are missing from this copy.
359 (define_expand "push<mode>1"
360 [(match_operand:V16 0 "register_operand" "")]
363 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every V16 mode; the visible statement hands
;; the operands to ix86_expand_vector_move_misalign.
;; NOTE(review): the condition, braces and end of the preparation block
;; (listing lines 370-371 and 373-374) are missing from this copy.
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:V16 0 "nonimmediate_operand" "")
369 (match_operand:V16 1 "nonimmediate_operand" ""))]
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Named expander for the unaligned float vector move.  If both operands are
;; memory, the source is first forced into a register, which keeps the
;; matching insn's "not both MEM" condition satisfiable.
;; NOTE(review): the wrapper around operand 1 (listing lines 378 and 380),
;; the condition and the braces are missing from this copy.
376 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
377 [(set (match_operand:VF 0 "nonimmediate_operand" "")
379 [(match_operand:VF 1 "nonimmediate_operand" "")]
383 if (MEM_P (operands[0]) && MEM_P (operands[1]))
384 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float vector move insn: emits %vmovu<ssemodesuffix>.  It matches
;; under TARGET_SSE when the operands are not both memory; the alternatives
;; are reg/mem -> reg and reg -> mem.  The "movu" attribute is set to 1.
;; NOTE(review): the wrapper around operand 1 (listing lines 389 and 391) is
;; missing from this copy.
387 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
388 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
390 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
392 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
393 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
394 [(set_attr "type" "ssemov")
395 (set_attr "movu" "1")
396 (set_attr "prefix" "maybe_vex")
397 (set_attr "mode" "<MODE>")])
;; Named expander for the unaligned QImode-vector move.  The source is
;; wrapped in an unspec; if both operands are memory, the source is first
;; forced into a register.
;; NOTE(review): the unspec name, the condition and the braces (listing
;; lines 402-404 and 407-408) are missing from this copy.
399 (define_expand "<sse2>_movdqu<avxsizesuffix>"
400 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
401 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
405 if (MEM_P (operands[0]) && MEM_P (operands[1]))
406 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned QImode-vector move insn: emits %vmovdqu.  It matches under
;; TARGET_SSE2 when the operands are not both memory.  The "prefix_data16"
;; attribute is computed from a TARGET_AVX test.
;; NOTE(review): the unspec name and the two arms of the prefix_data16
;; expression (listing lines 412, 418 and 420-421) are missing from this
;; copy.
409 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
410 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
411 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
413 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
414 "%vmovdqu\t{%1, %0|%0, %1}"
415 [(set_attr "type" "ssemov")
416 (set_attr "movu" "1")
417 (set (attr "prefix_data16")
419 (ne (symbol_ref "TARGET_AVX") (const_int 0))
422 (set_attr "prefix" "maybe_vex")
423 (set_attr "mode" "<sseinsnmode>")])
;; Load-only unaligned QImode-vector insn: emits %vlddqu from a memory
;; operand into an SSE register.  Both "prefix_data16" and "prefix_rep" are
;; computed from TARGET_AVX tests.
;; NOTE(review): the unspec name, the insn condition and the arms of both
;; attribute expressions are missing from this copy (gaps in the embedded
;; listing numbers).
425 (define_insn "<sse3>_lddqu<avxsizesuffix>"
426 [(set (match_operand:VI1 0 "register_operand" "=x")
427 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
430 "%vlddqu\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssemov")
432 (set_attr "movu" "1")
433 (set (attr "prefix_data16")
435 (ne (symbol_ref "TARGET_AVX") (const_int 0))
438 (set (attr "prefix_rep")
440 (ne (symbol_ref "TARGET_AVX") (const_int 0))
443 (set_attr "prefix" "maybe_vex")
444 (set_attr "mode" "<sseinsnmode>")])
;; Store of an SImode general register to memory, emitted as movnti.  There
;; is no VEX form: the template has no %v prefix and no "prefix" attribute
;; is set.
;; NOTE(review): the unspec name and the insn condition (listing lines
;; 449-450) are missing from this copy.
446 (define_insn "sse2_movntsi"
447 [(set (match_operand:SI 0 "memory_operand" "=m")
448 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
451 "movnti\t{%1, %0|%0, %1}"
452 [(set_attr "type" "ssemov")
453 (set_attr "prefix_data16" "0")
454 (set_attr "mode" "V2DF")])
;; Store of a float vector SSE register to memory, emitted as
;; %vmovnt<ssemodesuffix>, for every VF mode.
;; NOTE(review): the unspec name and the insn condition (listing lines
;; 459-460) are missing from this copy.
456 (define_insn "<sse>_movnt<mode>"
457 [(set (match_operand:VF 0 "memory_operand" "=m")
458 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
461 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
462 [(set_attr "type" "ssemov")
463 (set_attr "prefix" "maybe_vex")
464 (set_attr "mode" "<MODE>")])
;; Store of a DImode-element vector SSE register to memory, emitted as
;; %vmovntdq, for the VI8 modes.  Unlike the sibling movnt patterns, which use
;; "ssemov", the "type" attribute here is "ssecvt".
;; NOTE(review): the unspec name, the insn condition and the arms of the
;; prefix_data16 expression are missing from this copy.
466 (define_insn "<sse2>_movnt<mode>"
467 [(set (match_operand:VI8 0 "memory_operand" "=m")
468 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
471 "%vmovntdq\t{%1, %0|%0, %1}"
472 [(set_attr "type" "ssecvt")
473 (set (attr "prefix_data16")
475 (ne (symbol_ref "TARGET_AVX") (const_int 0))
478 (set_attr "prefix" "maybe_vex")
479 (set_attr "mode" "<sseinsnmode>")])
481 ; Expand patterns for non-temporal stores. At the moment, only those
482 ; that directly map to insns are defined; it would be possible to
483 ; define patterns for other modes that would expand to several insns.
485 ;; Modes handled by storent patterns.
;; Each entry carries the target condition under which a store for that mode
;; is available; V4SF is unconditional.
;; NOTE(review): listing line 488 is missing from this copy, so one row of
;; the list below is not visible.
486 (define_mode_iterator STORENT_MODE
487 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
489 (V8SF "TARGET_AVX") V4SF
490 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard-named store expander for every STORENT_MODE mode: a set of a
;; memory destination from a register source.
;; NOTE(review): the wrapper around operand 1 and the condition (listing
;; lines 494 and 496-497) are missing from this copy.
492 (define_expand "storent<mode>"
493 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
495 [(match_operand:STORENT_MODE 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary float vector expander for every VF mode.  All the work is delegated
;; to ix86_expand_fp_absneg_operator, after which expansion is DONE.
;; NOTE(review): the rtx code wrapping operand 1 and the condition (listing
;; lines 507 and 509) are missing from this copy; the helper's name suggests
;; the codes are abs and neg.
505 (define_expand "<code><mode>2"
506 [(set (match_operand:VF 0 "register_operand" "")
508 (match_operand:VF 1 "register_operand" "")))]
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg of a float vector, kept as one insn and later split into a single
;; bitwise operation.  Operand 3 is the abs/neg operator, operand 1 its input
;; and operand 2 a second vector that is only "use"d (presumably the
;; sign-bit mask -- TODO confirm against ix86_expand_fp_absneg_operator).
;; Alternatives 0-1 are for non-AVX (one input tied to the destination),
;; alternatives 2-3 for AVX.
;; Split code: when operand 1 is in memory the two inputs are swapped so the
;; memory operand comes second; NEG becomes XOR, anything else becomes AND;
;; the result is a SET of operand 0.
;; NOTE(review): the condition, the declarations, the else keyword, the body
;; of the rtx_equal_p test and the final emit are missing from this copy.
512 (define_insn_and_split "*absneg<mode>2"
513 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
514 (match_operator:VF 3 "absneg_operator"
515 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
516 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
522 enum rtx_code absneg_op;
528 if (MEM_P (operands[1]))
529 op1 = operands[2], op2 = operands[1];
531 op1 = operands[1], op2 = operands[2];
536 if (rtx_equal_p (operands[0], operands[1]))
542 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
543 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
544 t = gen_rtx_SET (VOIDmode, operands[0], t);
548 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Add/subtract expander for every VF mode.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy to legitimise the operands; the pattern
;; itself is then emitted (there is no DONE).
;; NOTE(review): the rtx code line and the condition (listing lines 552 and
;; 555) are missing from this copy.
550 (define_expand "<plusminus_insn><mode>3"
551 [(set (match_operand:VF 0 "register_operand" "")
553 (match_operand:VF 1 "nonimmediate_operand" "")
554 (match_operand:VF 2 "nonimmediate_operand" "")))]
556 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed float add/subtract insn for every VF mode, valid under TARGET_SSE
;; when ix86_binary_operator_ok accepts the operands.
;;   alt 0 (noavx): two-operand form, operand 1 tied to the destination
;;                  ("<comm>" adds the commutative marker where applicable)
;;   alt 1 (avx):   three-operand v-prefixed form
;; NOTE(review): the rtx code line and the "@" line that opens the
;; multi-alternative template (listing lines 560 and 564) are missing from
;; this copy.
558 (define_insn "*<plusminus_insn><mode>3"
559 [(set (match_operand:VF 0 "register_operand" "=x,x")
561 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
562 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
563 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
565 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
566 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
567 [(set_attr "isa" "noavx,avx")
568 (set_attr "type" "sseadd")
569 (set_attr "prefix" "orig,vex")
570 (set_attr "mode" "<MODE>")])
;; Scalar add/subtract on 128-bit float vectors (VF_128): the templates use
;; the ss/sd suffix from <ssescalarmodesuffix> and the "mode" attribute is the
;; scalar mode.  Alt 0 is the non-AVX two-operand form, alt 1 the AVX
;; three-operand form.
;; NOTE(review): the lines wrapping the operands (presumably the merge with
;; operand 1 -- TODO confirm), the condition and the "@" template opener are
;; missing from this copy.
572 (define_insn "<sse>_vm<plusminus_insn><mode>3"
573 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
576 (match_operand:VF_128 1 "register_operand" "0,x")
577 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
582 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
583 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
584 [(set_attr "isa" "noavx,avx")
585 (set_attr "type" "sseadd")
586 (set_attr "prefix" "orig,vex")
587 (set_attr "mode" "<ssescalarmode>")])
;; Multiply expander for every VF mode; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT and the pattern is then
;; emitted.
;; NOTE(review): the rtx code line and the condition (listing lines 591 and
;; 594) are missing from this copy.
589 (define_expand "mul<mode>3"
590 [(set (match_operand:VF 0 "register_operand" "")
592 (match_operand:VF 1 "nonimmediate_operand" "")
593 (match_operand:VF 2 "nonimmediate_operand" "")))]
595 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed float multiply insn for every VF mode, valid under TARGET_SSE when
;; ix86_binary_operator_ok accepts the operands.  The "%" in operand 1's
;; constraint marks operands 1 and 2 as commutative.  Alt 0 is the non-AVX
;; two-operand form, alt 1 the AVX three-operand form.
;; NOTE(review): the rtx code line and the "@" template opener (listing
;; lines 599 and 603) are missing from this copy.
597 (define_insn "*mul<mode>3"
598 [(set (match_operand:VF 0 "register_operand" "=x,x")
600 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
601 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
602 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
604 mul<ssemodesuffix>\t{%2, %0|%0, %2}
605 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
606 [(set_attr "isa" "noavx,avx")
607 (set_attr "type" "ssemul")
608 (set_attr "prefix" "orig,vex")
609 (set_attr "mode" "<MODE>")])
;; Scalar multiply on 128-bit float vectors (VF_128): emits mulss/mulsd or
;; the v-prefixed three-operand form; the "mode" attribute is the scalar
;; mode.
;; NOTE(review): the lines wrapping the operands, the condition and the "@"
;; template opener are missing from this copy.
611 (define_insn "<sse>_vmmul<mode>3"
612 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
615 (match_operand:VF_128 1 "register_operand" "0,x")
616 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
621 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
622 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "isa" "noavx,avx")
624 (set_attr "type" "ssemul")
625 (set_attr "prefix" "orig,vex")
626 (set_attr "mode" "<ssescalarmode>")])
;; Divide expander for the DFmode vector modes (VF2).  It only legitimises the
;; operands via ix86_fixup_binary_operands_no_copy; there is no
;; software-division path here, unlike the VF1 expander of the same name.
;; NOTE(review): the condition (listing line 632) is missing from this copy.
628 (define_expand "div<mode>3"
629 [(set (match_operand:VF2 0 "register_operand" "")
630 (div:VF2 (match_operand:VF2 1 "register_operand" "")
631 (match_operand:VF2 2 "nonimmediate_operand" "")))]
633 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Divide expander for the SFmode vector modes (VF1).  After the usual
;; operand fix-up, ix86_emit_swdivsf is called when all of the following
;; hold: TARGET_SSE_MATH, TARGET_RECIP, not optimising the insn for size,
;; -ffinite-math-only, no trapping math, and unsafe math optimisations.
;; NOTE(review): the condition, the braces and whatever follows the
;; ix86_emit_swdivsf call (listing lines 639-640, 642, 646 and 648-650) are
;; missing from this copy.
635 (define_expand "div<mode>3"
636 [(set (match_operand:VF1 0 "register_operand" "")
637 (div:VF1 (match_operand:VF1 1 "register_operand" "")
638 (match_operand:VF1 2 "nonimmediate_operand" "")))]
641 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
643 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
644 && flag_finite_math_only && !flag_trapping_math
645 && flag_unsafe_math_optimizations)
647 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Packed float divide insn for every VF mode.  Operand 1 must be a register
;; (division is not commutative, so there is no "%").  Alt 0 is the non-AVX
;; two-operand form, alt 1 the AVX three-operand form.
;; NOTE(review): the rtx code line, the condition and the "@" template
;; opener (listing lines 654 and 657-658) are missing from this copy.
652 (define_insn "<sse>_div<mode>3"
653 [(set (match_operand:VF 0 "register_operand" "=x,x")
655 (match_operand:VF 1 "register_operand" "0,x")
656 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
659 div<ssemodesuffix>\t{%2, %0|%0, %2}
660 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
661 [(set_attr "isa" "noavx,avx")
662 (set_attr "type" "ssediv")
663 (set_attr "prefix" "orig,vex")
664 (set_attr "mode" "<MODE>")])
;; Scalar divide on 128-bit float vectors (VF_128): emits divss/divsd or the
;; v-prefixed three-operand form; the "mode" attribute is the scalar mode.
;; NOTE(review): the lines wrapping the operands, the condition and the "@"
;; template opener are missing from this copy.
666 (define_insn "<sse>_vmdiv<mode>3"
667 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
670 (match_operand:VF_128 1 "register_operand" "0,x")
671 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
676 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
677 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
678 [(set_attr "isa" "noavx,avx")
679 (set_attr "type" "ssediv")
680 (set_attr "prefix" "orig,vex")
681 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision reciprocal for the VF1 modes, represented as
;; UNSPEC_RCP and emitted as %vrcpps.
;; NOTE(review): the unspec opener and the condition (listing lines 685 and
;; 687) are missing from this copy.
683 (define_insn "<sse>_rcp<mode>2"
684 [(set (match_operand:VF1 0 "register_operand" "=x")
686 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
688 "%vrcpps\t{%1, %0|%0, %1}"
689 [(set_attr "type" "sse")
690 (set_attr "atom_sse_attr" "rcp")
691 (set_attr "prefix" "maybe_vex")
692 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF: emits rcpss, or vrcpss with operand 2 as the
;; extra source in the AVX alternative.  In the non-AVX alternative operand 2
;; is tied to the destination.
;; NOTE(review): the lines combining the unspec result with operand 2, the
;; unspec name, the condition and the "@" template opener are missing from
;; this copy.
694 (define_insn "sse_vmrcpv4sf2"
695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
697 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
699 (match_operand:V4SF 2 "register_operand" "0,x")
703 rcpss\t{%1, %0|%0, %1}
704 vrcpss\t{%1, %2, %0|%0, %2, %1}"
705 [(set_attr "isa" "noavx,avx")
706 (set_attr "type" "sse")
707 (set_attr "atom_sse_attr" "rcp")
708 (set_attr "prefix" "orig,vex")
709 (set_attr "mode" "SF")])
;; Square-root expander for the DFmode vector modes (VF2).  No preparation
;; code is visible, so the pattern is emitted as written.
;; NOTE(review): the condition and closing line (listing line 714) are
;; missing from this copy.
711 (define_expand "sqrt<mode>2"
712 [(set (match_operand:VF2 0 "register_operand" "")
713 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for the SFmode vector modes (VF1).
;; ix86_emit_swsqrtsf is called with a final argument of false when all of
;; the following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimising the insn
;; for size, -ffinite-math-only, no trapping math, and unsafe math
;; optimisations.
;; NOTE(review): the condition, the braces and whatever follows the call
;; (listing lines 719-720, 724 and 726-728) are missing from this copy.
716 (define_expand "sqrt<mode>2"
717 [(set (match_operand:VF1 0 "register_operand" "")
718 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
721 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
722 && flag_finite_math_only && !flag_trapping_math
723 && flag_unsafe_math_optimizations)
725 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed float square-root insn for every VF mode, emitted as
;; %vsqrt<ssemodesuffix> from a register-or-memory source.
;; NOTE(review): the condition (listing line 733) is missing from this copy.
730 (define_insn "<sse>_sqrt<mode>2"
731 [(set (match_operand:VF 0 "register_operand" "=x")
732 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
734 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
735 [(set_attr "type" "sse")
736 (set_attr "atom_sse_attr" "sqrt")
737 (set_attr "prefix" "maybe_vex")
738 (set_attr "mode" "<MODE>")])
;; Scalar square root on 128-bit float vectors (VF_128): emits sqrtss/sqrtsd,
;; or the v-prefixed form with operand 2 as the extra source in the AVX
;; alternative.  In the non-AVX alternative operand 2 is tied to the
;; destination.
;; NOTE(review): the lines wrapping the operands, the condition and the "@"
;; template opener are missing from this copy.
740 (define_insn "<sse>_vmsqrt<mode>2"
741 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
744 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
745 (match_operand:VF_128 2 "register_operand" "0,x")
749 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
750 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
751 [(set_attr "isa" "noavx,avx")
752 (set_attr "type" "sse")
753 (set_attr "atom_sse_attr" "sqrt")
754 (set_attr "prefix" "orig,vex")
755 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander for the SFmode vector modes (VF1),
;; represented as UNSPEC_RSQRT.  The visible preparation statement calls
;; ix86_emit_swsqrtsf with a final argument of true (the sqrt expander passes
;; false).
;; NOTE(review): the unspec opener, the condition and the end of the
;; preparation block are missing from this copy.
757 (define_expand "rsqrt<mode>2"
758 [(set (match_operand:VF1 0 "register_operand" "")
760 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
763 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root for the VF1 modes,
;; represented as UNSPEC_RSQRT and emitted as %vrsqrtps.
;; NOTE(review): the unspec opener and the condition (listing lines 769 and
;; 771) are missing from this copy.
767 (define_insn "<sse>_rsqrt<mode>2"
768 [(set (match_operand:VF1 0 "register_operand" "=x")
770 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
772 "%vrsqrtps\t{%1, %0|%0, %1}"
773 [(set_attr "type" "sse")
774 (set_attr "prefix" "maybe_vex")
775 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF: emits rsqrtss, or vrsqrtss with
;; operand 2 as the extra source in the AVX alternative.  In the non-AVX
;; alternative operand 2 is tied to the destination.
;; NOTE(review): the lines combining the unspec result with operand 2, the
;; unspec name, the condition and the "@" template opener are missing from
;; this copy.
777 (define_insn "sse_vmrsqrtv4sf2"
778 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
780 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
782 (match_operand:V4SF 2 "register_operand" "0,x")
786 rsqrtss\t{%1, %0|%0, %1}
787 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
788 [(set_attr "isa" "noavx,avx")
789 (set_attr "type" "sse")
790 (set_attr "prefix" "orig,vex")
791 (set_attr "mode" "SF")])
793 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
794 ;; isn't really correct, as those rtl operators aren't defined when
795 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;;
;; Min/max expander for every VF mode.  Without -ffinite-math-only operand 1
;; is forced into a register first, matching the non-finite insn whose
;; operand 1 must be a register; the usual binary-operand fix-up then runs
;; in either case.
;; NOTE(review): the rtx code line, the condition and the braces (listing
;; lines 799, 802-803 and 807) are missing from this copy.
797 (define_expand "<code><mode>3"
798 [(set (match_operand:VF 0 "register_operand" "")
800 (match_operand:VF 1 "nonimmediate_operand" "")
801 (match_operand:VF 2 "nonimmediate_operand" "")))]
804 if (!flag_finite_math_only)
805 operands[1] = force_reg (<MODE>mode, operands[1]);
806 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Packed float min/max insn used only with -ffinite-math-only.  Operands 1
;; and 2 are marked commutative ("%") and operand 1 may be in memory, subject
;; to ix86_binary_operator_ok.
;; NOTE(review): the rtx code line and the "@" template opener (listing
;; lines 811 and 816) are missing from this copy.
809 (define_insn "*<code><mode>3_finite"
810 [(set (match_operand:VF 0 "register_operand" "=x,x")
812 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
813 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
814 "TARGET_SSE && flag_finite_math_only
815 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
817 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
818 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
819 [(set_attr "isa" "noavx,avx")
820 (set_attr "type" "sseadd")
821 (set_attr "prefix" "orig,vex")
822 (set_attr "mode" "<MODE>")])
;; Packed float min/max insn used without -ffinite-math-only.  Unlike the
;; _finite variant there is no commutative marker and operand 1 must be a
;; register, so the operand order is kept as written.
;; NOTE(review): the rtx code line and the "@" template opener (listing
;; lines 826 and 830) are missing from this copy.
824 (define_insn "*<code><mode>3"
825 [(set (match_operand:VF 0 "register_operand" "=x,x")
827 (match_operand:VF 1 "register_operand" "0,x")
828 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
829 "TARGET_SSE && !flag_finite_math_only"
831 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
832 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
833 [(set_attr "isa" "noavx,avx")
834 (set_attr "type" "sseadd")
835 (set_attr "prefix" "orig,vex")
836 (set_attr "mode" "<MODE>")])
;; Scalar min/max on 128-bit float vectors (VF_128), using the ss/sd suffix;
;; the "mode" attribute is the scalar mode.  The "type" attribute is "sse"
;; here, whereas the packed min/max insns use "sseadd".
;; NOTE(review): the lines wrapping the operands, the condition and the "@"
;; template opener are missing from this copy.
838 (define_insn "<sse>_vm<code><mode>3"
839 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
842 (match_operand:VF_128 1 "register_operand" "0,x")
843 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
848 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
849 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
850 [(set_attr "isa" "noavx,avx")
851 (set_attr "type" "sse")
852 (set_attr "prefix" "orig,vex")
853 (set_attr "mode" "<ssescalarmode>")])
855 ;; These versions of the min/max patterns implement exactly the operations
856 ;; min = (op1 < op2 ? op1 : op2)
857 ;; max = (!(op1 < op2) ? op1 : op2)
858 ;; Their operands are not commutative, and thus they may be used in the
859 ;; presence of -0.0 and NaN.
;;
;; Minimum variant: operand 1 must be a register and there is no "%" marker,
;; so the operand order is preserved.  Emits min<suffix> or the v-prefixed
;; three-operand form.
;; NOTE(review): the unspec opener and name, the condition and the "@"
;; template opener are missing from this copy.
861 (define_insn "*ieee_smin<mode>3"
862 [(set (match_operand:VF 0 "register_operand" "=x,x")
864 [(match_operand:VF 1 "register_operand" "0,x")
865 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
869 min<ssemodesuffix>\t{%2, %0|%0, %2}
870 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
871 [(set_attr "isa" "noavx,avx")
872 (set_attr "type" "sseadd")
873 (set_attr "prefix" "orig,vex")
874 (set_attr "mode" "<MODE>")])
;; Order-preserving maximum, the counterpart of *ieee_smin<mode>3: operand 1
;; must be a register and there is no "%" marker.  Emits max<suffix> or the
;; v-prefixed three-operand form.
;; NOTE(review): the unspec opener and name, the condition and the "@"
;; template opener are missing from this copy.
876 (define_insn "*ieee_smax<mode>3"
877 [(set (match_operand:VF 0 "register_operand" "=x,x")
879 [(match_operand:VF 1 "register_operand" "0,x")
880 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
884 max<ssemodesuffix>\t{%2, %0|%0, %2}
885 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
886 [(set_attr "isa" "noavx,avx")
887 (set_attr "type" "sseadd")
888 (set_attr "prefix" "orig,vex")
889 (set_attr "mode" "V4DF... see mode attr below")])
;; AVX-only V4DF add/subtract insn, emitted as vaddsubpd.  The visible RTL
;; contains the difference (minus) of operands 1 and 2; the other arm and the
;; per-element selection are on lines not present here.
;; NOTE(review): listing lines 893-894 and 898-899 (the merge wrapper, its
;; selector constant and the condition) are missing from this copy.
891 (define_insn "avx_addsubv4df3"
892 [(set (match_operand:V4DF 0 "register_operand" "=x")
895 (match_operand:V4DF 1 "register_operand" "x")
896 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
897 (minus:V4DF (match_dup 1) (match_dup 2))
900 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
901 [(set_attr "type" "sseadd")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "V4DF")])
;; V2DF add/subtract insn: addsubpd (non-AVX, operand 1 tied to the
;; destination) or vaddsubpd (AVX).  The visible RTL contains the difference
;; (minus) of operands 1 and 2.
;; NOTE(review): the merge wrapper, its selector constant, the condition and
;; the "@" template opener are missing from this copy.
905 (define_insn "sse3_addsubv2df3"
906 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
909 (match_operand:V2DF 1 "register_operand" "0,x")
910 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
911 (minus:V2DF (match_dup 1) (match_dup 2))
915 addsubpd\t{%2, %0|%0, %2}
916 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sseadd")
919 (set_attr "atom_unit" "complex")
920 (set_attr "prefix" "orig,vex")
921 (set_attr "mode" "V2DF")])
;; AVX-only V8SF add/subtract insn, emitted as vaddsubps.  The visible RTL
;; contains the difference (minus) of operands 1 and 2.
;; NOTE(review): listing lines 925-926 and 930-931 (the merge wrapper, its
;; selector constant and the condition) are missing from this copy.
923 (define_insn "avx_addsubv8sf3"
924 [(set (match_operand:V8SF 0 "register_operand" "=x")
927 (match_operand:V8SF 1 "register_operand" "x")
928 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
929 (minus:V8SF (match_dup 1) (match_dup 2))
932 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
933 [(set_attr "type" "sseadd")
934 (set_attr "prefix" "vex")
935 (set_attr "mode" "V8SF")])
;; V4SF add/subtract insn: addsubps (non-AVX, operand 1 tied to the
;; destination) or vaddsubps (AVX).  "prefix_rep" is 1 for the non-AVX
;; alternative and left at its default ("*") for the AVX one.
;; NOTE(review): the merge wrapper, its selector constant, the condition and
;; the "@" template opener are missing from this copy.
937 (define_insn "sse3_addsubv4sf3"
938 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
941 (match_operand:V4SF 1 "register_operand" "0,x")
942 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
943 (minus:V4SF (match_dup 1) (match_dup 2))
947 addsubps\t{%2, %0|%0, %2}
948 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
949 [(set_attr "isa" "noavx,avx")
950 (set_attr "type" "sseadd")
951 (set_attr "prefix" "orig,vex")
952 (set_attr "prefix_rep" "1,*")
953 (set_attr "mode" "V4SF")])
;; AVX-only V4DF horizontal add/subtract, emitted as vhaddpd/vhsubpd.  The
;; visible RTL selects DFmode element pairs (0,1) and (2,3) from operand 1 and
;; the same pairs from operand 2.
;; NOTE(review): the vec_concat/plusminus wrapper lines that combine these
;; selections, and the condition, are missing from this copy, so the order
;; of the result elements cannot be read off here.
955 (define_insn "avx_h<plusminus_insn>v4df3"
956 [(set (match_operand:V4DF 0 "register_operand" "=x")
961 (match_operand:V4DF 1 "register_operand" "x")
962 (parallel [(const_int 0)]))
963 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
965 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
966 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
970 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
971 (parallel [(const_int 0)]))
972 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
974 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
975 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
977 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
978 [(set_attr "type" "sseadd")
979 (set_attr "prefix" "vex")
980 (set_attr "mode" "V4DF")])
;; V2DF horizontal add/subtract: haddpd/hsubpd (non-AVX, operand 1 tied to
;; the destination) or the v-prefixed three-operand form (AVX).  The visible
;; RTL selects DFmode elements 0 and 1 of operand 1, then elements 0 and 1 of
;; operand 2.
;; NOTE(review): the wrapper lines combining the selections, the condition
;; and the "@" template opener are missing from this copy.
982 (define_insn "sse3_h<plusminus_insn>v2df3"
983 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
987 (match_operand:V2DF 1 "register_operand" "0,x")
988 (parallel [(const_int 0)]))
989 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
992 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
993 (parallel [(const_int 0)]))
994 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
997 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
998 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
999 [(set_attr "isa" "noavx,avx")
1000 (set_attr "type" "sseadd")
1001 (set_attr "prefix" "orig,vex")
1002 (set_attr "mode" "V2DF")])
;; AVX-only V8SF horizontal add/subtract, emitted as vhaddps/vhsubps.  The
;; visible selections appear in this order: elements 0-3 of operand 1,
;; elements 0-3 of operand 2, elements 4-7 of operand 1, elements 4-7 of
;; operand 2 -- i.e. the low four of each operand are handled before the high
;; four.
;; NOTE(review): the vec_concat/plusminus wrapper lines that combine these
;; selections, and the condition, are missing from this copy.
1004 (define_insn "avx_h<plusminus_insn>v8sf3"
1005 [(set (match_operand:V8SF 0 "register_operand" "=x")
1011 (match_operand:V8SF 1 "register_operand" "x")
1012 (parallel [(const_int 0)]))
1013 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1015 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1016 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1020 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1021 (parallel [(const_int 0)]))
1022 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1024 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1025 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1029 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1030 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1032 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1033 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1036 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1037 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1039 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1040 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1042 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1043 [(set_attr "type" "sseadd")
1044 (set_attr "prefix" "vex")
1045 (set_attr "mode" "V8SF")])
;; V4SF horizontal add/subtract: haddps/hsubps (non-AVX, operand 1 tied to
;; the destination) or the v-prefixed three-operand form (AVX).  The visible
;; RTL selects SFmode element pairs (0,1) and (2,3) of operand 1, then the
;; same pairs of operand 2.  "prefix_rep" is 1 for the non-AVX alternative.
;; NOTE(review): the wrapper lines combining the selections, the condition
;; and the "@" template opener are missing from this copy.
1047 (define_insn "sse3_h<plusminus_insn>v4sf3"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1053 (match_operand:V4SF 1 "register_operand" "0,x")
1054 (parallel [(const_int 0)]))
1055 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1057 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1058 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1062 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1063 (parallel [(const_int 0)]))
1064 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1066 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1067 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1070 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1071 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1072 [(set_attr "isa" "noavx,avx")
1073 (set_attr "type" "sseadd")
1074 (set_attr "atom_unit" "complex")
1075 (set_attr "prefix" "orig,vex")
1076 (set_attr "prefix_rep" "1,*")
1077 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V4DF, built from three insns:
;;   1. tmp  = horizontal add of operand 1 with itself
;;   2. tmp2 = vperm2f128 of tmp with itself, immediate 1
;;   3. operand 0 = tmp + tmp2
;; NOTE(review): the condition, the braces and the end of the preparation
;; block (listing lines 1082-1083 and 1089-1090) are missing from this copy.
1079 (define_expand "reduc_splus_v4df"
1080 [(match_operand:V4DF 0 "register_operand" "")
1081 (match_operand:V4DF 1 "register_operand" "")]
1084 rtx tmp = gen_reg_rtx (V4DFmode);
1085 rtx tmp2 = gen_reg_rtx (V4DFmode);
1086 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1087 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1088 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of operand 1 with
;; itself, written straight into operand 0.
;; NOTE(review): the condition, the braces and the end of the preparation
;; block (listing lines 1095-1096 and 1098-1099) are missing from this copy.
1092 (define_expand "reduc_splus_v2df"
1093 [(match_operand:V2DF 0 "register_operand" "")
1094 (match_operand:V2DF 1 "register_operand" "")]
1097 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of the V8SF operand 1 into operand 0.  Two successive
;; horizontal adds (operand 1 with itself into TMP, then TMP with itself
;; into TMP2) are followed by a vperm2f128 of TMP2 with itself
;; (immediate 1) that reuses TMP as its destination, and a final vector
;; add of TMP and TMP2.
1101 (define_expand "reduc_splus_v8sf"
1102 [(match_operand:V8SF 0 "register_operand" "")
1103 (match_operand:V8SF 1 "register_operand" "")]
1106 rtx tmp = gen_reg_rtx (V8SFmode);
1107 rtx tmp2 = gen_reg_rtx (V8SFmode);
1108 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1109 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1110 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1111 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of the V4SF operand 1 into operand 0.  Two strategies
;; appear in the body: a pair of sse3_haddv4sf3 insns chained through a
;; temporary, and the generic helper ix86_expand_reduc_v4sf driven by
;; gen_addv4sf3.
;; NOTE(review): presumably the hadd sequence is chosen when SSE3 is
;; available and the helper otherwise -- confirm against the guard.
1115 (define_expand "reduc_splus_v4sf"
1116 [(match_operand:V4SF 0 "register_operand" "")
1117 (match_operand:V4SF 1 "register_operand" "")]
1122 rtx tmp = gen_reg_rtx (V4SFmode);
1123 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1124 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1127 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of the V4SF operand 1 into operand 0, delegated to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining insn.
1132 (define_expand "reduc_smax_v4sf"
1133 [(match_operand:V4SF 0 "register_operand" "")
1134 (match_operand:V4SF 1 "register_operand" "")]
1137 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of the V4SF operand 1 into operand 0, delegated to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining insn.
1141 (define_expand "reduc_smin_v4sf"
1142 [(match_operand:V4SF 0 "register_operand" "")
1143 (match_operand:V4SF 1 "register_operand" "")]
1146 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1152 ;; Parallel floating point comparisons
1154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed compare over the VF modes.  Operand 3 is the
;; comparison selector, an immediate accepted by const_0_to_31_operand,
;; and is printed as an explicit operand of vcmp<ssemodesuffix>.
1156 (define_insn "avx_cmp<mode>3"
1157 [(set (match_operand:VF 0 "register_operand" "=x")
1159 [(match_operand:VF 1 "register_operand" "x")
1160 (match_operand:VF 2 "nonimmediate_operand" "xm")
1161 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1164 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1165 [(set_attr "type" "ssecmp")
1166 (set_attr "length_immediate" "1")
1167 (set_attr "prefix" "vex")
1168 (set_attr "mode" "<MODE>")])
;; Scalar counterpart of avx_cmp<mode>3 over the 128-bit float modes:
;; emits vcmp<ssescalarmodesuffix> with an explicit selector immediate
;; (operand 3) and is tagged with the scalar mode.
1170 (define_insn "avx_vmcmp<mode>3"
1171 [(set (match_operand:VF_128 0 "register_operand" "=x")
1174 [(match_operand:VF_128 1 "register_operand" "x")
1175 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1176 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1181 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1182 [(set_attr "type" "ssecmp")
1183 (set_attr "length_immediate" "1")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "<ssescalarmode>")])
;; Mask-producing packed compare for comparison codes whose RTX class
;; is RTX_COMM_COMPARE (see the insn condition).  Operand 1 carries the
;; `%' commutative marker.  The comparison name is printed from
;; operand 3 via %D3.  Alternative 0 is the legacy two-operand form,
;; alternative 1 the VEX three-operand form.
1187 (define_insn "*<sse>_maskcmp<mode>3_comm"
1188 [(set (match_operand:VF 0 "register_operand" "=x,x")
1189 (match_operator:VF 3 "sse_comparison_operator"
1190 [(match_operand:VF 1 "register_operand" "%0,x")
1191 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1193 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
1195 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1196 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1197 [(set_attr "isa" "noavx,avx")
1198 (set_attr "type" "ssecmp")
1199 (set_attr "length_immediate" "1")
1200 (set_attr "prefix" "orig,vex")
1201 (set_attr "mode" "<MODE>")])
;; General mask-producing packed compare over the VF modes.  Unlike the
;; _comm variant, operand 1 has no commutative marker.  The comparison
;; name is printed from operand 3 via %D3.
1203 (define_insn "<sse>_maskcmp<mode>3"
1204 [(set (match_operand:VF 0 "register_operand" "=x,x")
1205 (match_operator:VF 3 "sse_comparison_operator"
1206 [(match_operand:VF 1 "register_operand" "0,x")
1207 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1210 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1211 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1212 [(set_attr "isa" "noavx,avx")
1213 (set_attr "type" "ssecmp")
1214 (set_attr "length_immediate" "1")
1215 (set_attr "prefix" "orig,vex")
1216 (set_attr "mode" "<MODE>")])
;; Scalar mask-producing compare over the 128-bit float modes.  The
;; comparison name is printed from operand 3 via %D3.  Alternative 0 is
;; the legacy two-operand form (operand 1 tied to the destination),
;; alternative 1 the VEX three-operand form.
;; Both alternatives encode the comparison predicate as a one-byte
;; immediate, so length_immediate is 1 for both, in agreement with
;; avx_vmcmp<mode>3 and <sse>_maskcmp<mode>3.
1218 (define_insn "<sse>_vmmaskcmp<mode>3"
1219 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1221 (match_operator:VF_128 3 "sse_comparison_operator"
1222 [(match_operand:VF_128 1 "register_operand" "0,x")
1223 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1228 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1229 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1230 [(set_attr "isa" "noavx,avx")
1231 (set_attr "type" "ssecmp")
1232 (set_attr "length_immediate" "1")
1233 (set_attr "prefix" "orig,vex")
1234 (set_attr "mode" "<ssescalarmode>")])
;; Scalar compare that sets FLAGS_REG in CCFP mode from element 0 of
;; operand 0 and element 0 of operand 1.  Enabled for modes satisfying
;; SSE_FLOAT_MODE_P.  prefix_data16 is chosen by whether the mode
;; attribute is DF.
1236 (define_insn "<sse>_comi"
1237 [(set (reg:CCFP FLAGS_REG)
1240 (match_operand:<ssevecmode> 0 "register_operand" "x")
1241 (parallel [(const_int 0)]))
1243 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1244 (parallel [(const_int 0)]))))]
1245 "SSE_FLOAT_MODE_P (<MODE>mode)"
1246 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1247 [(set_attr "type" "ssecomi")
1248 (set_attr "prefix" "maybe_vex")
1249 (set_attr "prefix_rep" "0")
1250 (set (attr "prefix_data16")
1251 (if_then_else (eq_attr "mode" "DF")
1253 (const_string "0")))
1254 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but sets FLAGS_REG in CCFPU mode and emits
;; ucomi<ssemodesuffix>.  Compares element 0 of operand 0 with
;; element 0 of operand 1.
1256 (define_insn "<sse>_ucomi"
1257 [(set (reg:CCFPU FLAGS_REG)
1260 (match_operand:<ssevecmode> 0 "register_operand" "x")
1261 (parallel [(const_int 0)]))
1263 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1264 (parallel [(const_int 0)]))))]
1265 "SSE_FLOAT_MODE_P (<MODE>mode)"
1266 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "ssecomi")
1268 (set_attr "prefix" "maybe_vex")
1269 (set_attr "prefix_rep" "0")
1270 (set (attr "prefix_data16")
1271 (if_then_else (eq_attr "mode" "DF")
1273 (const_string "0")))
1274 (set_attr "mode" "<MODE>")])
;; Vector conditional select over the VF modes.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2
;; are the two value operands.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose boolean result is captured in OK.
1276 (define_expand "vcond<mode>"
1277 [(set (match_operand:VF 0 "register_operand" "")
1279 (match_operator 3 ""
1280 [(match_operand:VF 4 "nonimmediate_operand" "")
1281 (match_operand:VF 5 "nonimmediate_operand" "")])
1282 (match_operand:VF 1 "general_operand" "")
1283 (match_operand:VF 2 "general_operand" "")))]
1286 bool ok = ix86_expand_fp_vcond (operands);
1291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1293 ;; Parallel floating point logical operations
1295 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed and-not over the VF modes.  The output template is built at
;; output time: the mnemonic suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssemodesuffix> otherwise,
;; and the selected format (legacy or VEX, by which_alternative) is
;; rendered into a static 32-byte buffer with snprintf.
1297 (define_insn "<sse>_andnot<mode>3"
1298 [(set (match_operand:VF 0 "register_operand" "=x,x")
1301 (match_operand:VF 1 "register_operand" "0,x"))
1302 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1305 static char buf[32];
1308 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1310 switch (which_alternative)
1313 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1316 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1322 snprintf (buf, sizeof (buf), insn, suffix);
1325 [(set_attr "isa" "noavx,avx")
1326 (set_attr "type" "sselog")
1327 (set_attr "prefix" "orig,vex")
1328 (set_attr "mode" "<MODE>")])
;; Expander for the packed float logical operations named by <code>
;; over the VF modes.  The only preparation is a call to
;; ix86_fixup_binary_operands_no_copy on the operands.
1330 (define_expand "<code><mode>3"
1331 [(set (match_operand:VF 0 "register_operand" "")
1333 (match_operand:VF 1 "nonimmediate_operand" "")
1334 (match_operand:VF 2 "nonimmediate_operand" "")))]
1336 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the packed float logical operations over the VF modes,
;; valid when ix86_binary_operator_ok accepts the operands.  Operand 1
;; is marked commutative.  As for the and-not pattern, the suffix is
;; "ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssemodesuffix>
;; otherwise, and the template is rendered into a static 32-byte buffer.
1338 (define_insn "*<code><mode>3"
1339 [(set (match_operand:VF 0 "register_operand" "=x,x")
1341 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1342 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1343 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1345 static char buf[32];
1348 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1350 switch (which_alternative)
1353 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1356 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1362 snprintf (buf, sizeof (buf), insn, suffix);
1365 [(set_attr "isa" "noavx,avx")
1366 (set_attr "type" "sselog")
1367 (set_attr "prefix" "orig,vex")
1368 (set_attr "mode" "<MODE>")])
;; Vector copysign over the VF modes.  Operand 3 is a mask obtained
;; from ix86_build_signbit_mask (<MODE>mode, 1, 0); operands 4 and 5
;; are fresh pseudos.  The inverted mask is combined with operand 1,
;; the mask is ANDed with operand 2, and operand 0 is the IOR of the
;; two intermediate results.
1370 (define_expand "copysign<mode>3"
1373 (not:VF (match_dup 3))
1374 (match_operand:VF 1 "nonimmediate_operand" "")))
1376 (and:VF (match_dup 3)
1377 (match_operand:VF 2 "nonimmediate_operand" "")))
1378 (set (match_operand:VF 0 "register_operand" "")
1379 (ior:VF (match_dup 4) (match_dup 5)))]
1382 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1384 operands[4] = gen_reg_rtx (<MODE>mode);
1385 operands[5] = gen_reg_rtx (<MODE>mode);
1388 ;; Also define scalar versions. These are used for abs, neg, and
1389 ;; conditional move. Using subregs into vector modes causes register
1390 ;; allocation lossage. These patterns do not allow memory operands
1391 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not.  All operands are registers.  The emitted
;; instruction is the packed form: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix>
;; otherwise, and the insn mode attribute is <ssevecmode>.
1393 (define_insn "*andnot<mode>3"
1394 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1397 (match_operand:MODEF 1 "register_operand" "0,x"))
1398 (match_operand:MODEF 2 "register_operand" "x,x")))]
1399 "SSE_FLOAT_MODE_P (<MODE>mode)"
1401 static char buf[32];
1404 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1406 switch (which_alternative)
1409 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1412 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1418 snprintf (buf, sizeof (buf), insn, suffix);
1421 [(set_attr "isa" "noavx,avx")
1422 (set_attr "type" "sselog")
1423 (set_attr "prefix" "orig,vex")
1424 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) logical operations.  All operands are registers, and
;; operand 1 is marked commutative.  The emitted instruction is the
;; packed <logic> form with suffix "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL and <ssevecmodesuffix>
;; otherwise.
1426 (define_insn "*<code><mode>3"
1427 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1429 (match_operand:MODEF 1 "register_operand" "%0,x")
1430 (match_operand:MODEF 2 "register_operand" "x,x")))]
1431 "SSE_FLOAT_MODE_P (<MODE>mode)"
1433 static char buf[32];
1436 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1438 switch (which_alternative)
1441 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1444 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1450 snprintf (buf, sizeof (buf), insn, suffix);
1453 [(set_attr "isa" "noavx,avx")
1454 (set_attr "type" "sselog")
1455 (set_attr "prefix" "orig,vex")
1456 (set_attr "mode" "<ssevecmode>")])
1458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1460 ;; FMA4 floating point multiply/accumulate instructions. This
1461 ;; includes the scalar version of the instructions as well as the
1464 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1466 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1467 ;; combine to generate a multiply/add with two memory references. We then
1468 ;; split this insn, into loading up the destination register with one of the
1469 ;; memory operations. If we don't manage to split the insn, reload will
1470 ;; generate the appropriate moves. The reason this is needed, is that combine
1471 ;; has already folded one of the memory references into both the multiply and
1472 ;; add insns, and it can't generate a new pseudo. I.e.:
1473 ;; (set (reg1) (mem (addr1)))
1474 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1475 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1477 ;; ??? This is historic, pre-dating the gimple fma transformation.
1478 ;; We could now properly represent that only one memory operand is
1479 ;; allowed and not be penalized during optimization.
1481 ;; Intrinsic FMA operations.
1483 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-name expander fma<mode>4 over FMAMODE: operands 1, 2 and 3
;; are used unmodified.  Requires FMA or FMA4 together with
;; TARGET_SSE_MATH.
1484 (define_expand "fma<mode>4"
1485 [(set (match_operand:FMAMODE 0 "register_operand")
1487 (match_operand:FMAMODE 1 "nonimmediate_operand")
1488 (match_operand:FMAMODE 2 "nonimmediate_operand")
1489 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1490 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fms<mode>4: as fma<mode>4 but with operand 3
;; negated.  Requires FMA or FMA4 together with TARGET_SSE_MATH.
1492 (define_expand "fms<mode>4"
1493 [(set (match_operand:FMAMODE 0 "register_operand")
1495 (match_operand:FMAMODE 1 "nonimmediate_operand")
1496 (match_operand:FMAMODE 2 "nonimmediate_operand")
1497 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1498 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnma<mode>4: as fma<mode>4 but with operand 1
;; negated.  Requires FMA or FMA4 together with TARGET_SSE_MATH.
1500 (define_expand "fnma<mode>4"
1501 [(set (match_operand:FMAMODE 0 "register_operand")
1503 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1504 (match_operand:FMAMODE 2 "nonimmediate_operand")
1505 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1506 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-name expander fnms<mode>4: operands 1 and 3 are both
;; negated.  Requires FMA or FMA4 together with TARGET_SSE_MATH.
1508 (define_expand "fnms<mode>4"
1509 [(set (match_operand:FMAMODE 0 "register_operand")
1511 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1512 (match_operand:FMAMODE 2 "nonimmediate_operand")
1513 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1514 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1516 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing multiply-add expander over FMAMODE.  Its condition
;; tests only TARGET_FMA || TARGET_FMA4, with no TARGET_SSE_MATH
;; requirement.
1517 (define_expand "fma4i_fmadd_<mode>"
1518 [(set (match_operand:FMAMODE 0 "register_operand")
1520 (match_operand:FMAMODE 1 "nonimmediate_operand")
1521 (match_operand:FMAMODE 2 "nonimmediate_operand")
1522 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1523 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd<ssemodesuffix> insn: the destination is not tied
;; to any input.  Operand 1 is marked commutative.  The two
;; alternatives allow memory in operand 3 or in operand 2, but not in
;; both.
1525 (define_insn "*fma4i_fmadd_<mode>"
1526 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1528 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1529 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1530 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1532 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1533 [(set_attr "type" "ssemuladd")
1534 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub<ssemodesuffix> insn.  Operand and alternative
;; layout are the same as for *fma4i_fmadd_<mode>.
1536 (define_insn "*fma4i_fmsub_<mode>"
1537 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1539 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1540 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1542 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1544 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1545 [(set_attr "type" "ssemuladd")
1546 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd<ssemodesuffix> insn.  Operand and alternative
;; layout are the same as for *fma4i_fmadd_<mode>.
1548 (define_insn "*fma4i_fnmadd_<mode>"
1549 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1552 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1553 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1554 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1556 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1557 [(set_attr "type" "ssemuladd")
1558 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub<ssemodesuffix> insn.  Operand and alternative
;; layout are the same as for *fma4i_fmadd_<mode>.
1560 (define_insn "*fma4i_fnmsub_<mode>"
1561 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1564 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1565 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1567 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1569 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1570 [(set_attr "type" "ssemuladd")
1571 (set_attr "mode" "<MODE>")])
1573 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1574 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar multiply-add over the 128-bit float modes.
;; The preparation statement sets operand 4 to the zero constant of the
;; vector mode.
1576 (define_expand "fma4i_vmfmadd_<mode>"
1577 [(set (match_operand:VF_128 0 "register_operand")
1580 (match_operand:VF_128 1 "nonimmediate_operand")
1581 (match_operand:VF_128 2 "nonimmediate_operand")
1582 (match_operand:VF_128 3 "nonimmediate_operand"))
1587 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar four-operand vfmadd<ssescalarmodesuffix> insn.  Operand 4
;; must satisfy const0_operand.  The alternatives allow memory in
;; operand 3 or in operand 2, but not in both.
1590 (define_insn "*fma4i_vmfmadd_<mode>"
1591 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1594 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1595 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1596 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1597 (match_operand:VF_128 4 "const0_operand" "")
1600 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1601 [(set_attr "type" "ssemuladd")
1602 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfmsub<ssescalarmodesuffix> insn.  Operand 4
;; must satisfy const0_operand.  Alternatives are the same as for
;; *fma4i_vmfmadd_<mode>.
1604 (define_insn "*fma4i_vmfmsub_<mode>"
1605 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1608 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1609 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1611 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1612 (match_operand:VF_128 4 "const0_operand" "")
1615 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1616 [(set_attr "type" "ssemuladd")
1617 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmadd<ssescalarmodesuffix> insn.  Operand 4
;; must satisfy const0_operand.  Alternatives are the same as for
;; *fma4i_vmfmadd_<mode>.
1619 (define_insn "*fma4i_vmfnmadd_<mode>"
1620 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1624 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1625 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1626 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1627 (match_operand:VF_128 4 "const0_operand" "")
1630 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1631 [(set_attr "type" "ssemuladd")
1632 (set_attr "mode" "<MODE>")])
;; Scalar four-operand vfnmsub<ssescalarmodesuffix> insn.  Operand 4
;; must satisfy const0_operand.  Alternatives are the same as for
;; *fma4i_vmfmadd_<mode>.
1634 (define_insn "*fma4i_vmfnmsub_<mode>"
1635 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1639 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1640 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1642 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1643 (match_operand:VF_128 4 "const0_operand" "")
1646 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1647 [(set_attr "type" "ssemuladd")
1648 (set_attr "mode" "<MODE>")])
1650 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1652 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1654 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1656 ;; It would be possible to represent these without the UNSPEC as
1659 ;; (fma op1 op2 op3)
1660 ;; (fma op1 op2 (neg op3))
1663 ;; But this doesn't seem useful in practice.
;; Expander for the packed multiply with alternating add/subtract over
;; the VF modes.  Takes three nonimmediate inputs and is available when
;; TARGET_FMA || TARGET_FMA4.
1665 (define_expand "fmaddsub_<mode>"
1666 [(set (match_operand:VF 0 "register_operand")
1668 [(match_operand:VF 1 "nonimmediate_operand")
1669 (match_operand:VF 2 "nonimmediate_operand")
1670 (match_operand:VF 3 "nonimmediate_operand")]
1672 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub<ssemodesuffix> insn.  The destination is not
;; tied to any input, and memory is allowed in operand 3 or in
;; operand 2, but not in both.
1674 (define_insn "*fma4_fmaddsub_<mode>"
1675 [(set (match_operand:VF 0 "register_operand" "=x,x")
1677 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1678 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1679 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1682 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1683 [(set_attr "type" "ssemuladd")
1684 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd<ssemodesuffix> insn.  Operand and alternative
;; layout are the same as for *fma4_fmaddsub_<mode>.
1686 (define_insn "*fma4_fmsubadd_<mode>"
1687 [(set (match_operand:VF 0 "register_operand" "=x,x")
1689 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1690 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1692 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1695 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1696 [(set_attr "type" "ssemuladd")
1697 (set_attr "mode" "<MODE>")])
1699 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1701 ;; FMA3 floating point multiply/accumulate instructions.
1703 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add.  The destination is always tied to one input, and
;; the alternative decides which one and therefore which mnemonic form
;; is emitted:
;;   alt 0: op1 tied, memory allowed in op2 -> 132 form
;;   alt 1: op1 tied, memory allowed in op3 -> 213 form
;;   alt 2: op3 tied, memory allowed in op2 -> 231 form
;; The FMA3 forms are 132, 213 and 231; the second alternative uses
;; 213, matching *fma_fmaddsub_<mode> for the same operand layout.
1705 (define_insn "*fma_fmadd_<mode>"
1706 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1708 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1709 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1710 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1713 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1714 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1715 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1716 [(set_attr "type" "ssemuladd")
1717 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract.  Alternatives are laid out as for
;; *fma_fmadd_<mode>: op1 tied with memory in op2 (132), op1 tied with
;; memory in op3 (213), op3 tied with memory in op2 (231).
;; The second alternative uses the 213 mnemonic form, one of the three
;; FMA3 forms (132, 213, 231).
1719 (define_insn "*fma_fmsub_<mode>"
1720 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1722 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1723 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1725 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1728 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1729 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1730 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1731 [(set_attr "type" "ssemuladd")
1732 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-add (emits vfnmadd*).  The pattern name says
;; fnmadd so that it matches the instruction it emits and does not
;; share a name with the plain multiply-add pattern.
;; Alternatives: op1 tied with memory in op2 (132), op1 tied with
;; memory in op3 (213), op3 tied with memory in op2 (231).  The second
;; alternative uses 213, one of the three FMA3 forms (132, 213, 231).
1734 (define_insn "*fma_fnmadd_<mode>"
1735 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1738 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1739 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1740 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1743 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1744 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1745 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1746 [(set_attr "type" "ssemuladd")
1747 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-subtract (emits vfnmsub*).  The pattern name
;; says fnmsub so that it matches the instruction it emits and does not
;; share a name with the plain multiply-subtract pattern.
;; Alternatives: op1 tied with memory in op2 (132), op1 tied with
;; memory in op3 (213), op3 tied with memory in op2 (231).  The second
;; alternative uses 213, one of the three FMA3 forms (132, 213, 231).
1749 (define_insn "*fma_fnmsub_<mode>"
1750 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1753 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1754 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1756 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1759 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1760 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1761 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1762 [(set_attr "type" "ssemuladd")
1763 (set_attr "mode" "<MODE>")])
;; FMA3 packed multiply with alternating add/subtract over the VF
;; modes.  The destination is tied to operand 1 (alternatives 0 and 1)
;; or to operand 3 (alternative 2); the emitted form is 132, 213 or 231
;; respectively.
1765 (define_insn "*fma_fmaddsub_<mode>"
1766 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1768 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1769 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1770 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1774 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1775 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1776 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1777 [(set_attr "type" "ssemuladd")
1778 (set_attr "mode" "<MODE>")])
;; FMA3 packed multiply with alternating subtract/add over the VF
;; modes.  Alternative layout matches *fma_fmaddsub_<mode>, emitting
;; the 132, 213 or 231 form.
1780 (define_insn "*fma_fmsubadd_<mode>"
1781 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1783 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1784 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1786 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1790 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1791 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1792 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1793 [(set_attr "type" "ssemuladd")
1794 (set_attr "mode" "<MODE>")])
1796 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1798 ;; Parallel single-precision floating point conversion operations
1800 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") to V2SF and
;; combines it with the V4SF operand 1, which is tied to the
;; destination.
1802 (define_insn "sse_cvtpi2ps"
1803 [(set (match_operand:V4SF 0 "register_operand" "=x")
1806 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1807 (match_operand:V4SF 1 "register_operand" "0")
1810 "cvtpi2ps\t{%2, %0|%0, %2}"
1811 [(set_attr "type" "ssecvt")
1812 (set_attr "mode" "V4SF")])
;; cvtps2pi: the V2SI result (constraint "y") is elements 0 and 1 of a
;; V4SI unspec applied to the V4SF operand 1.  Marked as using the mmx
;; unit.
1814 (define_insn "sse_cvtps2pi"
1815 [(set (match_operand:V2SI 0 "register_operand" "=y")
1817 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1819 (parallel [(const_int 0) (const_int 1)])))]
1821 "cvtps2pi\t{%1, %0|%0, %1}"
1822 [(set_attr "type" "ssecvt")
1823 (set_attr "unit" "mmx")
1824 (set_attr "mode" "DI")])
;; cvttps2pi: the V2SI result (constraint "y") is elements 0 and 1 of
;; (fix:V4SI operand 1).  Marked as using the mmx unit, with
;; prefix_rep 0.
1826 (define_insn "sse_cvttps2pi"
1827 [(set (match_operand:V2SI 0 "register_operand" "=y")
1829 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1830 (parallel [(const_int 0) (const_int 1)])))]
1832 "cvttps2pi\t{%1, %0|%0, %1}"
1833 [(set_attr "type" "ssecvt")
1834 (set_attr "unit" "mmx")
1835 (set_attr "prefix_rep" "0")
1836 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 to SF and combines it with the
;; V4SF operand 1.  Alternatives 0 and 1 (noavx) take the source from a
;; register or from memory with operand 1 tied to the destination; they
;; are separate so the decode attributes can differ.  Alternative 2
;; (avx) is the three-operand VEX form.
1838 (define_insn "sse_cvtsi2ss"
1839 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1842 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1843 (match_operand:V4SF 1 "register_operand" "0,0,x")
1847 cvtsi2ss\t{%2, %0|%0, %2}
1848 cvtsi2ss\t{%2, %0|%0, %2}
1849 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1850 [(set_attr "isa" "noavx,noavx,avx")
1851 (set_attr "type" "sseicvt")
1852 (set_attr "athlon_decode" "vector,double,*")
1853 (set_attr "amdfam10_decode" "vector,double,*")
1854 (set_attr "bdver1_decode" "double,direct,*")
1855 (set_attr "prefix" "orig,orig,vex")
1856 (set_attr "mode" "SF")])
;; 64-bit source variant of sse_cvtsi2ss (DI operand 2), available only
;; when TARGET_SSE && TARGET_64BIT.  The legacy alternatives set
;; prefix_rex to 1; the VEX alternative sets length_vex to 4.
1858 (define_insn "sse_cvtsi2ssq"
1859 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1862 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1863 (match_operand:V4SF 1 "register_operand" "0,0,x")
1865 "TARGET_SSE && TARGET_64BIT"
1867 cvtsi2ssq\t{%2, %0|%0, %2}
1868 cvtsi2ssq\t{%2, %0|%0, %2}
1869 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1870 [(set_attr "isa" "noavx,noavx,avx")
1871 (set_attr "type" "sseicvt")
1872 (set_attr "athlon_decode" "vector,double,*")
1873 (set_attr "amdfam10_decode" "vector,double,*")
1874 (set_attr "bdver1_decode" "double,direct,*")
1875 (set_attr "length_vex" "*,*,4")
1876 (set_attr "prefix_rex" "1,1,*")
1877 (set_attr "prefix" "orig,orig,vex")
1878 (set_attr "mode" "SF")])
;; cvtss2si: SI result from element 0 of the V4SF operand 1 through
;; UNSPEC_FIX_NOTRUNC.  The register and memory sources are separate
;; alternatives so the decode attributes can differ.
1880 (define_insn "sse_cvtss2si"
1881 [(set (match_operand:SI 0 "register_operand" "=r,r")
1884 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1885 (parallel [(const_int 0)]))]
1886 UNSPEC_FIX_NOTRUNC))]
1888 "%vcvtss2si\t{%1, %0|%0, %1}"
1889 [(set_attr "type" "sseicvt")
1890 (set_attr "athlon_decode" "double,vector")
1891 (set_attr "bdver1_decode" "double,double")
1892 (set_attr "prefix_rep" "1")
1893 (set_attr "prefix" "maybe_vex")
1894 (set_attr "mode" "SI")])
;; Same instruction as sse_cvtss2si, but the source is a scalar SF
;; operand rather than element 0 of a V4SF vector.
1896 (define_insn "sse_cvtss2si_2"
1897 [(set (match_operand:SI 0 "register_operand" "=r,r")
1898 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1899 UNSPEC_FIX_NOTRUNC))]
1901 "%vcvtss2si\t{%1, %0|%0, %1}"
1902 [(set_attr "type" "sseicvt")
1903 (set_attr "athlon_decode" "double,vector")
1904 (set_attr "amdfam10_decode" "double,double")
1905 (set_attr "bdver1_decode" "double,double")
1906 (set_attr "prefix_rep" "1")
1907 (set_attr "prefix" "maybe_vex")
1908 (set_attr "mode" "SI")])
;; DI-result variant of sse_cvtss2si, available only when
;; TARGET_SSE && TARGET_64BIT.  The template carries a {q} suffix.
1910 (define_insn "sse_cvtss2siq"
1911 [(set (match_operand:DI 0 "register_operand" "=r,r")
1914 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1915 (parallel [(const_int 0)]))]
1916 UNSPEC_FIX_NOTRUNC))]
1917 "TARGET_SSE && TARGET_64BIT"
1918 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1919 [(set_attr "type" "sseicvt")
1920 (set_attr "athlon_decode" "double,vector")
1921 (set_attr "bdver1_decode" "double,double")
1922 (set_attr "prefix_rep" "1")
1923 (set_attr "prefix" "maybe_vex")
1924 (set_attr "mode" "DI")])
;; DI-result variant of sse_cvtss2si_2 (scalar SF source), available
;; only when TARGET_SSE && TARGET_64BIT.
1926 (define_insn "sse_cvtss2siq_2"
1927 [(set (match_operand:DI 0 "register_operand" "=r,r")
1928 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1929 UNSPEC_FIX_NOTRUNC))]
1930 "TARGET_SSE && TARGET_64BIT"
1931 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1932 [(set_attr "type" "sseicvt")
1933 (set_attr "athlon_decode" "double,vector")
1934 (set_attr "amdfam10_decode" "double,double")
1935 (set_attr "bdver1_decode" "double,double")
1936 (set_attr "prefix_rep" "1")
1937 (set_attr "prefix" "maybe_vex")
1938 (set_attr "mode" "DI")])
;; cvttss2si: SI result computed from element 0 of the V4SF operand 1.
;; The register and memory sources are separate alternatives.
1940 (define_insn "sse_cvttss2si"
1941 [(set (match_operand:SI 0 "register_operand" "=r,r")
1944 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1945 (parallel [(const_int 0)]))))]
1947 "%vcvttss2si\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "sseicvt")
1949 (set_attr "athlon_decode" "double,vector")
1950 (set_attr "amdfam10_decode" "double,double")
1951 (set_attr "bdver1_decode" "double,double")
1952 (set_attr "prefix_rep" "1")
1953 (set_attr "prefix" "maybe_vex")
1954 (set_attr "mode" "SI")])
;; DI-result variant of sse_cvttss2si, available only when
;; TARGET_SSE && TARGET_64BIT.  The template carries a {q} suffix.
1956 (define_insn "sse_cvttss2siq"
1957 [(set (match_operand:DI 0 "register_operand" "=r,r")
1960 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1961 (parallel [(const_int 0)]))))]
1962 "TARGET_SSE && TARGET_64BIT"
1963 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1964 [(set_attr "type" "sseicvt")
1965 (set_attr "athlon_decode" "double,vector")
1966 (set_attr "amdfam10_decode" "double,double")
1967 (set_attr "bdver1_decode" "double,double")
1968 (set_attr "prefix_rep" "1")
1969 (set_attr "prefix" "maybe_vex")
1970 (set_attr "mode" "DI")])
;; 256-bit signed integer to float conversion: V8SI operand 1 to V8SF,
;; always VEX encoded.
1972 (define_insn "avx_cvtdq2ps256"
1973 [(set (match_operand:V8SF 0 "register_operand" "=x")
1974 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1976 "vcvtdq2ps\t{%1, %0|%0, %1}"
1977 [(set_attr "type" "ssecvt")
1978 (set_attr "prefix" "vex")
1979 (set_attr "mode" "V8SF")])
;; 128-bit signed integer to float conversion: V4SI operand 1 to V4SF.
;; The prefix attribute is maybe_vex.
1981 (define_insn "sse2_cvtdq2ps"
1982 [(set (match_operand:V4SF 0 "register_operand" "=x")
1983 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1985 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1986 [(set_attr "type" "ssecvt")
1987 (set_attr "prefix" "maybe_vex")
1988 (set_attr "mode" "V4SF")])
;; Unsigned V4SI to V4SF conversion, expanded as a four-step sequence:
;; a signed float conversion of operand 1, an LT comparison of that
;; result (operand 5) against operand 3, an AND of the comparison
;; result (operand 6) with operand 4, and a final add of operands 5
;; and 7 into operand 0.
;; Operand 3 is a zero vector forced into a register.  Operand 4 is a
;; V4SF constant built from 2**32 (real_ldexp of dconst1 by 32).
;; Operands 5 to 7 are fresh V4SF pseudos.
1990 (define_expand "sse2_cvtudq2ps"
1992 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
1994 (lt:V4SF (match_dup 5) (match_dup 3)))
1996 (and:V4SF (match_dup 6) (match_dup 4)))
1997 (set (match_operand:V4SF 0 "register_operand" "")
1998 (plus:V4SF (match_dup 5) (match_dup 7)))]
2001 REAL_VALUE_TYPE TWO32r;
2005 real_ldexp (&TWO32r, &dconst1, 32);
2006 x = const_double_from_real_value (TWO32r, SFmode);
2008 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2009 operands[4] = force_reg (V4SFmode,
2010 ix86_build_const_vector (V4SFmode, 1, x));
2012 for (i = 5; i < 8; i++)
2013 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit float to integer conversion: V8SF operand 1 to V8SI through
;; UNSPEC_FIX_NOTRUNC, always VEX encoded.
2016 (define_insn "avx_cvtps2dq256"
2017 [(set (match_operand:V8SI 0 "register_operand" "=x")
2018 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2019 UNSPEC_FIX_NOTRUNC))]
2021 "vcvtps2dq\t{%1, %0|%0, %1}"
2022 [(set_attr "type" "ssecvt")
2023 (set_attr "prefix" "vex")
2024 (set_attr "mode" "OI")])
;; 128-bit float to integer conversion: V4SF operand 1 to V4SI through
;; UNSPEC_FIX_NOTRUNC.  prefix_data16 is selected by a test on
;; TARGET_AVX.
2026 (define_insn "sse2_cvtps2dq"
2027 [(set (match_operand:V4SI 0 "register_operand" "=x")
2028 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2029 UNSPEC_FIX_NOTRUNC))]
2031 "%vcvtps2dq\t{%1, %0|%0, %1}"
2032 [(set_attr "type" "ssecvt")
2033 (set (attr "prefix_data16")
2035 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2037 (const_string "1")))
2038 (set_attr "prefix" "maybe_vex")
2039 (set_attr "mode" "TI")])
;; 256-bit float to integer conversion using the fix operator: V8SF
;; operand 1 to V8SI, always VEX encoded.
2041 (define_insn "avx_cvttps2dq256"
2042 [(set (match_operand:V8SI 0 "register_operand" "=x")
2043 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2045 "vcvttps2dq\t{%1, %0|%0, %1}"
2046 [(set_attr "type" "ssecvt")
2047 (set_attr "prefix" "vex")
2048 (set_attr "mode" "OI")])
;; 128-bit float to integer conversion using the fix operator: V4SF
;; operand 1 to V4SI.  prefix_rep and prefix_data16 are each selected
;; by a test on TARGET_AVX.
;; prefix_data16 is specified once, in the conditional form, matching
;; sse2_cvtps2dq.
2050 (define_insn "sse2_cvttps2dq"
2051 [(set (match_operand:V4SI 0 "register_operand" "=x")
2052 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2054 "%vcvttps2dq\t{%1, %0|%0, %1}"
2055 [(set_attr "type" "ssecvt")
2056 (set (attr "prefix_rep")
2058 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2060 (const_string "1")))
2061 (set (attr "prefix_data16")
2063 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2065 (const_string "0")))
2067 (set_attr "prefix" "maybe_vex")
2068 (set_attr "mode" "TI")])
2070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2072 ;; Parallel double-precision floating point conversion operations
2074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: V2SI operand 1 to V2DF.  Alternative 0 takes the source
;; from a "y" register and is the only one marked as using the mmx unit
;; and prefix_data16; alternative 1 takes it from memory.
2076 (define_insn "sse2_cvtpi2pd"
2077 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2078 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2080 "cvtpi2pd\t{%1, %0|%0, %1}"
2081 [(set_attr "type" "ssecvt")
2082 (set_attr "unit" "mmx,*")
2083 (set_attr "prefix_data16" "1,*")
2084 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF operand 1 to a V2SI result in a "y" register through
;; UNSPEC_FIX_NOTRUNC.  Marked as using the mmx unit.
2086 (define_insn "sse2_cvtpd2pi"
2087 [(set (match_operand:V2SI 0 "register_operand" "=y")
2088 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2089 UNSPEC_FIX_NOTRUNC))]
2091 "cvtpd2pi\t{%1, %0|%0, %1}"
2092 [(set_attr "type" "ssecvt")
2093 (set_attr "unit" "mmx")
2094 (set_attr "bdver1_decode" "double")
2095 (set_attr "prefix_data16" "1")
2096 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI operand 1), converting the V2DF source into a
;; V2SI result in a "y" register.  Marked as using the mmx unit.
2098 (define_insn "sse2_cvttpd2pi"
2099 [(set (match_operand:V2SI 0 "register_operand" "=y")
2100 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2102 "cvttpd2pi\t{%1, %0|%0, %1}"
2103 [(set_attr "type" "ssecvt")
2104 (set_attr "unit" "mmx")
2105 (set_attr "bdver1_decode" "double")
2106 (set_attr "prefix_data16" "1")
2107 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI operand 2 to DF and combines it with the
;; V2DF operand 1.  Alternatives 0 and 1 (noavx) take the source from a
;; register or from memory with operand 1 tied to the destination;
;; alternative 2 (avx) is the three-operand VEX form.
2109 (define_insn "sse2_cvtsi2sd"
2110 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2113 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2114 (match_operand:V2DF 1 "register_operand" "0,0,x")
2118 cvtsi2sd\t{%2, %0|%0, %2}
2119 cvtsi2sd\t{%2, %0|%0, %2}
2120 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2121 [(set_attr "isa" "noavx,noavx,avx")
2122 (set_attr "type" "sseicvt")
2123 (set_attr "athlon_decode" "double,direct,*")
2124 (set_attr "amdfam10_decode" "vector,double,*")
2125 (set_attr "bdver1_decode" "double,direct,*")
2126 (set_attr "prefix" "orig,orig,vex")
2127 (set_attr "mode" "DF")])
;; cvtsi2sdq / vcvtsi2sdq: DImode counterpart of sse2_cvtsi2sd, enabled
;; only for TARGET_SSE2 && TARGET_64BIT.  The non-AVX alternatives set
;; prefix_rex to "1"; the AVX alternative sets length_vex to "4".
2129 (define_insn "sse2_cvtsi2sdq"
2130 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2133 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2134 (match_operand:V2DF 1 "register_operand" "0,0,x")
2136 "TARGET_SSE2 && TARGET_64BIT"
2138 cvtsi2sdq\t{%2, %0|%0, %2}
2139 cvtsi2sdq\t{%2, %0|%0, %2}
2140 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2141 [(set_attr "isa" "noavx,noavx,avx")
2142 (set_attr "type" "sseicvt")
2143 (set_attr "athlon_decode" "double,direct,*")
2144 (set_attr "amdfam10_decode" "vector,double,*")
2145 (set_attr "bdver1_decode" "double,direct,*")
2146 (set_attr "length_vex" "*,*,4")
2147 (set_attr "prefix_rex" "1,1,*")
2148 (set_attr "prefix" "orig,orig,vex")
2149 (set_attr "mode" "DF")])
;; cvtsd2si: convert index 0 of the V2DF operand (register or memory) to
;; an SImode general register, expressed as UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm that is intended.
2151 (define_insn "sse2_cvtsd2si"
2152 [(set (match_operand:SI 0 "register_operand" "=r,r")
2155 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2156 (parallel [(const_int 0)]))]
2157 UNSPEC_FIX_NOTRUNC))]
2159 "%vcvtsd2si\t{%1, %0|%0, %1}"
2160 [(set_attr "type" "sseicvt")
2161 (set_attr "athlon_decode" "double,vector")
2162 (set_attr "bdver1_decode" "double,double")
2163 (set_attr "prefix_rep" "1")
2164 (set_attr "prefix" "maybe_vex")
2165 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si that takes a scalar DF operand directly
;; instead of selecting an element of a V2DF vector; same output template.
2167 (define_insn "sse2_cvtsd2si_2"
2168 [(set (match_operand:SI 0 "register_operand" "=r,r")
2169 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2170 UNSPEC_FIX_NOTRUNC))]
2172 "%vcvtsd2si\t{%1, %0|%0, %1}"
2173 [(set_attr "type" "sseicvt")
2174 (set_attr "athlon_decode" "double,vector")
2175 (set_attr "amdfam10_decode" "double,double")
2176 (set_attr "bdver1_decode" "double,double")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "prefix" "maybe_vex")
2179 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode-result counterpart of sse2_cvtsd2si (index 0 of the
;; V2DF operand, UNSPEC_FIX_NOTRUNC); requires TARGET_SSE2 && TARGET_64BIT.
2181 (define_insn "sse2_cvtsd2siq"
2182 [(set (match_operand:DI 0 "register_operand" "=r,r")
2185 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2186 (parallel [(const_int 0)]))]
2187 UNSPEC_FIX_NOTRUNC))]
2188 "TARGET_SSE2 && TARGET_64BIT"
2189 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2190 [(set_attr "type" "sseicvt")
2191 (set_attr "athlon_decode" "double,vector")
2192 (set_attr "bdver1_decode" "double,double")
2193 (set_attr "prefix_rep" "1")
2194 (set_attr "prefix" "maybe_vex")
2195 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq that takes a scalar DF operand directly;
;; requires TARGET_SSE2 && TARGET_64BIT.
2197 (define_insn "sse2_cvtsd2siq_2"
2198 [(set (match_operand:DI 0 "register_operand" "=r,r")
2199 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2200 UNSPEC_FIX_NOTRUNC))]
2201 "TARGET_SSE2 && TARGET_64BIT"
2202 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "athlon_decode" "double,vector")
2205 (set_attr "amdfam10_decode" "double,double")
2206 (set_attr "bdver1_decode" "double,double")
2207 (set_attr "prefix_rep" "1")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "DI")])
;; cvttsd2si: convert index 0 of the V2DF operand (register or memory) to
;; an SImode general register.
2211 (define_insn "sse2_cvttsd2si"
2212 [(set (match_operand:SI 0 "register_operand" "=r,r")
2215 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2216 (parallel [(const_int 0)]))))]
2218 "%vcvttsd2si\t{%1, %0|%0, %1}"
2219 [(set_attr "type" "sseicvt")
2220 (set_attr "athlon_decode" "double,vector")
2221 (set_attr "amdfam10_decode" "double,double")
2222 (set_attr "bdver1_decode" "double,double")
2223 (set_attr "prefix_rep" "1")
2224 (set_attr "prefix" "maybe_vex")
2225 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode-result counterpart of sse2_cvttsd2si; requires
;; TARGET_SSE2 && TARGET_64BIT.
2227 (define_insn "sse2_cvttsd2siq"
2228 [(set (match_operand:DI 0 "register_operand" "=r,r")
2231 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2232 (parallel [(const_int 0)]))))]
2233 "TARGET_SSE2 && TARGET_64BIT"
2234 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2235 [(set_attr "type" "sseicvt")
2236 (set_attr "athlon_decode" "double,vector")
2237 (set_attr "amdfam10_decode" "double,double")
2238 (set_attr "bdver1_decode" "double,double")
2239 (set_attr "prefix_rep" "1")
2240 (set_attr "prefix" "maybe_vex")
2241 (set_attr "mode" "DI")])
;; vcvtdq2pd: convert a whole V4SI operand (register or memory) to V4DF
;; with the RTL `float' operation.
2243 (define_insn "avx_cvtdq2pd256"
2244 [(set (match_operand:V4DF 0 "register_operand" "=x")
2245 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2247 "vcvtdq2pd\t{%1, %0|%0, %1}"
2248 [(set_attr "type" "ssecvt")
2249 (set_attr "prefix" "vex")
2250 (set_attr "mode" "V4DF")])
;; vcvtdq2pd applied to indices 0-3 of a V8SI operand, producing V4DF.
;; The template prints operand 1 with the `x' modifier (%x1) --
;; presumably the 128-bit name of the register; confirm against the
;; operand printer.
2252 (define_insn "*avx_cvtdq2pd256_2"
2253 [(set (match_operand:V4DF 0 "register_operand" "=x")
2256 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2257 (parallel [(const_int 0) (const_int 1)
2258 (const_int 2) (const_int 3)]))))]
2260 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2261 [(set_attr "type" "ssecvt")
2262 (set_attr "prefix" "vex")
2263 (set_attr "mode" "V4DF")])
;; cvtdq2pd: convert indices 0 and 1 of the V4SI operand (register or
;; memory) to V2DF.
2265 (define_insn "sse2_cvtdq2pd"
2266 [(set (match_operand:V2DF 0 "register_operand" "=x")
2269 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2270 (parallel [(const_int 0) (const_int 1)]))))]
2272 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2273 [(set_attr "type" "ssecvt")
2274 (set_attr "prefix" "maybe_vex")
2275 (set_attr "mode" "V2DF")])
;; vcvtpd2dq{y}: V4DF (register or memory) to V4SI, expressed as
;; UNSPEC_FIX_NOTRUNC.
2277 (define_insn "avx_cvtpd2dq256"
2278 [(set (match_operand:V4SI 0 "register_operand" "=x")
2279 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2280 UNSPEC_FIX_NOTRUNC))]
2282 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2283 [(set_attr "type" "ssecvt")
2284 (set_attr "prefix" "vex")
2285 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: the V2DF operand is converted to V2SI via an
;; unspec and the destination is V4SI.  The preparation statement sets
;; operand 2 to the V2SI zero vector.
2287 (define_expand "sse2_cvtpd2dq"
2288 [(set (match_operand:V4SI 0 "register_operand" "")
2290 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2294 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvtpd2dq expander.  Operand 2 must satisfy
;; const0_operand.  The output code returns either the vcvtpd2dq{x}
;; template or the plain cvtpd2dq template.
2296 (define_insn "*sse2_cvtpd2dq"
2297 [(set (match_operand:V4SI 0 "register_operand" "=x")
2299 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2301 (match_operand:V2SI 2 "const0_operand" "")))]
2305 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2307 return "cvtpd2dq\t{%1, %0|%0, %1}";
2309 [(set_attr "type" "ssecvt")
2310 (set_attr "prefix_rep" "1")
2311 (set_attr "prefix_data16" "0")
2312 (set_attr "prefix" "maybe_vex")
2313 (set_attr "mode" "TI")
2314 (set_attr "amdfam10_decode" "double")
2315 (set_attr "athlon_decode" "vector")
2316 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq{y}: V4DF (register or memory) to V4SI with the RTL `fix'
;; operation.
2318 (define_insn "avx_cvttpd2dq256"
2319 [(set (match_operand:V4SI 0 "register_operand" "=x")
2320 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2322 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2323 [(set_attr "type" "ssecvt")
2324 (set_attr "prefix" "vex")
2325 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: the V2DF operand is converted to V2SI with
;; `fix' and the destination is V4SI.  The preparation statement sets
;; operand 2 to the V2SI zero vector.
2327 (define_expand "sse2_cvttpd2dq"
2328 [(set (match_operand:V4SI 0 "register_operand" "")
2330 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2333 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn behind the sse2_cvttpd2dq expander.  Operand 2 must satisfy
;; const0_operand.  The output code returns either the vcvttpd2dq{x}
;; template or the plain cvttpd2dq template.
2335 (define_insn "*sse2_cvttpd2dq"
2336 [(set (match_operand:V4SI 0 "register_operand" "=x")
2338 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2339 (match_operand:V2SI 2 "const0_operand" "")))]
2343 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2345 return "cvttpd2dq\t{%1, %0|%0, %1}";
2347 [(set_attr "type" "ssecvt")
2348 (set_attr "amdfam10_decode" "double")
2349 (set_attr "athlon_decode" "vector")
2350 (set_attr "bdver1_decode" "double")
2351 (set_attr "prefix" "maybe_vex")
2352 (set_attr "mode" "TI")])
;; cvtsd2ss / vcvtsd2ss: float_truncate of the V2DF operand 2, combined
;; with the V4SF operand 1.  Alternatives 0 and 1 are the two-operand
;; non-AVX forms (operand 1 tied to the destination); alternative 2 is the
;; three-operand AVX form.
2354 (define_insn "sse2_cvtsd2ss"
2355 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2358 (float_truncate:V2SF
2359 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2360 (match_operand:V4SF 1 "register_operand" "0,0,x")
2364 cvtsd2ss\t{%2, %0|%0, %2}
2365 cvtsd2ss\t{%2, %0|%0, %2}
2366 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2367 [(set_attr "isa" "noavx,noavx,avx")
2368 (set_attr "type" "ssecvt")
2369 (set_attr "athlon_decode" "vector,double,*")
2370 (set_attr "amdfam10_decode" "vector,double,*")
2371 (set_attr "bdver1_decode" "direct,direct,*")
2372 (set_attr "prefix" "orig,orig,vex")
2373 (set_attr "mode" "SF")])
;; cvtss2sd / vcvtss2sd: uses indices 0 and 1 of the V4SF operand 2,
;; combined with the V2DF operand 1.  Alternatives 0 and 1 are the
;; two-operand non-AVX forms (operand 1 tied to the destination);
;; alternative 2 is the three-operand AVX form.
2375 (define_insn "sse2_cvtss2sd"
2376 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2380 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2381 (parallel [(const_int 0) (const_int 1)])))
2382 (match_operand:V2DF 1 "register_operand" "0,0,x")
2386 cvtss2sd\t{%2, %0|%0, %2}
2387 cvtss2sd\t{%2, %0|%0, %2}
2388 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2389 [(set_attr "isa" "noavx,noavx,avx")
2390 (set_attr "type" "ssecvt")
2391 (set_attr "amdfam10_decode" "vector,double,*")
2392 (set_attr "athlon_decode" "direct,direct,*")
2393 (set_attr "bdver1_decode" "direct,direct,*")
2394 (set_attr "prefix" "orig,orig,vex")
2395 (set_attr "mode" "DF")])
;; vcvtpd2ps{y}: float_truncate of a V4DF operand (register or memory)
;; to V4SF.
2397 (define_insn "avx_cvtpd2ps256"
2398 [(set (match_operand:V4SF 0 "register_operand" "=x")
2399 (float_truncate:V4SF
2400 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2402 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2403 [(set_attr "type" "ssecvt")
2404 (set_attr "prefix" "vex")
2405 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: the V2DF operand is float_truncated to V2SF and
;; the destination is V4SF.  The preparation statement sets operand 2 to
;; the V2SF zero vector.
2407 (define_expand "sse2_cvtpd2ps"
2408 [(set (match_operand:V4SF 0 "register_operand" "")
2410 (float_truncate:V2SF
2411 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2414 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn behind the sse2_cvtpd2ps expander.  Operand 2 must satisfy
;; const0_operand.  The output code returns either the vcvtpd2ps{x}
;; template or the plain cvtpd2ps template.
2416 (define_insn "*sse2_cvtpd2ps"
2417 [(set (match_operand:V4SF 0 "register_operand" "=x")
2419 (float_truncate:V2SF
2420 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2421 (match_operand:V2SF 2 "const0_operand" "")))]
2425 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2427 return "cvtpd2ps\t{%1, %0|%0, %1}";
2429 [(set_attr "type" "ssecvt")
2430 (set_attr "amdfam10_decode" "double")
2431 (set_attr "athlon_decode" "vector")
2432 (set_attr "bdver1_decode" "double")
2433 (set_attr "prefix_data16" "1")
2434 (set_attr "prefix" "maybe_vex")
2435 (set_attr "mode" "V4SF")])
;; vcvtps2pd: V4SF operand (register or memory) to V4DF.
2437 (define_insn "avx_cvtps2pd256"
2438 [(set (match_operand:V4DF 0 "register_operand" "=x")
2440 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2442 "vcvtps2pd\t{%1, %0|%0, %1}"
2443 [(set_attr "type" "ssecvt")
2444 (set_attr "prefix" "vex")
2445 (set_attr "mode" "V4DF")])
;; vcvtps2pd applied to indices 0-3 of a V8SF operand, producing V4DF.
;; The template prints operand 1 with the `x' modifier (%x1) --
;; presumably the 128-bit name of the register; confirm against the
;; operand printer.
2447 (define_insn "*avx_cvtps2pd256_2"
2448 [(set (match_operand:V4DF 0 "register_operand" "=x")
2451 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2452 (parallel [(const_int 0) (const_int 1)
2453 (const_int 2) (const_int 3)]))))]
2455 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2456 [(set_attr "type" "ssecvt")
2457 (set_attr "prefix" "vex")
2458 (set_attr "mode" "V4DF")])
;; cvtps2pd: uses indices 0 and 1 of the V4SF operand (register or
;; memory) and produces V2DF.
2460 (define_insn "sse2_cvtps2pd"
2461 [(set (match_operand:V2DF 0 "register_operand" "=x")
2464 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2465 (parallel [(const_int 0) (const_int 1)]))))]
2467 "%vcvtps2pd\t{%1, %0|%0, %1}"
2468 [(set_attr "type" "ssecvt")
2469 (set_attr "amdfam10_decode" "direct")
2470 (set_attr "athlon_decode" "double")
2471 (set_attr "bdver1_decode" "double")
2472 (set_attr "prefix_data16" "0")
2473 (set_attr "prefix" "maybe_vex")
2474 (set_attr "mode" "V2DF")])
;; Two-step expander producing V2DF from a V4SF operand.  Step one is a
;; selection with indices 6, 7, 2, 3 involving operand 1; step two sets
;; operand 0 from a selection of indices 0 and 1.  Operand 2 is a fresh
;; V4SF pseudo, presumably the intermediate between the two steps.
2476 (define_expand "vec_unpacks_hi_v4sf"
2481 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2482 (parallel [(const_int 6) (const_int 7)
2483 (const_int 2) (const_int 3)])))
2484 (set (match_operand:V2DF 0 "register_operand" "")
2488 (parallel [(const_int 0) (const_int 1)]))))]
2490 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander producing V4DF from a V8SF operand.  Step one
;; selects indices 4-7 of operand 1; step two sets operand 0.  Operand 2
;; is a fresh V4SF pseudo, presumably the intermediate between the steps.
2492 (define_expand "vec_unpacks_hi_v8sf"
2495 (match_operand:V8SF 1 "nonimmediate_operand" "")
2496 (parallel [(const_int 4) (const_int 5)
2497 (const_int 6) (const_int 7)])))
2498 (set (match_operand:V4DF 0 "register_operand" "")
2502 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing V2DF from indices 0 and 1 of a V4SF operand.
2504 (define_expand "vec_unpacks_lo_v4sf"
2505 [(set (match_operand:V2DF 0 "register_operand" "")
2508 (match_operand:V4SF 1 "nonimmediate_operand" "")
2509 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing V4DF from indices 0-3 of a V8SF operand.
2512 (define_expand "vec_unpacks_lo_v8sf"
2513 [(set (match_operand:V4DF 0 "register_operand" "")
2516 (match_operand:V8SF 1 "nonimmediate_operand" "")
2517 (parallel [(const_int 0) (const_int 1)
2518 (const_int 2) (const_int 3)]))))]
;; V8HI -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacks_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2521 (define_expand "vec_unpacks_float_hi_v8hi"
2522 [(match_operand:V4SF 0 "register_operand" "")
2523 (match_operand:V8HI 1 "register_operand" "")]
2526 rtx tmp = gen_reg_rtx (V4SImode);
2528 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2529 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacks_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2533 (define_expand "vec_unpacks_float_lo_v8hi"
2534 [(match_operand:V4SF 0 "register_operand" "")
2535 (match_operand:V8HI 1 "register_operand" "")]
2538 rtx tmp = gen_reg_rtx (V4SImode);
2540 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2541 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacku_hi_v8hi, then convert it with gen_sse2_cvtdq2ps.
2545 (define_expand "vec_unpacku_float_hi_v8hi"
2546 [(match_operand:V4SF 0 "register_operand" "")
2547 (match_operand:V8HI 1 "register_operand" "")]
2550 rtx tmp = gen_reg_rtx (V4SImode);
2552 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2553 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; V8HI -> V4SF: unpack into a V4SI temporary with
;; gen_vec_unpacku_lo_v8hi, then convert it with gen_sse2_cvtdq2ps.
2557 (define_expand "vec_unpacku_float_lo_v8hi"
2558 [(match_operand:V4SF 0 "register_operand" "")
2559 (match_operand:V8HI 1 "register_operand" "")]
2562 rtx tmp = gen_reg_rtx (V4SImode);
2564 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2565 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Two-step expander producing V2DF from a V4SI operand.  Step one
;; selects indices 2, 3, 2, 3 of operand 1; step two sets operand 0 from
;; a selection of indices 0 and 1.  Operand 2 is a fresh V4SI pseudo,
;; presumably the intermediate between the two steps.
2569 (define_expand "vec_unpacks_float_hi_v4si"
2572 (match_operand:V4SI 1 "nonimmediate_operand" "")
2573 (parallel [(const_int 2) (const_int 3)
2574 (const_int 2) (const_int 3)])))
2575 (set (match_operand:V2DF 0 "register_operand" "")
2579 (parallel [(const_int 0) (const_int 1)]))))]
2581 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V2DF from indices 0 and 1 of a V4SI operand.
2583 (define_expand "vec_unpacks_float_lo_v4si"
2584 [(set (match_operand:V2DF 0 "register_operand" "")
2587 (match_operand:V4SI 1 "nonimmediate_operand" "")
2588 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander producing V4DF from a V8SI operand.  Step one
;; selects indices 4-7 of operand 1; step two sets operand 0.  Operand 2
;; is a fresh V4SI pseudo, presumably the intermediate between the steps.
2591 (define_expand "vec_unpacks_float_hi_v8si"
2594 (match_operand:V8SI 1 "nonimmediate_operand" "")
2595 (parallel [(const_int 4) (const_int 5)
2596 (const_int 6) (const_int 7)])))
2597 (set (match_operand:V4DF 0 "register_operand" "")
2601 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing V4DF from indices 0-3 of a V8SI operand.
2603 (define_expand "vec_unpacks_float_lo_v8si"
2604 [(set (match_operand:V4DF 0 "register_operand" "")
2607 (match_operand:V8SI 1 "nonimmediate_operand" "")
2608 (parallel [(const_int 0) (const_int 1)
2609 (const_int 2) (const_int 3)]))))]
;; Multi-step expander from a V4SI operand to V2DF.  The visible steps
;; are: select indices 2, 3, 2, 3 of operand 1; select indices 0 and 1;
;; compare operand 6 with operand 3 using `lt'; AND operand 7 with
;; operand 4; and set operand 0 to operand 6 plus operand 8.
;; The preparation code makes operand 3 the V2DF zero vector, operand 4 a
;; V2DF constant vector built from 1.0 scaled by 2**32 (real_ldexp with
;; exponent 32), operand 5 a V4SI pseudo and operands 6-8 V2DF pseudos.
;; NOTE(review): this looks like the usual "add 2**32 where the signed
;; conversion came out negative" correction -- confirm against the
;; complete pattern.
2612 (define_expand "vec_unpacku_float_hi_v4si"
2615 (match_operand:V4SI 1 "nonimmediate_operand" "")
2616 (parallel [(const_int 2) (const_int 3)
2617 (const_int 2) (const_int 3)])))
2622 (parallel [(const_int 0) (const_int 1)]))))
2624 (lt:V2DF (match_dup 6) (match_dup 3)))
2626 (and:V2DF (match_dup 7) (match_dup 4)))
2627 (set (match_operand:V2DF 0 "register_operand" "")
2628 (plus:V2DF (match_dup 6) (match_dup 8)))]
2631 REAL_VALUE_TYPE TWO32r;
2635 real_ldexp (&TWO32r, &dconst1, 32);
2636 x = const_double_from_real_value (TWO32r, DFmode);
2638 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2639 operands[4] = force_reg (V2DFmode,
2640 ix86_build_const_vector (V2DFmode, 1, x));
2642 operands[5] = gen_reg_rtx (V4SImode);
2644 for (i = 6; i < 9; i++)
2645 operands[i] = gen_reg_rtx (V2DFmode);
;; Multi-step expander from a V4SI operand to V2DF.  The visible steps
;; are: select indices 0 and 1 of operand 1; compare operand 5 with
;; operand 3 using `lt'; AND operand 6 with operand 4; and set operand 0
;; to operand 5 plus operand 7.
;; The preparation code makes operand 3 the V2DF zero vector, operand 4 a
;; V2DF constant vector built from 1.0 scaled by 2**32 (real_ldexp with
;; exponent 32), and operands 5-7 V2DF pseudos.
;; NOTE(review): this looks like the usual "add 2**32 where the signed
;; conversion came out negative" correction -- confirm against the
;; complete pattern.
2648 (define_expand "vec_unpacku_float_lo_v4si"
2652 (match_operand:V4SI 1 "nonimmediate_operand" "")
2653 (parallel [(const_int 0) (const_int 1)]))))
2655 (lt:V2DF (match_dup 5) (match_dup 3)))
2657 (and:V2DF (match_dup 6) (match_dup 4)))
2658 (set (match_operand:V2DF 0 "register_operand" "")
2659 (plus:V2DF (match_dup 5) (match_dup 7)))]
2662 REAL_VALUE_TYPE TWO32r;
2666 real_ldexp (&TWO32r, &dconst1, 32);
2667 x = const_double_from_real_value (TWO32r, DFmode);
2669 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2670 operands[4] = force_reg (V2DFmode,
2671 ix86_build_const_vector (V2DFmode, 1, x));
2673 for (i = 5; i < 8; i++)
2674 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander taking two V4DF operands (1 and 2), each float_truncated to
;; V4SF, and setting the V8SF operand 0.  Operands 3 and 4 are fresh V4SF
;; pseudos, presumably holding the two truncated halves.
2677 (define_expand "vec_pack_trunc_v4df"
2679 (float_truncate:V4SF
2680 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2682 (float_truncate:V4SF
2683 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2684 (set (match_operand:V8SF 0 "register_operand" "")
2690 operands[3] = gen_reg_rtx (V4SFmode);
2691 operands[4] = gen_reg_rtx (V4SFmode);
;; Two V2DF operands -> one V4SF: each input is converted into its own
;; V4SF temporary with gen_sse2_cvtpd2ps, and the two temporaries are
;; combined into operand 0 with gen_sse_movlhps.
2694 (define_expand "vec_pack_trunc_v2df"
2695 [(match_operand:V4SF 0 "register_operand" "")
2696 (match_operand:V2DF 1 "nonimmediate_operand" "")
2697 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2702 r1 = gen_reg_rtx (V4SFmode);
2703 r2 = gen_reg_rtx (V4SFmode);
2705 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2706 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2707 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Two V2DF operands -> one V4SI: each input is converted into its own
;; V4SI temporary with gen_sse2_cvttpd2dq, and the temporaries are then
;; combined by gen_vec_interleave_lowv2di, with the destination and both
;; temporaries accessed as V2DImode lowparts.
2711 (define_expand "vec_pack_sfix_trunc_v2df"
2712 [(match_operand:V4SI 0 "register_operand" "")
2713 (match_operand:V2DF 1 "nonimmediate_operand" "")
2714 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2719 r1 = gen_reg_rtx (V4SImode);
2720 r2 = gen_reg_rtx (V4SImode);
2722 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2723 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2724 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2725 gen_lowpart (V2DImode, r1),
2726 gen_lowpart (V2DImode, r2)));
;; Two V2DF operands -> one V4SI: each input is converted into its own
;; V4SI temporary with gen_sse2_cvtpd2dq, and the temporaries are then
;; combined by gen_vec_interleave_lowv2di, with the destination and both
;; temporaries accessed as V2DImode lowparts.
2730 (define_expand "vec_pack_sfix_v2df"
2731 [(match_operand:V4SI 0 "register_operand" "")
2732 (match_operand:V2DF 1 "nonimmediate_operand" "")
2733 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2738 r1 = gen_reg_rtx (V4SImode);
2739 r2 = gen_reg_rtx (V4SImode);
2741 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2742 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2743 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2744 gen_lowpart (V2DImode, r1),
2745 gen_lowpart (V2DImode, r2)));
2749 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2751 ;; Parallel single-precision floating point element swizzling
2753 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander front end for sse_movhlps that accepts nonimmediate operands
;; everywhere.  It runs ix86_fixup_binary_operands to obtain a usable
;; destination, emits sse_movhlps into it, and copies the result to
;; operand 0 when the fixed-up destination is a different rtx.
2755 (define_expand "sse_movhlps_exp"
2756 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2759 (match_operand:V4SF 1 "nonimmediate_operand" "")
2760 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2761 (parallel [(const_int 6)
2767 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2769 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2771 /* Fix up the destination if needed. */
2772 if (dst != operands[0])
2773 emit_move_insn (operands[0], dst);
;; movhlps and its memory-operand equivalents.  The condition rejects
;; the case where operands 1 and 2 are both memory.  Alternatives:
;;   0/1  register sources: movhlps / vmovhlps
;;   2/3  offsettable-memory operand 2: movlps / vmovlps on %H2
;;   4    memory destination: %vmovhps storing operand 2
2778 (define_insn "sse_movhlps"
2779 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2782 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2783 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2784 (parallel [(const_int 6)
2788 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2790 movhlps\t{%2, %0|%0, %2}
2791 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2792 movlps\t{%H2, %0|%0, %H2}
2793 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2794 %vmovhps\t{%2, %0|%0, %2}"
2795 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2796 (set_attr "type" "ssemov")
2797 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2798 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander front end for sse_movlhps that accepts nonimmediate operands
;; everywhere.  It runs ix86_fixup_binary_operands to obtain a usable
;; destination, emits sse_movlhps into it, and copies the result to
;; operand 0 when the fixed-up destination is a different rtx.
2800 (define_expand "sse_movlhps_exp"
2801 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2804 (match_operand:V4SF 1 "nonimmediate_operand" "")
2805 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2806 (parallel [(const_int 0)
2812 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2814 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2816 /* Fix up the destination if needed. */
2817 if (dst != operands[0])
2818 emit_move_insn (operands[0], dst);
;; movlhps and its memory-operand equivalents; operands are validated
;; with ix86_binary_operator_ok.  Alternatives:
;;   0/1  movlhps / vmovlhps
;;   2/3  movhps / vmovhps loading operand 2
;;   4    offsettable-memory destination: %vmovlps storing operand 2 to %H0
;; NOTE(review): operand 2 of alternative 2 is "m" but alternative 3 is
;; "x" although both emit a movhps form -- confirm this is intended.
2823 (define_insn "sse_movlhps"
2824 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2827 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2828 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,x,x"))
2829 (parallel [(const_int 0)
2833 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2835 movlhps\t{%2, %0|%0, %2}
2836 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2837 movhps\t{%2, %0|%0, %2}
2838 vmovhps\t{%2, %1, %0|%0, %1, %2}
2839 %vmovlps\t{%2, %H0|%H0, %2}"
2840 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2841 (set_attr "type" "ssemov")
2842 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2843 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; vunpckhps on V8SF: picks indices 2, 10, 3, 11, 6, 14, 7, 15 from the
;; pair (operand 1, operand 2), i.e. indices below 8 refer to operand 1
;; and indices 8-15 to operand 2.
2845 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
2846 (define_insn "avx_unpckhps256"
2847 [(set (match_operand:V8SF 0 "register_operand" "=x")
2850 (match_operand:V8SF 1 "register_operand" "x")
2851 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2852 (parallel [(const_int 2) (const_int 10)
2853 (const_int 3) (const_int 11)
2854 (const_int 6) (const_int 14)
2855 (const_int 7) (const_int 15)])))]
2857 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2858 [(set_attr "type" "sselog")
2859 (set_attr "prefix" "vex")
2860 (set_attr "mode" "V8SF")])
;; Three-step expander for a V8SF "interleave high".  The first step uses
;; the index list 0, 8, 1, 9, 4, 12, 5, 13 on operands 1 and 2 (the same
;; list as avx_unpcklps256); the second uses 2, 10, 3, 11, 6, 14, 7, 15
;; (the same list as avx_unpckhps256); the last sets operand 0 by
;; selecting indices 4-7 and 12-15.  Operands 3 and 4 are fresh V8SF
;; pseudos, presumably the results of the first two steps.
2862 (define_expand "vec_interleave_highv8sf"
2866 (match_operand:V8SF 1 "register_operand" "x")
2867 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2868 (parallel [(const_int 0) (const_int 8)
2869 (const_int 1) (const_int 9)
2870 (const_int 4) (const_int 12)
2871 (const_int 5) (const_int 13)])))
2877 (parallel [(const_int 2) (const_int 10)
2878 (const_int 3) (const_int 11)
2879 (const_int 6) (const_int 14)
2880 (const_int 7) (const_int 15)])))
2881 (set (match_operand:V8SF 0 "register_operand" "")
2886 (parallel [(const_int 4) (const_int 5)
2887 (const_int 6) (const_int 7)
2888 (const_int 12) (const_int 13)
2889 (const_int 14) (const_int 15)])))]
2892 operands[3] = gen_reg_rtx (V8SFmode);
2893 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on V4SF: picks indices 2, 6, 3, 7 from the pair
;; (operand 1, operand 2).  Alternative 0 is the two-operand non-AVX
;; form (operand 1 tied to the destination); alternative 1 is the AVX form.
2896 (define_insn "vec_interleave_highv4sf"
2897 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2900 (match_operand:V4SF 1 "register_operand" "0,x")
2901 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2902 (parallel [(const_int 2) (const_int 6)
2903 (const_int 3) (const_int 7)])))]
2906 unpckhps\t{%2, %0|%0, %2}
2907 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2908 [(set_attr "isa" "noavx,avx")
2909 (set_attr "type" "sselog")
2910 (set_attr "prefix" "orig,vex")
2911 (set_attr "mode" "V4SF")])
;; vunpcklps on V8SF: picks indices 0, 8, 1, 9, 4, 12, 5, 13 from the
;; pair (operand 1, operand 2), i.e. indices below 8 refer to operand 1
;; and indices 8-15 to operand 2.
2913 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
2914 (define_insn "avx_unpcklps256"
2915 [(set (match_operand:V8SF 0 "register_operand" "=x")
2918 (match_operand:V8SF 1 "register_operand" "x")
2919 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2920 (parallel [(const_int 0) (const_int 8)
2921 (const_int 1) (const_int 9)
2922 (const_int 4) (const_int 12)
2923 (const_int 5) (const_int 13)])))]
2925 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2926 [(set_attr "type" "sselog")
2927 (set_attr "prefix" "vex")
2928 (set_attr "mode" "V8SF")])
;; Three-step expander for a V8SF "interleave low".  The first step uses
;; the index list 0, 8, 1, 9, 4, 12, 5, 13 on operands 1 and 2 (the same
;; list as avx_unpcklps256); the second uses 2, 10, 3, 11, 6, 14, 7, 15
;; (the same list as avx_unpckhps256); the last sets operand 0 by
;; selecting indices 0-3 and 8-11.  Operands 3 and 4 are fresh V8SF
;; pseudos, presumably the results of the first two steps.
2930 (define_expand "vec_interleave_lowv8sf"
2934 (match_operand:V8SF 1 "register_operand" "x")
2935 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2936 (parallel [(const_int 0) (const_int 8)
2937 (const_int 1) (const_int 9)
2938 (const_int 4) (const_int 12)
2939 (const_int 5) (const_int 13)])))
2945 (parallel [(const_int 2) (const_int 10)
2946 (const_int 3) (const_int 11)
2947 (const_int 6) (const_int 14)
2948 (const_int 7) (const_int 15)])))
2949 (set (match_operand:V8SF 0 "register_operand" "")
2954 (parallel [(const_int 0) (const_int 1)
2955 (const_int 2) (const_int 3)
2956 (const_int 8) (const_int 9)
2957 (const_int 10) (const_int 11)])))]
2960 operands[3] = gen_reg_rtx (V8SFmode);
2961 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on V4SF: picks indices 0, 4, 1, 5 from the pair
;; (operand 1, operand 2).  Alternative 0 is the two-operand non-AVX
;; form (operand 1 tied to the destination); alternative 1 is the AVX form.
2964 (define_insn "vec_interleave_lowv4sf"
2965 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2968 (match_operand:V4SF 1 "register_operand" "0,x")
2969 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2970 (parallel [(const_int 0) (const_int 4)
2971 (const_int 1) (const_int 5)])))]
2974 unpcklps\t{%2, %0|%0, %2}
2975 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2976 [(set_attr "isa" "noavx,avx")
2977 (set_attr "type" "sselog")
2978 (set_attr "prefix" "orig,vex")
2979 (set_attr "mode" "V4SF")])
;; vmovshdup on V8SF: the index list 1, 1, 3, 3, 5, 5, 7, 7 duplicates
;; each odd-numbered element of operand 1 (register or memory).
2981 ;; These are modeled with the same vec_concat as the others so that we
2982 ;; capture users of shufps that can use the new instructions
2983 (define_insn "avx_movshdup256"
2984 [(set (match_operand:V8SF 0 "register_operand" "=x")
2987 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2989 (parallel [(const_int 1) (const_int 1)
2990 (const_int 3) (const_int 3)
2991 (const_int 5) (const_int 5)
2992 (const_int 7) (const_int 7)])))]
2994 "vmovshdup\t{%1, %0|%0, %1}"
2995 [(set_attr "type" "sse")
2996 (set_attr "prefix" "vex")
2997 (set_attr "mode" "V8SF")])
;; movshdup on a V4SF operand (register or memory); the 128-bit
;; counterpart of avx_movshdup256.  The visible index list starts at 1.
2999 (define_insn "sse3_movshdup"
3000 [(set (match_operand:V4SF 0 "register_operand" "=x")
3003 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3005 (parallel [(const_int 1)
3010 "%vmovshdup\t{%1, %0|%0, %1}"
3011 [(set_attr "type" "sse")
3012 (set_attr "prefix_rep" "1")
3013 (set_attr "prefix" "maybe_vex")
3014 (set_attr "mode" "V4SF")])
;; vmovsldup on V8SF: the index list 0, 0, 2, 2, 4, 4, 6, 6 duplicates
;; each even-numbered element of operand 1 (register or memory).
3016 (define_insn "avx_movsldup256"
3017 [(set (match_operand:V8SF 0 "register_operand" "=x")
3020 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3022 (parallel [(const_int 0) (const_int 0)
3023 (const_int 2) (const_int 2)
3024 (const_int 4) (const_int 4)
3025 (const_int 6) (const_int 6)])))]
3027 "vmovsldup\t{%1, %0|%0, %1}"
3028 [(set_attr "type" "sse")
3029 (set_attr "prefix" "vex")
3030 (set_attr "mode" "V8SF")])
;; movsldup on a V4SF operand (register or memory); the 128-bit
;; counterpart of avx_movsldup256.  The visible index list starts at 0.
3032 (define_insn "sse3_movsldup"
3033 [(set (match_operand:V4SF 0 "register_operand" "=x")
3036 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3038 (parallel [(const_int 0)
3043 "%vmovsldup\t{%1, %0|%0, %1}"
3044 [(set_attr "type" "sse")
3045 (set_attr "prefix_rep" "1")
3046 (set_attr "prefix" "maybe_vex")
3047 (set_attr "mode" "V4SF")])
;; Expander for the immediate form of vshufps on V8SF.  Operand 3 is a
;; constant whose four 2-bit fields are decoded into eight explicit
;; element indices for avx_shufps256_1: fields 0 and 1 are passed as-is,
;; fields 2 and 3 are offset by 8, and the same four values are passed
;; again offset by 4 (i.e. +4, +4, +12, +12).
3049 (define_expand "avx_shufps256"
3050 [(match_operand:V8SF 0 "register_operand" "")
3051 (match_operand:V8SF 1 "register_operand" "")
3052 (match_operand:V8SF 2 "nonimmediate_operand" "")
3053 (match_operand:SI 3 "const_int_operand" "")]
3056 int mask = INTVAL (operands[3]);
3057 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3058 GEN_INT ((mask >> 0) & 3),
3059 GEN_INT ((mask >> 2) & 3),
3060 GEN_INT (((mask >> 4) & 3) + 8),
3061 GEN_INT (((mask >> 6) & 3) + 8),
3062 GEN_INT (((mask >> 0) & 3) + 4),
3063 GEN_INT (((mask >> 2) & 3) + 4),
3064 GEN_INT (((mask >> 4) & 3) + 12),
3065 GEN_INT (((mask >> 6) & 3) + 12)));
;; vshufps on V8SF with the selection spelled out as eight index
;; operands (3-10).  The condition requires operands 7-10 to equal
;; operands 3-6 plus 4.  The output code rebuilds the immediate as
;; op3 | op4 << 2 | (op5 - 8) << 4 | (op6 - 8) << 6 and stores it back
;; into operand 3 before returning the template.
3069 ;; One bit in mask selects 2 elements.
3070 (define_insn "avx_shufps256_1"
3071 [(set (match_operand:V8SF 0 "register_operand" "=x")
3074 (match_operand:V8SF 1 "register_operand" "x")
3075 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3076 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3077 (match_operand 4 "const_0_to_3_operand" "")
3078 (match_operand 5 "const_8_to_11_operand" "")
3079 (match_operand 6 "const_8_to_11_operand" "")
3080 (match_operand 7 "const_4_to_7_operand" "")
3081 (match_operand 8 "const_4_to_7_operand" "")
3082 (match_operand 9 "const_12_to_15_operand" "")
3083 (match_operand 10 "const_12_to_15_operand" "")])))]
3085 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3086 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3087 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3088 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3091 mask = INTVAL (operands[3]);
3092 mask |= INTVAL (operands[4]) << 2;
3093 mask |= (INTVAL (operands[5]) - 8) << 4;
3094 mask |= (INTVAL (operands[6]) - 8) << 6;
3095 operands[3] = GEN_INT (mask);
3097 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3099 [(set_attr "type" "sselog")
3100 (set_attr "length_immediate" "1")
3101 (set_attr "prefix" "vex")
3102 (set_attr "mode" "V8SF")])
;; Expander for the immediate form of shufps on V4SF.  Operand 3 is a
;; constant whose four 2-bit fields are decoded into explicit element
;; indices for sse_shufps_v4sf: fields 0 and 1 are passed as-is, fields
;; 2 and 3 are offset by 4.
3104 (define_expand "sse_shufps"
3105 [(match_operand:V4SF 0 "register_operand" "")
3106 (match_operand:V4SF 1 "register_operand" "")
3107 (match_operand:V4SF 2 "nonimmediate_operand" "")
3108 (match_operand:SI 3 "const_int_operand" "")]
3111 int mask = INTVAL (operands[3]);
3112 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3113 GEN_INT ((mask >> 0) & 3),
3114 GEN_INT ((mask >> 2) & 3),
3115 GEN_INT (((mask >> 4) & 3) + 4),
3116 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps for every mode of the VI4F_128 iterator, with the
;; selection given as four index operands (3-6) applied to the
;; concatenation of operands 1 and 2.  The output code packs the
;; immediate as op3 | op4 << 2 | (op5 - 4) << 4 | (op6 - 4) << 6, stores
;; it back into operand 3, and picks the template by which_alternative
;; (two-operand shufps or three-operand vshufps).
3120 (define_insn "sse_shufps_<mode>"
3121 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3122 (vec_select:VI4F_128
3123 (vec_concat:<ssedoublevecmode>
3124 (match_operand:VI4F_128 1 "register_operand" "0,x")
3125 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3126 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3127 (match_operand 4 "const_0_to_3_operand" "")
3128 (match_operand 5 "const_4_to_7_operand" "")
3129 (match_operand 6 "const_4_to_7_operand" "")])))]
3133 mask |= INTVAL (operands[3]) << 0;
3134 mask |= INTVAL (operands[4]) << 2;
3135 mask |= (INTVAL (operands[5]) - 4) << 4;
3136 mask |= (INTVAL (operands[6]) - 4) << 6;
3137 operands[3] = GEN_INT (mask);
3139 switch (which_alternative)
3142 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3144 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3149 [(set_attr "isa" "noavx,avx")
3150 (set_attr "type" "sselog")
3151 (set_attr "length_immediate" "1")
3152 (set_attr "prefix" "orig,vex")
3153 (set_attr "mode" "V4SF")])
;; Extract indices 2 and 3 of a V4SF operand into a V2SF destination.
;; Alternatives: 0 memory destination (movhps), 1 register to register
;; (movhlps), 2 offsettable-memory source (movlps on %H1).
3155 (define_insn "sse_storehps"
3156 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3158 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3159 (parallel [(const_int 2) (const_int 3)])))]
3162 %vmovhps\t{%1, %0|%0, %1}
3163 %vmovhlps\t{%1, %d0|%d0, %1}
3164 %vmovlps\t{%H1, %d0|%d0, %H1}"
3165 [(set_attr "type" "ssemov")
3166 (set_attr "prefix" "maybe_vex")
3167 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadhps that accepts nonimmediate operands
;; everywhere.  It runs ix86_fixup_binary_operands to obtain a usable
;; destination, emits sse_loadhps into it, and copies the result to
;; operand 0 when the fixed-up destination is a different rtx.
3169 (define_expand "sse_loadhps_exp"
3170 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3173 (match_operand:V4SF 1 "nonimmediate_operand" "")
3174 (parallel [(const_int 0) (const_int 1)]))
3175 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3178 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3180 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3182 /* Fix up the destination if needed. */
3183 if (dst != operands[0])
3184 emit_move_insn (operands[0], dst);
;; Combine indices 0 and 1 of the V4SF operand 1 with the V2SF operand 2.
;; Alternatives:
;;   0/1  operand 2 in memory: movhps / vmovhps
;;   2/3  operand 2 in a register: movlhps / vmovlhps
;;   4    offsettable-memory destination: %vmovlps storing operand 2 to %H0
3189 (define_insn "sse_loadhps"
3190 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3193 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3194 (parallel [(const_int 0) (const_int 1)]))
3195 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3198 movhps\t{%2, %0|%0, %2}
3199 vmovhps\t{%2, %1, %0|%0, %1, %2}
3200 movlhps\t{%2, %0|%0, %2}
3201 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3202 %vmovlps\t{%2, %H0|%H0, %2}"
3203 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3204 (set_attr "type" "ssemov")
3205 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3206 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract indices 0 and 1 of a V4SF operand into a V2SF destination.
;; Alternatives: 0 memory destination (movlps), 1 register to register
;; (movaps of the whole register), 2 memory source (movlps).
3208 (define_insn "sse_storelps"
3209 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3211 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3212 (parallel [(const_int 0) (const_int 1)])))]
3215 %vmovlps\t{%1, %0|%0, %1}
3216 %vmovaps\t{%1, %0|%0, %1}
3217 %vmovlps\t{%1, %d0|%d0, %1}"
3218 [(set_attr "type" "ssemov")
3219 (set_attr "prefix" "maybe_vex")
3220 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander front end for sse_loadlps that accepts nonimmediate operands
;; everywhere.  It runs ix86_fixup_binary_operands to obtain a usable
;; destination, emits sse_loadlps into it, and copies the result to
;; operand 0 when the fixed-up destination is a different rtx.
3222 (define_expand "sse_loadlps_exp"
3223 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3225 (match_operand:V2SF 2 "nonimmediate_operand" "")
3227 (match_operand:V4SF 1 "nonimmediate_operand" "")
3228 (parallel [(const_int 2) (const_int 3)]))))]
3231 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3233 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3235 /* Fix up the destination if needed. */
3236 if (dst != operands[0])
3237 emit_move_insn (operands[0], dst);
;; Combine the V2SF operand 2 with indices 2 and 3 of the V4SF operand 1.
;; Alternatives:
;;   0/1  operand 2 in a register: shufps / vshufps with immediate 0xe4
;;   2/3  movlps / vmovlps of operand 2 into the register destination
;;   4    memory destination: %vmovlps storing operand 2
3242 (define_insn "sse_loadlps"
3243 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3245 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,x,x")
3247 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3248 (parallel [(const_int 2) (const_int 3)]))))]
3251 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3252 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3253 movlps\t{%2, %0|%0, %2}
3254 vmovlps\t{%2, %1, %0|%0, %1, %2}
3255 %vmovlps\t{%2, %0|%0, %2}"
3256 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3257 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3258 (set_attr "length_immediate" "1,1,*,*,*")
3259 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3260 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers, combining operand 2 with
;; operand 1.  Alternative 0 is the two-operand non-AVX form (operand 1
;; tied to the destination); alternative 1 is the three-operand AVX form.
3262 (define_insn "sse_movss"
3263 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3265 (match_operand:V4SF 2 "register_operand" " x,x")
3266 (match_operand:V4SF 1 "register_operand" " 0,x")
3270 movss\t{%2, %0|%0, %2}
3271 vmovss\t{%2, %1, %0|%0, %1, %2}"
3272 [(set_attr "isa" "noavx,avx")
3273 (set_attr "type" "ssemov")
3274 (set_attr "prefix" "orig,vex")
3275 (set_attr "mode" "SF")])
;; vec_dupv4sf: expander taking an SFmode scalar (operand 1, register or
;; memory) and producing a V4SF register (operand 0).
;; Operand 1 is SFmode, so when it is forced into a register the
;; register must be SFmode as well; asking force_reg for V4SFmode would
;; pair a V4SF pseudo with an SF source.
3277 (define_expand "vec_dupv4sf"
3278 [(set (match_operand:V4SF 0 "register_operand" "")
3280 (match_operand:SF 1 "nonimmediate_operand" "")))]
3284 operands[1] = force_reg (SFmode, operands[1]);
;; VEX-encoded V4SF duplicate of SF operand 1: vshufps with immediate 0
;; when operand 1 is in a register, vbroadcastss when it is in memory.
3287 (define_insn "*vec_dupv4sf_avx"
3288 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3290 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3293 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3294 vbroadcastss\t{%1, %0|%0, %1}"
3295 [(set_attr "type" "sselog1,ssemov")
3296 (set_attr "length_immediate" "1,0")
3297 (set_attr "prefix_extra" "0,1")
3298 (set_attr "prefix" "vex")
3299 (set_attr "mode" "V4SF")])
;; V4SF duplicate with SF operand 1 tied to the destination register
;; (constraint "0"); emitted as shufps with immediate 0 applied to the
;; destination itself.
3301 (define_insn "*vec_dupv4sf"
3302 [(set (match_operand:V4SF 0 "register_operand" "=x")
3304 (match_operand:SF 1 "register_operand" "0")))]
3306 "shufps\t{$0, %0, %0|%0, %0, 0}"
3307 [(set_attr "type" "sselog1")
3308 (set_attr "length_immediate" "1")
3309 (set_attr "mode" "V4SF")])
3311 ;; Although insertps takes register source, we prefer
3312 ;; unpcklps with register source since it is shorter.
;; V2SF built from SF operands 1 and 2.  Alternatives, in order:
;;   0/1  (v)unpcklps   - operand 2 in an SSE register
;;   2/3  (v)insertps   - operand 2 in memory, immediate 0x10
;;   4    %vmovss       - operand 1 in memory, operand 2 matching "C"
;;   5    punpckldq     - "*y" register destination (type mmxcvt)
;;   6    movd          - "*y" register destination (type mmxmov)
3313 (define_insn "*vec_concatv2sf_sse4_1"
3314 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3316 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3317 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3320 unpcklps\t{%2, %0|%0, %2}
3321 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3322 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3323 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3324 %vmovss\t{%1, %0|%0, %1}
3325 punpckldq\t{%2, %0|%0, %2}
3326 movd\t{%1, %0|%0, %1}"
3327 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3328 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3329 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3330 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3331 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3332 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3333 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3335 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3336 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3337 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; V2SF built from SF operands 1 and 2 without insertps.  Operand 2 is
;; restricted to a register or a "C" constant (reg_or_0_operand).
;; Alternatives: unpcklps, movss load of operand 1, and the two "*y"
;; register forms punpckldq and movd.
3338 (define_insn "*vec_concatv2sf_sse"
3339 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3341 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3342 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3345 unpcklps\t{%2, %0|%0, %2}
3346 movss\t{%1, %0|%0, %1}
3347 punpckldq\t{%2, %0|%0, %2}
3348 movd\t{%1, %0|%0, %1}"
3349 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3350 (set_attr "mode" "V4SF,SF,DI,DI")])
;; V4SF built from V2SF operands 1 and 2: (v)movlhps when operand 2 is
;; in a register, (v)movhps when operand 2 is in memory.  The "isa"
;; attribute separates the two-operand and VEX three-operand forms.
3352 (define_insn "*vec_concatv4sf"
3353 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3355 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3356 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3359 movlhps\t{%2, %0|%0, %2}
3360 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3361 movhps\t{%2, %0|%0, %2}
3362 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3363 [(set_attr "isa" "noavx,avx,noavx,avx")
3364 (set_attr "type" "ssemov")
3365 (set_attr "prefix" "orig,vex,orig,vex")
3366 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init for every V_128 mode: operand 0 is the destination register,
;; operand 1 is passed through unchecked; all the work is delegated to
;; ix86_expand_vector_init with a first argument of false.
3368 (define_expand "vec_init<mode>"
3369 [(match_operand:V_128 0 "register_operand" "")
3370 (match_operand 1 "" "")]
3373 ix86_expand_vector_init (false, operands[0], operands[1]);
3377 ;; Avoid combining registers from different units in a single alternative,
3378 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Scalar operand 2 is wrapped in vec_duplicate and combined with vector
;; operand 1 into a VI4F_128 destination.  Ten alternatives:
;;   0-2  operand 1 matches "C": %vinsertps 0xe, scalar load, %vmovd
;;   3-4  (v)movss from an SSE register
;;   5-6  (v)pinsrd with immediate 0 from "*rm"
;;   7-9  memory destination with operand 1 tied to it; their attributes
;;        are all "*" (presumably split later - templates not shown here).
3379 (define_insn "*vec_set<mode>_0_sse4_1"
3380 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3381 "=x,x,x ,x,x,x ,x ,m,m,m")
3383 (vec_duplicate:VI4F_128
3384 (match_operand:<ssescalarmode> 2 "general_operand"
3385 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3386 (match_operand:VI4F_128 1 "vector_move_operand"
3387 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3391 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3392 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3393 %vmovd\t{%2, %0|%0, %2}
3394 movss\t{%2, %0|%0, %2}
3395 vmovss\t{%2, %1, %0|%0, %1, %2}
3396 pinsrd\t{$0, %2, %0|%0, %2, 0}
3397 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3401 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3402 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3403 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3404 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3405 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3406 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
3408 ;; Avoid combining registers from different units in a single alternative,
3409 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Same operand shape as the other vec_set<mode>_0 patterns (scalar
;; operand 2 under vec_duplicate, vector operand 1), with six
;; alternatives per the "mode" attribute.  The visible templates are a
;; scalar mov<ssescalarmodesuffix>, movd and movss; the last three
;; alternatives have mode "*".
3410 (define_insn "*vec_set<mode>_0_sse2"
3411 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3414 (vec_duplicate:VI4F_128
3415 (match_operand:<ssescalarmode> 2 "general_operand"
3417 (match_operand:VI4F_128 1 "vector_move_operand"
3422 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3423 movd\t{%2, %0|%0, %2}
3424 movss\t{%2, %0|%0, %2}
3428 [(set_attr "type" "ssemov")
3429 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
3431 ;; Avoid combining registers from different units in a single alternative,
3432 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Named vec_set<mode>_0 pattern with five alternatives per the "mode"
;; attribute: two movss forms (mode SF) followed by three alternatives
;; with mode "*".
3433 (define_insn "vec_set<mode>_0"
3434 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3437 (vec_duplicate:VI4F_128
3438 (match_operand:<ssescalarmode> 2 "general_operand"
3440 (match_operand:VI4F_128 1 "vector_move_operand"
3445 movss\t{%2, %0|%0, %2}
3446 movss\t{%2, %0|%0, %2}
3450 [(set_attr "type" "ssemov")
3451 (set_attr "mode" "SF,SF,*,*,*")])
3453 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 using (v)insertps.
;; Operand 3 must be a power of two whose exact_log2 is below the number
;; of V4SF elements; before output it is rewritten to
;; exact_log2 (operand 3) << 4, which is the value printed as the
;; insertps immediate.
3454 (define_insn "*vec_setv4sf_sse4_1"
3455 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3458 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3459 (match_operand:V4SF 1 "register_operand" "0,x")
3460 (match_operand:SI 3 "const_int_operand" "")))]
3462 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3463 < GET_MODE_NUNITS (V4SFmode))"
3465 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3466 switch (which_alternative)
3469 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3471 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3476 [(set_attr "isa" "noavx,avx")
3477 (set_attr "type" "sselog")
3478 (set_attr "prefix_data16" "1,*")
3479 (set_attr "prefix_extra" "1")
3480 (set_attr "length_immediate" "1")
3481 (set_attr "prefix" "orig,vex")
3482 (set_attr "mode" "V4SF")])
;; (v)insertps expressed as an unspec over V4SF operands 2 and 1 and an
;; immediate in 0..255 (operand 3).
;; When operand 2 is in memory, the top two bits of the immediate
;; (operand 3 >> 6) are folded into the address as an SFmode offset of
;; count_s * 4 bytes and the immediate is masked down to its low six
;; bits before the instruction is printed.
3484 (define_insn "sse4_1_insertps"
3485 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3486 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3487 (match_operand:V4SF 1 "register_operand" "0,x")
3488 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3492 if (MEM_P (operands[2]))
3494 unsigned count_s = INTVAL (operands[3]) >> 6;
3496 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3497 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3499 switch (which_alternative)
3502 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3504 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3509 [(set_attr "isa" "noavx,avx")
3510 (set_attr "type" "sselog")
3511 (set_attr "prefix_data16" "1,*")
3512 (set_attr "prefix_extra" "1")
3513 (set_attr "length_immediate" "1")
3514 (set_attr "prefix" "orig,vex")
3515 (set_attr "mode" "V4SF")])
;; Active when TARGET_SSE && reload_completed: the destination is a
;; VI4F_128 in memory and scalar operand 1 is not in memory.  The
;; replacement is a scalar-mode move to byte offset 0 of that memory.
3518 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3520 (vec_duplicate:VI4F_128
3521 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3524 "TARGET_SSE && reload_completed"
3527 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set for every V_128 mode: operand 0 is the vector register,
;; operand 1 the scalar register, operand 2 a constant integer that is
;; passed as INTVAL to ix86_expand_vector_set.
3532 (define_expand "vec_set<mode>"
3533 [(match_operand:V_128 0 "register_operand" "")
3534 (match_operand:<ssescalarmode> 1 "register_operand" "")
3535 (match_operand 2 "const_int_operand" "")]
3538 ix86_expand_vector_set (false, operands[0], operands[1],
3539 INTVAL (operands[2]));
;; Extract element 0 of V4SF operand 1 as an SF value; memory-to-memory
;; is excluded by the insn condition.  After reload the insn is split
;; into an ordinary SFmode move, with operand 1 re-expressed in SFmode
;; either as the same hard register number or through gen_lowpart.
3543 (define_insn_and_split "*vec_extractv4sf_0"
3544 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3546 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3547 (parallel [(const_int 0)])))]
3548 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3550 "&& reload_completed"
3553 rtx op1 = operands[1];
3555 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3557 op1 = gen_lowpart (SFmode, op1);
3558 emit_move_insn (operands[0], op1);
;; Expander for every V_256 mode.  The 0/1 constant in operand 2 selects
;; between gen_vec_extract_lo_<mode> and gen_vec_extract_hi_<mode>; the
;; chosen generator is then called with operands 0 and 1.
3562 (define_expand "avx_vextractf128<mode>"
3563 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3564 (match_operand:V_256 1 "register_operand" "")
3565 (match_operand:SI 2 "const_0_to_1_operand" "")]
3568 rtx (*insn)(rtx, rtx);
3570 switch (INTVAL (operands[2]))
3573 insn = gen_vec_extract_lo_<mode>;
3576 insn = gen_vec_extract_hi_<mode>;
3582 emit_insn (insn (operands[0], operands[1]));
;; Low half (element indices 0 and 1) of a VI8F_256 vector.  After
;; reload the insn is split into a plain move whose source is operand 1
;; viewed in <ssehalfvecmode> (same register number, or gen_lowpart).
3586 (define_insn_and_split "vec_extract_lo_<mode>"
3587 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3588 (vec_select:<ssehalfvecmode>
3589 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3590 (parallel [(const_int 0) (const_int 1)])))]
3593 "&& reload_completed"
3596 rtx op1 = operands[1];
3598 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3600 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3601 emit_move_insn (operands[0], op1);
;; High half (element indices 2 and 3) of a VI8F_256 register, emitted
;; as vextractf128 with immediate 1.  The destination is a register
;; (memory attribute "none") or memory ("store").
3605 (define_insn "vec_extract_hi_<mode>"
3606 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3607 (vec_select:<ssehalfvecmode>
3608 (match_operand:VI8F_256 1 "register_operand" "x,x")
3609 (parallel [(const_int 2) (const_int 3)])))]
3611 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3612 [(set_attr "type" "sselog")
3613 (set_attr "prefix_extra" "1")
3614 (set_attr "length_immediate" "1")
3615 (set_attr "memory" "none,store")
3616 (set_attr "prefix" "vex")
3617 (set_attr "mode" "V8SF")])
;; Low half (element indices 0..3) of a VI4F_256 vector.  After reload
;; the insn is split into a plain move whose source is operand 1 viewed
;; in <ssehalfvecmode> (same register number, or gen_lowpart).
3619 (define_insn_and_split "vec_extract_lo_<mode>"
3620 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3621 (vec_select:<ssehalfvecmode>
3622 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3623 (parallel [(const_int 0) (const_int 1)
3624 (const_int 2) (const_int 3)])))]
3627 "&& reload_completed"
3630 rtx op1 = operands[1];
3632 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3634 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3635 emit_move_insn (operands[0], op1);
;; High half (element indices 4..7) of a VI4F_256 register, emitted as
;; vextractf128 with immediate 1 into a register or memory destination.
3639 (define_insn "vec_extract_hi_<mode>"
3640 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3641 (vec_select:<ssehalfvecmode>
3642 (match_operand:VI4F_256 1 "register_operand" "x,x")
3643 (parallel [(const_int 4) (const_int 5)
3644 (const_int 6) (const_int 7)])))]
3646 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3647 [(set_attr "type" "sselog")
3648 (set_attr "prefix_extra" "1")
3649 (set_attr "length_immediate" "1")
3650 (set_attr "memory" "none,store")
3651 (set_attr "prefix" "vex")
3652 (set_attr "mode" "V8SF")])
;; Low half (element indices 0..7) of a V16HI vector as V8HI.  After
;; reload the insn is split into a plain V8HI move of operand 1 (same
;; register number, or gen_lowpart).
3654 (define_insn_and_split "vec_extract_lo_v16hi"
3655 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3657 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3658 (parallel [(const_int 0) (const_int 1)
3659 (const_int 2) (const_int 3)
3660 (const_int 4) (const_int 5)
3661 (const_int 6) (const_int 7)])))]
3664 "&& reload_completed"
3667 rtx op1 = operands[1];
3669 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3671 op1 = gen_lowpart (V8HImode, op1);
3672 emit_move_insn (operands[0], op1);
;; High half (element indices 8..15) of a V16HI register as V8HI,
;; emitted as vextractf128 with immediate 1 into a register or memory.
3676 (define_insn "vec_extract_hi_v16hi"
3677 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3679 (match_operand:V16HI 1 "register_operand" "x,x")
3680 (parallel [(const_int 8) (const_int 9)
3681 (const_int 10) (const_int 11)
3682 (const_int 12) (const_int 13)
3683 (const_int 14) (const_int 15)])))]
3685 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3686 [(set_attr "type" "sselog")
3687 (set_attr "prefix_extra" "1")
3688 (set_attr "length_immediate" "1")
3689 (set_attr "memory" "none,store")
3690 (set_attr "prefix" "vex")
3691 (set_attr "mode" "V8SF")])
;; Low half (element indices 0..15) of a V32QI vector as V16QI.  After
;; reload the insn is split into a plain V16QI move of operand 1 (same
;; register number, or gen_lowpart).
3693 (define_insn_and_split "vec_extract_lo_v32qi"
3694 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3696 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3697 (parallel [(const_int 0) (const_int 1)
3698 (const_int 2) (const_int 3)
3699 (const_int 4) (const_int 5)
3700 (const_int 6) (const_int 7)
3701 (const_int 8) (const_int 9)
3702 (const_int 10) (const_int 11)
3703 (const_int 12) (const_int 13)
3704 (const_int 14) (const_int 15)])))]
3707 "&& reload_completed"
3710 rtx op1 = operands[1];
3712 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3714 op1 = gen_lowpart (V16QImode, op1);
3715 emit_move_insn (operands[0], op1);
;; High half (element indices 16..31) of a V32QI register as V16QI,
;; emitted as vextractf128 with immediate 1 into a register or memory.
3719 (define_insn "vec_extract_hi_v32qi"
3720 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3722 (match_operand:V32QI 1 "register_operand" "x,x")
3723 (parallel [(const_int 16) (const_int 17)
3724 (const_int 18) (const_int 19)
3725 (const_int 20) (const_int 21)
3726 (const_int 22) (const_int 23)
3727 (const_int 24) (const_int 25)
3728 (const_int 26) (const_int 27)
3729 (const_int 28) (const_int 29)
3730 (const_int 30) (const_int 31)])))]
3732 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3733 [(set_attr "type" "sselog")
3734 (set_attr "prefix_extra" "1")
3735 (set_attr "length_immediate" "1")
3736 (set_attr "memory" "none,store")
3737 (set_attr "prefix" "vex")
3738 (set_attr "mode" "V8SF")])
;; Extract the SF element of V4SF register operand 1 selected by the
;; 0..3 constant in operand 2 into a general register or memory
;; (constraint "rm"), emitted as %vextractps.
3740 (define_insn "*sse4_1_extractps"
3741 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3743 (match_operand:V4SF 1 "register_operand" "x")
3744 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3746 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3747 [(set_attr "type" "sselog")
3748 (set_attr "prefix_data16" "1")
3749 (set_attr "prefix_extra" "1")
3750 (set_attr "length_immediate" "1")
3751 (set_attr "prefix" "maybe_vex")
3752 (set_attr "mode" "V4SF")])
;; Extraction of element i (operand 2, 0..3) from a V4SF in offsettable
;; memory.  The split replaces it with an SFmode move from the same
;; address plus i * 4 bytes.
3754 (define_insn_and_split "*vec_extract_v4sf_mem"
3755 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3757 (match_operand:V4SF 1 "memory_operand" "o")
3758 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3764 int i = INTVAL (operands[2]);
3766 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3770 ;; Modes handled by vec_extract patterns.
;; The 128-bit integer and float modes are listed unconditionally; the
;; 256-bit float modes V8SF and V4DF are enabled only under TARGET_AVX.
3771 (define_mode_iterator VEC_EXTRACT_MODE
3772 [V16QI V8HI V4SI V2DI
3773 (V8SF "TARGET_AVX") V4SF
3774 (V4DF "TARGET_AVX") V2DF])
;; vec_extract for every VEC_EXTRACT_MODE: operand 0 is the scalar
;; destination register, operand 1 the vector register, operand 2 a
;; constant integer passed as INTVAL to ix86_expand_vector_extract.
3776 (define_expand "vec_extract<mode>"
3777 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3778 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3779 (match_operand 2 "const_int_operand" "")]
3782 ix86_expand_vector_extract (false, operands[0], operands[1],
3783 INTVAL (operands[2]));
3787 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3789 ;; Parallel double-precision floating point element swizzling
3791 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3793 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhpd on V4DF operands 1 and 2, described as a selection with
;; element indices 1, 5, 3, 7 (the odd element of each operand within
;; each pair of positions).
3794 (define_insn "avx_unpckhpd256"
3795 [(set (match_operand:V4DF 0 "register_operand" "=x")
3798 (match_operand:V4DF 1 "register_operand" "x")
3799 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3800 (parallel [(const_int 1) (const_int 5)
3801 (const_int 3) (const_int 7)])))]
3803 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3804 [(set_attr "type" "sselog")
3805 (set_attr "prefix" "vex")
3806 (set_attr "mode" "V4DF")])
;; Three-step expansion that allocates two fresh V4DF pseudos as
;; operands 3 and 4.  The visible selections use indices {0,4,2,6} and
;; {1,5,3,7} on operands 1 and 2, followed by a final selection with
;; indices {2,3,6,7} that sets operand 0.
3808 (define_expand "vec_interleave_highv4df"
3812 (match_operand:V4DF 1 "register_operand" "x")
3813 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3814 (parallel [(const_int 0) (const_int 4)
3815 (const_int 2) (const_int 6)])))
3821 (parallel [(const_int 1) (const_int 5)
3822 (const_int 3) (const_int 7)])))
3823 (set (match_operand:V4DF 0 "register_operand" "")
3828 (parallel [(const_int 2) (const_int 3)
3829 (const_int 6) (const_int 7)])))]
3832 operands[3] = gen_reg_rtx (V4DFmode);
3833 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF high interleave.  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operands (second
;; argument 1), operand 2 is forced into a V2DF register.
3837 (define_expand "vec_interleave_highv2df"
3838 [(set (match_operand:V2DF 0 "register_operand" "")
3841 (match_operand:V2DF 1 "nonimmediate_operand" "")
3842 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3843 (parallel [(const_int 1)
3847 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3848 operands[2] = force_reg (V2DFmode, operands[2]);
;; SSE3 V2DF high interleave.  Alternatives, in order:
;;   0/1  (v)unpckhpd          - both inputs in registers
;;   2    %vmovddup of %H1     - operand 1 in memory, operand 2 == operand 1
;;   3/4  (v)movlpd from %H1   - operand 1 in offsettable memory
;;   5    %vmovhpd store       - destination in memory, tied to operand 2
3851 (define_insn "*sse3_interleave_highv2df"
3852 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3855 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3856 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3857 (parallel [(const_int 1)
3859 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3861 unpckhpd\t{%2, %0|%0, %2}
3862 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3863 %vmovddup\t{%H1, %0|%0, %H1}
3864 movlpd\t{%H1, %0|%0, %H1}
3865 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3866 %vmovhpd\t{%1, %0|%0, %1}"
3867 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3868 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3869 (set_attr "prefix_data16" "*,*,*,1,*,1")
3870 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3871 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF high interleave.  Alternatives: unpckhpd for register
;; inputs, movlpd from %H1 when operand 1 is in offsettable memory, and
;; a movhpd store when the destination is memory tied to operand 2.
3873 (define_insn "*sse2_interleave_highv2df"
3874 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3877 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3878 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3879 (parallel [(const_int 1)
3881 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3883 unpckhpd\t{%2, %0|%0, %2}
3884 movlpd\t{%H1, %0|%0, %H1}
3885 movhpd\t{%1, %0|%0, %1}"
3886 [(set_attr "type" "sselog,ssemov,ssemov")
3887 (set_attr "prefix_data16" "*,1,1")
3888 (set_attr "mode" "V2DF,V1DF,V1DF")])
3890 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander with a single V4DF input (operand 1, register or memory)
;; and a selection using element indices 0, 4, 2, 6.
3891 (define_expand "avx_movddup256"
3892 [(set (match_operand:V4DF 0 "register_operand" "")
3895 (match_operand:V4DF 1 "nonimmediate_operand" "")
3897 (parallel [(const_int 0) (const_int 4)
3898 (const_int 2) (const_int 6)])))]
;; Expander with two V4DF inputs (operand 1 a register, operand 2 a
;; register or memory) and a selection using element indices 0, 4, 2, 6.
3901 (define_expand "avx_unpcklpd256"
3902 [(set (match_operand:V4DF 0 "register_operand" "")
3905 (match_operand:V4DF 1 "register_operand" "")
3906 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3907 (parallel [(const_int 0) (const_int 4)
3908 (const_int 2) (const_int 6)])))]
;; Insn matching the {0,4,2,6} selection on V4DF operands 1 and 2.
;; When both inputs are the same operand (constraint "1") it is emitted
;; as vmovddup, otherwise as vunpcklpd.  The condition only allows
;; operand 1 in memory when it is rtx_equal_p to operand 2.
3911 (define_insn "*avx_unpcklpd256"
3912 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3915 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3916 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3917 (parallel [(const_int 0) (const_int 4)
3918 (const_int 2) (const_int 6)])))]
3920 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3922 vmovddup\t{%1, %0|%0, %1}
3923 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3924 [(set_attr "type" "sselog")
3925 (set_attr "prefix" "vex")
3926 (set_attr "mode" "V4DF")])
;; Three-step expansion that allocates two fresh V4DF pseudos as
;; operands 3 and 4.  The visible selections use indices {0,4,2,6} and
;; {1,5,3,7} on operands 1 and 2, followed by a final selection with
;; indices {0,1,4,5} that sets operand 0.
3928 (define_expand "vec_interleave_lowv4df"
3932 (match_operand:V4DF 1 "register_operand" "x")
3933 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3934 (parallel [(const_int 0) (const_int 4)
3935 (const_int 2) (const_int 6)])))
3941 (parallel [(const_int 1) (const_int 5)
3942 (const_int 3) (const_int 7)])))
3943 (set (match_operand:V4DF 0 "register_operand" "")
3948 (parallel [(const_int 0) (const_int 1)
3949 (const_int 4) (const_int 5)])))]
3952 operands[3] = gen_reg_rtx (V4DFmode);
3953 operands[4] = gen_reg_rtx (V4DFmode);
;; Expander for the V2DF low interleave.  If
;; ix86_vec_interleave_v2df_operator_ok rejects the operands (second
;; argument 0), operand 1 is forced into a V2DF register.
3956 (define_expand "vec_interleave_lowv2df"
3957 [(set (match_operand:V2DF 0 "register_operand" "")
3960 (match_operand:V2DF 1 "nonimmediate_operand" "")
3961 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3962 (parallel [(const_int 0)
3966 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3967 operands[1] = force_reg (V2DFmode, operands[1]);
;; SSE3 V2DF low interleave.  Alternatives, in order:
;;   0/1  (v)unpcklpd          - both inputs in registers
;;   2    %vmovddup            - operand 1 in memory, operand 2 == operand 1
;;   3/4  (v)movhpd load       - operand 2 in memory
;;   5    %vmovlpd to %H0      - destination in offsettable memory, tied
;;                               to operand 1
3970 (define_insn "*sse3_interleave_lowv2df"
3971 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
3974 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
3975 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
3976 (parallel [(const_int 0)
3978 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3980 unpcklpd\t{%2, %0|%0, %2}
3981 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3982 %vmovddup\t{%1, %0|%0, %1}
3983 movhpd\t{%2, %0|%0, %2}
3984 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3985 %vmovlpd\t{%2, %H0|%H0, %2}"
3986 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3987 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3988 (set_attr "prefix_data16" "*,*,*,1,*,1")
3989 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3990 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; SSE2 V2DF low interleave, operand 1 always tied to the destination.
;; Alternatives: unpcklpd for a register operand 2, movhpd load for a
;; memory operand 2, and a movlpd store to %H0 when the destination is
;; offsettable memory.
3992 (define_insn "*sse2_interleave_lowv2df"
3993 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3996 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3997 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3998 (parallel [(const_int 0)
4000 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4002 unpcklpd\t{%2, %0|%0, %2}
4003 movhpd\t{%2, %0|%0, %2}
4004 movlpd\t{%2, %H0|%H0, %2}"
4005 [(set_attr "type" "sselog,ssemov,ssemov")
4006 (set_attr "prefix_data16" "*,1,1")
4007 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Active when TARGET_SSE3 && reload_completed: the destination is a
;; V2DF in memory and operand 1 is a V2DF register.  The DFmode view of
;; that register is stored twice, at byte offsets 0 and 8.
4010 [(set (match_operand:V2DF 0 "memory_operand" "")
4013 (match_operand:V2DF 1 "register_operand" "")
4015 (parallel [(const_int 0)
4017 "TARGET_SSE3 && reload_completed"
4020 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4021 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4022 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Active under TARGET_SSE3 when selector operand 3 equals selector
;; operand 2 plus 2.  The replacement is a vec_duplicate:V2DF of the
;; DFmode word found at byte offset 8 * operand 2 of memory operand 1.
4027 [(set (match_operand:V2DF 0 "register_operand" "")
4030 (match_operand:V2DF 1 "memory_operand" "")
4032 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4033 (match_operand:SI 3 "const_int_operand" "")])))]
4034 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4035 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4037 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Expander that decodes the immediate in operand 3 into explicit
;; element selectors for gen_avx_shufpd256_1.  In the visible arguments
;; mask bit 1 chooses 5 over 4, bit 2 chooses 3 over 2, and bit 3
;; chooses 7 over 6.
4040 (define_expand "avx_shufpd256"
4041 [(match_operand:V4DF 0 "register_operand" "")
4042 (match_operand:V4DF 1 "register_operand" "")
4043 (match_operand:V4DF 2 "nonimmediate_operand" "")
4044 (match_operand:SI 3 "const_int_operand" "")]
4047 int mask = INTVAL (operands[3]);
4048 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4050 GEN_INT (mask & 2 ? 5 : 4),
4051 GEN_INT (mask & 4 ? 3 : 2),
4052 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with the element selectors given as operands 3..6
;; (ranges 0-1, 4-5, 2-3 and 6-7).  The output code rebuilds the 4-bit
;; immediate: bit k is selector k's offset from the base of its range,
;; and the result replaces operand 3 before printing.
4056 (define_insn "avx_shufpd256_1"
4057 [(set (match_operand:V4DF 0 "register_operand" "=x")
4060 (match_operand:V4DF 1 "register_operand" "x")
4061 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4062 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4063 (match_operand 4 "const_4_to_5_operand" "")
4064 (match_operand 5 "const_2_to_3_operand" "")
4065 (match_operand 6 "const_6_to_7_operand" "")])))]
4069 mask = INTVAL (operands[3]);
4070 mask |= (INTVAL (operands[4]) - 4) << 1;
4071 mask |= (INTVAL (operands[5]) - 2) << 2;
4072 mask |= (INTVAL (operands[6]) - 6) << 3;
4073 operands[3] = GEN_INT (mask);
4075 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4077 [(set_attr "type" "sselog")
4078 (set_attr "length_immediate" "1")
4079 (set_attr "prefix" "vex")
4080 (set_attr "mode" "V4DF")])
;; Expander that decodes the immediate in operand 3 into explicit
;; element selectors for gen_sse2_shufpd_v2df.  In the visible argument
;; mask bit 1 chooses 3 over 2.
4082 (define_expand "sse2_shufpd"
4083 [(match_operand:V2DF 0 "register_operand" "")
4084 (match_operand:V2DF 1 "register_operand" "")
4085 (match_operand:V2DF 2 "nonimmediate_operand" "")
4086 (match_operand:SI 3 "const_int_operand" "")]
4089 int mask = INTVAL (operands[3]);
4090 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4092 GEN_INT (mask & 2 ? 3 : 2)));
4096 ;; Modes handled by vec_extract_even/odd pattern.
;; V4SF is listed unconditionally; the 128-bit integer modes and V2DF
;; require TARGET_SSE2, and the 256-bit modes V8SF and V4DF require
;; TARGET_AVX.
4097 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4098 [(V16QI "TARGET_SSE2")
4099 (V8HI "TARGET_SSE2")
4100 (V4SI "TARGET_SSE2")
4101 (V2DI "TARGET_SSE2")
4102 (V8SF "TARGET_AVX") V4SF
4103 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even for every VEC_EXTRACT_EVENODD_MODE: three register
;; operands of the same mode, delegated to
;; ix86_expand_vec_extract_even_odd with a final argument of 0.
4105 (define_expand "vec_extract_even<mode>"
4106 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4107 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4108 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4111 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd for every VEC_EXTRACT_EVENODD_MODE: three register
;; operands of the same mode, delegated to
;; ix86_expand_vec_extract_even_odd with a final argument of 1.
4115 (define_expand "vec_extract_odd<mode>"
4116 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4117 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4118 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4121 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4125 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V2DI high interleave of operands 1 and 2, emitted as punpckhqdq
;; (operand 1 tied to the destination) or three-operand vpunpckhqdq.
4127 (define_insn "vec_interleave_highv2di"
4128 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4131 (match_operand:V2DI 1 "register_operand" "0,x")
4132 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4133 (parallel [(const_int 1)
4137 punpckhqdq\t{%2, %0|%0, %2}
4138 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4139 [(set_attr "isa" "noavx,avx")
4140 (set_attr "type" "sselog")
4141 (set_attr "prefix_data16" "1,*")
4142 (set_attr "prefix" "orig,vex")
4143 (set_attr "mode" "TI")])
;; V2DI low interleave of operands 1 and 2, emitted as punpcklqdq
;; (operand 1 tied to the destination) or three-operand vpunpcklqdq.
4145 (define_insn "vec_interleave_lowv2di"
4146 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4149 (match_operand:V2DI 1 "register_operand" "0,x")
4150 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4151 (parallel [(const_int 0)
4155 punpcklqdq\t{%2, %0|%0, %2}
4156 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4157 [(set_attr "isa" "noavx,avx")
4158 (set_attr "type" "sselog")
4159 (set_attr "prefix_data16" "1,*")
4160 (set_attr "prefix" "orig,vex")
4161 (set_attr "mode" "TI")])
;; (v)shufpd for the VI8F_128 modes: a two-element selection from the
;; concatenation of operands 1 and 2, with selector operand 3 in 0..1
;; and selector operand 4 in 2..3.  The output code packs them into a
;; 2-bit immediate (bit 0 = operand 3, bit 1 = operand 4 - 2) that
;; replaces operand 3 before printing.
4163 (define_insn "sse2_shufpd_<mode>"
4164 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4165 (vec_select:VI8F_128
4166 (vec_concat:<ssedoublevecmode>
4167 (match_operand:VI8F_128 1 "register_operand" "0,x")
4168 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4169 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4170 (match_operand 4 "const_2_to_3_operand" "")])))]
4174 mask = INTVAL (operands[3]);
4175 mask |= (INTVAL (operands[4]) - 2) << 1;
4176 operands[3] = GEN_INT (mask);
4178 switch (which_alternative)
4181 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4183 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4188 [(set_attr "isa" "noavx,avx")
4189 (set_attr "type" "sselog")
4190 (set_attr "length_immediate" "1")
4191 (set_attr "prefix" "orig,vex")
4192 (set_attr "mode" "V2DF")])
4194 ;; Avoid combining registers from different units in a single alternative,
4195 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 1 of V2DF operand 1 as DF; memory-to-memory is
;; excluded by the insn condition.  Six alternatives: a %vmovhpd store
;; to memory, two register forms (types sselog1; the AVX one is
;; vunpckhpd), and three alternatives reading offsettable memory into
;; SSE, x87 and general registers (types ssemov, fmov, imov).
;; prefix_data16 is computed from alternative 0 and TARGET_AVX.
4196 (define_insn "sse2_storehpd"
4197 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4199 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4200 (parallel [(const_int 1)])))]
4201 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4203 %vmovhpd\t{%1, %0|%0, %1}
4205 vunpckhpd\t{%d1, %0|%0, %d1}
4209 [(set_attr "isa" "base,noavx,avx,base,base,base")
4210 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4211 (set (attr "prefix_data16")
4213 (and (eq_attr "alternative" "0")
4214 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4216 (const_string "*")))
4217 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4218 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Active when TARGET_SSE2 && reload_completed: extracting element 1
;; from a V2DF in memory becomes a plain DFmode load from byte offset 8
;; of that memory.
4221 [(set (match_operand:DF 0 "register_operand" "")
4223 (match_operand:V2DF 1 "memory_operand" "")
4224 (parallel [(const_int 1)])))]
4225 "TARGET_SSE2 && reload_completed"
4226 [(set (match_dup 0) (match_dup 1))]
4227 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4229 ;; Avoid combining registers from different units in a single alternative,
4230 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extract element 0 of V2DF operand 1 as DF; memory-to-memory is
;; excluded by the insn condition.  Five alternatives: a %vmovlpd store
;; to memory (the only one with prefix_data16 set), then register and
;; memory sources into an SSE register, and memory sources into x87 and
;; general registers (types fmov, imov).
4231 (define_insn "sse2_storelpd"
4232 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4234 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4235 (parallel [(const_int 0)])))]
4236 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4238 %vmovlpd\t{%1, %0|%0, %1}
4243 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4244 (set_attr "prefix_data16" "1,*,*,*,*")
4245 (set_attr "prefix" "maybe_vex")
4246 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Active when TARGET_SSE2 && reload_completed: extracting element 0 of
;; V2DF operand 1 into a DF register becomes a plain DFmode move, with
;; operand 1 re-expressed in DFmode (same register number, or
;; gen_lowpart).
4249 [(set (match_operand:DF 0 "register_operand" "")
4251 (match_operand:V2DF 1 "nonimmediate_operand" "")
4252 (parallel [(const_int 0)])))]
4253 "TARGET_SSE2 && reload_completed"
4256 rtx op1 = operands[1];
4258 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4260 op1 = gen_lowpart (DFmode, op1);
4261 emit_move_insn (operands[0], op1);
;; Expander wrapper around sse2_loadhpd.  The operands are passed
;; through ix86_fixup_binary_operands, gen_sse2_loadhpd is emitted into
;; the destination that call returns, and the result is copied back to
;; operand 0 when the returned destination differs from it.
4265 (define_expand "sse2_loadhpd_exp"
4266 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4269 (match_operand:V2DF 1 "nonimmediate_operand" "")
4270 (parallel [(const_int 0)]))
4271 (match_operand:DF 2 "nonimmediate_operand" "")))]
4274 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4276 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4278 /* Fix up the destination if needed. */
4279 if (dst != operands[0])
4280 emit_move_insn (operands[0], dst);
4285 ;; Avoid combining registers from different units in a single alternative,
4286 ;; see comment above inline_secondary_memory_needed function in i386.c
;; V2DF result that keeps element 0 of operand 1 and pairs it with DF
;; operand 2.  Seven alternatives: (v)movhpd when operand 2 is in
;; memory, (v)unpcklpd when it is in an SSE register, and three further
;; alternatives (types ssemov, fmov, imov) taking operand 2 from an SSE,
;; x87 or general register.  Operands 1 and 2 may not both be memory.
4287 (define_insn "sse2_loadhpd"
4288 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4292 (match_operand:V2DF 1 "nonimmediate_operand"
4294 (parallel [(const_int 0)]))
4295 (match_operand:DF 2 "nonimmediate_operand"
4296 " m,m,x,x,x,*f,r")))]
4297 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4299 movhpd\t{%2, %0|%0, %2}
4300 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4301 unpcklpd\t{%2, %0|%0, %2}
4302 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4306 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
4307 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4308 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4309 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4310 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Active when TARGET_SSE2 && reload_completed: the V2DF destination is
;; in memory and its own element 0 is kept, so only DF register
;; operand 1 has to be written.  It is stored as a plain DFmode move to
;; byte offset 8 of the destination.
4313 [(set (match_operand:V2DF 0 "memory_operand" "")
4315 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4316 (match_operand:DF 1 "register_operand" "")))]
4317 "TARGET_SSE2 && reload_completed"
4318 [(set (match_dup 0) (match_dup 1))]
4319 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  The operands are passed
;; through ix86_fixup_binary_operands, gen_sse2_loadlpd is emitted into
;; the destination that call returns, and the result is copied back to
;; operand 0 when the returned destination differs from it.
4321 (define_expand "sse2_loadlpd_exp"
4322 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4324 (match_operand:DF 2 "nonimmediate_operand" "")
4326 (match_operand:V2DF 1 "nonimmediate_operand" "")
4327 (parallel [(const_int 1)]))))]
4330 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4332 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4334 /* Fix up the destination if needed. */
4335 if (dst != operands[0])
4336 emit_move_insn (operands[0], dst);
4341 ;; Avoid combining registers from different units in a single alternative,
4342 ;; see comment above inline_secondary_memory_needed function in i386.c
;; V2DF result that pairs DF operand 2 with element 1 of operand 1.
;; Eleven alternatives, in order:
;;   0     %vmovsd load, operand 1 matching "C"
;;   1/2   (v)movlpd load from memory
;;   3/4   (v)movsd from an SSE register
;;   5     shufpd $2 with operand 2 tied to the destination
;;   6/7   (v)movhpd from %H1, operand 1 in offsettable memory
;;   8-10  memory destination tied to operand 1, operand 2 in an SSE,
;;         x87 or general register (types ssemov, fmov, imov)
;; Operands 1 and 2 may not both be memory.
4343 (define_insn "sse2_loadlpd"
4344 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4345 "=x,x,x,x,x,x,x,x,m,m ,m")
4347 (match_operand:DF 2 "nonimmediate_operand"
4348 " m,m,m,x,x,0,0,x,x,*f,r")
4350 (match_operand:V2DF 1 "vector_move_operand"
4351 " C,0,x,0,x,x,o,o,0,0 ,0")
4352 (parallel [(const_int 1)]))))]
4353 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4355 %vmovsd\t{%2, %0|%0, %2}
4356 movlpd\t{%2, %0|%0, %2}
4357 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4358 movsd\t{%2, %0|%0, %2}
4359 vmovsd\t{%2, %1, %0|%0, %1, %2}
4360 shufpd\t{$2, %1, %0|%0, %1, 2}
4361 movhpd\t{%H1, %0|%0, %H1}
4362 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4366 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4367 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4368 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4369 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4370 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4371 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Active when TARGET_SSE2 && reload_completed: the V2DF destination is
;; in memory and its own element 1 is kept, so only DF register
;; operand 1 has to be written, as a plain DFmode move.
;; Operand 1 comes first in the pairing and so replaces element 0, the
;; low half of the vector; the store therefore goes to byte offset 0.
;; Offset 8 would overwrite element 1, which this pattern preserves.
4374 [(set (match_operand:V2DF 0 "memory_operand" "")
4376 (match_operand:DF 1 "register_operand" "")
4377 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4378 "TARGET_SSE2 && reload_completed"
4379 [(set (match_dup 0) (match_dup 1))]
4380 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4382 ;; Not sure these two are ever used, but it doesn't hurt to have
;; Extract element 1 of a V2DF into a DF operand when SSE is available but
;; SSE2 is not.  Alternatives: store to memory (movhps), register to
;; register (movhlps), load from offsettable memory via %H1 (movlps).
;; The condition also rejects a memory-to-memory form.
4384 (define_insn "*vec_extractv2df_1_sse"
4385 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4387 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4388 (parallel [(const_int 1)])))]
4389 "!TARGET_SSE2 && TARGET_SSE
4390 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4392 movhps\t{%1, %0|%0, %1}
4393 movhlps\t{%1, %0|%0, %1}
4394 movlps\t{%H1, %0|%0, %H1}"
4395 [(set_attr "type" "ssemov")
4396 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extract element 0 of a V2DF into a DF operand when SSE is available but
;; SSE2 is not.  Alternatives: store to memory (movlps), register to
;; register (movaps), load from memory (movlps).  The condition also
;; rejects a memory-to-memory form.
4398 (define_insn "*vec_extractv2df_0_sse"
4399 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4401 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4402 (parallel [(const_int 0)])))]
4403 "!TARGET_SSE2 && TARGET_SSE
4404 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4406 movlps\t{%1, %0|%0, %1}
4407 movaps\t{%1, %0|%0, %1}
4408 movlps\t{%1, %0|%0, %1}"
4409 [(set_attr "type" "ssemov")
4410 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; V2DF pattern combining operands 1 and 2, with nine alternatives covering
;; register, memory (m) and offsettable-memory (o) destinations.  The
;; output is movsd, movlpd, shufpd or movhps, or the VEX form selected by
;; the "isa" attribute.  prefix_data16 is computed rather than listed: it
;; tests alternatives 2 and 4 together with TARGET_AVX being 0, and is "*"
;; otherwise.
4412 (define_insn "sse2_movsd"
4413 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4415 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4416 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4420 movsd\t{%2, %0|%0, %2}
4421 vmovsd\t{%2, %1, %0|%0, %1, %2}
4422 movlpd\t{%2, %0|%0, %2}
4423 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4424 %vmovlpd\t{%2, %0|%0, %2}
4425 shufpd\t{$2, %1, %0|%0, %1, 2}
4426 movhps\t{%H1, %0|%0, %H1}
4427 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4428 %vmovhps\t{%1, %H0|%H0, %1}"
4429 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4430 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4431 (set (attr "prefix_data16")
4433 (and (eq_attr "alternative" "2,4")
4434 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4436 (const_string "*")))
4437 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4438 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4439 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF register result built from a single DF operand (register or
;; memory) with one %vmovddup instruction.
4441 (define_insn "*vec_dupv2df_sse3"
4442 [(set (match_operand:V2DF 0 "register_operand" "=x")
4444 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4446 "%vmovddup\t{%1, %0|%0, %1}"
4447 [(set_attr "type" "sselog1")
4448 (set_attr "prefix" "maybe_vex")
4449 (set_attr "mode" "DF")])
;; Named V2DF pattern taking one DF register operand, which must be the
;; same register as the destination (constraint "0").
4451 (define_insn "vec_dupv2df"
4452 [(set (match_operand:V2DF 0 "register_operand" "=x")
4454 (match_operand:DF 1 "register_operand" "0")))]
4457 [(set_attr "type" "sselog1")
4458 (set_attr "mode" "V2DF")])
;; V2DF register result emitted as a single %vmovddup of DF operand 1;
;; only operand 1 appears in the output template.
4460 (define_insn "*vec_concatv2df_sse3"
4461 [(set (match_operand:V2DF 0 "register_operand" "=x")
4463 (match_operand:DF 1 "nonimmediate_operand" "xm")
4466 "%vmovddup\t{%1, %0|%0, %1}"
4467 [(set_attr "type" "sselog1")
4468 (set_attr "prefix" "maybe_vex")
4469 (set_attr "mode" "DF")])
;; Build a V2DF register from DF operands 1 and 2.  Seven alternatives:
;; unpcklpd/vunpcklpd (operand 2 in a register), movhpd/vmovhpd (operand 2
;; in memory), %vmovsd of operand 1 alone (operand 2 constraint C), and
;; movlhps/movhps for the plain "x" register class.
4471 (define_insn "*vec_concatv2df"
4472 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4474 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4475 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4478 unpcklpd\t{%2, %0|%0, %2}
4479 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4480 movhpd\t{%2, %0|%0, %2}
4481 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4482 %vmovsd\t{%1, %0|%0, %1}
4483 movlhps\t{%2, %0|%0, %2}
4484 movhps\t{%2, %0|%0, %2}"
4485 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4486 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4487 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4488 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4489 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4491 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4493 ;; Parallel integral arithmetic
4495 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for negation of the VI_128 integer vector modes.  The
;; preparation statement sets operands[2] to a register holding the zero
;; vector of <MODE>mode.
4497 (define_expand "neg<mode>2"
4498 [(set (match_operand:VI_128 0 "register_operand" "")
4501 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4503 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for the <plusminus_insn> operations on VI_128 modes; the
;; operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
4505 (define_expand "<plusminus_insn><mode>3"
4506 [(set (match_operand:VI_128 0 "register_operand" "")
4508 (match_operand:VI_128 1 "nonimmediate_operand" "")
4509 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4511 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the <plusminus_insn> operations on VI_128 modes.
;; Alternative 0 is the two-operand form with the destination tied to
;; operand 1; alternative 1 is the three-operand VEX form.  Requires
;; TARGET_SSE2 and operands accepted by ix86_binary_operator_ok.
4513 (define_insn "*<plusminus_insn><mode>3"
4514 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4516 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4517 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4518 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4520 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4521 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4522 [(set_attr "isa" "noavx,avx")
4523 (set_attr "type" "sseiadd")
4524 (set_attr "prefix_data16" "1,*")
4525 (set_attr "prefix" "orig,vex")
4526 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on VI12_128 modes; the
;; operands are adjusted in place by ix86_fixup_binary_operands_no_copy.
4528 (define_expand "sse2_<plusminus_insn><mode>3"
4529 [(set (match_operand:VI12_128 0 "register_operand" "")
4530 (sat_plusminus:VI12_128
4531 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4532 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4534 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus operations on VI12_128 modes.  Alternative 0
;; is the two-operand form with the destination tied to operand 1;
;; alternative 1 is the three-operand VEX form.
4536 (define_insn "*sse2_<plusminus_insn><mode>3"
4537 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4538 (sat_plusminus:VI12_128
4539 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4540 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4541 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4543 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4544 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4545 [(set_attr "isa" "noavx,avx")
4546 (set_attr "type" "sseiadd")
4547 (set_attr "prefix_data16" "1,*")
4548 (set_attr "prefix" "orig,vex")
4549 (set_attr "mode" "TI")])
;; V16QI multiply, split while new pseudos can still be created.
;; Sequence: interleave each input with itself (high and low halves) so
;; every 16-bit word has a source byte in its low byte, multiply the two
;; pairs as V8HI, then gather the even bytes of both products into
;; operands[0].  Six V16QI temporaries t[0..5] are used.
4551 (define_insn_and_split "mulv16qi3"
4552 [(set (match_operand:V16QI 0 "register_operand" "")
4553 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4554 (match_operand:V16QI 2 "register_operand" "")))]
4556 && can_create_pseudo_p ()"
4564 for (i = 0; i < 6; ++i)
4565 t[i] = gen_reg_rtx (V16QImode);
4567 /* Unpack data such that we've got a source byte in each low byte of
4568 each word. We don't care what goes into the high byte of each word.
4569 Rather than trying to get zero in there, most convenient is to let
4570 it be a copy of the low byte. */
4571 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4572 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4573 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4574 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4576 /* Multiply words. The end-of-line annotations here give a picture of what
4577 the output of that instruction looks like. Dot means don't care; the
4578 letters are the bytes of the result with A being the most significant. */
4579 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4580 gen_lowpart (V8HImode, t[0]),
4581 gen_lowpart (V8HImode, t[1])));
4582 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4583 gen_lowpart (V8HImode, t[2]),
4584 gen_lowpart (V8HImode, t[3])));
4586 /* Extract the even bytes and merge them back together. */
4587 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; Expander for V8HI multiply; the operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
4591 (define_expand "mulv8hi3"
4592 [(set (match_operand:V8HI 0 "register_operand" "")
4593 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4594 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4596 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI multiply insn: pmullw (destination tied to operand 1) or the
;; three-operand vpmullw.  Operand 1 is marked commutative ("%").
4598 (define_insn "*mulv8hi3"
4599 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4600 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4601 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4602 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4604 pmullw\t{%2, %0|%0, %2}
4605 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4606 [(set_attr "isa" "noavx,avx")
4607 (set_attr "type" "sseimul")
4608 (set_attr "prefix_data16" "1,*")
4609 (set_attr "prefix" "orig,vex")
4610 (set_attr "mode" "TI")])
;; Expander for the V8HI high-part multiply patterns (one per value of
;; the <s> iterator attribute); the operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
4612 (define_expand "<s>mulv8hi3_highpart"
4613 [(set (match_operand:V8HI 0 "register_operand" "")
4618 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4620 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4623 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; V8HI high-part multiply insn: pmulh<u>w (destination tied to
;; operand 1) or the three-operand vpmulh<u>w.
4625 (define_insn "*<s>mulv8hi3_highpart"
4626 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4631 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4633 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4635 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4637 pmulh<u>w\t{%2, %0|%0, %2}
4638 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4639 [(set_attr "isa" "noavx,avx")
4640 (set_attr "type" "sseimul")
4641 (set_attr "prefix_data16" "1,*")
4642 (set_attr "prefix" "orig,vex")
4643 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from two V4SI inputs; the pattern
;; selects elements 0 and 2 of each input.  The operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy.
4645 (define_expand "sse2_umulv2siv2di3"
4646 [(set (match_operand:V2DI 0 "register_operand" "")
4650 (match_operand:V4SI 1 "nonimmediate_operand" "")
4651 (parallel [(const_int 0) (const_int 2)])))
4654 (match_operand:V4SI 2 "nonimmediate_operand" "")
4655 (parallel [(const_int 0) (const_int 2)])))))]
4657 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn form of sse2_umulv2siv2di3: pmuludq (destination tied to
;; operand 1) or the three-operand vpmuludq, working on elements 0 and 2
;; of each V4SI input.
4659 (define_insn "*sse2_umulv2siv2di3"
4660 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4664 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4665 (parallel [(const_int 0) (const_int 2)])))
4668 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4669 (parallel [(const_int 0) (const_int 2)])))))]
4670 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4672 pmuludq\t{%2, %0|%0, %2}
4673 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4674 [(set_attr "isa" "noavx,avx")
4675 (set_attr "type" "sseimul")
4676 (set_attr "prefix_data16" "1,*")
4677 (set_attr "prefix" "orig,vex")
4678 (set_attr "mode" "TI")])
;; Expander producing a V2DI result from two V4SI inputs; the pattern
;; selects elements 0 and 2 of each input.  The operands are adjusted in
;; place by ix86_fixup_binary_operands_no_copy.
4680 (define_expand "sse4_1_mulv2siv2di3"
4681 [(set (match_operand:V2DI 0 "register_operand" "")
4685 (match_operand:V4SI 1 "nonimmediate_operand" "")
4686 (parallel [(const_int 0) (const_int 2)])))
4689 (match_operand:V4SI 2 "nonimmediate_operand" "")
4690 (parallel [(const_int 0) (const_int 2)])))))]
4692 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Insn form of sse4_1_mulv2siv2di3 (requires TARGET_SSE4_1): pmuldq
;; (destination tied to operand 1) or the three-operand vpmuldq, working
;; on elements 0 and 2 of each V4SI input.
4694 (define_insn "*sse4_1_mulv2siv2di3"
4695 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4699 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4700 (parallel [(const_int 0) (const_int 2)])))
4703 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4704 (parallel [(const_int 0) (const_int 2)])))))]
4705 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4707 pmuldq\t{%2, %0|%0, %2}
4708 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4709 [(set_attr "isa" "noavx,avx")
4710 (set_attr "type" "sseimul")
4711 (set_attr "prefix_data16" "1,*")
4712 (set_attr "prefix_extra" "1")
4713 (set_attr "prefix" "orig,vex")
4714 (set_attr "mode" "TI")])
;; Expander for sse2_pmaddwd: V8HI operands 1 and 2 in, V4SI out.  The
;; pattern takes two sets of V4HI selections from each input, one
;; starting at element 0 and one starting at element 1 and ending at
;; element 7.  The operands are adjusted in place by
;; ix86_fixup_binary_operands_no_copy.
4716 (define_expand "sse2_pmaddwd"
4717 [(set (match_operand:V4SI 0 "register_operand" "")
4722 (match_operand:V8HI 1 "nonimmediate_operand" "")
4723 (parallel [(const_int 0)
4729 (match_operand:V8HI 2 "nonimmediate_operand" "")
4730 (parallel [(const_int 0)
4736 (vec_select:V4HI (match_dup 1)
4737 (parallel [(const_int 1)
4742 (vec_select:V4HI (match_dup 2)
4743 (parallel [(const_int 1)
4746 (const_int 7)]))))))]
4748 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn form of sse2_pmaddwd: pmaddwd (destination tied to operand 1) or
;; the three-operand vpmaddwd.
4750 (define_insn "*sse2_pmaddwd"
4751 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4756 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4757 (parallel [(const_int 0)
4763 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4764 (parallel [(const_int 0)
4770 (vec_select:V4HI (match_dup 1)
4771 (parallel [(const_int 1)
4776 (vec_select:V4HI (match_dup 2)
4777 (parallel [(const_int 1)
4780 (const_int 7)]))))))]
4781 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4783 pmaddwd\t{%2, %0|%0, %2}
4784 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4785 [(set_attr "isa" "noavx,avx")
4786 (set_attr "type" "sseiadd")
4787 (set_attr "atom_unit" "simul")
4788 (set_attr "prefix_data16" "1,*")
4789 (set_attr "prefix" "orig,vex")
4790 (set_attr "mode" "TI")])
;; Expander for V4SI multiply on register operands.  Only when
;; TARGET_SSE4_1 or TARGET_AVX is set are the operands passed through
;; ix86_fixup_binary_operands_no_copy.
4792 (define_expand "mulv4si3"
4793 [(set (match_operand:V4SI 0 "register_operand" "")
4794 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4795 (match_operand:V4SI 2 "register_operand" "")))]
4798 if (TARGET_SSE4_1 || TARGET_AVX)
4799 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; V4SI multiply insn for TARGET_SSE4_1: pmulld (destination tied to
;; operand 1) or the three-operand vpmulld.
4802 (define_insn "*sse4_1_mulv4si3"
4803 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4804 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4805 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4806 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4808 pmulld\t{%2, %0|%0, %2}
4809 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4810 [(set_attr "isa" "noavx,avx")
4811 (set_attr "type" "sseimul")
4812 (set_attr "prefix_extra" "1")
4813 (set_attr "prefix" "orig,vex")
4814 (set_attr "mode" "TI")])
;; V4SI multiply for SSE2 targets without SSE4.1 or AVX, split while new
;; pseudos can still be created.  Sequence: multiply elements 0 and 2
;; with sse2_umulv2siv2di3; shift both inputs (viewed as V1TI) so that
;; elements 1 and 3 occupy those slots and multiply again; shuffle each
;; product's element 2 down to element 1; interleave the low halves of
;; the two shuffled products into op0.
4816 (define_insn_and_split "*sse2_mulv4si3"
4817 [(set (match_operand:V4SI 0 "register_operand" "")
4818 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4819 (match_operand:V4SI 2 "register_operand" "")))]
4820 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4821 && can_create_pseudo_p ()"
4826 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4832 t1 = gen_reg_rtx (V4SImode);
4833 t2 = gen_reg_rtx (V4SImode);
4834 t3 = gen_reg_rtx (V4SImode);
4835 t4 = gen_reg_rtx (V4SImode);
4836 t5 = gen_reg_rtx (V4SImode);
4837 t6 = gen_reg_rtx (V4SImode);
4838 thirtytwo = GEN_INT (32);
4840 /* Multiply elements 2 and 0. */
4841 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4844 /* Shift both input vectors down one element, so that elements 3
4845 and 1 are now in the slots for elements 2 and 0. For K8, at
4846 least, this is faster than using a shuffle. */
4847 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4848 gen_lowpart (V1TImode, op1),
4850 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4851 gen_lowpart (V1TImode, op2),
4853 /* Multiply elements 3 and 1. */
4854 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4857 /* Move the results in element 2 down to element 1; we don't care
4858 what goes in elements 2 and 3. */
4859 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4860 const0_rtx, const0_rtx));
4861 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4862 const0_rtx, const0_rtx));
4864 /* Merge the parts back together. */
4865 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; V2DI multiply, split while new pseudos can still be created.  Two
;; sequences are visible.  The first uses XOP generators: pshufd on op1,
;; a V4SI multiply giving the cross products, xop_phadddq to add them
;; pairwise, a left shift by 32, and xop_pmacsdql to add the low product.
;; The second uses three sse2_umulv2siv2di3 multiplies (low*low and the
;; two cross products), shifts the cross products left by 32 and adds the
;; three parts.
4869 (define_insn_and_split "mulv2di3"
4870 [(set (match_operand:V2DI 0 "register_operand" "")
4871 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4872 (match_operand:V2DI 2 "register_operand" "")))]
4874 && can_create_pseudo_p ()"
4879 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4888 /* op1: A,B,C,D, op2: E,F,G,H */
4889 op1 = gen_lowpart (V4SImode, op1);
4890 op2 = gen_lowpart (V4SImode, op2);
4892 t1 = gen_reg_rtx (V4SImode);
4893 t2 = gen_reg_rtx (V4SImode);
4894 t3 = gen_reg_rtx (V2DImode);
4895 t4 = gen_reg_rtx (V2DImode);
4898 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4904 /* t2: (B*E),(A*F),(D*G),(C*H) */
4905 emit_insn (gen_mulv4si3 (t2, t1, op2));
4907 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4908 emit_insn (gen_xop_phadddq (t3, t2));
4910 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4911 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4913 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4914 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4918 t1 = gen_reg_rtx (V2DImode);
4919 t2 = gen_reg_rtx (V2DImode);
4920 t3 = gen_reg_rtx (V2DImode);
4921 t4 = gen_reg_rtx (V2DImode);
4922 t5 = gen_reg_rtx (V2DImode);
4923 t6 = gen_reg_rtx (V2DImode);
4924 thirtytwo = GEN_INT (32);
4926 /* Multiply low parts. */
4927 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4928 gen_lowpart (V4SImode, op2)));
4930 /* Shift input vectors right 32 bits so we can multiply high parts. */
4931 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4932 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4934 /* Multiply high parts by low parts. */
4935 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4936 gen_lowpart (V4SImode, t3)));
4937 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4938 gen_lowpart (V4SImode, t2)));
4940 /* Shift them back. */
4941 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
4942 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
4944 /* Add the three parts together. */
4945 emit_insn (gen_addv2di3 (t6, t1, t4));
4946 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Widening V8HI multiply into V4SI, "hi" variant.  Computes the low
;; halves (mulv8hi3) and high halves (smulv8hi3_highpart) of the products
;; and interleaves the high parts of those two vectors into operands[0],
;; viewed as V8HI.
4951 (define_expand "vec_widen_smult_hi_v8hi"
4952 [(match_operand:V4SI 0 "register_operand" "")
4953 (match_operand:V8HI 1 "register_operand" "")
4954 (match_operand:V8HI 2 "register_operand" "")]
4957 rtx op1, op2, t1, t2, dest;
4961 t1 = gen_reg_rtx (V8HImode);
4962 t2 = gen_reg_rtx (V8HImode);
4963 dest = gen_lowpart (V8HImode, operands[0]);
4965 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4966 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4967 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI, "lo" variant.  Computes the low
;; halves (mulv8hi3) and high halves (smulv8hi3_highpart) of the products
;; and interleaves the low parts of those two vectors into operands[0],
;; viewed as V8HI.
4971 (define_expand "vec_widen_smult_lo_v8hi"
4972 [(match_operand:V4SI 0 "register_operand" "")
4973 (match_operand:V8HI 1 "register_operand" "")
4974 (match_operand:V8HI 2 "register_operand" "")]
4977 rtx op1, op2, t1, t2, dest;
4981 t1 = gen_reg_rtx (V8HImode);
4982 t2 = gen_reg_rtx (V8HImode);
4983 dest = gen_lowpart (V8HImode, operands[0]);
4985 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4986 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4987 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI, "hi" variant using the unsigned
;; high-part multiply.  Computes mulv8hi3 and umulv8hi3_highpart of the
;; inputs and interleaves the high parts of those two vectors into
;; operands[0], viewed as V8HI.
4991 (define_expand "vec_widen_umult_hi_v8hi"
4992 [(match_operand:V4SI 0 "register_operand" "")
4993 (match_operand:V8HI 1 "register_operand" "")
4994 (match_operand:V8HI 2 "register_operand" "")]
4997 rtx op1, op2, t1, t2, dest;
5001 t1 = gen_reg_rtx (V8HImode);
5002 t2 = gen_reg_rtx (V8HImode);
5003 dest = gen_lowpart (V8HImode, operands[0]);
5005 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5006 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5007 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; Widening V8HI multiply into V4SI, "lo" variant using the unsigned
;; high-part multiply.  Computes mulv8hi3 and umulv8hi3_highpart of the
;; inputs and interleaves the low parts of those two vectors into
;; operands[0], viewed as V8HI.
5011 (define_expand "vec_widen_umult_lo_v8hi"
5012 [(match_operand:V4SI 0 "register_operand" "")
5013 (match_operand:V8HI 1 "register_operand" "")
5014 (match_operand:V8HI 2 "register_operand" "")]
5017 rtx op1, op2, t1, t2, dest;
5021 t1 = gen_reg_rtx (V8HImode);
5022 t2 = gen_reg_rtx (V8HImode);
5023 dest = gen_lowpart (V8HImode, operands[0]);
5025 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5026 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5027 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; Widening V4SI multiply into V2DI, "hi" variant.  Both inputs are
;; shuffled into fresh V4SI temporaries with sse2_pshufd_1 and the result
;; is produced by xop_mulv2div2di3_high.
5031 (define_expand "vec_widen_smult_hi_v4si"
5032 [(match_operand:V2DI 0 "register_operand" "")
5033 (match_operand:V4SI 1 "register_operand" "")
5034 (match_operand:V4SI 2 "register_operand" "")]
5039 t1 = gen_reg_rtx (V4SImode);
5040 t2 = gen_reg_rtx (V4SImode);
5042 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5047 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5052 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI, "lo" variant.  Both inputs are
;; shuffled into fresh V4SI temporaries with sse2_pshufd_1 and the result
;; is produced by xop_mulv2div2di3_low.
5056 (define_expand "vec_widen_smult_lo_v4si"
5057 [(match_operand:V2DI 0 "register_operand" "")
5058 (match_operand:V4SI 1 "register_operand" "")
5059 (match_operand:V4SI 2 "register_operand" "")]
5064 t1 = gen_reg_rtx (V4SImode);
5065 t2 = gen_reg_rtx (V4SImode);
5067 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5072 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5077 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI, "hi" variant.  Each input is
;; interleaved with itself (high half) into a V4SI temporary, and the two
;; temporaries are multiplied with sse2_umulv2siv2di3.
5081 (define_expand "vec_widen_umult_hi_v4si"
5082 [(match_operand:V2DI 0 "register_operand" "")
5083 (match_operand:V4SI 1 "register_operand" "")
5084 (match_operand:V4SI 2 "register_operand" "")]
5087 rtx op1, op2, t1, t2;
5091 t1 = gen_reg_rtx (V4SImode);
5092 t2 = gen_reg_rtx (V4SImode);
5094 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5095 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5096 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Widening V4SI multiply into V2DI, "lo" variant.  Each input is
;; interleaved with itself (low half) into a V4SI temporary, and the two
;; temporaries are multiplied with sse2_umulv2siv2di3.
5100 (define_expand "vec_widen_umult_lo_v4si"
5101 [(match_operand:V2DI 0 "register_operand" "")
5102 (match_operand:V4SI 1 "register_operand" "")
5103 (match_operand:V4SI 2 "register_operand" "")]
5106 rtx op1, op2, t1, t2;
5110 t1 = gen_reg_rtx (V4SImode);
5111 t2 = gen_reg_rtx (V4SImode);
5113 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5114 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5115 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; Dot-product expander: operands[0] = operands[3] + sse2_pmaddwd
;; (operands[1], operands[2]), going through a fresh V4SI temporary.
5119 (define_expand "sdot_prodv8hi"
5120 [(match_operand:V4SI 0 "register_operand" "")
5121 (match_operand:V8HI 1 "register_operand" "")
5122 (match_operand:V8HI 2 "register_operand" "")
5123 (match_operand:V4SI 3 "register_operand" "")]
5126 rtx t = gen_reg_rtx (V4SImode);
5127 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5128 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; Dot-product expander on V4SI inputs with a V2DI accumulator.
;; t1 = sse2_umulv2siv2di3 (op1, op2) + operands[3]; both inputs are then
;; shifted as V1TI values into t2 and t3 and multiplied again into t4;
;; operands[0] = t1 + t4.
5132 (define_expand "udot_prodv4si"
5133 [(match_operand:V2DI 0 "register_operand" "")
5134 (match_operand:V4SI 1 "register_operand" "")
5135 (match_operand:V4SI 2 "register_operand" "")
5136 (match_operand:V2DI 3 "register_operand" "")]
5141 t1 = gen_reg_rtx (V2DImode);
5142 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5143 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5145 t2 = gen_reg_rtx (V4SImode);
5146 t3 = gen_reg_rtx (V4SImode);
5147 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5148 gen_lowpart (V1TImode, operands[1]),
5150 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5151 gen_lowpart (V1TImode, operands[2]),
5154 t4 = gen_reg_rtx (V2DImode);
5155 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5157 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; Shift of VI24_128 vectors by SI operand 2 (constraint "xN"), emitted
;; as psra<suffix> or the three-operand vpsra<suffix>.  length_immediate
;; depends on whether operand 2 is a const_int.
5161 (define_insn "ashr<mode>3"
5162 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5164 (match_operand:VI24_128 1 "register_operand" "0,x")
5165 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5168 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5169 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5170 [(set_attr "isa" "noavx,avx")
5171 (set_attr "type" "sseishft")
5172 (set (attr "length_immediate")
5173 (if_then_else (match_operand 2 "const_int_operand" "")
5175 (const_string "0")))
5176 (set_attr "prefix_data16" "1,*")
5177 (set_attr "prefix" "orig,vex")
5178 (set_attr "mode" "TI")])
;; Shift of VI248_128 vectors by SI operand 2 (constraint "xN"), emitted
;; as psrl<suffix> or the three-operand vpsrl<suffix>.  length_immediate
;; depends on whether operand 2 is a const_int.
5180 (define_insn "lshr<mode>3"
5181 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5183 (match_operand:VI248_128 1 "register_operand" "0,x")
5184 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5187 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5188 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5189 [(set_attr "isa" "noavx,avx")
5190 (set_attr "type" "sseishft")
5191 (set (attr "length_immediate")
5192 (if_then_else (match_operand 2 "const_int_operand" "")
5194 (const_string "0")))
5195 (set_attr "prefix_data16" "1,*")
5196 (set_attr "prefix" "orig,vex")
5197 (set_attr "mode" "TI")])
;; Shift of VI248_128 vectors by SI operand 2 (constraint "xN"), emitted
;; as psll<suffix> or the three-operand vpsll<suffix>.  length_immediate
;; depends on whether operand 2 is a const_int.
5199 (define_insn "ashl<mode>3"
5200 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5202 (match_operand:VI248_128 1 "register_operand" "0,x")
5203 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5206 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5207 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5208 [(set_attr "isa" "noavx,avx")
5209 (set_attr "type" "sseishft")
5210 (set (attr "length_immediate")
5211 (if_then_else (match_operand 2 "const_int_operand" "")
5213 (const_string "0")))
5214 (set_attr "prefix_data16" "1,*")
5215 (set_attr "prefix" "orig,vex")
5216 (set_attr "mode" "TI")])
;; Whole-vector shift expander for VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; operands 0 and 1 are replaced by their
;; V1TI lowparts before the pattern is generated.
5218 (define_expand "vec_shl_<mode>"
5219 [(set (match_operand:VI_128 0 "register_operand" "")
5221 (match_operand:VI_128 1 "register_operand" "")
5222 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5225 operands[0] = gen_lowpart (V1TImode, operands[0]);
5226 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI shift by a constant count.  Operand 2 is divided by 8 before
;; output, and the alternative selects pslldq (destination tied to
;; operand 1) or the three-operand vpslldq.
5229 (define_insn "sse2_ashlv1ti3"
5230 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5232 (match_operand:V1TI 1 "register_operand" "0,x")
5233 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5236 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5238 switch (which_alternative)
5241 return "pslldq\t{%2, %0|%0, %2}";
5243 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5248 [(set_attr "isa" "noavx,avx")
5249 (set_attr "type" "sseishft")
5250 (set_attr "length_immediate" "1")
5251 (set_attr "prefix_data16" "1,*")
5252 (set_attr "prefix" "orig,vex")
5253 (set_attr "mode" "TI")])
;; Whole-vector shift expander for VI_128 modes.  Operand 2 must satisfy
;; const_0_to_255_mul_8_operand; operands 0 and 1 are replaced by their
;; V1TI lowparts before the pattern is generated.
5255 (define_expand "vec_shr_<mode>"
5256 [(set (match_operand:VI_128 0 "register_operand" "")
5258 (match_operand:VI_128 1 "register_operand" "")
5259 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5262 operands[0] = gen_lowpart (V1TImode, operands[0]);
5263 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; V1TI shift by a constant count.  Operand 2 is divided by 8 before
;; output, and the alternative selects psrldq (destination tied to
;; operand 1) or the three-operand vpsrldq.
5266 (define_insn "sse2_lshrv1ti3"
5267 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5269 (match_operand:V1TI 1 "register_operand" "0,x")
5270 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5273 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5275 switch (which_alternative)
5278 return "psrldq\t{%2, %0|%0, %2}";
5280 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5285 [(set_attr "isa" "noavx,avx")
5286 (set_attr "type" "sseishft")
5287 (set_attr "length_immediate" "1")
5288 (set_attr "atom_unit" "sishuf")
5289 (set_attr "prefix_data16" "1,*")
5290 (set_attr "prefix" "orig,vex")
5291 (set_attr "mode" "TI")])
;; Min/max insn for VI14_128 modes on TARGET_SSE4_1:
;; p<maxmin_int><suffix> (destination tied to operand 1) or the
;; three-operand VEX form.  Operand 1 is marked commutative ("%").
5293 (define_insn "*sse4_1_<code><mode>3"
5294 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5296 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5297 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5298 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5300 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5301 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5302 [(set_attr "isa" "noavx,avx")
5303 (set_attr "type" "sseiadd")
5304 (set_attr "prefix_extra" "1,*")
5305 (set_attr "prefix" "orig,vex")
5306 (set_attr "mode" "TI")])
;; Min/max insn for V8HI on TARGET_SSE2: p<maxmin_int>w (destination tied
;; to operand 1) or the three-operand VEX form.
5308 (define_insn "*<code>v8hi3"
5309 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5311 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5312 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5313 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5315 p<maxmin_int>w\t{%2, %0|%0, %2}
5316 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5317 [(set_attr "isa" "noavx,avx")
5318 (set_attr "type" "sseiadd")
5319 (set_attr "prefix_data16" "1,*")
5320 (set_attr "prefix_extra" "*,1")
5321 (set_attr "prefix" "orig,vex")
5322 (set_attr "mode" "TI")])
;; Signed-max expander for VI14_128 modes.  One visible path adjusts the
;; operands for SMAX with ix86_fixup_binary_operands_no_copy.  The other
;; fills xops with the destination, operands 1 and 2 as the two candidate
;; values, and a GT comparison of operand 1 against operand 2, then
;; passes it to ix86_expand_int_vcond.
5324 (define_expand "smax<mode>3"
5325 [(set (match_operand:VI14_128 0 "register_operand" "")
5326 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5327 (match_operand:VI14_128 2 "register_operand" "")))]
5331 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5337 xops[0] = operands[0];
5338 xops[1] = operands[1];
5339 xops[2] = operands[2];
5340 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5341 xops[4] = operands[1];
5342 xops[5] = operands[2];
5343 ok = ix86_expand_int_vcond (xops);
;; Signed-min expander for VI14_128 modes.  One visible path adjusts the
;; operands for SMIN with ix86_fixup_binary_operands_no_copy.  The other
;; fills xops with the destination, operands 2 and 1 as the two candidate
;; values (swapped relative to the max expanders), and a GT comparison of
;; operand 1 against operand 2, then passes it to ix86_expand_int_vcond.
5349 (define_expand "smin<mode>3"
5350 [(set (match_operand:VI14_128 0 "register_operand" "")
5351 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5352 (match_operand:VI14_128 2 "register_operand" "")))]
5356 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5362 xops[0] = operands[0];
5363 xops[1] = operands[2];
5364 xops[2] = operands[1];
5365 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5366 xops[4] = operands[1];
5367 xops[5] = operands[2];
5368 ok = ix86_expand_int_vcond (xops);
;; Expander for the <code> operations on V8HI; the operands are adjusted
;; in place by ix86_fixup_binary_operands_no_copy.
5374 (define_expand "<code>v8hi3"
5375 [(set (match_operand:V8HI 0 "register_operand" "")
5377 (match_operand:V8HI 1 "nonimmediate_operand" "")
5378 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5380 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; Signed-max expander for V2DI.  Fills xops with the destination,
;; operands 1 and 2 as the two candidate values, and a GT comparison of
;; operand 1 against operand 2, then passes it to ix86_expand_int_vcond.
5382 (define_expand "smaxv2di3"
5383 [(set (match_operand:V2DI 0 "register_operand" "")
5384 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5385 (match_operand:V2DI 2 "register_operand" "")))]
5391 xops[0] = operands[0];
5392 xops[1] = operands[1];
5393 xops[2] = operands[2];
5394 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5395 xops[4] = operands[1];
5396 xops[5] = operands[2];
5397 ok = ix86_expand_int_vcond (xops);
;; Signed-min expander for V2DI.  Fills xops with the destination,
;; operands 2 and 1 as the two candidate values (swapped relative to
;; smaxv2di3), and a GT comparison of operand 1 against operand 2, then
;; passes it to ix86_expand_int_vcond.
5402 (define_expand "sminv2di3"
5403 [(set (match_operand:V2DI 0 "register_operand" "")
5404 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5405 (match_operand:V2DI 2 "register_operand" "")))]
5411 xops[0] = operands[0];
5412 xops[1] = operands[2];
5413 xops[2] = operands[1];
5414 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5415 xops[4] = operands[1];
5416 xops[5] = operands[2];
5417 ok = ix86_expand_int_vcond (xops);
;; Min/max insn for VI24_128 modes on TARGET_SSE4_1:
;; p<maxmin_int><suffix> (destination tied to operand 1) or the
;; three-operand VEX form.  Operand 1 is marked commutative ("%").
5422 (define_insn "*sse4_1_<code><mode>3"
5423 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5425 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5426 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5427 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5429 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5430 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5431 [(set_attr "isa" "noavx,avx")
5432 (set_attr "type" "sseiadd")
5433 (set_attr "prefix_extra" "1,*")
5434 (set_attr "prefix" "orig,vex")
5435 (set_attr "mode" "TI")])
;; Min/max insn for V16QI on TARGET_SSE2: p<maxmin_int>b (destination
;; tied to operand 1) or the three-operand VEX form.
5437 (define_insn "*<code>v16qi3"
5438 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5440 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5441 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5442 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5444 p<maxmin_int>b\t{%2, %0|%0, %2}
5445 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5446 [(set_attr "isa" "noavx,avx")
5447 (set_attr "type" "sseiadd")
5448 (set_attr "prefix_data16" "1,*")
5449 (set_attr "prefix_extra" "*,1")
5450 (set_attr "prefix" "orig,vex")
5451 (set_attr "mode" "TI")])
;; Expander for the <code> operations on V16QI; the operands are adjusted
;; in place by ix86_fixup_binary_operands_no_copy.
5453 (define_expand "<code>v16qi3"
5454 [(set (match_operand:V16QI 0 "register_operand" "")
5456 (match_operand:V16QI 1 "nonimmediate_operand" "")
5457 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5459 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; Unsigned-max expander for V8HI.  One visible path adjusts the operands
;; for UMAX with ix86_fixup_binary_operands_no_copy.  The other computes
;; sse2_ussubv8hi3 (op1, op2) and then adds op2.  The intermediate goes
;; into operands[0] unless that is the same rtx as operands[2], in which
;; case a fresh register is used so op2 is still intact for the add.
5461 (define_expand "umaxv8hi3"
5462 [(set (match_operand:V8HI 0 "register_operand" "")
5463 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5464 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5468 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5471 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5472 if (rtx_equal_p (op3, op2))
5473 op3 = gen_reg_rtx (V8HImode);
5474 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5475 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; Unsigned-max expander for V4SI.  One visible path adjusts the operands
;; for UMAX with ix86_fixup_binary_operands_no_copy.  The other fills
;; xops with the destination, operands 1 and 2 as the two candidate
;; values, and a GTU comparison of operand 1 against operand 2, then
;; passes it to ix86_expand_int_vcond.
5480 (define_expand "umaxv4si3"
5481 [(set (match_operand:V4SI 0 "register_operand" "")
5482 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5483 (match_operand:V4SI 2 "register_operand" "")))]
5487 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5493 xops[0] = operands[0];
5494 xops[1] = operands[1];
5495 xops[2] = operands[2];
5496 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5497 xops[4] = operands[1];
5498 xops[5] = operands[2];
5499 ok = ix86_expand_int_vcond (xops);
;; Unsigned-min expander for VI24_128 modes.  One visible path adjusts
;; the operands for UMIN with ix86_fixup_binary_operands_no_copy.  The
;; other fills xops with the destination, operands 2 and 1 as the two
;; candidate values (swapped relative to the max expanders), and a GTU
;; comparison of operand 1 against operand 2, then passes it to
;; ix86_expand_int_vcond.
5505 (define_expand "umin<mode>3"
5506 [(set (match_operand:VI24_128 0 "register_operand" "")
5507 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5508 (match_operand:VI24_128 2 "register_operand" "")))]
5512 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5518 xops[0] = operands[0];
5519 xops[1] = operands[2];
5520 xops[2] = operands[1];
5521 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5522 xops[4] = operands[1];
5523 xops[5] = operands[2];
5524 ok = ix86_expand_int_vcond (xops);
;; Unsigned maximum of two V2DI vectors, expanded as a vector conditional:
;;   op0 = (op1 >u op2) ? op1 : op2
;; The xops array follows the vcond operand numbering (0 = destination,
;; 1/2 = values to select, 3 = comparison, 4/5 = compared operands).
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5530 (define_expand "umaxv2di3"
5531 [(set (match_operand:V2DI 0 "register_operand" "")
5532 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5533 (match_operand:V2DI 2 "register_operand" "")))]
5539 xops[0] = operands[0];
5540 xops[1] = operands[1];
5541 xops[2] = operands[2];
5542 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5543 xops[4] = operands[1];
5544 xops[5] = operands[2];
5545 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V2DI vectors, expanded as a vector conditional
;; with the selected values swapped relative to umaxv2di3:
;;   op0 = (op1 >u op2) ? op2 : op1
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5550 (define_expand "uminv2di3"
5551 [(set (match_operand:V2DI 0 "register_operand" "")
5552 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5553 (match_operand:V2DI 2 "register_operand" "")))]
5559 xops[0] = operands[0];
5560 xops[1] = operands[2];
5561 xops[2] = operands[1];
5562 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5563 xops[4] = operands[1];
5564 xops[5] = operands[2];
5565 ok = ix86_expand_int_vcond (xops);
5570 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5572 ;; Parallel integral comparisons
5574 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI element-wise equality compare, available with TARGET_SSE4_1.
;; Alternative 0 is the two-operand legacy form (destination tied to
;; operand 1); alternative 1 is the three-operand VEX form.  The "%" on
;; operand 1 marks operands 1 and 2 as commutative.
;; Emits pcmpeqq / vpcmpeqq.
5576 (define_insn "*sse4_1_eqv2di3"
5577 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5579 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5580 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5581 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5583 pcmpeqq\t{%2, %0|%0, %2}
5584 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5585 [(set_attr "isa" "noavx,avx")
5586 (set_attr "type" "ssecmp")
5587 (set_attr "prefix_extra" "1")
5588 (set_attr "prefix" "orig,vex")
5589 (set_attr "mode" "TI")])
;; Element-wise equality compare for the VI124_128 modes.  Enabled for
;; TARGET_SSE2 when XOP is not in use.
;; Alternative 0 is the two-operand legacy form (destination tied to
;; operand 1); alternative 1 is the three-operand VEX form.  Operands 1
;; and 2 are commutative ("%").
;; Emits pcmpeq<ssemodesuffix> / vpcmpeq<ssemodesuffix>.
5591 (define_insn "*sse2_eq<mode>3"
5592 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5594 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5595 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5596 "TARGET_SSE2 && !TARGET_XOP
5597 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5599 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
5600 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5601 [(set_attr "isa" "noavx,avx")
5602 (set_attr "type" "ssecmp")
5603 (set_attr "prefix_data16" "1,*")
5604 (set_attr "prefix" "orig,vex")
5605 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare (TARGET_SSE2 without
;; XOP).  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy for EQ before the pattern is emitted.
5607 (define_expand "sse2_eq<mode>3"
5608 [(set (match_operand:VI124_128 0 "register_operand" "")
5610 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5611 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5612 "TARGET_SSE2 && !TARGET_XOP "
5613 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare.  The preparation statement
;; passes the operands through ix86_fixup_binary_operands_no_copy for EQ.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5615 (define_expand "sse4_1_eqv2di3"
5616 [(set (match_operand:V2DI 0 "register_operand" "")
5618 (match_operand:V2DI 1 "nonimmediate_operand" "")
5619 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5621 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI element-wise greater-than compare; emits pcmpgtq / vpcmpgtq.
;; Unlike the equality patterns, operand 1 must be a register and carries
;; no "%" marker, so the operands are not interchangeable.
;; Alternative 0 is the two-operand legacy form; alternative 1 is the
;; three-operand VEX form.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5623 (define_insn "sse4_2_gtv2di3"
5624 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5626 (match_operand:V2DI 1 "register_operand" "0,x")
5627 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5630 pcmpgtq\t{%2, %0|%0, %2}
5631 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5632 [(set_attr "isa" "noavx,avx")
5633 (set_attr "type" "ssecmp")
5634 (set_attr "prefix_extra" "1")
5635 (set_attr "prefix" "orig,vex")
5636 (set_attr "mode" "TI")])
;; Element-wise greater-than compare for the VI124_128 modes, enabled for
;; TARGET_SSE2 when XOP is not in use.  Operand 1 must be a register and
;; the operands are not commutative.
;; Emits pcmpgt<ssemodesuffix> (legacy, destination tied to operand 1) or
;; vpcmpgt<ssemodesuffix> (VEX, three operands).
5638 (define_insn "sse2_gt<mode>3"
5639 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5641 (match_operand:VI124_128 1 "register_operand" "0,x")
5642 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5643 "TARGET_SSE2 && !TARGET_XOP"
5645 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
5646 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5647 [(set_attr "isa" "noavx,avx")
5648 (set_attr "type" "ssecmp")
5649 (set_attr "prefix_data16" "1,*")
5650 (set_attr "prefix" "orig,vex")
5651 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Operand 3 accepts any comparison operator (empty predicate).  All of
;; the work is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5653 (define_expand "vcond<mode>"
5654 [(set (match_operand:VI124_128 0 "register_operand" "")
5655 (if_then_else:VI124_128
5656 (match_operator 3 ""
5657 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5658 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5659 (match_operand:VI124_128 1 "general_operand" "")
5660 (match_operand:VI124_128 2 "general_operand" "")))]
5663 bool ok = ix86_expand_int_vcond (operands);
;; V2DI vector conditional select, using the same operand numbering as
;; vcond<mode>: operand 0 is the destination, 1 and 2 the values to select,
;; 3 the comparison operator, 4 and 5 the compared vectors.  Delegates to
;; ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5668 (define_expand "vcondv2di"
5669 [(set (match_operand:V2DI 0 "register_operand" "")
5671 (match_operator 3 ""
5672 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5673 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5674 (match_operand:V2DI 1 "general_operand" "")
5675 (match_operand:V2DI 2 "general_operand" "")))]
5678 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for the VI124_128 modes under the "vcondu"
;; name (presumably the unsigned-comparison variant -- confirm):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; Delegates to ix86_expand_int_vcond, exactly like vcond<mode>.
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5683 (define_expand "vcondu<mode>"
5684 [(set (match_operand:VI124_128 0 "register_operand" "")
5685 (if_then_else:VI124_128
5686 (match_operator 3 ""
5687 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5688 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5689 (match_operand:VI124_128 1 "general_operand" "")
5690 (match_operand:VI124_128 2 "general_operand" "")))]
5693 bool ok = ix86_expand_int_vcond (operands);
;; V2DI vector conditional select under the "vcondu" name (presumably the
;; unsigned-comparison variant -- confirm).  Operand 0 is the destination,
;; 1 and 2 the values to select, 3 the comparison operator, 4 and 5 the
;; compared vectors.  Delegates to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition and the use made of `ok' are not
;; visible in this excerpt.
5698 (define_expand "vconduv2di"
5699 [(set (match_operand:V2DI 0 "register_operand" "")
5701 (match_operator 3 ""
5702 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5703 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5704 (match_operand:V2DI 1 "general_operand" "")
5705 (match_operand:V2DI 2 "general_operand" "")))]
5708 bool ok = ix86_expand_int_vcond (operands);
5713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5715 ;; Parallel bitwise logical operations
5717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement for the VI modes, expressed as XOR with an all-ones
;; vector.  The preparation code builds a CONST_VECTOR with one
;; constm1_rtx per element of <MODE>mode, forces it into a register and
;; installs it as operand 2 of the xor.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5719 (define_expand "one_cmpl<mode>2"
5720 [(set (match_operand:VI 0 "register_operand" "")
5721 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5725 int i, n = GET_MODE_NUNITS (<MODE>mode);
5726 rtvec v = rtvec_alloc (n);
5728 for (i = 0; i < n; ++i)
5729 RTVEC_ELT (v, i) = constm1_rtx;
5731 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander combining the complement of operand 1 (a register) with
;; operand 2 (register or memory) for the VI_128 modes.
;; NOTE(review): the outer RTL operator and the enabling condition are not
;; visible in this excerpt; the pattern name suggests AND -- confirm.
5734 (define_expand "sse2_andnot<mode>3"
5735 [(set (match_operand:VI_128 0 "register_operand" "")
5737 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5738 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
;; Vector integer AND-NOT for the VI modes: the complement of operand 1 is
;; combined with operand 2.  The mnemonic is picked at output time from the
;; insn's "mode" attribute: MODE_TI selects the integer form "pandn", any
;; other mode selects the packed-single form "andnps".  Alternative 0 is
;; the two-operand legacy encoding, alternative 1 the "v"-prefixed
;; three-operand VEX encoding.
;;
;; The "mode" attribute must report V8SF for vectors wider than 128 bits,
;; so that 256-bit operands get the packed-single mnemonic.
;; GET_MODE_SIZE is measured in bytes (16 for a 128-bit vector, 32 for a
;; 256-bit one), so the width test has to be "> 16".  The previous
;; "> 128" could never be true, which made the V8SF arm unreachable and
;; let 256-bit modes fall through to the later arms of the cond.
;; NOTE(review): assumes the VI iterator contains 256-bit modes under
;; TARGET_AVX, as the V iterator at the top of the file does -- confirm.
5741 (define_insn "*andnot<mode>3"
5742 [(set (match_operand:VI 0 "register_operand" "=x,x")
5744 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5745 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5748 static char buf[32];
5751 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5753 switch (which_alternative)
5756 ops = "%s\t{%%2, %%0|%%0, %%2}";
5759 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5765 snprintf (buf, sizeof (buf), ops, tmp);
5768 [(set_attr "isa" "noavx,avx")
5769 (set_attr "type" "sselog")
5770 (set (attr "prefix_data16")
5772 (and (eq_attr "alternative" "0")
5773 (eq_attr "mode" "TI"))
5775 (const_string "*")))
5776 (set_attr "prefix" "orig,vex")
;; Size is in bytes: anything above 16 is a 256-bit vector.
5778 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5779 (const_string "V8SF")
5780 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5783 (const_string "V4SF")))])
;; Named expander for a two-input operation on the VI modes, selected by
;; the <code> iterator.  Both inputs may be registers or memory; the
;; preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this excerpt.
5785 (define_expand "<code><mode>3"
5786 [(set (match_operand:VI 0 "register_operand" "")
5788 (match_operand:VI 1 "nonimmediate_operand" "")
5789 (match_operand:VI 2 "nonimmediate_operand" "")))]
5791 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Commutative vector logic operation on the VI modes, selected by the
;; <code>/<logic> iterators.  The mnemonic is picked at output time from
;; the insn's "mode" attribute: MODE_TI selects the integer form
;; "p<logic>", any other mode selects the packed-single form "<logic>ps".
;; Alternative 0 is the two-operand legacy encoding, alternative 1 the
;; "v"-prefixed three-operand VEX encoding.
;;
;; The "mode" attribute must report V8SF for vectors wider than 128 bits,
;; so that 256-bit operands get the packed-single mnemonic.
;; GET_MODE_SIZE is measured in bytes (16 for a 128-bit vector, 32 for a
;; 256-bit one), so the width test has to be "> 16".  The previous
;; "> 128" could never be true, which made the V8SF arm unreachable and
;; let 256-bit modes fall through to the later arms of the cond.
;; NOTE(review): assumes the VI iterator contains 256-bit modes under
;; TARGET_AVX, as the V iterator at the top of the file does -- confirm.
5793 (define_insn "*<code><mode>3"
5794 [(set (match_operand:VI 0 "register_operand" "=x,x")
5796 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5797 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5799 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5801 static char buf[32];
5804 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5806 switch (which_alternative)
5809 ops = "%s\t{%%2, %%0|%%0, %%2}";
5812 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5818 snprintf (buf, sizeof (buf), ops, tmp);
5821 [(set_attr "isa" "noavx,avx")
5822 (set_attr "type" "sselog")
5823 (set (attr "prefix_data16")
5825 (and (eq_attr "alternative" "0")
5826 (eq_attr "mode" "TI"))
5828 (const_string "*")))
5829 (set_attr "prefix" "orig,vex")
;; Size is in bytes: anything above 16 is a 256-bit vector.
5831 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5832 (const_string "V8SF")
5833 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5836 (const_string "V4SF")))])
;; AND-NOT on a TFmode value held in an SSE register: the complement of
;; operand 1 is combined with operand 2.  Emits pandn (legacy, destination
;; tied to operand 1) or vpandn (VEX, three operands).
;; NOTE(review): the outer RTL operator and the enabling condition are not
;; visible in this excerpt.
5838 (define_insn "*andnottf3"
5839 [(set (match_operand:TF 0 "register_operand" "=x,x")
5841 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5842 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5845 pandn\t{%2, %0|%0, %2}
5846 vpandn\t{%2, %1, %0|%0, %1, %2}"
5847 [(set_attr "isa" "noavx,avx")
5848 (set_attr "type" "sselog")
5849 (set_attr "prefix_data16" "1,*")
5850 (set_attr "prefix" "orig,vex")
5851 (set_attr "mode" "TI")])
;; Named expander for a two-input TFmode operation selected by the <code>
;; iterator.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;; NOTE(review): the RTL operator line and the enabling condition are not
;; visible in this excerpt.
5853 (define_expand "<code>tf3"
5854 [(set (match_operand:TF 0 "register_operand" "")
5856 (match_operand:TF 1 "nonimmediate_operand" "")
5857 (match_operand:TF 2 "nonimmediate_operand" "")))]
5859 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Commutative logic operation on TFmode values held in SSE registers,
;; selected by the <code>/<logic> iterators.  Emits p<logic> (legacy,
;; destination tied to operand 1) or vp<logic> (VEX, three operands).
;; NOTE(review): the first line of the enabling condition is not visible
;; in this excerpt.
5861 (define_insn "*<code>tf3"
5862 [(set (match_operand:TF 0 "register_operand" "=x,x")
5864 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5865 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5867 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5869 p<logic>\t{%2, %0|%0, %2}
5870 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5871 [(set_attr "isa" "noavx,avx")
5872 (set_attr "type" "sselog")
5873 (set_attr "prefix_data16" "1,*")
5874 (set_attr "prefix" "orig,vex")
5875 (set_attr "mode" "TI")])
5877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5879 ;; Parallel integral element swizzling
5881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two VI248_128 vectors into one <ssepackmode> vector.
;; Both inputs are reinterpreted in the destination mode with gen_lowpart
;; and handed to ix86_expand_vec_extract_even_odd with a final argument
;; of 0 (presumably selecting the even-numbered elements, i.e. the low
;; half of each wide element -- confirm against that helper).
;; NOTE(review): the enabling condition is not visible in this excerpt.
5883 (define_expand "vec_pack_trunc_<mode>"
5884 [(match_operand:<ssepackmode> 0 "register_operand" "")
5885 (match_operand:VI248_128 1 "register_operand" "")
5886 (match_operand:VI248_128 2 "register_operand" "")]
5889 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5890 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5891 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two V8HI vectors into one V16QI vector; emits packsswb (legacy,
;; destination tied to operand 1) or vpacksswb (VEX, three operands).
;; NOTE(review): the RTL operators describing the narrowing and the
;; enabling condition are not visible in this excerpt.
5895 (define_insn "sse2_packsswb"
5896 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5899 (match_operand:V8HI 1 "register_operand" "0,x"))
5901 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5904 packsswb\t{%2, %0|%0, %2}
5905 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5906 [(set_attr "isa" "noavx,avx")
5907 (set_attr "type" "sselog")
5908 (set_attr "prefix_data16" "1,*")
5909 (set_attr "prefix" "orig,vex")
5910 (set_attr "mode" "TI")])
;; Pack two V4SI vectors into one V8HI vector; emits packssdw (legacy,
;; destination tied to operand 1) or vpackssdw (VEX, three operands).
;; NOTE(review): the RTL operators describing the narrowing and the
;; enabling condition are not visible in this excerpt.
5912 (define_insn "sse2_packssdw"
5913 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5916 (match_operand:V4SI 1 "register_operand" "0,x"))
5918 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5921 packssdw\t{%2, %0|%0, %2}
5922 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5923 [(set_attr "isa" "noavx,avx")
5924 (set_attr "type" "sselog")
5925 (set_attr "prefix_data16" "1,*")
5926 (set_attr "prefix" "orig,vex")
5927 (set_attr "mode" "TI")])
;; Pack two V8HI vectors into one V16QI vector; emits packuswb (legacy,
;; destination tied to operand 1) or vpackuswb (VEX, three operands).
;; NOTE(review): the RTL operators describing the narrowing and the
;; enabling condition are not visible in this excerpt.
5929 (define_insn "sse2_packuswb"
5930 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5933 (match_operand:V8HI 1 "register_operand" "0,x"))
5935 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5938 packuswb\t{%2, %0|%0, %2}
5939 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5940 [(set_attr "isa" "noavx,avx")
5941 (set_attr "type" "sselog")
5942 (set_attr "prefix_data16" "1,*")
5943 (set_attr "prefix" "orig,vex")
5944 (set_attr "mode" "TI")])
;; Interleave the high halves of two V16QI vectors.  The selector
;; alternates indices 8..15 with 24..31, i.e. pairs (k, k + 16) for
;; k = 8..15.  Emits punpckhbw (legacy, destination tied to operand 1) or
;; vpunpckhbw (VEX, three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
5946 (define_insn "vec_interleave_highv16qi"
5947 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5950 (match_operand:V16QI 1 "register_operand" "0,x")
5951 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5952 (parallel [(const_int 8) (const_int 24)
5953 (const_int 9) (const_int 25)
5954 (const_int 10) (const_int 26)
5955 (const_int 11) (const_int 27)
5956 (const_int 12) (const_int 28)
5957 (const_int 13) (const_int 29)
5958 (const_int 14) (const_int 30)
5959 (const_int 15) (const_int 31)])))]
5962 punpckhbw\t{%2, %0|%0, %2}
5963 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5964 [(set_attr "isa" "noavx,avx")
5965 (set_attr "type" "sselog")
5966 (set_attr "prefix_data16" "1,*")
5967 (set_attr "prefix" "orig,vex")
5968 (set_attr "mode" "TI")])
;; Interleave the low halves of two V16QI vectors.  The selector
;; alternates indices 0..7 with 16..23, i.e. pairs (k, k + 16) for
;; k = 0..7.  Emits punpcklbw (legacy, destination tied to operand 1) or
;; vpunpcklbw (VEX, three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
5970 (define_insn "vec_interleave_lowv16qi"
5971 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5974 (match_operand:V16QI 1 "register_operand" "0,x")
5975 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5976 (parallel [(const_int 0) (const_int 16)
5977 (const_int 1) (const_int 17)
5978 (const_int 2) (const_int 18)
5979 (const_int 3) (const_int 19)
5980 (const_int 4) (const_int 20)
5981 (const_int 5) (const_int 21)
5982 (const_int 6) (const_int 22)
5983 (const_int 7) (const_int 23)])))]
5986 punpcklbw\t{%2, %0|%0, %2}
5987 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5988 [(set_attr "isa" "noavx,avx")
5989 (set_attr "type" "sselog")
5990 (set_attr "prefix_data16" "1,*")
5991 (set_attr "prefix" "orig,vex")
5992 (set_attr "mode" "TI")])
;; Interleave the high halves of two V8HI vectors.  The selector
;; alternates indices 4..7 with 12..15, i.e. pairs (k, k + 8) for
;; k = 4..7.  Emits punpckhwd (legacy, destination tied to operand 1) or
;; vpunpckhwd (VEX, three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
5994 (define_insn "vec_interleave_highv8hi"
5995 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5998 (match_operand:V8HI 1 "register_operand" "0,x")
5999 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6000 (parallel [(const_int 4) (const_int 12)
6001 (const_int 5) (const_int 13)
6002 (const_int 6) (const_int 14)
6003 (const_int 7) (const_int 15)])))]
6006 punpckhwd\t{%2, %0|%0, %2}
6007 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6008 [(set_attr "isa" "noavx,avx")
6009 (set_attr "type" "sselog")
6010 (set_attr "prefix_data16" "1,*")
6011 (set_attr "prefix" "orig,vex")
6012 (set_attr "mode" "TI")])
;; Interleave the low halves of two V8HI vectors.  The selector
;; alternates indices 0..3 with 8..11, i.e. pairs (k, k + 8) for
;; k = 0..3.  Emits punpcklwd (legacy, destination tied to operand 1) or
;; vpunpcklwd (VEX, three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
6014 (define_insn "vec_interleave_lowv8hi"
6015 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6018 (match_operand:V8HI 1 "register_operand" "0,x")
6019 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6020 (parallel [(const_int 0) (const_int 8)
6021 (const_int 1) (const_int 9)
6022 (const_int 2) (const_int 10)
6023 (const_int 3) (const_int 11)])))]
6026 punpcklwd\t{%2, %0|%0, %2}
6027 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6028 [(set_attr "isa" "noavx,avx")
6029 (set_attr "type" "sselog")
6030 (set_attr "prefix_data16" "1,*")
6031 (set_attr "prefix" "orig,vex")
6032 (set_attr "mode" "TI")])
;; Interleave the high halves of two V4SI vectors.  The selector picks
;; indices 2, 6, 3, 7, i.e. pairs (k, k + 4) for k = 2..3.  Emits
;; punpckhdq (legacy, destination tied to operand 1) or vpunpckhdq (VEX,
;; three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
6034 (define_insn "vec_interleave_highv4si"
6035 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6038 (match_operand:V4SI 1 "register_operand" "0,x")
6039 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6040 (parallel [(const_int 2) (const_int 6)
6041 (const_int 3) (const_int 7)])))]
6044 punpckhdq\t{%2, %0|%0, %2}
6045 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6046 [(set_attr "isa" "noavx,avx")
6047 (set_attr "type" "sselog")
6048 (set_attr "prefix_data16" "1,*")
6049 (set_attr "prefix" "orig,vex")
6050 (set_attr "mode" "TI")])
;; Interleave the low halves of two V4SI vectors.  The selector picks
;; indices 0, 4, 1, 5, i.e. pairs (k, k + 4) for k = 0..1.  Emits
;; punpckldq (legacy, destination tied to operand 1) or vpunpckldq (VEX,
;; three operands).
;; NOTE(review): the wrapping RTL operators and the enabling condition are
;; not visible in this excerpt.
6052 (define_insn "vec_interleave_lowv4si"
6053 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6056 (match_operand:V4SI 1 "register_operand" "0,x")
6057 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6058 (parallel [(const_int 0) (const_int 4)
6059 (const_int 1) (const_int 5)])))]
6062 punpckldq\t{%2, %0|%0, %2}
6063 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6064 [(set_attr "isa" "noavx,avx")
6065 (set_attr "type" "sselog")
6066 (set_attr "prefix_data16" "1,*")
6067 (set_attr "prefix" "orig,vex")
6068 (set_attr "mode" "TI")])
6070 ;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition of its own.  V16QI and V4SI additionally
;; require TARGET_SSE4_1, and V2DI requires TARGET_SSE4_1 on a 64-bit
;; target.
6071 (define_mode_iterator PINSR_MODE
6072 [(V16QI "TARGET_SSE4_1") V8HI
6073 (V4SI "TARGET_SSE4_1")
6074 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix used by the pinsr pattern for each mode: "sse2" for V8HI
;; and "sse4_1" for V16QI, V4SI and V2DI.
6076 (define_mode_attr sse2p4_1
6077 [(V16QI "sse4_1") (V8HI "sse2")
6078 (V4SI "sse4_1") (V2DI "sse4_1")])
6080 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert a scalar (operand 2, general register or memory) into one
;; element of vector operand 1.  Operand 3 is a one-bit vec_merge mask:
;; the insn condition requires exact_log2 of it to be a valid element
;; index, and the output code rewrites operand 3 to that index before
;; printing.  For element modes narrower than SImode a register source is
;; printed with the %k modifier.  Alternatives 0-1 are the legacy forms
;; (destination tied to operand 1), alternatives 2-3 the VEX forms.
;; NOTE(review): the case labels of the switch and the first line of the
;; insn condition are not visible in this excerpt.
6081 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6082 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6083 (vec_merge:PINSR_MODE
6084 (vec_duplicate:PINSR_MODE
6085 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6086 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6087 (match_operand:SI 3 "const_int_operand" "")))]
6089 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6090 < GET_MODE_NUNITS (<MODE>mode))"
6092 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6094 switch (which_alternative)
6097 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6098 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6101 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6103 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6104 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6107 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6112 [(set_attr "isa" "noavx,noavx,avx,avx")
6113 (set_attr "type" "sselog")
6114 (set (attr "prefix_rex")
6116 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6117 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6119 (const_string "*")))
6120 (set (attr "prefix_data16")
6122 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6123 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6125 (const_string "*")))
6126 (set (attr "prefix_extra")
6128 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6129 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6131 (const_string "1")))
6132 (set_attr "length_immediate" "1")
6133 (set_attr "prefix" "orig,orig,vex,vex")
6134 (set_attr "mode" "TI")])
;; Extract one byte (index 0..15, operand 2) of a V16QI register into a
;; general register of an SWI48 mode.  The destination is printed with
;; the %k modifier; the mnemonic is pextrb with an optional VEX "v"
;; prefix (%v).
;; NOTE(review): the extension/select RTL operators and the enabling
;; condition are not visible in this excerpt.
6136 (define_insn "*sse4_1_pextrb_<mode>"
6137 [(set (match_operand:SWI48 0 "register_operand" "=r")
6140 (match_operand:V16QI 1 "register_operand" "x")
6141 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6143 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6144 [(set_attr "type" "sselog")
6145 (set_attr "prefix_extra" "1")
6146 (set_attr "length_immediate" "1")
6147 (set_attr "prefix" "maybe_vex")
6148 (set_attr "mode" "TI")])
;; Extract one byte (index 0..15, operand 2) of a V16QI register and
;; store it directly to a QImode memory location with [v]pextrb.
;; NOTE(review): the select RTL operator and the enabling condition are
;; not visible in this excerpt.
6150 (define_insn "*sse4_1_pextrb_memory"
6151 [(set (match_operand:QI 0 "memory_operand" "=m")
6153 (match_operand:V16QI 1 "register_operand" "x")
6154 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6156 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6157 [(set_attr "type" "sselog")
6158 (set_attr "prefix_extra" "1")
6159 (set_attr "length_immediate" "1")
6160 (set_attr "prefix" "maybe_vex")
6161 (set_attr "mode" "TI")])
;; Extract one 16-bit element (index 0..7, operand 2) of a V8HI register
;; into a general register of an SWI48 mode.  The destination is printed
;; with the %k modifier; the mnemonic is pextrw with an optional VEX "v"
;; prefix (%v).
;; NOTE(review): the extension/select RTL operators and the enabling
;; condition are not visible in this excerpt.
6163 (define_insn "*sse2_pextrw_<mode>"
6164 [(set (match_operand:SWI48 0 "register_operand" "=r")
6167 (match_operand:V8HI 1 "register_operand" "x")
6168 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6170 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6171 [(set_attr "type" "sselog")
6172 (set_attr "prefix_data16" "1")
6173 (set_attr "length_immediate" "1")
6174 (set_attr "prefix" "maybe_vex")
6175 (set_attr "mode" "TI")])
;; Extract one 16-bit element (index 0..7, operand 2) of a V8HI register
;; and store it directly to an HImode memory location with [v]pextrw.
;; NOTE(review): the select RTL operator and the enabling condition are
;; not visible in this excerpt.
6177 (define_insn "*sse4_1_pextrw_memory"
6178 [(set (match_operand:HI 0 "memory_operand" "=m")
6180 (match_operand:V8HI 1 "register_operand" "x")
6181 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6183 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6184 [(set_attr "type" "sselog")
6185 (set_attr "prefix_extra" "1")
6186 (set_attr "length_immediate" "1")
6187 (set_attr "prefix" "maybe_vex")
6188 (set_attr "mode" "TI")])
;; Extract one 32-bit element (index 0..3, operand 2) of a V4SI register
;; into an SImode general register or memory location with [v]pextrd.
;; NOTE(review): the select RTL operator and the enabling condition are
;; not visible in this excerpt.
6190 (define_insn "*sse4_1_pextrd"
6191 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6193 (match_operand:V4SI 1 "register_operand" "x")
6194 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6196 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6197 [(set_attr "type" "sselog")
6198 (set_attr "prefix_extra" "1")
6199 (set_attr "length_immediate" "1")
6200 (set_attr "prefix" "maybe_vex")
6201 (set_attr "mode" "TI")])
;; 64-bit-only variant of the 32-bit element extract: the destination is
;; a DImode general register, printed with the %k modifier in the
;; [v]pextrd instruction.  Requires TARGET_64BIT and TARGET_SSE4_1.
;; NOTE(review): the extension/select RTL operators are not visible in
;; this excerpt; the name suggests zero extension -- confirm.
6203 (define_insn "*sse4_1_pextrd_zext"
6204 [(set (match_operand:DI 0 "register_operand" "=r")
6207 (match_operand:V4SI 1 "register_operand" "x")
6208 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6209 "TARGET_64BIT && TARGET_SSE4_1"
6210 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6211 [(set_attr "type" "sselog")
6212 (set_attr "prefix_extra" "1")
6213 (set_attr "length_immediate" "1")
6214 (set_attr "prefix" "maybe_vex")
6215 (set_attr "mode" "TI")])
6217 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract one 64-bit element (index 0..1, operand 2) of a V2DI register
;; into a DImode general register or memory location with [v]pextrq.
;; Requires TARGET_SSE4_1 on a 64-bit target.
;; NOTE(review): the select RTL operator is not visible in this excerpt.
6218 (define_insn "*sse4_1_pextrq"
6219 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6221 (match_operand:V2DI 1 "register_operand" "x")
6222 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6223 "TARGET_SSE4_1 && TARGET_64BIT"
6224 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6225 [(set_attr "type" "sselog")
6226 (set_attr "prefix_rex" "1")
6227 (set_attr "prefix_extra" "1")
6228 (set_attr "length_immediate" "1")
6229 (set_attr "prefix" "maybe_vex")
6230 (set_attr "mode" "TI")])
;; Expander taking the pshufd immediate as a single integer (operand 2).
;; The immediate is split into four 2-bit selectors (bits 0-1, 2-3, 4-5,
;; 6-7), which are passed as separate operands to sse2_pshufd_1.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6232 (define_expand "sse2_pshufd"
6233 [(match_operand:V4SI 0 "register_operand" "")
6234 (match_operand:V4SI 1 "nonimmediate_operand" "")
6235 (match_operand:SI 2 "const_int_operand" "")]
6238 int mask = INTVAL (operands[2]);
6239 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6240 GEN_INT ((mask >> 0) & 3),
6241 GEN_INT ((mask >> 2) & 3),
6242 GEN_INT ((mask >> 4) & 3),
6243 GEN_INT ((mask >> 6) & 3)));
;; V4SI shuffle with four explicit element selectors (operands 2-5, each
;; 0..3).  At output time the selectors are packed back into one
;; immediate, two bits each at bit positions 0, 2, 4 and 6, which
;; replaces operand 2 for printing [v]pshufd.
;; NOTE(review): the select RTL operator, the enabling condition and the
;; declaration of `mask' are not visible in this excerpt.
6247 (define_insn "sse2_pshufd_1"
6248 [(set (match_operand:V4SI 0 "register_operand" "=x")
6250 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6251 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6252 (match_operand 3 "const_0_to_3_operand" "")
6253 (match_operand 4 "const_0_to_3_operand" "")
6254 (match_operand 5 "const_0_to_3_operand" "")])))]
6258 mask |= INTVAL (operands[2]) << 0;
6259 mask |= INTVAL (operands[3]) << 2;
6260 mask |= INTVAL (operands[4]) << 4;
6261 mask |= INTVAL (operands[5]) << 6;
6262 operands[2] = GEN_INT (mask);
6264 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6266 [(set_attr "type" "sselog1")
6267 (set_attr "prefix_data16" "1")
6268 (set_attr "prefix" "maybe_vex")
6269 (set_attr "length_immediate" "1")
6270 (set_attr "mode" "TI")])
;; Expander taking the pshuflw immediate as a single integer (operand 2).
;; The immediate is split into four 2-bit selectors (bits 0-1, 2-3, 4-5,
;; 6-7), which are passed as separate operands to sse2_pshuflw_1.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6272 (define_expand "sse2_pshuflw"
6273 [(match_operand:V8HI 0 "register_operand" "")
6274 (match_operand:V8HI 1 "nonimmediate_operand" "")
6275 (match_operand:SI 2 "const_int_operand" "")]
6278 int mask = INTVAL (operands[2]);
6279 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6280 GEN_INT ((mask >> 0) & 3),
6281 GEN_INT ((mask >> 2) & 3),
6282 GEN_INT ((mask >> 4) & 3),
6283 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle whose first four selectors are operands 2-5 (each 0..3).
;; At output time those selectors are packed into one immediate, two bits
;; each at bit positions 0, 2, 4 and 6, which replaces operand 2 for
;; printing [v]pshuflw.
;; NOTE(review): the remainder of the selector list, the select RTL
;; operator, the enabling condition and the declaration of `mask' are not
;; visible in this excerpt.
6287 (define_insn "sse2_pshuflw_1"
6288 [(set (match_operand:V8HI 0 "register_operand" "=x")
6290 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6291 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6292 (match_operand 3 "const_0_to_3_operand" "")
6293 (match_operand 4 "const_0_to_3_operand" "")
6294 (match_operand 5 "const_0_to_3_operand" "")
6302 mask |= INTVAL (operands[2]) << 0;
6303 mask |= INTVAL (operands[3]) << 2;
6304 mask |= INTVAL (operands[4]) << 4;
6305 mask |= INTVAL (operands[5]) << 6;
6306 operands[2] = GEN_INT (mask);
6308 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6310 [(set_attr "type" "sselog")
6311 (set_attr "prefix_data16" "0")
6312 (set_attr "prefix_rep" "1")
6313 (set_attr "prefix" "maybe_vex")
6314 (set_attr "length_immediate" "1")
6315 (set_attr "mode" "TI")])
;; Expander taking the pshufhw immediate as a single integer (operand 2).
;; The immediate is split into four 2-bit selectors and 4 is added to
;; each, so the values handed to sse2_pshufhw_1 are element indices in the
;; range 4..7.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6317 (define_expand "sse2_pshufhw"
6318 [(match_operand:V8HI 0 "register_operand" "")
6319 (match_operand:V8HI 1 "nonimmediate_operand" "")
6320 (match_operand:SI 2 "const_int_operand" "")]
6323 int mask = INTVAL (operands[2]);
6324 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6325 GEN_INT (((mask >> 0) & 3) + 4),
6326 GEN_INT (((mask >> 2) & 3) + 4),
6327 GEN_INT (((mask >> 4) & 3) + 4),
6328 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle whose last four selectors are operands 2-5 (each 4..7).
;; At output time 4 is subtracted from each selector and the results are
;; packed into one immediate, two bits each at bit positions 0, 2, 4 and
;; 6, which replaces operand 2 for printing [v]pshufhw.
;; NOTE(review): part of the fixed selector prefix, the select RTL
;; operator, the enabling condition and the declaration of `mask' are not
;; visible in this excerpt.
6332 (define_insn "sse2_pshufhw_1"
6333 [(set (match_operand:V8HI 0 "register_operand" "=x")
6335 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6336 (parallel [(const_int 0)
6340 (match_operand 2 "const_4_to_7_operand" "")
6341 (match_operand 3 "const_4_to_7_operand" "")
6342 (match_operand 4 "const_4_to_7_operand" "")
6343 (match_operand 5 "const_4_to_7_operand" "")])))]
6347 mask |= (INTVAL (operands[2]) - 4) << 0;
6348 mask |= (INTVAL (operands[3]) - 4) << 2;
6349 mask |= (INTVAL (operands[4]) - 4) << 4;
6350 mask |= (INTVAL (operands[5]) - 4) << 6;
6351 operands[2] = GEN_INT (mask);
6353 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6355 [(set_attr "type" "sselog")
6356 (set_attr "prefix_rep" "1")
6357 (set_attr "prefix_data16" "0")
6358 (set_attr "prefix" "maybe_vex")
6359 (set_attr "length_immediate" "1")
6360 (set_attr "mode" "TI")])
;; Expander that loads an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operand 2 to the V4SI zero constant.
;; NOTE(review): the wrapping RTL operators, the place where operand 2 is
;; used and the enabling condition are not visible in this excerpt.
6362 (define_expand "sse2_loadd"
6363 [(set (match_operand:V4SI 0 "register_operand" "")
6366 (match_operand:SI 1 "nonimmediate_operand" ""))
6370 "operands[2] = CONST0_RTX (V4SImode);")
;; Place an SImode value (operand 2) into a V4SI register, with operand 1
;; (reg_or_0_operand) supplying the other vector.  Alternatives:
;;   0-1: operand 1 matches constraint C, source in memory or a general
;;        register -> [v]movd
;;   2:   operand 1 matches constraint C, source in memory -> movss
;;   3:   legacy register merge, destination tied to operand 1 -> movss
;;   4:   VEX three-operand register merge -> vmovss
;; NOTE(review): the wrapping RTL operators, the merge mask and the
;; enabling condition are not visible in this excerpt.
6372 (define_insn "sse2_loadld"
6373 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6376 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6377 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6381 %vmovd\t{%2, %0|%0, %2}
6382 %vmovd\t{%2, %0|%0, %2}
6383 movss\t{%2, %0|%0, %2}
6384 movss\t{%2, %0|%0, %2}
6385 vmovss\t{%2, %1, %0|%0, %1, %2}"
6386 [(set_attr "isa" "base,base,noavx,noavx,avx")
6387 (set_attr "type" "ssemov")
6388 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6389 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of a V4SI register to an SImode destination.
;; After reload the insn is split into a plain SImode move, with operand 1
;; rewritten as the SImode view of the same hard register.  The split is
;; taken only when inter-unit moves are allowed, or the destination is
;; memory, or the destination is not a general register.
;; NOTE(review): the select RTL operator, the insn condition and the
;; output template are not visible in this excerpt.
6391 (define_insn_and_split "sse2_stored"
6392 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6394 (match_operand:V4SI 1 "register_operand" "x,Yi")
6395 (parallel [(const_int 0)])))]
6398 "&& reload_completed
6399 && (TARGET_INTER_UNIT_MOVES
6400 || MEM_P (operands [0])
6401 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6402 [(set (match_dup 0) (match_dup 1))]
6403 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element i (operand 2, 0..3) of a V4SI value that lives in
;; offsettable memory into a general register.  The split code replaces
;; the extraction with an SImode load from byte offset i * 4 of the
;; original address.
;; NOTE(review): the select RTL operator, the insn/split conditions and
;; the output template are not visible in this excerpt.
6405 (define_insn_and_split "*vec_ext_v4si_mem"
6406 [(set (match_operand:SI 0 "register_operand" "=r")
6408 (match_operand:V4SI 1 "memory_operand" "o")
6409 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6415 int i = INTVAL (operands[2]);
6417 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of a V2DI register to a DImode
;; register or memory destination.
;; NOTE(review): the select RTL operator and the enabling condition are
;; not visible in this excerpt.
6421 (define_expand "sse_storeq"
6422 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6424 (match_operand:V2DI 1 "register_operand" "")
6425 (parallel [(const_int 0)])))]
;; 64-bit variant of the element-0 V2DI store.  Memory-to-memory operand
;; combinations are rejected by the condition.  Only the third alternative
;; (general register from offsettable memory) has a visible template, a
;; plain mov{q}; its "type" is imov and its "mode" DI, while the first two
;; alternatives carry "*" for both attributes.
;; NOTE(review): the select RTL operator and the templates of the first
;; two alternatives are not visible in this excerpt.
6428 (define_insn "*sse2_storeq_rex64"
6429 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6431 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6432 (parallel [(const_int 0)])))]
6433 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6437 mov{q}\t{%1, %0|%0, %1}"
6438 [(set_attr "type" "*,*,imov")
6439 (set_attr "mode" "*,*,DI")])
;; Element-0 V2DI store with a single alternative: source in an SSE
;; register, destination an SSE register or memory.
;; NOTE(review): the select RTL operator, the enabling condition and the
;; output template are not visible in this excerpt.
6441 (define_insn "*sse2_storeq"
6442 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6444 (match_operand:V2DI 1 "register_operand" "x")
6445 (parallel [(const_int 0)])))]
;; Rewrites the element-0 V2DI store as a plain DImode move, with operand 1
;; replaced by the DImode view of the same hard register.  The visible
;; part of the condition allows this only when inter-unit moves are
;; permitted, or the destination is memory, or the destination is not a
;; general register.
;; NOTE(review): the opening line of this definition and the start of its
;; condition are not visible in this excerpt; the layout (pattern,
;; condition, replacement pattern, preparation statement) looks like a
;; splitter -- confirm against the full file.
6450 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6452 (match_operand:V2DI 1 "register_operand" "")
6453 (parallel [(const_int 0)])))]
6456 && (TARGET_INTER_UNIT_MOVES
6457 || MEM_P (operands [0])
6458 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6459 [(set (match_dup 0) (match_dup 1))]
6460 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 (the high quadword) of a V2DI value on 64-bit
;; targets.  Memory-to-memory combinations are rejected.  Alternatives:
;;   0: register -> memory, [v]movhps
;;   1: in place in an SSE register, psrldq by 8 bytes (legacy)
;;   2: SSE register -> SSE register, vpsrldq by 8 bytes (VEX)
;;   3: offsettable memory -> SSE register, [v]movq from the %H1 operand
;;   4: offsettable memory -> general register, mov{q} from %H1
;; NOTE(review): the select RTL operator is not visible in this excerpt.
6462 (define_insn "*vec_extractv2di_1_rex64"
6463 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6465 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6466 (parallel [(const_int 1)])))]
6467 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6469 %vmovhps\t{%1, %0|%0, %1}
6470 psrldq\t{$8, %0|%0, 8}
6471 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6472 %vmovq\t{%H1, %0|%0, %H1}
6473 mov{q}\t{%H1, %0|%0, %H1}"
6474 [(set_attr "isa" "base,noavx,avx,base,base")
6475 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6476 (set_attr "length_immediate" "*,1,1,*,*")
6477 (set_attr "memory" "*,none,none,*,*")
6478 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
6479 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 (the high quadword) of a V2DI value with SSE2.
;; Memory-to-memory combinations are rejected.  Alternatives:
;;   0: register -> memory, [v]movhps
;;   1: in place in an SSE register, psrldq by 8 bytes (legacy)
;;   2: SSE register -> SSE register, vpsrldq by 8 bytes (VEX)
;;   3: offsettable memory -> SSE register, [v]movq from the %H1 operand
;; NOTE(review): the select RTL operator and the first line of the
;; condition are not visible in this excerpt.
6481 (define_insn "*vec_extractv2di_1_sse2"
6482 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x")
6484 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o")
6485 (parallel [(const_int 1)])))]
6487 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6489 %vmovhps\t{%1, %0|%0, %1}
6490 psrldq\t{$8, %0|%0, 8}
6491 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6492 %vmovq\t{%H1, %0|%0, %H1}"
6493 [(set_attr "isa" "base,noavx,avx,base")
6494 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov")
6495 (set_attr "length_immediate" "*,1,1,*")
6496 (set_attr "memory" "*,none,none,*")
6497 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex")
6498 (set_attr "mode" "V2SF,TI,TI,TI")])
6500 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 (the high quadword) of a V2DI value when only SSE,
;; not SSE2, is available.  Memory-to-memory combinations are rejected.
;; Alternatives:
;;   0: register -> memory, movhps
;;   1: register -> register, movhlps
;;   2: offsettable memory -> register, movlps from the %H1 operand
;; NOTE(review): the select RTL operator is not visible in this excerpt.
6501 (define_insn "*vec_extractv2di_1_sse"
6502 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6504 (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o")
6505 (parallel [(const_int 1)])))]
6506 "!TARGET_SSE2 && TARGET_SSE
6507 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6509 movhps\t{%1, %0|%0, %1}
6510 movhlps\t{%1, %0|%0, %1}
6511 movlps\t{%H1, %0|%0, %H1}"
6512 [(set_attr "type" "ssemov")
6513 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded broadcast of an SImode value to all elements of a V4SI
;; register: vpshufd with immediate 0 when the source is an SSE register,
;; vbroadcastss when it is in memory.
;; NOTE(review): the duplicating RTL operator and the enabling condition
;; are not visible in this excerpt.
6515 (define_insn "*vec_dupv4si_avx"
6516 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6518 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6521 vpshufd\t{$0, %1, %0|%0, %1, 0}
6522 vbroadcastss\t{%1, %0|%0, %1}"
6523 [(set_attr "type" "sselog1,ssemov")
6524 (set_attr "length_immediate" "1,0")
6525 (set_attr "prefix_extra" "0,1")
6526 (set_attr "prefix" "vex")
6527 (set_attr "mode" "TI,V4SF")])
;; Broadcast of an SImode register value to all elements of a V4SI
;; register: pshufd with immediate 0 for the Y2 register class, or an
;; in-place shufps with immediate 0 when the source is tied to the
;; destination.
;; NOTE(review): the duplicating RTL operator and the enabling condition
;; are not visible in this excerpt.
6529 (define_insn "*vec_dupv4si"
6530 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6532 (match_operand:SI 1 "register_operand" " Y2,0")))]
6535 pshufd\t{$0, %1, %0|%0, %1, 0}
6536 shufps\t{$0, %0, %0|%0, %0, 0}"
6537 [(set_attr "type" "sselog1")
6538 (set_attr "length_immediate" "1")
6539 (set_attr "mode" "TI,V4SF")])
;; Broadcast of a DImode value to both elements of a V2DI register.
;; Three alternatives: a legacy form with the source tied to the
;; destination, a VEX register form printed as vpunpcklqdq, and a
;; memory-source form printed as [v]movddup.
;; NOTE(review): the duplicating RTL operator, the enabling condition and
;; the template of the first alternative are not visible in this excerpt.
6541 (define_insn "*vec_dupv2di_sse3"
6542 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6544 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6548 vpunpcklqdq\t{%d1, %0|%0, %d1}
6549 %vmovddup\t{%1, %0|%0, %1}"
6550 [(set_attr "isa" "noavx,avx,base")
6551 (set_attr "type" "sselog1")
6552 (set_attr "prefix" "orig,vex,maybe_vex")
6553 (set_attr "mode" "TI,TI,DF")])
;; Broadcast of a DImode register value to both elements of a V2DI
;; register.  In both alternatives the source is tied to the destination
;; register (constraint 0).
;; NOTE(review): the duplicating RTL operator, the enabling condition and
;; both output templates are not visible in this excerpt.
6555 (define_insn "*vec_dupv2di"
6556 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6558 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6563 [(set_attr "type" "sselog1,ssemov")
6564 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from two SImode operands.  Alternatives:
;;   0-1: second element from a general register or memory, [v]pinsrd
;;        into element 1
;;   2-3: both elements in SSE registers, [v]punpckldq
;;   4:   second operand matches constraint C, [v]movd of operand 1
;;   5:   MMX registers, punpckldq
;;   6:   MMX destination, second operand matches constraint C, movd
;; NOTE(review): the concatenating RTL operator and the enabling condition
;; are not visible in this excerpt.
6566 (define_insn "*vec_concatv2si_sse4_1"
6567 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6569 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6570 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6573 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6574 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6575 punpckldq\t{%2, %0|%0, %2}
6576 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6577 %vmovd\t{%1, %0|%0, %1}
6578 punpckldq\t{%2, %0|%0, %2}
6579 movd\t{%1, %0|%0, %1}"
6580 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
6581 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6582 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6583 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6584 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6585 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6587 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6588 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6589 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SImode operands.  Alternatives:
;;   0: both elements in SSE registers, punpckldq
;;   1: SSE destination, second operand matches constraint C, movd
;;   2: MMX registers, punpckldq
;;   3: MMX destination, second operand matches constraint C, movd
;; NOTE(review): the concatenating RTL operator and the enabling condition
;; are not visible in this excerpt.
6590 (define_insn "*vec_concatv2si_sse2"
6591 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6593 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6594 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6597 punpckldq\t{%2, %0|%0, %2}
6598 movd\t{%1, %0|%0, %1}
6599 punpckldq\t{%2, %0|%0, %2}
6600 movd\t{%1, %0|%0, %1}"
6601 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6602 (set_attr "mode" "TI,TI,DI,DI")])
;; Plain-SSE flavour (per its name) of the V2SI concatenation.  The SSE
;; alternatives use the single-precision instructions unpcklps and movss
;; (mode V4SF); the MMX alternatives use punpckldq and movd (mode DI).
;; In the SSE movss alternative operand 1 must be in memory ("m").
6604 (define_insn "*vec_concatv2si_sse"
6605 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6607 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6608 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6611 unpcklps\t{%2, %0|%0, %2}
6612 movss\t{%1, %0|%0, %1}
6613 punpckldq\t{%2, %0|%0, %2}
6614 movd\t{%1, %0|%0, %1}"
6615 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6616 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; V4SI built from two V2SI operands.  Five alternatives, in order:
;; punpcklqdq / vpunpcklqdq and movlhps when operand 2 is in a register,
;; movhps / vmovhps when operand 2 is in memory.  The "isa" attribute pairs
;; each legacy encoding with its AVX three-operand counterpart where one
;; exists.
6618 (define_insn "*vec_concatv4si"
6619 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x,x,x")
6621 (match_operand:V2SI 1 "register_operand" " 0 ,x,0,0,x")
6622 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,x,m,m")))]
6625 punpcklqdq\t{%2, %0|%0, %2}
6626 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6627 movlhps\t{%2, %0|%0, %2}
6628 movhps\t{%2, %0|%0, %2}
6629 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6630 [(set_attr "isa" "noavx,avx,noavx,noavx,avx")
6631 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
6632 (set_attr "prefix" "orig,vex,orig,orig,vex")
6633 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V2DI built from two DImode operands on 64-bit targets with SSE4.1
;; (condition: TARGET_64BIT && TARGET_SSE4_1).  Nine alternatives, in order:
;;   0/1  pinsrq / vpinsrq        - operand 2 from a GPR or memory ("rm")
;;   2    %vmovq                  - operand 2 is "C", operand 1 in SSE reg/mem
;;   3    %vmovd                  - operand 2 is "C", operand 1 in a GPR
;;   4    movq2dq                 - operand 2 is "C", operand 1 in an MMX reg
;;   5/6  punpcklqdq / vpunpcklqdq - both operands in SSE registers
;;   7/8  movhps / vmovhps        - operand 2 in memory
6635 ;; movd instead of movq is required to handle broken assemblers.
6636 (define_insn "*vec_concatv2di_rex64_sse4_1"
6637 [(set (match_operand:V2DI 0 "register_operand"
6638 "=x, x, x,Yi,!x,x,x,x,x")
6640 (match_operand:DI 1 "nonimmediate_operand"
6641 " 0, x,xm,r ,*y,0,x,0,x")
6642 (match_operand:DI 2 "vector_move_operand"
6643 "rm,rm, C,C ,C ,x,x,m,m")))]
6644 "TARGET_64BIT && TARGET_SSE4_1"
6646 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6647 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6648 %vmovq\t{%1, %0|%0, %1}
6649 %vmovd\t{%1, %0|%0, %1}
6650 movq2dq\t{%1, %0|%0, %1}
6651 punpcklqdq\t{%2, %0|%0, %2}
6652 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6653 movhps\t{%2, %0|%0, %2}
6654 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6655 [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx")
6656 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
;; prefix_rex gets special treatment only for alternatives 0 and 3, and only
;; when TARGET_AVX is off; every other case falls back to "*".
6657 (set (attr "prefix_rex")
6659 (and (eq_attr "alternative" "0,3")
6660 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6662 (const_string "*")))
6663 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6664 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6665 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6666 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
;; V2DI built from two DImode operands on 64-bit targets
;; (condition: TARGET_64BIT && TARGET_SSE); no pinsrq alternative here.
;; Six alternatives, in order: movq / movd / movq2dq when operand 2 is the
;; "C" constant (operand 1 in an SSE register or memory, a GPR, or an MMX
;; register respectively), punpcklqdq and movlhps for two registers, and
;; movhps when operand 2 is in memory.  Only the GPR movd alternative sets
;; prefix_rex.
6668 ;; movd instead of movq is required to handle broken assemblers.
6669 (define_insn "*vec_concatv2di_rex64_sse"
6670 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x")
6672 (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0")
6673 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
6674 "TARGET_64BIT && TARGET_SSE"
6676 movq\t{%1, %0|%0, %1}
6677 movd\t{%1, %0|%0, %1}
6678 movq2dq\t{%1, %0|%0, %1}
6679 punpcklqdq\t{%2, %0|%0, %2}
6680 movlhps\t{%2, %0|%0, %2}
6681 movhps\t{%2, %0|%0, %2}"
6682 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
6683 (set_attr "prefix_rex" "*,1,*,*,*,*")
6684 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; V2DI built from two DImode operands on 32-bit targets
;; (condition: !TARGET_64BIT && TARGET_SSE).  Seven alternatives, in order:
;; %vmovq and movq2dq when operand 2 is the "C" constant (operand 1 in an
;; SSE register/memory or an MMX register), punpcklqdq / vpunpcklqdq and
;; movlhps for two registers, movhps / vmovhps when operand 2 is in memory.
;; There is no GPR alternative in this pattern.
6686 (define_insn "vec_concatv2di"
6687 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6689 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6690 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6691 "!TARGET_64BIT && TARGET_SSE"
6693 %vmovq\t{%1, %0|%0, %1}
6694 movq2dq\t{%1, %0|%0, %1}
6695 punpcklqdq\t{%2, %0|%0, %2}
6696 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6697 movlhps\t{%2, %0|%0, %2}
6698 movhps\t{%2, %0|%0, %2}
6699 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6700 [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx")
6701 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6702 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6703 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo on the VI124_128 modes; the result has the
;; mode given by <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack, after which DONE suppresses the RTL template.
;; NOTE(review): the two boolean arguments presumably mean (unsigned_p,
;; high_p) -- confirm against ix86_expand_sse_unpack.
6705 (define_expand "vec_unpacks_lo_<mode>"
6706 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6707 (match_operand:VI124_128 1 "register_operand" "")]
6709 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for vec_unpacks_hi on the VI124_128 modes; the result has the
;; mode given by <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack, after which DONE suppresses the RTL template.
;; NOTE(review): the two boolean arguments presumably mean (unsigned_p,
;; high_p) -- confirm against ix86_expand_sse_unpack.
6711 (define_expand "vec_unpacks_hi_<mode>"
6712 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6713 (match_operand:VI124_128 1 "register_operand" "")]
6715 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for vec_unpacku_lo on the VI124_128 modes; the result has the
;; mode given by <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack, after which DONE suppresses the RTL template.
;; NOTE(review): the two boolean arguments presumably mean (unsigned_p,
;; high_p) -- confirm against ix86_expand_sse_unpack.
6717 (define_expand "vec_unpacku_lo_<mode>"
6718 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6719 (match_operand:VI124_128 1 "register_operand" "")]
6721 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for vec_unpacku_hi on the VI124_128 modes; the result has the
;; mode given by <sseunpackmode>.  All work is delegated to
;; ix86_expand_sse_unpack, after which DONE suppresses the RTL template.
;; NOTE(review): the two boolean arguments presumably mean (unsigned_p,
;; high_p) -- confirm against ix86_expand_sse_unpack.
6723 (define_expand "vec_unpacku_hi_<mode>"
6724 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6725 (match_operand:VI124_128 1 "register_operand" "")]
6727 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6729 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI "uavg" pattern.  The RTL involves the two V16QI
;; inputs and a constant vector of sixteen ones (presumably the rounding
;; term of the average -- confirm against the full RTL).  Before matching,
;; the operands are canonicalised for a PLUS in V16QImode by
;; ix86_fixup_binary_operands_no_copy.
6735 (define_expand "sse2_uavgv16qi3"
6736 [(set (match_operand:V16QI 0 "register_operand" "")
6742 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6744 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6745 (const_vector:V16QI [(const_int 1) (const_int 1)
6746 (const_int 1) (const_int 1)
6747 (const_int 1) (const_int 1)
6748 (const_int 1) (const_int 1)
6749 (const_int 1) (const_int 1)
6750 (const_int 1) (const_int 1)
6751 (const_int 1) (const_int 1)
6752 (const_int 1) (const_int 1)]))
6755 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Insn matching the V16QI "uavg" RTL; emits pavgb (operand 1 tied to the
;; destination) or the three-operand AVX vpavgb.  Operand 1 is marked
;; commutative ("%"), and the pattern is only valid when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
6757 (define_insn "*sse2_uavgv16qi3"
6758 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6764 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6766 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6767 (const_vector:V16QI [(const_int 1) (const_int 1)
6768 (const_int 1) (const_int 1)
6769 (const_int 1) (const_int 1)
6770 (const_int 1) (const_int 1)
6771 (const_int 1) (const_int 1)
6772 (const_int 1) (const_int 1)
6773 (const_int 1) (const_int 1)
6774 (const_int 1) (const_int 1)]))
6776 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6778 pavgb\t{%2, %0|%0, %2}
6779 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6780 [(set_attr "isa" "noavx,avx")
6781 (set_attr "type" "sseiadd")
6782 (set_attr "prefix_data16" "1,*")
6783 (set_attr "prefix" "orig,vex")
6784 (set_attr "mode" "TI")])
;; Expander for the V8HI "uavg" pattern.  The RTL involves the two V8HI
;; inputs and a constant vector of eight ones (presumably the rounding term
;; of the average -- confirm against the full RTL).  Before matching, the
;; operands are canonicalised for a PLUS in V8HImode by
;; ix86_fixup_binary_operands_no_copy.
6786 (define_expand "sse2_uavgv8hi3"
6787 [(set (match_operand:V8HI 0 "register_operand" "")
6793 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6795 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6796 (const_vector:V8HI [(const_int 1) (const_int 1)
6797 (const_int 1) (const_int 1)
6798 (const_int 1) (const_int 1)
6799 (const_int 1) (const_int 1)]))
6802 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Insn matching the V8HI "uavg" RTL; emits pavgw (operand 1 tied to the
;; destination) or the three-operand AVX vpavgw.  Operand 1 is marked
;; commutative ("%"), and the pattern is only valid when TARGET_SSE2 holds
;; and ix86_binary_operator_ok accepts the operands for PLUS.
6804 (define_insn "*sse2_uavgv8hi3"
6805 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6811 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6813 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6814 (const_vector:V8HI [(const_int 1) (const_int 1)
6815 (const_int 1) (const_int 1)
6816 (const_int 1) (const_int 1)
6817 (const_int 1) (const_int 1)]))
6819 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6821 pavgw\t{%2, %0|%0, %2}
6822 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6823 [(set_attr "isa" "noavx,avx")
6824 (set_attr "type" "sseiadd")
6825 (set_attr "prefix_data16" "1,*")
6826 (set_attr "prefix" "orig,vex")
6827 (set_attr "mode" "TI")])
;; psadbw / vpsadbw: takes two V16QI inputs and produces a V2DI result.
;; The operation is modelled as an opaque unspec rather than spelled out in
;; RTL, for the reason given below.
6829 ;; The correct representation for this is absolutely enormous, and
6830 ;; surely not generally useful.
6831 (define_insn "sse2_psadbw"
6832 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6833 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6834 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6838 psadbw\t{%2, %0|%0, %2}
6839 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6840 [(set_attr "isa" "noavx,avx")
6841 (set_attr "type" "sseiadd")
6842 (set_attr "atom_unit" "simul")
6843 (set_attr "prefix_data16" "1,*")
6844 (set_attr "prefix" "orig,vex")
6845 (set_attr "mode" "TI")])
;; movmskps / movmskpd family, instantiated for every mode of the VF
;; iterator: reads an SSE register holding a float vector and writes an
;; SImode general register.  The %v prefix selects the VEX form when
;; applicable ("maybe_vex").
6847 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
6848 [(set (match_operand:SI 0 "register_operand" "=r")
6850 [(match_operand:VF 1 "register_operand" "x")]
6853 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6854 [(set_attr "type" "ssemov")
6855 (set_attr "prefix" "maybe_vex")
6856 (set_attr "mode" "<MODE>")])
;; pmovmskb / vpmovmskb: reads a V16QI SSE register and writes an SImode
;; general register.  Modelled as an unspec; the %v prefix selects the VEX
;; form when applicable ("maybe_vex").
6858 (define_insn "sse2_pmovmskb"
6859 [(set (match_operand:SI 0 "register_operand" "=r")
6860 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6863 "%vpmovmskb\t{%1, %0|%0, %1}"
6864 [(set_attr "type" "ssemov")
6865 (set_attr "prefix_data16" "1")
6866 (set_attr "prefix" "maybe_vex")
6867 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: operand 0 is the V16QI memory destination and
;; operands 1 and 2 are V16QI registers fed to the unspec.
;; NOTE(review): which of operands 1/2 is the data and which the byte mask
;; is not stated here -- confirm against the builtin that uses this pattern.
6869 (define_expand "sse2_maskmovdqu"
6870 [(set (match_operand:V16QI 0 "memory_operand" "")
6871 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6872 (match_operand:V16QI 2 "register_operand" "")
;; Insn form of maskmovdqu.  The store address is operand 0, a Pmode
;; register forced into the "D" register class; it never appears in the
;; output template, which prints only operands 2 and 1.  The old contents
;; of the destination memory are an input of the unspec (match_dup 0).
6877 (define_insn "*sse2_maskmovdqu"
6878 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6879 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6880 (match_operand:V16QI 2 "register_operand" "x")
6881 (mem:V16QI (match_dup 0))]
6884 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6885 [(set_attr "type" "ssemov")
6886 (set_attr "prefix_data16" "1")
6887 ;; The implicit %rdi operand confuses default length_vex computation.
;; Hence length_vex is computed by hand: 3 bytes, plus one when operand 2
;; satisfies REX_SSE_REGNO_P.
6888 (set (attr "length_vex")
6889 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6890 (set_attr "prefix" "maybe_vex")
6891 (set_attr "mode" "TI")])
;; ldmxcsr (per the pattern name): an unspec_volatile whose only operand is
;; an SImode memory location; the "memory" attribute marks it as a load and
;; atom_sse_attr classifies it as an mxcsr access.
6893 (define_insn "sse_ldmxcsr"
6894 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6898 [(set_attr "type" "sse")
6899 (set_attr "atom_sse_attr" "mxcsr")
6900 (set_attr "prefix" "maybe_vex")
6901 (set_attr "memory" "load")])
;; stmxcsr (per the pattern name): sets an SImode memory operand from the
;; UNSPECV_STMXCSR unspec_volatile; the "memory" attribute marks it as a
;; store and atom_sse_attr classifies it as an mxcsr access.
6903 (define_insn "sse_stmxcsr"
6904 [(set (match_operand:SI 0 "memory_operand" "=m")
6905 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6908 [(set_attr "type" "sse")
6909 (set_attr "atom_sse_attr" "mxcsr")
6910 (set_attr "prefix" "maybe_vex")
6911 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.  The
;; fence is represented as a BLKmode memory set to an UNSPEC_SFENCE of
;; itself; the preparation code creates that memory operand as a volatile
;; BLKmode MEM whose address is a Pmode SCRATCH.
6913 (define_expand "sse_sfence"
6915 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6916 "TARGET_SSE || TARGET_3DNOW_A"
6918 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6919 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the sfence RTL (a BLKmode operand set to UNSPEC_SFENCE of
;; itself).  Operand 0 has no predicate or constraint.  length_address is
;; forced to 0 and the memory effect is declared "unknown".
6922 (define_insn "*sse_sfence"
6923 [(set (match_operand:BLK 0 "" "")
6924 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6925 "TARGET_SSE || TARGET_3DNOW_A"
6927 [(set_attr "type" "sse")
6928 (set_attr "length_address" "0")
6929 (set_attr "atom_sse_attr" "fence")
6930 (set_attr "memory" "unknown")])
;; clflush (per the pattern name): an unspec_volatile taking a single
;; address operand ("p" constraint, address_operand predicate).  The memory
;; effect is declared "unknown" and atom_sse_attr classifies it as a fence.
6932 (define_insn "sse2_clflush"
6933 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6937 [(set_attr "type" "sse")
6938 (set_attr "atom_sse_attr" "fence")
6939 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is represented as a BLKmode memory set
;; to an UNSPEC_MFENCE of itself; the preparation code creates that memory
;; operand as a volatile BLKmode MEM whose address is a Pmode SCRATCH.
6941 (define_expand "sse2_mfence"
6943 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6946 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6947 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the mfence RTL (a BLKmode operand set to UNSPEC_MFENCE of
;; itself).  Enabled for TARGET_64BIT or TARGET_SSE2.  length_address is
;; forced to 0 and the memory effect is declared "unknown".
6950 (define_insn "*sse2_mfence"
6951 [(set (match_operand:BLK 0 "" "")
6952 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6953 "TARGET_64BIT || TARGET_SSE2"
6955 [(set_attr "type" "sse")
6956 (set_attr "length_address" "0")
6957 (set_attr "atom_sse_attr" "fence")
6958 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is represented as a BLKmode memory set
;; to an UNSPEC_LFENCE of itself; the preparation code creates that memory
;; operand as a volatile BLKmode MEM whose address is a Pmode SCRATCH.
6960 (define_expand "sse2_lfence"
6962 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6965 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6966 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the lfence RTL (a BLKmode operand set to UNSPEC_LFENCE of
;; itself).  length_address is forced to 0 and the memory effect is
;; declared "unknown".  Unlike the sfence/mfence insns, atom_sse_attr is
;; "lfence" rather than "fence".
6969 (define_insn "*sse2_lfence"
6970 [(set (match_operand:BLK 0 "" "")
6971 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6974 [(set_attr "type" "sse")
6975 (set_attr "length_address" "0")
6976 (set_attr "atom_sse_attr" "lfence")
6977 (set_attr "memory" "unknown")])
;; mwait (per the pattern name): an unspec_volatile with two SImode
;; register operands pinned to the "a" and "c" register classes.  One
;; pattern serves both 32-bit and 64-bit code, as the note below explains.
;; The instruction length is fixed at 3 bytes.
6979 (define_insn "sse3_mwait"
6980 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6981 (match_operand:SI 1 "register_operand" "c")]
6984 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
6985 ;; Since 32bit register operands are implicitly zero extended to 64bit,
6986 ;; we only need to set up 32bit registers.
6988 [(set_attr "length" "3")])
;; 32-bit monitor (condition: TARGET_SSE3 && !TARGET_64BIT): an
;; unspec_volatile with three SImode register operands pinned to the "a",
;; "c" and "d" register classes, printed in that order.  The instruction
;; length is fixed at 3 bytes.
6990 (define_insn "sse3_monitor"
6991 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6992 (match_operand:SI 1 "register_operand" "c")
6993 (match_operand:SI 2 "register_operand" "d")]
6995 "TARGET_SSE3 && !TARGET_64BIT"
6996 "monitor\t%0, %1, %2"
6997 [(set_attr "length" "3")])
;; 64-bit monitor (condition: TARGET_SSE3 && TARGET_64BIT).  Operand 0, in
;; the "a" register class, is DImode; operands 1 ("c") and 2 ("d") stay
;; SImode for the reason given in the note below.  The instruction length
;; is fixed at 3 bytes.
6999 (define_insn "sse3_monitor64"
7000 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
7001 (match_operand:SI 1 "register_operand" "c")
7002 (match_operand:SI 2 "register_operand" "d")]
7004 "TARGET_SSE3 && TARGET_64BIT"
7005 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
7006 ;; RCX and RDX are used. Since 32bit register operands are implicitly
7007 ;; zero extended to 64bit, we only need to set up 32bit registers.
7009 [(set_attr "length" "3")])
7011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7013 ;; SSSE3 instructions
7015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSSE3 phaddw / vphaddw on V8HI.  The RTL picks adjacent HImode element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then the same pairs of
;; operand 2 (per the mnemonic, each pair is summed).  Alternative 0 ties
;; operand 1 to the destination; alternative 1 is the three-operand AVX
;; form.
7017 (define_insn "ssse3_phaddwv8hi3"
7018 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7024 (match_operand:V8HI 1 "register_operand" "0,x")
7025 (parallel [(const_int 0)]))
7026 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7028 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7029 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7032 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7033 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7035 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7036 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7041 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7042 (parallel [(const_int 0)]))
7043 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7045 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7046 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7049 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7050 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7052 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7053 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7056 phaddw\t{%2, %0|%0, %2}
7057 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7058 [(set_attr "isa" "noavx,avx")
7059 (set_attr "type" "sseiadd")
7060 (set_attr "atom_unit" "complex")
7061 (set_attr "prefix_data16" "1,*")
7062 (set_attr "prefix_extra" "1")
7063 (set_attr "prefix" "orig,vex")
7064 (set_attr "mode" "TI")])
;; MMX-register form of phaddw on V4HI.  The RTL picks adjacent HImode
;; element pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand
;; 1 is tied to the destination; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
7066 (define_insn "ssse3_phaddwv4hi3"
7067 [(set (match_operand:V4HI 0 "register_operand" "=y")
7072 (match_operand:V4HI 1 "register_operand" "0")
7073 (parallel [(const_int 0)]))
7074 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7076 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7077 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7081 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7082 (parallel [(const_int 0)]))
7083 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7085 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7086 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7088 "phaddw\t{%2, %0|%0, %2}"
7089 [(set_attr "type" "sseiadd")
7090 (set_attr "atom_unit" "complex")
7091 (set_attr "prefix_extra" "1")
7092 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7093 (set_attr "mode" "DI")])
;; SSSE3 phaddd / vphaddd on V4SI.  The RTL picks adjacent SImode element
;; pairs (0,1), (2,3) of operand 1 and then of operand 2.  Alternative 0
;; ties operand 1 to the destination; alternative 1 is the three-operand
;; AVX form.
7095 (define_insn "ssse3_phadddv4si3"
7096 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7101 (match_operand:V4SI 1 "register_operand" "0,x")
7102 (parallel [(const_int 0)]))
7103 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7105 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7106 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7110 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7111 (parallel [(const_int 0)]))
7112 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7114 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7115 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7118 phaddd\t{%2, %0|%0, %2}
7119 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7120 [(set_attr "isa" "noavx,avx")
7121 (set_attr "type" "sseiadd")
7122 (set_attr "atom_unit" "complex")
7123 (set_attr "prefix_data16" "1,*")
7124 (set_attr "prefix_extra" "1")
7125 (set_attr "prefix" "orig,vex")
7126 (set_attr "mode" "TI")])
;; MMX-register form of phaddd on V2SI.  The RTL picks elements 0 and 1 of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination;
;; prefix_rex is computed at output time by x86_extended_reg_mentioned_p.
7128 (define_insn "ssse3_phadddv2si3"
7129 [(set (match_operand:V2SI 0 "register_operand" "=y")
7133 (match_operand:V2SI 1 "register_operand" "0")
7134 (parallel [(const_int 0)]))
7135 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7138 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7139 (parallel [(const_int 0)]))
7140 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7142 "phaddd\t{%2, %0|%0, %2}"
7143 [(set_attr "type" "sseiadd")
7144 (set_attr "atom_unit" "complex")
7145 (set_attr "prefix_extra" "1")
7146 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7147 (set_attr "mode" "DI")])
;; SSSE3 phaddsw / vphaddsw on V8HI (per the mnemonic, the saturating
;; variant of the horizontal add).  The RTL picks adjacent HImode element
;; pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of operand 2.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand AVX form.
7149 (define_insn "ssse3_phaddswv8hi3"
7150 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7156 (match_operand:V8HI 1 "register_operand" "0,x")
7157 (parallel [(const_int 0)]))
7158 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7160 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7161 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7164 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7165 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7167 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7168 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7173 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7174 (parallel [(const_int 0)]))
7175 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7177 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7178 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7181 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7182 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7184 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7185 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7188 phaddsw\t{%2, %0|%0, %2}
7189 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7190 [(set_attr "isa" "noavx,avx")
7191 (set_attr "type" "sseiadd")
7192 (set_attr "atom_unit" "complex")
7193 (set_attr "prefix_data16" "1,*")
7194 (set_attr "prefix_extra" "1")
7195 (set_attr "prefix" "orig,vex")
7196 (set_attr "mode" "TI")])
;; MMX-register form of phaddsw on V4HI.  The RTL picks adjacent HImode
;; element pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand
;; 1 is tied to the destination; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
7198 (define_insn "ssse3_phaddswv4hi3"
7199 [(set (match_operand:V4HI 0 "register_operand" "=y")
7204 (match_operand:V4HI 1 "register_operand" "0")
7205 (parallel [(const_int 0)]))
7206 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7208 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7209 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7213 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7214 (parallel [(const_int 0)]))
7215 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7217 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7218 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7220 "phaddsw\t{%2, %0|%0, %2}"
7221 [(set_attr "type" "sseiadd")
7222 (set_attr "atom_unit" "complex")
7223 (set_attr "prefix_extra" "1")
7224 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7225 (set_attr "mode" "DI")])
;; SSSE3 phsubw / vphsubw on V8HI (per the mnemonic, a horizontal
;; subtract).  The RTL picks adjacent HImode element pairs (0,1), (2,3),
;; (4,5), (6,7) of operand 1 and then of operand 2.  Alternative 0 ties
;; operand 1 to the destination; alternative 1 is the three-operand AVX
;; form.
7227 (define_insn "ssse3_phsubwv8hi3"
7228 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7234 (match_operand:V8HI 1 "register_operand" "0,x")
7235 (parallel [(const_int 0)]))
7236 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7238 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7239 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7242 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7243 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7245 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7246 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7251 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7252 (parallel [(const_int 0)]))
7253 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7255 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7256 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7259 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7260 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7262 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7263 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7266 phsubw\t{%2, %0|%0, %2}
7267 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7268 [(set_attr "isa" "noavx,avx")
7269 (set_attr "type" "sseiadd")
7270 (set_attr "atom_unit" "complex")
7271 (set_attr "prefix_data16" "1,*")
7272 (set_attr "prefix_extra" "1")
7273 (set_attr "prefix" "orig,vex")
7274 (set_attr "mode" "TI")])
;; MMX-register form of phsubw on V4HI.  The RTL picks adjacent HImode
;; element pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand
;; 1 is tied to the destination; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
7276 (define_insn "ssse3_phsubwv4hi3"
7277 [(set (match_operand:V4HI 0 "register_operand" "=y")
7282 (match_operand:V4HI 1 "register_operand" "0")
7283 (parallel [(const_int 0)]))
7284 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7286 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7287 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7291 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7292 (parallel [(const_int 0)]))
7293 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7295 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7296 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7298 "phsubw\t{%2, %0|%0, %2}"
7299 [(set_attr "type" "sseiadd")
7300 (set_attr "atom_unit" "complex")
7301 (set_attr "prefix_extra" "1")
7302 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7303 (set_attr "mode" "DI")])
;; SSSE3 phsubd / vphsubd on V4SI.  The RTL picks adjacent SImode element
;; pairs (0,1), (2,3) of operand 1 and then of operand 2.  Alternative 0
;; ties operand 1 to the destination; alternative 1 is the three-operand
;; AVX form.
7305 (define_insn "ssse3_phsubdv4si3"
7306 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7311 (match_operand:V4SI 1 "register_operand" "0,x")
7312 (parallel [(const_int 0)]))
7313 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7315 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7316 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7320 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7321 (parallel [(const_int 0)]))
7322 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7324 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7325 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7328 phsubd\t{%2, %0|%0, %2}
7329 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7331 [(set_attr "isa" "noavx,avx")
7332 (set_attr "type" "sseiadd")
7333 (set_attr "atom_unit" "complex")
7334 (set_attr "prefix_data16" "1,*")
7335 (set_attr "prefix_extra" "1")
7336 (set_attr "prefix" "orig,vex")
7337 (set_attr "mode" "TI")])
;; MMX-register form of phsubd on V2SI.  The RTL picks elements 0 and 1 of
;; operand 1 and then of operand 2.  Operand 1 is tied to the destination;
;; prefix_rex is computed at output time by x86_extended_reg_mentioned_p.
7339 (define_insn "ssse3_phsubdv2si3"
7340 [(set (match_operand:V2SI 0 "register_operand" "=y")
7344 (match_operand:V2SI 1 "register_operand" "0")
7345 (parallel [(const_int 0)]))
7346 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7349 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7350 (parallel [(const_int 0)]))
7351 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7353 "phsubd\t{%2, %0|%0, %2}"
7354 [(set_attr "type" "sseiadd")
7355 (set_attr "atom_unit" "complex")
7356 (set_attr "prefix_extra" "1")
7357 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7358 (set_attr "mode" "DI")])
;; SSSE3 phsubsw / vphsubsw on V8HI (per the mnemonic, the saturating
;; variant of the horizontal subtract).  The RTL picks adjacent HImode
;; element pairs (0,1), (2,3), (4,5), (6,7) of operand 1 and then of
;; operand 2.  Alternative 0 ties operand 1 to the destination;
;; alternative 1 is the three-operand AVX form.
7360 (define_insn "ssse3_phsubswv8hi3"
7361 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7367 (match_operand:V8HI 1 "register_operand" "0,x")
7368 (parallel [(const_int 0)]))
7369 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7371 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7372 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7375 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7376 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7378 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7379 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7384 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7385 (parallel [(const_int 0)]))
7386 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7388 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7389 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7392 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7393 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7395 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7396 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7399 phsubsw\t{%2, %0|%0, %2}
7400 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7401 [(set_attr "isa" "noavx,avx")
7402 (set_attr "type" "sseiadd")
7403 (set_attr "atom_unit" "complex")
7404 (set_attr "prefix_data16" "1,*")
7405 (set_attr "prefix_extra" "1")
7406 (set_attr "prefix" "orig,vex")
7407 (set_attr "mode" "TI")])
;; MMX-register form of phsubsw on V4HI.  The RTL picks adjacent HImode
;; element pairs (0,1), (2,3) of operand 1 and then of operand 2.  Operand
;; 1 is tied to the destination; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
7409 (define_insn "ssse3_phsubswv4hi3"
7410 [(set (match_operand:V4HI 0 "register_operand" "=y")
7415 (match_operand:V4HI 1 "register_operand" "0")
7416 (parallel [(const_int 0)]))
7417 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7419 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7420 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7424 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7425 (parallel [(const_int 0)]))
7426 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7428 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7429 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7431 "phsubsw\t{%2, %0|%0, %2}"
7432 [(set_attr "type" "sseiadd")
7433 (set_attr "atom_unit" "complex")
7434 (set_attr "prefix_extra" "1")
7435 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7436 (set_attr "mode" "DI")])
;; SSSE3 pmaddubsw / vpmaddubsw: two V16QI inputs, V8HI result.  The RTL
;; takes V8QI selections from each operand twice, one parallel starting at
;; index 0 and one starting at index 1.
;; NOTE(review): presumably these are the even- and odd-indexed bytes,
;; multiplied pairwise and summed -- confirm against the full RTL and the
;; instruction reference.
7438 (define_insn "ssse3_pmaddubsw128"
7439 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7444 (match_operand:V16QI 1 "register_operand" "0,x")
7445 (parallel [(const_int 0)
7455 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7456 (parallel [(const_int 0)
7466 (vec_select:V8QI (match_dup 1)
7467 (parallel [(const_int 1)
7476 (vec_select:V8QI (match_dup 2)
7477 (parallel [(const_int 1)
7484 (const_int 15)]))))))]
7487 pmaddubsw\t{%2, %0|%0, %2}
7488 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
7489 [(set_attr "isa" "noavx,avx")
7490 (set_attr "type" "sseiadd")
7491 (set_attr "atom_unit" "simul")
7492 (set_attr "prefix_data16" "1,*")
7493 (set_attr "prefix_extra" "1")
7494 (set_attr "prefix" "orig,vex")
7495 (set_attr "mode" "TI")])
;; MMX-register form of pmaddubsw: two V8QI inputs, V4HI result.  The RTL
;; takes V4QI selections from each operand twice, one parallel starting at
;; index 0 and one starting at index 1.
;; NOTE(review): presumably these are the even- and odd-indexed bytes,
;; multiplied pairwise and summed -- confirm against the full RTL and the
;; instruction reference.
7497 (define_insn "ssse3_pmaddubsw"
7498 [(set (match_operand:V4HI 0 "register_operand" "=y")
7503 (match_operand:V8QI 1 "register_operand" "0")
7504 (parallel [(const_int 0)
7510 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
7511 (parallel [(const_int 0)
7517 (vec_select:V4QI (match_dup 1)
7518 (parallel [(const_int 1)
7523 (vec_select:V4QI (match_dup 2)
7524 (parallel [(const_int 1)
7527 (const_int 7)]))))))]
7529 "pmaddubsw\t{%2, %0|%0, %2}"
7530 [(set_attr "type" "sseiadd")
7531 (set_attr "atom_unit" "simul")
7532 (set_attr "prefix_extra" "1")
7533 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7534 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  The RTL involves the two V8HI
;; inputs and a constant vector of eight ones (presumably the rounding
;; increment -- confirm against the full RTL).  Before matching, the
;; operands are canonicalised for a MULT in V8HImode by
;; ix86_fixup_binary_operands_no_copy.
7536 (define_expand "ssse3_pmulhrswv8hi3"
7537 [(set (match_operand:V8HI 0 "register_operand" "")
7544 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7546 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7548 (const_vector:V8HI [(const_int 1) (const_int 1)
7549 (const_int 1) (const_int 1)
7550 (const_int 1) (const_int 1)
7551 (const_int 1) (const_int 1)]))
7554 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Insn matching the V8HI pmulhrsw RTL; emits pmulhrsw (operand 1 tied to
;; the destination) or the three-operand AVX vpmulhrsw.  Operand 1 is
;; marked commutative ("%"), and the pattern is only valid when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands for
;; MULT.
7556 (define_insn "*ssse3_pmulhrswv8hi3"
7557 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7564 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7566 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7568 (const_vector:V8HI [(const_int 1) (const_int 1)
7569 (const_int 1) (const_int 1)
7570 (const_int 1) (const_int 1)
7571 (const_int 1) (const_int 1)]))
7573 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7575 pmulhrsw\t{%2, %0|%0, %2}
7576 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
7577 [(set_attr "isa" "noavx,avx")
7578 (set_attr "type" "sseimul")
7579 (set_attr "prefix_data16" "1,*")
7580 (set_attr "prefix_extra" "1")
7581 (set_attr "prefix" "orig,vex")
7582 (set_attr "mode" "TI")])
;; Expander for the V4HI (MMX-sized) pmulhrsw pattern.  The RTL involves
;; the two V4HI inputs and a constant vector of four ones (presumably the
;; rounding increment -- confirm against the full RTL).  Before matching,
;; the operands are canonicalised for a MULT in V4HImode by
;; ix86_fixup_binary_operands_no_copy.
7584 (define_expand "ssse3_pmulhrswv4hi3"
7585 [(set (match_operand:V4HI 0 "register_operand" "")
7592 (match_operand:V4HI 1 "nonimmediate_operand" ""))
7594 (match_operand:V4HI 2 "nonimmediate_operand" "")))
7596 (const_vector:V4HI [(const_int 1) (const_int 1)
7597 (const_int 1) (const_int 1)]))
7600 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Insn matching the V4HI pmulhrsw RTL on MMX ("y") registers.  Operand 1
;; is commutative and tied to the destination ("%0").  Valid only when
;; TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the operands for
;; MULT; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
7602 (define_insn "*ssse3_pmulhrswv4hi3"
7603 [(set (match_operand:V4HI 0 "register_operand" "=y")
7610 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
7612 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
7614 (const_vector:V4HI [(const_int 1) (const_int 1)
7615 (const_int 1) (const_int 1)]))
7617 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
7618 "pmulhrsw\t{%2, %0|%0, %2}"
7619 [(set_attr "type" "sseimul")
7620 (set_attr "prefix_extra" "1")
7621 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7622 (set_attr "mode" "DI")])
;; SSSE3 pshufb / vpshufb on V16QI, modelled as an unspec of the two V16QI
;; inputs.  Alternative 0 ties operand 1 to the destination; alternative 1
;; is the three-operand AVX form.  Operand 2 may be a register or memory.
7624 (define_insn "ssse3_pshufbv16qi3"
7625 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7626 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7627 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
7631 pshufb\t{%2, %0|%0, %2}
7632 vpshufb\t{%2, %1, %0|%0, %1, %2}"
7633 [(set_attr "isa" "noavx,avx")
7634 (set_attr "type" "sselog1")
7635 (set_attr "prefix_data16" "1,*")
7636 (set_attr "prefix_extra" "1")
7637 (set_attr "prefix" "orig,vex")
7638 (set_attr "mode" "TI")])
;; MMX-register form of pshufb on V8QI, modelled as an unspec of the two
;; V8QI inputs; operand 1 is tied to the destination.  prefix_rex is
;; computed at output time by x86_extended_reg_mentioned_p.
;; NOTE(review): the ';' following the output template only starts an
;; empty comment; it is harmless but could be dropped.
7640 (define_insn "ssse3_pshufbv8qi3"
7641 [(set (match_operand:V8QI 0 "register_operand" "=y")
7642 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
7643 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
7646 "pshufb\t{%2, %0|%0, %2}";
7647 [(set_attr "type" "sselog1")
7648 (set_attr "prefix_extra" "1")
7649 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7650 (set_attr "mode" "DI")])
;; SSSE3 psign{b,w,d} / vpsign{b,w,d}, instantiated for every mode of the
;; VI124_128 iterator; the mnemonic suffix comes from <ssemodesuffix>.
;; Alternative 0 ties operand 1 to the destination; alternative 1 is the
;; three-operand AVX form.
7652 (define_insn "ssse3_psign<mode>3"
7653 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
7655 [(match_operand:VI124_128 1 "register_operand" "0,x")
7656 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
7660 psign<ssemodesuffix>\t{%2, %0|%0, %2}
7661 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7662 [(set_attr "isa" "noavx,avx")
7663 (set_attr "type" "sselog1")
7664 (set_attr "prefix_data16" "1,*")
7665 (set_attr "prefix_extra" "1")
7666 (set_attr "prefix" "orig,vex")
7667 (set_attr "mode" "TI")])
;; MMX-register form of psign, instantiated for every mode of the MMXMODEI
;; iterator; the mnemonic suffix comes from <mmxvecsize>.  Operand 1 is
;; tied to the destination; prefix_rex is computed at output time by
;; x86_extended_reg_mentioned_p.
;; NOTE(review): the ';' following the output template only starts an
;; empty comment; it is harmless but could be dropped.
7669 (define_insn "ssse3_psign<mode>3"
7670 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7672 [(match_operand:MMXMODEI 1 "register_operand" "0")
7673 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
7676 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
7677 [(set_attr "type" "sselog1")
7678 (set_attr "prefix_extra" "1")
7679 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7680 (set_attr "mode" "DI")])
;; SSSE3 palignr / vpalignr on TImode values, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate
;; (presumably converting a bit count into a byte count -- confirm against
;; the callers).  The mnemonic is then chosen by which_alternative: the
;; legacy two-operand form or the AVX three-operand form.
7682 (define_insn "ssse3_palignrti"
7683 [(set (match_operand:TI 0 "register_operand" "=x,x")
7684 (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
7685 (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
7686 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
7690 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7692 switch (which_alternative)
7695 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7697 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7702 [(set_attr "isa" "noavx,avx")
7703 (set_attr "type" "sseishft")
7704 (set_attr "atom_unit" "sishuf")
7705 (set_attr "prefix_data16" "1,*")
7706 (set_attr "prefix_extra" "1")
7707 (set_attr "length_immediate" "1")
7708 (set_attr "prefix" "orig,vex")
7709 (set_attr "mode" "TI")])
;; palignr on DImode operands in the y register class.  As in
;; ssse3_palignrti, operand 3 is divided by 8 before it is printed.
;; Only the two-operand form exists here: operand 1 is tied to the
;; destination.
7711 (define_insn "ssse3_palignrdi"
7712 [(set (match_operand:DI 0 "register_operand" "=y")
7713 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
7714 (match_operand:DI 2 "nonimmediate_operand" "ym")
7715 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
7719 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7720 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7722 [(set_attr "type" "sseishft")
7723 (set_attr "atom_unit" "sishuf")
7724 (set_attr "prefix_extra" "1")
7725 (set_attr "length_immediate" "1")
7726 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7727 (set_attr "mode" "DI")])
;; Standard-named abs pattern for the VI124_128 modes, emitted as
;; pabs<ssemodesuffix>.  The source may be a register or memory.  The %v
;; template prefix and the maybe_vex prefix attribute indicate the
;; mnemonic can be emitted in VEX form.
7729 (define_insn "abs<mode>2"
7730 [(set (match_operand:VI124_128 0 "register_operand" "=x")
7732 (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
7734 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
7735 [(set_attr "type" "sselog1")
7736 (set_attr "prefix_data16" "1")
7737 (set_attr "prefix_extra" "1")
7738 (set_attr "prefix" "maybe_vex")
7739 (set_attr "mode" "TI")])
;; abs pattern for the MMXMODEI modes, emitted as pabs<mmxvecsize> on
;; y-class registers.  prefix_rep is forced to 0 and prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
7741 (define_insn "abs<mode>2"
7742 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7744 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
7746 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
7747 [(set_attr "type" "sselog1")
7748 (set_attr "prefix_rep" "0")
7749 (set_attr "prefix_extra" "1")
7750 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7751 (set_attr "mode" "DI")])
7753 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7755 ;; AMD SSE4A instructions
7757 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix>: store a MODEF value from an x-class register
;; (operand 1) to a memory destination (operand 0).
7759 (define_insn "sse4a_movnt<mode>"
7760 [(set (match_operand:MODEF 0 "memory_operand" "=m")
7762 [(match_operand:MODEF 1 "register_operand" "x")]
7765 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
7766 [(set_attr "type" "ssemov")
7767 (set_attr "mode" "<MODE>")])
;; Vector-register variant of the movnt store: element 0 of a VF_128
;; register (picked with vec_select) is stored to a memory operand of the
;; corresponding scalar mode.
7769 (define_insn "sse4a_vmmovnt<mode>"
7770 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
7771 (unspec:<ssescalarmode>
7772 [(vec_select:<ssescalarmode>
7773 (match_operand:VF_128 1 "register_operand" "x")
7774 (parallel [(const_int 0)]))]
7777 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
7778 [(set_attr "type" "ssemov")
7779 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq.  Operand 1 is tied to the destination;
;; operands 2 and 3 are integer constants printed as two immediates
;; (length_immediate is 2).
7781 (define_insn "sse4a_extrqi"
7782 [(set (match_operand:V2DI 0 "register_operand" "=x")
7783 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7784 (match_operand 2 "const_int_operand" "")
7785 (match_operand 3 "const_int_operand" "")]
7788 "extrq\t{%3, %2, %0|%0, %2, %3}"
7789 [(set_attr "type" "sse")
7790 (set_attr "prefix_data16" "1")
7791 (set_attr "length_immediate" "2")
7792 (set_attr "mode" "TI")])
;; Register form of extrq.  Operand 1 is tied to the destination and
;; operand 2 is a V16QI register supplying the control value.
7794 (define_insn "sse4a_extrq"
7795 [(set (match_operand:V2DI 0 "register_operand" "=x")
7796 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7797 (match_operand:V16QI 2 "register_operand" "x")]
7800 "extrq\t{%2, %0|%0, %2}"
7801 [(set_attr "type" "sse")
7802 (set_attr "prefix_data16" "1")
7803 (set_attr "mode" "TI")])
;; Immediate form of insertq.  Operand 1 is tied to the destination,
;; operand 2 is a second V2DI register, and operands 3 and 4 are integer
;; constants printed as two immediates (length_immediate is 2).
;; Encoding attributes: no data16 prefix, rep prefix present.
7805 (define_insn "sse4a_insertqi"
7806 [(set (match_operand:V2DI 0 "register_operand" "=x")
7807 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7808 (match_operand:V2DI 2 "register_operand" "x")
7809 (match_operand 3 "const_int_operand" "")
7810 (match_operand 4 "const_int_operand" "")]
7813 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
7814 [(set_attr "type" "sseins")
7815 (set_attr "prefix_data16" "0")
7816 (set_attr "prefix_rep" "1")
7817 (set_attr "length_immediate" "2")
7818 (set_attr "mode" "TI")])
;; Register form of insertq: operand 1 is tied to the destination and
;; operand 2 is a V2DI register.  Same prefix attributes as the immediate
;; form, but no immediate bytes.
7820 (define_insn "sse4a_insertq"
7821 [(set (match_operand:V2DI 0 "register_operand" "=x")
7822 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7823 (match_operand:V2DI 2 "register_operand" "x")]
7826 "insertq\t{%2, %0|%0, %2}"
7827 [(set_attr "type" "sseins")
7828 (set_attr "prefix_data16" "0")
7829 (set_attr "prefix_rep" "1")
7830 (set_attr "mode" "TI")])
7832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7834 ;; Intel SSE4.1 instructions
7836 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend for the VF float vector modes.
;; The visible part of the insn condition requires operand 3 to lie in
;; 0 .. (1 << number-of-elements) - 1, i.e. one mask bit per element.
;; Alternative 0 is the two-operand form (operand 1 tied to the
;; destination); alternative 1 is the three-operand VEX form.
7838 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
7839 [(set (match_operand:VF 0 "register_operand" "=x,x")
7841 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7842 (match_operand:VF 1 "register_operand" "0,x")
7843 (match_operand:SI 3 "const_int_operand" "")))]
7845 && IN_RANGE (INTVAL (operands[3]), 0, (1 << GET_MODE_NUNITS (<MODE>mode))-1)"
7847 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7848 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7849 [(set_attr "isa" "noavx,avx")
7850 (set_attr "type" "ssemov")
7851 (set_attr "length_immediate" "1")
7852 (set_attr "prefix_data16" "1,*")
7853 (set_attr "prefix_extra" "1")
7854 (set_attr "prefix" "orig,vex")
7855 (set_attr "mode" "<MODE>")])
;; Variable blend for the VF float vector modes; operand 3 is the
;; selector register.  In alternative 0 the selector uses the Yz
;; constraint (presumably xmm0, given the *_not_xmm0 predicates on the
;; other operands -- confirm in constraints.md) and operand 1 is tied to
;; the destination.  Alternative 1 is the four-operand VEX form.
;; NOTE(review): length_immediate is 1 for both alternatives here, while
;; sse4_1_pblendvb uses *,1 -- confirm which is intended.
7857 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
7858 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7860 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7861 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7862 (match_operand:VF 3 "register_operand" "Yz,x")]
7866 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7867 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7868 [(set_attr "isa" "noavx,avx")
7869 (set_attr "type" "ssemov")
7870 (set_attr "length_immediate" "1")
7871 (set_attr "prefix_data16" "1,*")
7872 (set_attr "prefix_extra" "1")
7873 (set_attr "prefix" "orig,vex")
7874 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> for the VF float vector modes with an 8-bit
;; immediate (operand 3).  Operand 1 carries the % commutativity marker,
;; so operands 1 and 2 may be swapped when matching constraints.
;; Alternative 0 is the two-operand form, alternative 1 the VEX form.
7876 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
7877 [(set (match_operand:VF 0 "register_operand" "=x,x")
7879 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
7880 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7881 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7885 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7886 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7887 [(set_attr "isa" "noavx,avx")
7888 (set_attr "type" "ssemul")
7889 (set_attr "length_immediate" "1")
7890 (set_attr "prefix_data16" "1,*")
7891 (set_attr "prefix_extra" "1")
7892 (set_attr "prefix" "orig,vex")
7893 (set_attr "mode" "<MODE>")])
;; movntdqa: load a V2DI value from memory (operand 1 must be a memory
;; operand) into an x-class register.  May be emitted in VEX form.
7895 (define_insn "sse4_1_movntdqa"
7896 [(set (match_operand:V2DI 0 "register_operand" "=x")
7897 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
7900 "%vmovntdqa\t{%1, %0|%0, %1}"
7901 [(set_attr "type" "ssemov")
7902 (set_attr "prefix_extra" "1")
7903 (set_attr "prefix" "maybe_vex")
7904 (set_attr "mode" "TI")])
;; mpsadbw on V16QI operands with an 8-bit immediate (operand 3).
;; Alternative 0: two-operand form, operand 1 tied to the destination.
;; Alternative 1: three-operand VEX form.
7906 (define_insn "sse4_1_mpsadbw"
7907 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7908 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7909 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7910 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7914 mpsadbw\t{%3, %2, %0|%0, %2, %3}
7915 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7916 [(set_attr "isa" "noavx,avx")
7917 (set_attr "type" "sselog1")
7918 (set_attr "length_immediate" "1")
7919 (set_attr "prefix_extra" "1")
7920 (set_attr "prefix" "orig,vex")
7921 (set_attr "mode" "TI")])
;; packusdw: combines two V4SI sources (operands 1 and 2) into one V8HI
;; result.  Alternative 0 ties operand 1 to the destination; alternative
;; 1 is the three-operand VEX form.
7923 (define_insn "sse4_1_packusdw"
7924 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7927 (match_operand:V4SI 1 "register_operand" "0,x"))
7929 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
7932 packusdw\t{%2, %0|%0, %2}
7933 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
7934 [(set_attr "isa" "noavx,avx")
7935 (set_attr "type" "sselog")
7936 (set_attr "prefix_extra" "1")
7937 (set_attr "prefix" "orig,vex")
7938 (set_attr "mode" "TI")])
;; Byte-wise variable blend (pblendvb / vpblendvb); operand 3 is the
;; selector register.  In alternative 0 the selector uses the Yz
;; constraint and operand 1 is tied to the destination; no immediate byte
;; is counted for it.  Alternative 1 is the four-operand VEX form, which
;; counts one immediate byte.
;; Operand 0 uses the same *_maybe_avx predicate as operand 1 (to which
;; it is tied in alternative 0) and as the blendv<ssemodesuffix> pattern,
;; so the xmm0 exclusion on the destination is applied consistently with
;; those operands rather than unconditionally.
7940 (define_insn "sse4_1_pblendvb"
7941 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7943 [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7944 (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7945 (match_operand:V16QI 3 "register_operand" "Yz,x")]
7949 pblendvb\t{%3, %2, %0|%0, %2, %3}
7950 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7951 [(set_attr "isa" "noavx,avx")
7952 (set_attr "type" "ssemov")
7953 (set_attr "prefix_extra" "1")
7954 (set_attr "length_immediate" "*,1")
7955 (set_attr "prefix" "orig,vex")
7956 (set_attr "mode" "TI")])
;; pblendw: immediate-controlled blend of two V8HI operands; operand 3
;; is an 8-bit immediate.  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the three-operand VEX form.
7958 (define_insn "sse4_1_pblendw"
7959 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7961 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7962 (match_operand:V8HI 1 "register_operand" "0,x")
7963 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
7966 pblendw\t{%3, %2, %0|%0, %2, %3}
7967 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7968 [(set_attr "isa" "noavx,avx")
7969 (set_attr "type" "ssemov")
7970 (set_attr "prefix_extra" "1")
7971 (set_attr "length_immediate" "1")
7972 (set_attr "prefix" "orig,vex")
7973 (set_attr "mode" "TI")])
;; phminposuw, modelled as UNSPEC_PHMINPOSUW on a single V8HI source
;; (register or memory).  May be emitted in VEX form.
7975 (define_insn "sse4_1_phminposuw"
7976 [(set (match_operand:V8HI 0 "register_operand" "=x")
7977 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
7978 UNSPEC_PHMINPOSUW))]
7980 "%vphminposuw\t{%1, %0|%0, %1}"
7981 [(set_attr "type" "sselog1")
7982 (set_attr "prefix_extra" "1")
7983 (set_attr "prefix" "maybe_vex")
7984 (set_attr "mode" "TI")])
;; pmov<extsuffix>bw: widening move from a V16QI source (register or
;; memory) to a V8HI destination.  <code> and <extsuffix> come from a
;; code iterator that presumably selects sign or zero extension.
7986 (define_insn "sse4_1_<code>v8qiv8hi2"
7987 [(set (match_operand:V8HI 0 "register_operand" "=x")
7990 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7991 (parallel [(const_int 0)
8000 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
8001 [(set_attr "type" "ssemov")
8002 (set_attr "prefix_extra" "1")
8003 (set_attr "prefix" "maybe_vex")
8004 (set_attr "mode" "TI")])
;; pmov<extsuffix>bd: widening move from a V16QI source (register or
;; memory) to a V4SI destination.
8006 (define_insn "sse4_1_<code>v4qiv4si2"
8007 [(set (match_operand:V4SI 0 "register_operand" "=x")
8010 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8011 (parallel [(const_int 0)
8016 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
8017 [(set_attr "type" "ssemov")
8018 (set_attr "prefix_extra" "1")
8019 (set_attr "prefix" "maybe_vex")
8020 (set_attr "mode" "TI")])
;; pmov<extsuffix>wd: widening move from a V8HI source (register or
;; memory) to a V4SI destination.
8022 (define_insn "sse4_1_<code>v4hiv4si2"
8023 [(set (match_operand:V4SI 0 "register_operand" "=x")
8026 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8027 (parallel [(const_int 0)
8032 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8033 [(set_attr "type" "ssemov")
8034 (set_attr "prefix_extra" "1")
8035 (set_attr "prefix" "maybe_vex")
8036 (set_attr "mode" "TI")])
;; pmov<extsuffix>bq: widening move from a V16QI source (register or
;; memory) to a V2DI destination.
8038 (define_insn "sse4_1_<code>v2qiv2di2"
8039 [(set (match_operand:V2DI 0 "register_operand" "=x")
8042 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8043 (parallel [(const_int 0)
8046 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
8047 [(set_attr "type" "ssemov")
8048 (set_attr "prefix_extra" "1")
8049 (set_attr "prefix" "maybe_vex")
8050 (set_attr "mode" "TI")])
;; pmov<extsuffix>wq: widening move from a V8HI source (register or
;; memory) to a V2DI destination.
8052 (define_insn "sse4_1_<code>v2hiv2di2"
8053 [(set (match_operand:V2DI 0 "register_operand" "=x")
8056 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8057 (parallel [(const_int 0)
8060 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
8061 [(set_attr "type" "ssemov")
8062 (set_attr "prefix_extra" "1")
8063 (set_attr "prefix" "maybe_vex")
8064 (set_attr "mode" "TI")])
;; pmov<extsuffix>dq: widening move from a V4SI source (register or
;; memory) to a V2DI destination.
8066 (define_insn "sse4_1_<code>v2siv2di2"
8067 [(set (match_operand:V2DI 0 "register_operand" "=x")
8070 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8071 (parallel [(const_int 0)
8074 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8075 [(set_attr "type" "ssemov")
8076 (set_attr "prefix_extra" "1")
8077 (set_attr "prefix" "maybe_vex")
8078 (set_attr "mode" "TI")])
8080 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8081 ;; setting FLAGS_REG. But they are not really compare instructions.
;; vtest<ssemodesuffix> for the VF float vector modes.  The only result
;; is FLAGS_REG in CCmode; operand 0 must be a register and operand 1 may
;; be a register or memory.  VEX-only encoding.
8082 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8083 [(set (reg:CC FLAGS_REG)
8084 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8085 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8088 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8089 [(set_attr "type" "ssecomi")
8090 (set_attr "prefix_extra" "1")
8091 (set_attr "prefix" "vex")
8092 (set_attr "mode" "<MODE>")])
8094 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
8095 ;; But it is not really a compare instruction.
;; vptest on V4DI operands (insn mode OI).  The only result is FLAGS_REG
;; in CCmode.  VEX-only encoding.
8096 (define_insn "avx_ptest256"
8097 [(set (reg:CC FLAGS_REG)
8098 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8099 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8102 "vptest\t{%1, %0|%0, %1}"
8103 [(set_attr "type" "ssecomi")
8104 (set_attr "prefix_extra" "1")
8105 (set_attr "prefix" "vex")
8106 (set_attr "mode" "OI")])
;; ptest on V2DI operands (insn mode TI).  The only result is FLAGS_REG
;; in CCmode.  May be emitted in VEX form.
8108 (define_insn "sse4_1_ptest"
8109 [(set (reg:CC FLAGS_REG)
8110 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8111 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8114 "%vptest\t{%1, %0|%0, %1}"
8115 [(set_attr "type" "ssecomi")
8116 (set_attr "prefix_extra" "1")
8117 (set_attr "prefix" "maybe_vex")
8118 (set_attr "mode" "TI")])
;; round<ssemodesuffix> for the VF float vector modes.  Operand 1 is the
;; source (register or memory) and operand 2 is an immediate accepted by
;; const_0_to_15_operand.  The prefix_data16 attribute is computed from
;; TARGET_AVX rather than given as a constant.
8120 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8121 [(set (match_operand:VF 0 "register_operand" "=x")
8123 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8124 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8127 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8128 [(set_attr "type" "ssecvt")
8129 (set (attr "prefix_data16")
8131 (ne (symbol_ref "TARGET_AVX") (const_int 0))
8133 (const_string "1")))
8134 (set_attr "prefix_extra" "1")
8135 (set_attr "length_immediate" "1")
8136 (set_attr "prefix" "maybe_vex")
8137 (set_attr "mode" "<MODE>")])
;; Scalar round on VF_128 registers.  Operand 2 and the immediate in
;; operand 3 feed the rounding operation; operand 1 is a second vector
;; input that is tied to the destination in alternative 0 and passed
;; explicitly in the VEX form (alternative 1).
8139 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8140 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8143 [(match_operand:VF_128 2 "register_operand" "x,x")
8144 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8146 (match_operand:VF_128 1 "register_operand" "0,x")
8150 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8151 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8152 [(set_attr "isa" "noavx,avx")
8153 (set_attr "type" "ssecvt")
8154 (set_attr "length_immediate" "1")
8155 (set_attr "prefix_data16" "1,*")
8156 (set_attr "prefix_extra" "1")
8157 (set_attr "prefix" "orig,vex")
8158 (set_attr "mode" "<MODE>")])
8160 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8162 ;; Intel SSE4.2 string/text processing instructions
8164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI value in the c
;; register class (operand 0), a V16QI value constrained to Yz
;; (operand 1), and FLAGS_REG.  Inputs are two V16QI strings (operands 2
;; and 4), their SI lengths in the a and d classes (operands 3 and 5),
;; and an 8-bit immediate (operand 6).
;; The split code reads REG_UNUSED notes on the current insn to learn
;; which results are live (ecx, xmm0, flags) and uses that to emit the
;; pcmpestri / pcmpestrm / cconly replacement insns.  The cconly form is
;; emitted only when flags is set and both ecx and xmm0 are clear.
8166 (define_insn_and_split "sse4_2_pcmpestr"
8167 [(set (match_operand:SI 0 "register_operand" "=c,c")
8169 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8170 (match_operand:SI 3 "register_operand" "a,a")
8171 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
8172 (match_operand:SI 5 "register_operand" "d,d")
8173 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
8175 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8183 (set (reg:CC FLAGS_REG)
8192 && can_create_pseudo_p ()"
8197 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8198 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8199 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8202 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
8203 operands[3], operands[4],
8204 operands[5], operands[6]));
8206 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
8207 operands[3], operands[4],
8208 operands[5], operands[6]));
8209 if (flags && !(ecx || xmm0))
8210 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
8211 operands[2], operands[3],
8212 operands[4], operands[5],
8216 [(set_attr "type" "sselog")
8217 (set_attr "prefix_data16" "1")
8218 (set_attr "prefix_extra" "1")
8219 (set_attr "length_immediate" "1")
8220 (set_attr "memory" "none,load")
8221 (set_attr "mode" "TI")])
;; pcmpestri: produces an SI result in the c register class (operand 0)
;; and sets FLAGS_REG.  Operands 1 and 3 are the V16QI strings (operand 3
;; may be memory in alternative 1), operands 2 and 4 are SI values in the
;; a and d classes, and operand 5 is an 8-bit immediate.  Only operands
;; 1, 3 and 5 appear in the output template.
8223 (define_insn "sse4_2_pcmpestri"
8224 [(set (match_operand:SI 0 "register_operand" "=c,c")
8226 [(match_operand:V16QI 1 "register_operand" "x,x")
8227 (match_operand:SI 2 "register_operand" "a,a")
8228 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8229 (match_operand:SI 4 "register_operand" "d,d")
8230 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8232 (set (reg:CC FLAGS_REG)
8241 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
8242 [(set_attr "type" "sselog")
8243 (set_attr "prefix_data16" "1")
8244 (set_attr "prefix_extra" "1")
8245 (set_attr "prefix" "maybe_vex")
8246 (set_attr "length_immediate" "1")
8247 (set_attr "memory" "none,load")
8248 (set_attr "mode" "TI")])
;; pcmpestrm: produces a V16QI result constrained to Yz (operand 0) and
;; sets FLAGS_REG.  Input operands mirror sse4_2_pcmpestri; only
;; operands 1, 3 and 5 appear in the output template.
8250 (define_insn "sse4_2_pcmpestrm"
8251 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8253 [(match_operand:V16QI 1 "register_operand" "x,x")
8254 (match_operand:SI 2 "register_operand" "a,a")
8255 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8256 (match_operand:SI 4 "register_operand" "d,d")
8257 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8259 (set (reg:CC FLAGS_REG)
8268 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
8269 [(set_attr "type" "sselog")
8270 (set_attr "prefix_data16" "1")
8271 (set_attr "prefix_extra" "1")
8272 (set_attr "length_immediate" "1")
8273 (set_attr "prefix" "maybe_vex")
8274 (set_attr "memory" "none,load")
8275 (set_attr "mode" "TI")])
;; Flags-only variant of pcmpestr: FLAGS_REG is the sole real result.
;; The instruction still writes one of its usual outputs, so two
;; scratch clobbers are declared and the alternatives choose which one is
;; actually used: alternatives 0-1 emit pcmpestrm and clobber a Yz
;; V16QI scratch, alternatives 2-3 emit pcmpestri and clobber a c-class
;; SI scratch.  The unused scratch in each alternative has constraint X.
;; Within each pair the second alternative takes operand 4 from memory.
8277 (define_insn "sse4_2_pcmpestr_cconly"
8278 [(set (reg:CC FLAGS_REG)
8280 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8281 (match_operand:SI 3 "register_operand" "a,a,a,a")
8282 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
8283 (match_operand:SI 5 "register_operand" "d,d,d,d")
8284 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
8286 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8287 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8290 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8291 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8292 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
8293 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
8294 [(set_attr "type" "sselog")
8295 (set_attr "prefix_data16" "1")
8296 (set_attr "prefix_extra" "1")
8297 (set_attr "length_immediate" "1")
8298 (set_attr "memory" "none,load,none,load")
8299 (set_attr "prefix" "maybe_vex")
8300 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern, the implicit-length counterpart of
;; sse4_2_pcmpestr: results are an SI value in the c class (operand 0), a
;; Yz-constrained V16QI value (operand 1) and FLAGS_REG; inputs are two
;; V16QI strings (operands 2 and 3) and an 8-bit immediate (operand 4).
;; The split code reads REG_UNUSED notes on the current insn to learn
;; which results are live and uses that to emit the pcmpistri /
;; pcmpistrm / cconly replacement insns.  The cconly form is emitted only
;; when flags is set and both ecx and xmm0 are clear.
8302 (define_insn_and_split "sse4_2_pcmpistr"
8303 [(set (match_operand:SI 0 "register_operand" "=c,c")
8305 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8306 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
8307 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
8309 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8315 (set (reg:CC FLAGS_REG)
8322 && can_create_pseudo_p ()"
8327 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8328 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8329 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8332 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
8333 operands[3], operands[4]));
8335 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
8336 operands[3], operands[4]));
8337 if (flags && !(ecx || xmm0))
8338 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
8339 operands[2], operands[3],
8343 [(set_attr "type" "sselog")
8344 (set_attr "prefix_data16" "1")
8345 (set_attr "prefix_extra" "1")
8346 (set_attr "length_immediate" "1")
8347 (set_attr "memory" "none,load")
8348 (set_attr "mode" "TI")])
;; pcmpistri: produces an SI result in the c register class (operand 0)
;; and sets FLAGS_REG.  Operands 1 and 2 are the V16QI strings (operand 2
;; may be memory in alternative 1); operand 3 is an 8-bit immediate.
8350 (define_insn "sse4_2_pcmpistri"
8351 [(set (match_operand:SI 0 "register_operand" "=c,c")
8353 [(match_operand:V16QI 1 "register_operand" "x,x")
8354 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8355 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8357 (set (reg:CC FLAGS_REG)
8364 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
8365 [(set_attr "type" "sselog")
8366 (set_attr "prefix_data16" "1")
8367 (set_attr "prefix_extra" "1")
8368 (set_attr "length_immediate" "1")
8369 (set_attr "prefix" "maybe_vex")
8370 (set_attr "memory" "none,load")
8371 (set_attr "mode" "TI")])
;; pcmpistrm: produces a V16QI result constrained to Yz (operand 0) and
;; sets FLAGS_REG.  Input operands mirror sse4_2_pcmpistri.
8373 (define_insn "sse4_2_pcmpistrm"
8374 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8376 [(match_operand:V16QI 1 "register_operand" "x,x")
8377 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8378 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8380 (set (reg:CC FLAGS_REG)
8387 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
8388 [(set_attr "type" "sselog")
8389 (set_attr "prefix_data16" "1")
8390 (set_attr "prefix_extra" "1")
8391 (set_attr "length_immediate" "1")
8392 (set_attr "prefix" "maybe_vex")
8393 (set_attr "memory" "none,load")
8394 (set_attr "mode" "TI")])
;; Flags-only variant of pcmpistr: FLAGS_REG is the sole real result.
;; Alternatives 0-1 emit pcmpistrm and clobber a Yz V16QI scratch;
;; alternatives 2-3 emit pcmpistri and clobber a c-class SI scratch.
;; The unused scratch in each alternative has constraint X.  Within each
;; pair the second alternative takes operand 3 from memory.
8396 (define_insn "sse4_2_pcmpistr_cconly"
8397 [(set (reg:CC FLAGS_REG)
8399 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8400 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
8401 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
8403 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8404 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8407 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8408 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8409 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
8410 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
8411 [(set_attr "type" "sselog")
8412 (set_attr "prefix_data16" "1")
8413 (set_attr "prefix_extra" "1")
8414 (set_attr "length_immediate" "1")
8415 (set_attr "memory" "none,load,none,load")
8416 (set_attr "prefix" "maybe_vex")
8417 (set_attr "mode" "TI")])
8419 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8423 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8425 ;; XOP parallel integer multiply/add instructions.
8426 ;; Note the XOP multiply/add instructions
8427 ;; a[i] = b[i] * c[i] + d[i];
8429 ;; do not allow the value being added to be a memory operand.
;; vpmacsww on V8HI operands.  Operand 1 carries the % commutativity
;; marker and must end up in a register; operand 2 may be memory.
;; Operand 3 is register-only by constraint even though its predicate is
;; nonimmediate_operand.
8430 (define_insn "xop_pmacsww"
8431 [(set (match_operand:V8HI 0 "register_operand" "=x")
8433 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8434 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8435 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8437 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8438 [(set_attr "type" "ssemuladd")
8439 (set_attr "mode" "TI")])
;; vpmacssww on V8HI operands: a V8HI product of operands 1 and 2 is
;; combined with operand 3.  Operand 2 may be memory; operand 3 is
;; register-only by constraint.
8441 (define_insn "xop_pmacssww"
8442 [(set (match_operand:V8HI 0 "register_operand" "=x")
8444 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8445 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8446 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8448 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8449 [(set_attr "type" "ssemuladd")
8450 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI operands.  Operand 1 is marked commutative with
;; operand 2, which may be memory; operand 3 is register-only by
;; constraint.
8452 (define_insn "xop_pmacsdd"
8453 [(set (match_operand:V4SI 0 "register_operand" "=x")
8456 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8457 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8458 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8460 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8461 [(set_attr "type" "ssemuladd")
8462 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI operands: a V4SI product of operands 1 and 2 is
;; combined with operand 3.  Operand 2 may be memory; operand 3 is
;; register-only by constraint.
8464 (define_insn "xop_pmacssdd"
8465 [(set (match_operand:V4SI 0 "register_operand" "=x")
8467 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8468 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8469 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8471 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8472 [(set_attr "type" "ssemuladd")
8473 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI inputs (operands 1 and 2), V2DI operand 3 and V2DI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 1.  Operand 3 is register-only by constraint.
8475 (define_insn "xop_pmacssdql"
8476 [(set (match_operand:V2DI 0 "register_operand" "=x")
8481 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8482 (parallel [(const_int 1)
8485 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8486 (parallel [(const_int 1)
8488 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8490 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8491 [(set_attr "type" "ssemuladd")
8492 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI inputs (operands 1 and 2), V2DI operand 3 and V2DI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 0.  Operand 3 is register-only by constraint.
8494 (define_insn "xop_pmacssdqh"
8495 [(set (match_operand:V2DI 0 "register_operand" "=x")
8500 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8501 (parallel [(const_int 0)
8505 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8506 (parallel [(const_int 0)
8508 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8510 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8511 [(set_attr "type" "ssemuladd")
8512 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI inputs (operands 1 and 2), V2DI operand 3 and V2DI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 1.  Operand 3 is register-only by constraint.
8514 (define_insn "xop_pmacsdql"
8515 [(set (match_operand:V2DI 0 "register_operand" "=x")
8520 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8521 (parallel [(const_int 1)
8525 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8526 (parallel [(const_int 1)
8528 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8530 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8531 [(set_attr "type" "ssemuladd")
8532 (set_attr "mode" "TI")])
8534 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8535 ;; fake it with a multiply/add. In general, we expect the define_split to
8536 ;; occur before register allocation, so we have to handle the corner case where
8537 ;; the target is the same as operands 1/2
;; V2DI product built from elements 1 and 3 of two V4SI inputs.
;; The destination is earlyclobber (=&x), so it cannot share a register
;; with either input.  The split fires once reload_completed holds; its
;; preparation code sets operands[3] to the V2DI zero constant,
;; presumably as the addend of the multiply/add replacement described in
;; the comment preceding this pattern.
8538 (define_insn_and_split "xop_mulv2div2di3_low"
8539 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8543 (match_operand:V4SI 1 "register_operand" "%x")
8544 (parallel [(const_int 1)
8548 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8549 (parallel [(const_int 1)
8550 (const_int 3)])))))]
8553 "&& reload_completed"
8562 (parallel [(const_int 1)
8567 (parallel [(const_int 1)
8571 operands[3] = CONST0_RTX (V2DImode);
8573 [(set_attr "type" "ssemul")
8574 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI inputs (operands 1 and 2), V2DI operand 3 and V2DI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 0.  Operand 3 is register-only by constraint.
8576 (define_insn "xop_pmacsdqh"
8577 [(set (match_operand:V2DI 0 "register_operand" "=x")
8582 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8583 (parallel [(const_int 0)
8587 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8588 (parallel [(const_int 0)
8590 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8592 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8593 [(set_attr "type" "ssemuladd")
8594 (set_attr "mode" "TI")])
8596 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8597 ;; fake it with a multiply/add. In general, we expect the define_split to
8598 ;; occur before register allocation, so we have to handle the corner case where
8599 ;; the target is the same as either operands[1] or operands[2]
;; V2DI product built from elements 0 and 2 of two V4SI inputs.
;; The destination is earlyclobber (=&x), so it cannot share a register
;; with either input.  The split fires once reload_completed holds; its
;; preparation code sets operands[3] to the V2DI zero constant,
;; presumably as the addend of the multiply/add replacement described in
;; the comment preceding this pattern.
8600 (define_insn_and_split "xop_mulv2div2di3_high"
8601 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8605 (match_operand:V4SI 1 "register_operand" "%x")
8606 (parallel [(const_int 0)
8610 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8611 (parallel [(const_int 0)
8612 (const_int 2)])))))]
8615 "&& reload_completed"
8624 (parallel [(const_int 0)
8629 (parallel [(const_int 0)
8633 operands[3] = CONST0_RTX (V2DImode);
8635 [(set_attr "type" "ssemul")
8636 (set_attr "mode" "TI")])
8638 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI inputs (operands 1 and 2), V4SI operand 3 and V4SI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 1.  Operand 3 is register-only by constraint.
8639 (define_insn "xop_pmacsswd"
8640 [(set (match_operand:V4SI 0 "register_operand" "=x")
8645 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8646 (parallel [(const_int 1)
8652 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8653 (parallel [(const_int 1)
8657 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8659 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8660 [(set_attr "type" "ssemuladd")
8661 (set_attr "mode" "TI")])
;; vpmacswd: V8HI inputs (operands 1 and 2), V4SI operand 3 and V4SI
;; result.  The element selections on operands 1 and 2 both begin at
;; element 1.  Operand 3 is register-only by constraint.
8663 (define_insn "xop_pmacswd"
8664 [(set (match_operand:V4SI 0 "register_operand" "=x")
8669 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8670 (parallel [(const_int 1)
8676 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8677 (parallel [(const_int 1)
8681 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8683 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8684 [(set_attr "type" "ssemuladd")
8685 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI inputs (operands 1 and 2), V4SI operand 3 and V4SI
;; result.  The RTL uses element selections beginning at element 0 and
;; further selections beginning at element 1.  Operand 3 is register-only
;; by constraint.
8687 (define_insn "xop_pmadcsswd"
8688 [(set (match_operand:V4SI 0 "register_operand" "=x")
8694 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8695 (parallel [(const_int 0)
8701 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8702 (parallel [(const_int 0)
8710 (parallel [(const_int 1)
8717 (parallel [(const_int 1)
8721 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8723 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8724 [(set_attr "type" "ssemuladd")
8725 (set_attr "mode" "TI")])
;; vpmadcswd: V8HI inputs (operands 1 and 2), V4SI operand 3 and V4SI
;; result.  The RTL uses element selections beginning at element 0 and
;; further selections beginning at element 1.  Operand 3 is register-only
;; by constraint.
8727 (define_insn "xop_pmadcswd"
8728 [(set (match_operand:V4SI 0 "register_operand" "=x")
8734 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8735 (parallel [(const_int 0)
8741 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8742 (parallel [(const_int 0)
8750 (parallel [(const_int 1)
8757 (parallel [(const_int 1)
8761 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8763 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8764 [(set_attr "type" "ssemuladd")
8765 (set_attr "mode" "TI")])
8767 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator.  In the RTL operand 3 is
;; listed first, followed by operands 1 and 2.  The two alternatives allow
;; at most one memory operand: alternative 0 lets operand 2 be memory,
;; alternative 1 lets operand 3 be memory.  Only the type attribute is set.
8768 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
8769 [(set (match_operand:V 0 "register_operand" "=x,x")
8771 (match_operand:V 3 "nonimmediate_operand" "x,m")
8772 (match_operand:V 1 "vector_move_operand" "x,x")
8773 (match_operand:V 2 "vector_move_operand" "xm,x")))]
8775 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8776 [(set_attr "type" "sse4arg")])
8778 ;; XOP horizontal add/subtract instructions
;; vphaddbw: single V16QI source (register or memory), V8HI result.
;; The visible element selections start at elements 0 and 1 and run up
;; to element 15.
8779 (define_insn "xop_phaddbw"
8780 [(set (match_operand:V8HI 0 "register_operand" "=x")
8784 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8785 (parallel [(const_int 0)
8796 (parallel [(const_int 1)
8803 (const_int 15)])))))]
8805 "vphaddbw\t{%1, %0|%0, %1}"
8806 [(set_attr "type" "sseiadd1")])
;; vphaddbd: single V16QI source (register or memory), V4SI result.
;; Four element selections are used, starting at elements 0, 1, 2 and 3.
8808 (define_insn "xop_phaddbd"
8809 [(set (match_operand:V4SI 0 "register_operand" "=x")
8814 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8815 (parallel [(const_int 0)
8822 (parallel [(const_int 1)
8830 (parallel [(const_int 2)
8837 (parallel [(const_int 3)
8840 (const_int 15)]))))))]
8842 "vphaddbd\t{%1, %0|%0, %1}"
8843 [(set_attr "type" "sseiadd1")])
;; vphaddbq: single V16QI source (register or memory), V2DI result.
;; Eight element selections are used, starting at elements 0-3 and 8-11.
8845 (define_insn "xop_phaddbq"
8846 [(set (match_operand:V2DI 0 "register_operand" "=x")
8852 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8853 (parallel [(const_int 0)
8858 (parallel [(const_int 1)
8864 (parallel [(const_int 2)
8869 (parallel [(const_int 3)
8876 (parallel [(const_int 8)
8881 (parallel [(const_int 9)
8887 (parallel [(const_int 10)
8892 (parallel [(const_int 11)
8893 (const_int 15)])))))))]
8895 "vphaddbq\t{%1, %0|%0, %1}"
8896 [(set_attr "type" "sseiadd1")])
;; vphaddwd: single V8HI source (register or memory), V4SI result.
;; The visible element selections start at elements 0 and 1 and run up
;; to element 7.
8898 (define_insn "xop_phaddwd"
8899 [(set (match_operand:V4SI 0 "register_operand" "=x")
8903 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8904 (parallel [(const_int 0)
8911 (parallel [(const_int 1)
8914 (const_int 7)])))))]
8916 "vphaddwd\t{%1, %0|%0, %1}"
8917 [(set_attr "type" "sseiadd1")])
;; vphaddwq: single V8HI source (register or memory), V2DI result.
;; Four element selections are used, starting at elements 0, 1, 2 and 3.
8919 (define_insn "xop_phaddwq"
8920 [(set (match_operand:V2DI 0 "register_operand" "=x")
8925 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8926 (parallel [(const_int 0)
8931 (parallel [(const_int 1)
8937 (parallel [(const_int 2)
8942 (parallel [(const_int 3)
8943 (const_int 7)]))))))]
8945 "vphaddwq\t{%1, %0|%0, %1}"
8946 [(set_attr "type" "sseiadd1")])
;; vphadddq: single V4SI source (register or memory), V2DI result.
;; The visible element selections start at elements 0 and 1; the second
;; one ends at element 3.
8948 (define_insn "xop_phadddq"
8949 [(set (match_operand:V2DI 0 "register_operand" "=x")
8953 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8954 (parallel [(const_int 0)
8959 (parallel [(const_int 1)
8960 (const_int 3)])))))]
8962 "vphadddq\t{%1, %0|%0, %1}"
8963 [(set_attr "type" "sseiadd1")])
;; vphaddubw: single V16QI source (register or memory), V8HI result.
;; Same operand layout as xop_phaddbw; the u in the mnemonic presumably
;; marks the unsigned variant (the extension code is not shown here).
8965 (define_insn "xop_phaddubw"
8966 [(set (match_operand:V8HI 0 "register_operand" "=x")
8970 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8971 (parallel [(const_int 0)
8982 (parallel [(const_int 1)
8989 (const_int 15)])))))]
8991 "vphaddubw\t{%1, %0|%0, %1}"
8992 [(set_attr "type" "sseiadd1")])
;; vphaddubd: single V16QI source (register or memory), V4SI result.
;; Same operand layout as xop_phaddbd (selections start at 0, 1, 2, 3).
8994 (define_insn "xop_phaddubd"
8995 [(set (match_operand:V4SI 0 "register_operand" "=x")
9000 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9001 (parallel [(const_int 0)
9008 (parallel [(const_int 1)
9016 (parallel [(const_int 2)
9023 (parallel [(const_int 3)
9026 (const_int 15)]))))))]
9028 "vphaddubd\t{%1, %0|%0, %1}"
9029 [(set_attr "type" "sseiadd1")])
;; vphaddubq: single V16QI source (register or memory), V2DI result.
;; Same operand layout as xop_phaddbq (selections start at 0-3 and 8-11).
9031 (define_insn "xop_phaddubq"
9032 [(set (match_operand:V2DI 0 "register_operand" "=x")
9038 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9039 (parallel [(const_int 0)
9044 (parallel [(const_int 1)
9050 (parallel [(const_int 2)
9055 (parallel [(const_int 3)
9062 (parallel [(const_int 8)
9067 (parallel [(const_int 9)
9073 (parallel [(const_int 10)
9078 (parallel [(const_int 11)
9079 (const_int 15)])))))))]
9081 "vphaddubq\t{%1, %0|%0, %1}"
9082 [(set_attr "type" "sseiadd1")])
;; vphadduwd: single V8HI source (register or memory), V4SI result.
;; Same operand layout as xop_phaddwd.
9084 (define_insn "xop_phadduwd"
9085 [(set (match_operand:V4SI 0 "register_operand" "=x")
9089 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9090 (parallel [(const_int 0)
9097 (parallel [(const_int 1)
9100 (const_int 7)])))))]
9102 "vphadduwd\t{%1, %0|%0, %1}"
9103 [(set_attr "type" "sseiadd1")])
;; vphadduwq: single V8HI source (register or memory), V2DI result.
;; Same operand layout as xop_phaddwq (selections start at 0, 1, 2, 3).
9105 (define_insn "xop_phadduwq"
9106 [(set (match_operand:V2DI 0 "register_operand" "=x")
9111 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9112 (parallel [(const_int 0)
9117 (parallel [(const_int 1)
9123 (parallel [(const_int 2)
9128 (parallel [(const_int 3)
9129 (const_int 7)]))))))]
9131 "vphadduwq\t{%1, %0|%0, %1}"
9132 [(set_attr "type" "sseiadd1")])
;; vphaddudq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI register result.  Scheduling type is sseiadd1.
9134 (define_insn "xop_phaddudq"
9135 [(set (match_operand:V2DI 0 "register_operand" "=x")
9139 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9140 (parallel [(const_int 0)
9145 (parallel [(const_int 1)
9146 (const_int 3)])))))]
9148 "vphaddudq\t{%1, %0|%0, %1}"
9149 [(set_attr "type" "sseiadd1")])
;; vphsubbw: operand 1 is a V16QI register or memory source, operand 0 the
;; V8HI register result.  Scheduling type is sseiadd1.
9151 (define_insn "xop_phsubbw"
9152 [(set (match_operand:V8HI 0 "register_operand" "=x")
9156 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9157 (parallel [(const_int 0)
9168 (parallel [(const_int 1)
9175 (const_int 15)])))))]
9177 "vphsubbw\t{%1, %0|%0, %1}"
9178 [(set_attr "type" "sseiadd1")])
;; vphsubwd: operand 1 is a V8HI register or memory source, operand 0 the
;; V4SI register result.  Scheduling type is sseiadd1.
9180 (define_insn "xop_phsubwd"
9181 [(set (match_operand:V4SI 0 "register_operand" "=x")
9185 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9186 (parallel [(const_int 0)
9193 (parallel [(const_int 1)
9196 (const_int 7)])))))]
9198 "vphsubwd\t{%1, %0|%0, %1}"
9199 [(set_attr "type" "sseiadd1")])
;; vphsubdq: operand 1 is a V4SI register or memory source, operand 0 the
;; V2DI register result.  Scheduling type is sseiadd1.
9201 (define_insn "xop_phsubdq"
9202 [(set (match_operand:V2DI 0 "register_operand" "=x")
9206 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9207 (parallel [(const_int 0)
9212 (parallel [(const_int 1)
9213 (const_int 3)])))))]
9215 "vphsubdq\t{%1, %0|%0, %1}"
9216 [(set_attr "type" "sseiadd1")])
9218 ;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register.  The two alternatives allow either
;; operand 3 (first alternative) or operand 2 (second alternative) to be in
;; memory; the insn condition rejects the case where both are memory.
9219 (define_insn "xop_pperm"
9220 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9222 [(match_operand:V16QI 1 "register_operand" "x,x")
9223 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9224 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9225 UNSPEC_XOP_PERMUTE))]
9226 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9227 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9228 [(set_attr "type" "sse4arg")
9229 (set_attr "mode" "TI")])
9231 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result using vpperm.
;; Operand 3 is the V16QI control vector; it appears only in a (use ...).
;; At most one of operands 2 and 3 may be in memory.
9232 (define_insn "xop_pperm_pack_v2di_v4si"
9233 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9236 (match_operand:V2DI 1 "register_operand" "x,x"))
9238 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9239 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9240 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9241 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9242 [(set_attr "type" "sse4arg")
9243 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result using vpperm.
;; Operand 3 is the V16QI control vector; it appears only in a (use ...).
;; At most one of operands 2 and 3 may be in memory.
9245 (define_insn "xop_pperm_pack_v4si_v8hi"
9246 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9249 (match_operand:V4SI 1 "register_operand" "x,x"))
9251 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9252 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9253 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9254 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9255 [(set_attr "type" "sse4arg")
9256 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result using vpperm.
;; Operand 3 is the V16QI control vector; it appears only in a (use ...).
;; At most one of operands 2 and 3 may be in memory.
9258 (define_insn "xop_pperm_pack_v8hi_v16qi"
9259 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9262 (match_operand:V8HI 1 "register_operand" "x,x"))
9264 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9265 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9266 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9267 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9268 [(set_attr "type" "sse4arg")
9269 (set_attr "mode" "TI")])
9271 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to the
;; element mode if necessary, replicated <ssescalarnum> times into a vector
;; via vec_init, and the per-element rotate xop_vrotl<mode>3 is emitted.
9272 (define_expand "rotl<mode>3"
9273 [(set (match_operand:VI_128 0 "register_operand" "")
9275 (match_operand:VI_128 1 "nonimmediate_operand" "")
9276 (match_operand:SI 2 "general_operand")))]
9279 /* If we were given a scalar, convert it to parallel */
9280 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9282 rtvec vs = rtvec_alloc (<ssescalarnum>);
9283 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9284 rtx reg = gen_reg_rtx (<MODE>mode);
9285 rtx op2 = operands[2];
9288 if (GET_MODE (op2) != <ssescalarmode>mode)
9290 op2 = gen_reg_rtx (<ssescalarmode>mode);
9291 convert_move (op2, operands[2], false);
9294 for (i = 0; i < <ssescalarnum>; i++)
9295 RTVEC_ELT (vs, i) = op2;
9297 emit_insn (gen_vec_init<mode> (reg, par));
9298 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; When the count is not an immediate accepted by
;; const_0_to_<sserotatemax>_operand, the scalar count is converted to the
;; element mode if necessary and replicated into a vector via vec_init.
;; That vector is negated and passed to xop_vrotl<mode>3, so the right
;; rotate is expressed as a left rotate by the negated count.
9303 (define_expand "rotr<mode>3"
9304 [(set (match_operand:VI_128 0 "register_operand" "")
9306 (match_operand:VI_128 1 "nonimmediate_operand" "")
9307 (match_operand:SI 2 "general_operand")))]
9310 /* If we were given a scalar, convert it to parallel */
9311 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9313 rtvec vs = rtvec_alloc (<ssescalarnum>);
9314 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9315 rtx neg = gen_reg_rtx (<MODE>mode);
9316 rtx reg = gen_reg_rtx (<MODE>mode);
9317 rtx op2 = operands[2];
9320 if (GET_MODE (op2) != <ssescalarmode>mode)
9322 op2 = gen_reg_rtx (<ssescalarmode>mode);
9323 convert_move (op2, operands[2], false);
9326 for (i = 0; i < <ssescalarnum>; i++)
9327 RTVEC_ELT (vs, i) = op2;
9329 emit_insn (gen_vec_init<mode> (reg, par));
9330 emit_insn (gen_neg<mode>2 (neg, reg));
9331 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is emitted directly as the vprot
;; count.  Operand 1 may be a register or memory.
9336 (define_insn "xop_rotl<mode>3"
9337 [(set (match_operand:VI_128 0 "register_operand" "=x")
9339 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9340 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9342 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9343 [(set_attr "type" "sseishft")
9344 (set_attr "length_immediate" "1")
9345 (set_attr "mode" "TI")])
;; Rotate right by an immediate.  The insn is emitted as vprot (the same
;; mnemonic the rotate-left pattern uses) with the complementary count
;; (element width in bits - operand 2), stored in operand 3.
;; The element width is taken from the scalar element mode.  The previous
;; expression <ssescalarnum> * 8 equals the element width only for V4SI
;; (4 * 8 == 32); it produced 128, 64 and 16 for V16QI, V8HI and V2DI.
9347 (define_insn "xop_rotr<mode>3"
9348 [(set (match_operand:VI_128 0 "register_operand" "=x")
9350 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9351 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9354 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9355 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9357 [(set_attr "type" "sseishft")
9358 (set_attr "length_immediate" "1")
9359 (set_attr "mode" "TI")])
;; Per-element variable rotate right: negate the count vector (operand 2)
;; into a fresh register and emit xop_vrotl<mode>3 with the negated counts.
9361 (define_expand "vrotr<mode>3"
9362 [(match_operand:VI_128 0 "register_operand" "")
9363 (match_operand:VI_128 1 "register_operand" "")
9364 (match_operand:VI_128 2 "register_operand" "")]
9367 rtx reg = gen_reg_rtx (<MODE>mode);
9368 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9369 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
9373 (define_expand "vrotl<mode>3"
9374 [(match_operand:VI_128 0 "register_operand" "")
9375 (match_operand:VI_128 1 "register_operand" "")
9376 (match_operand:VI_128 2 "register_operand" "")]
9379 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else whose visible arm uses the negated count (neg of operand 2).
;; Either operand 1 or operand 2 may be in memory, but not both.
9383 (define_insn "xop_vrotl<mode>3"
9384 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9385 (if_then_else:VI_128
9387 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9390 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9394 (neg:VI_128 (match_dup 2)))))]
9395 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9396 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9397 [(set_attr "type" "sseishft")
9398 (set_attr "prefix_data16" "0")
9399 (set_attr "prefix_extra" "2")
9400 (set_attr "mode" "TI")])
9402 ;; XOP packed shift instructions.
9403 ;; FIXME: add V2DI back in
;; Per-element variable logical shift right for the VI124_128 modes:
;; negate the count vector and emit xop_lshl<mode>3 with the negated counts.
9404 (define_expand "vlshr<mode>3"
9405 [(match_operand:VI124_128 0 "register_operand" "")
9406 (match_operand:VI124_128 1 "register_operand" "")
9407 (match_operand:VI124_128 2 "register_operand" "")]
9410 rtx neg = gen_reg_rtx (<MODE>mode);
9411 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9412 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right for the VI124_128 modes:
;; negate the count vector and emit xop_ashl<mode>3 with the negated counts.
9416 (define_expand "vashr<mode>3"
9417 [(match_operand:VI124_128 0 "register_operand" "")
9418 (match_operand:VI124_128 1 "register_operand" "")
9419 (match_operand:VI124_128 2 "register_operand" "")]
9422 rtx neg = gen_reg_rtx (<MODE>mode);
9423 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9424 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable shift left for the VI124_128 modes: forwards all
;; three operands unchanged to xop_ashl<mode>3.
9428 (define_expand "vashl<mode>3"
9429 [(match_operand:VI124_128 0 "register_operand" "")
9430 (match_operand:VI124_128 1 "register_operand" "")
9431 (match_operand:VI124_128 2 "register_operand" "")]
9434 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; vpsha with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else whose visible arm uses the negated count (neg of operand 2).
;; Either operand 1 or operand 2 may be in memory, but not both.
9438 (define_insn "xop_ashl<mode>3"
9439 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9440 (if_then_else:VI_128
9442 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9445 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9449 (neg:VI_128 (match_dup 2)))))]
9450 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9451 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9452 [(set_attr "type" "sseishft")
9453 (set_attr "prefix_data16" "0")
9454 (set_attr "prefix_extra" "2")
9455 (set_attr "mode" "TI")])
;; vpshl with a vector of per-element counts (operand 2).  The RTL is an
;; if_then_else whose visible arm uses the negated count (neg of operand 2).
;; Either operand 1 or operand 2 may be in memory, but not both.
9457 (define_insn "xop_lshl<mode>3"
9458 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9459 (if_then_else:VI_128
9461 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9464 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9468 (neg:VI_128 (match_dup 2)))))]
9469 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9470 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9471 [(set_attr "type" "sseishft")
9472 (set_attr "prefix_data16" "0")
9473 (set_attr "prefix_extra" "2")
9474 (set_attr "mode" "TI")])
9476 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift left by a scalar SImode count: replicate operand 2 into all
;; 16 elements of a vector via vec_init, then emit xop_ashlv16qi3 with that
;; count vector.
9477 (define_expand "ashlv16qi3"
9478 [(match_operand:V16QI 0 "register_operand" "")
9479 (match_operand:V16QI 1 "register_operand" "")
9480 (match_operand:SI 2 "nonmemory_operand" "")]
9483 rtvec vs = rtvec_alloc (16);
9484 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9485 rtx reg = gen_reg_rtx (V16QImode);
9487 for (i = 0; i < 16; i++)
9488 RTVEC_ELT (vs, i) = operands[2];
9490 emit_insn (gen_vec_initv16qi (reg, par));
9491 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI logical shift by a scalar SImode count: replicate operand 2 into
;; all 16 elements of a vector via vec_init, then emit xop_lshlv16qi3 with
;; that count vector.
9495 (define_expand "lshlv16qi3"
9496 [(match_operand:V16QI 0 "register_operand" "")
9497 (match_operand:V16QI 1 "register_operand" "")
9498 (match_operand:SI 2 "nonmemory_operand" "")]
9501 rtvec vs = rtvec_alloc (16);
9502 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9503 rtx reg = gen_reg_rtx (V16QImode);
9505 for (i = 0; i < 16; i++)
9506 RTVEC_ELT (vs, i) = operands[2];
9508 emit_insn (gen_vec_initv16qi (reg, par));
9509 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift right by a scalar SImode count, implemented as
;; xop_ashlv16qi3 with negated counts.  A constant count is negated at
;; expand time before being replicated; a non-constant count is replicated
;; first and the resulting vector is negated with negv16qi2.
9513 (define_expand "ashrv16qi3"
9514 [(match_operand:V16QI 0 "register_operand" "")
9515 (match_operand:V16QI 1 "register_operand" "")
9516 (match_operand:SI 2 "nonmemory_operand" "")]
9519 rtvec vs = rtvec_alloc (16);
9520 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9521 rtx reg = gen_reg_rtx (V16QImode);
9523 rtx ele = ((CONST_INT_P (operands[2]))
9524 ? GEN_INT (- INTVAL (operands[2]))
9527 for (i = 0; i < 16; i++)
9528 RTVEC_ELT (vs, i) = ele;
9530 emit_insn (gen_vec_initv16qi (reg, par));
9532 if (!CONST_INT_P (operands[2]))
9534 rtx neg = gen_reg_rtx (V16QImode);
9535 emit_insn (gen_negv16qi2 (neg, reg));
9536 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
9539 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift right by a scalar DImode count, implemented as
;; xop_ashlv2di3 with a negated count in both elements.  A constant count is
;; negated at expand time; otherwise the count is negated with negdi2, after
;; a convert_move to DImode when its mode is not already DImode.
9544 (define_expand "ashrv2di3"
9545 [(match_operand:V2DI 0 "register_operand" "")
9546 (match_operand:V2DI 1 "register_operand" "")
9547 (match_operand:DI 2 "nonmemory_operand" "")]
9550 rtvec vs = rtvec_alloc (2);
9551 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
9552 rtx reg = gen_reg_rtx (V2DImode);
9555 if (CONST_INT_P (operands[2]))
9556 ele = GEN_INT (- INTVAL (operands[2]));
9557 else if (GET_MODE (operands[2]) != DImode)
9559 rtx move = gen_reg_rtx (DImode);
9560 ele = gen_reg_rtx (DImode);
9561 convert_move (move, operands[2], false);
9562 emit_insn (gen_negdi2 (ele, move));
9566 ele = gen_reg_rtx (DImode);
9567 emit_insn (gen_negdi2 (ele, operands[2]));
9570 RTVEC_ELT (vs, 0) = ele;
9571 RTVEC_ELT (vs, 1) = ele;
9572 emit_insn (gen_vec_initv2di (reg, par));
9573 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
;; vfrcz over the FMAMODE iterator: operand 1 is a register or memory
;; source, operand 0 the register result of the same mode.
9578 (define_insn "xop_frcz<mode>2"
9579 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9581 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9584 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9585 [(set_attr "type" "ssecvt1")
9586 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz form.  The preparation code supplies a
;; zero vector of the operand mode as operands[3].
9589 (define_expand "xop_vmfrcz<mode>2"
9590 [(set (match_operand:VF_128 0 "register_operand")
9593 [(match_operand:VF_128 1 "nonimmediate_operand")]
9599 operands[3] = CONST0_RTX (<MODE>mode);
;; vfrcz with the scalar mode suffix (<ssescalarmodesuffix>) on VF_128
;; operands.  Operand 2 must be a zero constant (const0_operand) and is not
;; printed in the output template.
9602 (define_insn "*xop_vmfrcz_<mode>"
9603 [(set (match_operand:VF_128 0 "register_operand" "=x")
9606 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
9608 (match_operand:VF_128 2 "const0_operand")
9611 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9612 [(set_attr "type" "ssecvt1")
9613 (set_attr "mode" "<MODE>")])
;; vpcom vector compare for comparison codes accepted by
;; ix86_comparison_int_operator.  The comparison (operator 1) is printed via
;; %Y1.  Operand 2 must be a register; operand 3 may be a register or memory.
9615 (define_insn "xop_maskcmp<mode>3"
9616 [(set (match_operand:VI_128 0 "register_operand" "=x")
9617 (match_operator:VI_128 1 "ix86_comparison_int_operator"
9618 [(match_operand:VI_128 2 "register_operand" "x")
9619 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9621 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9622 [(set_attr "type" "sse4arg")
9623 (set_attr "prefix_data16" "0")
9624 (set_attr "prefix_rep" "0")
9625 (set_attr "prefix_extra" "2")
9626 (set_attr "length_immediate" "1")
9627 (set_attr "mode" "TI")])
;; vpcom...u vector compare for comparison codes accepted by
;; ix86_comparison_uns_operator.  Same operand layout as the signed pattern;
;; the mnemonic carries an extra "u" before the mode suffix.
9629 (define_insn "xop_maskcmp_uns<mode>3"
9630 [(set (match_operand:VI_128 0 "register_operand" "=x")
9631 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
9632 [(match_operand:VI_128 2 "register_operand" "x")
9633 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9635 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9636 [(set_attr "type" "ssecmp")
9637 (set_attr "prefix_data16" "0")
9638 (set_attr "prefix_rep" "0")
9639 (set_attr "prefix_extra" "2")
9640 (set_attr "length_immediate" "1")
9641 (set_attr "mode" "TI")])
9643 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9644 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9645 ;; the exact instruction generated for the intrinsic.
;; Same comparison and output template as the plain unsigned compare, but
;; the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
9646 (define_insn "xop_maskcmp_uns2<mode>3"
9647 [(set (match_operand:VI_128 0 "register_operand" "=x")
9649 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
9650 [(match_operand:VI_128 2 "register_operand" "x")
9651 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
9652 UNSPEC_XOP_UNSIGNED_CMP))]
9654 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9655 [(set_attr "type" "ssecmp")
9656 (set_attr "prefix_data16" "0")
9657 (set_attr "prefix_extra" "2")
9658 (set_attr "length_immediate" "1")
9659 (set_attr "mode" "TI")])
9661 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9662 ;; being added here to be complete.
;; Modelled as UNSPEC_XOP_TRUEFALSE.  The constant operand 3 selects the
;; mnemonic at output time: non-zero emits vpcomtrue, zero emits vpcomfalse.
9663 (define_insn "xop_pcom_tf<mode>3"
9664 [(set (match_operand:VI_128 0 "register_operand" "=x")
9666 [(match_operand:VI_128 1 "register_operand" "x")
9667 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
9668 (match_operand:SI 3 "const_int_operand" "n")]
9669 UNSPEC_XOP_TRUEFALSE))]
9672 return ((INTVAL (operands[3]) != 0)
9673 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9674 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
9676 [(set_attr "type" "ssecmp")
9677 (set_attr "prefix_data16" "0")
9678 (set_attr "prefix_extra" "2")
9679 (set_attr "length_immediate" "1")
9680 (set_attr "mode" "TI")])
;; vpermil2 two-source permute.  Operands 1 and 2 are the VF-mode data
;; sources, operand 3 is the integer selector vector (<sseintvecmode>, may
;; be in memory) and operand 4 is an immediate in the range 0..3.
;; Operand 2 must not carry the '%' commutative modifier: '%' pairs an
;; operand with the one that follows it, and operand 3 is the selector in a
;; different mode, so the two must never be exchanged.
9682 (define_insn "xop_vpermil2<mode>3"
9683 [(set (match_operand:VF 0 "register_operand" "=x")
9685 [(match_operand:VF 1 "register_operand" "x")
9686 (match_operand:VF 2 "nonimmediate_operand" "x")
9687 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
9688 (match_operand:SI 4 "const_0_to_3_operand" "n")]
9691 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
9692 [(set_attr "type" "sse4arg")
9693 (set_attr "length_immediate" "1")
9694 (set_attr "mode" "<MODE>")])
9696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI operands.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination and emits the two-operand form; alternative
;; 1 (isa avx) emits the three-operand VEX form.
9698 (define_insn "aesenc"
9699 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9700 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9701 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9705 aesenc\t{%2, %0|%0, %2}
9706 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9707 [(set_attr "isa" "noavx,avx")
9708 (set_attr "type" "sselog1")
9709 (set_attr "prefix_extra" "1")
9710 (set_attr "prefix" "orig,vex")
9711 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI operands (UNSPEC_AESENCLAST).
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) emits the three-operand VEX form.
9713 (define_insn "aesenclast"
9714 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9715 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9716 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9717 UNSPEC_AESENCLAST))]
9720 aesenclast\t{%2, %0|%0, %2}
9721 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9722 [(set_attr "isa" "noavx,avx")
9723 (set_attr "type" "sselog1")
9724 (set_attr "prefix_extra" "1")
9725 (set_attr "prefix" "orig,vex")
9726 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI operands.  Alternative 0 (isa noavx) ties
;; operand 1 to the destination and emits the two-operand form; alternative
;; 1 (isa avx) emits the three-operand VEX form.
9728 (define_insn "aesdec"
9729 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9730 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9731 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9735 aesdec\t{%2, %0|%0, %2}
9736 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9737 [(set_attr "isa" "noavx,avx")
9738 (set_attr "type" "sselog1")
9739 (set_attr "prefix_extra" "1")
9740 (set_attr "prefix" "orig,vex")
9741 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI operands (UNSPEC_AESDECLAST).
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) emits the three-operand VEX form.
9743 (define_insn "aesdeclast"
9744 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9745 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9746 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9747 UNSPEC_AESDECLAST))]
9750 aesdeclast\t{%2, %0|%0, %2}
9751 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9752 [(set_attr "isa" "noavx,avx")
9753 (set_attr "type" "sselog1")
9754 (set_attr "prefix_extra" "1")
9755 (set_attr "prefix" "orig,vex")
9756 (set_attr "mode" "TI")])
;; aesimc on a single V2DI source (register or memory).  The %v prefix in
;; the template and the maybe_vex prefix attribute cover both the legacy
;; and the VEX encoding with one alternative.
9758 (define_insn "aesimc"
9759 [(set (match_operand:V2DI 0 "register_operand" "=x")
9760 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9763 "%vaesimc\t{%1, %0|%0, %1}"
9764 [(set_attr "type" "sselog1")
9765 (set_attr "prefix_extra" "1")
9766 (set_attr "prefix" "maybe_vex")
9767 (set_attr "mode" "TI")])
;; aeskeygenassist (UNSPEC_AESKEYGENASSIST): V2DI source in register or
;; memory plus an 8-bit immediate (operand 2, range 0..255).
9769 (define_insn "aeskeygenassist"
9770 [(set (match_operand:V2DI 0 "register_operand" "=x")
9771 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9772 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9773 UNSPEC_AESKEYGENASSIST))]
9775 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9776 [(set_attr "type" "sselog1")
9777 (set_attr "prefix_extra" "1")
9778 (set_attr "length_immediate" "1")
9779 (set_attr "prefix" "maybe_vex")
9780 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI operands with an 8-bit immediate
;; (operand 3, range 0..255).  Alternative 0 (isa noavx) ties operand 1 to
;; the destination; alternative 1 (isa avx) emits the VEX form.
9782 (define_insn "pclmulqdq"
9783 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9784 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9785 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9786 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9790 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9791 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9792 [(set_attr "isa" "noavx,avx")
9793 (set_attr "type" "sselog1")
9794 (set_attr "prefix_extra" "1")
9795 (set_attr "length_immediate" "1")
9796 (set_attr "prefix" "orig,vex")
9797 (set_attr "mode" "TI")])
;; Builds the PARALLEL for vzeroall by hand.  Element 0 is an
;; unspec_volatile marker; it is followed by one SET per SSE register
;; (16 registers when TARGET_64BIT, otherwise 8) that stores the V8SImode
;; zero constant into that register.
9799 (define_expand "avx_vzeroall"
9800 [(match_par_dup 0 [(const_int 0)])]
9803 int nregs = TARGET_64BIT ? 16 : 8;
9806 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
9808 XVECEXP (operands[0], 0, 0)
9809 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
9812 for (regno = 0; regno < nregs; regno++)
9813 XVECEXP (operands[0], 0, regno + 1)
9814 = gen_rtx_SET (VOIDmode,
9815 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
9816 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The insn has no
;; modrm byte and no memory access (attributes modrm "0", memory "none").
9819 (define_insn "*avx_vzeroall"
9820 [(match_parallel 0 "vzeroall_operation"
9821 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9824 [(set_attr "type" "sse")
9825 (set_attr "modrm" "0")
9826 (set_attr "memory" "none")
9827 (set_attr "prefix" "vex")
9828 (set_attr "mode" "OI")])
9830 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9831 ;; if the upper 128bits are unused.
;; UNSPECV_VZEROUPPER unspec_volatile carrying one constant-integer operand.
;; The insn has no modrm byte and no memory access.
9832 (define_insn "avx_vzeroupper"
9833 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9834 UNSPECV_VZEROUPPER)]
9837 [(set_attr "type" "sse")
9838 (set_attr "modrm" "0")
9839 (set_attr "memory" "none")
9840 (set_attr "prefix" "vex")
9841 (set_attr "mode" "OI")])
;; Broadcast a scalar into every element of a 256-bit vector
;; (AVX256MODE24P modes).  The memory alternative emits vbroadcast with the
;; scalar mode suffix.  The register alternative is discouraged ("?x"); its
;; output template is not shown in this excerpt.
9843 (define_insn "vec_dup<mode>"
9844 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
9845 (vec_duplicate:AVX256MODE24P
9846 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
9849 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9851 [(set_attr "type" "ssemov")
9852 (set_attr "prefix_extra" "1")
9853 (set_attr "prefix" "vex")
9854 (set_attr "mode" "V8SF")])
;; After reload, a 256-bit vec_duplicate of a register scalar is rewritten
;; as two steps: duplicate the scalar into operand 2, which is the
;; half-width (<ssehalfvecmode>) view of the destination register, then
;; concatenate operand 2 with itself to form the full-width result.
9857 [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
9858 (vec_duplicate:AVX256MODE24P
9859 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
9860 "TARGET_AVX && reload_completed"
9861 [(set (match_dup 2) (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
9862 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
9863 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Broadcast a 128-bit half-vector (operand 1) into a 256-bit register.
;; Three alternatives:
;;   memory source                -> vbroadcastf128
;;   source tied to destination   -> vinsertf128 with immediate 1
;;   other register (discouraged) -> vperm2f128 with immediate 0
9865 (define_insn "avx_vbroadcastf128_<mode>"
9866 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
9868 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9872 vbroadcastf128\t{%1, %0|%0, %1}
9873 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9874 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9875 [(set_attr "type" "ssemov,sselog1,sselog1")
9876 (set_attr "prefix_extra" "1")
9877 (set_attr "length_immediate" "0,1,1")
9878 (set_attr "prefix" "vex")
9879 (set_attr "mode" "V4SF,V8SF,V8SF")])
9881 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9882 ;; If it so happens that the input is in memory, use vbroadcast.
9883 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element operands[3], matched through the
;; avx_vbroadcast_operand parallel.  One output path re-addresses the memory
;; source to the selected SFmode element (byte offset elt * 4) and emits
;; vbroadcastss.  The other builds an immediate with the 2-bit element index
;; repeated in all four fields (elt * 0x55) and emits vpermilps.
9884 (define_insn "*avx_vperm_broadcast_v4sf"
9885 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
9887 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
9888 (match_parallel 2 "avx_vbroadcast_operand"
9889 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9892 int elt = INTVAL (operands[3]);
9893 switch (which_alternative)
9897 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
9898 return "vbroadcastss\t{%1, %0|%0, %1}";
9900 operands[2] = GEN_INT (elt * 0x55);
9901 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
9906 [(set_attr "type" "ssemov,ssemov,sselog1")
9907 (set_attr "prefix_extra" "1")
9908 (set_attr "length_immediate" "0,0,1")
9909 (set_attr "prefix" "vex")
9910 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast of element operands[3], split after reload.
;; The visible register path uses two shuffles: avx_vpermil<mode> replicates
;; the element within its 128-bit lane, then vperm2f128 copies the lane that
;; holds it (elt / (<ssescalarnum> / 2)) into both lanes of the destination.
;; Otherwise the memory operand is re-addressed to the selected scalar
;; element and the insn becomes the vec_duplicate in the split template.
9912 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
9913 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
9915 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
9916 (match_parallel 2 "avx_vbroadcast_operand"
9917 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9920 "&& reload_completed"
9921 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
9923 rtx op0 = operands[0], op1 = operands[1];
9924 int elt = INTVAL (operands[3]);
9930 /* Shuffle element we care about into all elements of the 128-bit lane.
9931 The other lane gets shuffled too, but we don't care. */
9932 if (<MODE>mode == V4DFmode)
9933 mask = (elt & 1 ? 15 : 0);
9935 mask = (elt & 3) * 0x55;
9936 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
9938 /* Shuffle the lane we care about into both lanes of the dest. */
9939 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
9940 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
9944 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
9945 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermil expander for the DFmode vector modes (VF2).  The 8-bit immediate
;; is decoded into an explicit selector PARALLEL: bits 0 and 1 choose
;; element 0 or 1 for result elements 0 and 1.  For V4DF, bits 2 and 3
;; choose element 2 or 3 for result elements 2 and 3.
9948 (define_expand "avx_vpermil<mode>"
9949 [(set (match_operand:VF2 0 "register_operand" "")
9951 (match_operand:VF2 1 "nonimmediate_operand" "")
9952 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9955 int mask = INTVAL (operands[2]);
9956 rtx perm[<ssescalarnum>];
9958 perm[0] = GEN_INT (mask & 1);
9959 perm[1] = GEN_INT ((mask >> 1) & 1);
9960 if (<MODE>mode == V4DFmode)
9962 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
9963 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
9967 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil expander for the SFmode vector modes (VF1).  The 8-bit immediate
;; is decoded into an explicit selector PARALLEL: each 2-bit field picks the
;; source element for one of result elements 0..3.  For V8SF the same four
;; fields are reused for result elements 4..7 with an offset of 4.
9970 (define_expand "avx_vpermil<mode>"
9971 [(set (match_operand:VF1 0 "register_operand" "")
9973 (match_operand:VF1 1 "nonimmediate_operand" "")
9974 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9977 int mask = INTVAL (operands[2]);
9978 rtx perm[<ssescalarnum>];
9980 perm[0] = GEN_INT (mask & 3);
9981 perm[1] = GEN_INT ((mask >> 2) & 3);
9982 perm[2] = GEN_INT ((mask >> 4) & 3);
9983 perm[3] = GEN_INT ((mask >> 6) & 3);
9984 if (<MODE>mode == V8SFmode)
9986 perm[4] = GEN_INT ((mask & 3) + 4);
9987 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
9988 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
9989 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
9993 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermil with an immediate, matched from an explicit selector PARALLEL.
;; The insn condition requires avx_vpermilp_parallel to return non-zero for
;; the selector; at output time the same helper is called again and its
;; result minus 1 is used as the immediate.
9996 (define_insn "*avx_vpermilp<mode>"
9997 [(set (match_operand:VF 0 "register_operand" "=x")
9999 (match_operand:VF 1 "nonimmediate_operand" "xm")
10000 (match_parallel 2 ""
10001 [(match_operand 3 "const_int_operand" "")])))]
10003 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
10005 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
10006 operands[2] = GEN_INT (mask);
10007 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
10009 [(set_attr "type" "sselog")
10010 (set_attr "prefix_extra" "1")
10011 (set_attr "length_immediate" "1")
10012 (set_attr "prefix" "vex")
10013 (set_attr "mode" "<MODE>")])
;; vpermil with a variable selector: operand 1 is the VF-mode data register
;; and operand 2 the integer selector vector (<sseintvecmode>), which may be
;; in a register or memory.
10015 (define_insn "avx_vpermilvar<mode>3"
10016 [(set (match_operand:VF 0 "register_operand" "=x")
10018 [(match_operand:VF 1 "register_operand" "x")
10019 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10022 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10023 [(set_attr "type" "sselog")
10024 (set_attr "prefix_extra" "1")
10025 (set_attr "prefix" "vex")
10026 (set_attr "mode" "<MODE>")])
;; vperm2f128 expander.  The default form is the UNSPEC_VPERMIL2F128 unspec.
;; When neither bit 3 nor bit 7 of the immediate is set ((mask & 0x88) == 0),
;; the operation is instead built as a vec_select from the vec_concat of
;; operands 1 and 2: mask bits 1:0 choose the 128-bit half that feeds the
;; low half of the result, and bits 5:4 choose the half for the high half.
10028 (define_expand "avx_vperm2f128<mode>3"
10029 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
10030 (unspec:AVX256MODE2P
10031 [(match_operand:AVX256MODE2P 1 "register_operand" "")
10032 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
10033 (match_operand:SI 3 "const_0_to_255_operand" "")]
10034 UNSPEC_VPERMIL2F128))]
10037 int mask = INTVAL (operands[3]);
10038 if ((mask & 0x88) == 0)
10040 rtx perm[<ssescalarnum>], t1, t2;
10041 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10043 base = (mask & 3) * nelt2;
10044 for (i = 0; i < nelt2; ++i)
10045 perm[i] = GEN_INT (base + i);
10047 base = ((mask >> 4) & 3) * nelt2;
10048 for (i = 0; i < nelt2; ++i)
10049 perm[i + nelt2] = GEN_INT (base + i);
10051 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
10052 operands[1], operands[2]);
10053 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10054 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10055 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10061 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10062 ;; means that in order to represent this properly in rtl we'd have to
10063 ;; nest *another* vec_concat with a zero operand and do the select from
10064 ;; a 4x wide vector. That doesn't seem very nice.
;; vperm2f128 in its unspec form (UNSPEC_VPERMIL2F128): accepts any 8-bit
;; immediate in operand 3 and prints it unchanged.
10065 (define_insn "*avx_vperm2f128<mode>_full"
10066 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10067 (unspec:AVX256MODE2P
10068 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10069 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10070 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10071 UNSPEC_VPERMIL2F128))]
10073 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10074 [(set_attr "type" "sselog")
10075 (set_attr "prefix_extra" "1")
10076 (set_attr "length_immediate" "1")
10077 (set_attr "prefix" "vex")
10078 (set_attr "mode" "V8SF")])
;; vperm2f128 expressed as a vec_select from the vec_concat of operands 1
;; and 2.  The insn condition requires avx_vperm2f128_parallel to return
;; non-zero for the selector; at output time the same helper is called again
;; and its result minus 1 is used as the immediate.
10080 (define_insn "*avx_vperm2f128<mode>_nozero"
10081 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10082 (vec_select:AVX256MODE2P
10083 (vec_concat:<ssedoublevecmode>
10084 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10085 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10086 (match_parallel 3 ""
10087 [(match_operand 4 "const_int_operand" "")])))]
10089 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10091 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10092 operands[3] = GEN_INT (mask);
10093 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10095 [(set_attr "type" "sselog")
10096 (set_attr "prefix_extra" "1")
10097 (set_attr "length_immediate" "1")
10098 (set_attr "prefix" "vex")
10099 (set_attr "mode" "V8SF")])
;; Insert a 128-bit half-vector (operand 2) into a 256-bit vector
;; (operand 1).  Operand 3 is an immediate 0 or 1; the switch on it selects
;; either gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, and any other
;; value reaches gcc_unreachable.
10101 (define_expand "avx_vinsertf128<mode>"
10102 [(match_operand:V_256 0 "register_operand" "")
10103 (match_operand:V_256 1 "register_operand" "")
10104 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
10105 (match_operand:SI 3 "const_0_to_1_operand" "")]
10108 rtx (*insn)(rtx, rtx, rtx);
10110 switch (INTVAL (operands[3]))
10113 insn = gen_vec_set_lo_<mode>;
10116 insn = gen_vec_set_hi_<mode>;
10119 gcc_unreachable ();
10122 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a 4-element 256-bit vector (VI8F_256): the result
;; is operand 2 concatenated with elements 2 and 3 of operand 1.  Emitted as
;; vinsertf128 with immediate 0.
10126 (define_insn "vec_set_lo_<mode>"
10127 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10128 (vec_concat:VI8F_256
10129 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10130 (vec_select:<ssehalfvecmode>
10131 (match_operand:VI8F_256 1 "register_operand" "x")
10132 (parallel [(const_int 2) (const_int 3)]))))]
10134 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10135 [(set_attr "type" "sselog")
10136 (set_attr "prefix_extra" "1")
10137 (set_attr "length_immediate" "1")
10138 (set_attr "prefix" "vex")
10139 (set_attr "mode" "V8SF")])
;; Replace the high half of a 4-element 256-bit vector (VI8F_256): the
;; result is elements 0 and 1 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10141 (define_insn "vec_set_hi_<mode>"
10142 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10143 (vec_concat:VI8F_256
10144 (vec_select:<ssehalfvecmode>
10145 (match_operand:VI8F_256 1 "register_operand" "x")
10146 (parallel [(const_int 0) (const_int 1)]))
10147 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10149 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10150 [(set_attr "type" "sselog")
10151 (set_attr "prefix_extra" "1")
10152 (set_attr "length_immediate" "1")
10153 (set_attr "prefix" "vex")
10154 (set_attr "mode" "V8SF")])
;; Replace the low half of an 8-element 256-bit vector (VI4F_256): the
;; result is operand 2 concatenated with elements 4..7 of operand 1.
;; Emitted as vinsertf128 with immediate 0.
10156 (define_insn "vec_set_lo_<mode>"
10157 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10158 (vec_concat:VI4F_256
10159 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10160 (vec_select:<ssehalfvecmode>
10161 (match_operand:VI4F_256 1 "register_operand" "x")
10162 (parallel [(const_int 4) (const_int 5)
10163 (const_int 6) (const_int 7)]))))]
10165 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10166 [(set_attr "type" "sselog")
10167 (set_attr "prefix_extra" "1")
10168 (set_attr "length_immediate" "1")
10169 (set_attr "prefix" "vex")
10170 (set_attr "mode" "V8SF")])
;; Replace the high half of an 8-element 256-bit vector (VI4F_256): the
;; result is elements 0..3 of operand 1 concatenated with operand 2.
;; Emitted as vinsertf128 with immediate 1.
10172 (define_insn "vec_set_hi_<mode>"
10173 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10174 (vec_concat:VI4F_256
10175 (vec_select:<ssehalfvecmode>
10176 (match_operand:VI4F_256 1 "register_operand" "x")
10177 (parallel [(const_int 0) (const_int 1)
10178 (const_int 2) (const_int 3)]))
10179 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10181 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10182 [(set_attr "type" "sselog")
10183 (set_attr "prefix_extra" "1")
10184 (set_attr "length_immediate" "1")
10185 (set_attr "prefix" "vex")
10186 (set_attr "mode" "V8SF")])
;; Replace the low half of a V16HI vector: operand 2 (V8HI) is combined with
;; elements 8..15 selected from operand 1.  Emitted as vinsertf128 with
;; immediate 0.
10188 (define_insn "vec_set_lo_v16hi"
10189 [(set (match_operand:V16HI 0 "register_operand" "=x")
10191 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10193 (match_operand:V16HI 1 "register_operand" "x")
10194 (parallel [(const_int 8) (const_int 9)
10195 (const_int 10) (const_int 11)
10196 (const_int 12) (const_int 13)
10197 (const_int 14) (const_int 15)]))))]
10199 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10200 [(set_attr "type" "sselog")
10201 (set_attr "prefix_extra" "1")
10202 (set_attr "length_immediate" "1")
10203 (set_attr "prefix" "vex")
10204 (set_attr "mode" "V8SF")])
;; Replace the high half of a V16HI vector: elements 0-7 of operand 1 are
;; the part that is kept, and operand 2 (V8HI, register or memory) supplies
;; the new half.  Emitted as vinsertf128 with immediate 1; the "mode"
;; attribute is V8SF although the element mode is integer.
;; NOTE(review): the parentheses below are unbalanced -- two opening
;; wrapper lines (presumably the vec_concat:V16HI and vec_select:V8HI heads,
;; by analogy with vec_set_hi_<mode>) and the insn condition string are
;; not present in this listing.  Confirm against the pristine file.
10206 (define_insn "vec_set_hi_v16hi"
10207 [(set (match_operand:V16HI 0 "register_operand" "=x")
10210 (match_operand:V16HI 1 "register_operand" "x")
10211 (parallel [(const_int 0) (const_int 1)
10212 (const_int 2) (const_int 3)
10213 (const_int 4) (const_int 5)
10214 (const_int 6) (const_int 7)]))
10215 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10217 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10218 [(set_attr "type" "sselog")
10219 (set_attr "prefix_extra" "1")
10220 (set_attr "length_immediate" "1")
10221 (set_attr "prefix" "vex")
10222 (set_attr "mode" "V8SF")])
;; Replace the low half of a V32QI vector: operand 2 (V16QI, register or
;; memory) supplies the new half, and elements 16-31 of operand 1 are the
;; part that is kept.  Emitted as vinsertf128 with immediate 0; the "mode"
;; attribute is V8SF although the element mode is integer.
;; NOTE(review): the parentheses below are unbalanced -- two opening
;; wrapper lines (presumably the vec_concat:V32QI and vec_select:V16QI
;; heads, by analogy with vec_set_lo_<mode>) and the insn condition string
;; are not present in this listing.  Confirm against the pristine file.
10224 (define_insn "vec_set_lo_v32qi"
10225 [(set (match_operand:V32QI 0 "register_operand" "=x")
10227 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10229 (match_operand:V32QI 1 "register_operand" "x")
10230 (parallel [(const_int 16) (const_int 17)
10231 (const_int 18) (const_int 19)
10232 (const_int 20) (const_int 21)
10233 (const_int 22) (const_int 23)
10234 (const_int 24) (const_int 25)
10235 (const_int 26) (const_int 27)
10236 (const_int 28) (const_int 29)
10237 (const_int 30) (const_int 31)]))))]
10239 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10240 [(set_attr "type" "sselog")
10241 (set_attr "prefix_extra" "1")
10242 (set_attr "length_immediate" "1")
10243 (set_attr "prefix" "vex")
10244 (set_attr "mode" "V8SF")])
;; Replace the high half of a V32QI vector: elements 0-15 of operand 1 are
;; the part that is kept, and operand 2 (V16QI, register or memory)
;; supplies the new half.  Emitted as vinsertf128 with immediate 1; the
;; "mode" attribute is V8SF although the element mode is integer.
;; NOTE(review): the parentheses below are unbalanced -- two opening
;; wrapper lines (presumably the vec_concat:V32QI and vec_select:V16QI
;; heads, by analogy with vec_set_hi_<mode>) and the insn condition string
;; are not present in this listing.  Confirm against the pristine file.
10246 (define_insn "vec_set_hi_v32qi"
10247 [(set (match_operand:V32QI 0 "register_operand" "=x")
10250 (match_operand:V32QI 1 "register_operand" "x")
10251 (parallel [(const_int 0) (const_int 1)
10252 (const_int 2) (const_int 3)
10253 (const_int 4) (const_int 5)
10254 (const_int 6) (const_int 7)
10255 (const_int 8) (const_int 9)
10256 (const_int 10) (const_int 11)
10257 (const_int 12) (const_int 13)
10258 (const_int 14) (const_int 15)]))
10259 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10261 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10262 [(set_attr "type" "sselog")
10263 (set_attr "prefix_extra" "1")
10264 (set_attr "length_immediate" "1")
10265 (set_attr "prefix" "vex")
10266 (set_attr "mode" "V8SF")])
;; Named expander for the maskload operation over the VF float vector modes.
;; Operand 0 is the register destination, operand 1 the memory source, and
;; operand 2 a register of mode <sseintvecmode> (presumably the mask --
;; TODO confirm; note it is listed before operand 1 inside the bracket).
;; NOTE(review): this definition is truncated in this listing -- the
;; operation wrapping the operand vector, the expander condition and the
;; closing parentheses are not visible.  Confirm against the pristine file.
10268 (define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
10269 [(set (match_operand:VF 0 "register_operand" "")
10271 [(match_operand:<sseintvecmode> 2 "register_operand" "")
10272 (match_operand:VF 1 "memory_operand" "")
;; Named expander for the maskstore operation over the VF float vector
;; modes.  Operand 0 is the memory destination, operand 1 a register of
;; mode <sseintvecmode> (presumably the mask -- TODO confirm), and operand 2
;; the register holding the data to store.
;; NOTE(review): this definition is truncated in this listing -- the
;; operation wrapping the operand vector, the expander condition and the
;; closing parentheses are not visible.  Confirm against the pristine file.
10277 (define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
10278 [(set (match_operand:VF 0 "memory_operand" "")
10280 [(match_operand:<sseintvecmode> 1 "register_operand" "")
10281 (match_operand:VF 2 "register_operand" "")
;; Anonymous insn that emits vmaskmov<ssemodesuffix> for the VF modes.
;; Two alternatives are provided:
;;   0: register destination ("=x") with a memory source ("m");
;;   1: memory destination ("m") with a register source ("x").
;; Operand 1 is always a register of mode <sseintvecmode>.
;; The visible part of the condition, REG_P (operands[0]) ==
;; MEM_P (operands[2]), rejects register-to-register and memory-to-memory
;; combinations, so exactly one of operands 0 and 2 is in memory.
;; NOTE(review): the operation wrapping the operand vector and the first
;; line of the condition string are not visible in this listing.
10286 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
10287 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10289 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
10290 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10294 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10295 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10296 [(set_attr "type" "sselog1")
10297 (set_attr "prefix_extra" "1")
10298 (set_attr "prefix" "vex")
10299 (set_attr "mode" "<MODE>")])
;; Insn-and-split over AVX256MODE2P: the destination (register or memory)
;; is set from an unspec of a single <ssehalfvecmode> operand (register or
;; memory; alternatives "xm"->"x" and "x"->"m").
;; The split fires only after reload ("&& reload_completed") and lowers the
;; insn to a plain move via emit_move_insn.  Before the move, an operand is
;; re-expressed as a hard register in the other operand's mode: op0 as
;; <ssehalfvecmode>, or op1 as <MODE>, using the same REGNO.
;; NOTE(review): the unspec code, insn condition, output template and the
;; control flow choosing between the two gen_rtx_REG rewrites are not
;; visible in this listing; as shown both assignments would run
;; unconditionally, which would call REGNO on a possible memory operand.
;; Confirm against the pristine file.
10301 (define_insn_and_split "avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>"
10302 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
10303 (unspec:AVX256MODE2P
10304 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
10308 "&& reload_completed"
10311 rtx op0 = operands[0];
10312 rtx op1 = operands[1];
10314 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
10316 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
10317 emit_move_insn (op0, op1);
;; vec_init expander for the 256-bit vector modes (V_256).
;; Operand 0 is the register destination; operand 1 has no mode or
;; predicate and is passed through untouched.  All work is delegated to
;; ix86_expand_vector_init, called with a first argument of false.
;; NOTE(review): the expander condition and the braces/DONE around the C
;; body are not visible in this listing.
10321 (define_expand "vec_init<mode>"
10322 [(match_operand:V_256 0 "register_operand" "")
10323 (match_operand 1 "" "")]
10326 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Anonymous insn building a V_256 register from two <ssehalfvecmode>
;; halves (operand 1 in a register; operand 2 via vector_move_operand).
;; Two alternatives, selected on operand 2's constraint:
;;   "xm": emits vinsertf128 with immediate 1, using the %t1 form of
;;         operand 1 as the base (type sselog, with an immediate byte);
;;   "C":  emits a plain vmovaps / vmovapd / vmovdqa of operand 1 into the
;;         %x0 form of the destination, chosen by get_attr_mode (type
;;         ssemov).
;; The mapping of alternatives to the two output paths follows the order of
;; the "type" attribute ("sselog,ssemov").
;; NOTE(review): the vec_concat head, insn condition, braces and the
;; "case" labels of both switch statements are not visible in this
;; listing.  Confirm against the pristine file.
10330 (define_insn "*vec_concat<mode>_avx"
10331 [(set (match_operand:V_256 0 "register_operand" "=x,x")
10333 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
10334 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
10337 switch (which_alternative)
10340 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
10342 switch (get_attr_mode (insn))
10345 return "vmovaps\t{%1, %x0|%x0, %1}";
10347 return "vmovapd\t{%1, %x0|%x0, %1}";
10349 return "vmovdqa\t{%1, %x0|%x0, %1}";
10352 gcc_unreachable ();
10355 [(set_attr "type" "sselog,ssemov")
10356 (set_attr "prefix_extra" "1,*")
10357 (set_attr "length_immediate" "1,*")
10358 (set_attr "prefix" "vex")
10359 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps, register-to-register 128-bit form.
;; Operand 1 is a V8HI register; the conversion is modelled as a V8SF
;; unspec of which four lanes are selected into the V4SF destination
;; (operand 0).
;; The selector must name lanes 0, 1, 2 and 3: the instruction converts the
;; four low half-precision elements in order.  The previous selector
;; (0 1 1 2) duplicated lane 1 and never referenced lane 3, so the RTL did
;; not describe what the instruction computes.
;; NOTE(review): the line opening the selection around the unspec, the
;; unspec code and the insn condition are not visible in this listing.
;; Confirm against the pristine file.
10361 (define_insn "vcvtph2ps"
10362 [(set (match_operand:V4SF 0 "register_operand" "=x")
10364 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10366 (parallel [(const_int 0) (const_int 1)
10367 (const_int 2) (const_int 3)])))]
10369 "vcvtph2ps\t{%1, %0|%0, %1}"
10370 [(set_attr "type" "ssecvt")
10371 (set_attr "prefix" "vex")
10372 (set_attr "mode" "V4SF")])
;; Anonymous memory-source form of vcvtph2ps: a V4HI memory operand is
;; converted (UNSPEC_VCVTPH2PS) into a V4SF register.
;; NOTE(review): the "mode" attribute here is V8SF, whereas the register
;; form "vcvtph2ps" with the same V4SF result uses V4SF -- confirm whether
;; this difference is intentional.
;; NOTE(review): no insn condition string is visible between the RTL
;; template and the output template in this listing.
10374 (define_insn "*vcvtph2ps_load"
10375 [(set (match_operand:V4SF 0 "register_operand" "=x")
10376 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10377 UNSPEC_VCVTPH2PS))]
10379 "vcvtph2ps\t{%1, %0|%0, %1}"
10380 [(set_attr "type" "ssecvt")
10381 (set_attr "prefix" "vex")
10382 (set_attr "mode" "V8SF")])
;; 256-bit form of vcvtph2ps: a V8HI operand (register or memory) is
;; converted (UNSPEC_VCVTPH2PS) into a V8SF register, one result element
;; per input element.
;; NOTE(review): no insn condition string is visible between the RTL
;; template and the output template in this listing.
10384 (define_insn "vcvtph2ps256"
10385 [(set (match_operand:V8SF 0 "register_operand" "=x")
10386 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10387 UNSPEC_VCVTPH2PS))]
10389 "vcvtph2ps\t{%1, %0|%0, %1}"
10390 [(set_attr "type" "ssecvt")
10391 (set_attr "prefix" "vex")
10392 (set_attr "mode" "V8SF")])
;; Named expander for the 128-bit vcvtps2ph.
;; Operand 0 is the V8HI register destination, operand 1 the V4SF register
;; source and operand 2 an SImode immediate.  The conversion itself is a
;; V4HI unspec of operands 1 and 2.
;; The preparation statement sets operands[3] to the all-zero V4HI constant;
;; presumably it fills the other half of the V8HI result -- TODO confirm,
;; the line that consumes operand 3 is not visible here.
;; NOTE(review): the head wrapping the unspec, the unspec code, the
;; reference to operand 3 and the expander condition are not visible in
;; this listing.
10394 (define_expand "vcvtps2ph"
10395 [(set (match_operand:V8HI 0 "register_operand" "")
10397 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10398 (match_operand:SI 2 "immediate_operand" "")]
10402 "operands[3] = CONST0_RTX (V4HImode);")
;; Anonymous register-destination form of the 128-bit vcvtps2ph.
;; The V8HI destination is formed from a V4HI unspec of operand 1 (V4SF
;; register) and operand 2 (SImode immediate, constraint "N"), followed by
;; operand 3, which must satisfy const0_operand in V4HImode.
;; NOTE(review): the head combining the unspec with operand 3 (presumably
;; a vec_concat:V8HI), the unspec code and the insn condition are not
;; visible in this listing.  Confirm against the pristine file.
10404 (define_insn "*vcvtps2ph"
10405 [(set (match_operand:V8HI 0 "register_operand" "=x")
10407 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10408 (match_operand:SI 2 "immediate_operand" "N")]
10410 (match_operand:V4HI 3 "const0_operand" "")))]
10412 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10413 [(set_attr "type" "ssecvt")
10414 (set_attr "prefix" "vex")
10415 (set_attr "mode" "V4SF")])
;; Anonymous memory-destination form of the 128-bit vcvtps2ph: the V4HI
;; result of UNSPEC_VCVTPS2PH applied to operand 1 (V4SF register) and
;; operand 2 (SImode immediate, constraint "N") is stored directly to the
;; V4HI memory operand 0.
;; NOTE(review): no insn condition string is visible between the RTL
;; template and the output template in this listing.
10417 (define_insn "*vcvtps2ph_store"
10418 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10419 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10420 (match_operand:SI 2 "immediate_operand" "N")]
10421 UNSPEC_VCVTPS2PH))]
10423 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10424 [(set_attr "type" "ssecvt")
10425 (set_attr "prefix" "vex")
10426 (set_attr "mode" "V4SF")])
;; 256-bit form of vcvtps2ph: UNSPEC_VCVTPS2PH applied to operand 1 (V8SF
;; register) and operand 2 (SImode immediate, constraint "N") produces a
;; V8HI result written to operand 0, which may be a register or memory
;; ("=xm").
;; NOTE(review): no insn condition string is visible between the RTL
;; template and the output template in this listing.
10428 (define_insn "vcvtps2ph256"
10429 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10430 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10431 (match_operand:SI 2 "immediate_operand" "N")]
10432 UNSPEC_VCVTPS2PH))]
10434 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10435 [(set_attr "type" "ssecvt")
10436 (set_attr "prefix" "vex")
10437 (set_attr "mode" "V8SF")])