1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; All vector modes including V1TImode, used in move patterns.
;; V16 iterates over the vector modes accepted by the move patterns.
;; Every 256-bit mode listed here is enabled only under TARGET_AVX; the
;; 128-bit mode written next to it carries no extra condition.
22 (define_mode_iterator V16
23 [(V32QI "TARGET_AVX") V16QI
24 (V16HI "TARGET_AVX") V8HI
25 (V8SI "TARGET_AVX") V4SI
26 (V4DI "TARGET_AVX") V2DI
28 (V8SF "TARGET_AVX") V4SF
29 (V4DF "TARGET_AVX") V2DF])
;; V iterates over integer and float vector modes.  The 256-bit modes
;; require TARGET_AVX and V2DF requires TARGET_SSE2; the remaining
;; 128-bit modes are unconditional.
32 (define_mode_iterator V
33 [(V32QI "TARGET_AVX") V16QI
34 (V16HI "TARGET_AVX") V8HI
35 (V8SI "TARGET_AVX") V4SI
36 (V4DI "TARGET_AVX") V2DI
37 (V8SF "TARGET_AVX") V4SF
38 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
40 ;; All 128bit vector modes
;; V_128: the four integer and two float 128-bit vector modes.  Only
;; V2DF carries a condition (TARGET_SSE2).
41 (define_mode_iterator V_128
42 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
44 ;; All 256bit vector modes
;; V_256: the four integer and two float 256-bit vector modes.  No
;; per-mode condition is attached here, so patterns using this iterator
;; must supply their own enabling condition.
45 (define_mode_iterator V_256
46 [V32QI V16HI V8SI V4DI V8SF V4DF])
48 ;; All vector float modes
;; VF: single- and double-precision float vectors of both widths.
;; V8SF/V4DF need TARGET_AVX, V2DF needs TARGET_SSE2, V4SF is
;; unconditional.
49 (define_mode_iterator VF
50 [(V8SF "TARGET_AVX") V4SF
51 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
53 ;; All SFmode vector float modes
;; VF1: vectors whose element mode is SF.  V8SF needs TARGET_AVX.
54 (define_mode_iterator VF1
55 [(V8SF "TARGET_AVX") V4SF])
57 ;; All DFmode vector float modes
;; VF2: vectors whose element mode is DF.  V4DF needs TARGET_AVX; note
;; that V2DF carries no TARGET_SSE2 condition in this iterator.
58 (define_mode_iterator VF2
59 [(V4DF "TARGET_AVX") V2DF])
61 ;; All 128bit vector float modes
;; VF_128: the two 128-bit float vector modes; V2DF needs TARGET_SSE2.
;; Used by the scalar ("vm") arithmetic patterns.
62 (define_mode_iterator VF_128
63 [V4SF (V2DF "TARGET_SSE2")])
65 ;; All 256bit vector float modes
;; VF_256: iterator restricted to the 256-bit float vector modes.
66 (define_mode_iterator VF_256
69 ;; All vector integer modes
;; VI: integer vectors with QI, HI, SI and DI elements.  The 256-bit
;; variants need TARGET_AVX; the 128-bit variants are unconditional.
70 (define_mode_iterator VI
71 [(V32QI "TARGET_AVX") V16QI
72 (V16HI "TARGET_AVX") V8HI
73 (V8SI "TARGET_AVX") V4SI
74 (V4DI "TARGET_AVX") V2DI])
76 ;; All QImode vector integer modes
;; VI1: byte-element integer vectors.  V32QI needs TARGET_AVX.
;; Used by the movdqu and lddqu patterns.
77 (define_mode_iterator VI1
78 [(V32QI "TARGET_AVX") V16QI])
80 ;; All DImode vector integer modes
;; VI8: 64-bit-element integer vectors.  V4DI needs TARGET_AVX.
;; Used by the integer non-temporal store pattern.
81 (define_mode_iterator VI8
82 [(V4DI "TARGET_AVX") V2DI])
84 ;; All 128bit vector integer modes
;; VI_128: the four 128-bit integer vector modes, all unconditional.
85 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
87 ;; Random 128bit vector integer mode combinations
;; Subsets of the 128-bit integer vector modes.  The digits in each
;; iterator name give the element sizes in bytes of the modes it lists
;; (1 = QI, 2 = HI, 4 = SI, 8 = DI).
88 (define_mode_iterator VI12_128 [V16QI V8HI])
89 (define_mode_iterator VI14_128 [V16QI V4SI])
90 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
91 (define_mode_iterator VI24_128 [V8HI V4SI])
92 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
94 ;; Int-float size matches
;; Each iterator pairs the integer and float vector modes that share an
;; element size (4 or 8 bytes) and a total width (128 or 256 bits).
95 (define_mode_iterator VI4F_128 [V4SI V4SF])
96 (define_mode_iterator VI8F_128 [V2DI V2DF])
97 (define_mode_iterator VI4F_256 [V8SI V8SF])
98 (define_mode_iterator VI8F_256 [V4DI V4DF])
100 ;; Mapping from float mode to required SSE level
;; <sse> expands to the ISA name for a float mode: "sse" for SF-based
;; modes, "sse2" for DF-based modes, "avx" for the 256-bit modes.  It is
;; used as the prefix of pattern names such as <sse>_movu... below.
101 (define_mode_attr sse
102 [(SF "sse") (DF "sse2")
103 (V4SF "sse") (V2DF "sse2")
104 (V8SF "avx") (V4DF "avx")])
;; <sse2> expands to "sse2" for the 128-bit modes V16QI/V2DI and to
;; "avx" for their 256-bit counterparts; used as a pattern-name prefix.
106 (define_mode_attr sse2
107 [(V16QI "sse2") (V32QI "avx")
108 (V2DI "sse2") (V4DI "avx")])
;; <sse3> expands to "sse3" for V16QI and "avx" for V32QI; used as the
;; name prefix of the lddqu pattern.
110 (define_mode_attr sse3
111 [(V16QI "sse3") (V32QI "avx")])
;; <sse4_1> expands to "sse4_1" for the 128-bit float vector modes and
;; to "avx" for the 256-bit float vector modes.
113 (define_mode_attr sse4_1
114 [(V4SF "sse4_1") (V2DF "sse4_1")
115 (V8SF "avx") (V4DF "avx")])
;; <avxsizesuffix> is "256" for every 256-bit vector mode and the empty
;; string for every 128-bit vector mode; it is appended to pattern names.
117 (define_mode_attr avxsizesuffix
118 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
119 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
120 (V8SF "256") (V4DF "256")
121 (V4SF "") (V2DF "")])
123 ;; SSE instruction mode
;; <sseinsnmode> gives the value used for the insn "mode" attribute:
;; OI for 256-bit integer vectors, TI for 128-bit integer vectors
;; (including V1TI), and the vector mode itself for float vectors.
124 (define_mode_attr sseinsnmode
125 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")
126 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
127 (V8SF "V8SF") (V4DF "V4DF")
128 (V4SF "V4SF") (V2DF "V2DF")])
130 ;; Mapping of vector float modes to an integer mode of the same size
;; <sseintvecmode>: for a float vector mode, the integer vector mode
;; with the same element count and element width.
131 (define_mode_attr sseintvecmode
132 [(V8SF "V8SI") (V4DF "V4DI")
133 (V4SF "V4SI") (V2DF "V2DI")])
135 ;; Mapping of vector modes to a vector mode of double size
;; <ssedoublevecmode>: same element mode, twice as many elements.
;; Entries exist for both 128-bit and 256-bit source modes.
136 (define_mode_attr ssedoublevecmode
137 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
138 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
139 (V8SF "V16SF") (V4DF "V8DF")
140 (V4SF "V8SF") (V2DF "V4DF")])
142 ;; Mapping of vector modes to a vector mode of half size
;; <ssehalfvecmode>: same element mode, half as many elements.
;; The entries shown below cover the 256-bit modes and the 128-bit
;; integer modes V16QI, V8HI and V4SI.
143 (define_mode_attr ssehalfvecmode
144 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
145 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
146 (V8SF "V4SF") (V4DF "V2DF")
149 ;; Mapping of vector modes back to the scalar modes
;; <ssescalarmode>: the element (scalar) mode of each vector mode.
;; Used, for example, as the "mode" attribute of the scalar vm* insns.
150 (define_mode_attr ssescalarmode
151 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
152 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
153 (V8SF "SF") (V4DF "DF")
154 (V4SF "SF") (V2DF "DF")])
156 ;; Number of scalar elements in each vector type
;; <ssescalarnum>: the element count of each vector mode, as a decimal
;; string.
157 (define_mode_attr ssescalarnum
158 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
159 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
160 (V8SF "8") (V4DF "4")
161 (V4SF "4") (V2DF "2")])
163 ;; SSE scalar suffix for vector modes
;; <ssescalarmodesuffix>: mnemonic suffix of the scalar form of an
;; instruction.  Modes with 4-byte elements map to "ss" and modes with
;; 8-byte elements map to "sd" in the entries shown below, including the
;; integer modes V8SI and V4DI.
164 (define_mode_attr ssescalarmodesuffix
165 [(V8SF "ss") (V4DF "sd")
166 (V4SF "ss") (V2DF "sd")
167 (V8SI "ss") (V4DI "sd")
170 ;; Pack/unpack vector modes
;; <sseunpackmode>: the 128-bit integer mode with elements twice as wide
;; and half as numerous (the result mode of an unpack/widen).
171 (define_mode_attr sseunpackmode
172 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
;; <ssepackmode>: the 128-bit integer mode with elements half as wide
;; and twice as numerous (the result mode of a pack/narrow).
174 (define_mode_attr ssepackmode
175 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")])
177 ;; Mapping of the max integer size for xop rotate immediate constraint
;; <sserotatemax>: element width in bits minus one for each 128-bit
;; integer vector mode (7, 15, 31, 63).
178 (define_mode_attr sserotatemax
179 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
181 ;; Instruction suffix for sign and zero extensions.
;; <extsuffix>: "sx" for sign_extend, "zx" for zero_extend.
182 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
;; AVX256MODE2P and AVX256MODE24P list 256-bit modes only and attach no
;; per-mode condition.  FMAMODE lists the scalar and vector float modes
;; (SF, DF and their 128-/256-bit vectors), also without conditions.
187 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
188 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
190 (define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
192 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
194 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
200 ;; All of these patterns are enabled for SSE1 as well as SSE2.
201 ;; This is essential for maintaining stable calling conventions.
;; Standard-named move expander for every mode in V16.  Both operands
;; are nonimmediate_operand; the preparation code hands the operands
;; array to ix86_expand_vector_move.
203 (define_expand "mov<mode>"
204 [(set (match_operand:V16 0 "nonimmediate_operand" "")
205 (match_operand:V16 1 "nonimmediate_operand" ""))]
208 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for every mode in V16.
;; Alternatives: 0 = SSE constant ("C") into an xmm register,
;;               1 = register or memory into an xmm register,
;;               2 = xmm register into memory.
;; The insn condition requires at least one operand to be a register.
;; Alternative 0 takes its opcode from standard_sse_constant_opcode.
;; The other alternatives choose a ps, pd or dq move according to the
;; insn "mode" attribute: the unaligned v-prefixed form is returned on
;; the branches that test misaligned_operand, otherwise the aligned
;; form, with movaps substituted when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set.
;; The "mode" attribute is <sseinsnmode> under TARGET_AVX; otherwise it
;; is computed by the cond below, whose final default is TI.
212 (define_insn "*mov<mode>_internal"
213 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
214 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
216 && (register_operand (operands[0], <MODE>mode)
217 || register_operand (operands[1], <MODE>mode))"
219 switch (which_alternative)
222 return standard_sse_constant_opcode (insn, operands[1]);
225 switch (get_attr_mode (insn))
230 && (misaligned_operand (operands[0], <MODE>mode)
231 || misaligned_operand (operands[1], <MODE>mode)))
232 return "vmovups\t{%1, %0|%0, %1}";
234 return "%vmovaps\t{%1, %0|%0, %1}";
239 && (misaligned_operand (operands[0], <MODE>mode)
240 || misaligned_operand (operands[1], <MODE>mode)))
241 return "vmovupd\t{%1, %0|%0, %1}";
242 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
243 return "%vmovaps\t{%1, %0|%0, %1}";
245 return "%vmovapd\t{%1, %0|%0, %1}";
250 && (misaligned_operand (operands[0], <MODE>mode)
251 || misaligned_operand (operands[1], <MODE>mode)))
252 return "vmovdqu\t{%1, %0|%0, %1}";
253 else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
254 return "%vmovaps\t{%1, %0|%0, %1}";
256 return "%vmovdqa\t{%1, %0|%0, %1}";
265 [(set_attr "type" "sselog1,ssemov,ssemov")
266 (set_attr "prefix" "maybe_vex")
268 (cond [(ne (symbol_ref "TARGET_AVX") (const_int 0))
269 (const_string "<sseinsnmode>")
271 (ne (symbol_ref "optimize_function_for_size_p (cfun)")
273 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
274 (and (eq_attr "alternative" "2")
275 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
277 (const_string "V4SF")
278 (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
279 (const_string "V4SF")
280 (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
281 (const_string "V2DF")
283 (const_string "TI")))])
;; movq between V2DI operands: the source expression selects element 0
;; of operand 1 (register or memory) and the result goes to an xmm
;; register.  Emitted as movq, VEX-encoded when available (%v prefix).
285 (define_insn "sse2_movq128"
286 [(set (match_operand:V2DI 0 "register_operand" "=x")
289 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
290 (parallel [(const_int 0)]))
293 "%vmovq\t{%1, %0|%0, %1}"
294 [(set_attr "type" "ssemov")
295 (set_attr "prefix" "maybe_vex")
296 (set_attr "mode" "TI")])
298 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
299 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
300 ;; from memory, we'd prefer to load the memory directly into the %xmm
301 ;; register. To facilitate this happy circumstance, this pattern won't
302 ;; split until after register allocation. If the 64-bit value didn't
303 ;; come from memory, this is the best we can do. This is much better
304 ;; than storing %edx:%eax into a stack temporary and loading an %xmm register.
;; Moves a DImode value, viewed as a V4SI subreg, into an xmm register
;; on 32-bit targets (!TARGET_64BIT && TARGET_SSE2 &&
;; TARGET_INTER_UNIT_MOVES).  Operand 1 is either a general register
;; pair ("r") or memory ("m"); operand 2 is a V4SI scratch that is only
;; allocated (early-clobber xmm) for the register alternative.
;; The split runs after reload:
;;  - register source: each 32-bit half (subreg byte offsets 0 and 4)
;;    is loaded with sse2_loadld, the low half into operand 0 and the
;;    high half into the scratch, then the two are combined with
;;    vec_interleave_lowv4si;
;;  - memory source: a single vec_concatv2di of the DImode memory with
;;    const0_rtx writes the V2DI lowpart of operand 0.
307 (define_insn_and_split "movdi_to_sse"
309 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
310 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
311 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
312 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
314 "&& reload_completed"
317 if (register_operand (operands[1], DImode))
319 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
320 Assemble the 64-bit DImode value in an xmm register. */
321 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
322 gen_rtx_SUBREG (SImode, operands[1], 0)));
323 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
324 gen_rtx_SUBREG (SImode, operands[1], 4)));
325 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
328 else if (memory_operand (operands[1], DImode))
329 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
330 operands[1], const0_rtx));
;; Post-reload split (TARGET_SSE) of a V4SF register load whose source
;; satisfies zero_extended_scalar_load_operand.  Operand 1 is narrowed
;; to its SFmode subreg at byte offset 0 and used in a vec_duplicate;
;; operand 2 is set to the V4SF zero vector.
336 [(set (match_operand:V4SF 0 "register_operand" "")
337 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
338 "TARGET_SSE && reload_completed"
341 (vec_duplicate:V4SF (match_dup 1))
345 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
346 operands[2] = CONST0_RTX (V4SFmode);
;; Post-reload split (TARGET_SSE2) of a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand.  The load is rewritten
;; as a vec_concat of the DFmode subreg of operand 1 (byte offset 0)
;; with a DFmode zero held in operand 2.
350 [(set (match_operand:V2DF 0 "register_operand" "")
351 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
352 "TARGET_SSE2 && reload_completed"
353 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
355 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
356 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every mode in V16.  The single register operand is
;; passed to ix86_expand_push.
359 (define_expand "push<mode>1"
360 [(match_operand:V16 0 "register_operand" "")]
363 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every mode in V16.  The operands array
;; is handed to ix86_expand_vector_move_misalign.
367 (define_expand "movmisalign<mode>"
368 [(set (match_operand:V16 0 "nonimmediate_operand" "")
369 (match_operand:V16 1 "nonimmediate_operand" ""))]
372 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float-vector move over VF.  When both
;; operands are memory, the source is first forced into a register so
;; that the matching insn's "not both MEM" condition is satisfied.
376 (define_expand "<sse>_movu<ssemodesuffix><avxsizesuffix>"
377 [(set (match_operand:VF 0 "nonimmediate_operand" "")
379 [(match_operand:VF 1 "nonimmediate_operand" "")]
383 if (MEM_P (operands[0]) && MEM_P (operands[1]))
384 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned float-vector move insn over VF (movu<ssemodesuffix>,
;; VEX-encoded when available).
;; Alternatives: 0 = register/memory into xmm, 1 = xmm into memory.
;; Memory-to-memory is rejected by the insn condition.
387 (define_insn "*<sse>_movu<ssemodesuffix><avxsizesuffix>"
388 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
390 [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
392 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
393 "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
394 [(set_attr "type" "ssemov")
395 (set_attr "movu" "1")
396 (set_attr "prefix" "maybe_vex")
397 (set_attr "mode" "<MODE>")])
;; Expander for the unaligned byte-vector move over VI1.  When both
;; operands are memory, the source is first forced into a register.
399 (define_expand "<sse2>_movdqu<avxsizesuffix>"
400 [(set (match_operand:VI1 0 "nonimmediate_operand" "")
401 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "")]
405 if (MEM_P (operands[0]) && MEM_P (operands[1]))
406 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Unaligned byte-vector move insn over VI1 (movdqu, VEX-encoded when
;; available).
;; Alternatives: 0 = register/memory into xmm, 1 = xmm into memory.
;; Memory-to-memory is rejected by the insn condition.
;; The prefix_data16 attribute is computed from TARGET_AVX.
409 (define_insn "*<sse2>_movdqu<avxsizesuffix>"
410 [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
411 (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
413 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
414 "%vmovdqu\t{%1, %0|%0, %1}"
415 [(set_attr "type" "ssemov")
416 (set_attr "movu" "1")
417 (set (attr "prefix_data16")
419 (ne (symbol_ref "TARGET_AVX") (const_int 0))
422 (set_attr "prefix" "maybe_vex")
423 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: load a byte vector (VI1) from memory into an xmm register.
;; The source must be a memory operand.  Both prefix_data16 and
;; prefix_rep are computed from TARGET_AVX.
425 (define_insn "<sse3>_lddqu<avxsizesuffix>"
426 [(set (match_operand:VI1 0 "register_operand" "=x")
427 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
430 "%vlddqu\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssemov")
432 (set_attr "movu" "1")
433 (set (attr "prefix_data16")
435 (ne (symbol_ref "TARGET_AVX") (const_int 0))
438 (set (attr "prefix_rep")
440 (ne (symbol_ref "TARGET_AVX") (const_int 0))
443 (set_attr "prefix" "maybe_vex")
444 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store an SImode general register ("r") to memory.
;; prefix_data16 is pinned to 0 explicitly.  The "mode" attribute is SI
;; because the insn moves 32-bit integer data from a general register;
;; it was previously mislabelled as the vector float mode V2DF.
446 (define_insn "sse2_movntsi"
447 [(set (match_operand:SI 0 "memory_operand" "=m")
448 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
451 "movnti\t{%1, %0|%0, %1}"
452 [(set_attr "type" "ssemov")
453 (set_attr "prefix_data16" "0")
454 (set_attr "mode" "SI")])
;; movnt<ssemodesuffix>: store a float vector (VF) from an xmm register
;; to memory, VEX-encoded when available.
456 (define_insn "<sse>_movnt<mode>"
457 [(set (match_operand:VF 0 "memory_operand" "=m")
458 (unspec:VF [(match_operand:VF 1 "register_operand" "x")]
461 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
462 [(set_attr "type" "ssemov")
463 (set_attr "prefix" "maybe_vex")
464 (set_attr "mode" "<MODE>")])
;; movntdq: store a DI-element integer vector (VI8) from an xmm
;; register to memory, VEX-encoded when available.  prefix_data16 is
;; computed from TARGET_AVX.
;; NOTE(review): the "type" attribute is "ssecvt" although the other
;; movnt patterns in this file use "ssemov" -- confirm this is intended.
466 (define_insn "<sse2>_movnt<mode>"
467 [(set (match_operand:VI8 0 "memory_operand" "=m")
468 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
471 "%vmovntdq\t{%1, %0|%0, %1}"
472 [(set_attr "type" "ssecvt")
473 (set (attr "prefix_data16")
475 (ne (symbol_ref "TARGET_AVX") (const_int 0))
478 (set_attr "prefix" "maybe_vex")
479 (set_attr "mode" "<sseinsnmode>")])
481 ; Expand patterns for non-temporal stores. At the moment, only those
482 ; that directly map to insns are defined; it would be possible to
483 ; define patterns for other modes that would expand to several insns.
485 ;; Modes handled by storent patterns.
;; STORENT_MODE gates each mode on the ISA that provides its store: SI
;; on TARGET_SSE2, scalar SF/DF on TARGET_SSE4A, 256-bit float vectors
;; on TARGET_AVX, V2DF on TARGET_SSE2; V4SF is unconditional.
486 (define_mode_iterator STORENT_MODE
487 [(SI "TARGET_SSE2") (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
489 (V8SF "TARGET_AVX") V4SF
490 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Standard-named non-temporal store expander over STORENT_MODE.
;; Operand 0 must be memory and operand 1 a register.
492 (define_expand "storent<mode>"
493 [(set (match_operand:STORENT_MODE 0 "memory_operand" "")
495 [(match_operand:STORENT_MODE 1 "register_operand" "")]
499 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
501 ;; Parallel floating point arithmetic
503 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary expander over VF whose name comes from the <code> iterator.
;; All of the work is delegated to ix86_expand_fp_absneg_operator, after
;; which expansion finishes with DONE.
505 (define_expand "<code><mode>2"
506 [(set (match_operand:VF 0 "register_operand" "")
508 (match_operand:VF 1 "register_operand" "")))]
510 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg on a float vector (VF), matched via absneg_operator in
;; operand 3, with an extra `use' of operand 2.
;; The split rewrites the operation as a bitwise op between operand 1
;; and operand 2: XOR when operand 3 is NEG, AND otherwise.
;; When operand 1 is memory the two inputs are swapped (op1 = operand 2,
;; op2 = operand 1).
;; Alternatives 0-1 are for non-AVX, alternatives 2-3 for AVX.
512 (define_insn_and_split "*absneg<mode>2"
513 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
514 (match_operator:VF 3 "absneg_operator"
515 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
516 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))]
522 enum rtx_code absneg_op;
528 if (MEM_P (operands[1]))
529 op1 = operands[2], op2 = operands[1];
531 op1 = operands[1], op2 = operands[2];
536 if (rtx_equal_p (operands[0], operands[1]))
542 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
543 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
544 t = gen_rtx_SET (VOIDmode, operands[0], t);
548 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector float add/sub expander over VF.  The operands are legitimized
;; with ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
550 (define_expand "<plusminus_insn><mode>3"
551 [(set (match_operand:VF 0 "register_operand" "")
553 (match_operand:VF 1 "nonimmediate_operand" "")
554 (match_operand:VF 2 "nonimmediate_operand" "")))]
556 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector float add/sub insn over VF.
;; Alternative 0 (non-AVX): two-operand form, destination tied to
;;   operand 1; <comm> supplies the commutativity marker.
;; Alternative 1 (AVX): three-operand v-prefixed form.
;; Operand validity is checked with ix86_binary_operator_ok.
558 (define_insn "*<plusminus_insn><mode>3"
559 [(set (match_operand:VF 0 "register_operand" "=x,x")
561 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x")
562 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
563 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
565 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
566 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
567 [(set_attr "isa" "noavx,avx")
568 (set_attr "type" "sseadd")
569 (set_attr "prefix" "orig,vex")
570 (set_attr "mode" "<MODE>")])
;; Scalar-form add/sub on 128-bit float vectors (VF_128); emits the
;; ss/sd mnemonic selected by <ssescalarmodesuffix>.
;; Alternative 0 is the non-AVX two-operand form (destination tied to
;; operand 1); alternative 1 is the AVX three-operand form.
;; The "mode" attribute is the element mode.
572 (define_insn "<sse>_vm<plusminus_insn><mode>3"
573 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
576 (match_operand:VF_128 1 "register_operand" "0,x")
577 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
582 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
583 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
584 [(set_attr "isa" "noavx,avx")
585 (set_attr "type" "sseadd")
586 (set_attr "prefix" "orig,vex")
587 (set_attr "mode" "<ssescalarmode>")])
;; Vector float multiply expander over VF.  The operands are
;; legitimized with ix86_fixup_binary_operands_no_copy (MULT).
589 (define_expand "mul<mode>3"
590 [(set (match_operand:VF 0 "register_operand" "")
592 (match_operand:VF 1 "nonimmediate_operand" "")
593 (match_operand:VF 2 "nonimmediate_operand" "")))]
595 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector float multiply insn over VF.  Operands 1 and 2 are marked
;; commutative ("%").
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.
597 (define_insn "*mul<mode>3"
598 [(set (match_operand:VF 0 "register_operand" "=x,x")
600 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
601 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
602 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
604 mul<ssemodesuffix>\t{%2, %0|%0, %2}
605 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
606 [(set_attr "isa" "noavx,avx")
607 (set_attr "type" "ssemul")
608 (set_attr "prefix" "orig,vex")
609 (set_attr "mode" "<MODE>")])
;; Scalar-form multiply on 128-bit float vectors (VF_128); emits
;; mulss/mulsd, or the v-prefixed three-operand form for AVX.
;; The "mode" attribute is the element mode.
611 (define_insn "<sse>_vmmul<mode>3"
612 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
615 (match_operand:VF_128 1 "register_operand" "0,x")
616 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
621 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2}
622 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
623 [(set_attr "isa" "noavx,avx")
624 (set_attr "type" "ssemul")
625 (set_attr "prefix" "orig,vex")
626 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for double-precision vectors (VF2).  It only
;; legitimizes the operands with ix86_fixup_binary_operands_no_copy.
628 (define_expand "div<mode>3"
629 [(set (match_operand:VF2 0 "register_operand" "")
630 (div:VF2 (match_operand:VF2 1 "register_operand" "")
631 (match_operand:VF2 2 "nonimmediate_operand" "")))]
633 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for single-precision vectors (VF1).  After the
;; operand fixup, ix86_emit_swdivsf is called instead when all of the
;; following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the
;; insn for size, -ffinite-math-only, no trapping math, and
;; -funsafe-math-optimizations.
635 (define_expand "div<mode>3"
636 [(set (match_operand:VF1 0 "register_operand" "")
637 (div:VF1 (match_operand:VF1 1 "register_operand" "")
638 (match_operand:VF1 2 "nonimmediate_operand" "")))]
641 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
643 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
644 && flag_finite_math_only && !flag_trapping_math
645 && flag_unsafe_math_optimizations)
647 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector float divide insn over VF.  Operand 1 must be a register
;; (division is not commutative).
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.
652 (define_insn "<sse>_div<mode>3"
653 [(set (match_operand:VF 0 "register_operand" "=x,x")
655 (match_operand:VF 1 "register_operand" "0,x")
656 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
659 div<ssemodesuffix>\t{%2, %0|%0, %2}
660 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
661 [(set_attr "isa" "noavx,avx")
662 (set_attr "type" "ssediv")
663 (set_attr "prefix" "orig,vex")
664 (set_attr "mode" "<MODE>")])
;; Scalar-form divide on 128-bit float vectors (VF_128); emits
;; divss/divsd, or the v-prefixed three-operand form for AVX.
;; The "mode" attribute is the element mode.
666 (define_insn "<sse>_vmdiv<mode>3"
667 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
670 (match_operand:VF_128 1 "register_operand" "0,x")
671 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
676 div<ssescalarmodesuffix>\t{%2, %0|%0, %2}
677 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
678 [(set_attr "isa" "noavx,avx")
679 (set_attr "type" "ssediv")
680 (set_attr "prefix" "orig,vex")
681 (set_attr "mode" "<ssescalarmode>")])
;; Packed single-precision reciprocal (UNSPEC_RCP) over VF1; emits
;; rcpps, VEX-encoded when available.
683 (define_insn "<sse>_rcp<mode>2"
684 [(set (match_operand:VF1 0 "register_operand" "=x")
686 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
688 "%vrcpps\t{%1, %0|%0, %1}"
689 [(set_attr "type" "sse")
690 (set_attr "atom_sse_attr" "rcp")
691 (set_attr "prefix" "maybe_vex")
692 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF (rcpss).  Operand 1 is the value fed to the
;; unspec; operand 2 is a second V4SF register input.
;; Alternative 0 (non-AVX) ties operand 2 to the destination.
;; Alternative 1 (AVX) passes operand 2 as an explicit source of vrcpss.
694 (define_insn "sse_vmrcpv4sf2"
695 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
697 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
699 (match_operand:V4SF 2 "register_operand" "0,x")
703 rcpss\t{%1, %0|%0, %1}
704 vrcpss\t{%1, %2, %0|%0, %2, %1}"
705 [(set_attr "isa" "noavx,avx")
706 (set_attr "type" "sse")
707 (set_attr "atom_sse_attr" "rcp")
708 (set_attr "prefix" "orig,vex")
709 (set_attr "mode" "SF")])
;; Square-root expander for double-precision vectors (VF2).
711 (define_expand "sqrt<mode>2"
712 [(set (match_operand:VF2 0 "register_operand" "")
713 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand" "")))]
;; Square-root expander for single-precision vectors (VF1).
;; ix86_emit_swsqrtsf is called with its last argument false when all of
;; the following hold: TARGET_SSE_MATH, TARGET_RECIP, not optimizing the
;; insn for size, -ffinite-math-only, no trapping math, and
;; -funsafe-math-optimizations.
716 (define_expand "sqrt<mode>2"
717 [(set (match_operand:VF1 0 "register_operand" "")
718 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand" "")))]
721 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
722 && flag_finite_math_only && !flag_trapping_math
723 && flag_unsafe_math_optimizations)
725 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Packed square-root insn over VF; emits sqrt<ssemodesuffix>,
;; VEX-encoded when available.
730 (define_insn "<sse>_sqrt<mode>2"
731 [(set (match_operand:VF 0 "register_operand" "=x")
732 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))]
734 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
735 [(set_attr "type" "sse")
736 (set_attr "atom_sse_attr" "sqrt")
737 (set_attr "prefix" "maybe_vex")
738 (set_attr "mode" "<MODE>")])
;; Scalar-form square root on 128-bit float vectors (VF_128).
;; Operand 1 is the value being square-rooted; operand 2 is a second
;; register input.
;; Alternative 0 (non-AVX) ties operand 2 to the destination.
;; Alternative 1 (AVX) passes operand 2 as an explicit source.
740 (define_insn "<sse>_vmsqrt<mode>2"
741 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
744 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm"))
745 (match_operand:VF_128 2 "register_operand" "0,x")
749 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
750 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
751 [(set_attr "isa" "noavx,avx")
752 (set_attr "type" "sse")
753 (set_attr "atom_sse_attr" "sqrt")
754 (set_attr "prefix" "orig,vex")
755 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal-square-root expander over VF1 (UNSPEC_RSQRT).  It calls
;; ix86_emit_swsqrtsf with its last argument true.
757 (define_expand "rsqrt<mode>2"
758 [(set (match_operand:VF1 0 "register_operand" "")
760 [(match_operand:VF1 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
763 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed single-precision reciprocal square root (UNSPEC_RSQRT) over
;; VF1; emits rsqrtps, VEX-encoded when available.
767 (define_insn "<sse>_rsqrt<mode>2"
768 [(set (match_operand:VF1 0 "register_operand" "=x")
770 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
772 "%vrsqrtps\t{%1, %0|%0, %1}"
773 [(set_attr "type" "sse")
774 (set_attr "prefix" "maybe_vex")
775 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF (rsqrtss).  Operand 1 feeds
;; the unspec; operand 2 is a second V4SF register input.
;; Alternative 0 (non-AVX) ties operand 2 to the destination.
;; Alternative 1 (AVX) passes operand 2 as an explicit source.
777 (define_insn "sse_vmrsqrtv4sf2"
778 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
780 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
782 (match_operand:V4SF 2 "register_operand" "0,x")
786 rsqrtss\t{%1, %0|%0, %1}
787 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
788 [(set_attr "isa" "noavx,avx")
789 (set_attr "type" "sse")
790 (set_attr "prefix" "orig,vex")
791 (set_attr "mode" "SF")])
793 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
794 ;; isn't really correct, as those rtl operators aren't defined when
795 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Vector float min/max expander over VF.  Unless -ffinite-math-only is
;; in effect, operand 1 is forced into a register first (the non-finite
;; insn requires a register there).  The operands are then legitimized
;; with ix86_fixup_binary_operands_no_copy.
797 (define_expand "<code><mode>3"
798 [(set (match_operand:VF 0 "register_operand" "")
800 (match_operand:VF 1 "nonimmediate_operand" "")
801 (match_operand:VF 2 "nonimmediate_operand" "")))]
804 if (!flag_finite_math_only)
805 operands[1] = force_reg (<MODE>mode, operands[1]);
806 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Min/max insn used only under -ffinite-math-only.  Operands 1 and 2
;; are marked commutative ("%"), and operand 1 may be memory.
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.
809 (define_insn "*<code><mode>3_finite"
810 [(set (match_operand:VF 0 "register_operand" "=x,x")
812 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
813 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
814 "TARGET_SSE && flag_finite_math_only
815 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
817 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
818 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
819 [(set_attr "isa" "noavx,avx")
820 (set_attr "type" "sseadd")
821 (set_attr "prefix" "orig,vex")
822 (set_attr "mode" "<MODE>")])
;; Min/max insn used when -ffinite-math-only is NOT in effect.  There
;; is no commutativity marker, and operand 1 must be a register, so the
;; operand order is preserved.
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.
824 (define_insn "*<code><mode>3"
825 [(set (match_operand:VF 0 "register_operand" "=x,x")
827 (match_operand:VF 1 "register_operand" "0,x")
828 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
829 "TARGET_SSE && !flag_finite_math_only"
831 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
832 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
833 [(set_attr "isa" "noavx,avx")
834 (set_attr "type" "sseadd")
835 (set_attr "prefix" "orig,vex")
836 (set_attr "mode" "<MODE>")])
;; Scalar-form min/max on 128-bit float vectors (VF_128); emits the
;; ss/sd mnemonic, or the v-prefixed three-operand form for AVX.
;; The "mode" attribute is the element mode.
838 (define_insn "<sse>_vm<code><mode>3"
839 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
842 (match_operand:VF_128 1 "register_operand" "0,x")
843 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm"))
848 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2}
849 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
850 [(set_attr "isa" "noavx,avx")
851 (set_attr "type" "sse")
852 (set_attr "prefix" "orig,vex")
853 (set_attr "mode" "<ssescalarmode>")])
855 ;; These versions of the min/max patterns implement exactly the operations
856 ;; min = (op1 < op2 ? op1 : op2)
857 ;; max = (!(op1 < op2) ? op1 : op2)
858 ;; Their operands are not commutative, and thus they may be used in the
859 ;; presence of -0.0 and NaN.
;; Order-preserving minimum over VF.  Operand 1 must be a register and
;; carries no commutativity marker.  Emits min<ssemodesuffix>, or the
;; v-prefixed three-operand form for AVX.
861 (define_insn "*ieee_smin<mode>3"
862 [(set (match_operand:VF 0 "register_operand" "=x,x")
864 [(match_operand:VF 1 "register_operand" "0,x")
865 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
869 min<ssemodesuffix>\t{%2, %0|%0, %2}
870 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
871 [(set_attr "isa" "noavx,avx")
872 (set_attr "type" "sseadd")
873 (set_attr "prefix" "orig,vex")
874 (set_attr "mode" "<MODE>")])
;; Order-preserving maximum over VF.  Operand 1 must be a register and
;; carries no commutativity marker.  Emits max<ssemodesuffix>, or the
;; v-prefixed three-operand form for AVX.
876 (define_insn "*ieee_smax<mode>3"
877 [(set (match_operand:VF 0 "register_operand" "=x,x")
879 [(match_operand:VF 1 "register_operand" "0,x")
880 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]
884 max<ssemodesuffix>\t{%2, %0|%0, %2}
885 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
886 [(set_attr "isa" "noavx,avx")
887 (set_attr "type" "sseadd")
888 (set_attr "prefix" "orig,vex")
889 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF (AVX only, three-operand form).  The RTL combines
;; an operation on operands 1 and 2 with (minus op1 op2) on the same
;; operands.
891 (define_insn "avx_addsubv4df3"
892 [(set (match_operand:V4DF 0 "register_operand" "=x")
895 (match_operand:V4DF 1 "register_operand" "x")
896 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
897 (minus:V4DF (match_dup 1) (match_dup 2))
900 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
901 [(set_attr "type" "sseadd")
902 (set_attr "prefix" "vex")
903 (set_attr "mode" "V4DF")])
;; addsubpd on V2DF.  Alternative 0 is the non-AVX two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX
;; three-operand vaddsubpd.
905 (define_insn "sse3_addsubv2df3"
906 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
909 (match_operand:V2DF 1 "register_operand" "0,x")
910 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
911 (minus:V2DF (match_dup 1) (match_dup 2))
915 addsubpd\t{%2, %0|%0, %2}
916 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
917 [(set_attr "isa" "noavx,avx")
918 (set_attr "type" "sseadd")
919 (set_attr "atom_unit" "complex")
920 (set_attr "prefix" "orig,vex")
921 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF (AVX only, three-operand form).  The RTL combines
;; an operation on operands 1 and 2 with (minus op1 op2) on the same
;; operands.
923 (define_insn "avx_addsubv8sf3"
924 [(set (match_operand:V8SF 0 "register_operand" "=x")
927 (match_operand:V8SF 1 "register_operand" "x")
928 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
929 (minus:V8SF (match_dup 1) (match_dup 2))
932 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
933 [(set_attr "type" "sseadd")
934 (set_attr "prefix" "vex")
935 (set_attr "mode" "V8SF")])
;; addsubps on V4SF.  Alternative 0 is the non-AVX two-operand form
;; (destination tied to operand 1); alternative 1 is the AVX
;; three-operand vaddsubps.  prefix_rep is 1 for the legacy encoding and
;; left at its default ("*") for the VEX encoding.
937 (define_insn "sse3_addsubv4sf3"
938 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
941 (match_operand:V4SF 1 "register_operand" "0,x")
942 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
943 (minus:V4SF (match_dup 1) (match_dup 2))
947 addsubps\t{%2, %0|%0, %2}
948 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
949 [(set_attr "isa" "noavx,avx")
950 (set_attr "type" "sseadd")
951 (set_attr "prefix" "orig,vex")
952 (set_attr "prefix_rep" "1,*")
953 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF (vhaddpd/vhsubpd, AVX only).  The RTL
;; pairs adjacent DF elements: (0,1) and (2,3) of operand 1, and (0,1)
;; and (2,3) of operand 2.
955 (define_insn "avx_h<plusminus_insn>v4df3"
956 [(set (match_operand:V4DF 0 "register_operand" "=x")
961 (match_operand:V4DF 1 "register_operand" "x")
962 (parallel [(const_int 0)]))
963 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
965 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
966 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
970 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
971 (parallel [(const_int 0)]))
972 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
974 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
975 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
977 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
978 [(set_attr "type" "sseadd")
979 (set_attr "prefix" "vex")
980 (set_attr "mode" "V4DF")])
;; Horizontal add/sub on V2DF (haddpd/hsubpd).  The RTL pairs elements
;; (0,1) of operand 1 and elements (0,1) of operand 2.
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.
982 (define_insn "sse3_h<plusminus_insn>v2df3"
983 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
987 (match_operand:V2DF 1 "register_operand" "0,x")
988 (parallel [(const_int 0)]))
989 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
992 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
993 (parallel [(const_int 0)]))
994 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
997 h<plusminus_mnemonic>pd\t{%2, %0|%0, %2}
998 vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
999 [(set_attr "isa" "noavx,avx")
1000 (set_attr "type" "sseadd")
1001 (set_attr "prefix" "orig,vex")
1002 (set_attr "mode" "V2DF")])
;; Horizontal add/sub on V8SF (vhaddps/vhsubps, AVX only).  The RTL
;; pairs adjacent SF elements in this order: elements 0-3 of operand 1,
;; elements 0-3 of operand 2, elements 4-7 of operand 1, then elements
;; 4-7 of operand 2.
1004 (define_insn "avx_h<plusminus_insn>v8sf3"
1005 [(set (match_operand:V8SF 0 "register_operand" "=x")
1011 (match_operand:V8SF 1 "register_operand" "x")
1012 (parallel [(const_int 0)]))
1013 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1015 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1016 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1020 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1021 (parallel [(const_int 0)]))
1022 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1024 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1025 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1029 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1030 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1032 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1033 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1036 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1037 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1039 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1040 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1042 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1043 [(set_attr "type" "sseadd")
1044 (set_attr "prefix" "vex")
1045 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF (haddps/hsubps).  The RTL pairs elements
;; (0,1) and (2,3) of operand 1, then (0,1) and (2,3) of operand 2.
;; Alternative 0 is the non-AVX two-operand form; alternative 1 is the
;; AVX three-operand form.  prefix_rep is 1 for the legacy encoding
;; only.
1047 (define_insn "sse3_h<plusminus_insn>v4sf3"
1048 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1053 (match_operand:V4SF 1 "register_operand" "0,x")
1054 (parallel [(const_int 0)]))
1055 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1057 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1058 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1062 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1063 (parallel [(const_int 0)]))
1064 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1066 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1067 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1070 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1071 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1072 [(set_attr "isa" "noavx,avx")
1073 (set_attr "type" "sseadd")
1074 (set_attr "atom_unit" "complex")
1075 (set_attr "prefix" "orig,vex")
1076 (set_attr "prefix_rep" "1,*")
1077 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4DF vector in three steps:
;;   1. hadd of operand 1 with itself into tmp;
;;   2. vperm2f128 of tmp with itself, immediate 1, into tmp2;
;;   3. add tmp and tmp2 into operand 0.
1079 (define_expand "reduc_splus_v4df"
1080 [(match_operand:V4DF 0 "register_operand" "")
1081 (match_operand:V4DF 1 "register_operand" "")]
1084 rtx tmp = gen_reg_rtx (V4DFmode);
1085 rtx tmp2 = gen_reg_rtx (V4DFmode);
1086 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1087 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1088 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: a single sse3 hadd of operand 1 with
;; itself, written directly to operand 0.
1092 (define_expand "reduc_splus_v2df"
1093 [(match_operand:V2DF 0 "register_operand" "")
1094 (match_operand:V2DF 1 "register_operand" "")]
1097 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction for V8SF.  Two successive horizontal adds, then a
;; vperm2f128 with selector 1 (reusing tmp as its destination), then an
;; element-wise add of tmp and tmp2 into operand 0.
1101 (define_expand "reduc_splus_v8sf"
1102 [(match_operand:V8SF 0 "register_operand" "")
1103 (match_operand:V8SF 1 "register_operand" "")]
1106 rtx tmp = gen_reg_rtx (V8SFmode);
1107 rtx tmp2 = gen_reg_rtx (V8SFmode);
1108 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
1109 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
1110 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
1111 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction for V4SF.  Two code paths are emitted from here: two
;; chained sse3_haddv4sf3 insns through a temporary, or a call to
;; ix86_expand_reduc_v4sf with gen_addv4sf3 as the combining operation.
1115 (define_expand "reduc_splus_v4sf"
1116 [(match_operand:V4SF 0 "register_operand" "")
1117 (match_operand:V4SF 1 "register_operand" "")]
1122 rtx tmp = gen_reg_rtx (V4SFmode);
1123 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
1124 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
1127 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Signed-maximum reduction for V4SF; delegates to ix86_expand_reduc_v4sf
;; with gen_smaxv4sf3 as the combining operation.
1132 (define_expand "reduc_smax_v4sf"
1133 [(match_operand:V4SF 0 "register_operand" "")
1134 (match_operand:V4SF 1 "register_operand" "")]
1137 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Signed-minimum reduction for V4SF; delegates to ix86_expand_reduc_v4sf
;; with gen_sminv4sf3 as the combining operation.
1141 (define_expand "reduc_smin_v4sf"
1142 [(match_operand:V4SF 0 "register_operand" "")
1143 (match_operand:V4SF 1 "register_operand" "")]
1146 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
1150 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1152 ;; Parallel floating point comparisons
1154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-encoded packed compare over the VF modes.  Operand 3 is an
;; immediate accepted by const_0_to_31_operand and is printed as the
;; first AT&T operand (last in Intel syntax); it accounts for the
;; one-byte length_immediate.
1156 (define_insn "avx_cmp<mode>3"
1157 [(set (match_operand:VF 0 "register_operand" "=x")
1159 [(match_operand:VF 1 "register_operand" "x")
1160 (match_operand:VF 2 "nonimmediate_operand" "xm")
1161 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1164 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1165 [(set_attr "type" "ssecmp")
1166 (set_attr "length_immediate" "1")
1167 (set_attr "prefix" "vex")
1168 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed counterpart of the VEX compare, restricted to the
;; VF_128 modes.  The mnemonic uses <ssescalarmodesuffix> and the insn
;; reports its mode as <ssescalarmode>.
1170 (define_insn "avx_vmcmp<mode>3"
1171 [(set (match_operand:VF_128 0 "register_operand" "=x")
1174 [(match_operand:VF_128 1 "register_operand" "x")
1175 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
1176 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1181 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1182 [(set_attr "type" "ssecmp")
1183 (set_attr "length_immediate" "1")
1184 (set_attr "prefix" "vex")
1185 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare whose condition is an RTL comparison operator (operand
;; 3, accepted by sse_comparison_operator) rather than an immediate.  The
;; condition is printed into the mnemonic through %D3.  Alternative 0 is
;; the two-operand SSE form, alternative 1 the three-operand VEX form.
1187 (define_insn "<sse>_maskcmp<mode>3"
1188 [(set (match_operand:VF 0 "register_operand" "=x,x")
1189 (match_operator:VF 3 "sse_comparison_operator"
1190 [(match_operand:VF 1 "register_operand" "0,x")
1191 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))]
1194 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
1195 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1196 [(set_attr "isa" "noavx,avx")
1197 (set_attr "type" "ssecmp")
1198 (set_attr "length_immediate" "1")
1199 (set_attr "prefix" "orig,vex")
1200 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed variant of the operator-based compare for the VF_128
;; modes; the condition is again printed through %D3.
;; NOTE(review): length_immediate is "1,*" here, whereas
;; <sse>_maskcmp<mode>3 uses "1" for both alternatives -- confirm that the
;; difference for the VEX alternative is intended.
1202 (define_insn "<sse>_vmmaskcmp<mode>3"
1203 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1205 (match_operator:VF_128 3 "sse_comparison_operator"
1206 [(match_operand:VF_128 1 "register_operand" "0,x")
1207 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
1212 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2}
1213 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1214 [(set_attr "isa" "noavx,avx")
1215 (set_attr "type" "ssecmp")
1216 (set_attr "length_immediate" "1,*")
1217 (set_attr "prefix" "orig,vex")
1218 (set_attr "mode" "<ssescalarmode>")])
;; Sets FLAGS_REG in CCFP mode from element 0 of vector operands 0 and 1
;; and emits [v]comi with the mode suffix.  prefix_data16 depends on
;; whether the insn mode is DF.
1220 (define_insn "<sse>_comi"
1221 [(set (reg:CCFP FLAGS_REG)
1224 (match_operand:<ssevecmode> 0 "register_operand" "x")
1225 (parallel [(const_int 0)]))
1227 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1228 (parallel [(const_int 0)]))))]
1229 "SSE_FLOAT_MODE_P (<MODE>mode)"
1230 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1231 [(set_attr "type" "ssecomi")
1232 (set_attr "prefix" "maybe_vex")
1233 (set_attr "prefix_rep" "0")
1234 (set (attr "prefix_data16")
1235 (if_then_else (eq_attr "mode" "DF")
1237 (const_string "0")))
1238 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi but sets FLAGS_REG in CCFPU mode and emits
;; [v]ucomi.  Element 0 of each vector operand is the value compared.
1240 (define_insn "<sse>_ucomi"
1241 [(set (reg:CCFPU FLAGS_REG)
1244 (match_operand:<ssevecmode> 0 "register_operand" "x")
1245 (parallel [(const_int 0)]))
1247 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
1248 (parallel [(const_int 0)]))))]
1249 "SSE_FLOAT_MODE_P (<MODE>mode)"
1250 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}"
1251 [(set_attr "type" "ssecomi")
1252 (set_attr "prefix" "maybe_vex")
1253 (set_attr "prefix_rep" "0")
1254 (set (attr "prefix_data16")
1255 (if_then_else (eq_attr "mode" "DF")
1257 (const_string "0")))
1258 (set_attr "mode" "<MODE>")])
;; Vector conditional select for the VF modes.  Operand 3 is the
;; comparison (any operator) applied to operands 4 and 5; operands 1 and 2
;; are the two candidate values.  Expansion is handed to
;; ix86_expand_fp_vcond, whose result is captured in OK.
1260 (define_expand "vcond<mode>"
1261 [(set (match_operand:VF 0 "register_operand" "")
1263 (match_operator 3 ""
1264 [(match_operand:VF 4 "nonimmediate_operand" "")
1265 (match_operand:VF 5 "nonimmediate_operand" "")])
1266 (match_operand:VF 1 "general_operand" "")
1267 (match_operand:VF 2 "general_operand" "")))]
1270 bool ok = ix86_expand_fp_vcond (operands);
1275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1277 ;; Parallel floating point logical operations
1279 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector and-not for the VF modes.  The output template is built at
;; output time: the suffix is "ps" when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL is set and <ssemodesuffix>
;; otherwise, and it is formatted into a static 32-byte buffer.
;; which_alternative selects the SSE or the VEX operand layout.
1281 (define_insn "<sse>_andnot<mode>3"
1282 [(set (match_operand:VF 0 "register_operand" "=x,x")
1285 (match_operand:VF 1 "register_operand" "0,x"))
1286 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1289 static char buf[32];
1292 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1294 switch (which_alternative)
1297 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1300 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1306 snprintf (buf, sizeof (buf), insn, suffix);
1309 [(set_attr "isa" "noavx,avx")
1310 (set_attr "type" "sselog")
1311 (set_attr "prefix" "orig,vex")
1312 (set_attr "mode" "<MODE>")])
;; Expander for the <code> logical operations on the VF modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy on the operand
;; array.
1314 (define_expand "<code><mode>3"
1315 [(set (match_operand:VF 0 "register_operand" "")
1317 (match_operand:VF 1 "nonimmediate_operand" "")
1318 (match_operand:VF 2 "nonimmediate_operand" "")))]
1320 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for the <code> logical operations on the VF modes.  Operand 1
;; is marked commutative ("%").  The mnemonic is <logic> with a suffix
;; chosen at output time ("ps" when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; otherwise <ssemodesuffix>), formatted into a static buffer.
1322 (define_insn "*<code><mode>3"
1323 [(set (match_operand:VF 0 "register_operand" "=x,x")
1325 (match_operand:VF 1 "nonimmediate_operand" "%0,x")
1326 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
1327 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1329 static char buf[32];
1332 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssemodesuffix>";
1334 switch (which_alternative)
1337 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1340 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1346 snprintf (buf, sizeof (buf), insn, suffix);
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "sselog")
1351 (set_attr "prefix" "orig,vex")
1352 (set_attr "mode" "<MODE>")])
;; Vector copysign for the VF modes.  Operand 3 is the sign-bit mask from
;; ix86_build_signbit_mask; the template pairs the complemented mask with
;; operand 1 and the mask itself with operand 2, then ORs the two fresh
;; temporaries (operands 4 and 5) into operand 0.
1354 (define_expand "copysign<mode>3"
1357 (not:VF (match_dup 3))
1358 (match_operand:VF 1 "nonimmediate_operand" "")))
1360 (and:VF (match_dup 3)
1361 (match_operand:VF 2 "nonimmediate_operand" "")))
1362 (set (match_operand:VF 0 "register_operand" "")
1363 (ior:VF (match_dup 4) (match_dup 5)))]
1366 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
1368 operands[4] = gen_reg_rtx (<MODE>mode);
1369 operands[5] = gen_reg_rtx (<MODE>mode);
1372 ;; Also define scalar versions. These are used for abs, neg, and
1373 ;; conditional move. Using subregs into vector modes causes register
1374 ;; allocation lossage. These patterns do not allow memory operands
1375 ;; because the native instructions read the full 128-bits.
;; Scalar (MODEF) and-not.  All operands are registers.  The emitted
;; mnemonic is the packed andn/vandn with suffix "ps" or
;; <ssevecmodesuffix>, and the insn mode is reported as <ssevecmode>.
1377 (define_insn "*andnot<mode>3"
1378 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1381 (match_operand:MODEF 1 "register_operand" "0,x"))
1382 (match_operand:MODEF 2 "register_operand" "x,x")))]
1383 "SSE_FLOAT_MODE_P (<MODE>mode)"
1385 static char buf[32];
1388 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1390 switch (which_alternative)
1393 insn = "andn%s\t{%%2, %%0|%%0, %%2}";
1396 insn = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1402 snprintf (buf, sizeof (buf), insn, suffix);
1405 [(set_attr "isa" "noavx,avx")
1406 (set_attr "type" "sselog")
1407 (set_attr "prefix" "orig,vex")
1408 (set_attr "mode" "<ssevecmode>")])
;; Scalar (MODEF) <code> logical operation.  All operands are registers
;; and operand 1 is commutative ("%").  The packed <logic>/v<logic>
;; mnemonic is emitted with suffix "ps" or <ssevecmodesuffix>.
1410 (define_insn "*<code><mode>3"
1411 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
1413 (match_operand:MODEF 1 "register_operand" "%0,x")
1414 (match_operand:MODEF 2 "register_operand" "x,x")))]
1415 "SSE_FLOAT_MODE_P (<MODE>mode)"
1417 static char buf[32];
1420 = TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ? "ps" : "<ssevecmodesuffix>";
1422 switch (which_alternative)
1425 insn = "<logic>%s\t{%%2, %%0|%%0, %%2}";
1428 insn = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
1434 snprintf (buf, sizeof (buf), insn, suffix);
1437 [(set_attr "isa" "noavx,avx")
1438 (set_attr "type" "sselog")
1439 (set_attr "prefix" "orig,vex")
1440 (set_attr "mode" "<ssevecmode>")])
1442 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1444 ;; FMA4 floating point multiply/accumulate instructions. This
1445 ;; includes the scalar version of the instructions as well as the
1448 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1450 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1451 ;; combine to generate a multiply/add with two memory references. We then
1452 ;; split this insn, into loading up the destination register with one of the
1453 ;; memory operations. If we don't manage to split the insn, reload will
1454 ;; generate the appropriate moves. The reason this is needed, is that combine
1455 ;; has already folded one of the memory references into both the multiply and
1456 ;; add insns, and it can't generate a new pseudo. I.e.:
1457 ;; (set (reg1) (mem (addr1)))
1458 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1459 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1461 ;; ??? This is historic, pre-dating the gimple fma transformation.
1462 ;; We could now properly represent that only one memory operand is
1463 ;; allowed and not be penalized during optimization.
1465 ;; Intrinsic FMA operations.
1467 ;; The standard names for fma are only available with SSE math enabled.
;; Standard-named fused multiply-add over FMAMODE.  It has no preparation
;; code; it is enabled only when FMA or FMA4 is available and SSE math is
;; in use.
1468 (define_expand "fma<mode>4"
1469 [(set (match_operand:FMAMODE 0 "register_operand")
1471 (match_operand:FMAMODE 1 "nonimmediate_operand")
1472 (match_operand:FMAMODE 2 "nonimmediate_operand")
1473 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1474 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named fused multiply-subtract: same shape as fma<mode>4 with
;; operand 3 wrapped in neg.  Same enabling condition.
1476 (define_expand "fms<mode>4"
1477 [(set (match_operand:FMAMODE 0 "register_operand")
1479 (match_operand:FMAMODE 1 "nonimmediate_operand")
1480 (match_operand:FMAMODE 2 "nonimmediate_operand")
1481 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1482 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named negated multiply-add: operand 1 is wrapped in neg,
;; operands 2 and 3 are used as given.  Same enabling condition.
1484 (define_expand "fnma<mode>4"
1485 [(set (match_operand:FMAMODE 0 "register_operand")
1487 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1488 (match_operand:FMAMODE 2 "nonimmediate_operand")
1489 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1490 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; Standard-named negated multiply-subtract: both operand 1 and operand 3
;; are wrapped in neg.  Same enabling condition.
1492 (define_expand "fnms<mode>4"
1493 [(set (match_operand:FMAMODE 0 "register_operand")
1495 (neg:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand"))
1496 (match_operand:FMAMODE 2 "nonimmediate_operand")
1497 (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
1498 "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
1500 ;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; Builtin-facing multiply-add expander.  Unlike fma<mode>4, its condition
;; does not test TARGET_SSE_MATH: FMA or FMA4 alone is sufficient.
1501 (define_expand "fma4i_fmadd_<mode>"
1502 [(set (match_operand:FMAMODE 0 "register_operand")
1504 (match_operand:FMAMODE 1 "nonimmediate_operand")
1505 (match_operand:FMAMODE 2 "nonimmediate_operand")
1506 (match_operand:FMAMODE 3 "nonimmediate_operand")))]
1507 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmadd.  The destination is not tied to any source.  The
;; two alternatives allow memory in operand 3 or in operand 2, never in
;; both; operand 1 is marked commutative ("%").
1509 (define_insn "*fma4i_fmadd_<mode>"
1510 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1512 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1513 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1514 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1516 "vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1517 [(set_attr "type" "ssemuladd")
1518 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsub.  Operand constraints match *fma4i_fmadd_<mode>:
;; memory is allowed in operand 3 or operand 2, one per alternative.
1520 (define_insn "*fma4i_fmsub_<mode>"
1521 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1523 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
1524 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1526 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1528 "vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1529 [(set_attr "type" "ssemuladd")
1530 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmadd.  Operand constraints match *fma4i_fmadd_<mode>:
;; memory is allowed in operand 3 or operand 2, one per alternative.
1532 (define_insn "*fma4i_fnmadd_<mode>"
1533 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1536 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1537 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1538 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x")))]
1540 "vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1541 [(set_attr "type" "ssemuladd")
1542 (set_attr "mode" "<MODE>")])
;; Four-operand vfnmsub.  Operand constraints match *fma4i_fmadd_<mode>:
;; memory is allowed in operand 3 or operand 2, one per alternative.
1544 (define_insn "*fma4i_fnmsub_<mode>"
1545 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
1548 (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x"))
1549 (match_operand:FMAMODE 2 "nonimmediate_operand" " x,m")
1551 (match_operand:FMAMODE 3 "nonimmediate_operand" "xm,x"))))]
1553 "vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1554 [(set_attr "type" "ssemuladd")
1555 (set_attr "mode" "<MODE>")])
1557 ;; Scalar versions of the above. Unlike ADDSS et al, these write the
1558 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the scalar-in-vector multiply-add on the VF_128 modes.
;; Its preparation code supplies operand 4 as the all-zero constant of the
;; vector mode.
1560 (define_expand "fma4i_vmfmadd_<mode>"
1561 [(set (match_operand:VF_128 0 "register_operand")
1564 (match_operand:VF_128 1 "nonimmediate_operand")
1565 (match_operand:VF_128 2 "nonimmediate_operand")
1566 (match_operand:VF_128 3 "nonimmediate_operand"))
1571 operands[4] = CONST0_RTX (<MODE>mode);
;; Scalar-suffixed four-operand vfmadd on the VF_128 modes.  Operand 4
;; must satisfy const0_operand and is not printed in the template.
1574 (define_insn "*fma4i_vmfmadd_<mode>"
1575 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1578 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1579 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1580 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1581 (match_operand:VF_128 4 "const0_operand" "")
1584 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1585 [(set_attr "type" "ssemuladd")
1586 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfmsub on the VF_128 modes.  Operand 4
;; must satisfy const0_operand and is not printed in the template.
1588 (define_insn "*fma4i_vmfmsub_<mode>"
1589 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1592 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
1593 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1595 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1596 (match_operand:VF_128 4 "const0_operand" "")
1599 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1600 [(set_attr "type" "ssemuladd")
1601 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfnmadd on the VF_128 modes.  Operand 4
;; must satisfy const0_operand and is not printed in the template.
1603 (define_insn "*fma4i_vmfnmadd_<mode>"
1604 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1608 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1609 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1610 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
1611 (match_operand:VF_128 4 "const0_operand" "")
1614 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1615 [(set_attr "type" "ssemuladd")
1616 (set_attr "mode" "<MODE>")])
;; Scalar-suffixed four-operand vfnmsub on the VF_128 modes.  Operand 4
;; must satisfy const0_operand and is not printed in the template.
1618 (define_insn "*fma4i_vmfnmsub_<mode>"
1619 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
1623 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
1624 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
1626 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
1627 (match_operand:VF_128 4 "const0_operand" "")
1630 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1631 [(set_attr "type" "ssemuladd")
1632 (set_attr "mode" "<MODE>")])
1634 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1636 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
1638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1640 ;; It would be possible to represent these without the UNSPEC as
1643 ;; (fma op1 op2 op3)
1644 ;; (fma op1 op2 (neg op3))
1647 ;; But this doesn't seem useful in practice.
;; Expander for the multiply add/sub operation over the VF modes; the
;; three sources form an operand vector.  It has no preparation code and
;; is enabled when either FMA or FMA4 is available.
1649 (define_expand "fmaddsub_<mode>"
1650 [(set (match_operand:VF 0 "register_operand")
1652 [(match_operand:VF 1 "nonimmediate_operand")
1653 (match_operand:VF 2 "nonimmediate_operand")
1654 (match_operand:VF 3 "nonimmediate_operand")]
1656 "TARGET_FMA || TARGET_FMA4")
;; Four-operand vfmaddsub over the VF modes.  Memory is permitted in
;; operand 3 or operand 2, one per alternative; operand 1 is marked
;; commutative ("%").
1658 (define_insn "*fma4_fmaddsub_<mode>"
1659 [(set (match_operand:VF 0 "register_operand" "=x,x")
1661 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1662 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1663 (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
1666 "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1667 [(set_attr "type" "ssemuladd")
1668 (set_attr "mode" "<MODE>")])
;; Four-operand vfmsubadd over the VF modes.  Operand constraints match
;; *fma4_fmaddsub_<mode>.
1670 (define_insn "*fma4_fmsubadd_<mode>"
1671 [(set (match_operand:VF 0 "register_operand" "=x,x")
1673 [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
1674 (match_operand:VF 2 "nonimmediate_operand" " x,m")
1676 (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
1679 "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1680 [(set_attr "type" "ssemuladd")
1681 (set_attr "mode" "<MODE>")])
1683 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1685 ;; FMA3 floating point multiply/accumulate instructions.
1687 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FMA3 multiply-add over FMAMODE.  The destination is always tied to a
;; source ("0"): operand 1 in alternatives 0 and 1, operand 3 in
;; alternative 2.  The digit suffix of the mnemonic names which roles the
;; destination and the two printed sources play:
;;   alt 0  132: dest = dest * op2 + op3
;;   alt 1  213: dest = op2 * dest + op3
;;   alt 2  231: dest = op1 * op2 + dest
;; Only the 132, 213 and 231 forms exist, so alternative 1 must be 213.
1689 (define_insn "*fma_fmadd_<mode>"
1690 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1692 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1693 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1694 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1697 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1698 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1699 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1700 [(set_attr "type" "ssemuladd")
1701 (set_attr "mode" "<MODE>")])
;; FMA3 multiply-subtract over FMAMODE.  The destination is tied to
;; operand 1 in alternatives 0 and 1 and to operand 3 in alternative 2,
;; which selects the 132, 213 and 231 encodings respectively.  Only those
;; three forms exist, so alternative 1 must be 213.
1703 (define_insn "*fma_fmsub_<mode>"
1704 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1706 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
1707 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1709 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1712 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1713 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1714 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1715 [(set_attr "type" "ssemuladd")
1716 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-add over FMAMODE (emits vfnmadd*).  The pattern
;; is named after the instruction it emits so that it is distinguishable
;; from the plain multiply-add pattern in RTL dumps; a "*" name generates
;; no gen_ function, so nothing refers to it by name.
;; The destination is tied to operand 1 in alternatives 0 and 1 and to
;; operand 3 in alternative 2, selecting the 132, 213 and 231 encodings.
;; Only those three forms exist, so alternative 1 must be 213.
1718 (define_insn "*fma_fnmadd_<mode>"
1719 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1722 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1723 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1724 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
1727 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1728 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1729 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1730 [(set_attr "type" "ssemuladd")
1731 (set_attr "mode" "<MODE>")])
;; FMA3 negated multiply-subtract over FMAMODE (emits vfnmsub*).  The
;; pattern is named after the instruction it emits so that it is
;; distinguishable from the plain multiply-subtract pattern in RTL dumps;
;; a "*" name generates no gen_ function, so nothing refers to it by name.
;; The destination is tied to operand 1 in alternatives 0 and 1 and to
;; operand 3 in alternative 2, selecting the 132, 213 and 231 encodings.
;; Only those three forms exist, so alternative 1 must be 213.
1733 (define_insn "*fma_fnmsub_<mode>"
1734 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
1737 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
1738 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
1740 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
1743 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1744 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1745 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1746 [(set_attr "type" "ssemuladd")
1747 (set_attr "mode" "<MODE>")])
;; FMA3 multiply add/sub over the VF modes.  The destination is tied to
;; operand 1 in alternatives 0 and 1 and to operand 3 in alternative 2,
;; selecting the 132, 213 and 231 encodings respectively.
1749 (define_insn "*fma_fmaddsub_<mode>"
1750 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1752 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1753 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1754 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
1758 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1759 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1760 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1761 [(set_attr "type" "ssemuladd")
1762 (set_attr "mode" "<MODE>")])
;; FMA3 multiply sub/add over the VF modes.  Constraint layout and the
;; 132/213/231 encoding choice match *fma_fmaddsub_<mode>.
1764 (define_insn "*fma_fmsubadd_<mode>"
1765 [(set (match_operand:VF 0 "register_operand" "=x,x,x")
1767 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
1768 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
1770 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
1774 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
1775 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
1776 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1777 [(set_attr "type" "ssemuladd")
1778 (set_attr "mode" "<MODE>")])
1780 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1782 ;; Parallel single-precision floating point conversion operations
1784 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory, "ym")
;; to V2SF.  Operand 1 is a V4SF register tied to the destination.
1786 (define_insn "sse_cvtpi2ps"
1787 [(set (match_operand:V4SF 0 "register_operand" "=x")
1790 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
1791 (match_operand:V4SF 1 "register_operand" "0")
1794 "cvtpi2ps\t{%2, %0|%0, %2}"
1795 [(set_attr "type" "ssecvt")
1796 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec conversion of the V4SF operand 1 to V4SI, of which
;; elements 0 and 1 are kept.  The V2SI result lives in an MMX register
;; ("=y").
1798 (define_insn "sse_cvtps2pi"
1799 [(set (match_operand:V2SI 0 "register_operand" "=y")
1801 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1803 (parallel [(const_int 0) (const_int 1)])))]
1805 "cvtps2pi\t{%1, %0|%0, %1}"
1806 [(set_attr "type" "ssecvt")
1807 (set_attr "unit" "mmx")
1808 (set_attr "mode" "DI")])
;; cvttps2pi: like sse_cvtps2pi but expressed with fix:V4SI instead of an
;; unspec.  Elements 0 and 1 are kept and the result lives in an MMX
;; register.
1810 (define_insn "sse_cvttps2pi"
1811 [(set (match_operand:V2SI 0 "register_operand" "=y")
1813 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
1814 (parallel [(const_int 0) (const_int 1)])))]
1816 "cvttps2pi\t{%1, %0|%0, %1}"
1817 [(set_attr "type" "ssecvt")
1818 (set_attr "unit" "mmx")
1819 (set_attr "prefix_rep" "0")
1820 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SImode operand 2 to SF.  Operand 1 is a V4SF
;; register.  Alternatives 0 and 1 are the SSE form with a register or
;; memory source (operand 1 tied to the destination); alternative 2 is the
;; three-operand VEX form.  The decode attributes differ per alternative.
1822 (define_insn "sse_cvtsi2ss"
1823 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1826 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
1827 (match_operand:V4SF 1 "register_operand" "0,0,x")
1831 cvtsi2ss\t{%2, %0|%0, %2}
1832 cvtsi2ss\t{%2, %0|%0, %2}
1833 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
1834 [(set_attr "isa" "noavx,noavx,avx")
1835 (set_attr "type" "sseicvt")
1836 (set_attr "athlon_decode" "vector,double,*")
1837 (set_attr "amdfam10_decode" "vector,double,*")
1838 (set_attr "bdver1_decode" "double,direct,*")
1839 (set_attr "prefix" "orig,orig,vex")
1840 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: the source is DImode and the pattern is
;; enabled only for TARGET_64BIT.  The SSE alternatives carry a REX prefix
;; (prefix_rex "1"); the VEX alternative has length_vex 4.
1842 (define_insn "sse_cvtsi2ssq"
1843 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
1846 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
1847 (match_operand:V4SF 1 "register_operand" "0,0,x")
1849 "TARGET_SSE && TARGET_64BIT"
1851 cvtsi2ssq\t{%2, %0|%0, %2}
1852 cvtsi2ssq\t{%2, %0|%0, %2}
1853 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
1854 [(set_attr "isa" "noavx,noavx,avx")
1855 (set_attr "type" "sseicvt")
1856 (set_attr "athlon_decode" "vector,double,*")
1857 (set_attr "amdfam10_decode" "vector,double,*")
1858 (set_attr "bdver1_decode" "double,direct,*")
1859 (set_attr "length_vex" "*,*,4")
1860 (set_attr "prefix_rex" "1,1,*")
1861 (set_attr "prefix" "orig,orig,vex")
1862 (set_attr "mode" "SF")])
;; [v]cvtss2si: UNSPEC_FIX_NOTRUNC applied to element 0 of the V4SF
;; operand 1, producing an SImode general register.
;; NOTE(review): unlike sse_cvtss2si_2 this pattern sets no
;; amdfam10_decode attribute -- confirm that is intended.
1864 (define_insn "sse_cvtss2si"
1865 [(set (match_operand:SI 0 "register_operand" "=r,r")
1868 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1869 (parallel [(const_int 0)]))]
1870 UNSPEC_FIX_NOTRUNC))]
1872 "%vcvtss2si\t{%1, %0|%0, %1}"
1873 [(set_attr "type" "sseicvt")
1874 (set_attr "athlon_decode" "double,vector")
1875 (set_attr "bdver1_decode" "double,double")
1876 (set_attr "prefix_rep" "1")
1877 (set_attr "prefix" "maybe_vex")
1878 (set_attr "mode" "SI")])
;; [v]cvtss2si taking a plain SFmode operand (register or memory) rather
;; than element 0 of a vector; same unspec and output template as
;; sse_cvtss2si.
1880 (define_insn "sse_cvtss2si_2"
1881 [(set (match_operand:SI 0 "register_operand" "=r,r")
1882 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1883 UNSPEC_FIX_NOTRUNC))]
1885 "%vcvtss2si\t{%1, %0|%0, %1}"
1886 [(set_attr "type" "sseicvt")
1887 (set_attr "athlon_decode" "double,vector")
1888 (set_attr "amdfam10_decode" "double,double")
1889 (set_attr "bdver1_decode" "double,double")
1890 (set_attr "prefix_rep" "1")
1891 (set_attr "prefix" "maybe_vex")
1892 (set_attr "mode" "SI")])
;; 64-bit result variant of sse_cvtss2si: the destination is DImode, the
;; pattern requires TARGET_64BIT, and the AT&T mnemonic gets a "q" suffix.
1894 (define_insn "sse_cvtss2siq"
1895 [(set (match_operand:DI 0 "register_operand" "=r,r")
1898 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1899 (parallel [(const_int 0)]))]
1900 UNSPEC_FIX_NOTRUNC))]
1901 "TARGET_SSE && TARGET_64BIT"
1902 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1903 [(set_attr "type" "sseicvt")
1904 (set_attr "athlon_decode" "double,vector")
1905 (set_attr "bdver1_decode" "double,double")
1906 (set_attr "prefix_rep" "1")
1907 (set_attr "prefix" "maybe_vex")
1908 (set_attr "mode" "DI")])
;; 64-bit result variant of sse_cvtss2si_2: plain SFmode source, DImode
;; destination, TARGET_64BIT only.
1910 (define_insn "sse_cvtss2siq_2"
1911 [(set (match_operand:DI 0 "register_operand" "=r,r")
1912 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
1913 UNSPEC_FIX_NOTRUNC))]
1914 "TARGET_SSE && TARGET_64BIT"
1915 "%vcvtss2si{q}\t{%1, %0|%0, %1}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "athlon_decode" "double,vector")
1918 (set_attr "amdfam10_decode" "double,double")
1919 (set_attr "bdver1_decode" "double,double")
1920 (set_attr "prefix_rep" "1")
1921 (set_attr "prefix" "maybe_vex")
1922 (set_attr "mode" "DI")])
;; [v]cvttss2si: converts element 0 of the V4SF operand 1 into an SImode
;; general register.
1924 (define_insn "sse_cvttss2si"
1925 [(set (match_operand:SI 0 "register_operand" "=r,r")
1928 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1929 (parallel [(const_int 0)]))))]
1931 "%vcvttss2si\t{%1, %0|%0, %1}"
1932 [(set_attr "type" "sseicvt")
1933 (set_attr "athlon_decode" "double,vector")
1934 (set_attr "amdfam10_decode" "double,double")
1935 (set_attr "bdver1_decode" "double,double")
1936 (set_attr "prefix_rep" "1")
1937 (set_attr "prefix" "maybe_vex")
1938 (set_attr "mode" "SI")])
;; 64-bit result variant of sse_cvttss2si: DImode destination,
;; TARGET_64BIT only, "q" suffix on the AT&T mnemonic.
1940 (define_insn "sse_cvttss2siq"
1941 [(set (match_operand:DI 0 "register_operand" "=r,r")
1944 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1945 (parallel [(const_int 0)]))))]
1946 "TARGET_SSE && TARGET_64BIT"
1947 "%vcvttss2si{q}\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "sseicvt")
1949 (set_attr "athlon_decode" "double,vector")
1950 (set_attr "amdfam10_decode" "double,double")
1951 (set_attr "bdver1_decode" "double,double")
1952 (set_attr "prefix_rep" "1")
1953 (set_attr "prefix" "maybe_vex")
1954 (set_attr "mode" "DI")])
;; 256-bit vcvtdq2ps: float conversion of a V8SI operand (register or
;; memory) to V8SF.  Always VEX-encoded.
1956 (define_insn "avx_cvtdq2ps256"
1957 [(set (match_operand:V8SF 0 "register_operand" "=x")
1958 (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
1960 "vcvtdq2ps\t{%1, %0|%0, %1}"
1961 [(set_attr "type" "ssecvt")
1962 (set_attr "prefix" "vex")
1963 (set_attr "mode" "V8SF")])
;; 128-bit [v]cvtdq2ps: float conversion of a V4SI operand to V4SF.  The
;; %v template prefix and the maybe_vex attribute cover both encodings.
1965 (define_insn "sse2_cvtdq2ps"
1966 [(set (match_operand:V4SF 0 "register_operand" "=x")
1967 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1969 "%vcvtdq2ps\t{%1, %0|%0, %1}"
1970 [(set_attr "type" "ssecvt")
1971 (set_attr "prefix" "maybe_vex")
1972 (set_attr "mode" "V4SF")])
;; V4SI -> V4SF conversion expander built from a signed conversion plus a
;; correction term.  The visible template performs a float conversion, a
;; less-than compare of operand 5 against operand 3, an AND of operand 6
;; with operand 4, and a final add of operands 5 and 7.  The preparation
;; code sets operand 3 to a zero vector and operand 4 to a vector of 2**32
;; (built with real_ldexp on dconst1); operands 5..7 are fresh temporaries.
1974 (define_expand "sse2_cvtudq2ps"
1976 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
1978 (lt:V4SF (match_dup 5) (match_dup 3)))
1980 (and:V4SF (match_dup 6) (match_dup 4)))
1981 (set (match_operand:V4SF 0 "register_operand" "")
1982 (plus:V4SF (match_dup 5) (match_dup 7)))]
1985 REAL_VALUE_TYPE TWO32r;
1989 real_ldexp (&TWO32r, &dconst1, 32);
1990 x = const_double_from_real_value (TWO32r, SFmode);
1992 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
1993 operands[4] = force_reg (V4SFmode,
1994 ix86_build_const_vector (V4SFmode, 1, x));
1996 for (i = 5; i < 8; i++)
1997 operands[i] = gen_reg_rtx (V4SFmode);
;; 256-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC conversion of a V8SF operand to
;; V8SI.  Always VEX-encoded; the insn mode is OI.
2000 (define_insn "avx_cvtps2dq256"
2001 [(set (match_operand:V8SI 0 "register_operand" "=x")
2002 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
2003 UNSPEC_FIX_NOTRUNC))]
2005 "vcvtps2dq\t{%1, %0|%0, %1}"
2006 [(set_attr "type" "ssecvt")
2007 (set_attr "prefix" "vex")
2008 (set_attr "mode" "OI")])
;; 128-bit [v]cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of a V4SF operand to
;; V4SI.  prefix_data16 is chosen by a test on TARGET_AVX.
2010 (define_insn "sse2_cvtps2dq"
2011 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
2013 UNSPEC_FIX_NOTRUNC))]
2015 "%vcvtps2dq\t{%1, %0|%0, %1}"
2016 [(set_attr "type" "ssecvt")
2017 (set (attr "prefix_data16")
2019 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2021 (const_string "1")))
2022 (set_attr "prefix" "maybe_vex")
2023 (set_attr "mode" "TI")])
;; 256-bit vcvttps2dq: fix conversion of a V8SF operand to V8SI.  Always
;; VEX-encoded.
2025 (define_insn "avx_cvttps2dq256"
2026 [(set (match_operand:V8SI 0 "register_operand" "=x")
2027 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
2029 "vcvttps2dq\t{%1, %0|%0, %1}"
2030 [(set_attr "type" "ssecvt")
2031 (set_attr "prefix" "vex")
2032 (set_attr "mode" "OI")])
;; 128-bit [v]cvttps2dq: fix conversion of a V4SF operand to V4SI.
;; prefix_rep and prefix_data16 are each chosen by a test on TARGET_AVX.
;; NOTE(review): prefix_data16 is assigned twice in this attribute list,
;; once conditionally and once unconditionally as "0".  One of the two
;; looks redundant; confirm which is intended before removing either.
2034 (define_insn "sse2_cvttps2dq"
2035 [(set (match_operand:V4SI 0 "register_operand" "=x")
2036 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2038 "%vcvttps2dq\t{%1, %0|%0, %1}"
2039 [(set_attr "type" "ssecvt")
2040 (set (attr "prefix_rep")
2042 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2044 (const_string "1")))
2045 (set (attr "prefix_data16")
2047 (ne (symbol_ref "TARGET_AVX") (const_int 0))
2049 (const_string "0")))
2050 (set_attr "prefix_data16" "0")
2051 (set_attr "prefix" "maybe_vex")
2052 (set_attr "mode" "TI")])
2054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2056 ;; Parallel double-precision floating point conversion operations
2058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float conversion of a V2SI operand to V2DF.  Alternative 0
;; takes the source from an MMX register (unit "mmx", data16 prefix);
;; alternative 1 takes it from memory.
2060 (define_insn "sse2_cvtpi2pd"
2061 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2062 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2064 "cvtpi2pd\t{%1, %0|%0, %1}"
2065 [(set_attr "type" "ssecvt")
2066 (set_attr "unit" "mmx,*")
2067 (set_attr "prefix_data16" "1,*")
2068 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of a V2DF operand to V2SI, with
;; the result in an MMX register ("=y").
2070 (define_insn "sse2_cvtpd2pi"
2071 [(set (match_operand:V2SI 0 "register_operand" "=y")
2072 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2073 UNSPEC_FIX_NOTRUNC))]
2075 "cvtpd2pi\t{%1, %0|%0, %1}"
2076 [(set_attr "type" "ssecvt")
2077 (set_attr "unit" "mmx")
2078 (set_attr "bdver1_decode" "double")
2079 (set_attr "prefix_data16" "1")
2080 (set_attr "mode" "DI")])
;; cvttpd2pi: fix conversion of a V2DF operand to V2SI, with the result in
;; an MMX register ("=y").
;; NOTE(review): the mode attribute is "TI" here but "DI" in
;; sse2_cvtpd2pi -- confirm the difference is intended.
2082 (define_insn "sse2_cvttpd2pi"
2083 [(set (match_operand:V2SI 0 "register_operand" "=y")
2084 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
2086 "cvttpd2pi\t{%1, %0|%0, %1}"
2087 [(set_attr "type" "ssecvt")
2088 (set_attr "unit" "mmx")
2089 (set_attr "bdver1_decode" "double")
2090 (set_attr "prefix_data16" "1")
2091 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SImode operand 2 to DF.  Operand 1 is a V2DF
;; register.  Alternatives 0 and 1 are the SSE form with a register or
;; memory source (operand 1 tied to the destination); alternative 2 is the
;; three-operand VEX form.
2093 (define_insn "sse2_cvtsi2sd"
2094 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2097 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
2098 (match_operand:V2DF 1 "register_operand" "0,0,x")
2102 cvtsi2sd\t{%2, %0|%0, %2}
2103 cvtsi2sd\t{%2, %0|%0, %2}
2104 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
2105 [(set_attr "isa" "noavx,noavx,avx")
2106 (set_attr "type" "sseicvt")
2107 (set_attr "athlon_decode" "double,direct,*")
2108 (set_attr "amdfam10_decode" "vector,double,*")
2109 (set_attr "bdver1_decode" "double,direct,*")
2110 (set_attr "prefix" "orig,orig,vex")
2111 (set_attr "mode" "DF")])
;; 64-bit variant of sse2_cvtsi2sd: the source is DImode and the pattern
;; is enabled only for TARGET_64BIT.  The SSE alternatives carry a REX
;; prefix (prefix_rex "1"); the VEX alternative has length_vex 4.
2113 (define_insn "sse2_cvtsi2sdq"
2114 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2117 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
2118 (match_operand:V2DF 1 "register_operand" "0,0,x")
2120 "TARGET_SSE2 && TARGET_64BIT"
2122 cvtsi2sdq\t{%2, %0|%0, %2}
2123 cvtsi2sdq\t{%2, %0|%0, %2}
2124 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2125 [(set_attr "isa" "noavx,noavx,avx")
2126 (set_attr "type" "sseicvt")
2127 (set_attr "athlon_decode" "double,direct,*")
2128 (set_attr "amdfam10_decode" "vector,double,*")
2129 (set_attr "bdver1_decode" "double,direct,*")
2130 (set_attr "length_vex" "*,*,4")
2131 (set_attr "prefix_rex" "1,1,*")
2132 (set_attr "prefix" "orig,orig,vex")
2133 (set_attr "mode" "DF")])
;; cvtsd2si: converts element 0 of V2DF operand 1 (SSE register or memory)
;; to an SImode general register, wrapped in UNSPEC_FIX_NOTRUNC.
;; NOTE(review): unlike sse2_cvtsd2si_2, no amdfam10_decode attribute is
;; set here -- confirm that is intended.
2135 (define_insn "sse2_cvtsd2si"
2136 [(set (match_operand:SI 0 "register_operand" "=r,r")
2139 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2140 (parallel [(const_int 0)]))]
2141 UNSPEC_FIX_NOTRUNC))]
2143 "%vcvtsd2si\t{%1, %0|%0, %1}"
2144 [(set_attr "type" "sseicvt")
2145 (set_attr "athlon_decode" "double,vector")
2146 (set_attr "bdver1_decode" "double,double")
2147 (set_attr "prefix_rep" "1")
2148 (set_attr "prefix" "maybe_vex")
2149 (set_attr "mode" "SI")])
;; Variant of sse2_cvtsd2si whose source is a scalar DFmode operand (SSE
;; register or memory) instead of an element selected from a V2DF vector.
;; Emits the same cvtsd2si instruction.
2151 (define_insn "sse2_cvtsd2si_2"
2152 [(set (match_operand:SI 0 "register_operand" "=r,r")
2153 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2154 UNSPEC_FIX_NOTRUNC))]
2156 "%vcvtsd2si\t{%1, %0|%0, %1}"
2157 [(set_attr "type" "sseicvt")
2158 (set_attr "athlon_decode" "double,vector")
2159 (set_attr "amdfam10_decode" "double,double")
2160 (set_attr "bdver1_decode" "double,double")
2161 (set_attr "prefix_rep" "1")
2162 (set_attr "prefix" "maybe_vex")
2163 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: converts element 0 of V2DF operand 1 to a DImode general
;; register via UNSPEC_FIX_NOTRUNC.  Requires TARGET_SSE2 && TARGET_64BIT;
;; the AT&T mnemonic carries a {q} suffix.
2165 (define_insn "sse2_cvtsd2siq"
2166 [(set (match_operand:DI 0 "register_operand" "=r,r")
2169 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2170 (parallel [(const_int 0)]))]
2171 UNSPEC_FIX_NOTRUNC))]
2172 "TARGET_SSE2 && TARGET_64BIT"
2173 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2174 [(set_attr "type" "sseicvt")
2175 (set_attr "athlon_decode" "double,vector")
2176 (set_attr "bdver1_decode" "double,double")
2177 (set_attr "prefix_rep" "1")
2178 (set_attr "prefix" "maybe_vex")
2179 (set_attr "mode" "DI")])
;; Variant of sse2_cvtsd2siq taking a scalar DFmode source (SSE register or
;; memory) and producing a DImode general register.  Requires
;; TARGET_SSE2 && TARGET_64BIT.
2181 (define_insn "sse2_cvtsd2siq_2"
2182 [(set (match_operand:DI 0 "register_operand" "=r,r")
2183 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")]
2184 UNSPEC_FIX_NOTRUNC))]
2185 "TARGET_SSE2 && TARGET_64BIT"
2186 "%vcvtsd2si{q}\t{%1, %0|%0, %1}"
2187 [(set_attr "type" "sseicvt")
2188 (set_attr "athlon_decode" "double,vector")
2189 (set_attr "amdfam10_decode" "double,double")
2190 (set_attr "bdver1_decode" "double,double")
2191 (set_attr "prefix_rep" "1")
2192 (set_attr "prefix" "maybe_vex")
2193 (set_attr "mode" "DI")])
;; cvttsd2si: converts element 0 of V2DF operand 1 (SSE register or memory)
;; to an SImode general register.  No UNSPEC wrapper appears in this
;; pattern, in contrast to sse2_cvtsd2si.
2195 (define_insn "sse2_cvttsd2si"
2196 [(set (match_operand:SI 0 "register_operand" "=r,r")
2199 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2200 (parallel [(const_int 0)]))))]
2202 "%vcvttsd2si\t{%1, %0|%0, %1}"
2203 [(set_attr "type" "sseicvt")
2204 (set_attr "athlon_decode" "double,vector")
2205 (set_attr "amdfam10_decode" "double,double")
2206 (set_attr "bdver1_decode" "double,double")
2207 (set_attr "prefix_rep" "1")
2208 (set_attr "prefix" "maybe_vex")
2209 (set_attr "mode" "SI")])
;; 64-bit cvttsd2si: converts element 0 of V2DF operand 1 to a DImode
;; general register.  Requires TARGET_SSE2 && TARGET_64BIT; the AT&T
;; mnemonic carries a {q} suffix.
2211 (define_insn "sse2_cvttsd2siq"
2212 [(set (match_operand:DI 0 "register_operand" "=r,r")
2215 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
2216 (parallel [(const_int 0)]))))]
2217 "TARGET_SSE2 && TARGET_64BIT"
2218 "%vcvttsd2si{q}\t{%1, %0|%0, %1}"
2219 [(set_attr "type" "sseicvt")
2220 (set_attr "athlon_decode" "double,vector")
2221 (set_attr "amdfam10_decode" "double,double")
2222 (set_attr "bdver1_decode" "double,double")
2223 (set_attr "prefix_rep" "1")
2224 (set_attr "prefix" "maybe_vex")
2225 (set_attr "mode" "DI")])
;; vcvtdq2pd (256-bit result): converts the four SImode elements of V4SI
;; operand 1 (register or memory) to a V4DF register using `float'.
2227 (define_insn "avx_cvtdq2pd256"
2228 [(set (match_operand:V4DF 0 "register_operand" "=x")
2229 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
2231 "vcvtdq2pd\t{%1, %0|%0, %1}"
2232 [(set_attr "type" "ssecvt")
2233 (set_attr "prefix" "vex")
2234 (set_attr "mode" "V4DF")])
;; Unnamed variant of avx_cvtdq2pd256 whose source is elements 0..3 selected
;; from a V8SI operand.  The template prints operand 1 with the %x modifier.
2236 (define_insn "*avx_cvtdq2pd256_2"
2237 [(set (match_operand:V4DF 0 "register_operand" "=x")
2240 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
2241 (parallel [(const_int 0) (const_int 1)
2242 (const_int 2) (const_int 3)]))))]
2244 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
2245 [(set_attr "type" "ssecvt")
2246 (set_attr "prefix" "vex")
2247 (set_attr "mode" "V4DF")])
;; cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 (register or
;; memory) into a V2DF register.
2249 (define_insn "sse2_cvtdq2pd"
2250 [(set (match_operand:V2DF 0 "register_operand" "=x")
2253 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
2254 (parallel [(const_int 0) (const_int 1)]))))]
2256 "%vcvtdq2pd\t{%1, %0|%0, %1}"
2257 [(set_attr "type" "ssecvt")
2258 (set_attr "prefix" "maybe_vex")
2259 (set_attr "mode" "V2DF")])
;; vcvtpd2dq with a 256-bit source: converts V4DF operand 1 (register or
;; memory) to a V4SI register via UNSPEC_FIX_NOTRUNC.  The AT&T mnemonic
;; carries a {y} suffix.
2261 (define_insn "avx_cvtpd2dq256"
2262 [(set (match_operand:V4SI 0 "register_operand" "=x")
2263 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
2264 UNSPEC_FIX_NOTRUNC))]
2266 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
2267 [(set_attr "type" "ssecvt")
2268 (set_attr "prefix" "vex")
2269 (set_attr "mode" "OI")])
;; Expander for cvtpd2dq: V2DF operand 1 is converted to V2SI through an
;; unspec and the result is delivered in V4SI operand 0.  The preparation
;; statement sets operands[2] to the V2SImode zero constant.
2271 (define_expand "sse2_cvtpd2dq"
2272 [(set (match_operand:V4SI 0 "register_operand" "")
2274 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2278 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching the RTL built by the sse2_cvtpd2dq expander: a V2SI unspec
;; of V2DF operand 1 combined with operand 2, which must satisfy
;; const0_operand.  The output code returns either the VEX spelling
;; vcvtpd2dq{x} or the legacy cvtpd2dq.
2280 (define_insn "*sse2_cvtpd2dq"
2281 [(set (match_operand:V4SI 0 "register_operand" "=x")
2283 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2285 (match_operand:V2SI 2 "const0_operand" "")))]
2289 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
2291 return "cvtpd2dq\t{%1, %0|%0, %1}";
2293 [(set_attr "type" "ssecvt")
2294 (set_attr "prefix_rep" "1")
2295 (set_attr "prefix_data16" "0")
2296 (set_attr "prefix" "maybe_vex")
2297 (set_attr "mode" "TI")
2298 (set_attr "amdfam10_decode" "double")
2299 (set_attr "athlon_decode" "vector")
2300 (set_attr "bdver1_decode" "double")])
;; vcvttpd2dq with a 256-bit source: converts V4DF operand 1 (register or
;; memory) to a V4SI register using the `fix' RTL code.
2302 (define_insn "avx_cvttpd2dq256"
2303 [(set (match_operand:V4SI 0 "register_operand" "=x")
2304 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2306 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
2307 [(set_attr "type" "ssecvt")
2308 (set_attr "prefix" "vex")
2309 (set_attr "mode" "OI")])
;; Expander for cvttpd2dq: V2DF operand 1 is converted to V2SI with `fix'
;; and the result is delivered in V4SI operand 0.  The preparation statement
;; sets operands[2] to the V2SImode zero constant.
2311 (define_expand "sse2_cvttpd2dq"
2312 [(set (match_operand:V4SI 0 "register_operand" "")
2314 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2317 "operands[2] = CONST0_RTX (V2SImode);")
;; Insn matching the RTL built by the sse2_cvttpd2dq expander: fix:V2SI of
;; V2DF operand 1 combined with operand 2, which must satisfy
;; const0_operand.  The output code returns either vcvttpd2dq{x} or the
;; legacy cvttpd2dq.
2319 (define_insn "*sse2_cvttpd2dq"
2320 [(set (match_operand:V4SI 0 "register_operand" "=x")
2322 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2323 (match_operand:V2SI 2 "const0_operand" "")))]
2327 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
2329 return "cvttpd2dq\t{%1, %0|%0, %1}";
2331 [(set_attr "type" "ssecvt")
2332 (set_attr "amdfam10_decode" "double")
2333 (set_attr "athlon_decode" "vector")
2334 (set_attr "bdver1_decode" "double")
2335 (set_attr "prefix" "maybe_vex")
2336 (set_attr "mode" "TI")])
;; cvtsd2ss: float_truncate of V2DF operand 2 (register or memory) feeding a
;; V4SF destination; operand 1 is a V4SF register input.  Alternatives 0/1
;; are the two-operand non-AVX forms (operand 1 tied to operand 0),
;; alternative 2 is the three-operand VEX form.
2338 (define_insn "sse2_cvtsd2ss"
2339 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
2342 (float_truncate:V2SF
2343 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm")))
2344 (match_operand:V4SF 1 "register_operand" "0,0,x")
2348 cvtsd2ss\t{%2, %0|%0, %2}
2349 cvtsd2ss\t{%2, %0|%0, %2}
2350 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
2351 [(set_attr "isa" "noavx,noavx,avx")
2352 (set_attr "type" "ssecvt")
2353 (set_attr "athlon_decode" "vector,double,*")
2354 (set_attr "amdfam10_decode" "vector,double,*")
2355 (set_attr "bdver1_decode" "direct,direct,*")
2356 (set_attr "prefix" "orig,orig,vex")
2357 (set_attr "mode" "SF")])
;; cvtss2sd: reads elements 0 and 1 selected from V4SF operand 2 (register
;; or memory) and produces a V2DF destination; operand 1 is a V2DF register
;; input.  Alternatives 0/1 are the two-operand non-AVX forms, alternative 2
;; is the three-operand VEX form.
2359 (define_insn "sse2_cvtss2sd"
2360 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
2364 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm")
2365 (parallel [(const_int 0) (const_int 1)])))
2366 (match_operand:V2DF 1 "register_operand" "0,0,x")
2370 cvtss2sd\t{%2, %0|%0, %2}
2371 cvtss2sd\t{%2, %0|%0, %2}
2372 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
2373 [(set_attr "isa" "noavx,noavx,avx")
2374 (set_attr "type" "ssecvt")
2375 (set_attr "amdfam10_decode" "vector,double,*")
2376 (set_attr "athlon_decode" "direct,direct,*")
2377 (set_attr "bdver1_decode" "direct,direct,*")
2378 (set_attr "prefix" "orig,orig,vex")
2379 (set_attr "mode" "DF")])
;; vcvtpd2ps with a 256-bit source: float_truncate of V4DF operand 1
;; (register or memory) into a V4SF register.
2381 (define_insn "avx_cvtpd2ps256"
2382 [(set (match_operand:V4SF 0 "register_operand" "=x")
2383 (float_truncate:V4SF
2384 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
2386 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
2387 [(set_attr "type" "ssecvt")
2388 (set_attr "prefix" "vex")
2389 (set_attr "mode" "V4SF")])
;; Expander for cvtpd2ps: float_truncate of V2DF operand 1 to V2SF,
;; delivered in V4SF operand 0.  The preparation statement sets operands[2]
;; to the V2SFmode zero constant.
2391 (define_expand "sse2_cvtpd2ps"
2392 [(set (match_operand:V4SF 0 "register_operand" "")
2394 (float_truncate:V2SF
2395 (match_operand:V2DF 1 "nonimmediate_operand" ""))
2398 "operands[2] = CONST0_RTX (V2SFmode);")
;; Insn matching the RTL built by the sse2_cvtpd2ps expander:
;; float_truncate:V2SF of V2DF operand 1 combined with operand 2, which must
;; satisfy const0_operand.  The output code returns either vcvtpd2ps{x} or
;; the legacy cvtpd2ps.
2400 (define_insn "*sse2_cvtpd2ps"
2401 [(set (match_operand:V4SF 0 "register_operand" "=x")
2403 (float_truncate:V2SF
2404 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2405 (match_operand:V2SF 2 "const0_operand" "")))]
2409 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
2411 return "cvtpd2ps\t{%1, %0|%0, %1}";
2413 [(set_attr "type" "ssecvt")
2414 (set_attr "amdfam10_decode" "double")
2415 (set_attr "athlon_decode" "vector")
2416 (set_attr "bdver1_decode" "double")
2417 (set_attr "prefix_data16" "1")
2418 (set_attr "prefix" "maybe_vex")
2419 (set_attr "mode" "V4SF")])
;; vcvtps2pd (256-bit result): converts V4SF operand 1 (register or memory)
;; into a V4DF register.
2421 (define_insn "avx_cvtps2pd256"
2422 [(set (match_operand:V4DF 0 "register_operand" "=x")
2424 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2426 "vcvtps2pd\t{%1, %0|%0, %1}"
2427 [(set_attr "type" "ssecvt")
2428 (set_attr "prefix" "vex")
2429 (set_attr "mode" "V4DF")])
;; Unnamed variant of avx_cvtps2pd256 whose source is elements 0..3 selected
;; from a V8SF operand.  The template prints operand 1 with the %x modifier.
2431 (define_insn "*avx_cvtps2pd256_2"
2432 [(set (match_operand:V4DF 0 "register_operand" "=x")
2435 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2436 (parallel [(const_int 0) (const_int 1)
2437 (const_int 2) (const_int 3)]))))]
2439 "vcvtps2pd\t{%x1, %0|%0, %x1}"
2440 [(set_attr "type" "ssecvt")
2441 (set_attr "prefix" "vex")
2442 (set_attr "mode" "V4DF")])
;; cvtps2pd: converts elements 0 and 1 of V4SF operand 1 (register or
;; memory) into a V2DF register.
2444 (define_insn "sse2_cvtps2pd"
2445 [(set (match_operand:V2DF 0 "register_operand" "=x")
2448 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2449 (parallel [(const_int 0) (const_int 1)]))))]
2451 "%vcvtps2pd\t{%1, %0|%0, %1}"
2452 [(set_attr "type" "ssecvt")
2453 (set_attr "amdfam10_decode" "direct")
2454 (set_attr "athlon_decode" "double")
2455 (set_attr "bdver1_decode" "double")
2456 (set_attr "prefix_data16" "0")
2457 (set_attr "prefix" "maybe_vex")
2458 (set_attr "mode" "V2DF")])
;; Expander producing a V2DF result (operand 0) from V4SF operand 1 in two
;; steps.  A first select with indices 6,7,2,3 is applied to an expression
;; built from operand 1; a second step then selects elements 0 and 1 and
;; writes operand 0.  operands[2] is a fresh V4SF temporary, presumably
;; holding the intermediate shuffle -- confirm against the full pattern.
2460 (define_expand "vec_unpacks_hi_v4sf"
2465 (match_operand:V4SF 1 "nonimmediate_operand" ""))
2466 (parallel [(const_int 6) (const_int 7)
2467 (const_int 2) (const_int 3)])))
2468 (set (match_operand:V2DF 0 "register_operand" "")
2472 (parallel [(const_int 0) (const_int 1)]))))]
2474 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing a V4DF result (operand 0) from V8SF operand 1:
;; elements 4..7 of operand 1 are selected first, then operand 0 is set.
;; operands[2] is a fresh V4SF temporary, presumably holding the selected
;; half -- confirm against the full pattern.
2476 (define_expand "vec_unpacks_hi_v8sf"
2479 (match_operand:V8SF 1 "nonimmediate_operand" "")
2480 (parallel [(const_int 4) (const_int 5)
2481 (const_int 6) (const_int 7)])))
2482 (set (match_operand:V4DF 0 "register_operand" "")
2486 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Expander producing a V2DF result (operand 0) from elements 0 and 1 of
;; V4SF operand 1.
2488 (define_expand "vec_unpacks_lo_v4sf"
2489 [(set (match_operand:V2DF 0 "register_operand" "")
2492 (match_operand:V4SF 1 "nonimmediate_operand" "")
2493 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF result (operand 0) from elements 0..3 of V8SF
;; operand 1.
2496 (define_expand "vec_unpacks_lo_v8sf"
2497 [(set (match_operand:V4DF 0 "register_operand" "")
2500 (match_operand:V8SF 1 "nonimmediate_operand" "")
2501 (parallel [(const_int 0) (const_int 1)
2502 (const_int 2) (const_int 3)]))))]
;; Expander from V8HI operand 1 to V4SF operand 0.  It unpacks into a V4SI
;; temporary with gen_vec_unpacks_hi_v8hi, then converts that temporary to
;; V4SF with gen_sse2_cvtdq2ps.
2505 (define_expand "vec_unpacks_float_hi_v8hi"
2506 [(match_operand:V4SF 0 "register_operand" "")
2507 (match_operand:V8HI 1 "register_operand" "")]
2510 rtx tmp = gen_reg_rtx (V4SImode);
2512 emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
2513 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It unpacks into a V4SI
;; temporary with gen_vec_unpacks_lo_v8hi, then converts that temporary to
;; V4SF with gen_sse2_cvtdq2ps.
2517 (define_expand "vec_unpacks_float_lo_v8hi"
2518 [(match_operand:V4SF 0 "register_operand" "")
2519 (match_operand:V8HI 1 "register_operand" "")]
2522 rtx tmp = gen_reg_rtx (V4SImode);
2524 emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
2525 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It unpacks into a V4SI
;; temporary with gen_vec_unpacku_hi_v8hi, then converts that temporary to
;; V4SF with gen_sse2_cvtdq2ps.
2529 (define_expand "vec_unpacku_float_hi_v8hi"
2530 [(match_operand:V4SF 0 "register_operand" "")
2531 (match_operand:V8HI 1 "register_operand" "")]
2534 rtx tmp = gen_reg_rtx (V4SImode);
2536 emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
2537 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander from V8HI operand 1 to V4SF operand 0.  It unpacks into a V4SI
;; temporary with gen_vec_unpacku_lo_v8hi, then converts that temporary to
;; V4SF with gen_sse2_cvtdq2ps.
2541 (define_expand "vec_unpacku_float_lo_v8hi"
2542 [(match_operand:V4SF 0 "register_operand" "")
2543 (match_operand:V8HI 1 "register_operand" "")]
2546 rtx tmp = gen_reg_rtx (V4SImode);
2548 emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
2549 emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
;; Expander producing a V2DF result (operand 0) from V4SI operand 1 in two
;; steps.  Operand 1 is first permuted with indices 2,3,2,3; a second step
;; then selects elements 0 and 1 and writes operand 0.  operands[2] is a
;; fresh V4SI temporary, presumably holding the permuted vector -- confirm
;; against the full pattern.
2553 (define_expand "vec_unpacks_float_hi_v4si"
2556 (match_operand:V4SI 1 "nonimmediate_operand" "")
2557 (parallel [(const_int 2) (const_int 3)
2558 (const_int 2) (const_int 3)])))
2559 (set (match_operand:V2DF 0 "register_operand" "")
2563 (parallel [(const_int 0) (const_int 1)]))))]
2565 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V2DF result (operand 0) from elements 0 and 1 of
;; V4SI operand 1.
2567 (define_expand "vec_unpacks_float_lo_v4si"
2568 [(set (match_operand:V2DF 0 "register_operand" "")
2571 (match_operand:V4SI 1 "nonimmediate_operand" "")
2572 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander producing a V4DF result (operand 0) from V8SI operand 1:
;; elements 4..7 of operand 1 are selected first, then operand 0 is set.
;; operands[2] is a fresh V4SI temporary, presumably holding the selected
;; half -- confirm against the full pattern.
2575 (define_expand "vec_unpacks_float_hi_v8si"
2578 (match_operand:V8SI 1 "nonimmediate_operand" "")
2579 (parallel [(const_int 4) (const_int 5)
2580 (const_int 6) (const_int 7)])))
2581 (set (match_operand:V4DF 0 "register_operand" "")
2585 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander producing a V4DF result (operand 0) from elements 0..3 of V8SI
;; operand 1.
2587 (define_expand "vec_unpacks_float_lo_v8si"
2588 [(set (match_operand:V4DF 0 "register_operand" "")
2591 (match_operand:V8SI 1 "nonimmediate_operand" "")
2592 (parallel [(const_int 0) (const_int 1)
2593 (const_int 2) (const_int 3)]))))]
;; Expander producing a V2DF result (operand 0) from V4SI operand 1.
;; Visible steps: permute operand 1 with indices 2,3,2,3; take elements 0
;; and 1; compare temporary 6 against operand 3 with `lt'; AND temporary 7
;; with operand 4; finally add temporaries 6 and 8 into operand 0.
;; The preparation code sets operands[3] to a zero V2DF vector and
;; operands[4] to a V2DF vector built from 1.0 scaled by 2^32 (real_ldexp
;; with exponent 32).  operands[5] is a V4SI temporary and operands[6..8]
;; are V2DF temporaries.
;; NOTE(review): this looks like a signed conversion followed by adding 2^32
;; to lanes that came out negative -- confirm against the full pattern.
2596 (define_expand "vec_unpacku_float_hi_v4si"
2599 (match_operand:V4SI 1 "nonimmediate_operand" "")
2600 (parallel [(const_int 2) (const_int 3)
2601 (const_int 2) (const_int 3)])))
2606 (parallel [(const_int 0) (const_int 1)]))))
2608 (lt:V2DF (match_dup 6) (match_dup 3)))
2610 (and:V2DF (match_dup 7) (match_dup 4)))
2611 (set (match_operand:V2DF 0 "register_operand" "")
2612 (plus:V2DF (match_dup 6) (match_dup 8)))]
2615 REAL_VALUE_TYPE TWO32r;
2619 real_ldexp (&TWO32r, &dconst1, 32);
2620 x = const_double_from_real_value (TWO32r, DFmode);
2622 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2623 operands[4] = force_reg (V2DFmode,
2624 ix86_build_const_vector (V2DFmode, 1, x));
2626 operands[5] = gen_reg_rtx (V4SImode);
2628 for (i = 6; i < 9; i++)
2629 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V2DF result (operand 0) from elements 0 and 1 of
;; V4SI operand 1.  Visible steps: compare temporary 5 against operand 3
;; with `lt'; AND temporary 6 with operand 4; add temporaries 5 and 7 into
;; operand 0.  The preparation code sets operands[3] to a zero V2DF vector
;; and operands[4] to a V2DF vector built from 1.0 scaled by 2^32;
;; operands[5..7] are V2DF temporaries.
;; NOTE(review): this looks like a signed conversion followed by adding 2^32
;; to lanes that came out negative -- confirm against the full pattern.
2632 (define_expand "vec_unpacku_float_lo_v4si"
2636 (match_operand:V4SI 1 "nonimmediate_operand" "")
2637 (parallel [(const_int 0) (const_int 1)]))))
2639 (lt:V2DF (match_dup 5) (match_dup 3)))
2641 (and:V2DF (match_dup 6) (match_dup 4)))
2642 (set (match_operand:V2DF 0 "register_operand" "")
2643 (plus:V2DF (match_dup 5) (match_dup 7)))]
2646 REAL_VALUE_TYPE TWO32r;
2650 real_ldexp (&TWO32r, &dconst1, 32);
2651 x = const_double_from_real_value (TWO32r, DFmode);
2653 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
2654 operands[4] = force_reg (V2DFmode,
2655 ix86_build_const_vector (V2DFmode, 1, x));
2657 for (i = 5; i < 8; i++)
2658 operands[i] = gen_reg_rtx (V2DFmode);
;; Expander producing a V8SF result (operand 0) from two V4DF inputs: each
;; of operands 1 and 2 is float_truncate'd to V4SF, and operand 0 is then
;; set.  operands[3] and operands[4] are fresh V4SF temporaries, presumably
;; holding the two truncated halves -- confirm against the full pattern.
2661 (define_expand "vec_pack_trunc_v4df"
2663 (float_truncate:V4SF
2664 (match_operand:V4DF 1 "nonimmediate_operand" "")))
2666 (float_truncate:V4SF
2667 (match_operand:V4DF 2 "nonimmediate_operand" "")))
2668 (set (match_operand:V8SF 0 "register_operand" "")
2674 operands[3] = gen_reg_rtx (V4SFmode);
2675 operands[4] = gen_reg_rtx (V4SFmode);
;; Expander producing a V4SF result (operand 0) from two V2DF inputs.  Each
;; input is converted with gen_sse2_cvtpd2ps into its own V4SF temporary
;; (r1, r2), and the two temporaries are combined with gen_sse_movlhps.
2678 (define_expand "vec_pack_trunc_v2df"
2679 [(match_operand:V4SF 0 "register_operand" "")
2680 (match_operand:V2DF 1 "nonimmediate_operand" "")
2681 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2686 r1 = gen_reg_rtx (V4SFmode);
2687 r2 = gen_reg_rtx (V4SFmode);
2689 emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
2690 emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
2691 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
;; Expander producing a V4SI result (operand 0) from two V2DF inputs.  Each
;; input is converted with gen_sse2_cvttpd2dq into its own V4SI temporary
;; (r1, r2).  The temporaries and the destination are then viewed as V2DI
;; (gen_lowpart) and combined with gen_vec_interleave_lowv2di.
2695 (define_expand "vec_pack_sfix_trunc_v2df"
2696 [(match_operand:V4SI 0 "register_operand" "")
2697 (match_operand:V2DF 1 "nonimmediate_operand" "")
2698 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2703 r1 = gen_reg_rtx (V4SImode);
2704 r2 = gen_reg_rtx (V4SImode);
2706 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2707 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2708 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2709 gen_lowpart (V2DImode, r1),
2710 gen_lowpart (V2DImode, r2)));
;; Expander producing a V4SI result (operand 0) from two V2DF inputs.  Same
;; shape as vec_pack_sfix_trunc_v2df, but each input is converted with
;; gen_sse2_cvtpd2dq.  The V4SI temporaries and the destination are viewed
;; as V2DI (gen_lowpart) and combined with gen_vec_interleave_lowv2di.
2714 (define_expand "vec_pack_sfix_v2df"
2715 [(match_operand:V4SI 0 "register_operand" "")
2716 (match_operand:V2DF 1 "nonimmediate_operand" "")
2717 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2722 r1 = gen_reg_rtx (V4SImode);
2723 r2 = gen_reg_rtx (V4SImode);
2725 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2726 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2727 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2728 gen_lowpart (V2DImode, r1),
2729 gen_lowpart (V2DImode, r2)));
2733 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2735 ;; Parallel single-precision floating point element swizzling
2737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts any nonimmediate
;; operands.  It calls ix86_fixup_binary_operands to obtain a usable
;; destination, emits gen_sse_movhlps into it, and copies the result to
;; operand 0 when the fixup substituted a different destination.
2739 (define_expand "sse_movhlps_exp"
2740 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2743 (match_operand:V4SF 1 "nonimmediate_operand" "")
2744 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2745 (parallel [(const_int 6)
2751 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2753 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
2755 /* Fix up the destination if needed. */
2756 if (dst != operands[0])
2757 emit_move_insn (operands[0], dst);
;; movhlps family, five alternatives.  The condition rejects the case where
;; operands 1 and 2 are both memory.
;;   0/1: register/register movhlps and vmovhlps;
;;   2/3: operand 2 in offsettable memory ("o"), emitted as movlps/vmovlps
;;        reading %H2;
;;   4:   memory destination, emitted as (v)movhps storing from register
;;        operand 2.
2762 (define_insn "sse_movhlps"
2763 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
2766 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2767 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
2768 (parallel [(const_int 6)
2772 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2774 movhlps\t{%2, %0|%0, %2}
2775 vmovhlps\t{%2, %1, %0|%0, %1, %2}
2776 movlps\t{%H2, %0|%0, %H2}
2777 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
2778 %vmovhps\t{%2, %0|%0, %2}"
2779 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2780 (set_attr "type" "ssemov")
2781 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2782 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts any nonimmediate
;; operands.  It calls ix86_fixup_binary_operands to obtain a usable
;; destination, emits gen_sse_movlhps into it, and copies the result to
;; operand 0 when the fixup substituted a different destination.
2784 (define_expand "sse_movlhps_exp"
2785 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
2788 (match_operand:V4SF 1 "nonimmediate_operand" "")
2789 (match_operand:V4SF 2 "nonimmediate_operand" ""))
2790 (parallel [(const_int 0)
2796 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
2798 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
2800 /* Fix up the destination if needed. */
2801 if (dst != operands[0])
2802 emit_move_insn (operands[0], dst);
;; movlhps family, five alternatives.  The condition requires
;; ix86_binary_operator_ok for the operands.
;;   0/1: register/register movlhps and vmovlhps;
;;   2/3: operand 2 in memory, emitted as movhps/vmovhps.  Both need a
;;        memory source, so operand 2 is "m" in both alternatives; the
;;        register/register VEX case is already handled by alternative 1;
;;   4:   offsettable memory destination, emitted as (v)movlps storing
;;        register operand 2 to %H0.
2807 (define_insn "sse_movlhps"
2808 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
2811 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
2812 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
2813 (parallel [(const_int 0)
2817 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
2819 movlhps\t{%2, %0|%0, %2}
2820 vmovlhps\t{%2, %1, %0|%0, %1, %2}
2821 movhps\t{%2, %0|%0, %2}
2822 vmovhps\t{%2, %1, %0|%0, %1, %2}
2823 %vmovlps\t{%2, %H0|%H0, %2}"
2824 [(set_attr "isa" "noavx,avx,noavx,avx,base")
2825 (set_attr "type" "ssemov")
2826 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
2827 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
2829 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on 256-bit vectors: selects elements 2,10,3,11,6,14,7,15 from
;; the pair (operand 1, operand 2).  Operand 1 must be a register; operand 2
;; may be a register or memory.
2830 (define_insn "avx_unpckhps256"
2831 [(set (match_operand:V8SF 0 "register_operand" "=x")
2834 (match_operand:V8SF 1 "register_operand" "x")
2835 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2836 (parallel [(const_int 2) (const_int 10)
2837 (const_int 3) (const_int 11)
2838 (const_int 6) (const_int 14)
2839 (const_int 7) (const_int 15)])))]
2841 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2842 [(set_attr "type" "sselog")
2843 (set_attr "prefix" "vex")
2844 (set_attr "mode" "V8SF")])
;; Expander for a V8SF interleave in three steps.  Two selects use the same
;; index lists as avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15).  A final select with indices 4..7,12..15 writes
;; operand 0.  operands[3] and operands[4] are fresh V8SF temporaries,
;; presumably holding the two intermediate results -- confirm against the
;; full pattern.
2846 (define_expand "vec_interleave_highv8sf"
2850 (match_operand:V8SF 1 "register_operand" "x")
2851 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2852 (parallel [(const_int 0) (const_int 8)
2853 (const_int 1) (const_int 9)
2854 (const_int 4) (const_int 12)
2855 (const_int 5) (const_int 13)])))
2861 (parallel [(const_int 2) (const_int 10)
2862 (const_int 3) (const_int 11)
2863 (const_int 6) (const_int 14)
2864 (const_int 7) (const_int 15)])))
2865 (set (match_operand:V8SF 0 "register_operand" "")
2870 (parallel [(const_int 4) (const_int 5)
2871 (const_int 6) (const_int 7)
2872 (const_int 12) (const_int 13)
2873 (const_int 14) (const_int 15)])))]
2876 operands[3] = gen_reg_rtx (V8SFmode);
2877 operands[4] = gen_reg_rtx (V8SFmode);
;; unpckhps / vunpckhps on 128-bit vectors: selects elements 2,6,3,7 from
;; the pair (operand 1, operand 2).  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.
2880 (define_insn "vec_interleave_highv4sf"
2881 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2884 (match_operand:V4SF 1 "register_operand" "0,x")
2885 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2886 (parallel [(const_int 2) (const_int 6)
2887 (const_int 3) (const_int 7)])))]
2890 unpckhps\t{%2, %0|%0, %2}
2891 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
2892 [(set_attr "isa" "noavx,avx")
2893 (set_attr "type" "sselog")
2894 (set_attr "prefix" "orig,vex")
2895 (set_attr "mode" "V4SF")])
2897 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on 256-bit vectors: selects elements 0,8,1,9,4,12,5,13 from the
;; pair (operand 1, operand 2).  Operand 1 must be a register; operand 2 may
;; be a register or memory.
2898 (define_insn "avx_unpcklps256"
2899 [(set (match_operand:V8SF 0 "register_operand" "=x")
2902 (match_operand:V8SF 1 "register_operand" "x")
2903 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2904 (parallel [(const_int 0) (const_int 8)
2905 (const_int 1) (const_int 9)
2906 (const_int 4) (const_int 12)
2907 (const_int 5) (const_int 13)])))]
2909 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2910 [(set_attr "type" "sselog")
2911 (set_attr "prefix" "vex")
2912 (set_attr "mode" "V8SF")])
;; Expander for a V8SF interleave in three steps.  Two selects use the same
;; index lists as avx_unpcklps256 (0,8,1,9,4,12,5,13) and avx_unpckhps256
;; (2,10,3,11,6,14,7,15).  A final select with indices 0..3,8..11 writes
;; operand 0.  operands[3] and operands[4] are fresh V8SF temporaries,
;; presumably holding the two intermediate results -- confirm against the
;; full pattern.
2914 (define_expand "vec_interleave_lowv8sf"
2918 (match_operand:V8SF 1 "register_operand" "x")
2919 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2920 (parallel [(const_int 0) (const_int 8)
2921 (const_int 1) (const_int 9)
2922 (const_int 4) (const_int 12)
2923 (const_int 5) (const_int 13)])))
2929 (parallel [(const_int 2) (const_int 10)
2930 (const_int 3) (const_int 11)
2931 (const_int 6) (const_int 14)
2932 (const_int 7) (const_int 15)])))
2933 (set (match_operand:V8SF 0 "register_operand" "")
2938 (parallel [(const_int 0) (const_int 1)
2939 (const_int 2) (const_int 3)
2940 (const_int 8) (const_int 9)
2941 (const_int 10) (const_int 11)])))]
2944 operands[3] = gen_reg_rtx (V8SFmode);
2945 operands[4] = gen_reg_rtx (V8SFmode);
;; unpcklps / vunpcklps on 128-bit vectors: selects elements 0,4,1,5 from
;; the pair (operand 1, operand 2).  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.
2948 (define_insn "vec_interleave_lowv4sf"
2949 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2952 (match_operand:V4SF 1 "register_operand" "0,x")
2953 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2954 (parallel [(const_int 0) (const_int 4)
2955 (const_int 1) (const_int 5)])))]
2958 unpcklps\t{%2, %0|%0, %2}
2959 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
2960 [(set_attr "isa" "noavx,avx")
2961 (set_attr "type" "sselog")
2962 (set_attr "prefix" "orig,vex")
2963 (set_attr "mode" "V4SF")])
2965 ;; These are modeled with the same vec_concat as the others so that we
2966 ;; capture users of shufps that can use the new instructions
;; vmovshdup on a 256-bit vector: the select list is 1,1,3,3,5,5,7,7, so
;; each odd-indexed element of V8SF operand 1 (register or memory) is
;; duplicated.
2967 (define_insn "avx_movshdup256"
2968 [(set (match_operand:V8SF 0 "register_operand" "=x")
2971 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
2973 (parallel [(const_int 1) (const_int 1)
2974 (const_int 3) (const_int 3)
2975 (const_int 5) (const_int 5)
2976 (const_int 7) (const_int 7)])))]
2978 "vmovshdup\t{%1, %0|%0, %1}"
2979 [(set_attr "type" "sse")
2980 (set_attr "prefix" "vex")
2981 (set_attr "mode" "V8SF")])
;; (v)movshdup on a 128-bit vector: V4SF operand 1 (register or memory) to a
;; V4SF register.  The select list starts at element 1.
2983 (define_insn "sse3_movshdup"
2984 [(set (match_operand:V4SF 0 "register_operand" "=x")
2987 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2989 (parallel [(const_int 1)
2994 "%vmovshdup\t{%1, %0|%0, %1}"
2995 [(set_attr "type" "sse")
2996 (set_attr "prefix_rep" "1")
2997 (set_attr "prefix" "maybe_vex")
2998 (set_attr "mode" "V4SF")])
;; vmovsldup on a 256-bit vector: the select list is 0,0,2,2,4,4,6,6, so
;; each even-indexed element of V8SF operand 1 (register or memory) is
;; duplicated.
3000 (define_insn "avx_movsldup256"
3001 [(set (match_operand:V8SF 0 "register_operand" "=x")
3004 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
3006 (parallel [(const_int 0) (const_int 0)
3007 (const_int 2) (const_int 2)
3008 (const_int 4) (const_int 4)
3009 (const_int 6) (const_int 6)])))]
3011 "vmovsldup\t{%1, %0|%0, %1}"
3012 [(set_attr "type" "sse")
3013 (set_attr "prefix" "vex")
3014 (set_attr "mode" "V8SF")])
;; (v)movsldup on a 128-bit vector: V4SF operand 1 (register or memory) to a
;; V4SF register.  The select list starts at element 0.
3016 (define_insn "sse3_movsldup"
3017 [(set (match_operand:V4SF 0 "register_operand" "=x")
3020 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
3022 (parallel [(const_int 0)
3027 "%vmovsldup\t{%1, %0|%0, %1}"
3028 [(set_attr "type" "sse")
3029 (set_attr "prefix_rep" "1")
3030 (set_attr "prefix" "maybe_vex")
3031 (set_attr "mode" "V4SF")])
;; Expander taking the shuffle immediate as a single const_int (operand 3).
;; It splits the mask into four 2-bit fields and passes eight explicit
;; element indices to gen_avx_shufps256_1.  Fields 0 and 1 are used as-is
;; and fields 2 and 3 are offset by 8; the same four values are then
;; repeated with an extra offset of 4 (so +4, +4, +12, +12).
3033 (define_expand "avx_shufps256"
3034 [(match_operand:V8SF 0 "register_operand" "")
3035 (match_operand:V8SF 1 "register_operand" "")
3036 (match_operand:V8SF 2 "nonimmediate_operand" "")
3037 (match_operand:SI 3 "const_int_operand" "")]
3040 int mask = INTVAL (operands[3]);
3041 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
3042 GEN_INT ((mask >> 0) & 3),
3043 GEN_INT ((mask >> 2) & 3),
3044 GEN_INT (((mask >> 4) & 3) + 8),
3045 GEN_INT (((mask >> 6) & 3) + 8),
3046 GEN_INT (((mask >> 0) & 3) + 4),
3047 GEN_INT (((mask >> 2) & 3) + 4),
3048 GEN_INT (((mask >> 4) & 3) + 12),
3049 GEN_INT (((mask >> 6) & 3) + 12)));
3053 ;; One bit in mask selects 2 elements.
;; vshufps on 256-bit vectors with the selection spelled out as eight index
;; operands (3..10), each restricted to a range by its predicate.  The
;; condition requires operands 7..10 to equal operands 3..6 plus 4.  The
;; output code rebuilds the 8-bit immediate from operands 3..6 (subtracting
;; 8 from operands 5 and 6) and stores it back into operands[3] for the
;; template.
3054 (define_insn "avx_shufps256_1"
3055 [(set (match_operand:V8SF 0 "register_operand" "=x")
3058 (match_operand:V8SF 1 "register_operand" "x")
3059 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
3060 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3061 (match_operand 4 "const_0_to_3_operand" "")
3062 (match_operand 5 "const_8_to_11_operand" "")
3063 (match_operand 6 "const_8_to_11_operand" "")
3064 (match_operand 7 "const_4_to_7_operand" "")
3065 (match_operand 8 "const_4_to_7_operand" "")
3066 (match_operand 9 "const_12_to_15_operand" "")
3067 (match_operand 10 "const_12_to_15_operand" "")])))]
3069 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
3070 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
3071 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
3072 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
3075 mask = INTVAL (operands[3]);
3076 mask |= INTVAL (operands[4]) << 2;
3077 mask |= (INTVAL (operands[5]) - 8) << 4;
3078 mask |= (INTVAL (operands[6]) - 8) << 6;
3079 operands[3] = GEN_INT (mask);
3081 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3083 [(set_attr "type" "sselog")
3084 (set_attr "length_immediate" "1")
3085 (set_attr "prefix" "vex")
3086 (set_attr "mode" "V8SF")])
;; Expander taking the shuffle immediate as a single const_int (operand 3).
;; It splits the mask into four 2-bit fields and passes four explicit
;; element indices to gen_sse_shufps_v4sf: fields 0 and 1 as-is, fields 2
;; and 3 offset by 4.
3088 (define_expand "sse_shufps"
3089 [(match_operand:V4SF 0 "register_operand" "")
3090 (match_operand:V4SF 1 "register_operand" "")
3091 (match_operand:V4SF 2 "nonimmediate_operand" "")
3092 (match_operand:SI 3 "const_int_operand" "")]
3095 int mask = INTVAL (operands[3]);
3096 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
3097 GEN_INT ((mask >> 0) & 3),
3098 GEN_INT ((mask >> 2) & 3),
3099 GEN_INT (((mask >> 4) & 3) + 4),
3100 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps / vshufps over the VI4F_128 mode iterator, with the selection
;; spelled out as four index operands: operands 3 and 4 in 0..3, operands 5
;; and 6 in 4..7.  The output code rebuilds the 8-bit immediate from them
;; (subtracting 4 from operands 5 and 6), stores it into operands[3], and
;; picks the legacy or VEX template according to which_alternative.
3104 (define_insn "sse_shufps_<mode>"
3105 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
3106 (vec_select:VI4F_128
3107 (vec_concat:<ssedoublevecmode>
3108 (match_operand:VI4F_128 1 "register_operand" "0,x")
3109 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
3110 (parallel [(match_operand 3 "const_0_to_3_operand" "")
3111 (match_operand 4 "const_0_to_3_operand" "")
3112 (match_operand 5 "const_4_to_7_operand" "")
3113 (match_operand 6 "const_4_to_7_operand" "")])))]
3117 mask |= INTVAL (operands[3]) << 0;
3118 mask |= INTVAL (operands[4]) << 2;
3119 mask |= (INTVAL (operands[5]) - 4) << 4;
3120 mask |= (INTVAL (operands[6]) - 4) << 6;
3121 operands[3] = GEN_INT (mask);
3123 switch (which_alternative)
3126 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3128 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3133 [(set_attr "isa" "noavx,avx")
3134 (set_attr "type" "sselog")
3135 (set_attr "length_immediate" "1")
3136 (set_attr "prefix" "orig,vex")
3137 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination, (v)movhps store;
;;   1: register to register, (v)movhlps (destination printed with %d0);
;;   2: offsettable memory source, (v)movlps load from %H1.
3139 (define_insn "sse_storehps"
3140 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3142 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
3143 (parallel [(const_int 2) (const_int 3)])))]
3146 %vmovhps\t{%1, %0|%0, %1}
3147 %vmovhlps\t{%1, %d0|%d0, %1}
3148 %vmovlps\t{%H1, %d0|%d0, %H1}"
3149 [(set_attr "type" "ssemov")
3150 (set_attr "prefix" "maybe_vex")
3151 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps that accepts any nonimmediate
;; operands.  It calls ix86_fixup_binary_operands to obtain a usable
;; destination, emits gen_sse_loadhps into it, and copies the result to
;; operand 0 when the fixup substituted a different destination.
3153 (define_expand "sse_loadhps_exp"
3154 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3157 (match_operand:V4SF 1 "nonimmediate_operand" "")
3158 (parallel [(const_int 0) (const_int 1)]))
3159 (match_operand:V2SF 2 "nonimmediate_operand" "")))]
3162 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3164 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
3166 /* Fix up the destination if needed. */
3167 if (dst != operands[0])
3168 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;;   0/1: operand 2 in memory, movhps / vmovhps;
;;   2/3: operand 2 in a register, movlhps / vmovlhps;
;;   4:   offsettable memory destination, (v)movlps storing register
;;        operand 2 to %H0.
3173 (define_insn "sse_loadhps"
3174 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
3177 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
3178 (parallel [(const_int 0) (const_int 1)]))
3179 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
3182 movhps\t{%2, %0|%0, %2}
3183 vmovhps\t{%2, %1, %0|%0, %1, %2}
3184 movlhps\t{%2, %0|%0, %2}
3185 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3186 %vmovlps\t{%2, %H0|%H0, %2}"
3187 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3188 (set_attr "type" "ssemov")
3189 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3190 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;;   0: memory destination, (v)movlps store;
;;   1: register to register, (v)movaps;
;;   2: memory source, (v)movlps load (destination printed with %d0).
3192 (define_insn "sse_storelps"
3193 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3195 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
3196 (parallel [(const_int 0) (const_int 1)])))]
3199 %vmovlps\t{%1, %0|%0, %1}
3200 %vmovaps\t{%1, %0|%0, %1}
3201 %vmovlps\t{%1, %d0|%d0, %1}"
3202 [(set_attr "type" "ssemov")
3203 (set_attr "prefix" "maybe_vex")
3204 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps that accepts any nonimmediate
;; operands.  It calls ix86_fixup_binary_operands to obtain a usable
;; destination, emits gen_sse_loadlps into it, and copies the result to
;; operand 0 when the fixup substituted a different destination.
3206 (define_expand "sse_loadlps_exp"
3207 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
3209 (match_operand:V2SF 2 "nonimmediate_operand" "")
3211 (match_operand:V4SF 1 "nonimmediate_operand" "")
3212 (parallel [(const_int 2) (const_int 3)]))))]
3215 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
3217 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
3219 /* Fix up the destination if needed. */
3220 if (dst != operands[0])
3221 emit_move_insn (operands[0], dst);
;; Combines V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;;   0/1: operand 2 in a register, shufps / vshufps with immediate 0xe4;
;;   2/3: operand 2 in memory, movlps / vmovlps.  Both need a memory
;;        source, so operand 2 is "m" in both alternatives; the
;;        register/register VEX case is already handled by alternative 1;
;;   4:   memory destination, (v)movlps storing register operand 2.
3226 (define_insn "sse_loadlps"
3227 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
3229 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
3231 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
3232 (parallel [(const_int 2) (const_int 3)]))))]
3235 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3236 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3237 movlps\t{%2, %0|%0, %2}
3238 vmovlps\t{%2, %1, %0|%0, %1, %2}
3239 %vmovlps\t{%2, %0|%0, %2}"
3240 [(set_attr "isa" "noavx,avx,noavx,avx,base")
3241 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
3242 (set_attr "length_immediate" "1,1,*,*,*")
3243 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
3244 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss / vmovss between V4SF registers.  Alternative 0 is the two-operand
;; non-AVX form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.  All operands must be registers.
3246 (define_insn "sse_movss"
3247 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3249 (match_operand:V4SF 2 "register_operand" " x,x")
3250 (match_operand:V4SF 1 "register_operand" " 0,x")
3254 movss\t{%2, %0|%0, %2}
3255 vmovss\t{%2, %1, %0|%0, %1, %2}"
3256 [(set_attr "isa" "noavx,avx")
3257 (set_attr "type" "ssemov")
3258 (set_attr "prefix" "orig,vex")
3259 (set_attr "mode" "SF")])
;; Expander building V4SF operand 0 from scalar SFmode operand 1 (register
;; or memory).  The preparation statement forces operand 1 into a register.
;; The mode passed to force_reg is SFmode because it must match the mode
;; operand 1 is declared with; a vector mode there would request a V4SF
;; copy of a scalar value.
3261 (define_expand "vec_dupv4sf"
3262 [(set (match_operand:V4SF 0 "register_operand" "")
3264 (match_operand:SF 1 "nonimmediate_operand" "")))]
3268 operands[1] = force_reg (SFmode, operands[1]);
;; VEX-encoded V4SF duplicate of SFmode operand 1.  Alternative 0 (register
;; source) uses vshufps with immediate 0 and operand 1 as both sources;
;; alternative 1 (memory source) uses vbroadcastss.
3271 (define_insn "*vec_dupv4sf_avx"
3272 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3274 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3277 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3278 vbroadcastss\t{%1, %0|%0, %1}"
3279 [(set_attr "type" "sselog1,ssemov")
3280 (set_attr "length_immediate" "1,0")
3281 (set_attr "prefix_extra" "0,1")
3282 (set_attr "prefix" "vex")
3283 (set_attr "mode" "V4SF")])
;; *vec_dupv4sf: V4SF destination from SF register operand 1, which is
;; tied to the destination register (constraint "0").  Emits shufps with
;; immediate 0 using operand 0 as both source and destination.
3285 (define_insn "*vec_dupv4sf"
3286 [(set (match_operand:V4SF 0 "register_operand" "=x")
3288 (match_operand:SF 1 "register_operand" "0")))]
3290 "shufps\t{$0, %0, %0|%0, %0, 0}"
3291 [(set_attr "type" "sselog1")
3292 (set_attr "length_immediate" "1")
3293 (set_attr "mode" "V4SF")])
3295 ;; Although insertps takes register source, we prefer
3296 ;; unpcklps with register source since it is shorter.
;; *vec_concatv2sf_sse4_1: V2SF destination from SF operands 1 and 2.
;; Alternatives, in order:
;;   0/1  operand 2 in an SSE register: unpcklps / vunpcklps
;;   2/3  operand 2 in memory: insertps / vinsertps with immediate 0x10
;;   4    operand 1 in memory, operand 2 matching "C": %vmovss load
;;   5    MMX register destination: punpckldq
;;   6    MMX register destination, operand 1 in memory, operand 2 "C": movd
3297 (define_insn "*vec_concatv2sf_sse4_1"
3298 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
3300 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
3301 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
3304 unpcklps\t{%2, %0|%0, %2}
3305 vunpcklps\t{%2, %1, %0|%0, %1, %2}
3306 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3307 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3308 %vmovss\t{%1, %0|%0, %1}
3309 punpckldq\t{%2, %0|%0, %2}
3310 movd\t{%1, %0|%0, %1}"
3311 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
3312 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
3313 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
3314 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
3315 (set_attr "length_immediate" "*,*,1,1,*,*,*")
3316 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
3317 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
3319 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3320 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3321 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; *vec_concatv2sf_sse: V2SF destination from SF operands 1 and 2, with
;; operand 2 restricted to a register or zero (reg_or_0_operand).
;; Alternatives, in order: unpcklps with operand 1 tied to the destination;
;; movss load of a memory operand 1 when operand 2 matches "C"; and the two
;; MMX-register counterparts, punpckldq and movd.
3322 (define_insn "*vec_concatv2sf_sse"
3323 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
3325 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
3326 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
3329 unpcklps\t{%2, %0|%0, %2}
3330 movss\t{%1, %0|%0, %1}
3331 punpckldq\t{%2, %0|%0, %2}
3332 movd\t{%1, %0|%0, %1}"
3333 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3334 (set_attr "mode" "V4SF,SF,DI,DI")])
;; *vec_concatv4sf: V4SF destination from V2SF register operand 1 and V2SF
;; operand 2.  A register operand 2 is handled by movlhps / vmovlhps, a
;; memory operand 2 by movhps / vmovhps; the non-AVX alternatives tie
;; operand 1 to the destination.
3336 (define_insn "*vec_concatv4sf"
3337 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
3339 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
3340 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
3343 movlhps\t{%2, %0|%0, %2}
3344 vmovlhps\t{%2, %1, %0|%0, %1, %2}
3345 movhps\t{%2, %0|%0, %2}
3346 vmovhps\t{%2, %1, %0|%0, %1, %2}"
3347 [(set_attr "isa" "noavx,avx,noavx,avx")
3348 (set_attr "type" "ssemov")
3349 (set_attr "prefix" "orig,vex,orig,vex")
3350 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init<mode>: expander instantiated for every V_128 mode.  It passes
;; the destination register (operand 0) and the unconstrained operand 1 to
;; ix86_expand_vector_init, with false as the first argument.
3352 (define_expand "vec_init<mode>"
3353 [(match_operand:V_128 0 "register_operand" "")
3354 (match_operand 1 "" "")]
3357 ix86_expand_vector_init (false, operands[0], operands[1]);
3361 ;; Avoid combining registers from different units in a single alternative,
3362 ;; see comment above inline_secondary_memory_needed function in i386.c
;; *vec_set<mode>_0_sse4_1: VI4F_128 destination combining a vec_duplicate
;; of scalar operand 2 with vector operand 1.  Ten alternatives:
;;   0-2  operand 1 matches "C": %vinsertps 0xe, a scalar-suffixed %vmov
;;        load from memory, or %vmovd from a general register
;;   3/4  SSE register scalar: movss / vmovss
;;   5/6  general register or memory scalar: pinsrd / vpinsrd, index 0
;;   7-9  memory destination tied to operand 1; their templates are not
;;        visible in this view and their type/prefix/mode attrs are "*"
3363 (define_insn "*vec_set<mode>_0_sse4_1"
3364 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3365 "=x,x,x ,x,x,x ,x ,m,m,m")
3367 (vec_duplicate:VI4F_128
3368 (match_operand:<ssescalarmode> 2 "general_operand"
3369 " x,m,*r,x,x,*rm,*rm,x,*r,fF"))
3370 (match_operand:VI4F_128 1 "vector_move_operand"
3371 " C,C,C ,0,x,0 ,x ,0,0 ,0")
3375 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
3376 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3377 %vmovd\t{%2, %0|%0, %2}
3378 movss\t{%2, %0|%0, %2}
3379 vmovss\t{%2, %1, %0|%0, %1, %2}
3380 pinsrd\t{$0, %2, %0|%0, %2, 0}
3381 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3385 [(set_attr "isa" "base,base,base,noavx,avx,noavx,avx,base,base,base")
3386 (set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov,sselog,sselog,*,*,*")
3387 (set_attr "prefix_extra" "*,*,*,*,*,1,1,*,*,*")
3388 (set_attr "length_immediate" "*,*,*,*,*,1,1,*,*,*")
3389 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,vex,orig,vex,*,*,*")
3390 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,TI,TI,*,*,*")])
3392 ;; Avoid combining registers from different units in a single alternative,
3393 ;; see comment above inline_secondary_memory_needed function in i386.c
;; *vec_set<mode>_0_sse2: VI4F_128 destination combining a vec_duplicate
;; of scalar operand 2 with vector operand 1.  The "mode" attribute lists
;; six alternatives; the three visible templates are a scalar-suffixed mov,
;; movd and movss.  The constraint strings and the remaining templates are
;; not visible in this view.
3394 (define_insn "*vec_set<mode>_0_sse2"
3395 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3398 (vec_duplicate:VI4F_128
3399 (match_operand:<ssescalarmode> 2 "general_operand"
3401 (match_operand:VI4F_128 1 "vector_move_operand"
3406 mov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
3407 movd\t{%2, %0|%0, %2}
3408 movss\t{%2, %0|%0, %2}
3412 [(set_attr "type" "ssemov")
3413 (set_attr "mode" "<ssescalarmode>,SI,SF,*,*,*")])
3415 ;; Avoid combining registers from different units in a single alternative,
3416 ;; see comment above inline_secondary_memory_needed function in i386.c
;; vec_set<mode>_0: VI4F_128 destination combining a vec_duplicate of
;; scalar operand 2 with vector operand 1.  The "mode" attribute lists five
;; alternatives; the two visible templates both emit movss.  The constraint
;; strings and the remaining templates are not visible in this view.
3417 (define_insn "vec_set<mode>_0"
3418 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
3421 (vec_duplicate:VI4F_128
3422 (match_operand:<ssescalarmode> 2 "general_operand"
3424 (match_operand:VI4F_128 1 "vector_move_operand"
3429 movss\t{%2, %0|%0, %2}
3430 movss\t{%2, %0|%0, %2}
3434 [(set_attr "type" "ssemov")
3435 (set_attr "mode" "SF,SF,*,*,*")])
3437 ;; A subset is vec_setv4sf.
;; *vec_setv4sf_sse4_1: V4SF destination from SF operand 2, V4SF register
;; operand 1 and const_int operand 3.  The visible part of the condition
;; requires exact_log2 of operand 3 to be below the number of V4SF
;; elements.  At output time operand 3 is replaced by that log2 shifted
;; left by 4, and the result is used as the immediate of insertps
;; (operand 1 tied to the destination) or vinsertps (AVX alternative).
3438 (define_insn "*vec_setv4sf_sse4_1"
3439 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3442 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
3443 (match_operand:V4SF 1 "register_operand" "0,x")
3444 (match_operand:SI 3 "const_int_operand" "")))]
3446 && ((unsigned) exact_log2 (INTVAL (operands[3]))
3447 < GET_MODE_NUNITS (V4SFmode))"
3449 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3450 switch (which_alternative)
3453 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3455 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3460 [(set_attr "isa" "noavx,avx")
3461 (set_attr "type" "sselog")
3462 (set_attr "prefix_data16" "1,*")
3463 (set_attr "prefix_extra" "1")
3464 (set_attr "length_immediate" "1")
3465 (set_attr "prefix" "orig,vex")
3466 (set_attr "mode" "V4SF")])
;; sse4_1_insertps: unspec pattern over V4SF operand 2, V4SF register
;; operand 1 and an immediate operand 3 in the range 0..255.  When operand
;; 2 is in memory, the top two bits of the immediate (operand 3 >> 6) are
;; removed from the immediate (masked with 0x3f) and applied to the address
;; instead: the memory reference is narrowed to SFmode at byte offset
;; count_s * 4.  Emits insertps or, for the AVX alternative, vinsertps.
3468 (define_insn "sse4_1_insertps"
3469 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3470 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
3471 (match_operand:V4SF 1 "register_operand" "0,x")
3472 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
3476 if (MEM_P (operands[2]))
3478 unsigned count_s = INTVAL (operands[3]) >> 6;
3480 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
3481 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
3483 switch (which_alternative)
3486 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3488 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3493 [(set_attr "isa" "noavx,avx")
3494 (set_attr "type" "sselog")
3495 (set_attr "prefix_data16" "1,*")
3496 (set_attr "prefix_extra" "1")
3497 (set_attr "length_immediate" "1")
3498 (set_attr "prefix" "orig,vex")
3499 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE) for a VI4F_128 memory destination whose
;; source contains a vec_duplicate of the non-memory scalar operand 1.
;; The replacement is a move into the destination re-addressed in the
;; scalar mode at byte offset 0; the move's source argument is not visible
;; in this view.
3502 [(set (match_operand:VI4F_128 0 "memory_operand" "")
3504 (vec_duplicate:VI4F_128
3505 (match_operand:<ssescalarmode> 1 "nonmemory_operand" ""))
3508 "TARGET_SSE && reload_completed"
3511 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0),
;; vec_set<mode>: expander instantiated for every V_128 mode.  It passes
;; the vector register (operand 0), the scalar register (operand 1) and the
;; integer value of const_int operand 2 to ix86_expand_vector_set, with
;; false as the first argument.
3516 (define_expand "vec_set<mode>"
3517 [(match_operand:V_128 0 "register_operand" "")
3518 (match_operand:<ssescalarmode> 1 "register_operand" "")
3519 (match_operand 2 "const_int_operand" "")]
3522 ix86_expand_vector_set (false, operands[0], operands[1],
3523 INTVAL (operands[2]));
;; *vec_extractv4sf_0: SF destination from element 0 of V4SF operand 1;
;; rejected when both operands are in memory.  After reload it is split
;; into a plain move whose source is operand 1 viewed as SFmode, built
;; either as a fresh SFmode REG with the same register number or with
;; gen_lowpart (the test choosing between them is not visible here).
3527 (define_insn_and_split "*vec_extractv4sf_0"
3528 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
3530 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
3531 (parallel [(const_int 0)])))]
3532 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3534 "&& reload_completed"
3537 rtx op1 = operands[1];
3539 op1 = gen_rtx_REG (SFmode, REGNO (op1));
3541 op1 = gen_lowpart (SFmode, op1);
3542 emit_move_insn (operands[0], op1);
;; avx_vextractf128<mode>: expander for every V_256 mode.  Operand 2 is a
;; constant 0 or 1; a switch on its value picks gen_vec_extract_lo_<mode>
;; or gen_vec_extract_hi_<mode> (the case labels are not visible here), and
;; the chosen generator is emitted with operands 0 and 1.
3546 (define_expand "avx_vextractf128<mode>"
3547 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "")
3548 (match_operand:V_256 1 "register_operand" "")
3549 (match_operand:SI 2 "const_0_to_1_operand" "")]
3552 rtx (*insn)(rtx, rtx);
3554 switch (INTVAL (operands[2]))
3557 insn = gen_vec_extract_lo_<mode>;
3560 insn = gen_vec_extract_hi_<mode>;
3566 emit_insn (insn (operands[0], operands[1]));
;; vec_extract_lo_<mode> (VI8F_256 modes): selects elements 0 and 1 of
;; operand 1 into a half-width destination.  After reload it is split into
;; a plain move whose source is operand 1 viewed in the half-vector mode,
;; built either as a fresh REG with the same register number or with
;; gen_lowpart (the test choosing between them is not visible here).
3570 (define_insn_and_split "vec_extract_lo_<mode>"
3571 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3572 (vec_select:<ssehalfvecmode>
3573 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
3574 (parallel [(const_int 0) (const_int 1)])))]
3577 "&& reload_completed"
3580 rtx op1 = operands[1];
3582 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3584 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3585 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI8F_256 modes): selects elements 2 and 3 of
;; register operand 1 into a half-width register or memory destination.
;; Emits vextractf128 with immediate 0x1; the "memory" attribute marks the
;; second alternative as a store.
3589 (define_insn "vec_extract_hi_<mode>"
3590 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3591 (vec_select:<ssehalfvecmode>
3592 (match_operand:VI8F_256 1 "register_operand" "x,x")
3593 (parallel [(const_int 2) (const_int 3)])))]
3595 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3596 [(set_attr "type" "sselog")
3597 (set_attr "prefix_extra" "1")
3598 (set_attr "length_immediate" "1")
3599 (set_attr "memory" "none,store")
3600 (set_attr "prefix" "vex")
3601 (set_attr "mode" "V8SF")])
;; vec_extract_lo_<mode> (VI4F_256 modes): selects elements 0..3 of
;; operand 1 into a half-width destination.  After reload it is split into
;; a plain move whose source is operand 1 viewed in the half-vector mode,
;; built either as a fresh REG with the same register number or with
;; gen_lowpart (the test choosing between them is not visible here).
3603 (define_insn_and_split "vec_extract_lo_<mode>"
3604 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3605 (vec_select:<ssehalfvecmode>
3606 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
3607 (parallel [(const_int 0) (const_int 1)
3608 (const_int 2) (const_int 3)])))]
3611 "&& reload_completed"
3614 rtx op1 = operands[1];
3616 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
3618 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
3619 emit_move_insn (operands[0], op1);
;; vec_extract_hi_<mode> (VI4F_256 modes): selects elements 4..7 of
;; register operand 1 into a half-width register or memory destination.
;; Emits vextractf128 with immediate 0x1; the "memory" attribute marks the
;; second alternative as a store.
3623 (define_insn "vec_extract_hi_<mode>"
3624 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
3625 (vec_select:<ssehalfvecmode>
3626 (match_operand:VI4F_256 1 "register_operand" "x,x")
3627 (parallel [(const_int 4) (const_int 5)
3628 (const_int 6) (const_int 7)])))]
3630 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3631 [(set_attr "type" "sselog")
3632 (set_attr "prefix_extra" "1")
3633 (set_attr "length_immediate" "1")
3634 (set_attr "memory" "none,store")
3635 (set_attr "prefix" "vex")
3636 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v16hi: selects elements 0..7 of V16HI operand 1 into a
;; V8HI destination.  After reload it is split into a plain move whose
;; source is operand 1 viewed as V8HImode, built either as a fresh REG with
;; the same register number or with gen_lowpart (the test choosing between
;; them is not visible here).
3638 (define_insn_and_split "vec_extract_lo_v16hi"
3639 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3641 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
3642 (parallel [(const_int 0) (const_int 1)
3643 (const_int 2) (const_int 3)
3644 (const_int 4) (const_int 5)
3645 (const_int 6) (const_int 7)])))]
3648 "&& reload_completed"
3651 rtx op1 = operands[1];
3653 op1 = gen_rtx_REG (V8HImode, REGNO (op1));
3655 op1 = gen_lowpart (V8HImode, op1);
3656 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v16hi: selects elements 8..15 of V16HI register operand
;; 1 into a V8HI register or memory destination.  Emits vextractf128 with
;; immediate 0x1; the "memory" attribute marks the second alternative as a
;; store.
3660 (define_insn "vec_extract_hi_v16hi"
3661 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
3663 (match_operand:V16HI 1 "register_operand" "x,x")
3664 (parallel [(const_int 8) (const_int 9)
3665 (const_int 10) (const_int 11)
3666 (const_int 12) (const_int 13)
3667 (const_int 14) (const_int 15)])))]
3669 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3670 [(set_attr "type" "sselog")
3671 (set_attr "prefix_extra" "1")
3672 (set_attr "length_immediate" "1")
3673 (set_attr "memory" "none,store")
3674 (set_attr "prefix" "vex")
3675 (set_attr "mode" "V8SF")])
;; vec_extract_lo_v32qi: selects elements 0..15 of V32QI operand 1 into a
;; V16QI destination.  After reload it is split into a plain move whose
;; source is operand 1 viewed as V16QImode, built either as a fresh REG
;; with the same register number or with gen_lowpart (the test choosing
;; between them is not visible here).
3677 (define_insn_and_split "vec_extract_lo_v32qi"
3678 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3680 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
3681 (parallel [(const_int 0) (const_int 1)
3682 (const_int 2) (const_int 3)
3683 (const_int 4) (const_int 5)
3684 (const_int 6) (const_int 7)
3685 (const_int 8) (const_int 9)
3686 (const_int 10) (const_int 11)
3687 (const_int 12) (const_int 13)
3688 (const_int 14) (const_int 15)])))]
3691 "&& reload_completed"
3694 rtx op1 = operands[1];
3696 op1 = gen_rtx_REG (V16QImode, REGNO (op1));
3698 op1 = gen_lowpart (V16QImode, op1);
3699 emit_move_insn (operands[0], op1);
;; vec_extract_hi_v32qi: selects elements 16..31 of V32QI register operand
;; 1 into a V16QI register or memory destination.  Emits vextractf128 with
;; immediate 0x1; the "memory" attribute marks the second alternative as a
;; store.
3703 (define_insn "vec_extract_hi_v32qi"
3704 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
3706 (match_operand:V32QI 1 "register_operand" "x,x")
3707 (parallel [(const_int 16) (const_int 17)
3708 (const_int 18) (const_int 19)
3709 (const_int 20) (const_int 21)
3710 (const_int 22) (const_int 23)
3711 (const_int 24) (const_int 25)
3712 (const_int 26) (const_int 27)
3713 (const_int 28) (const_int 29)
3714 (const_int 30) (const_int 31)])))]
3716 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3717 [(set_attr "type" "sselog")
3718 (set_attr "prefix_extra" "1")
3719 (set_attr "length_immediate" "1")
3720 (set_attr "memory" "none,store")
3721 (set_attr "prefix" "vex")
3722 (set_attr "mode" "V8SF")])
;; *sse4_1_extractps: SF destination (general register or memory, "rm")
;; from the element of V4SF register operand 1 selected by the constant
;; operand 2 (0..3).  Emits %vextractps with operand 2 as the immediate.
3724 (define_insn "*sse4_1_extractps"
3725 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
3727 (match_operand:V4SF 1 "register_operand" "x")
3728 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3730 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3731 [(set_attr "type" "sselog")
3732 (set_attr "prefix_data16" "1")
3733 (set_attr "prefix_extra" "1")
3734 (set_attr "length_immediate" "1")
3735 (set_attr "prefix" "maybe_vex")
3736 (set_attr "mode" "V4SF")])
;; *vec_extract_v4sf_mem: SF register destination from element operand 2
;; (0..3) of a V4SF operand 1 held in offsettable memory ("o").  The split
;; replaces it with a plain SFmode load from operand 1's address plus
;; 4 * index.  The insn and split conditions are not visible in this view.
3738 (define_insn_and_split "*vec_extract_v4sf_mem"
3739 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3741 (match_operand:V4SF 1 "memory_operand" "o")
3742 (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
3748 int i = INTVAL (operands[2]);
3750 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
3754 ;; Modes handled by vec_extract patterns.
;; The 256-bit entries (V8SF, V4DF) are enabled only under TARGET_AVX; all
;; other modes in the list are unconditional.
3755 (define_mode_iterator VEC_EXTRACT_MODE
3756 [V16QI V8HI V4SI V2DI
3757 (V8SF "TARGET_AVX") V4SF
3758 (V4DF "TARGET_AVX") V2DF])
;; vec_extract<mode>: expander for every VEC_EXTRACT_MODE mode.  It passes
;; the scalar destination register (operand 0), the vector register
;; (operand 1) and the integer value of const_int operand 2 to
;; ix86_expand_vector_extract, with false as the first argument.
3760 (define_expand "vec_extract<mode>"
3761 [(match_operand:<ssescalarmode> 0 "register_operand" "")
3762 (match_operand:VEC_EXTRACT_MODE 1 "register_operand" "")
3763 (match_operand 2 "const_int_operand" "")]
3766 ix86_expand_vector_extract (false, operands[0], operands[1],
3767 INTVAL (operands[2]));
3771 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3773 ;; Parallel double-precision floating point element swizzling
3775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3777 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_unpckhpd256: V4DF destination selecting indices 1, 5, 3, 7 from the
;; pair formed by V4DF register operand 1 and V4DF operand 2 (register or
;; memory).  Emits three-operand vunpckhpd.
3778 (define_insn "avx_unpckhpd256"
3779 [(set (match_operand:V4DF 0 "register_operand" "=x")
3782 (match_operand:V4DF 1 "register_operand" "x")
3783 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3784 (parallel [(const_int 1) (const_int 5)
3785 (const_int 3) (const_int 7)])))]
3787 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3788 [(set_attr "type" "sselog")
3789 (set_attr "prefix" "vex")
3790 (set_attr "mode" "V4DF")])
;; vec_interleave_highv4df: three-step expansion through two fresh V4DF
;; pseudo registers (operands 3 and 4).  The visible selectors are
;; {0,4,2,6} for the first step, {1,5,3,7} for the second, and {2,3,6,7}
;; for the final set of operand 0.  The sources of the second and third
;; steps are not visible in this view.
3792 (define_expand "vec_interleave_highv4df"
3796 (match_operand:V4DF 1 "register_operand" "x")
3797 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3798 (parallel [(const_int 0) (const_int 4)
3799 (const_int 2) (const_int 6)])))
3805 (parallel [(const_int 1) (const_int 5)
3806 (const_int 3) (const_int 7)])))
3807 (set (match_operand:V4DF 0 "register_operand" "")
3812 (parallel [(const_int 2) (const_int 3)
3813 (const_int 6) (const_int 7)])))]
3816 operands[3] = gen_reg_rtx (V4DFmode);
3817 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_highv2df: expander over V2DF operands 1 and 2 with a
;; selector starting at index 1.  If ix86_vec_interleave_v2df_operator_ok
;; rejects the operands for the high case (second argument 1), operand 2
;; is forced into a V2DF register.
3821 (define_expand "vec_interleave_highv2df"
3822 [(set (match_operand:V2DF 0 "register_operand" "")
3825 (match_operand:V2DF 1 "nonimmediate_operand" "")
3826 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3827 (parallel [(const_int 1)
3831 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3832 operands[2] = force_reg (V2DFmode, operands[2]);
;; *sse3_interleave_highv2df: SSE3 form of the high V2DF interleave, valid
;; only when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0/1  register operands: unpckhpd / vunpckhpd
;;   2    operand 2 the same as memory operand 1: %vmovddup of %H1
;;   3/4  operand 1 in offsettable memory: movlpd / vmovlpd of %H1
;;   5    memory destination tied to operand 2: %vmovhpd store of operand 1
3835 (define_insn "*sse3_interleave_highv2df"
3836 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
3839 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
3840 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
3841 (parallel [(const_int 1)
3843 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3845 unpckhpd\t{%2, %0|%0, %2}
3846 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
3847 %vmovddup\t{%H1, %0|%0, %H1}
3848 movlpd\t{%H1, %0|%0, %H1}
3849 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3850 %vmovhpd\t{%1, %0|%0, %1}"
3851 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3852 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3853 (set_attr "prefix_data16" "*,*,*,1,*,1")
3854 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3855 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; *sse2_interleave_highv2df: SSE2 form of the high V2DF interleave, valid
;; only when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order: unpckhpd on registers; movlpd of %H1 from an
;; offsettable memory operand 1; movhpd store of operand 1 into a memory
;; destination tied to operand 2.
3857 (define_insn "*sse2_interleave_highv2df"
3858 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3861 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3862 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3863 (parallel [(const_int 1)
3865 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3867 unpckhpd\t{%2, %0|%0, %2}
3868 movlpd\t{%H1, %0|%0, %H1}
3869 movhpd\t{%1, %0|%0, %1}"
3870 [(set_attr "type" "sselog,ssemov,ssemov")
3871 (set_attr "prefix_data16" "*,1,1")
3872 (set_attr "mode" "V2DF,V1DF,V1DF")])
3874 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; avx_movddup256: expander with a V4DF register destination, a single
;; V4DF operand 1 (register or memory) and the selector {0,4,2,6}.  The
;; RTL operator lines and the condition are not visible in this view.
3875 (define_expand "avx_movddup256"
3876 [(set (match_operand:V4DF 0 "register_operand" "")
3879 (match_operand:V4DF 1 "nonimmediate_operand" "")
3881 (parallel [(const_int 0) (const_int 4)
3882 (const_int 2) (const_int 6)])))]
;; avx_unpcklpd256: expander with a V4DF register destination, V4DF
;; register operand 1, V4DF operand 2 (register or memory) and the
;; selector {0,4,2,6}.  The condition is not visible in this view.
3885 (define_expand "avx_unpcklpd256"
3886 [(set (match_operand:V4DF 0 "register_operand" "")
3889 (match_operand:V4DF 1 "register_operand" "")
3890 (match_operand:V4DF 2 "nonimmediate_operand" ""))
3891 (parallel [(const_int 0) (const_int 4)
3892 (const_int 2) (const_int 6)])))]
;; *avx_unpcklpd256: V4DF destination selecting {0,4,2,6} from operands 1
;; and 2.  The visible part of the condition allows operand 1 in memory
;; only when it is rtx-equal to operand 2.  Alternative 0 (operand 2 the
;; same as operand 1) emits vmovddup; alternative 1 emits vunpcklpd.
3895 (define_insn "*avx_unpcklpd256"
3896 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
3899 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
3900 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
3901 (parallel [(const_int 0) (const_int 4)
3902 (const_int 2) (const_int 6)])))]
3904 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
3906 vmovddup\t{%1, %0|%0, %1}
3907 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
3908 [(set_attr "type" "sselog")
3909 (set_attr "prefix" "vex")
3910 (set_attr "mode" "V4DF")])
;; vec_interleave_lowv4df: three-step expansion through two fresh V4DF
;; pseudo registers (operands 3 and 4).  The visible selectors are
;; {0,4,2,6} for the first step, {1,5,3,7} for the second, and {0,1,4,5}
;; for the final set of operand 0.  The sources of the second and third
;; steps are not visible in this view.
3912 (define_expand "vec_interleave_lowv4df"
3916 (match_operand:V4DF 1 "register_operand" "x")
3917 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
3918 (parallel [(const_int 0) (const_int 4)
3919 (const_int 2) (const_int 6)])))
3925 (parallel [(const_int 1) (const_int 5)
3926 (const_int 3) (const_int 7)])))
3927 (set (match_operand:V4DF 0 "register_operand" "")
3932 (parallel [(const_int 0) (const_int 1)
3933 (const_int 4) (const_int 5)])))]
3936 operands[3] = gen_reg_rtx (V4DFmode);
3937 operands[4] = gen_reg_rtx (V4DFmode);
;; vec_interleave_lowv2df: expander over V2DF operands 1 and 2 with a
;; selector starting at index 0.  If ix86_vec_interleave_v2df_operator_ok
;; rejects the operands for the low case (second argument 0), operand 1 is
;; forced into a V2DF register.
3940 (define_expand "vec_interleave_lowv2df"
3941 [(set (match_operand:V2DF 0 "register_operand" "")
3944 (match_operand:V2DF 1 "nonimmediate_operand" "")
3945 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3946 (parallel [(const_int 0)
3950 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3951 operands[1] = force_reg (V2DFmode, operands[1]);
;; *sse3_interleave_lowv2df: SSE3 form of the low V2DF interleave, valid
;; only when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Alternatives, in order:
;;   0/1  register operands: unpcklpd / vunpcklpd
;;   2    operand 2 the same as operand 1 (memory): %vmovddup
;;   3/4  operand 2 in memory: movhpd / vmovhpd
;;   5    offsettable memory destination tied to operand 1: %vmovlpd store
;;        of operand 2 to %H0
3954 (define_insn "*sse3_interleave_lowv2df"
3955 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
3958 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
3959 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
3960 (parallel [(const_int 0)
3962 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3964 unpcklpd\t{%2, %0|%0, %2}
3965 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
3966 %vmovddup\t{%1, %0|%0, %1}
3967 movhpd\t{%2, %0|%0, %2}
3968 vmovhpd\t{%2, %1, %0|%0, %1, %2}
3969 %vmovlpd\t{%2, %H0|%H0, %2}"
3970 [(set_attr "isa" "noavx,avx,base,noavx,avx,base")
3971 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
3972 (set_attr "prefix_data16" "*,*,*,1,*,1")
3973 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
3974 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,V1DF")])
;; *sse2_interleave_lowv2df: SSE2 form of the low V2DF interleave, valid
;; only when ix86_vec_interleave_v2df_operator_ok accepts the operands.
;; Operand 1 is tied to the destination in every alternative: unpcklpd for
;; a register operand 2, movhpd for a memory operand 2, and a movlpd store
;; of operand 2 to %H0 for an offsettable memory destination.
3976 (define_insn "*sse2_interleave_lowv2df"
3977 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
3980 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
3981 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
3982 (parallel [(const_int 0)
3984 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3986 unpcklpd\t{%2, %0|%0, %2}
3987 movhpd\t{%2, %0|%0, %2}
3988 movlpd\t{%2, %H0|%H0, %2}"
3989 [(set_attr "type" "sselog,ssemov,ssemov")
3990 (set_attr "prefix_data16" "*,1,1")
3991 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Post-reload split (TARGET_SSE3) for a V2DF memory destination fed from
;; V2DF register operand 1 with a selector starting at index 0.  The
;; register is re-viewed as a DFmode REG with the same register number and
;; stored twice, at byte offsets 0 and 8 of the destination.
3994 [(set (match_operand:V2DF 0 "memory_operand" "")
3997 (match_operand:V2DF 1 "register_operand" "")
3999 (parallel [(const_int 0)
4001 "TARGET_SSE3 && reload_completed"
4004 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
4005 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4006 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split (TARGET_SSE3) for a V2DF register destination fed from V2DF memory
;; operand 1 with selector operands 2 and 3, where operand 2 is 0 or 1 and
;; operand 3 must equal operand 2 + 2.  It becomes a vec_duplicate of the
;; DFmode memory word at byte offset operand 2 * 8.
4011 [(set (match_operand:V2DF 0 "register_operand" "")
4014 (match_operand:V2DF 1 "memory_operand" "")
4016 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4017 (match_operand:SI 3 "const_int_operand" "")])))]
4018 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4019 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4021 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; avx_shufpd256: expander that decodes the integer mask in operand 3 into
;; explicit element selectors for gen_avx_shufpd256_1.  Visible decoding:
;; mask bit 1 picks 5 or 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6.  The
;; argument derived from bit 0 is not visible in this view.
4024 (define_expand "avx_shufpd256"
4025 [(match_operand:V4DF 0 "register_operand" "")
4026 (match_operand:V4DF 1 "register_operand" "")
4027 (match_operand:V4DF 2 "nonimmediate_operand" "")
4028 (match_operand:SI 3 "const_int_operand" "")]
4031 int mask = INTVAL (operands[3]);
4032 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
4034 GEN_INT (mask & 2 ? 5 : 4),
4035 GEN_INT (mask & 4 ? 3 : 2),
4036 GEN_INT (mask & 8 ? 7 : 6)));
;; avx_shufpd256_1: V4DF shuffle with four explicit selector operands,
;; constrained to 0..1, 4..5, 2..3 and 6..7 respectively.  At output time
;; the selectors are packed back into one immediate: operand 3 supplies
;; bit 0, (operand 4 - 4) bit 1, (operand 5 - 2) bit 2 and (operand 6 - 6)
;; bit 3.  The packed value replaces operand 3 and vshufpd is emitted.
4040 (define_insn "avx_shufpd256_1"
4041 [(set (match_operand:V4DF 0 "register_operand" "=x")
4044 (match_operand:V4DF 1 "register_operand" "x")
4045 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4046 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4047 (match_operand 4 "const_4_to_5_operand" "")
4048 (match_operand 5 "const_2_to_3_operand" "")
4049 (match_operand 6 "const_6_to_7_operand" "")])))]
4053 mask = INTVAL (operands[3]);
4054 mask |= (INTVAL (operands[4]) - 4) << 1;
4055 mask |= (INTVAL (operands[5]) - 2) << 2;
4056 mask |= (INTVAL (operands[6]) - 6) << 3;
4057 operands[3] = GEN_INT (mask);
4059 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4061 [(set_attr "type" "sselog")
4062 (set_attr "length_immediate" "1")
4063 (set_attr "prefix" "vex")
4064 (set_attr "mode" "V4DF")])
;; sse2_shufpd: expander that decodes the integer mask in operand 3 into
;; explicit element selectors for gen_sse2_shufpd_v2df.  Visible decoding:
;; mask bit 1 picks 3 or 2.  The argument derived from bit 0 is not
;; visible in this view.
4066 (define_expand "sse2_shufpd"
4067 [(match_operand:V2DF 0 "register_operand" "")
4068 (match_operand:V2DF 1 "register_operand" "")
4069 (match_operand:V2DF 2 "nonimmediate_operand" "")
4070 (match_operand:SI 3 "const_int_operand" "")]
4073 int mask = INTVAL (operands[3]);
4074 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
4076 GEN_INT (mask & 2 ? 3 : 2)));
4080 ;; Modes handled by vec_extract_even/odd pattern.
;; The integer modes and V2DF require TARGET_SSE2, the 256-bit float modes
;; (V8SF, V4DF) require TARGET_AVX, and V4SF is unconditional.
4081 (define_mode_iterator VEC_EXTRACT_EVENODD_MODE
4082 [(V16QI "TARGET_SSE2")
4083 (V8HI "TARGET_SSE2")
4084 (V4SI "TARGET_SSE2")
4085 (V2DI "TARGET_SSE2")
4086 (V8SF "TARGET_AVX") V4SF
4087 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; vec_extract_even<mode>: expander over three register operands of the
;; same VEC_EXTRACT_EVENODD_MODE mode.  It forwards them to
;; ix86_expand_vec_extract_even_odd with 0 as the last argument.
4089 (define_expand "vec_extract_even<mode>"
4090 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4091 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4092 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4095 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
;; vec_extract_odd<mode>: expander over three register operands of the
;; same VEC_EXTRACT_EVENODD_MODE mode.  It forwards them to
;; ix86_expand_vec_extract_even_odd with 1 as the last argument.
4099 (define_expand "vec_extract_odd<mode>"
4100 [(match_operand:VEC_EXTRACT_EVENODD_MODE 0 "register_operand" "")
4101 (match_operand:VEC_EXTRACT_EVENODD_MODE 1 "register_operand" "")
4102 (match_operand:VEC_EXTRACT_EVENODD_MODE 2 "register_operand" "")]
4105 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4109 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; vec_interleave_highv2di: V2DI register destination from V2DI register
;; operand 1 and V2DI operand 2 (register or memory), with a selector
;; starting at index 1.  Emits punpckhqdq with operand 1 tied to the
;; destination, or three-operand vpunpckhqdq for the AVX alternative.
4111 (define_insn "vec_interleave_highv2di"
4112 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4115 (match_operand:V2DI 1 "register_operand" "0,x")
4116 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4117 (parallel [(const_int 1)
4121 punpckhqdq\t{%2, %0|%0, %2}
4122 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4123 [(set_attr "isa" "noavx,avx")
4124 (set_attr "type" "sselog")
4125 (set_attr "prefix_data16" "1,*")
4126 (set_attr "prefix" "orig,vex")
4127 (set_attr "mode" "TI")])
;; vec_interleave_lowv2di: V2DI register destination from V2DI register
;; operand 1 and V2DI operand 2 (register or memory), with a selector
;; starting at index 0.  Emits punpcklqdq with operand 1 tied to the
;; destination, or three-operand vpunpcklqdq for the AVX alternative.
4129 (define_insn "vec_interleave_lowv2di"
4130 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4133 (match_operand:V2DI 1 "register_operand" "0,x")
4134 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
4135 (parallel [(const_int 0)
4139 punpcklqdq\t{%2, %0|%0, %2}
4140 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4141 [(set_attr "isa" "noavx,avx")
4142 (set_attr "type" "sselog")
4143 (set_attr "prefix_data16" "1,*")
4144 (set_attr "prefix" "orig,vex")
4145 (set_attr "mode" "TI")])
;; sse2_shufpd_<mode> (VI8F_128 modes): selects two elements from the
;; concatenation of operands 1 and 2; selector operand 3 is 0..1 and
;; selector operand 4 is 2..3.  At output time the selectors are packed
;; into one immediate: operand 3 supplies bit 0 and (operand 4 - 2) bit 1.
;; The packed value replaces operand 3, and shufpd or (for the AVX
;; alternative) vshufpd is emitted.
4147 (define_insn "sse2_shufpd_<mode>"
4148 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
4149 (vec_select:VI8F_128
4150 (vec_concat:<ssedoublevecmode>
4151 (match_operand:VI8F_128 1 "register_operand" "0,x")
4152 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
4153 (parallel [(match_operand 3 "const_0_to_1_operand" "")
4154 (match_operand 4 "const_2_to_3_operand" "")])))]
4158 mask = INTVAL (operands[3]);
4159 mask |= (INTVAL (operands[4]) - 2) << 1;
4160 operands[3] = GEN_INT (mask);
4162 switch (which_alternative)
4165 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4167 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4172 [(set_attr "isa" "noavx,avx")
4173 (set_attr "type" "sselog")
4174 (set_attr "length_immediate" "1")
4175 (set_attr "prefix" "orig,vex")
4176 (set_attr "mode" "V2DF")])
4178 ;; Avoid combining registers from different units in a single alternative,
4179 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_storehpd: DF destination from element 1 of V2DF operand 1;
;; rejected when both operands are in memory.  Six alternatives are
;; declared.  The two visible templates are a %vmovhpd move and a vunpckhpd
;; using %d1; the other templates are not visible in this view.  The
;; prefix_data16 attribute is computed from a test on alternative 0 with
;; TARGET_AVX equal to 0 (its result values are not fully visible here).
4180 (define_insn "sse2_storehpd"
4181 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
4183 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
4184 (parallel [(const_int 1)])))]
4185 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4187 %vmovhpd\t{%1, %0|%0, %1}
4189 vunpckhpd\t{%d1, %0|%0, %d1}
4193 [(set_attr "isa" "base,noavx,avx,base,base,base")
4194 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
4195 (set (attr "prefix_data16")
4197 (and (eq_attr "alternative" "0")
4198 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4200 (const_string "*")))
4201 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
4202 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 1 of a V2DF memory
;; operand into a DF register becomes a plain DFmode load from byte offset
;; 8 of that memory.
4205 [(set (match_operand:DF 0 "register_operand" "")
4207 (match_operand:V2DF 1 "memory_operand" "")
4208 (parallel [(const_int 1)])))]
4209 "TARGET_SSE2 && reload_completed"
4210 [(set (match_dup 0) (match_dup 1))]
4211 "operands[1] = adjust_address (operands[1], DFmode, 8);")
4213 ;; Avoid combining registers from different units in a single alternative,
4214 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_storelpd: DF destination from element 0 of V2DF operand 1;
;; rejected when both operands are in memory.  Five alternatives are
;; declared.  The one visible template is a %vmovlpd move; the other
;; templates are not visible in this view.
4215 (define_insn "sse2_storelpd"
4216 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
4218 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
4219 (parallel [(const_int 0)])))]
4220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4222 %vmovlpd\t{%1, %0|%0, %1}
4227 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4228 (set_attr "prefix_data16" "1,*,*,*,*")
4229 (set_attr "prefix" "maybe_vex")
4230 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): extracting element 0 of V2DF operand 1
;; into a DF register becomes a plain move whose source is operand 1
;; viewed as DFmode, built either as a fresh DFmode REG with the same
;; register number or with gen_lowpart (the test choosing between them is
;; not visible here).
4233 [(set (match_operand:DF 0 "register_operand" "")
4235 (match_operand:V2DF 1 "nonimmediate_operand" "")
4236 (parallel [(const_int 0)])))]
4237 "TARGET_SSE2 && reload_completed"
4240 rtx op1 = operands[1];
4242 op1 = gen_rtx_REG (DFmode, REGNO (op1));
4244 op1 = gen_lowpart (DFmode, op1);
4245 emit_move_insn (operands[0], op1);
;; sse2_loadhpd_exp: expander wrapping sse2_loadhpd.  The operands are
;; first passed through ix86_fixup_binary_operands, which returns the
;; destination to use; sse2_loadhpd is emitted into that destination, and
;; if it differs from operand 0 the result is copied back to operand 0.
4249 (define_expand "sse2_loadhpd_exp"
4250 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4253 (match_operand:V2DF 1 "nonimmediate_operand" "")
4254 (parallel [(const_int 0)]))
4255 (match_operand:DF 2 "nonimmediate_operand" "")))]
4258 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4260 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
4262 /* Fix up the destination if needed. */
4263 if (dst != operands[0])
4264 emit_move_insn (operands[0], dst);
4269 ;; Avoid combining registers from different units in a single alternative,
4270 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadhpd: V2DF destination from element 0 of V2DF operand 1 and DF
;; operand 2; rejected when operands 1 and 2 are both in memory.  Seven
;; alternatives are declared: a memory operand 2 uses movhpd / vmovhpd, an
;; SSE register operand 2 uses unpcklpd / vunpcklpd, and the templates of
;; the last three alternatives are not visible in this view.  The
;; constraint strings for operands 0 and 1 are also not visible.
4271 (define_insn "sse2_loadhpd"
4272 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4276 (match_operand:V2DF 1 "nonimmediate_operand"
4278 (parallel [(const_int 0)]))
4279 (match_operand:DF 2 "nonimmediate_operand"
4280 " m,m,x,x,x,*f,r")))]
4281 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4283 movhpd\t{%2, %0|%0, %2}
4284 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4285 unpcklpd\t{%2, %0|%0, %2}
4286 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4290 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
4291 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
4292 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
4293 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
4294 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when a V2DF memory destination keeps
;; its own element 0 and takes DF register operand 1 as the other element,
;; only the new element is written, as a plain DFmode store to byte offset
;; 8 of the destination.
4297 [(set (match_operand:V2DF 0 "memory_operand" "")
4299 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
4300 (match_operand:DF 1 "register_operand" "")))]
4301 "TARGET_SSE2 && reload_completed"
4302 [(set (match_dup 0) (match_dup 1))]
4303 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; sse2_loadlpd_exp: expander wrapping sse2_loadlpd.  The operands are
;; first passed through ix86_fixup_binary_operands, which returns the
;; destination to use; sse2_loadlpd is emitted into that destination, and
;; if it differs from operand 0 the result is copied back to operand 0.
4305 (define_expand "sse2_loadlpd_exp"
4306 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4308 (match_operand:DF 2 "nonimmediate_operand" "")
4310 (match_operand:V2DF 1 "nonimmediate_operand" "")
4311 (parallel [(const_int 1)]))))]
4314 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
4316 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
4318 /* Fix up the destination if needed. */
4319 if (dst != operands[0])
4320 emit_move_insn (operands[0], dst);
4325 ;; Avoid combining registers from different units in a single alternative,
4326 ;; see comment above inline_secondary_memory_needed function in i386.c
;; sse2_loadlpd: V2DF destination from DF operand 2 and element 1 of V2DF
;; operand 1; rejected when operands 1 and 2 are both in memory.
;; Eleven alternatives, in order:
;;   0     operand 2 in memory, operand 1 matching "C": %vmovsd load
;;   1/2   operand 2 in memory: movlpd / vmovlpd
;;   3/4   operand 2 in an SSE register: movsd / vmovsd
;;   5     operand 2 tied to the destination: shufpd with immediate 2
;;   6/7   operand 1 in offsettable memory: movhpd / vmovhpd of %H1
;;   8-10  memory destination tied to operand 1; their templates are not
;;         visible in this view
4327 (define_insn "sse2_loadlpd"
4328 [(set (match_operand:V2DF 0 "nonimmediate_operand"
4329 "=x,x,x,x,x,x,x,x,m,m ,m")
4331 (match_operand:DF 2 "nonimmediate_operand"
4332 " m,m,m,x,x,0,0,x,x,*f,r")
4334 (match_operand:V2DF 1 "vector_move_operand"
4335 " C,0,x,0,x,x,o,o,0,0 ,0")
4336 (parallel [(const_int 1)]))))]
4337 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4339 %vmovsd\t{%2, %0|%0, %2}
4340 movlpd\t{%2, %0|%0, %2}
4341 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4342 movsd\t{%2, %0|%0, %2}
4343 vmovsd\t{%2, %1, %0|%0, %1, %2}
4344 shufpd\t{$2, %1, %0|%0, %1, 2}
4345 movhpd\t{%H1, %0|%0, %H1}
4346 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
4350 [(set_attr "isa" "base,noavx,avx,noavx,avx,noavx,noavx,avx,base,base,base")
4351 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov,fmov,imov")
4352 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
4353 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
4354 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
4355 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload split (TARGET_SSE2): when a V2DF memory destination takes
;; DF register operand 1 as its first element and keeps its own element 1,
;; only the new element is written, as a plain DFmode store.  The new value
;; is the low element, so the store goes to byte offset 0; offset 8 holds
;; element 1, which the pattern says must be preserved.
4358 [(set (match_operand:V2DF 0 "memory_operand" "")
4360 (match_operand:DF 1 "register_operand" "")
4361 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
4362 "TARGET_SSE2 && reload_completed"
4363 [(set (match_dup 0) (match_dup 1))]
4364 "operands[0] = adjust_address (operands[0], DFmode, 0);")
4366 ;; Not sure these two are ever used, but it doesn't hurt to have them.
;; *vec_extractv2df_1_sse: SSE-without-SSE2 extraction of element 1 of
;; V2DF operand 1 into a DF destination; rejected when both operands are
;; in memory.  Alternatives, in order: movhps store to memory; movhlps
;; between registers; movlps load of %H1 from offsettable memory.
4368 (define_insn "*vec_extractv2df_1_sse"
4369 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4371 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
4372 (parallel [(const_int 1)])))]
4373 "!TARGET_SSE2 && TARGET_SSE
4374 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4376 movhps\t{%1, %0|%0, %1}
4377 movhlps\t{%1, %0|%0, %1}
4378 movlps\t{%H1, %0|%0, %H1}"
4379 [(set_attr "type" "ssemov")
4380 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts index 0 of V2DF operand 1 into DF operand 0 on targets with
;; TARGET_SSE but without TARGET_SSE2.  Memory-to-memory is rejected.
;; Alternatives: store to memory (movlps), register copy (movaps),
;; load from memory (movlps).
4382 (define_insn "*vec_extractv2df_0_sse"
4383 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
4385 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
4386 (parallel [(const_int 0)])))]
4387 "!TARGET_SSE2 && TARGET_SSE
4388 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
4390 movlps\t{%1, %0|%0, %1}
4391 movaps\t{%1, %0|%0, %1}
4392 movlps\t{%1, %0|%0, %1}"
4393 [(set_attr "type" "ssemov")
4394 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; sse2_movsd: nine-alternative pattern producing V2DF operand 0 from V2DF
;; operands 1 and 2.  Depending on the alternative it emits movsd, movlpd,
;; shufpd or movhps, with VEX three-operand forms for the "avx"
;; alternatives.  prefix_data16 is set through an explicit condition on
;; alternatives 2 and 4 when TARGET_AVX is 0.
;; NOTE(review): the rtx code combining operands 1 and 2 and the insn
;; condition are not visible in this excerpt.
4396 (define_insn "sse2_movsd"
4397 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
4399 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
4400 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
4404 movsd\t{%2, %0|%0, %2}
4405 vmovsd\t{%2, %1, %0|%0, %1, %2}
4406 movlpd\t{%2, %0|%0, %2}
4407 vmovlpd\t{%2, %1, %0|%0, %1, %2}
4408 %vmovlpd\t{%2, %0|%0, %2}
4409 shufpd\t{$2, %1, %0|%0, %1, 2}
4410 movhps\t{%H1, %0|%0, %H1}
4411 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
4412 %vmovhps\t{%1, %H0|%H0, %1}"
4413 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx,avx,base")
4414 (set_attr "type" "ssemov,ssemov,ssemov,ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
4415 (set (attr "prefix_data16")
4417 (and (eq_attr "alternative" "2,4")
4418 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
4420 (const_string "*")))
4421 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
4422 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
4423 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Produces V2DF register operand 0 from DF operand 1 (register or memory)
;; with a single %vmovddup.
;; NOTE(review): the insn condition is not visible in this excerpt; the
;; name suggests it is SSE3-only -- confirm.
4425 (define_insn "*vec_dupv2df_sse3"
4426 [(set (match_operand:V2DF 0 "register_operand" "=x")
4428 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
4430 "%vmovddup\t{%1, %0|%0, %1}"
4431 [(set_attr "type" "sselog1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DF")])
;; vec_dupv2df: V2DF register operand 0 produced from DF register operand 1,
;; which is tied to the destination (constraint "0").
;; NOTE(review): the insn condition and output template are not visible in
;; this excerpt.
4435 (define_insn "vec_dupv2df"
4436 [(set (match_operand:V2DF 0 "register_operand" "=x")
4438 (match_operand:DF 1 "register_operand" "0")))]
4441 [(set_attr "type" "sselog1")
4442 (set_attr "mode" "V2DF")])
;; Emits %vmovddup from DF operand 1 (register or memory) into V2DF
;; register operand 0.
;; NOTE(review): the second half of the concatenation and the insn
;; condition are not visible in this excerpt; presumably both halves are
;; operand 1 -- confirm.
4444 (define_insn "*vec_concatv2df_sse3"
4445 [(set (match_operand:V2DF 0 "register_operand" "=x")
4447 (match_operand:DF 1 "nonimmediate_operand" "xm")
4450 "%vmovddup\t{%1, %0|%0, %1}"
4451 [(set_attr "type" "sselog1")
4452 (set_attr "prefix" "maybe_vex")
4453 (set_attr "mode" "DF")])
;; Builds V2DF register operand 0 from DF operands 1 and 2.  Seven
;; alternatives: unpcklpd/vunpcklpd (register operand 2), movhpd/vmovhpd
;; (memory operand 2), %vmovsd of operand 1 when operand 2 matches
;; constraint "C", and movlhps/movhps for the plain "x" register class.
;; NOTE(review): the rtx code and insn condition are not visible in this
;; excerpt.
4455 (define_insn "*vec_concatv2df"
4456 [(set (match_operand:V2DF 0 "register_operand" "=Y2,x,Y2,x,Y2,x,x")
4458 (match_operand:DF 1 "nonimmediate_operand" " 0 ,x,0 ,x,m ,0,0")
4459 (match_operand:DF 2 "vector_move_operand" " Y2,x,m ,m,C ,x,m")))]
4462 unpcklpd\t{%2, %0|%0, %2}
4463 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4464 movhpd\t{%2, %0|%0, %2}
4465 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4466 %vmovsd\t{%1, %0|%0, %1}
4467 movlhps\t{%2, %0|%0, %2}
4468 movhps\t{%2, %0|%0, %2}"
4469 [(set_attr "isa" "noavx,avx,noavx,avx,base,noavx,noavx")
4470 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,ssemov,ssemov")
4471 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
4472 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
4473 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
4475 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4477 ;; Parallel integral arithmetic
4479 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; neg<mode>2 expander for the VI_128 integer vector modes.  The
;; preparation statement forces the all-zero vector of the mode into a
;; register and stores it as operands[2].
;; NOTE(review): the rtx template using operands[2] is not visible in this
;; excerpt; presumably negation is expressed as zero minus operand 1.
4481 (define_expand "neg<mode>2"
4482 [(set (match_operand:VI_128 0 "register_operand" "")
4485 (match_operand:VI_128 1 "nonimmediate_operand" "")))]
4487 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Named expander for element-wise add/subtract on VI_128 modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the rtx code line and expander condition are not visible
;; in this excerpt.
4489 (define_expand "<plusminus_insn><mode>3"
4490 [(set (match_operand:VI_128 0 "register_operand" "")
4492 (match_operand:VI_128 1 "nonimmediate_operand" "")
4493 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
4495 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for add/subtract on VI_128 modes.  Requires TARGET_SSE2
;; and ix86_binary_operator_ok.  Alternative 0 is the two-operand legacy
;; form (operand 1 tied to the destination), alternative 1 the
;; three-operand VEX form.
4497 (define_insn "*<plusminus_insn><mode>3"
4498 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
4500 (match_operand:VI_128 1 "nonimmediate_operand" "<comm>0,x")
4501 (match_operand:VI_128 2 "nonimmediate_operand" "xm,xm")))]
4502 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4504 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4505 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4506 [(set_attr "isa" "noavx,avx")
4507 (set_attr "type" "sseiadd")
4508 (set_attr "prefix_data16" "1,*")
4509 (set_attr "prefix" "orig,vex")
4510 (set_attr "mode" "TI")])
;; Expander for the sat_plusminus operations on VI12_128 modes.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the expander condition is not visible in this excerpt.
4512 (define_expand "sse2_<plusminus_insn><mode>3"
4513 [(set (match_operand:VI12_128 0 "register_operand" "")
4514 (sat_plusminus:VI12_128
4515 (match_operand:VI12_128 1 "nonimmediate_operand" "")
4516 (match_operand:VI12_128 2 "nonimmediate_operand" "")))]
4518 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the sat_plusminus operations on VI12_128 modes.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand VEX form.
4520 (define_insn "*sse2_<plusminus_insn><mode>3"
4521 [(set (match_operand:VI12_128 0 "register_operand" "=x,x")
4522 (sat_plusminus:VI12_128
4523 (match_operand:VI12_128 1 "nonimmediate_operand" "<comm>0,x")
4524 (match_operand:VI12_128 2 "nonimmediate_operand" "xm,xm")))]
4525 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
4527 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
4528 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
4529 [(set_attr "isa" "noavx,avx")
4530 (set_attr "type" "sseiadd")
4531 (set_attr "prefix_data16" "1,*")
4532 (set_attr "prefix" "orig,vex")
4533 (set_attr "mode" "TI")])
;; mulv16qi3: V16QI multiply implemented as a split while pseudos can still
;; be created.  Six V16QI temporaries are allocated.  Each input is
;; interleaved with itself (high and low halves separately), the results
;; are multiplied as V8HI via gen_mulv8hi3, and the even bytes of the two
;; products are merged into operand 0.
;; NOTE(review): the first line of the condition and the declarations of
;; `t' and `i' are not visible in this excerpt.
4535 (define_insn_and_split "mulv16qi3"
4536 [(set (match_operand:V16QI 0 "register_operand" "")
4537 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4538 (match_operand:V16QI 2 "register_operand" "")))]
4540 && can_create_pseudo_p ()"
4548 for (i = 0; i < 6; ++i)
4549 t[i] = gen_reg_rtx (V16QImode);
4551 /* Unpack data such that we've got a source byte in each low byte of
4552 each word. We don't care what goes into the high byte of each word.
4553 Rather than trying to get zero in there, most convenient is to let
4554 it be a copy of the low byte. */
4555 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4556 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4557 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4558 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4560 /* Multiply words. The end-of-line annotations here give a picture of what
4561 the output of that instruction looks like. Dot means don't care; the
4562 letters are the bytes of the result with A being the most significant. */
4563 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4564 gen_lowpart (V8HImode, t[0]),
4565 gen_lowpart (V8HImode, t[1])));
4566 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4567 gen_lowpart (V8HImode, t[2]),
4568 gen_lowpart (V8HImode, t[3])));
4570 /* Extract the even bytes and merge them back together. */
4571 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
;; mulv8hi3 expander: V8HI multiply.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the expander condition is not visible in this excerpt.
4575 (define_expand "mulv8hi3"
4576 [(set (match_operand:V8HI 0 "register_operand" "")
4577 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
4578 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
4580 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI multiply: pmullw (legacy, operand 1 tied to
;; the destination) or vpmullw (VEX three-operand).  Operand 1 is marked
;; commutative ("%").  Requires TARGET_SSE2 and ix86_binary_operator_ok.
4582 (define_insn "*mulv8hi3"
4583 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4584 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4585 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
4586 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4588 pmullw\t{%2, %0|%0, %2}
4589 vpmullw\t{%2, %1, %0|%0, %1, %2}"
4590 [(set_attr "isa" "noavx,avx")
4591 (set_attr "type" "sseimul")
4592 (set_attr "prefix_data16" "1,*")
4593 (set_attr "prefix" "orig,vex")
4594 (set_attr "mode" "TI")])
;; <s>mulv8hi3_highpart expander over V8HI operands 1 and 2.  Its only
;; preparation step is ix86_fixup_binary_operands_no_copy for MULT.
;; NOTE(review): the extension, multiply, shift and truncate rtx lines and
;; the expander condition are not visible in this excerpt.
4596 (define_expand "<s>mulv8hi3_highpart"
4597 [(set (match_operand:V8HI 0 "register_operand" "")
4602 (match_operand:V8HI 1 "nonimmediate_operand" ""))
4604 (match_operand:V8HI 2 "nonimmediate_operand" "")))
4607 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the V8HI high-part multiply: pmulh<u>w (legacy) or
;; vpmulh<u>w (VEX three-operand).  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
;; NOTE(review): the surrounding rtx lines are not visible in this excerpt.
4609 (define_insn "*<s>mulv8hi3_highpart"
4610 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
4615 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
4617 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
4619 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4621 pmulh<u>w\t{%2, %0|%0, %2}
4622 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
4623 [(set_attr "isa" "noavx,avx")
4624 (set_attr "type" "sseimul")
4625 (set_attr "prefix_data16" "1,*")
4626 (set_attr "prefix" "orig,vex")
4627 (set_attr "mode" "TI")])
;; sse2_umulv2siv2di3 expander: V2DI result from elements 0 and 2 of V4SI
;; operands 1 and 2.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
;; NOTE(review): the extension and multiply rtx lines and the expander
;; condition are not visible in this excerpt.
4629 (define_expand "sse2_umulv2siv2di3"
4630 [(set (match_operand:V2DI 0 "register_operand" "")
4634 (match_operand:V4SI 1 "nonimmediate_operand" "")
4635 (parallel [(const_int 0) (const_int 2)])))
4638 (match_operand:V4SI 2 "nonimmediate_operand" "")
4639 (parallel [(const_int 0) (const_int 2)])))))]
4641 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn: pmuludq (legacy) or vpmuludq (VEX three-operand) on
;; elements 0 and 2 of V4SI operands 1 and 2, producing V2DI operand 0.
;; Requires TARGET_SSE2 and ix86_binary_operator_ok.
4643 (define_insn "*sse2_umulv2siv2di3"
4644 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4648 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4649 (parallel [(const_int 0) (const_int 2)])))
4652 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4653 (parallel [(const_int 0) (const_int 2)])))))]
4654 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4656 pmuludq\t{%2, %0|%0, %2}
4657 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
4658 [(set_attr "isa" "noavx,avx")
4659 (set_attr "type" "sseimul")
4660 (set_attr "prefix_data16" "1,*")
4661 (set_attr "prefix" "orig,vex")
4662 (set_attr "mode" "TI")])
;; sse4_1_mulv2siv2di3 expander: V2DI result from elements 0 and 2 of V4SI
;; operands 1 and 2.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for MULT in V4SImode.
;; NOTE(review): the extension and multiply rtx lines and the expander
;; condition are not visible in this excerpt.
4664 (define_expand "sse4_1_mulv2siv2di3"
4665 [(set (match_operand:V2DI 0 "register_operand" "")
4669 (match_operand:V4SI 1 "nonimmediate_operand" "")
4670 (parallel [(const_int 0) (const_int 2)])))
4673 (match_operand:V4SI 2 "nonimmediate_operand" "")
4674 (parallel [(const_int 0) (const_int 2)])))))]
4676 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; Matching insn: pmuldq (legacy) or vpmuldq (VEX three-operand) on
;; elements 0 and 2 of V4SI operands 1 and 2, producing V2DI operand 0.
;; Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
4678 (define_insn "*sse4_1_mulv2siv2di3"
4679 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
4683 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4684 (parallel [(const_int 0) (const_int 2)])))
4687 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
4688 (parallel [(const_int 0) (const_int 2)])))))]
4689 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4691 pmuldq\t{%2, %0|%0, %2}
4692 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4693 [(set_attr "isa" "noavx,avx")
4694 (set_attr "type" "sseimul")
4695 (set_attr "prefix_data16" "1,*")
4696 (set_attr "prefix_extra" "1")
4697 (set_attr "prefix" "orig,vex")
4698 (set_attr "mode" "TI")])
;; sse2_pmaddwd expander: V4SI result built from V4HI selections of V8HI
;; operands 1 and 2 (one pair of selections starting at index 0, another
;; starting at index 1).  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for MULT in V8HImode.
;; NOTE(review): most of the rtx template and the expander condition are
;; not visible in this excerpt.
4700 (define_expand "sse2_pmaddwd"
4701 [(set (match_operand:V4SI 0 "register_operand" "")
4706 (match_operand:V8HI 1 "nonimmediate_operand" "")
4707 (parallel [(const_int 0)
4713 (match_operand:V8HI 2 "nonimmediate_operand" "")
4714 (parallel [(const_int 0)
4720 (vec_select:V4HI (match_dup 1)
4721 (parallel [(const_int 1)
4726 (vec_select:V4HI (match_dup 2)
4727 (parallel [(const_int 1)
4730 (const_int 7)]))))))]
4732 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn: pmaddwd (legacy) or vpmaddwd (VEX three-operand) on V8HI
;; operands 1 and 2, producing V4SI operand 0.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok.
;; NOTE(review): most of the rtx template is not visible in this excerpt.
4734 (define_insn "*sse2_pmaddwd"
4735 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4740 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
4741 (parallel [(const_int 0)
4747 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
4748 (parallel [(const_int 0)
4754 (vec_select:V4HI (match_dup 1)
4755 (parallel [(const_int 1)
4760 (vec_select:V4HI (match_dup 2)
4761 (parallel [(const_int 1)
4764 (const_int 7)]))))))]
4765 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4767 pmaddwd\t{%2, %0|%0, %2}
4768 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
4769 [(set_attr "isa" "noavx,avx")
4770 (set_attr "type" "sseiadd")
4771 (set_attr "atom_unit" "simul")
4772 (set_attr "prefix_data16" "1,*")
4773 (set_attr "prefix" "orig,vex")
4774 (set_attr "mode" "TI")])
;; mulv4si3 expander: V4SI multiply of two register operands.  The binary
;; operand fix-up is applied only when TARGET_SSE4_1 or TARGET_AVX is set.
;; NOTE(review): the expander condition is not visible in this excerpt.
4776 (define_expand "mulv4si3"
4777 [(set (match_operand:V4SI 0 "register_operand" "")
4778 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4779 (match_operand:V4SI 2 "register_operand" "")))]
4782 if (TARGET_SSE4_1 || TARGET_AVX)
4783 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
;; Matching insn for the V4SI multiply: pmulld (legacy) or vpmulld (VEX
;; three-operand).  Requires TARGET_SSE4_1 and ix86_binary_operator_ok.
4786 (define_insn "*sse4_1_mulv4si3"
4787 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
4788 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
4789 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")))]
4790 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4792 pmulld\t{%2, %0|%0, %2}
4793 vpmulld\t{%2, %1, %0|%0, %1, %2}"
4794 [(set_attr "isa" "noavx,avx")
4795 (set_attr "type" "sseimul")
4796 (set_attr "prefix_extra" "1")
4797 (set_attr "prefix" "orig,vex")
4798 (set_attr "mode" "TI")])
;; V4SI multiply for TARGET_SSE2 without SSE4.1 or AVX, implemented as a
;; split while pseudos can still be created.  It multiplies elements 0 and
;; 2 with gen_sse2_umulv2siv2di3, shifts both inputs right as V1TI values
;; so that elements 1 and 3 take those slots, multiplies again, shuffles
;; each product with pshufd and merges the two with an interleave-low.
;; NOTE(review): several argument lines, including the shift counts, are
;; not visible in this excerpt.
4800 (define_insn_and_split "*sse2_mulv4si3"
4801 [(set (match_operand:V4SI 0 "register_operand" "")
4802 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
4803 (match_operand:V4SI 2 "register_operand" "")))]
4804 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
4805 && can_create_pseudo_p ()"
4810 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4816 t1 = gen_reg_rtx (V4SImode);
4817 t2 = gen_reg_rtx (V4SImode);
4818 t3 = gen_reg_rtx (V4SImode);
4819 t4 = gen_reg_rtx (V4SImode);
4820 t5 = gen_reg_rtx (V4SImode);
4821 t6 = gen_reg_rtx (V4SImode);
4822 thirtytwo = GEN_INT (32);
4824 /* Multiply elements 2 and 0. */
4825 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
4828 /* Shift both input vectors down one element, so that elements 3
4829 and 1 are now in the slots for elements 2 and 0. For K8, at
4830 least, this is faster than using a shuffle. */
4831 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
4832 gen_lowpart (V1TImode, op1),
4834 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
4835 gen_lowpart (V1TImode, op2),
4837 /* Multiply elements 3 and 1. */
4838 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
4841 /* Move the results in element 2 down to element 1; we don't care
4842 what goes in elements 2 and 3. */
4843 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
4844 const0_rtx, const0_rtx));
4845 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
4846 const0_rtx, const0_rtx));
4848 /* Merge the parts back together. */
4849 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
;; mulv2di3: V2DI multiply implemented as a split while pseudos can still
;; be created.  Two code sequences are visible.  The first uses XOP
;; helpers (gen_xop_phadddq, gen_xop_pmacsdql) on V4SI views of the inputs.
;; The second builds the product from three gen_sse2_umulv2siv2di3
;; partial products: low*low plus the two cross products shifted left by
;; 32 bits.
;; NOTE(review): the condition selecting between the two sequences and the
;; first line of the insn condition are not visible in this excerpt.
4853 (define_insn_and_split "mulv2di3"
4854 [(set (match_operand:V2DI 0 "register_operand" "")
4855 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
4856 (match_operand:V2DI 2 "register_operand" "")))]
4858 && can_create_pseudo_p ()"
4863 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
4872 /* op1: A,B,C,D, op2: E,F,G,H */
4873 op1 = gen_lowpart (V4SImode, op1);
4874 op2 = gen_lowpart (V4SImode, op2);
4876 t1 = gen_reg_rtx (V4SImode);
4877 t2 = gen_reg_rtx (V4SImode);
4878 t3 = gen_reg_rtx (V2DImode);
4879 t4 = gen_reg_rtx (V2DImode);
4882 emit_insn (gen_sse2_pshufd_1 (t1, op1,
4888 /* t2: (B*E),(A*F),(D*G),(C*H) */
4889 emit_insn (gen_mulv4si3 (t2, t1, op2));
4891 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
4892 emit_insn (gen_xop_phadddq (t3, t2));
4894 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
4895 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
4897 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
4898 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
4902 t1 = gen_reg_rtx (V2DImode);
4903 t2 = gen_reg_rtx (V2DImode);
4904 t3 = gen_reg_rtx (V2DImode);
4905 t4 = gen_reg_rtx (V2DImode);
4906 t5 = gen_reg_rtx (V2DImode);
4907 t6 = gen_reg_rtx (V2DImode);
4908 thirtytwo = GEN_INT (32);
4910 /* Multiply low parts. */
4911 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
4912 gen_lowpart (V4SImode, op2)));
4914 /* Shift input vectors right 32 bits so we can multiply high parts. */
4915 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
4916 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
4918 /* Multiply high parts by low parts. */
4919 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
4920 gen_lowpart (V4SImode, t3)));
4921 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
4922 gen_lowpart (V4SImode, t2)));
4924 /* Shift them back. */
4925 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
4926 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
4928 /* Add the three parts together. */
4929 emit_insn (gen_addv2di3 (t6, t1, t4));
4930 emit_insn (gen_addv2di3 (op0, t6, t5));
;; vec_widen_smult_hi_v8hi: computes the low V8HI product (gen_mulv8hi3)
;; and the signed high-part product (gen_smulv8hi3_highpart) of the two
;; inputs, then interleaves their high halves into the V8HI view of V4SI
;; operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
4935 (define_expand "vec_widen_smult_hi_v8hi"
4936 [(match_operand:V4SI 0 "register_operand" "")
4937 (match_operand:V8HI 1 "register_operand" "")
4938 (match_operand:V8HI 2 "register_operand" "")]
4941 rtx op1, op2, t1, t2, dest;
4945 t1 = gen_reg_rtx (V8HImode);
4946 t2 = gen_reg_rtx (V8HImode);
4947 dest = gen_lowpart (V8HImode, operands[0]);
4949 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4950 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4951 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_smult_lo_v8hi: computes the low V8HI product (gen_mulv8hi3)
;; and the signed high-part product (gen_smulv8hi3_highpart) of the two
;; inputs, then interleaves their low halves into the V8HI view of V4SI
;; operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
4955 (define_expand "vec_widen_smult_lo_v8hi"
4956 [(match_operand:V4SI 0 "register_operand" "")
4957 (match_operand:V8HI 1 "register_operand" "")
4958 (match_operand:V8HI 2 "register_operand" "")]
4961 rtx op1, op2, t1, t2, dest;
4965 t1 = gen_reg_rtx (V8HImode);
4966 t2 = gen_reg_rtx (V8HImode);
4967 dest = gen_lowpart (V8HImode, operands[0]);
4969 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4970 emit_insn (gen_smulv8hi3_highpart (t2, op1, op2));
4971 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_umult_hi_v8hi: computes the low V8HI product (gen_mulv8hi3)
;; and the unsigned high-part product (gen_umulv8hi3_highpart) of the two
;; inputs, then interleaves their high halves into the V8HI view of V4SI
;; operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
4975 (define_expand "vec_widen_umult_hi_v8hi"
4976 [(match_operand:V4SI 0 "register_operand" "")
4977 (match_operand:V8HI 1 "register_operand" "")
4978 (match_operand:V8HI 2 "register_operand" "")]
4981 rtx op1, op2, t1, t2, dest;
4985 t1 = gen_reg_rtx (V8HImode);
4986 t2 = gen_reg_rtx (V8HImode);
4987 dest = gen_lowpart (V8HImode, operands[0]);
4989 emit_insn (gen_mulv8hi3 (t1, op1, op2));
4990 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
4991 emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
;; vec_widen_umult_lo_v8hi: computes the low V8HI product (gen_mulv8hi3)
;; and the unsigned high-part product (gen_umulv8hi3_highpart) of the two
;; inputs, then interleaves their low halves into the V8HI view of V4SI
;; operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
4995 (define_expand "vec_widen_umult_lo_v8hi"
4996 [(match_operand:V4SI 0 "register_operand" "")
4997 (match_operand:V8HI 1 "register_operand" "")
4998 (match_operand:V8HI 2 "register_operand" "")]
5001 rtx op1, op2, t1, t2, dest;
5005 t1 = gen_reg_rtx (V8HImode);
5006 t2 = gen_reg_rtx (V8HImode);
5007 dest = gen_lowpart (V8HImode, operands[0]);
5009 emit_insn (gen_mulv8hi3 (t1, op1, op2));
5010 emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
5011 emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
;; vec_widen_smult_hi_v4si: shuffles both V4SI inputs with pshufd into
;; fresh temporaries and hands them to gen_xop_mulv2div2di3_high, which
;; writes V2DI operand 0.
;; NOTE(review): the shuffle selectors and the expander condition are not
;; visible in this excerpt; use of an xop_ helper suggests an XOP-only
;; expander -- confirm.
5015 (define_expand "vec_widen_smult_hi_v4si"
5016 [(match_operand:V2DI 0 "register_operand" "")
5017 (match_operand:V4SI 1 "register_operand" "")
5018 (match_operand:V4SI 2 "register_operand" "")]
5023 t1 = gen_reg_rtx (V4SImode);
5024 t2 = gen_reg_rtx (V4SImode);
5026 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5031 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5036 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
;; vec_widen_smult_lo_v4si: shuffles both V4SI inputs with pshufd into
;; fresh temporaries and hands them to gen_xop_mulv2div2di3_low, which
;; writes V2DI operand 0.
;; NOTE(review): the shuffle selectors and the expander condition are not
;; visible in this excerpt; use of an xop_ helper suggests an XOP-only
;; expander -- confirm.
5040 (define_expand "vec_widen_smult_lo_v4si"
5041 [(match_operand:V2DI 0 "register_operand" "")
5042 (match_operand:V4SI 1 "register_operand" "")
5043 (match_operand:V4SI 2 "register_operand" "")]
5048 t1 = gen_reg_rtx (V4SImode);
5049 t2 = gen_reg_rtx (V4SImode);
5051 emit_insn (gen_sse2_pshufd_1 (t1, operands[1],
5056 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5061 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
;; vec_widen_umult_hi_v4si: interleaves the high half of each V4SI input
;; with itself, then multiplies the two results with
;; gen_sse2_umulv2siv2di3 into V2DI operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
5065 (define_expand "vec_widen_umult_hi_v4si"
5066 [(match_operand:V2DI 0 "register_operand" "")
5067 (match_operand:V4SI 1 "register_operand" "")
5068 (match_operand:V4SI 2 "register_operand" "")]
5071 rtx op1, op2, t1, t2;
5075 t1 = gen_reg_rtx (V4SImode);
5076 t2 = gen_reg_rtx (V4SImode);
5078 emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
5079 emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
5080 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; vec_widen_umult_lo_v4si: interleaves the low half of each V4SI input
;; with itself, then multiplies the two results with
;; gen_sse2_umulv2siv2di3 into V2DI operand 0.
;; NOTE(review): the assignments of op1/op2 and the expander condition are
;; not visible in this excerpt.
5084 (define_expand "vec_widen_umult_lo_v4si"
5085 [(match_operand:V2DI 0 "register_operand" "")
5086 (match_operand:V4SI 1 "register_operand" "")
5087 (match_operand:V4SI 2 "register_operand" "")]
5090 rtx op1, op2, t1, t2;
5094 t1 = gen_reg_rtx (V4SImode);
5095 t2 = gen_reg_rtx (V4SImode);
5097 emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
5098 emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
5099 emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
;; sdot_prodv8hi: emits gen_sse2_pmaddwd on V8HI operands 1 and 2 into a
;; fresh V4SI temporary, then adds V4SI operand 3 to it, writing operand 0.
;; NOTE(review): the expander condition is not visible in this excerpt.
5103 (define_expand "sdot_prodv8hi"
5104 [(match_operand:V4SI 0 "register_operand" "")
5105 (match_operand:V8HI 1 "register_operand" "")
5106 (match_operand:V8HI 2 "register_operand" "")
5107 (match_operand:V4SI 3 "register_operand" "")]
5110 rtx t = gen_reg_rtx (V4SImode);
5111 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
5112 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
;; udot_prodv4si: multiplies V4SI operands 1 and 2 with
;; gen_sse2_umulv2siv2di3 and adds V2DI operand 3.  It then shifts both
;; inputs right as V1TI values, multiplies the shifted copies the same way
;; and adds that second product, writing V2DI operand 0.
;; NOTE(review): the shift counts, the declarations of t1..t4 and the
;; expander condition are not visible in this excerpt.
5116 (define_expand "udot_prodv4si"
5117 [(match_operand:V2DI 0 "register_operand" "")
5118 (match_operand:V4SI 1 "register_operand" "")
5119 (match_operand:V4SI 2 "register_operand" "")
5120 (match_operand:V2DI 3 "register_operand" "")]
5125 t1 = gen_reg_rtx (V2DImode);
5126 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5127 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5129 t2 = gen_reg_rtx (V4SImode);
5130 t3 = gen_reg_rtx (V4SImode);
5131 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5132 gen_lowpart (V1TImode, operands[1]),
5134 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5135 gen_lowpart (V1TImode, operands[2]),
5138 t4 = gen_reg_rtx (V2DImode);
5139 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5141 emit_insn (gen_addv2di3 (operands[0], t1, t4));
;; ashr<mode>3 for VI24_128 modes: psra<suffix> (legacy) or vpsra<suffix>
;; (VEX three-operand).  The SI shift count is a register or a constant
;; matching "N".  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the rtx code line and insn condition are not visible in
;; this excerpt.
5145 (define_insn "ashr<mode>3"
5146 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5148 (match_operand:VI24_128 1 "register_operand" "0,x")
5149 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5152 psra<ssemodesuffix>\t{%2, %0|%0, %2}
5153 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5154 [(set_attr "isa" "noavx,avx")
5155 (set_attr "type" "sseishft")
5156 (set (attr "length_immediate")
5157 (if_then_else (match_operand 2 "const_int_operand" "")
5159 (const_string "0")))
5160 (set_attr "prefix_data16" "1,*")
5161 (set_attr "prefix" "orig,vex")
5162 (set_attr "mode" "TI")])
;; lshr<mode>3 for VI248_128 modes: psrl<suffix> (legacy) or vpsrl<suffix>
;; (VEX three-operand).  The SI shift count is a register or a constant
;; matching "N".  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the rtx code line and insn condition are not visible in
;; this excerpt.
5164 (define_insn "lshr<mode>3"
5165 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5167 (match_operand:VI248_128 1 "register_operand" "0,x")
5168 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5171 psrl<ssemodesuffix>\t{%2, %0|%0, %2}
5172 vpsrl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5173 [(set_attr "isa" "noavx,avx")
5174 (set_attr "type" "sseishft")
5175 (set (attr "length_immediate")
5176 (if_then_else (match_operand 2 "const_int_operand" "")
5178 (const_string "0")))
5179 (set_attr "prefix_data16" "1,*")
5180 (set_attr "prefix" "orig,vex")
5181 (set_attr "mode" "TI")])
;; ashl<mode>3 for VI248_128 modes: psll<suffix> (legacy) or vpsll<suffix>
;; (VEX three-operand).  The SI shift count is a register or a constant
;; matching "N".  length_immediate depends on whether operand 2 is a
;; const_int.
;; NOTE(review): the rtx code line and insn condition are not visible in
;; this excerpt.
5183 (define_insn "ashl<mode>3"
5184 [(set (match_operand:VI248_128 0 "register_operand" "=x,x")
5186 (match_operand:VI248_128 1 "register_operand" "0,x")
5187 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
5190 psll<ssemodesuffix>\t{%2, %0|%0, %2}
5191 vpsll<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5192 [(set_attr "isa" "noavx,avx")
5193 (set_attr "type" "sseishft")
5194 (set (attr "length_immediate")
5195 (if_then_else (match_operand 2 "const_int_operand" "")
5197 (const_string "0")))
5198 (set_attr "prefix_data16" "1,*")
5199 (set_attr "prefix" "orig,vex")
5200 (set_attr "mode" "TI")])
;; vec_shl_<mode> expander for VI_128 modes with a constant shift count
;; (predicate const_0_to_255_mul_8_operand).  The preparation statements
;; replace operands 0 and 1 with their V1TImode views.
;; NOTE(review): the rtx code line and expander condition are not visible
;; in this excerpt.
5202 (define_expand "vec_shl_<mode>"
5203 [(set (match_operand:VI_128 0 "register_operand" "")
5205 (match_operand:VI_128 1 "register_operand" "")
5206 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5209 operands[0] = gen_lowpart (V1TImode, operands[0]);
5210 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_ashlv1ti3: whole-register left shift of a V1TI value.  The output
;; code divides the constant count in operand 2 by 8 before printing, then
;; returns pslldq (legacy) or vpslldq (VEX three-operand) according to
;; which_alternative.
;; NOTE(review): the rtx code line and insn condition are not visible in
;; this excerpt.
5213 (define_insn "sse2_ashlv1ti3"
5214 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5216 (match_operand:V1TI 1 "register_operand" "0,x")
5217 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5220 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5222 switch (which_alternative)
5225 return "pslldq\t{%2, %0|%0, %2}";
5227 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5232 [(set_attr "isa" "noavx,avx")
5233 (set_attr "type" "sseishft")
5234 (set_attr "length_immediate" "1")
5235 (set_attr "prefix_data16" "1,*")
5236 (set_attr "prefix" "orig,vex")
5237 (set_attr "mode" "TI")])
;; vec_shr_<mode> expander for VI_128 modes with a constant shift count
;; (predicate const_0_to_255_mul_8_operand).  The preparation statements
;; replace operands 0 and 1 with their V1TImode views.
;; NOTE(review): the rtx code line and expander condition are not visible
;; in this excerpt.
5239 (define_expand "vec_shr_<mode>"
5240 [(set (match_operand:VI_128 0 "register_operand" "")
5242 (match_operand:VI_128 1 "register_operand" "")
5243 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5246 operands[0] = gen_lowpart (V1TImode, operands[0]);
5247 operands[1] = gen_lowpart (V1TImode, operands[1]);
;; sse2_lshrv1ti3: whole-register right shift of a V1TI value.  The output
;; code divides the constant count in operand 2 by 8 before printing, then
;; returns psrldq (legacy) or vpsrldq (VEX three-operand) according to
;; which_alternative.
;; NOTE(review): the rtx code line and insn condition are not visible in
;; this excerpt.
5250 (define_insn "sse2_lshrv1ti3"
5251 [(set (match_operand:V1TI 0 "register_operand" "=x,x")
5253 (match_operand:V1TI 1 "register_operand" "0,x")
5254 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
5257 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5259 switch (which_alternative)
5262 return "psrldq\t{%2, %0|%0, %2}";
5264 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5269 [(set_attr "isa" "noavx,avx")
5270 (set_attr "type" "sseishft")
5271 (set_attr "length_immediate" "1")
5272 (set_attr "atom_unit" "sishuf")
5273 (set_attr "prefix_data16" "1,*")
5274 (set_attr "prefix" "orig,vex")
5275 (set_attr "mode" "TI")])
;; SSE4.1 integer min/max insn for VI14_128 modes:
;; p<maxmin_int><suffix> (legacy) or the VEX three-operand form.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
;; NOTE(review): the rtx code iterator line is not visible in this excerpt.
5277 (define_insn "*sse4_1_<code><mode>3"
5278 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
5280 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
5281 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
5282 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5284 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5285 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5286 [(set_attr "isa" "noavx,avx")
5287 (set_attr "type" "sseiadd")
5288 (set_attr "prefix_extra" "1,*")
5289 (set_attr "prefix" "orig,vex")
5290 (set_attr "mode" "TI")])
;; SSE2 integer min/max insn for V8HI: p<maxmin_int>w (legacy) or the VEX
;; three-operand form.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
;; NOTE(review): the rtx code iterator line is not visible in this excerpt.
5292 (define_insn "*<code>v8hi3"
5293 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5295 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
5296 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
5297 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
5299 p<maxmin_int>w\t{%2, %0|%0, %2}
5300 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
5301 [(set_attr "isa" "noavx,avx")
5302 (set_attr "type" "sseiadd")
5303 (set_attr "prefix_data16" "1,*")
5304 (set_attr "prefix_extra" "*,1")
5305 (set_attr "prefix" "orig,vex")
5306 (set_attr "mode" "TI")])
;; smax<mode>3 expander for VI14_128 modes.  Two paths are visible: a plain
;; operand fix-up for SMAX, and a fallback that hands
;; ix86_expand_int_vcond the values operand 1 / operand 2 together with a
;; GT comparison of operand 1 against operand 2.
;; NOTE(review): the test choosing between the two paths, the xops
;; declaration and the expander condition are not visible in this excerpt.
5308 (define_expand "smax<mode>3"
5309 [(set (match_operand:VI14_128 0 "register_operand" "")
5310 (smax:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5311 (match_operand:VI14_128 2 "register_operand" "")))]
5315 ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
5321 xops[0] = operands[0];
5322 xops[1] = operands[1];
5323 xops[2] = operands[2];
5324 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5325 xops[4] = operands[1];
5326 xops[5] = operands[2];
5327 ok = ix86_expand_int_vcond (xops);
;; smin<mode>3 expander for VI14_128 modes.  Two paths are visible: a plain
;; operand fix-up for SMIN, and a fallback through ix86_expand_int_vcond.
;; The fallback uses the same GT comparison as the max case but passes the
;; two value operands in swapped order (operand 2 first).
;; NOTE(review): the test choosing between the two paths, the xops
;; declaration and the expander condition are not visible in this excerpt.
5333 (define_expand "smin<mode>3"
5334 [(set (match_operand:VI14_128 0 "register_operand" "")
5335 (smin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
5336 (match_operand:VI14_128 2 "register_operand" "")))]
5340 ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
5346 xops[0] = operands[0];
5347 xops[1] = operands[2];
5348 xops[2] = operands[1];
5349 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5350 xops[4] = operands[1];
5351 xops[5] = operands[2];
5352 ok = ix86_expand_int_vcond (xops);
;; <code>v8hi3 expander for V8HI min/max.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the rtx code iterator line and expander condition are not
;; visible in this excerpt.
5358 (define_expand "<code>v8hi3"
5359 [(set (match_operand:V8HI 0 "register_operand" "")
5361 (match_operand:V8HI 1 "nonimmediate_operand" "")
5362 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5364 "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
;; smaxv2di3 expander: signed V2DI maximum built through
;; ix86_expand_int_vcond with a GT comparison of operand 1 against
;; operand 2 and the value operands in that same order.
;; NOTE(review): the xops declaration and expander condition are not
;; visible in this excerpt.
5366 (define_expand "smaxv2di3"
5367 [(set (match_operand:V2DI 0 "register_operand" "")
5368 (smax:V2DI (match_operand:V2DI 1 "register_operand" "")
5369 (match_operand:V2DI 2 "register_operand" "")))]
5375 xops[0] = operands[0];
5376 xops[1] = operands[1];
5377 xops[2] = operands[2];
5378 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5379 xops[4] = operands[1];
5380 xops[5] = operands[2];
5381 ok = ix86_expand_int_vcond (xops);
;; sminv2di3 expander: signed V2DI minimum built through
;; ix86_expand_int_vcond with a GT comparison of operand 1 against
;; operand 2 and the value operands swapped (operand 2 first).
;; NOTE(review): the xops declaration and expander condition are not
;; visible in this excerpt.
5386 (define_expand "sminv2di3"
5387 [(set (match_operand:V2DI 0 "register_operand" "")
5388 (smin:V2DI (match_operand:V2DI 1 "register_operand" "")
5389 (match_operand:V2DI 2 "register_operand" "")))]
5395 xops[0] = operands[0];
5396 xops[1] = operands[2];
5397 xops[2] = operands[1];
5398 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
5399 xops[4] = operands[1];
5400 xops[5] = operands[2];
5401 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 integer min/max insn for VI24_128 modes:
;; p<maxmin_int><suffix> (legacy) or the VEX three-operand form.  Requires
;; TARGET_SSE4_1 and ix86_binary_operator_ok.
;; NOTE(review): the rtx code iterator line is not visible in this excerpt.
5406 (define_insn "*sse4_1_<code><mode>3"
5407 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
5409 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
5410 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
5411 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5413 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
5414 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5415 [(set_attr "isa" "noavx,avx")
5416 (set_attr "type" "sseiadd")
5417 (set_attr "prefix_extra" "1,*")
5418 (set_attr "prefix" "orig,vex")
5419 (set_attr "mode" "TI")])
;; SSE2 integer min/max insn for V16QI: p<maxmin_int>b (legacy) or the VEX
;; three-operand form.  Requires TARGET_SSE2 and ix86_binary_operator_ok.
;; NOTE(review): the rtx code iterator line is not visible in this excerpt.
5421 (define_insn "*<code>v16qi3"
5422 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5424 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
5425 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
5426 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
5428 p<maxmin_int>b\t{%2, %0|%0, %2}
5429 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
5430 [(set_attr "isa" "noavx,avx")
5431 (set_attr "type" "sseiadd")
5432 (set_attr "prefix_data16" "1,*")
5433 (set_attr "prefix_extra" "*,1")
5434 (set_attr "prefix" "orig,vex")
5435 (set_attr "mode" "TI")])
;; <code>v16qi3 expander for V16QI min/max.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy for <CODE>.
;; NOTE(review): the rtx code iterator line and expander condition are not
;; visible in this excerpt.
5437 (define_expand "<code>v16qi3"
5438 [(set (match_operand:V16QI 0 "register_operand" "")
5440 (match_operand:V16QI 1 "nonimmediate_operand" "")
5441 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
5443 "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
;; umaxv8hi3 expander: unsigned V8HI maximum.  Two paths are visible: a
;; plain operand fix-up for UMAX, and a fallback that emits
;; gen_sse2_ussubv8hi3 (operand 1, operand 2) followed by an add of
;; operand 2.  If the destination is the same rtx as operand 2, the
;; intermediate goes to a fresh register so operand 2 is still intact for
;; the add.
;; NOTE(review): the test choosing between the two paths and the expander
;; condition are not visible in this excerpt.
5445 (define_expand "umaxv8hi3"
5446 [(set (match_operand:V8HI 0 "register_operand" "")
5447 (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
5448 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
5452 ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
5455 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
5456 if (rtx_equal_p (op3, op2))
5457 op3 = gen_reg_rtx (V8HImode);
5458 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
5459 emit_insn (gen_addv8hi3 (op0, op3, op2));
;; umaxv4si3 expander: unsigned V4SI maximum.  Two paths are visible: a
;; plain operand fix-up for UMAX, and a fallback through
;; ix86_expand_int_vcond with a GTU comparison of operand 1 against
;; operand 2 and the value operands in that same order.
;; NOTE(review): the test choosing between the two paths, the xops
;; declaration and the expander condition are not visible in this excerpt.
5464 (define_expand "umaxv4si3"
5465 [(set (match_operand:V4SI 0 "register_operand" "")
5466 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
5467 (match_operand:V4SI 2 "register_operand" "")))]
5471 ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
5477 xops[0] = operands[0];
5478 xops[1] = operands[1];
5479 xops[2] = operands[2];
5480 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5481 xops[4] = operands[1];
5482 xops[5] = operands[2];
5483 ok = ix86_expand_int_vcond (xops);
;; umin<mode>3 expander for VI24_128 modes.  Two paths are visible: a plain
;; operand fix-up for UMIN, and a fallback through ix86_expand_int_vcond
;; with a GTU comparison of operand 1 against operand 2 and the value
;; operands swapped (operand 2 first).
;; NOTE(review): the test choosing between the two paths, the xops
;; declaration and the expander condition are not visible in this excerpt.
5489 (define_expand "umin<mode>3"
5490 [(set (match_operand:VI24_128 0 "register_operand" "")
5491 (umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
5492 (match_operand:VI24_128 2 "register_operand" "")))]
5496 ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
5502 xops[0] = operands[0];
5503 xops[1] = operands[2];
5504 xops[2] = operands[1];
5505 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5506 xops[4] = operands[1];
5507 xops[5] = operands[2];
5508 ok = ix86_expand_int_vcond (xops);
;; umaxv2di3 expander: unsigned V2DI maximum built through
;; ix86_expand_int_vcond with a GTU comparison of operand 1 against
;; operand 2 and the value operands in that same order.
;; NOTE(review): the xops declaration and expander condition are not
;; visible in this excerpt.
5514 (define_expand "umaxv2di3"
5515 [(set (match_operand:V2DI 0 "register_operand" "")
5516 (umax:V2DI (match_operand:V2DI 1 "register_operand" "")
5517 (match_operand:V2DI 2 "register_operand" "")))]
5523 xops[0] = operands[0];
5524 xops[1] = operands[1];
5525 xops[2] = operands[2];
5526 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5527 xops[4] = operands[1];
5528 xops[5] = operands[2];
5529 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum of two V2DI vectors, expanded through
;; ix86_expand_int_vcond.  The comparison is operand 1 GTU operand 2 and
;; the candidate values are passed swapped (operand 2 first, operand 1
;; second), mirroring the unsigned-maximum expansion.
5534 (define_expand "uminv2di3"
5535 [(set (match_operand:V2DI 0 "register_operand" "")
5536 (umin:V2DI (match_operand:V2DI 1 "register_operand" "")
5537 (match_operand:V2DI 2 "register_operand" "")))]
5543 xops[0] = operands[0];
5544 xops[1] = operands[2];
5545 xops[2] = operands[1];
5546 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
5547 xops[4] = operands[1];
5548 xops[5] = operands[2];
5549 ok = ix86_expand_int_vcond (xops);
5554 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5556 ;; Parallel integral comparisons
5558 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DI equality compare, emitted as pcmpeqq / vpcmpeqq.
;; Requires SSE4.1 and operands accepted by ix86_binary_operator_ok.
;; Operand 1 carries the `%' commutativity marker.  Alternative 0 is the
;; two-operand form with operand 1 tied to the destination (isa noavx);
;; alternative 1 is the three-operand VEX form (isa avx).
5560 (define_insn "*sse4_1_eqv2di3"
5561 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5563 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
5564 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5565 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
5567 pcmpeqq\t{%2, %0|%0, %2}
5568 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
5569 [(set_attr "isa" "noavx,avx")
5570 (set_attr "type" "ssecmp")
5571 (set_attr "prefix_extra" "1")
5572 (set_attr "prefix" "orig,vex")
5573 (set_attr "mode" "TI")])
;; Equality compare for the VI124_128 modes, emitted as
;; pcmpeq<ssemodesuffix> / vpcmpeq<ssemodesuffix>.
;; Enabled for SSE2 when XOP is not in use, and only for operands accepted
;; by ix86_binary_operator_ok.  Operand 1 is marked commutative (`%').
;; Alternative 0 is the two-operand form (destination tied to operand 1),
;; alternative 1 the three-operand VEX form.
5575 (define_insn "*sse2_eq<mode>3"
5576 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5578 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
5579 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5580 "TARGET_SSE2 && !TARGET_XOP
5581 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5583 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
5584 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5585 [(set_attr "isa" "noavx,avx")
5586 (set_attr "type" "ssecmp")
5587 (set_attr "prefix_data16" "1,*")
5588 (set_attr "prefix" "orig,vex")
5589 (set_attr "mode" "TI")])
;; Named expander for the VI124_128 equality compare (SSE2, no XOP).
;; Both inputs may be any nonimmediate operand; the preparation statement
;; hands them to ix86_fixup_binary_operands_no_copy for EQ before the
;; pattern is emitted.
5591 (define_expand "sse2_eq<mode>3"
5592 [(set (match_operand:VI124_128 0 "register_operand" "")
5594 (match_operand:VI124_128 1 "nonimmediate_operand" "")
5595 (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
5596 "TARGET_SSE2 && !TARGET_XOP "
5597 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Named expander for the V2DI equality compare.  Both inputs may be any
;; nonimmediate operand; the preparation statement hands them to
;; ix86_fixup_binary_operands_no_copy for EQ before the pattern is emitted.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5599 (define_expand "sse4_1_eqv2di3"
5600 [(set (match_operand:V2DI 0 "register_operand" "")
5602 (match_operand:V2DI 1 "nonimmediate_operand" "")
5603 (match_operand:V2DI 2 "nonimmediate_operand" "")))]
5605 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than compare, emitted as pcmpgtq / vpcmpgtq.
;; Operand 1 must be a register and has no commutativity marker.
;; Alternative 0 ties it to the destination (two-operand form, isa noavx);
;; alternative 1 is the three-operand VEX form (isa avx).
;; NOTE(review): the enabling condition is not visible in this excerpt.
5607 (define_insn "sse4_2_gtv2di3"
5608 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
5610 (match_operand:V2DI 1 "register_operand" "0,x")
5611 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
5614 pcmpgtq\t{%2, %0|%0, %2}
5615 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
5616 [(set_attr "isa" "noavx,avx")
5617 (set_attr "type" "ssecmp")
5618 (set_attr "prefix_extra" "1")
5619 (set_attr "prefix" "orig,vex")
5620 (set_attr "mode" "TI")])
;; Greater-than compare for the VI124_128 modes, emitted as
;; pcmpgt<ssemodesuffix> / vpcmpgt<ssemodesuffix>.  Enabled for SSE2 when
;; XOP is not in use.  Operand 1 must be a register and is not marked
;; commutative; alternative 0 ties it to the destination, alternative 1 is
;; the three-operand VEX form.
5622 (define_insn "sse2_gt<mode>3"
5623 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
5625 (match_operand:VI124_128 1 "register_operand" "0,x")
5626 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
5627 "TARGET_SSE2 && !TARGET_XOP"
5629 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
5630 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
5631 [(set_attr "isa" "noavx,avx")
5632 (set_attr "type" "ssecmp")
5633 (set_attr "prefix_data16" "1,*")
5634 (set_attr "prefix" "orig,vex")
5635 (set_attr "mode" "TI")])
;; Vector conditional select for the VI124_128 modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The comparison operator is matched without a predicate and the two
;; selected values may be any general operand.  All of the work is
;; delegated to ix86_expand_int_vcond, whose result is kept in `ok'.
5637 (define_expand "vcond<mode>"
5638 [(set (match_operand:VI124_128 0 "register_operand" "")
5639 (if_then_else:VI124_128
5640 (match_operator 3 ""
5641 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5642 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5643 (match_operand:VI124_128 1 "general_operand" "")
5644 (match_operand:VI124_128 2 "general_operand" "")))]
5647 bool ok = ix86_expand_int_vcond (operands);
;; Vector conditional select for V2DI.  Operand 3 is the comparison
;; operator applied to operands 4 and 5; operands 1 and 2 are the values
;; selected between.  The expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5652 (define_expand "vcondv2di"
5653 [(set (match_operand:V2DI 0 "register_operand" "")
5655 (match_operator 3 ""
5656 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5657 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5658 (match_operand:V2DI 1 "general_operand" "")
5659 (match_operand:V2DI 2 "general_operand" "")))]
5662 bool ok = ix86_expand_int_vcond (operands);
;; Conditional select for the VI124_128 modes under the vcondu name
;; (presumably the unsigned-comparison variant -- the operator itself is
;; matched without a predicate here):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; The expansion is delegated to ix86_expand_int_vcond.
5667 (define_expand "vcondu<mode>"
5668 [(set (match_operand:VI124_128 0 "register_operand" "")
5669 (if_then_else:VI124_128
5670 (match_operator 3 ""
5671 [(match_operand:VI124_128 4 "nonimmediate_operand" "")
5672 (match_operand:VI124_128 5 "nonimmediate_operand" "")])
5673 (match_operand:VI124_128 1 "general_operand" "")
5674 (match_operand:VI124_128 2 "general_operand" "")))]
5677 bool ok = ix86_expand_int_vcond (operands);
;; Conditional select for V2DI under the vcondu name.  Operand 3 is the
;; comparison operator applied to operands 4 and 5; operands 1 and 2 are
;; the values selected between.  The expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5682 (define_expand "vconduv2di"
5683 [(set (match_operand:V2DI 0 "register_operand" "")
5685 (match_operator 3 ""
5686 [(match_operand:V2DI 4 "nonimmediate_operand" "")
5687 (match_operand:V2DI 5 "nonimmediate_operand" "")])
5688 (match_operand:V2DI 1 "general_operand" "")
5689 (match_operand:V2DI 2 "general_operand" "")))]
5692 bool ok = ix86_expand_int_vcond (operands);
5697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5699 ;; Parallel bitwise logical operations
5701 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement of an integer vector, expressed as an XOR.
;; The preparation code builds a constant vector with one constm1_rtx
;; entry per element of <MODE>mode (GET_MODE_NUNITS of them), forces it
;; into a register and stores it as operand 2 for use by the pattern.
5703 (define_expand "one_cmpl<mode>2"
5704 [(set (match_operand:VI 0 "register_operand" "")
5705 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "")
5709 int i, n = GET_MODE_NUNITS (<MODE>mode);
5710 rtvec v = rtvec_alloc (n);
5712 for (i = 0; i < n; ++i)
5713 RTVEC_ELT (v, i) = constm1_rtx;
5715 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for AND-NOT on the 128-bit integer vector modes.
;; Operand 1 (a register) is complemented via not:VI_128 and combined with
;; operand 2, which may also be a memory operand.
;; NOTE(review): the combining RTL code and the enabling condition are not
;; visible in this excerpt.
5718 (define_expand "sse2_andnot<mode>3"
5719 [(set (match_operand:VI_128 0 "register_operand" "")
5721 (not:VI_128 (match_operand:VI_128 1 "register_operand" ""))
5722 (match_operand:VI_128 2 "nonimmediate_operand" "")))]
;; AND-NOT for every integer vector mode in VI: operand 1 is complemented
;; (not:VI) and combined with operand 2.
;; The output code formats the mnemonic at run time: "pandn" when the
;; insn's mode attribute is TI, "andnps" otherwise.  The template is the
;; two-operand form for one alternative and the "v"-prefixed
;; three-operand form for the other (isa noavx,avx).
;; The "mode" attribute must report V8SF for the 256-bit modes so that
;; the floating-point mnemonic is used for them.  GET_MODE_SIZE is
;; measured in bytes, so "wider than 128 bits" is spelled "> 16"; the
;; previous "> 128" could never be true and the V8SF arm was unreachable.
;; When SSE2 is available a later arm applies (its value is not visible
;; in this excerpt); the default is V4SF.
5725 (define_insn "*andnot<mode>3"
5726 [(set (match_operand:VI 0 "register_operand" "=x,x")
5728 (not:VI (match_operand:VI 1 "register_operand" "0,x"))
5729 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5732 static char buf[32];
5735 = (get_attr_mode (insn) == MODE_TI) ? "pandn" : "andnps";
5737 switch (which_alternative)
5740 ops = "%s\t{%%2, %%0|%%0, %%2}";
5743 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5749 snprintf (buf, sizeof (buf), ops, tmp);
5752 [(set_attr "isa" "noavx,avx")
5753 (set_attr "type" "sselog")
5754 (set (attr "prefix_data16")
5756 (and (eq_attr "alternative" "0")
5757 (eq_attr "mode" "TI"))
5759 (const_string "*")))
5760 (set_attr "prefix" "orig,vex")
5762 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5763 (const_string "V8SF")
5764 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5767 (const_string "V4SF")))])
;; Named expander for the vector logical operations iterated by <code>,
;; over every integer vector mode in VI.  Both inputs may be any
;; nonimmediate operand; the preparation statement hands them to
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5769 (define_expand "<code><mode>3"
5770 [(set (match_operand:VI 0 "register_operand" "")
5772 (match_operand:VI 1 "nonimmediate_operand" "")
5773 (match_operand:VI 2 "nonimmediate_operand" "")))]
5775 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector logical operation (iterated by <code>) for every integer vector
;; mode in VI.  Operand 1 carries the `%' commutativity marker and the
;; operands must be accepted by ix86_binary_operator_ok.
;; The output code formats the mnemonic at run time: "p<logic>" when the
;; insn's mode attribute is TI, "<logic>ps" otherwise.  The template is
;; the two-operand form for one alternative and the "v"-prefixed
;; three-operand form for the other (isa noavx,avx).
;; The "mode" attribute must report V8SF for the 256-bit modes so that
;; the floating-point mnemonic is used for them.  GET_MODE_SIZE is
;; measured in bytes, so "wider than 128 bits" is spelled "> 16"; the
;; previous "> 128" could never be true and the V8SF arm was unreachable.
;; When SSE2 is available a later arm applies (its value is not visible
;; in this excerpt); the default is V4SF.
5777 (define_insn "*<code><mode>3"
5778 [(set (match_operand:VI 0 "register_operand" "=x,x")
5780 (match_operand:VI 1 "nonimmediate_operand" "%0,x")
5781 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))]
5783 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5785 static char buf[32];
5788 = (get_attr_mode (insn) == MODE_TI) ? "p<logic>" : "<logic>ps";
5790 switch (which_alternative)
5793 ops = "%s\t{%%2, %%0|%%0, %%2}";
5796 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5802 snprintf (buf, sizeof (buf), ops, tmp);
5805 [(set_attr "isa" "noavx,avx")
5806 (set_attr "type" "sselog")
5807 (set (attr "prefix_data16")
5809 (and (eq_attr "alternative" "0")
5810 (eq_attr "mode" "TI"))
5812 (const_string "*")))
5813 (set_attr "prefix" "orig,vex")
5815 (cond [(ne (symbol_ref "GET_MODE_SIZE (<MODE>mode) > 16") (const_int 0))
5816 (const_string "V8SF")
5817 (ne (symbol_ref "TARGET_SSE2") (const_int 0))
5820 (const_string "V4SF")))])
;; AND-NOT on a TFmode value held in an SSE register: operand 1 is
;; complemented (not:TF) and combined with operand 2.  Emitted as pandn
;; (destination tied to operand 1) or the three-operand vpandn.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5822 (define_insn "*andnottf3"
5823 [(set (match_operand:TF 0 "register_operand" "=x,x")
5825 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
5826 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5829 pandn\t{%2, %0|%0, %2}
5830 vpandn\t{%2, %1, %0|%0, %1, %2}"
5831 [(set_attr "isa" "noavx,avx")
5832 (set_attr "type" "sselog")
5833 (set_attr "prefix_data16" "1,*")
5834 (set_attr "prefix" "orig,vex")
5835 (set_attr "mode" "TI")])
;; Named expander for the logical operations iterated by <code> on TFmode.
;; Both inputs may be any nonimmediate operand; the preparation statement
;; hands them to ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
;; NOTE(review): the enabling condition is not visible in this excerpt.
5837 (define_expand "<code>tf3"
5838 [(set (match_operand:TF 0 "register_operand" "")
5840 (match_operand:TF 1 "nonimmediate_operand" "")
5841 (match_operand:TF 2 "nonimmediate_operand" "")))]
5843 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Logical operation (iterated by <code>) on TFmode values in SSE
;; registers, emitted as p<logic> / vp<logic>.  Operand 1 is marked
;; commutative (`%') and the operands must be accepted by
;; ix86_binary_operator_ok.  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the three-operand VEX form.
5845 (define_insn "*<code>tf3"
5846 [(set (match_operand:TF 0 "register_operand" "=x,x")
5848 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
5849 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
5851 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
5853 p<logic>\t{%2, %0|%0, %2}
5854 vp<logic>\t{%2, %1, %0|%0, %1, %2}"
5855 [(set_attr "isa" "noavx,avx")
5856 (set_attr "type" "sselog")
5857 (set_attr "prefix_data16" "1,*")
5858 (set_attr "prefix" "orig,vex")
5859 (set_attr "mode" "TI")])
5861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5863 ;; Parallel integral element swizzling
5865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack two VI248_128 vectors into one vector of <ssepackmode>.
;; Both inputs are reinterpreted in <ssepackmode> with gen_lowpart and the
;; result is produced by ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably requests the even-indexed
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
5867 (define_expand "vec_pack_trunc_<mode>"
5868 [(match_operand:<ssepackmode> 0 "register_operand" "")
5869 (match_operand:VI248_128 1 "register_operand" "")
5870 (match_operand:VI248_128 2 "register_operand" "")]
5873 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
5874 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
5875 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: combines two V8HI inputs into one V16QI result.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand VEX form.
;; NOTE(review): the RTL codes wrapped around the two inputs and the
;; enabling condition are not visible in this excerpt.
5879 (define_insn "sse2_packsswb"
5880 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5883 (match_operand:V8HI 1 "register_operand" "0,x"))
5885 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5888 packsswb\t{%2, %0|%0, %2}
5889 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
5890 [(set_attr "isa" "noavx,avx")
5891 (set_attr "type" "sselog")
5892 (set_attr "prefix_data16" "1,*")
5893 (set_attr "prefix" "orig,vex")
5894 (set_attr "mode" "TI")])
;; packssdw / vpackssdw: combines two V4SI inputs into one V8HI result.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand VEX form.
;; NOTE(review): the RTL codes wrapped around the two inputs and the
;; enabling condition are not visible in this excerpt.
5896 (define_insn "sse2_packssdw"
5897 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5900 (match_operand:V4SI 1 "register_operand" "0,x"))
5902 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
5905 packssdw\t{%2, %0|%0, %2}
5906 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
5907 [(set_attr "isa" "noavx,avx")
5908 (set_attr "type" "sselog")
5909 (set_attr "prefix_data16" "1,*")
5910 (set_attr "prefix" "orig,vex")
5911 (set_attr "mode" "TI")])
;; packuswb / vpackuswb: combines two V8HI inputs into one V16QI result.
;; Alternative 0 ties operand 1 to the destination (two-operand form);
;; alternative 1 is the three-operand VEX form.
;; NOTE(review): the RTL codes wrapped around the two inputs and the
;; enabling condition are not visible in this excerpt.
5913 (define_insn "sse2_packuswb"
5914 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5917 (match_operand:V8HI 1 "register_operand" "0,x"))
5919 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))))]
5922 packuswb\t{%2, %0|%0, %2}
5923 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
5924 [(set_attr "isa" "noavx,avx")
5925 (set_attr "type" "sselog")
5926 (set_attr "prefix_data16" "1,*")
5927 (set_attr "prefix" "orig,vex")
5928 (set_attr "mode" "TI")])
;; Interleave the high halves of two V16QI vectors (punpckhbw /
;; vpunpckhbw).  The selection list alternates indices 8..15 with indices
;; 24..31; given 16-element inputs, the latter presumably address
;; elements 8..15 of operand 2 in the combined 32-element value (the
;; enclosing RTL is not visible in this excerpt).
5930 (define_insn "vec_interleave_highv16qi"
5931 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5934 (match_operand:V16QI 1 "register_operand" "0,x")
5935 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5936 (parallel [(const_int 8) (const_int 24)
5937 (const_int 9) (const_int 25)
5938 (const_int 10) (const_int 26)
5939 (const_int 11) (const_int 27)
5940 (const_int 12) (const_int 28)
5941 (const_int 13) (const_int 29)
5942 (const_int 14) (const_int 30)
5943 (const_int 15) (const_int 31)])))]
5946 punpckhbw\t{%2, %0|%0, %2}
5947 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
5948 [(set_attr "isa" "noavx,avx")
5949 (set_attr "type" "sselog")
5950 (set_attr "prefix_data16" "1,*")
5951 (set_attr "prefix" "orig,vex")
5952 (set_attr "mode" "TI")])
;; Interleave the low halves of two V16QI vectors (punpcklbw /
;; vpunpcklbw).  The selection list alternates indices 0..7 with indices
;; 16..23; given 16-element inputs, the latter presumably address
;; elements 0..7 of operand 2 in the combined 32-element value (the
;; enclosing RTL is not visible in this excerpt).
5954 (define_insn "vec_interleave_lowv16qi"
5955 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
5958 (match_operand:V16QI 1 "register_operand" "0,x")
5959 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
5960 (parallel [(const_int 0) (const_int 16)
5961 (const_int 1) (const_int 17)
5962 (const_int 2) (const_int 18)
5963 (const_int 3) (const_int 19)
5964 (const_int 4) (const_int 20)
5965 (const_int 5) (const_int 21)
5966 (const_int 6) (const_int 22)
5967 (const_int 7) (const_int 23)])))]
5970 punpcklbw\t{%2, %0|%0, %2}
5971 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
5972 [(set_attr "isa" "noavx,avx")
5973 (set_attr "type" "sselog")
5974 (set_attr "prefix_data16" "1,*")
5975 (set_attr "prefix" "orig,vex")
5976 (set_attr "mode" "TI")])
;; Interleave the high halves of two V8HI vectors (punpckhwd /
;; vpunpckhwd).  The selection list alternates indices 4..7 with indices
;; 12..15; given 8-element inputs, the latter presumably address elements
;; 4..7 of operand 2 in the combined 16-element value (the enclosing RTL
;; is not visible in this excerpt).
5978 (define_insn "vec_interleave_highv8hi"
5979 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
5982 (match_operand:V8HI 1 "register_operand" "0,x")
5983 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
5984 (parallel [(const_int 4) (const_int 12)
5985 (const_int 5) (const_int 13)
5986 (const_int 6) (const_int 14)
5987 (const_int 7) (const_int 15)])))]
5990 punpckhwd\t{%2, %0|%0, %2}
5991 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
5992 [(set_attr "isa" "noavx,avx")
5993 (set_attr "type" "sselog")
5994 (set_attr "prefix_data16" "1,*")
5995 (set_attr "prefix" "orig,vex")
5996 (set_attr "mode" "TI")])
;; Interleave the low halves of two V8HI vectors (punpcklwd /
;; vpunpcklwd).  The selection list alternates indices 0..3 with indices
;; 8..11; given 8-element inputs, the latter presumably address elements
;; 0..3 of operand 2 in the combined 16-element value (the enclosing RTL
;; is not visible in this excerpt).
5998 (define_insn "vec_interleave_lowv8hi"
5999 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6002 (match_operand:V8HI 1 "register_operand" "0,x")
6003 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
6004 (parallel [(const_int 0) (const_int 8)
6005 (const_int 1) (const_int 9)
6006 (const_int 2) (const_int 10)
6007 (const_int 3) (const_int 11)])))]
6010 punpcklwd\t{%2, %0|%0, %2}
6011 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6012 [(set_attr "isa" "noavx,avx")
6013 (set_attr "type" "sselog")
6014 (set_attr "prefix_data16" "1,*")
6015 (set_attr "prefix" "orig,vex")
6016 (set_attr "mode" "TI")])
;; Interleave the high halves of two V4SI vectors (punpckhdq /
;; vpunpckhdq).  The selection list is 2, 6, 3, 7; given 4-element inputs,
;; indices 6 and 7 presumably address elements 2 and 3 of operand 2 in the
;; combined 8-element value (the enclosing RTL is not visible in this
;; excerpt).
6018 (define_insn "vec_interleave_highv4si"
6019 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6022 (match_operand:V4SI 1 "register_operand" "0,x")
6023 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6024 (parallel [(const_int 2) (const_int 6)
6025 (const_int 3) (const_int 7)])))]
6028 punpckhdq\t{%2, %0|%0, %2}
6029 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6030 [(set_attr "isa" "noavx,avx")
6031 (set_attr "type" "sselog")
6032 (set_attr "prefix_data16" "1,*")
6033 (set_attr "prefix" "orig,vex")
6034 (set_attr "mode" "TI")])
;; Interleave the low halves of two V4SI vectors (punpckldq /
;; vpunpckldq).  The selection list is 0, 4, 1, 5; given 4-element inputs,
;; indices 4 and 5 presumably address elements 0 and 1 of operand 2 in the
;; combined 8-element value (the enclosing RTL is not visible in this
;; excerpt).
6036 (define_insn "vec_interleave_lowv4si"
6037 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6040 (match_operand:V4SI 1 "register_operand" "0,x")
6041 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
6042 (parallel [(const_int 0) (const_int 4)
6043 (const_int 1) (const_int 5)])))]
6046 punpckldq\t{%2, %0|%0, %2}
6047 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6048 [(set_attr "isa" "noavx,avx")
6049 (set_attr "type" "sselog")
6050 (set_attr "prefix_data16" "1,*")
6051 (set_attr "prefix" "orig,vex")
6052 (set_attr "mode" "TI")])
6054 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally; V16QI and V4SI additionally require
;; TARGET_SSE4_1, and V2DI requires both TARGET_SSE4_1 and TARGET_64BIT.
6055 (define_mode_iterator PINSR_MODE
6056 [(V16QI "TARGET_SSE4_1") V8HI
6057 (V4SI "TARGET_SSE4_1")
6058 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix per vector mode: "sse2" for V8HI, "sse4_1" for V16QI, V4SI
;; and V2DI.
6060 (define_mode_attr sse2p4_1
6061 [(V16QI "sse4_1") (V8HI "sse2")
6062 (V4SI "sse4_1") (V2DI "sse4_1")])
6064 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert a scalar (operand 2, register or memory) into one element of
;; vector operand 1, giving operand 0.
;; Operand 3 is the vec_merge selector.  The insn condition requires
;; exact_log2 of it to be a valid element number (below
;; GET_MODE_NUNITS), and the output code replaces operand 3 by that
;; element number before printing it as the immediate.
;; Four alternatives: two two-operand forms with operand 1 tied to the
;; destination (isa noavx) and two "v"-prefixed three-operand forms
;; (isa avx).  When the element mode is narrower than SImode a template
;; printing operand 2 with the %k modifier is used.
;; The attribute section gives prefix_rex, prefix_data16 and prefix_extra
;; mode-dependent values for the non-AVX encodings of V2DI and V8HI.
6065 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
6066 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
6067 (vec_merge:PINSR_MODE
6068 (vec_duplicate:PINSR_MODE
6069 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
6070 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
6071 (match_operand:SI 3 "const_int_operand" "")))]
6073 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6074 < GET_MODE_NUNITS (<MODE>mode))"
6076 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6078 switch (which_alternative)
6081 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6082 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
6085 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
6087 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
6088 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6091 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6096 [(set_attr "isa" "noavx,noavx,avx,avx")
6097 (set_attr "type" "sselog")
6098 (set (attr "prefix_rex")
6100 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6101 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
6103 (const_string "*")))
6104 (set (attr "prefix_data16")
6106 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6107 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6109 (const_string "*")))
6110 (set (attr "prefix_extra")
6112 (and (eq (symbol_ref "TARGET_AVX") (const_int 0))
6113 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
6115 (const_string "1")))
6116 (set_attr "length_immediate" "1")
6117 (set_attr "prefix" "orig,orig,vex,vex")
6118 (set_attr "mode" "TI")])
;; Extract one byte of a V16QI register (element number in operand 2,
;; 0..15) into a general register of mode SWI48, using pextrb.  The
;; destination is printed with the %k modifier.
;; NOTE(review): the RTL wrapped around the element selection and the
;; enabling condition are not visible in this excerpt.
6120 (define_insn "*sse4_1_pextrb_<mode>"
6121 [(set (match_operand:SWI48 0 "register_operand" "=r")
6124 (match_operand:V16QI 1 "register_operand" "x")
6125 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6127 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
6128 [(set_attr "type" "sselog")
6129 (set_attr "prefix_extra" "1")
6130 (set_attr "length_immediate" "1")
6131 (set_attr "prefix" "maybe_vex")
6132 (set_attr "mode" "TI")])
;; Store one byte of a V16QI register (element number in operand 2,
;; 0..15) directly to a QImode memory location, using pextrb.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6134 (define_insn "*sse4_1_pextrb_memory"
6135 [(set (match_operand:QI 0 "memory_operand" "=m")
6137 (match_operand:V16QI 1 "register_operand" "x")
6138 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6140 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6141 [(set_attr "type" "sselog")
6142 (set_attr "prefix_extra" "1")
6143 (set_attr "length_immediate" "1")
6144 (set_attr "prefix" "maybe_vex")
6145 (set_attr "mode" "TI")])
;; Extract one word of a V8HI register (element number in operand 2,
;; 0..7) into a general register of mode SWI48, using pextrw.  The
;; destination is printed with the %k modifier.
;; NOTE(review): the RTL wrapped around the element selection and the
;; enabling condition are not visible in this excerpt.
6147 (define_insn "*sse2_pextrw_<mode>"
6148 [(set (match_operand:SWI48 0 "register_operand" "=r")
6151 (match_operand:V8HI 1 "register_operand" "x")
6152 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6154 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
6155 [(set_attr "type" "sselog")
6156 (set_attr "prefix_data16" "1")
6157 (set_attr "length_immediate" "1")
6158 (set_attr "prefix" "maybe_vex")
6159 (set_attr "mode" "TI")])
;; Store one word of a V8HI register (element number in operand 2, 0..7)
;; directly to an HImode memory location, using pextrw.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6161 (define_insn "*sse4_1_pextrw_memory"
6162 [(set (match_operand:HI 0 "memory_operand" "=m")
6164 (match_operand:V8HI 1 "register_operand" "x")
6165 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6167 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6168 [(set_attr "type" "sselog")
6169 (set_attr "prefix_extra" "1")
6170 (set_attr "length_immediate" "1")
6171 (set_attr "prefix" "maybe_vex")
6172 (set_attr "mode" "TI")])
;; Extract one doubleword of a V4SI register (element number in operand
;; 2, 0..3) into an SImode general register or memory location, using
;; pextrd.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6174 (define_insn "*sse4_1_pextrd"
6175 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6177 (match_operand:V4SI 1 "register_operand" "x")
6178 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6180 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6181 [(set_attr "type" "sselog")
6182 (set_attr "prefix_extra" "1")
6183 (set_attr "length_immediate" "1")
6184 (set_attr "prefix" "maybe_vex")
6185 (set_attr "mode" "TI")])
;; 64-bit variant of the doubleword extract: the destination is a DImode
;; general register, printed with the %k modifier in the pextrd template.
;; Enabled only for TARGET_64BIT with SSE4.1.
;; NOTE(review): the RTL wrapped around the element selection (by the
;; pattern name, a zero extension) is not visible in this excerpt.
6187 (define_insn "*sse4_1_pextrd_zext"
6188 [(set (match_operand:DI 0 "register_operand" "=r")
6191 (match_operand:V4SI 1 "register_operand" "x")
6192 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
6193 "TARGET_64BIT && TARGET_SSE4_1"
6194 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
6195 [(set_attr "type" "sselog")
6196 (set_attr "prefix_extra" "1")
6197 (set_attr "length_immediate" "1")
6198 (set_attr "prefix" "maybe_vex")
6199 (set_attr "mode" "TI")])
6201 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
;; Extract one quadword of a V2DI register (element number in operand 2,
;; 0..1) into a DImode general register or memory location, using pextrq.
;; Enabled only for SSE4.1 on 64-bit targets; prefix_rex is set to 1.
6202 (define_insn "*sse4_1_pextrq"
6203 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
6205 (match_operand:V2DI 1 "register_operand" "x")
6206 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6207 "TARGET_SSE4_1 && TARGET_64BIT"
6208 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6209 [(set_attr "type" "sselog")
6210 (set_attr "prefix_rex" "1")
6211 (set_attr "prefix_extra" "1")
6212 (set_attr "length_immediate" "1")
6213 (set_attr "prefix" "maybe_vex")
6214 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer constant
;; (operand 2).  It splits that value into four 2-bit fields (bits 0-1,
;; 2-3, 4-5 and 6-7) and passes each one as a separate element selector to
;; sse2_pshufd_1.
6216 (define_expand "sse2_pshufd"
6217 [(match_operand:V4SI 0 "register_operand" "")
6218 (match_operand:V4SI 1 "nonimmediate_operand" "")
6219 (match_operand:SI 2 "const_int_operand" "")]
6222 int mask = INTVAL (operands[2]);
6223 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
6224 GEN_INT ((mask >> 0) & 3),
6225 GEN_INT ((mask >> 2) & 3),
6226 GEN_INT ((mask >> 4) & 3),
6227 GEN_INT ((mask >> 6) & 3)));
;; pshufd with the four element selectors (operands 2..5, each 0..3)
;; spelled out individually in the RTL.  The output code packs them back
;; into one immediate, two bits per selector starting at bit 0, stores it
;; in operand 2 and prints %vpshufd.
6231 (define_insn "sse2_pshufd_1"
6232 [(set (match_operand:V4SI 0 "register_operand" "=x")
6234 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
6235 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6236 (match_operand 3 "const_0_to_3_operand" "")
6237 (match_operand 4 "const_0_to_3_operand" "")
6238 (match_operand 5 "const_0_to_3_operand" "")])))]
6242 mask |= INTVAL (operands[2]) << 0;
6243 mask |= INTVAL (operands[3]) << 2;
6244 mask |= INTVAL (operands[4]) << 4;
6245 mask |= INTVAL (operands[5]) << 6;
6246 operands[2] = GEN_INT (mask);
6248 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6250 [(set_attr "type" "sselog1")
6251 (set_attr "prefix_data16" "1")
6252 (set_attr "prefix" "maybe_vex")
6253 (set_attr "length_immediate" "1")
6254 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer constant
;; (operand 2).  It splits that value into four 2-bit fields (bits 0-1,
;; 2-3, 4-5 and 6-7) and passes each one as a separate element selector to
;; sse2_pshuflw_1.
6256 (define_expand "sse2_pshuflw"
6257 [(match_operand:V8HI 0 "register_operand" "")
6258 (match_operand:V8HI 1 "nonimmediate_operand" "")
6259 (match_operand:SI 2 "const_int_operand" "")]
6262 int mask = INTVAL (operands[2]);
6263 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
6264 GEN_INT ((mask >> 0) & 3),
6265 GEN_INT ((mask >> 2) & 3),
6266 GEN_INT ((mask >> 4) & 3),
6267 GEN_INT ((mask >> 6) & 3)));
;; pshuflw with four variable element selectors (operands 2..5, each
;; 0..3) at the start of the selection list; the remainder of the list is
;; not visible in this excerpt.  The output code packs the selectors back
;; into one immediate, two bits per selector starting at bit 0, stores it
;; in operand 2 and prints %vpshuflw.  The attributes mark the encoding as
;; using a rep prefix rather than the data16 prefix.
6271 (define_insn "sse2_pshuflw_1"
6272 [(set (match_operand:V8HI 0 "register_operand" "=x")
6274 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6275 (parallel [(match_operand 2 "const_0_to_3_operand" "")
6276 (match_operand 3 "const_0_to_3_operand" "")
6277 (match_operand 4 "const_0_to_3_operand" "")
6278 (match_operand 5 "const_0_to_3_operand" "")
6286 mask |= INTVAL (operands[2]) << 0;
6287 mask |= INTVAL (operands[3]) << 2;
6288 mask |= INTVAL (operands[4]) << 4;
6289 mask |= INTVAL (operands[5]) << 6;
6290 operands[2] = GEN_INT (mask);
6292 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6294 [(set_attr "type" "sselog")
6295 (set_attr "prefix_data16" "0")
6296 (set_attr "prefix_rep" "1")
6297 (set_attr "prefix" "maybe_vex")
6298 (set_attr "length_immediate" "1")
6299 (set_attr "mode" "TI")])
;; Expander taking the shuffle control as a single integer constant
;; (operand 2).  It splits that value into four 2-bit fields and adds 4 to
;; each, so the selectors handed to sse2_pshufhw_1 are element numbers in
;; the range 4..7.
6301 (define_expand "sse2_pshufhw"
6302 [(match_operand:V8HI 0 "register_operand" "")
6303 (match_operand:V8HI 1 "nonimmediate_operand" "")
6304 (match_operand:SI 2 "const_int_operand" "")]
6307 int mask = INTVAL (operands[2]);
6308 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
6309 GEN_INT (((mask >> 0) & 3) + 4),
6310 GEN_INT (((mask >> 2) & 3) + 4),
6311 GEN_INT (((mask >> 4) & 3) + 4),
6312 GEN_INT (((mask >> 6) & 3) + 4)));
;; pshufhw.  The selection list starts with fixed entries (only the first,
;; const_int 0, is visible in this excerpt) and ends with four variable
;; selectors, operands 2..5, each in the range 4..7.  The output code
;; subtracts 4 from each selector, packs the results two bits apiece
;; starting at bit 0 into one immediate, stores it in operand 2 and prints
;; %vpshufhw.  The attributes mark the encoding as using a rep prefix
;; rather than the data16 prefix.
6316 (define_insn "sse2_pshufhw_1"
6317 [(set (match_operand:V8HI 0 "register_operand" "=x")
6319 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
6320 (parallel [(const_int 0)
6324 (match_operand 2 "const_4_to_7_operand" "")
6325 (match_operand 3 "const_4_to_7_operand" "")
6326 (match_operand 4 "const_4_to_7_operand" "")
6327 (match_operand 5 "const_4_to_7_operand" "")])))]
6331 mask |= (INTVAL (operands[2]) - 4) << 0;
6332 mask |= (INTVAL (operands[3]) - 4) << 2;
6333 mask |= (INTVAL (operands[4]) - 4) << 4;
6334 mask |= (INTVAL (operands[5]) - 4) << 6;
6335 operands[2] = GEN_INT (mask);
6337 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6339 [(set_attr "type" "sselog")
6340 (set_attr "prefix_rep" "1")
6341 (set_attr "prefix_data16" "0")
6342 (set_attr "prefix" "maybe_vex")
6343 (set_attr "length_immediate" "1")
6344 (set_attr "mode" "TI")])
;; Expander that places an SImode value (operand 1) into a V4SI register.
;; The preparation statement sets operand 2 to the all-zero V4SI constant
;; for use by the pattern.
;; NOTE(review): most of the RTL template and the enabling condition are
;; not visible in this excerpt.
6346 (define_expand "sse2_loadd"
6347 [(set (match_operand:V4SI 0 "register_operand" "")
6350 (match_operand:SI 1 "nonimmediate_operand" ""))
6354 "operands[2] = CONST0_RTX (V4SImode);")
;; Place an SImode value (operand 2) into a V4SI register whose other
;; contents come from operand 1 (a register or zero).  Five alternatives:
;;   0  memory source, operand 1 zero (C)            %vmovd
;;   1  general-register source, operand 1 zero      %vmovd
;;   2  memory source, operand 1 zero                movss
;;   3  SSE-register source, operand 1 tied to dest  movss
;;   4  SSE-register source, separate operand 1      vmovss (three-operand)
;; NOTE(review): the enabling condition and part of the RTL template are
;; not visible in this excerpt.
6356 (define_insn "sse2_loadld"
6357 [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x,x")
6360 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
6361 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
6365 %vmovd\t{%2, %0|%0, %2}
6366 %vmovd\t{%2, %0|%0, %2}
6367 movss\t{%2, %0|%0, %2}
6368 movss\t{%2, %0|%0, %2}
6369 vmovss\t{%2, %1, %0|%0, %1, %2}"
6370 [(set_attr "isa" "base,base,noavx,noavx,avx")
6371 (set_attr "type" "ssemov")
6372 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
6373 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Store element 0 of a V4SI register into an SImode destination.
;; The pattern is split after reload when inter-unit moves are allowed,
;; or the destination is memory, or the destination is not a general
;; register.  The split becomes a plain SImode move whose source is the
;; same hard register as operand 1, re-created in SImode.
;; NOTE(review): the insn condition and output template are not visible
;; in this excerpt.
6375 (define_insn_and_split "sse2_stored"
6376 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r")
6378 (match_operand:V4SI 1 "register_operand" "x,Yi")
6379 (parallel [(const_int 0)])))]
6382 "&& reload_completed
6383 && (TARGET_INTER_UNIT_MOVES
6384 || MEM_P (operands [0])
6385 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6386 [(set (match_dup 0) (match_dup 1))]
6387 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Extract element operand 2 (0..3) from a V4SI value that lives in
;; offsettable memory, into a general register.  The split code turns
;; this into an ordinary SImode load from the same address adjusted by
;; 4 * element number.
;; NOTE(review): the insn and split conditions are not visible in this
;; excerpt.
6389 (define_insn_and_split "*vec_ext_v4si_mem"
6390 [(set (match_operand:SI 0 "register_operand" "=r")
6392 (match_operand:V4SI 1 "memory_operand" "o")
6393 (parallel [(match_operand 2 "const_0_to_3_operand" "")])))]
6399 int i = INTVAL (operands[2]);
6401 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4));
;; Named expander that stores element 0 of a V2DI register into a DImode
;; register or memory destination.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6405 (define_expand "sse_storeq"
6406 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6408 (match_operand:V2DI 1 "register_operand" "")
6409 (parallel [(const_int 0)])))]
;; 64-bit form of the element-0 store from V2DI.  Memory-to-memory is
;; rejected by the insn condition.  Three alternatives: SSE register to
;; SSE register or memory, Yi-class register to a general register, and
;; an offsettable memory source loaded into a general register with
;; mov{q} (type imov, mode DI).
;; NOTE(review): the output templates for the first two alternatives are
;; not visible in this excerpt.
6412 (define_insn "*sse2_storeq_rex64"
6413 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r")
6415 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o")
6416 (parallel [(const_int 0)])))]
6417 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6421 mov{q}\t{%1, %0|%0, %1}"
6422 [(set_attr "type" "*,*,imov")
6423 (set_attr "mode" "*,*,DI")])
;; Element-0 store from a V2DI register into an SSE register or memory
;; DImode destination.
;; The second template below, over the same operands, appears to be the
;; matching split (its opening line is not visible in this excerpt).  It
;; applies when inter-unit moves are allowed, or the destination is
;; memory, or the destination is not a general register, and it rewrites
;; the extraction as a plain DImode move from the same hard register
;; re-created in DImode.
;; NOTE(review): the insn's condition and output template are not visible
;; in this excerpt.
6425 (define_insn "*sse2_storeq"
6426 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm")
6428 (match_operand:V2DI 1 "register_operand" "x")
6429 (parallel [(const_int 0)])))]
6434 [(set (match_operand:DI 0 "nonimmediate_operand" "")
6436 (match_operand:V2DI 1 "register_operand" "")
6437 (parallel [(const_int 0)])))]
6440 && (TARGET_INTER_UNIT_MOVES
6441 || MEM_P (operands [0])
6442 || !GENERAL_REGNO_P (true_regnum (operands [0])))"
6443 [(set (match_dup 0) (match_dup 1))]
6444 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
;; Extract element 1 of a V2DI value on 64-bit targets.  Memory-to-memory
;; is rejected by the insn condition.  Five alternatives:
;;   0  SSE register to memory                       %vmovhps
;;   1  in place (operand 1 tied to destination)     psrldq by 8
;;   2  SSE register to SSE register                 vpsrldq by 8
;;   3  offsettable memory to SSE register           %vmovq of %H1
;;   4  offsettable memory to general register       mov{q} of %H1
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against the operand-printing code.
6446 (define_insn "*vec_extractv2di_1_rex64"
6447 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r")
6449 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o")
6450 (parallel [(const_int 1)])))]
6451 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6453 %vmovhps\t{%1, %0|%0, %1}
6454 psrldq\t{$8, %0|%0, 8}
6455 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6456 %vmovq\t{%H1, %0|%0, %H1}
6457 mov{q}\t{%H1, %0|%0, %H1}"
6458 [(set_attr "isa" "base,noavx,avx,base,base")
6459 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov")
6460 (set_attr "length_immediate" "*,1,1,*,*")
6461 (set_attr "memory" "*,none,none,*,*")
6462 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig")
6463 (set_attr "mode" "V2SF,TI,TI,TI,DI")])
;; Extract element 1 of a V2DI value with SSE2.  Memory-to-memory is
;; rejected by the insn condition.  Four alternatives:
;;   0  SSE register to memory                       %vmovhps
;;   1  in place (operand 1 tied to destination)     psrldq by 8
;;   2  SSE register to SSE register                 vpsrldq by 8
;;   3  offsettable memory to SSE register           %vmovq of %H1
;; NOTE(review): the first line of the condition is not visible in this
;; excerpt.
6465 (define_insn "*vec_extractv2di_1_sse2"
6466 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x")
6468 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o")
6469 (parallel [(const_int 1)])))]
6471 && TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6473 %vmovhps\t{%1, %0|%0, %1}
6474 psrldq\t{$8, %0|%0, 8}
6475 vpsrldq\t{$8, %1, %0|%0, %1, 8}
6476 %vmovq\t{%H1, %0|%0, %H1}"
6477 [(set_attr "isa" "base,noavx,avx,base")
6478 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov")
6479 (set_attr "length_immediate" "*,1,1,*")
6480 (set_attr "memory" "*,none,none,*")
6481 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex")
6482 (set_attr "mode" "V2SF,TI,TI,TI")])
6484 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI value when only SSE (not SSE2) is
;; available.  Memory-to-memory is rejected by the insn condition.
;; Three alternatives: movhps from an SSE register to memory, movhlps
;; between SSE registers, and movlps of %H1 from offsettable memory.
6485 (define_insn "*vec_extractv2di_1_sse"
6486 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
6488 (match_operand:V2DI 1 "nonimmediate_operand" " x,x,o")
6489 (parallel [(const_int 1)])))]
6490 "!TARGET_SSE2 && TARGET_SSE
6491 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6493 movhps\t{%1, %0|%0, %1}
6494 movhlps\t{%1, %0|%0, %1}
6495 movlps\t{%H1, %0|%0, %H1}"
6496 [(set_attr "type" "ssemov")
6497 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; VEX-encoded broadcast of an SImode value to a V4SI register.  A
;; register source uses vpshufd with immediate 0; a memory source uses
;; vbroadcastss.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6499 (define_insn "*vec_dupv4si_avx"
6500 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
6502 (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
6505 vpshufd\t{$0, %1, %0|%0, %1, 0}
6506 vbroadcastss\t{%1, %0|%0, %1}"
6507 [(set_attr "type" "sselog1,ssemov")
6508 (set_attr "length_immediate" "1,0")
6509 (set_attr "prefix_extra" "0,1")
6510 (set_attr "prefix" "vex")
6511 (set_attr "mode" "TI,V4SF")])
;; Broadcast of an SImode register value to a V4SI register.
;; Alternative 0 (Y2 register class) uses pshufd with immediate 0;
;; alternative 1 ties the source to the destination and uses shufps with
;; immediate 0 on that single register.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6513 (define_insn "*vec_dupv4si"
6514 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
6516 (match_operand:SI 1 "register_operand" " Y2,0")))]
6519 pshufd\t{$0, %1, %0|%0, %1, 0}
6520 shufps\t{$0, %0, %0|%0, %0, 0}"
6521 [(set_attr "type" "sselog1")
6522 (set_attr "length_immediate" "1")
6523 (set_attr "mode" "TI,V4SF")])
;; Broadcast of a DImode value to a V2DI register.  Three alternatives:
;; source tied to the destination (isa noavx), separate SSE-register
;; source using vpunpcklqdq with the source printed through %d1 (isa avx),
;; and memory source using %vmovddup.
;; NOTE(review): the enabling condition and the template of the first
;; alternative are not visible in this excerpt.
6525 (define_insn "*vec_dupv2di_sse3"
6526 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
6528 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
6532 vpunpcklqdq\t{%d1, %0|%0, %d1}
6533 %vmovddup\t{%1, %0|%0, %1}"
6534 [(set_attr "isa" "noavx,avx,base")
6535 (set_attr "type" "sselog1")
6536 (set_attr "prefix" "orig,vex,maybe_vex")
6537 (set_attr "mode" "TI,TI,DF")])
;; Broadcast of a DImode register value to a V2DI register.  In both
;; alternatives the source is tied to the destination; the first uses the
;; Y2 register class (type sselog1, mode TI) and the second any SSE
;; register (type ssemov, mode V4SF).
;; NOTE(review): the enabling condition and both output templates are not
;; visible in this excerpt.
6539 (define_insn "*vec_dupv2di"
6540 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
6542 (match_operand:DI 1 "register_operand" " 0 ,0")))]
6547 [(set_attr "type" "sselog1,ssemov")
6548 (set_attr "mode" "TI,V4SF")])
;; Build a V2SI value from two SImode values (operands 1 and 2).
;; Seven alternatives:
;;   0,1  operand 2 in a general register or memory  pinsrd / vpinsrd, index 1
;;   2,3  operand 2 in an SSE register               punpckldq / vpunpckldq
;;   4    operand 2 is the constant C                %vmovd of operand 1
;;   5    MMX registers                              punpckldq
;;   6    MMX destination, operand 2 constant C      movd of operand 1
;; NOTE(review): the enabling condition is not visible in this excerpt.
6550 (define_insn "*vec_concatv2si_sse4_1"
6551 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
6553 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
6554 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
6557 pinsrd\t{$0x1, %2, %0|%0, %2, 0x1}
6558 vpinsrd\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6559 punpckldq\t{%2, %0|%0, %2}
6560 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
6561 %vmovd\t{%1, %0|%0, %1}
6562 punpckldq\t{%2, %0|%0, %2}
6563 movd\t{%1, %0|%0, %1}"
6564 [(set_attr "isa" "noavx,avx,noavx,avx,base,base,base")
6565 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6566 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
6567 (set_attr "length_immediate" "1,1,*,*,*,*,*")
6568 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
6569 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
6571 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6572 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6573 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SImode values.  Four alternatives:
;; punpckldq on SSE registers (operand 1 tied to the destination), movd of
;; operand 1 when operand 2 is the constant C, and the same two shapes on
;; MMX registers.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6574 (define_insn "*vec_concatv2si_sse2"
6575 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y")
6577 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
6578 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))]
6581 punpckldq\t{%2, %0|%0, %2}
6582 movd\t{%1, %0|%0, %1}
6583 punpckldq\t{%2, %0|%0, %2}
6584 movd\t{%1, %0|%0, %1}"
6585 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6586 (set_attr "mode" "TI,TI,DI,DI")])
;; Build a V2SI value from two SImode values using single-precision
;; instructions on the SSE side.  Four alternatives: unpcklps on SSE
;; registers (operand 1 tied to the destination), movss of a memory
;; operand 1 when operand 2 is the constant C, punpckldq on MMX registers,
;; and movd of operand 1 into an MMX register when operand 2 is C.
;; NOTE(review): the enabling condition is not visible in this excerpt.
6588 (define_insn "*vec_concatv2si_sse"
6589 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
6591 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
6592 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
6595 unpcklps\t{%2, %0|%0, %2}
6596 movss\t{%1, %0|%0, %1}
6597 punpckldq\t{%2, %0|%0, %2}
6598 movd\t{%1, %0|%0, %1}"
6599 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6600 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; Build a V4SI value from two V2SI halves (operands 1 and 2).
;; Five alternatives:
;;   0  Y2 registers, operand 1 tied to dest     punpcklqdq
;;   1  SSE registers, three-operand             vpunpcklqdq
;;   2  SSE registers, operand 1 tied to dest    movlhps
;;   3  operand 2 in memory, operand 1 tied      movhps
;;   4  operand 2 in memory, three-operand       vmovhps
;; NOTE(review): the enabling condition is not visible in this excerpt.
6602 (define_insn "*vec_concatv4si"
6603 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x,x,x")
6605 (match_operand:V2SI 1 "register_operand" " 0 ,x,0,0,x")
6606 (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,x,m,m")))]
6609 punpcklqdq\t{%2, %0|%0, %2}
6610 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6611 movlhps\t{%2, %0|%0, %2}
6612 movhps\t{%2, %0|%0, %2}
6613 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6614 [(set_attr "isa" "noavx,avx,noavx,noavx,avx")
6615 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
6616 (set_attr "prefix" "orig,vex,orig,orig,vex")
6617 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
6619 ;; movd instead of movq is required to handle broken assemblers.
;; V2DI concatenation of two DImode operands, enabled only for
;; TARGET_64BIT && TARGET_SSE4_1.  Nine alternatives, in order:
;;   0/1  pinsrq / vpinsrq          operand 2 from a GPR or memory
;;   2    %vmovq                    operand 1 from an SSE register or memory
;;   3    %vmovd                    operand 1 from a GPR
;;   4    movq2dq                   operand 1 from an MMX register
;;   5/6  punpcklqdq / vpunpcklqdq  both inputs in SSE registers
;;   7/8  movhps / vmovhps          operand 2 in memory
6620 (define_insn "*vec_concatv2di_rex64_sse4_1"
6621 [(set (match_operand:V2DI 0 "register_operand"
6622 "=x, x, x,Yi,!x,x,x,x,x")
6624 (match_operand:DI 1 "nonimmediate_operand"
6625 " 0, x,xm,r ,*y,0,x,0,x")
6626 (match_operand:DI 2 "vector_move_operand"
6627 "rm,rm, C,C ,C ,x,x,m,m")))]
6628 "TARGET_64BIT && TARGET_SSE4_1"
6630 pinsrq\t{$0x1, %2, %0|%0, %2, 0x1}
6631 vpinsrq\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
6632 %vmovq\t{%1, %0|%0, %1}
6633 %vmovd\t{%1, %0|%0, %1}
6634 movq2dq\t{%1, %0|%0, %1}
6635 punpcklqdq\t{%2, %0|%0, %2}
6636 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6637 movhps\t{%2, %0|%0, %2}
6638 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6639 [(set_attr "isa" "noavx,avx,base,base,base,noavx,avx,noavx,avx")
6640 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov,sselog,sselog,ssemov,ssemov")
;; prefix_rex is special-cased for alternatives 0 and 3 when TARGET_AVX is 0;
;; every other case falls back to "*".
6641 (set (attr "prefix_rex")
6643 (and (eq_attr "alternative" "0,3")
6644 (eq (symbol_ref "TARGET_AVX") (const_int 0)))
6646 (const_string "*")))
6647 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*")
6648 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*")
6649 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex")
6650 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")])
6652 ;; movd instead of movq is required to handle broken assemblers.
;; V2DI concatenation of two DImode operands, enabled for
;; TARGET_64BIT && TARGET_SSE.  Six alternatives, in order: movq, movd
;; (operand 1 in a GPR; the only alternative with prefix_rex "1"),
;; movq2dq (operand 1 in an MMX register), punpcklqdq, movlhps and
;; movhps (operand 2 in memory).
6653 (define_insn "*vec_concatv2di_rex64_sse"
6654 [(set (match_operand:V2DI 0 "register_operand" "=Y2,Yi,!Y2,Y2,x,x")
6656 (match_operand:DI 1 "nonimmediate_operand" "Y2m,r ,*y ,0 ,0,0")
6657 (match_operand:DI 2 "vector_move_operand" " C ,C ,C ,Y2,x,m")))]
6658 "TARGET_64BIT && TARGET_SSE"
6660 movq\t{%1, %0|%0, %1}
6661 movd\t{%1, %0|%0, %1}
6662 movq2dq\t{%1, %0|%0, %1}
6663 punpcklqdq\t{%2, %0|%0, %2}
6664 movlhps\t{%2, %0|%0, %2}
6665 movhps\t{%2, %0|%0, %2}"
6666 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
6667 (set_attr "prefix_rex" "*,1,*,*,*,*")
6668 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
;; V2DI concatenation of two DImode operands for 32-bit targets
;; (!TARGET_64BIT && TARGET_SSE).  Seven alternatives, in order: %vmovq,
;; movq2dq (operand 1 in an MMX register), punpcklqdq / vpunpcklqdq,
;; movlhps, and movhps / vmovhps (operand 2 in memory).  There is no
;; alternative that takes operand 1 from a general register.
6670 (define_insn "vec_concatv2di"
6671 [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x,x")
6673 (match_operand:DI 1 "nonimmediate_operand" "Y2m,*y , 0,x,0,0,x")
6674 (match_operand:DI 2 "vector_move_operand" " C , C ,Y2,x,x,m,m")))]
6675 "!TARGET_64BIT && TARGET_SSE"
6677 %vmovq\t{%1, %0|%0, %1}
6678 movq2dq\t{%1, %0|%0, %1}
6679 punpcklqdq\t{%2, %0|%0, %2}
6680 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
6681 movlhps\t{%2, %0|%0, %2}
6682 movhps\t{%2, %0|%0, %2}
6683 vmovhps\t{%2, %1, %0|%0, %1, %2}"
6684 [(set_attr "isa" "base,base,noavx,avx,noavx,noavx,avx")
6685 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov")
6686 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex")
6687 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander for vec_unpacks_lo on the VI124_128 modes; the result has the
;; wider <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with both flags false.
;; NOTE(review): the flags look like (unsigned_p, high_p) -- confirm
;; against the helper's definition.
6689 (define_expand "vec_unpacks_lo_<mode>"
6690 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6691 (match_operand:VI124_128 1 "register_operand" "")]
6693 "ix86_expand_sse_unpack (operands, false, false); DONE;")
;; Expander for vec_unpacks_hi on the VI124_128 modes; the result has the
;; wider <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with flags (false, true).
;; NOTE(review): the flags look like (unsigned_p, high_p) -- confirm
;; against the helper's definition.
6695 (define_expand "vec_unpacks_hi_<mode>"
6696 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6697 (match_operand:VI124_128 1 "register_operand" "")]
6699 "ix86_expand_sse_unpack (operands, false, true); DONE;")
;; Expander for vec_unpacku_lo on the VI124_128 modes; the result has the
;; wider <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with flags (true, false).
;; NOTE(review): the flags look like (unsigned_p, high_p) -- confirm
;; against the helper's definition.
6701 (define_expand "vec_unpacku_lo_<mode>"
6702 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6703 (match_operand:VI124_128 1 "register_operand" "")]
6705 "ix86_expand_sse_unpack (operands, true, false); DONE;")
;; Expander for vec_unpacku_hi on the VI124_128 modes; the result has the
;; wider <sseunpackmode>.  All work is delegated to ix86_expand_sse_unpack,
;; called with both flags true.
;; NOTE(review): the flags look like (unsigned_p, high_p) -- confirm
;; against the helper's definition.
6707 (define_expand "vec_unpacku_hi_<mode>"
6708 [(match_operand:<sseunpackmode> 0 "register_operand" "")
6709 (match_operand:VI124_128 1 "register_operand" "")]
6711 "ix86_expand_sse_unpack (operands, true, true); DONE;")
6713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6717 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the V16QI unsigned-average pattern.  Operands 1 and 2 may
;; be registers or memory; the RTL includes a const_vector of sixteen ones
;; (presumably the +1 rounding term of the average -- confirm against the
;; full expression).  Before the pattern is emitted the preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy
;; with PLUS.
6719 (define_expand "sse2_uavgv16qi3"
6720 [(set (match_operand:V16QI 0 "register_operand" "")
6726 (match_operand:V16QI 1 "nonimmediate_operand" ""))
6728 (match_operand:V16QI 2 "nonimmediate_operand" "")))
6729 (const_vector:V16QI [(const_int 1) (const_int 1)
6730 (const_int 1) (const_int 1)
6731 (const_int 1) (const_int 1)
6732 (const_int 1) (const_int 1)
6733 (const_int 1) (const_int 1)
6734 (const_int 1) (const_int 1)
6735 (const_int 1) (const_int 1)
6736 (const_int 1) (const_int 1)]))
6739 "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
;; Matcher for the V16QI unsigned average: emits pavgb (legacy encoding,
;; destination tied to operand 1) or vpavgb (VEX, three operands).
;; The "%" on operand 1 marks operands 1 and 2 as commutative.  The insn
;; is valid only when TARGET_SSE2 holds and ix86_binary_operator_ok
;; accepts the operands.
6741 (define_insn "*sse2_uavgv16qi3"
6742 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
6748 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
6750 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
6751 (const_vector:V16QI [(const_int 1) (const_int 1)
6752 (const_int 1) (const_int 1)
6753 (const_int 1) (const_int 1)
6754 (const_int 1) (const_int 1)
6755 (const_int 1) (const_int 1)
6756 (const_int 1) (const_int 1)
6757 (const_int 1) (const_int 1)
6758 (const_int 1) (const_int 1)]))
6760 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
6762 pavgb\t{%2, %0|%0, %2}
6763 vpavgb\t{%2, %1, %0|%0, %1, %2}"
6764 [(set_attr "isa" "noavx,avx")
6765 (set_attr "type" "sseiadd")
6766 (set_attr "prefix_data16" "1,*")
6767 (set_attr "prefix" "orig,vex")
6768 (set_attr "mode" "TI")])
;; Expander for the V8HI unsigned-average pattern.  Operands 1 and 2 may
;; be registers or memory; the RTL includes a const_vector of eight ones
;; (presumably the +1 rounding term -- confirm against the full
;; expression).  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with PLUS.
6770 (define_expand "sse2_uavgv8hi3"
6771 [(set (match_operand:V8HI 0 "register_operand" "")
6777 (match_operand:V8HI 1 "nonimmediate_operand" ""))
6779 (match_operand:V8HI 2 "nonimmediate_operand" "")))
6780 (const_vector:V8HI [(const_int 1) (const_int 1)
6781 (const_int 1) (const_int 1)
6782 (const_int 1) (const_int 1)
6783 (const_int 1) (const_int 1)]))
6786 "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
;; Matcher for the V8HI unsigned average: emits pavgw (legacy encoding,
;; destination tied to operand 1) or vpavgw (VEX, three operands).
;; The "%" on operand 1 marks operands 1 and 2 as commutative.  Valid only
;; when TARGET_SSE2 holds and ix86_binary_operator_ok accepts the operands.
6788 (define_insn "*sse2_uavgv8hi3"
6789 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
6795 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
6797 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
6798 (const_vector:V8HI [(const_int 1) (const_int 1)
6799 (const_int 1) (const_int 1)
6800 (const_int 1) (const_int 1)
6801 (const_int 1) (const_int 1)]))
6803 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
6805 pavgw\t{%2, %0|%0, %2}
6806 vpavgw\t{%2, %1, %0|%0, %1, %2}"
6807 [(set_attr "isa" "noavx,avx")
6808 (set_attr "type" "sseiadd")
6809 (set_attr "prefix_data16" "1,*")
6810 (set_attr "prefix" "orig,vex")
6811 (set_attr "mode" "TI")])
6813 ;; The correct representation for this is absolutely enormous, and
6814 ;; surely not generally useful.
;; psadbw / vpsadbw, modelled as an unspec that takes two V16QI inputs and
;; yields a V2DI result.  Operand 1 must be a register (tied to the
;; destination in the legacy alternative); operand 2 may be in memory.
6815 (define_insn "sse2_psadbw"
6816 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
6817 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0,x")
6818 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
6822 psadbw\t{%2, %0|%0, %2}
6823 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
6824 [(set_attr "isa" "noavx,avx")
6825 (set_attr "type" "sseiadd")
6826 (set_attr "atom_unit" "simul")
6827 (set_attr "prefix_data16" "1,*")
6828 (set_attr "prefix" "orig,vex")
6829 (set_attr "mode" "TI")])
;; movmskps / movmskpd family, instantiated for every mode of the VF
;; iterator.  Reads a vector register (operand 1) and writes an SImode
;; general register (operand 0); the %v prefix selects the VEX mnemonic
;; when AVX is in use.
6831 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
6832 [(set (match_operand:SI 0 "register_operand" "=r")
6834 [(match_operand:VF 1 "register_operand" "x")]
6837 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
6838 [(set_attr "type" "ssemov")
6839 (set_attr "prefix" "maybe_vex")
6840 (set_attr "mode" "<MODE>")])
;; pmovmskb / vpmovmskb, modelled as an unspec: reads a V16QI SSE register
;; (operand 1) and writes an SImode general register (operand 0).
6842 (define_insn "sse2_pmovmskb"
6843 [(set (match_operand:SI 0 "register_operand" "=r")
6844 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
6847 "%vpmovmskb\t{%1, %0|%0, %1}"
6848 [(set_attr "type" "ssemov")
6849 (set_attr "prefix_data16" "1")
6850 (set_attr "prefix" "maybe_vex")
6851 (set_attr "mode" "SI")])
;; Expander for the masked byte store: operand 0 is the V16QI memory
;; destination, operands 1 and 2 are V16QI registers fed to the unspec.
;; NOTE(review): by the mnemonic's definition operand 1 is the data and
;; operand 2 the byte mask -- confirm against the matching insn.
6853 (define_expand "sse2_maskmovdqu"
6854 [(set (match_operand:V16QI 0 "memory_operand" "")
6855 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "")
6856 (match_operand:V16QI 2 "register_operand" "")
;; maskmovdqu / vmaskmovdqu.  The destination is a V16QI memory reference
;; whose address is operand 0, a pointer-mode register restricted by
;; constraint "D"; the old contents of that memory are also an input of
;; the unspec.  Only operands 1 and 2 appear in the assembler template,
;; so the address register is implicit in the instruction.
6861 (define_insn "*sse2_maskmovdqu"
6862 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
6863 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
6864 (match_operand:V16QI 2 "register_operand" "x")
6865 (mem:V16QI (match_dup 0))]
6868 "%vmaskmovdqu\t{%2, %1|%1, %2}"
6869 [(set_attr "type" "ssemov")
6870 (set_attr "prefix_data16" "1")
6871 ;; The implicit %rdi operand confuses default length_vex computation.
;; Hence length_vex is computed by hand: 3 bytes, plus 1 when operand 2
;; satisfies REX_SSE_REGNO_P.
6872 (set (attr "length_vex")
6873 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
6874 (set_attr "prefix" "maybe_vex")
6875 (set_attr "mode" "TI")])
;; MXCSR load, modelled as an unspec_volatile whose only operand is an
;; SImode memory location.  The "memory" attribute is "load" and the Atom
;; scheduling class is "mxcsr".
6877 (define_insn "sse_ldmxcsr"
6878 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
6882 [(set_attr "type" "sse")
6883 (set_attr "atom_sse_attr" "mxcsr")
6884 (set_attr "prefix" "maybe_vex")
6885 (set_attr "memory" "load")])
;; MXCSR store: sets an SImode memory operand from the volatile unspec
;; UNSPECV_STMXCSR, which has no real input (a dummy const_int 0).
;; The "memory" attribute is "store".
6887 (define_insn "sse_stmxcsr"
6888 [(set (match_operand:SI 0 "memory_operand" "=m")
6889 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
6892 [(set_attr "type" "sse")
6893 (set_attr "atom_sse_attr" "mxcsr")
6894 (set_attr "prefix" "maybe_vex")
6895 (set_attr "memory" "store")])
;; Expander for sfence, available with TARGET_SSE or TARGET_3DNOW_A.
;; The fence is represented as UNSPEC_SFENCE applied to operand 0, which
;; the preparation code creates as a volatile BLKmode MEM whose address is
;; a SCRATCH.
6897 (define_expand "sse_sfence"
6899 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6900 "TARGET_SSE || TARGET_3DNOW_A"
6902 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6903 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the sfence RTL: a BLKmode operand set from UNSPEC_SFENCE
;; of itself.  Operand 0 has an empty predicate and constraint, so any
;; BLKmode rtx matches.  length_address is forced to 0 and the memory
;; attribute is "unknown".
6906 (define_insn "*sse_sfence"
6907 [(set (match_operand:BLK 0 "" "")
6908 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
6909 "TARGET_SSE || TARGET_3DNOW_A"
6911 [(set_attr "type" "sse")
6912 (set_attr "length_address" "0")
6913 (set_attr "atom_sse_attr" "fence")
6914 (set_attr "memory" "unknown")])
;; clflush, modelled as an unspec_volatile whose operand is an address
;; (address_operand, constraint "p") rather than a memory reference.
;; It is scheduled as a fence on Atom and its memory attribute is
;; "unknown".
6916 (define_insn "sse2_clflush"
6917 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
6921 [(set_attr "type" "sse")
6922 (set_attr "atom_sse_attr" "fence")
6923 (set_attr "memory" "unknown")])
;; Expander for mfence.  The fence is represented as UNSPEC_MFENCE applied
;; to operand 0, which the preparation code creates as a volatile BLKmode
;; MEM whose address is a SCRATCH.
6925 (define_expand "sse2_mfence"
6927 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6930 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6931 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the mfence RTL: a BLKmode operand set from UNSPEC_MFENCE of
;; itself.  Enabled for any 64-bit target or when TARGET_SSE2 is set.
;; length_address is forced to 0 and the memory attribute is "unknown".
6934 (define_insn "*sse2_mfence"
6935 [(set (match_operand:BLK 0 "" "")
6936 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
6937 "TARGET_64BIT || TARGET_SSE2"
6939 [(set_attr "type" "sse")
6940 (set_attr "length_address" "0")
6941 (set_attr "atom_sse_attr" "fence")
6942 (set_attr "memory" "unknown")])
;; Expander for lfence.  The fence is represented as UNSPEC_LFENCE applied
;; to operand 0, which the preparation code creates as a volatile BLKmode
;; MEM whose address is a SCRATCH.
6944 (define_expand "sse2_lfence"
6946 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6949 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
6950 MEM_VOLATILE_P (operands[0]) = 1;
;; Matcher for the lfence RTL: a BLKmode operand set from UNSPEC_LFENCE of
;; itself.  Unlike the sfence/mfence matchers, the Atom scheduling class
;; here is the dedicated "lfence" value.
6953 (define_insn "*sse2_lfence"
6954 [(set (match_operand:BLK 0 "" "")
6955 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
6958 [(set_attr "type" "sse")
6959 (set_attr "length_address" "0")
6960 (set_attr "atom_sse_attr" "lfence")
6961 (set_attr "memory" "unknown")])
;; mwait, modelled as an unspec_volatile over two SImode registers pinned
;; by constraints "a" and "c".  A single SImode pattern serves both 32-bit
;; and 64-bit targets for the reason given in the comment below.  The
;; encoded length is fixed at 3 bytes.
6963 (define_insn "sse3_mwait"
6964 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6965 (match_operand:SI 1 "register_operand" "c")]
6968 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
6969 ;; Since 32bit register operands are implicitly zero extended to 64bit,
6970 ;; we only need to set up 32bit registers.
6972 [(set_attr "length" "3")])
;; 32-bit monitor (TARGET_SSE3 && !TARGET_64BIT): an unspec_volatile over
;; three SImode registers pinned by constraints "a", "c" and "d".
;; All three operands are printed explicitly.  Length is fixed at 3 bytes.
6974 (define_insn "sse3_monitor"
6975 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
6976 (match_operand:SI 1 "register_operand" "c")
6977 (match_operand:SI 2 "register_operand" "d")]
6979 "TARGET_SSE3 && !TARGET_64BIT"
6980 "monitor\t%0, %1, %2"
6981 [(set_attr "length" "3")])
;; 64-bit monitor (TARGET_SSE3 && TARGET_64BIT).  Operand 0, pinned by
;; constraint "a", is DImode here; operands 1 and 2 stay SImode for the
;; reason given in the comment below.  Length is fixed at 3 bytes.
6983 (define_insn "sse3_monitor64"
6984 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
6985 (match_operand:SI 1 "register_operand" "c")
6986 (match_operand:SI 2 "register_operand" "d")]
6988 "TARGET_SSE3 && TARGET_64BIT"
6989 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
6990 ;; RCX and RDX are used. Since 32bit register operands are implicitly
6991 ;; zero extended to 64bit, we only need to set up 32bit registers.
6993 [(set_attr "length" "3")])
6995 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6997 ;; SSSE3 instructions
6999 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; phaddw / vphaddw on V8HI.  The RTL spells the operation out element by
;; element: the vec_selects pick adjacent HImode pairs (0,1), (2,3),
;; (4,5), (6,7), first from operand 1 and then from operand 2.
;; Operand 1 must be a register (tied to the destination in the legacy
;; alternative); operand 2 may be in memory.
7001 (define_insn "ssse3_phaddwv8hi3"
7002 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7008 (match_operand:V8HI 1 "register_operand" "0,x")
7009 (parallel [(const_int 0)]))
7010 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7012 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7013 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7016 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7017 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7019 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7020 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7025 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7026 (parallel [(const_int 0)]))
7027 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7029 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7030 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7033 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7034 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7036 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7037 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7040 phaddw\t{%2, %0|%0, %2}
7041 vphaddw\t{%2, %1, %0|%0, %1, %2}"
7042 [(set_attr "isa" "noavx,avx")
7043 (set_attr "type" "sseiadd")
7044 (set_attr "atom_unit" "complex")
7045 (set_attr "prefix_data16" "1,*")
7046 (set_attr "prefix_extra" "1")
7047 (set_attr "prefix" "orig,vex")
7048 (set_attr "mode" "TI")])
;; MMX-register form of phaddw on V4HI.  The vec_selects pick adjacent
;; HImode pairs (0,1) and (2,3), first from operand 1 and then from
;; operand 2.  There is no VEX alternative; prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
7050 (define_insn "ssse3_phaddwv4hi3"
7051 [(set (match_operand:V4HI 0 "register_operand" "=y")
7056 (match_operand:V4HI 1 "register_operand" "0")
7057 (parallel [(const_int 0)]))
7058 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7060 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7061 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7065 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7066 (parallel [(const_int 0)]))
7067 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7069 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7070 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7072 "phaddw\t{%2, %0|%0, %2}"
7073 [(set_attr "type" "sseiadd")
7074 (set_attr "atom_unit" "complex")
7075 (set_attr "prefix_extra" "1")
7076 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7077 (set_attr "mode" "DI")])
;; phaddd / vphaddd on V4SI.  The vec_selects pick adjacent SImode pairs
;; (0,1) and (2,3), first from operand 1 and then from operand 2.
;; Operand 1 must be a register (tied to the destination in the legacy
;; alternative); operand 2 may be in memory.
7079 (define_insn "ssse3_phadddv4si3"
7080 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7085 (match_operand:V4SI 1 "register_operand" "0,x")
7086 (parallel [(const_int 0)]))
7087 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7089 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7090 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7094 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7095 (parallel [(const_int 0)]))
7096 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7098 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7099 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7102 phaddd\t{%2, %0|%0, %2}
7103 vphaddd\t{%2, %1, %0|%0, %1, %2}"
7104 [(set_attr "isa" "noavx,avx")
7105 (set_attr "type" "sseiadd")
7106 (set_attr "atom_unit" "complex")
7107 (set_attr "prefix_data16" "1,*")
7108 (set_attr "prefix_extra" "1")
7109 (set_attr "prefix" "orig,vex")
7110 (set_attr "mode" "TI")])
;; MMX-register form of phaddd on V2SI.  The vec_selects pick elements 0
;; and 1 of operand 1, then elements 0 and 1 of operand 2.  There is no
;; VEX alternative; prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
7112 (define_insn "ssse3_phadddv2si3"
7113 [(set (match_operand:V2SI 0 "register_operand" "=y")
7117 (match_operand:V2SI 1 "register_operand" "0")
7118 (parallel [(const_int 0)]))
7119 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7122 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7123 (parallel [(const_int 0)]))
7124 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7126 "phaddd\t{%2, %0|%0, %2}"
7127 [(set_attr "type" "sseiadd")
7128 (set_attr "atom_unit" "complex")
7129 (set_attr "prefix_extra" "1")
7130 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7131 (set_attr "mode" "DI")])
;; phaddsw / vphaddsw on V8HI (the saturating counterpart of phaddw, per
;; the mnemonic).  The vec_selects pick adjacent HImode pairs (0,1),
;; (2,3), (4,5), (6,7), first from operand 1 and then from operand 2.
7133 (define_insn "ssse3_phaddswv8hi3"
7134 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7140 (match_operand:V8HI 1 "register_operand" "0,x")
7141 (parallel [(const_int 0)]))
7142 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7144 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7145 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7148 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7149 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7151 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7152 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7157 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7158 (parallel [(const_int 0)]))
7159 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7161 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7162 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7165 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7166 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7168 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7169 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7172 phaddsw\t{%2, %0|%0, %2}
7173 vphaddsw\t{%2, %1, %0|%0, %1, %2}"
7174 [(set_attr "isa" "noavx,avx")
7175 (set_attr "type" "sseiadd")
7176 (set_attr "atom_unit" "complex")
7177 (set_attr "prefix_data16" "1,*")
7178 (set_attr "prefix_extra" "1")
7179 (set_attr "prefix" "orig,vex")
7180 (set_attr "mode" "TI")])
;; MMX-register form of phaddsw on V4HI.  The vec_selects pick adjacent
;; HImode pairs (0,1) and (2,3), first from operand 1 and then from
;; operand 2.  There is no VEX alternative; prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
7182 (define_insn "ssse3_phaddswv4hi3"
7183 [(set (match_operand:V4HI 0 "register_operand" "=y")
7188 (match_operand:V4HI 1 "register_operand" "0")
7189 (parallel [(const_int 0)]))
7190 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7192 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7193 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7197 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7198 (parallel [(const_int 0)]))
7199 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7201 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7202 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7204 "phaddsw\t{%2, %0|%0, %2}"
7205 [(set_attr "type" "sseiadd")
7206 (set_attr "atom_unit" "complex")
7207 (set_attr "prefix_extra" "1")
7208 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7209 (set_attr "mode" "DI")])
;; phsubw / vphsubw on V8HI.  The vec_selects pick adjacent HImode pairs
;; (0,1), (2,3), (4,5), (6,7), first from operand 1 and then from
;; operand 2.  Operand 1 must be a register (tied to the destination in
;; the legacy alternative); operand 2 may be in memory.
7211 (define_insn "ssse3_phsubwv8hi3"
7212 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7218 (match_operand:V8HI 1 "register_operand" "0,x")
7219 (parallel [(const_int 0)]))
7220 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7222 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7223 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7226 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7227 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7229 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7230 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7235 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7236 (parallel [(const_int 0)]))
7237 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7239 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7240 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7243 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7244 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7246 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7247 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7250 phsubw\t{%2, %0|%0, %2}
7251 vphsubw\t{%2, %1, %0|%0, %1, %2}"
7252 [(set_attr "isa" "noavx,avx")
7253 (set_attr "type" "sseiadd")
7254 (set_attr "atom_unit" "complex")
7255 (set_attr "prefix_data16" "1,*")
7256 (set_attr "prefix_extra" "1")
7257 (set_attr "prefix" "orig,vex")
7258 (set_attr "mode" "TI")])
;; MMX-register form of phsubw on V4HI.  The vec_selects pick adjacent
;; HImode pairs (0,1) and (2,3), first from operand 1 and then from
;; operand 2.  There is no VEX alternative; prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
7260 (define_insn "ssse3_phsubwv4hi3"
7261 [(set (match_operand:V4HI 0 "register_operand" "=y")
7266 (match_operand:V4HI 1 "register_operand" "0")
7267 (parallel [(const_int 0)]))
7268 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7270 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7271 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7275 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7276 (parallel [(const_int 0)]))
7277 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7279 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7280 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7282 "phsubw\t{%2, %0|%0, %2}"
7283 [(set_attr "type" "sseiadd")
7284 (set_attr "atom_unit" "complex")
7285 (set_attr "prefix_extra" "1")
7286 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7287 (set_attr "mode" "DI")])
;; phsubd / vphsubd on V4SI.  The vec_selects pick adjacent SImode pairs
;; (0,1) and (2,3), first from operand 1 and then from operand 2.
;; Operand 1 must be a register (tied to the destination in the legacy
;; alternative); operand 2 may be in memory.
7289 (define_insn "ssse3_phsubdv4si3"
7290 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7295 (match_operand:V4SI 1 "register_operand" "0,x")
7296 (parallel [(const_int 0)]))
7297 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7299 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
7300 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
7304 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7305 (parallel [(const_int 0)]))
7306 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
7308 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7309 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7312 phsubd\t{%2, %0|%0, %2}
7313 vphsubd\t{%2, %1, %0|%0, %1, %2}"
7315 [(set_attr "isa" "noavx,avx")
7316 (set_attr "type" "sseiadd")
7317 (set_attr "atom_unit" "complex")
7318 (set_attr "prefix_data16" "1,*")
7319 (set_attr "prefix_extra" "1")
7320 (set_attr "prefix" "orig,vex")
7321 (set_attr "mode" "TI")])
;; MMX-register form of phsubd on V2SI.  The vec_selects pick elements 0
;; and 1 of operand 1, then elements 0 and 1 of operand 2.  There is no
;; VEX alternative; prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
7323 (define_insn "ssse3_phsubdv2si3"
7324 [(set (match_operand:V2SI 0 "register_operand" "=y")
7328 (match_operand:V2SI 1 "register_operand" "0")
7329 (parallel [(const_int 0)]))
7330 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
7333 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
7334 (parallel [(const_int 0)]))
7335 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7337 "phsubd\t{%2, %0|%0, %2}"
7338 [(set_attr "type" "sseiadd")
7339 (set_attr "atom_unit" "complex")
7340 (set_attr "prefix_extra" "1")
7341 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7342 (set_attr "mode" "DI")])
;; phsubsw / vphsubsw on V8HI (the saturating counterpart of phsubw, per
;; the mnemonic).  The vec_selects pick adjacent HImode pairs (0,1),
;; (2,3), (4,5), (6,7), first from operand 1 and then from operand 2.
7344 (define_insn "ssse3_phsubswv8hi3"
7345 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7351 (match_operand:V8HI 1 "register_operand" "0,x")
7352 (parallel [(const_int 0)]))
7353 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7355 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7356 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7359 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
7360 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
7362 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
7363 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
7368 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7369 (parallel [(const_int 0)]))
7370 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7372 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7373 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
7376 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
7377 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
7379 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7380 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7383 phsubsw\t{%2, %0|%0, %2}
7384 vphsubsw\t{%2, %1, %0|%0, %1, %2}"
7385 [(set_attr "isa" "noavx,avx")
7386 (set_attr "type" "sseiadd")
7387 (set_attr "atom_unit" "complex")
7388 (set_attr "prefix_data16" "1,*")
7389 (set_attr "prefix_extra" "1")
7390 (set_attr "prefix" "orig,vex")
7391 (set_attr "mode" "TI")])
;; MMX-register form of phsubsw on V4HI.  The vec_selects pick adjacent
;; HImode pairs (0,1) and (2,3), first from operand 1 and then from
;; operand 2.  There is no VEX alternative; prefix_rex is computed per
;; insn by x86_extended_reg_mentioned_p.
7393 (define_insn "ssse3_phsubswv4hi3"
7394 [(set (match_operand:V4HI 0 "register_operand" "=y")
7399 (match_operand:V4HI 1 "register_operand" "0")
7400 (parallel [(const_int 0)]))
7401 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
7403 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
7404 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
7408 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
7409 (parallel [(const_int 0)]))
7410 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
7412 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7413 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7415 "phsubsw\t{%2, %0|%0, %2}"
7416 [(set_attr "type" "sseiadd")
7417 (set_attr "atom_unit" "complex")
7418 (set_attr "prefix_extra" "1")
7419 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7420 (set_attr "mode" "DI")])
;; pmaddubsw / vpmaddubsw: V16QI inputs, V8HI result.  The RTL takes two
;; V8QI selections from each input, one whose index list starts at
;; element 0 and one whose list starts at element 1 and ends at
;; element 15.
;; NOTE(review): these are presumably the even- and odd-indexed bytes --
;; confirm against the complete index lists.
7422 (define_insn "ssse3_pmaddubsw128"
7423 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7428 (match_operand:V16QI 1 "register_operand" "0,x")
7429 (parallel [(const_int 0)
7439 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7440 (parallel [(const_int 0)
7450 (vec_select:V8QI (match_dup 1)
7451 (parallel [(const_int 1)
7460 (vec_select:V8QI (match_dup 2)
7461 (parallel [(const_int 1)
7468 (const_int 15)]))))))]
7471 pmaddubsw\t{%2, %0|%0, %2}
7472 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
7473 [(set_attr "isa" "noavx,avx")
7474 (set_attr "type" "sseiadd")
7475 (set_attr "atom_unit" "simul")
7476 (set_attr "prefix_data16" "1,*")
7477 (set_attr "prefix_extra" "1")
7478 (set_attr "prefix" "orig,vex")
7479 (set_attr "mode" "TI")])
;; MMX-register form of pmaddubsw: V8QI inputs, V4HI result.  The RTL
;; takes two V4QI selections from each input, one whose index list starts
;; at element 0 and one whose list starts at element 1 and ends at
;; element 7.
;; NOTE(review): these are presumably the even- and odd-indexed bytes --
;; confirm against the complete index lists.
7481 (define_insn "ssse3_pmaddubsw"
7482 [(set (match_operand:V4HI 0 "register_operand" "=y")
7487 (match_operand:V8QI 1 "register_operand" "0")
7488 (parallel [(const_int 0)
7494 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
7495 (parallel [(const_int 0)
7501 (vec_select:V4QI (match_dup 1)
7502 (parallel [(const_int 1)
7507 (vec_select:V4QI (match_dup 2)
7508 (parallel [(const_int 1)
7511 (const_int 7)]))))))]
7513 "pmaddubsw\t{%2, %0|%0, %2}"
7514 [(set_attr "type" "sseiadd")
7515 (set_attr "atom_unit" "simul")
7516 (set_attr "prefix_extra" "1")
7517 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7518 (set_attr "mode" "DI")])
;; Expander for the V8HI pmulhrsw pattern.  Operands 1 and 2 may be
;; registers or memory; the RTL includes a const_vector of eight ones
;; (presumably the rounding increment -- confirm against the full
;; expression).  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy with MULT.
7520 (define_expand "ssse3_pmulhrswv8hi3"
7521 [(set (match_operand:V8HI 0 "register_operand" "")
7528 (match_operand:V8HI 1 "nonimmediate_operand" ""))
7530 (match_operand:V8HI 2 "nonimmediate_operand" "")))
7532 (const_vector:V8HI [(const_int 1) (const_int 1)
7533 (const_int 1) (const_int 1)
7534 (const_int 1) (const_int 1)
7535 (const_int 1) (const_int 1)]))
7538 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matcher for the V8HI pmulhrsw pattern: emits pmulhrsw (legacy,
;; destination tied to operand 1) or vpmulhrsw (VEX, three operands).
;; The "%" on operand 1 marks operands 1 and 2 as commutative.  Valid only
;; when TARGET_SSSE3 holds and ix86_binary_operator_ok accepts the
;; operands.
7540 (define_insn "*ssse3_pmulhrswv8hi3"
7541 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7548 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
7550 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
7552 (const_vector:V8HI [(const_int 1) (const_int 1)
7553 (const_int 1) (const_int 1)
7554 (const_int 1) (const_int 1)
7555 (const_int 1) (const_int 1)]))
7557 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
7559 pmulhrsw\t{%2, %0|%0, %2}
7560 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
7561 [(set_attr "isa" "noavx,avx")
7562 (set_attr "type" "sseimul")
7563 (set_attr "prefix_data16" "1,*")
7564 (set_attr "prefix_extra" "1")
7565 (set_attr "prefix" "orig,vex")
7566 (set_attr "mode" "TI")])
;; Expander for the V4HI (MMX-width) pmulhrsw pattern.  The RTL includes a
;; const_vector of four ones (presumably the rounding increment -- confirm
;; against the full expression).  The preparation statement passes the
;; operands to ix86_fixup_binary_operands_no_copy with MULT.
7568 (define_expand "ssse3_pmulhrswv4hi3"
7569 [(set (match_operand:V4HI 0 "register_operand" "")
7576 (match_operand:V4HI 1 "nonimmediate_operand" ""))
7578 (match_operand:V4HI 2 "nonimmediate_operand" "")))
7580 (const_vector:V4HI [(const_int 1) (const_int 1)
7581 (const_int 1) (const_int 1)]))
7584 "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
;; Matcher for the V4HI pmulhrsw pattern on MMX registers.  It has a
;; single alternative with the destination tied to operand 1, which is
;; commutative with operand 2 ("%").  Valid only when TARGET_SSSE3 holds
;; and ix86_binary_operator_ok accepts the operands; prefix_rex is
;; computed per insn by x86_extended_reg_mentioned_p.
7586 (define_insn "*ssse3_pmulhrswv4hi3"
7587 [(set (match_operand:V4HI 0 "register_operand" "=y")
7594 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
7596 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
7598 (const_vector:V4HI [(const_int 1) (const_int 1)
7599 (const_int 1) (const_int 1)]))
7601 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
7602 "pmulhrsw\t{%2, %0|%0, %2}"
7603 [(set_attr "type" "sseimul")
7604 (set_attr "prefix_extra" "1")
7605 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7606 (set_attr "mode" "DI")])
;; pshufb / vpshufb on V16QI, modelled as an unspec of operands 1 and 2.
;; Operand 1 must be a register (tied to the destination in the legacy
;; alternative); operand 2 may be in memory.
7608 (define_insn "ssse3_pshufbv16qi3"
7609 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7610 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7611 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")]
7615 pshufb\t{%2, %0|%0, %2}
7616 vpshufb\t{%2, %1, %0|%0, %1, %2}"
7617 [(set_attr "isa" "noavx,avx")
7618 (set_attr "type" "sselog1")
7619 (set_attr "prefix_data16" "1,*")
7620 (set_attr "prefix_extra" "1")
7621 (set_attr "prefix" "orig,vex")
7622 (set_attr "mode" "TI")])
;; MMX-register form of pshufb on V8QI, modelled as an unspec of operands
;; 1 and 2.  The destination is tied to operand 1; operand 2 may be an MMX
;; register or memory.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
7624 (define_insn "ssse3_pshufbv8qi3"
7625 [(set (match_operand:V8QI 0 "register_operand" "=y")
7626 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
7627 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
7630 "pshufb\t{%2, %0|%0, %2}"
7631 [(set_attr "type" "sselog1")
7632 (set_attr "prefix_extra" "1")
7633 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7634 (set_attr "mode" "DI")])
;; psignb/w/d and their VEX forms, instantiated for the VI124_128 modes
;; and modelled as an unspec of operands 1 and 2.  The element-size suffix
;; comes from <ssemodesuffix>.  Operand 1 must be a register (tied to the
;; destination in the legacy alternative); operand 2 may be in memory.
7636 (define_insn "ssse3_psign<mode>3"
7637 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
7639 [(match_operand:VI124_128 1 "register_operand" "0,x")
7640 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")]
7644 psign<ssemodesuffix>\t{%2, %0|%0, %2}
7645 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7646 [(set_attr "isa" "noavx,avx")
7647 (set_attr "type" "sselog1")
7648 (set_attr "prefix_data16" "1,*")
7649 (set_attr "prefix_extra" "1")
7650 (set_attr "prefix" "orig,vex")
7651 (set_attr "mode" "TI")])
;; MMX-register form of psign, instantiated for the MMXMODEI modes and
;; modelled as an unspec of operands 1 and 2.  The element-size suffix
;; comes from <mmxvecsize>.  The destination is tied to operand 1;
;; prefix_rex is computed per insn by x86_extended_reg_mentioned_p.
7653 (define_insn "ssse3_psign<mode>3"
7654 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7656 [(match_operand:MMXMODEI 1 "register_operand" "0")
7657 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
7660 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
7661 [(set_attr "type" "sselog1")
7662 (set_attr "prefix_extra" "1")
7663 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7664 (set_attr "mode" "DI")])
;; palignr / vpalignr on TImode, modelled as an unspec.  Operand 3 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, so the RTL carries eight times the immediate that is
;; emitted.  which_alternative then selects the legacy or VEX template.
7666 (define_insn "ssse3_palignrti"
7667 [(set (match_operand:TI 0 "register_operand" "=x,x")
7668 (unspec:TI [(match_operand:TI 1 "register_operand" "0,x")
7669 (match_operand:TI 2 "nonimmediate_operand" "xm,xm")
7670 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
7674 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7676 switch (which_alternative)
7679 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7681 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7686 [(set_attr "isa" "noavx,avx")
7687 (set_attr "type" "sseishft")
7688 (set_attr "atom_unit" "sishuf")
7689 (set_attr "prefix_data16" "1,*")
7690 (set_attr "prefix_extra" "1")
7691 (set_attr "length_immediate" "1")
7692 (set_attr "prefix" "orig,vex")
7693 (set_attr "mode" "TI")])
;; MMX-register form of palignr on DImode, modelled as an unspec.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, so the RTL carries eight times the
;; immediate that is emitted.  prefix_rex is computed per insn by
;; x86_extended_reg_mentioned_p.
7695 (define_insn "ssse3_palignrdi"
7696 [(set (match_operand:DI 0 "register_operand" "=y")
7697 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
7698 (match_operand:DI 2 "nonimmediate_operand" "ym")
7699 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
7703 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
7704 return "palignr\t{%3, %2, %0|%0, %2, %3}";
7706 [(set_attr "type" "sseishft")
7707 (set_attr "atom_unit" "sishuf")
7708 (set_attr "prefix_extra" "1")
7709 (set_attr "length_immediate" "1")
7710 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7711 (set_attr "mode" "DI")])
;; Vector absolute value for the VI124_128 modes, emitted as pabs with the
;; mode suffix.  The source may be a register or memory; the destination is
;; not tied to the source.  prefix is maybe_vex, matching the %v in the
;; template.
7713 (define_insn "abs<mode>2"
7714 [(set (match_operand:VI124_128 0 "register_operand" "=x")
7716 (match_operand:VI124_128 1 "nonimmediate_operand" "xm")))]
7718 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
7719 [(set_attr "type" "sselog1")
7720 (set_attr "prefix_data16" "1")
7721 (set_attr "prefix_extra" "1")
7722 (set_attr "prefix" "maybe_vex")
7723 (set_attr "mode" "TI")])
;; Absolute value for the MMXMODEI modes (constraint "y" operands), emitted
;; as pabs<mmxvecsize> with no VEX variant.  prefix_rep is forced to 0 and
;; prefix_rex is computed per insn.
7725 (define_insn "abs<mode>2"
7726 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
7728 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
7730 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
7731 [(set_attr "type" "sselog1")
7732 (set_attr "prefix_rep" "0")
7733 (set_attr "prefix_extra" "1")
7734 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7735 (set_attr "mode" "DI")])
7737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7739 ;; AMD SSE4A instructions
7741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> store of a scalar MODEF value: the source must be
;; an SSE register ("x") and the destination must be memory ("m").
7743 (define_insn "sse4a_movnt<mode>"
7744 [(set (match_operand:MODEF 0 "memory_operand" "=m")
7746 [(match_operand:MODEF 1 "register_operand" "x")]
7749 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
7750 [(set_attr "type" "ssemov")
7751 (set_attr "mode" "<MODE>")])
;; Same store as sse4a_movnt<mode>, but expressed on a VF_128 vector source:
;; the stored scalar is element 0 of operand 1 (vec_select with index 0).
7753 (define_insn "sse4a_vmmovnt<mode>"
7754 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
7755 (unspec:<ssescalarmode>
7756 [(vec_select:<ssescalarmode>
7757 (match_operand:VF_128 1 "register_operand" "x")
7758 (parallel [(const_int 0)]))]
7761 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
7762 [(set_attr "type" "ssemov")
7763 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of extrq: operands 2 and 3 are both integer constants
;; (hence length_immediate is 2) and operand 1 is tied to the destination.
7765 (define_insn "sse4a_extrqi"
7766 [(set (match_operand:V2DI 0 "register_operand" "=x")
7767 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7768 (match_operand 2 "const_int_operand" "")
7769 (match_operand 3 "const_int_operand" "")]
7772 "extrq\t{%3, %2, %0|%0, %2, %3}"
7773 [(set_attr "type" "sse")
7774 (set_attr "prefix_data16" "1")
7775 (set_attr "length_immediate" "2")
7776 (set_attr "mode" "TI")])
;; Register form of extrq: the control values come from a V16QI register
;; (operand 2) instead of immediates.  Operand 1 is tied to the destination.
7778 (define_insn "sse4a_extrq"
7779 [(set (match_operand:V2DI 0 "register_operand" "=x")
7780 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7781 (match_operand:V16QI 2 "register_operand" "x")]
7784 "extrq\t{%2, %0|%0, %2}"
7785 [(set_attr "type" "sse")
7786 (set_attr "prefix_data16" "1")
7787 (set_attr "mode" "TI")])
;; Immediate form of insertq: operands 3 and 4 are integer constants
;; (length_immediate 2), operand 2 is a register source and operand 1 is
;; tied to the destination.  Encoded with a rep prefix and no data16 prefix.
7789 (define_insn "sse4a_insertqi"
7790 [(set (match_operand:V2DI 0 "register_operand" "=x")
7791 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7792 (match_operand:V2DI 2 "register_operand" "x")
7793 (match_operand 3 "const_int_operand" "")
7794 (match_operand 4 "const_int_operand" "")]
7797 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
7798 [(set_attr "type" "sseins")
7799 (set_attr "prefix_data16" "0")
7800 (set_attr "prefix_rep" "1")
7801 (set_attr "length_immediate" "2")
7802 (set_attr "mode" "TI")])
;; Register-only form of insertq: no immediates, operand 1 is tied to the
;; destination and operand 2 is the second register source.
7804 (define_insn "sse4a_insertq"
7805 [(set (match_operand:V2DI 0 "register_operand" "=x")
7806 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
7807 (match_operand:V2DI 2 "register_operand" "x")]
7810 "insertq\t{%2, %0|%0, %2}"
7811 [(set_attr "type" "sseins")
7812 (set_attr "prefix_data16" "0")
7813 (set_attr "prefix_rep" "1")
7814 (set_attr "mode" "TI")])
7816 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7818 ;; Intel SSE4.1 instructions
7820 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate-controlled blend for the vector float modes (VF).
;; Note that operand 2 is listed before operand 1 in the RTL.
;; The insn condition restricts operand 3 to one bit per vector element:
;; 0 .. (1 << GET_MODE_NUNITS) - 1.
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the AVX three-operand form.
7822 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
7823 [(set (match_operand:VF 0 "register_operand" "=x,x")
7825 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7826 (match_operand:VF 1 "register_operand" "0,x")
7827 (match_operand:SI 3 "const_int_operand" "")))]
7829 && IN_RANGE (INTVAL (operands[3]), 0, (1 << GET_MODE_NUNITS (<MODE>mode))-1)"
7831 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7832 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7833 [(set_attr "isa" "noavx,avx")
7834 (set_attr "type" "ssemov")
7835 (set_attr "length_immediate" "1")
7836 (set_attr "prefix_data16" "1,*")
7837 (set_attr "prefix_extra" "1")
7838 (set_attr "prefix" "orig,vex")
7839 (set_attr "mode" "<MODE>")])
;; Variable (register-selected) blend for the vector float modes (VF).
;; Alternative 0 is the legacy SSE4.1 form: the selector (operand 3) is
;; constrained to "Yz" and operand 1 is tied to the destination, so the
;; other operands use the *_not_xmm0_* predicates.  Alternative 1 is the
;; AVX four-operand form, where the selector may be any SSE register.
;; Only the AVX encoding carries a trailing immediate byte (it encodes the
;; selector register); the legacy encoding has none.  length_immediate is
;; therefore "*,1", the same as in sse4_1_pblendvb.
7841 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
7842 [(set (match_operand:VF 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7844 [(match_operand:VF 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7845 (match_operand:VF 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7846 (match_operand:VF 3 "register_operand" "Yz,x")]
7850 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7851 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7852 [(set_attr "isa" "noavx,avx")
7853 (set_attr "type" "ssemov")
7854 (set_attr "length_immediate" "*,1")
7855 (set_attr "prefix_data16" "1,*")
7856 (set_attr "prefix_extra" "1")
7857 (set_attr "prefix" "orig,vex")
7858 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> for the vector float modes, controlled by an 8-bit
;; immediate (operand 3).  Operands 1 and 2 are marked commutative ("%").
;; Alternative 0 is the legacy form with operand 1 tied to the destination;
;; alternative 1 is the AVX three-operand form.
7860 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
7861 [(set (match_operand:VF 0 "register_operand" "=x,x")
7863 [(match_operand:VF 1 "nonimmediate_operand" "%0,x")
7864 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")
7865 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7869 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
7870 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7871 [(set_attr "isa" "noavx,avx")
7872 (set_attr "type" "ssemul")
7873 (set_attr "length_immediate" "1")
7874 (set_attr "prefix_data16" "1,*")
7875 (set_attr "prefix_extra" "1")
7876 (set_attr "prefix" "orig,vex")
7877 (set_attr "mode" "<MODE>")])
;; movntdqa load: the source must be memory ("m") and the destination an SSE
;; register.  Modelled as an unspec rather than a plain move.
7879 (define_insn "sse4_1_movntdqa"
7880 [(set (match_operand:V2DI 0 "register_operand" "=x")
7881 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
7884 "%vmovntdqa\t{%1, %0|%0, %1}"
7885 [(set_attr "type" "ssemov")
7886 (set_attr "prefix_extra" "1")
7887 (set_attr "prefix" "maybe_vex")
7888 (set_attr "mode" "TI")])
;; mpsadbw on V16QI operands with an 8-bit immediate control (operand 3).
;; Alternative 0 is the legacy two-operand form (operand 1 tied to the
;; destination); alternative 1 is the AVX three-operand vmpsadbw.
7890 (define_insn "sse4_1_mpsadbw"
7891 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
7892 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,x")
7893 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
7894 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
7898 mpsadbw\t{%3, %2, %0|%0, %2, %3}
7899 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7900 [(set_attr "isa" "noavx,avx")
7901 (set_attr "type" "sselog1")
7902 (set_attr "length_immediate" "1")
7903 (set_attr "prefix_extra" "1")
7904 (set_attr "prefix" "orig,vex")
7905 (set_attr "mode" "TI")])
;; packusdw: combines two V4SI inputs (operands 1 and 2) into one V8HI
;; result.  Alternative 0 is the legacy form with operand 1 tied to the
;; destination; alternative 1 is the AVX three-operand vpackusdw.
7907 (define_insn "sse4_1_packusdw"
7908 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7911 (match_operand:V4SI 1 "register_operand" "0,x"))
7913 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
7916 packusdw\t{%2, %0|%0, %2}
7917 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
7918 [(set_attr "isa" "noavx,avx")
7919 (set_attr "type" "sselog")
7920 (set_attr "prefix_extra" "1")
7921 (set_attr "prefix" "orig,vex")
7922 (set_attr "mode" "TI")])
;; Variable byte blend.  Alternative 0 is the legacy SSE4.1 form: the
;; selector (operand 3) is constrained to "Yz" and operand 1 is tied to the
;; destination.  Alternative 1 is the AVX four-operand form with the
;; selector in any SSE register.
;; The destination uses the same *_maybe_avx predicate as operand 1 (which
;; is tied to it in the legacy alternative) and as the float blendv
;; pattern, so the xmm0 exclusion applies only where the legacy encoding
;; needs it.
;; Only the AVX alternative has an immediate byte (length_immediate "*,1").
7924 (define_insn "sse4_1_pblendvb"
7925 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand_maybe_avx" "=x,x")
7927 [(match_operand:V16QI 1 "reg_not_xmm0_operand_maybe_avx" "0,x")
7928 (match_operand:V16QI 2 "nonimm_not_xmm0_operand_maybe_avx" "xm,xm")
7929 (match_operand:V16QI 3 "register_operand" "Yz,x")]
7933 pblendvb\t{%3, %2, %0|%0, %2, %3}
7934 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7935 [(set_attr "isa" "noavx,avx")
7936 (set_attr "type" "ssemov")
7937 (set_attr "prefix_extra" "1")
7938 (set_attr "length_immediate" "*,1")
7939 (set_attr "prefix" "orig,vex")
7940 (set_attr "mode" "TI")])
;; Immediate-controlled word blend on V8HI.  As in the float blend pattern,
;; operand 2 is listed before operand 1 in the RTL.  Operand 3 is an 8-bit
;; immediate.  Alternative 0 is the legacy form (operand 1 tied to the
;; destination); alternative 1 is the AVX three-operand vpblendw.
7942 (define_insn "sse4_1_pblendw"
7943 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
7945 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
7946 (match_operand:V8HI 1 "register_operand" "0,x")
7947 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
7950 pblendw\t{%3, %2, %0|%0, %2, %3}
7951 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
7952 [(set_attr "isa" "noavx,avx")
7953 (set_attr "type" "ssemov")
7954 (set_attr "prefix_extra" "1")
7955 (set_attr "length_immediate" "1")
7956 (set_attr "prefix" "orig,vex")
7957 (set_attr "mode" "TI")])
;; phminposuw on a V8HI source (register or memory), modelled as
;; UNSPEC_PHMINPOSUW.  Single-source instruction; the destination is not
;; tied to the input.
7959 (define_insn "sse4_1_phminposuw"
7960 [(set (match_operand:V8HI 0 "register_operand" "=x")
7961 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
7962 UNSPEC_PHMINPOSUW))]
7964 "%vphminposuw\t{%1, %0|%0, %1}"
7965 [(set_attr "type" "sselog1")
7966 (set_attr "prefix_extra" "1")
7967 (set_attr "prefix" "maybe_vex")
7968 (set_attr "mode" "TI")])
;; pmov<extsuffix>bw: widens elements selected from a V16QI source
;; (selection starts at index 0) into a V8HI result.  The kind of extension
;; comes from the <code>/<extsuffix> iterator attributes.
7970 (define_insn "sse4_1_<code>v8qiv8hi2"
7971 [(set (match_operand:V8HI 0 "register_operand" "=x")
7974 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7975 (parallel [(const_int 0)
7984 "%vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
7985 [(set_attr "type" "ssemov")
7986 (set_attr "prefix_extra" "1")
7987 (set_attr "prefix" "maybe_vex")
7988 (set_attr "mode" "TI")])
;; pmov<extsuffix>bd: widens elements selected from a V16QI source
;; (selection starts at index 0) into a V4SI result.
7990 (define_insn "sse4_1_<code>v4qiv4si2"
7991 [(set (match_operand:V4SI 0 "register_operand" "=x")
7994 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
7995 (parallel [(const_int 0)
8000 "%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
8001 [(set_attr "type" "ssemov")
8002 (set_attr "prefix_extra" "1")
8003 (set_attr "prefix" "maybe_vex")
8004 (set_attr "mode" "TI")])
;; pmov<extsuffix>wd: widens elements selected from a V8HI source
;; (selection starts at index 0) into a V4SI result.
8006 (define_insn "sse4_1_<code>v4hiv4si2"
8007 [(set (match_operand:V4SI 0 "register_operand" "=x")
8010 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8011 (parallel [(const_int 0)
8016 "%vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
8017 [(set_attr "type" "ssemov")
8018 (set_attr "prefix_extra" "1")
8019 (set_attr "prefix" "maybe_vex")
8020 (set_attr "mode" "TI")])
;; pmov<extsuffix>bq: widens elements selected from a V16QI source
;; (selection starts at index 0) into a V2DI result.
8022 (define_insn "sse4_1_<code>v2qiv2di2"
8023 [(set (match_operand:V2DI 0 "register_operand" "=x")
8026 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8027 (parallel [(const_int 0)
8030 "%vpmov<extsuffix>bq\t{%1, %0|%0, %1}"
8031 [(set_attr "type" "ssemov")
8032 (set_attr "prefix_extra" "1")
8033 (set_attr "prefix" "maybe_vex")
8034 (set_attr "mode" "TI")])
;; pmov<extsuffix>wq: widens elements selected from a V8HI source
;; (selection starts at index 0) into a V2DI result.
8036 (define_insn "sse4_1_<code>v2hiv2di2"
8037 [(set (match_operand:V2DI 0 "register_operand" "=x")
8040 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8041 (parallel [(const_int 0)
8044 "%vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
8045 [(set_attr "type" "ssemov")
8046 (set_attr "prefix_extra" "1")
8047 (set_attr "prefix" "maybe_vex")
8048 (set_attr "mode" "TI")])
;; pmov<extsuffix>dq: widens elements selected from a V4SI source
;; (selection starts at index 0) into a V2DI result.
8050 (define_insn "sse4_1_<code>v2siv2di2"
8051 [(set (match_operand:V2DI 0 "register_operand" "=x")
8054 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8055 (parallel [(const_int 0)
8058 "%vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
8059 [(set_attr "type" "ssemov")
8060 (set_attr "prefix_extra" "1")
8061 (set_attr "prefix" "maybe_vex")
8062 (set_attr "mode" "TI")])
8064 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
8065 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; The only output is FLAGS_REG (CCmode); both vector operands are inputs,
;; and operand 1 may be memory.  VEX-only encoding.
8066 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
8067 [(set (reg:CC FLAGS_REG)
8068 (unspec:CC [(match_operand:VF 0 "register_operand" "x")
8069 (match_operand:VF 1 "nonimmediate_operand" "xm")]
8072 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
8073 [(set_attr "type" "ssecomi")
8074 (set_attr "prefix_extra" "1")
8075 (set_attr "prefix" "vex")
8076 (set_attr "mode" "<MODE>")])
8078 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
8079 ;; But it is not really a compare instruction.
;; 256-bit (V4DI, mode OI) form: VEX-only vptest that writes only FLAGS_REG.
8080 (define_insn "avx_ptest256"
8081 [(set (reg:CC FLAGS_REG)
8082 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
8083 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
8086 "vptest\t{%1, %0|%0, %1}"
8087 [(set_attr "type" "ssecomi")
8088 (set_attr "prefix_extra" "1")
8089 (set_attr "prefix" "vex")
8090 (set_attr "mode" "OI")])
;; 128-bit (V2DI, mode TI) ptest: writes only FLAGS_REG.  prefix is
;; maybe_vex, matching the %v in the template.
8092 (define_insn "sse4_1_ptest"
8093 [(set (reg:CC FLAGS_REG)
8094 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
8095 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
8098 "%vptest\t{%1, %0|%0, %1}"
8099 [(set_attr "type" "ssecomi")
8100 (set_attr "prefix_extra" "1")
8101 (set_attr "prefix" "maybe_vex")
8102 (set_attr "mode" "TI")])
;; Packed round for the vector float modes.  Operand 2 is an immediate in
;; the range 0..15.  The source may be memory and the destination is not
;; tied to it.  prefix_data16 is computed conditionally on TARGET_AVX.
8104 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
8105 [(set (match_operand:VF 0 "register_operand" "=x")
8107 [(match_operand:VF 1 "nonimmediate_operand" "xm")
8108 (match_operand:SI 2 "const_0_to_15_operand" "n")]
8111 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8112 [(set_attr "type" "ssecvt")
8113 (set (attr "prefix_data16")
8115 (ne (symbol_ref "TARGET_AVX") (const_int 0))
8117 (const_string "1")))
8118 (set_attr "prefix_extra" "1")
8119 (set_attr "length_immediate" "1")
8120 (set_attr "prefix" "maybe_vex")
8121 (set_attr "mode" "<MODE>")])
;; Scalar round on 128-bit float vectors.  Operand 2 is the value being
;; rounded, operand 3 an immediate in the range 0..15, and operand 1 the
;; vector the result is combined with (tied to the destination in the
;; legacy alternative).  Alternative 1 is the AVX three-operand form.
8123 (define_insn "sse4_1_round<ssescalarmodesuffix>"
8124 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
8127 [(match_operand:VF_128 2 "register_operand" "x,x")
8128 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
8130 (match_operand:VF_128 1 "register_operand" "0,x")
8134 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
8135 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8136 [(set_attr "isa" "noavx,avx")
8137 (set_attr "type" "ssecvt")
8138 (set_attr "length_immediate" "1")
8139 (set_attr "prefix_data16" "1,*")
8140 (set_attr "prefix_extra" "1")
8141 (set_attr "prefix" "orig,vex")
8142 (set_attr "mode" "<MODE>")])
8144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8146 ;; Intel SSE4.2 string/text processing instructions
8148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: an SI index in operand 0
;; (constraint "c"), a V16QI mask in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Explicit lengths are passed in operands 3 ("a") and 5 ("d").
;; The insn condition requires can_create_pseudo_p (), so the pattern is
;; split before register allocation.  The splitter checks which of
;; operand 0, operand 1 and FLAGS_REG lack a REG_UNUSED note on curr_insn
;; and emits the index form, the mask form, and/or (when only the flags
;; are live) the flags-only form.
8150 (define_insn_and_split "sse4_2_pcmpestr"
8151 [(set (match_operand:SI 0 "register_operand" "=c,c")
8153 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8154 (match_operand:SI 3 "register_operand" "a,a")
8155 (match_operand:V16QI 4 "nonimm_not_xmm0_operand" "x,m")
8156 (match_operand:SI 5 "register_operand" "d,d")
8157 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
8159 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8167 (set (reg:CC FLAGS_REG)
8176 && can_create_pseudo_p ()"
8181 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8182 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8183 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8186 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
8187 operands[3], operands[4],
8188 operands[5], operands[6]));
8190 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
8191 operands[3], operands[4],
8192 operands[5], operands[6]));
8193 if (flags && !(ecx || xmm0))
8194 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
8195 operands[2], operands[3],
8196 operands[4], operands[5],
8200 [(set_attr "type" "sselog")
8201 (set_attr "prefix_data16" "1")
8202 (set_attr "prefix_extra" "1")
8203 (set_attr "length_immediate" "1")
8204 (set_attr "memory" "none,load")
8205 (set_attr "mode" "TI")])
;; Index-producing form: the result goes to operand 0 (constraint "c") and
;; FLAGS_REG is also set.  The lengths in operands 2 ("a") and 4 ("d") are
;; implicit inputs and do not appear in the assembler template.
;; Alternative 1 takes the second vector from memory (memory attr "load").
8207 (define_insn "sse4_2_pcmpestri"
8208 [(set (match_operand:SI 0 "register_operand" "=c,c")
8210 [(match_operand:V16QI 1 "register_operand" "x,x")
8211 (match_operand:SI 2 "register_operand" "a,a")
8212 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8213 (match_operand:SI 4 "register_operand" "d,d")
8214 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8216 (set (reg:CC FLAGS_REG)
8225 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
8226 [(set_attr "type" "sselog")
8227 (set_attr "prefix_data16" "1")
8228 (set_attr "prefix_extra" "1")
8229 (set_attr "prefix" "maybe_vex")
8230 (set_attr "length_immediate" "1")
8231 (set_attr "memory" "none,load")
8232 (set_attr "mode" "TI")])
;; Mask-producing form: the V16QI result goes to operand 0 (constraint
;; "Yz") and FLAGS_REG is also set.  The lengths in operands 2 ("a") and
;; 4 ("d") are implicit inputs not printed in the template.
8234 (define_insn "sse4_2_pcmpestrm"
8235 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8237 [(match_operand:V16QI 1 "register_operand" "x,x")
8238 (match_operand:SI 2 "register_operand" "a,a")
8239 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
8240 (match_operand:SI 4 "register_operand" "d,d")
8241 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
8243 (set (reg:CC FLAGS_REG)
8252 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
8253 [(set_attr "type" "sselog")
8254 (set_attr "prefix_data16" "1")
8255 (set_attr "prefix_extra" "1")
8256 (set_attr "length_immediate" "1")
8257 (set_attr "prefix" "maybe_vex")
8258 (set_attr "memory" "none,load")
8259 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is a real output; operands 0 and 1 are
;; scratch clobbers.  Four alternatives:
;;   0,1: clobber a "Yz" V16QI scratch and emit pcmpestrm (reg / mem source)
;;   2,3: clobber a "c" SI scratch and emit pcmpestri (reg / mem source)
;; The unused scratch in each alternative has constraint "X".
8261 (define_insn "sse4_2_pcmpestr_cconly"
8262 [(set (reg:CC FLAGS_REG)
8264 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8265 (match_operand:SI 3 "register_operand" "a,a,a,a")
8266 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
8267 (match_operand:SI 5 "register_operand" "d,d,d,d")
8268 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
8270 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8271 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8274 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8275 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
8276 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
8277 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
8278 [(set_attr "type" "sselog")
8279 (set_attr "prefix_data16" "1")
8280 (set_attr "prefix_extra" "1")
8281 (set_attr "length_immediate" "1")
8282 (set_attr "memory" "none,load,none,load")
8283 (set_attr "prefix" "maybe_vex")
8284 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern (no explicit length operands) with three
;; results: an SI index in operand 0 ("c"), a V16QI mask in operand 1
;; ("Yz") and FLAGS_REG.  The insn condition requires
;; can_create_pseudo_p (), so the pattern is split before register
;; allocation.  The splitter checks which results lack a REG_UNUSED note on
;; curr_insn and emits the index form, the mask form, and/or (when only
;; the flags are live) the flags-only form.
8286 (define_insn_and_split "sse4_2_pcmpistr"
8287 [(set (match_operand:SI 0 "register_operand" "=c,c")
8289 [(match_operand:V16QI 2 "reg_not_xmm0_operand" "x,x")
8290 (match_operand:V16QI 3 "nonimm_not_xmm0_operand" "x,m")
8291 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
8293 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
8299 (set (reg:CC FLAGS_REG)
8306 && can_create_pseudo_p ()"
8311 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
8312 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
8313 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
8316 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
8317 operands[3], operands[4]));
8319 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
8320 operands[3], operands[4]));
8321 if (flags && !(ecx || xmm0))
8322 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
8323 operands[2], operands[3],
8327 [(set_attr "type" "sselog")
8328 (set_attr "prefix_data16" "1")
8329 (set_attr "prefix_extra" "1")
8330 (set_attr "length_immediate" "1")
8331 (set_attr "memory" "none,load")
8332 (set_attr "mode" "TI")])
;; Index-producing form: the result goes to operand 0 (constraint "c") and
;; FLAGS_REG is also set.  Alternative 1 takes the second vector from
;; memory (memory attr "load").
8334 (define_insn "sse4_2_pcmpistri"
8335 [(set (match_operand:SI 0 "register_operand" "=c,c")
8337 [(match_operand:V16QI 1 "register_operand" "x,x")
8338 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8339 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8341 (set (reg:CC FLAGS_REG)
8348 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
8349 [(set_attr "type" "sselog")
8350 (set_attr "prefix_data16" "1")
8351 (set_attr "prefix_extra" "1")
8352 (set_attr "length_immediate" "1")
8353 (set_attr "prefix" "maybe_vex")
8354 (set_attr "memory" "none,load")
8355 (set_attr "mode" "TI")])
;; Mask-producing form: the V16QI result goes to operand 0 (constraint
;; "Yz") and FLAGS_REG is also set.
8357 (define_insn "sse4_2_pcmpistrm"
8358 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
8360 [(match_operand:V16QI 1 "register_operand" "x,x")
8361 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
8362 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
8364 (set (reg:CC FLAGS_REG)
8371 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
8372 [(set_attr "type" "sselog")
8373 (set_attr "prefix_data16" "1")
8374 (set_attr "prefix_extra" "1")
8375 (set_attr "length_immediate" "1")
8376 (set_attr "prefix" "maybe_vex")
8377 (set_attr "memory" "none,load")
8378 (set_attr "mode" "TI")])
;; Flags-only form: only FLAGS_REG is a real output; operands 0 and 1 are
;; scratch clobbers.  Four alternatives:
;;   0,1: clobber a "Yz" V16QI scratch and emit pcmpistrm (reg / mem source)
;;   2,3: clobber a "c" SI scratch and emit pcmpistri (reg / mem source)
8380 (define_insn "sse4_2_pcmpistr_cconly"
8381 [(set (reg:CC FLAGS_REG)
8383 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
8384 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
8385 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
8387 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
8388 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
8391 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8392 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
8393 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
8394 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
8395 [(set_attr "type" "sselog")
8396 (set_attr "prefix_data16" "1")
8397 (set_attr "prefix_extra" "1")
8398 (set_attr "length_immediate" "1")
8399 (set_attr "memory" "none,load,none,load")
8400 (set_attr "prefix" "maybe_vex")
8401 (set_attr "mode" "TI")])
8403 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8407 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8409 ;; XOP parallel integer multiply/add instructions.
8410 ;; Note the XOP multiply/add instructions
8411 ;; a[i] = b[i] * c[i] + d[i];
8412 ;; do not allow the value being added to be a memory operation.
;; vpmacsww on V8HI: operands 1 and 2 are the commutative ("%") factors,
;; of which only operand 2 may be memory; operand 3 is the addend and is
;; restricted to a register by its "x" constraint.
8413 (define_insn "xop_pmacsww"
8414 [(set (match_operand:V8HI 0 "register_operand" "=x")
8417 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8418 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8419 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8421 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8422 [(set_attr "type" "ssemuladd")
8423 (set_attr "mode" "TI")])
;; vpmacssww on V8HI: same operand layout as xop_pmacsww (product of
;; operands 1 and 2 combined with the register addend in operand 3).
8425 (define_insn "xop_pmacssww"
8426 [(set (match_operand:V8HI 0 "register_operand" "=x")
8428 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8429 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
8430 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
8432 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8433 [(set_attr "type" "ssemuladd")
8434 (set_attr "mode" "TI")])
;; vpmacsdd on V4SI: commutative factors in operands 1 and 2 (only
;; operand 2 may be memory) and a register addend in operand 3.
8436 (define_insn "xop_pmacsdd"
8437 [(set (match_operand:V4SI 0 "register_operand" "=x")
8440 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8441 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8442 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8444 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8445 [(set_attr "type" "ssemuladd")
8446 (set_attr "mode" "TI")])
;; vpmacssdd on V4SI: same operand layout as xop_pmacsdd.
8448 (define_insn "xop_pmacssdd"
8449 [(set (match_operand:V4SI 0 "register_operand" "=x")
8451 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8452 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
8453 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8455 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8456 [(set_attr "type" "ssemuladd")
8457 (set_attr "mode" "TI")])
;; vpmacssdql: V4SI factors (operands 1 and 2) with a V2DI addend
;; (operand 3) and a V2DI result.  The element selections on both factors
;; start at index 1.
8459 (define_insn "xop_pmacssdql"
8460 [(set (match_operand:V2DI 0 "register_operand" "=x")
8465 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8466 (parallel [(const_int 1)
8469 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8470 (parallel [(const_int 1)
8472 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8474 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8475 [(set_attr "type" "ssemuladd")
8476 (set_attr "mode" "TI")])
;; vpmacssdqh: V4SI factors with a V2DI addend and a V2DI result.  The
;; element selections on both factors start at index 0.
8478 (define_insn "xop_pmacssdqh"
8479 [(set (match_operand:V2DI 0 "register_operand" "=x")
8484 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8485 (parallel [(const_int 0)
8489 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8490 (parallel [(const_int 0)
8492 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8494 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8495 [(set_attr "type" "ssemuladd")
8496 (set_attr "mode" "TI")])
;; vpmacsdql: V4SI factors with a V2DI addend and a V2DI result.  The
;; element selections on both factors start at index 1.
8498 (define_insn "xop_pmacsdql"
8499 [(set (match_operand:V2DI 0 "register_operand" "=x")
8504 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8505 (parallel [(const_int 1)
8509 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8510 (parallel [(const_int 1)
8512 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8514 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8515 [(set_attr "type" "ssemuladd")
8516 (set_attr "mode" "TI")])
8518 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8519 ;; fake it with a multiply/add. In general, we expect the define_split to
8520 ;; occur before register allocation, so we have to handle the corner case where
8521 ;; the target is the same as operands 1/2
;; Multiply of the index-1 and index-3 elements of two V4SI inputs, giving
;; a V2DI result.  The destination is earlyclobber ("=&x"), so it cannot
;; share a register with an input.  The split condition is
;; "&& reload_completed"; the preparation statement sets operands[3] to a
;; V2DI zero vector.
8522 (define_insn_and_split "xop_mulv2div2di3_low"
8523 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8527 (match_operand:V4SI 1 "register_operand" "%x")
8528 (parallel [(const_int 1)
8532 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8533 (parallel [(const_int 1)
8534 (const_int 3)])))))]
8537 "&& reload_completed"
8546 (parallel [(const_int 1)
8551 (parallel [(const_int 1)
8555 operands[3] = CONST0_RTX (V2DImode);
8557 [(set_attr "type" "ssemul")
8558 (set_attr "mode" "TI")])
;; vpmacsdqh: V4SI factors with a V2DI addend and a V2DI result.  The
;; element selections on both factors start at index 0.
8560 (define_insn "xop_pmacsdqh"
8561 [(set (match_operand:V2DI 0 "register_operand" "=x")
8566 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
8567 (parallel [(const_int 0)
8571 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8572 (parallel [(const_int 0)
8574 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
8576 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8577 [(set_attr "type" "ssemuladd")
8578 (set_attr "mode" "TI")])
8580 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
8581 ;; fake it with a multiply/add. In general, we expect the define_split to
8582 ;; occur before register allocation, so we have to handle the corner case where
8583 ;; the target is the same as either operands[1] or operands[2]
;; Multiply of the index-0 and index-2 elements of two V4SI inputs, giving
;; a V2DI result.  The destination is earlyclobber ("=&x").  The split
;; condition is "&& reload_completed"; the preparation statement sets
;; operands[3] to a V2DI zero vector.
8584 (define_insn_and_split "xop_mulv2div2di3_high"
8585 [(set (match_operand:V2DI 0 "register_operand" "=&x")
8589 (match_operand:V4SI 1 "register_operand" "%x")
8590 (parallel [(const_int 0)
8594 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
8595 (parallel [(const_int 0)
8596 (const_int 2)])))))]
8599 "&& reload_completed"
8608 (parallel [(const_int 0)
8613 (parallel [(const_int 0)
8617 operands[3] = CONST0_RTX (V2DImode);
8619 [(set_attr "type" "ssemul")
8620 (set_attr "mode" "TI")])
8622 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacsswd: V8HI factors (element selections start at index 1) with a
;; V4SI addend in operand 3 and a V4SI result.
8623 (define_insn "xop_pmacsswd"
8624 [(set (match_operand:V4SI 0 "register_operand" "=x")
8629 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8630 (parallel [(const_int 1)
8636 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8637 (parallel [(const_int 1)
8641 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8643 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8644 [(set_attr "type" "ssemuladd")
8645 (set_attr "mode" "TI")])
;; vpmacswd: V8HI factors (element selections start at index 1) with a
;; V4SI addend in operand 3 and a V4SI result.
8647 (define_insn "xop_pmacswd"
8648 [(set (match_operand:V4SI 0 "register_operand" "=x")
8653 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8654 (parallel [(const_int 1)
8660 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8661 (parallel [(const_int 1)
8665 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8667 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8668 [(set_attr "type" "ssemuladd")
8669 (set_attr "mode" "TI")])
;; vpmadcsswd: V8HI factors with a V4SI addend and a V4SI result.  The RTL
;; uses two sets of element selections, one starting at index 0 and one at
;; index 1.
8671 (define_insn "xop_pmadcsswd"
8672 [(set (match_operand:V4SI 0 "register_operand" "=x")
8678 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8679 (parallel [(const_int 0)
8685 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8686 (parallel [(const_int 0)
8694 (parallel [(const_int 1)
8701 (parallel [(const_int 1)
8705 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8707 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8708 [(set_attr "type" "ssemuladd")
8709 (set_attr "mode" "TI")])
;; vpmadcswd: same operand layout as xop_pmadcsswd (V8HI factors selected
;; from index 0 and from index 1, V4SI addend, V4SI result).
8711 (define_insn "xop_pmadcswd"
8712 [(set (match_operand:V4SI 0 "register_operand" "=x")
8718 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
8719 (parallel [(const_int 0)
8725 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
8726 (parallel [(const_int 0)
8734 (parallel [(const_int 1)
8741 (parallel [(const_int 1)
8745 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
8747 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8748 [(set_attr "type" "ssemuladd")
8749 (set_attr "mode" "TI")])
8751 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode of the V iterator.  Operand 3 is listed first in
;; the RTL.  The two alternatives allow at most one memory operand:
;; either operand 2 (alternative 0) or operand 3 (alternative 1).
8752 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
8753 [(set (match_operand:V 0 "register_operand" "=x,x")
8755 (match_operand:V 3 "nonimmediate_operand" "x,m")
8756 (match_operand:V 1 "vector_move_operand" "x,x")
8757 (match_operand:V 2 "vector_move_operand" "xm,x")))]
8759 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8760 [(set_attr "type" "sse4arg")])
8762 ;; XOP horizontal add/subtract instructions
;; vphaddbw: single V16QI source (register or memory), V8HI result.  The
;; RTL combines an element selection starting at index 0 with one starting
;; at index 1.
8763 (define_insn "xop_phaddbw"
8764 [(set (match_operand:V8HI 0 "register_operand" "=x")
8768 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8769 (parallel [(const_int 0)
8780 (parallel [(const_int 1)
8787 (const_int 15)])))))]
8789 "vphaddbw\t{%1, %0|%0, %1}"
8790 [(set_attr "type" "sseiadd1")])
;; vphaddbd: single V16QI source, V4SI result.  The RTL combines four
;; element selections, starting at indices 0, 1, 2 and 3.
8792 (define_insn "xop_phaddbd"
8793 [(set (match_operand:V4SI 0 "register_operand" "=x")
8798 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8799 (parallel [(const_int 0)
8806 (parallel [(const_int 1)
8814 (parallel [(const_int 2)
8821 (parallel [(const_int 3)
8824 (const_int 15)]))))))]
8826 "vphaddbd\t{%1, %0|%0, %1}"
8827 [(set_attr "type" "sseiadd1")])
;; vphaddbq: single V16QI source, V2DI result.  The RTL combines eight
;; element selections, starting at indices 0-3 and 8-11.
8829 (define_insn "xop_phaddbq"
8830 [(set (match_operand:V2DI 0 "register_operand" "=x")
8836 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8837 (parallel [(const_int 0)
8842 (parallel [(const_int 1)
8848 (parallel [(const_int 2)
8853 (parallel [(const_int 3)
8860 (parallel [(const_int 8)
8865 (parallel [(const_int 9)
8871 (parallel [(const_int 10)
8876 (parallel [(const_int 11)
8877 (const_int 15)])))))))]
8879 "vphaddbq\t{%1, %0|%0, %1}"
8880 [(set_attr "type" "sseiadd1")])
;; vphaddwd: single V8HI source, V4SI result.  The RTL combines an element
;; selection starting at index 0 with one starting at index 1.
8882 (define_insn "xop_phaddwd"
8883 [(set (match_operand:V4SI 0 "register_operand" "=x")
8887 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8888 (parallel [(const_int 0)
8895 (parallel [(const_int 1)
8898 (const_int 7)])))))]
8900 "vphaddwd\t{%1, %0|%0, %1}"
8901 [(set_attr "type" "sseiadd1")])
;; vphaddwq: single V8HI source, V2DI result.  The RTL combines four
;; element selections, starting at indices 0, 1, 2 and 3.
8903 (define_insn "xop_phaddwq"
8904 [(set (match_operand:V2DI 0 "register_operand" "=x")
8909 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
8910 (parallel [(const_int 0)
8915 (parallel [(const_int 1)
8921 (parallel [(const_int 2)
8926 (parallel [(const_int 3)
8927 (const_int 7)]))))))]
8929 "vphaddwq\t{%1, %0|%0, %1}"
8930 [(set_attr "type" "sseiadd1")])
;; vphadddq: single V4SI source, V2DI result.  The RTL combines an element
;; selection starting at index 0 with one starting at index 1.
8932 (define_insn "xop_phadddq"
8933 [(set (match_operand:V2DI 0 "register_operand" "=x")
8937 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
8938 (parallel [(const_int 0)
8943 (parallel [(const_int 1)
8944 (const_int 3)])))))]
8946 "vphadddq\t{%1, %0|%0, %1}"
8947 [(set_attr "type" "sseiadd1")])
;; vphaddubw: single V16QI source, V8HI result; same selection layout as
;; xop_phaddbw (indices starting at 0 and at 1).
8949 (define_insn "xop_phaddubw"
8950 [(set (match_operand:V8HI 0 "register_operand" "=x")
8954 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8955 (parallel [(const_int 0)
8966 (parallel [(const_int 1)
8973 (const_int 15)])))))]
8975 "vphaddubw\t{%1, %0|%0, %1}"
8976 [(set_attr "type" "sseiadd1")])
;; vphaddubd: single V16QI source, V4SI result; four element selections
;; starting at indices 0, 1, 2 and 3.
8978 (define_insn "xop_phaddubd"
8979 [(set (match_operand:V4SI 0 "register_operand" "=x")
8984 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
8985 (parallel [(const_int 0)
8992 (parallel [(const_int 1)
9000 (parallel [(const_int 2)
9007 (parallel [(const_int 3)
9010 (const_int 15)]))))))]
9012 "vphaddubd\t{%1, %0|%0, %1}"
9013 [(set_attr "type" "sseiadd1")])
;; vphaddubq: single V16QI source, V2DI result; eight element selections
;; starting at indices 0-3 and 8-11.
9015 (define_insn "xop_phaddubq"
9016 [(set (match_operand:V2DI 0 "register_operand" "=x")
9022 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9023 (parallel [(const_int 0)
9028 (parallel [(const_int 1)
9034 (parallel [(const_int 2)
9039 (parallel [(const_int 3)
9046 (parallel [(const_int 8)
9051 (parallel [(const_int 9)
9057 (parallel [(const_int 10)
9062 (parallel [(const_int 11)
9063 (const_int 15)])))))))]
9065 "vphaddubq\t{%1, %0|%0, %1}"
9066 [(set_attr "type" "sseiadd1")])
;; vphadduwd: single V8HI source, V4SI result; element selections starting
;; at index 0 and at index 1.
9068 (define_insn "xop_phadduwd"
9069 [(set (match_operand:V4SI 0 "register_operand" "=x")
9073 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9074 (parallel [(const_int 0)
9081 (parallel [(const_int 1)
9084 (const_int 7)])))))]
9086 "vphadduwd\t{%1, %0|%0, %1}"
9087 [(set_attr "type" "sseiadd1")])
;; vphadduwq: single V8HI source, V2DI result; four element selections
;; starting at indices 0, 1, 2 and 3.
9089 (define_insn "xop_phadduwq"
9090 [(set (match_operand:V2DI 0 "register_operand" "=x")
9095 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9096 (parallel [(const_int 0)
9101 (parallel [(const_int 1)
9107 (parallel [(const_int 2)
9112 (parallel [(const_int 3)
9113 (const_int 7)]))))))]
9115 "vphadduwq\t{%1, %0|%0, %1}"
9116 [(set_attr "type" "sseiadd1")])
;; vphaddudq: single V4SI source, V2DI result; element selections starting
;; at index 0 and at index 1.
9118 (define_insn "xop_phaddudq"
9119 [(set (match_operand:V2DI 0 "register_operand" "=x")
9123 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9124 (parallel [(const_int 0)
9129 (parallel [(const_int 1)
9130 (const_int 3)])))))]
9132 "vphaddudq\t{%1, %0|%0, %1}"
9133 [(set_attr "type" "sseiadd1")])
;; Emits vphsubbw.  Operand 1 is a V16QI source (register or memory);
;; operand 0 is a V8HI register destination.
9135 (define_insn "xop_phsubbw"
9136 [(set (match_operand:V8HI 0 "register_operand" "=x")
9140 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
9141 (parallel [(const_int 0)
9152 (parallel [(const_int 1)
9159 (const_int 15)])))))]
9161 "vphsubbw\t{%1, %0|%0, %1}"
9162 [(set_attr "type" "sseiadd1")])
;; Emits vphsubwd.  Operand 1 is a V8HI source (register or memory);
;; operand 0 is a V4SI register destination.
9164 (define_insn "xop_phsubwd"
9165 [(set (match_operand:V4SI 0 "register_operand" "=x")
9169 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
9170 (parallel [(const_int 0)
9177 (parallel [(const_int 1)
9180 (const_int 7)])))))]
9182 "vphsubwd\t{%1, %0|%0, %1}"
9183 [(set_attr "type" "sseiadd1")])
;; Emits vphsubdq.  Operand 1 is a V4SI source (register or memory);
;; operand 0 is a V2DI register destination.
9185 (define_insn "xop_phsubdq"
9186 [(set (match_operand:V2DI 0 "register_operand" "=x")
9190 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
9191 (parallel [(const_int 0)
9196 (parallel [(const_int 1)
9197 (const_int 3)])))))]
9199 "vphsubdq\t{%1, %0|%0, %1}"
9200 [(set_attr "type" "sseiadd1")])
9202 ;; XOP permute instructions
;; Emits vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register; operands 2 and 3 may each be memory,
;; but the insn condition rejects the case where both are.
9203 (define_insn "xop_pperm"
9204 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9206 [(match_operand:V16QI 1 "register_operand" "x,x")
9207 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
9208 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
9209 UNSPEC_XOP_PERMUTE))]
9210 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9211 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9212 [(set_attr "type" "sse4arg")
9213 (set_attr "mode" "TI")])
9215 ;; XOP pack instructions that combine two vectors into a smaller vector
;; vpperm used to pack two V2DI inputs (operands 1 and 2) into a V4SI
;; result.  Operand 3 is the V16QI selector, recorded with a (use).
;; Operands 2 and 3 may not both be memory.
9216 (define_insn "xop_pperm_pack_v2di_v4si"
9217 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9220 (match_operand:V2DI 1 "register_operand" "x,x"))
9222 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
9223 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9224 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9225 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9226 [(set_attr "type" "sse4arg")
9227 (set_attr "mode" "TI")])
;; vpperm used to pack two V4SI inputs (operands 1 and 2) into a V8HI
;; result.  Operand 3 is the V16QI selector, recorded with a (use).
;; Operands 2 and 3 may not both be memory.
9229 (define_insn "xop_pperm_pack_v4si_v8hi"
9230 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9233 (match_operand:V4SI 1 "register_operand" "x,x"))
9235 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
9236 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9237 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9238 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9239 [(set_attr "type" "sse4arg")
9240 (set_attr "mode" "TI")])
;; vpperm used to pack two V8HI inputs (operands 1 and 2) into a V16QI
;; result.  Operand 3 is the V16QI selector, recorded with a (use).
;; Operands 2 and 3 may not both be memory.
9242 (define_insn "xop_pperm_pack_v8hi_v16qi"
9243 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9246 (match_operand:V8HI 1 "register_operand" "x,x"))
9248 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
9249 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
9250 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
9251 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9252 [(set_attr "type" "sse4arg")
9253 (set_attr "mode" "TI")])
9255 ;; XOP packed rotate instructions
;; Expander for a vector rotate-left by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand,
;; it is converted to <ssescalarmode> if its mode differs, replicated
;; into all <ssescalarnum> elements of a fresh vector register via
;; vec_init<mode>, and the variable rotate xop_vrotl<mode>3 is emitted.
9256 (define_expand "rotl<mode>3"
9257 [(set (match_operand:VI_128 0 "register_operand" "")
9259 (match_operand:VI_128 1 "nonimmediate_operand" "")
9260 (match_operand:SI 2 "general_operand")))]
9263 /* If we were given a scalar, convert it to parallel */
9264 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9266 rtvec vs = rtvec_alloc (<ssescalarnum>);
9267 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9268 rtx reg = gen_reg_rtx (<MODE>mode);
9269 rtx op2 = operands[2];
9272 if (GET_MODE (op2) != <ssescalarmode>mode)
9274 op2 = gen_reg_rtx (<ssescalarmode>mode);
9275 convert_move (op2, operands[2], false);
9278 for (i = 0; i < <ssescalarnum>; i++)
9279 RTVEC_ELT (vs, i) = op2;
9281 emit_insn (gen_vec_init<mode> (reg, par));
9282 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for a vector rotate-right by an SImode count (operand 2).
;; When the count does not satisfy const_0_to_<sserotatemax>_operand,
;; it is converted to <ssescalarmode> if its mode differs and replicated
;; into a vector register via vec_init<mode>.  That vector is then
;; negated with neg<mode>2 and passed to xop_vrotl<mode>3, so the right
;; rotate is expressed as a variable left rotate by the negated count.
9287 (define_expand "rotr<mode>3"
9288 [(set (match_operand:VI_128 0 "register_operand" "")
9290 (match_operand:VI_128 1 "nonimmediate_operand" "")
9291 (match_operand:SI 2 "general_operand")))]
9294 /* If we were given a scalar, convert it to parallel */
9295 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
9297 rtvec vs = rtvec_alloc (<ssescalarnum>);
9298 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
9299 rtx neg = gen_reg_rtx (<MODE>mode);
9300 rtx reg = gen_reg_rtx (<MODE>mode);
9301 rtx op2 = operands[2];
9304 if (GET_MODE (op2) != <ssescalarmode>mode)
9306 op2 = gen_reg_rtx (<ssescalarmode>mode);
9307 convert_move (op2, operands[2], false);
9310 for (i = 0; i < <ssescalarnum>; i++)
9311 RTVEC_ELT (vs, i) = op2;
9313 emit_insn (gen_vec_init<mode> (reg, par));
9314 emit_insn (gen_neg<mode>2 (neg, reg));
9315 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Immediate-count form: emits vprot<ssemodesuffix> with operand 2 as
;; the immediate.  Operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand; operand 1 may be register or memory.
9320 (define_insn "xop_rotl<mode>3"
9321 [(set (match_operand:VI_128 0 "register_operand" "=x")
9323 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9324 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9326 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9327 [(set_attr "type" "sseishft")
9328 (set_attr "length_immediate" "1")
9329 (set_attr "mode" "TI")])
;; Immediate-count rotate right, emitted as vprot<ssemodesuffix> with a
;; recomputed immediate held in operands[3].  Operand 2 is the requested
;; right-rotate count and must satisfy const_0_to_<sserotatemax>_operand.
;; The emitted count is the element width in bits minus operand 2.  The
;; width is taken from GET_MODE_BITSIZE (<ssescalarmode>mode); the former
;; expression <ssescalarnum> * 8 is the element *count* times 8, which
;; equals the element width only for V4SI and gave wrong counts for
;; V16QI, V8HI and V2DI.
9331 (define_insn "xop_rotr<mode>3"
9332 [(set (match_operand:VI_128 0 "register_operand" "=x")
9334 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
9335 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
9338 operands[3] = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
9339 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
9341 [(set_attr "type" "sseishft")
9342 (set_attr "length_immediate" "1")
9343 (set_attr "mode" "TI")])
;; Expander for rotate-right by a per-element vector count (operand 2).
;; The count vector is negated into a fresh register with neg<mode>2 and
;; the result is passed to xop_vrotl<mode>3.
9345 (define_expand "vrotr<mode>3"
9346 [(match_operand:VI_128 0 "register_operand" "")
9347 (match_operand:VI_128 1 "register_operand" "")
9348 (match_operand:VI_128 2 "register_operand" "")]
9351 rtx reg = gen_reg_rtx (<MODE>mode);
9352 emit_insn (gen_neg<mode>2 (reg, operands[2]));
9353 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for rotate-left by a per-element vector count; forwards all
;; three operands unchanged to xop_vrotl<mode>3.
9357 (define_expand "vrotl<mode>3"
9358 [(match_operand:VI_128 0 "register_operand" "")
9359 (match_operand:VI_128 1 "register_operand" "")
9360 (match_operand:VI_128 2 "register_operand" "")]
9363 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; Variable-count form of vprot<ssemodesuffix>: operand 2 is a vector
;; count, and the RTL is an if_then_else whose visible arm uses the
;; negated count.  Operands 1 and 2 may each be memory, but the insn
;; condition rejects the case where both are.
9367 (define_insn "xop_vrotl<mode>3"
9368 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9369 (if_then_else:VI_128
9371 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9374 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9378 (neg:VI_128 (match_dup 2)))))]
9379 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9380 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9381 [(set_attr "type" "sseishft")
9382 (set_attr "prefix_data16" "0")
9383 (set_attr "prefix_extra" "2")
9384 (set_attr "mode" "TI")])
9386 ;; XOP packed shift instructions.
9387 ;; FIXME: add V2DI back in
;; Expander for the vlshr<mode>3 named pattern with a per-element vector
;; count: negates the count vector with neg<mode>2 and emits
;; xop_lshl<mode>3 with the negated count.
9388 (define_expand "vlshr<mode>3"
9389 [(match_operand:VI124_128 0 "register_operand" "")
9390 (match_operand:VI124_128 1 "register_operand" "")
9391 (match_operand:VI124_128 2 "register_operand" "")]
9394 rtx neg = gen_reg_rtx (<MODE>mode);
9395 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9396 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
;; Expander for the vashr<mode>3 named pattern with a per-element vector
;; count: negates the count vector with neg<mode>2 and emits
;; xop_ashl<mode>3 with the negated count.
9400 (define_expand "vashr<mode>3"
9401 [(match_operand:VI124_128 0 "register_operand" "")
9402 (match_operand:VI124_128 1 "register_operand" "")
9403 (match_operand:VI124_128 2 "register_operand" "")]
9406 rtx neg = gen_reg_rtx (<MODE>mode);
9407 emit_insn (gen_neg<mode>2 (neg, operands[2]));
9408 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
;; Expander for the vashl<mode>3 named pattern with a per-element vector
;; count; forwards all three operands unchanged to xop_ashl<mode>3.
9412 (define_expand "vashl<mode>3"
9413 [(match_operand:VI124_128 0 "register_operand" "")
9414 (match_operand:VI124_128 1 "register_operand" "")
9415 (match_operand:VI124_128 2 "register_operand" "")]
9418 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
;; Emits vpsha<ssemodesuffix> with a vector count in operand 2.  The RTL
;; is an if_then_else whose visible arm uses the negated count.
;; Operands 1 and 2 may each be memory, but not both at once.
9422 (define_insn "xop_ashl<mode>3"
9423 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9424 (if_then_else:VI_128
9426 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9429 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9433 (neg:VI_128 (match_dup 2)))))]
9434 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9435 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9436 [(set_attr "type" "sseishft")
9437 (set_attr "prefix_data16" "0")
9438 (set_attr "prefix_extra" "2")
9439 (set_attr "mode" "TI")])
;; Emits vpshl<ssemodesuffix> with a vector count in operand 2.  The RTL
;; is an if_then_else whose visible arm uses the negated count.
;; Operands 1 and 2 may each be memory, but not both at once.
9441 (define_insn "xop_lshl<mode>3"
9442 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
9443 (if_then_else:VI_128
9445 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
9448 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
9452 (neg:VI_128 (match_dup 2)))))]
9453 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9454 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9455 [(set_attr "type" "sseishft")
9456 (set_attr "prefix_data16" "0")
9457 (set_attr "prefix_extra" "2")
9458 (set_attr "mode" "TI")])
9460 ;; SSE2 doesn't have some shift variants, so define versions for XOP
;; V16QI shift-left by a scalar SImode count: replicates operand 2 into
;; all 16 elements of a V16QI register via vec_initv16qi, then emits
;; xop_ashlv16qi3 with that vector as the count.
9461 (define_expand "ashlv16qi3"
9462 [(match_operand:V16QI 0 "register_operand" "")
9463 (match_operand:V16QI 1 "register_operand" "")
9464 (match_operand:SI 2 "nonmemory_operand" "")]
9467 rtvec vs = rtvec_alloc (16);
9468 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9469 rtx reg = gen_reg_rtx (V16QImode);
9471 for (i = 0; i < 16; i++)
9472 RTVEC_ELT (vs, i) = operands[2];
9474 emit_insn (gen_vec_initv16qi (reg, par));
9475 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V16QI shift by a scalar SImode count using the xop_lshl pattern:
;; replicates operand 2 into all 16 elements of a V16QI register via
;; vec_initv16qi, then emits xop_lshlv16qi3 with that vector as the count.
9479 (define_expand "lshlv16qi3"
9480 [(match_operand:V16QI 0 "register_operand" "")
9481 (match_operand:V16QI 1 "register_operand" "")
9482 (match_operand:SI 2 "nonmemory_operand" "")]
9485 rtvec vs = rtvec_alloc (16);
9486 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9487 rtx reg = gen_reg_rtx (V16QImode);
9489 for (i = 0; i < 16; i++)
9490 RTVEC_ELT (vs, i) = operands[2];
9492 emit_insn (gen_vec_initv16qi (reg, par));
9493 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
;; V16QI arithmetic shift-right by a scalar SImode count, implemented
;; with xop_ashlv16qi3 and a negated count.  A CONST_INT count is
;; negated up front with GEN_INT before being replicated into the count
;; vector.  For a non-constant count the replicated vector is negated
;; with negv16qi2 and the negated register is used instead.
9497 (define_expand "ashrv16qi3"
9498 [(match_operand:V16QI 0 "register_operand" "")
9499 (match_operand:V16QI 1 "register_operand" "")
9500 (match_operand:SI 2 "nonmemory_operand" "")]
9503 rtvec vs = rtvec_alloc (16);
9504 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
9505 rtx reg = gen_reg_rtx (V16QImode);
9507 rtx ele = ((CONST_INT_P (operands[2]))
9508 ? GEN_INT (- INTVAL (operands[2]))
9511 for (i = 0; i < 16; i++)
9512 RTVEC_ELT (vs, i) = ele;
9514 emit_insn (gen_vec_initv16qi (reg, par));
9516 if (!CONST_INT_P (operands[2]))
9518 rtx neg = gen_reg_rtx (V16QImode);
9519 emit_insn (gen_negv16qi2 (neg, reg));
9520 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
9523 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
;; V2DI arithmetic shift-right by a scalar DImode count, implemented
;; with xop_ashlv2di3 and a negated count.  The negated scalar is
;; produced three ways: GEN_INT for a CONST_INT; convert_move to DImode
;; followed by negdi2 when the operand's mode is not DImode; negdi2
;; directly otherwise.  It is then placed in both lanes of the count
;; vector via vec_initv2di.
9528 (define_expand "ashrv2di3"
9529 [(match_operand:V2DI 0 "register_operand" "")
9530 (match_operand:V2DI 1 "register_operand" "")
9531 (match_operand:DI 2 "nonmemory_operand" "")]
9534 rtvec vs = rtvec_alloc (2);
9535 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
9536 rtx reg = gen_reg_rtx (V2DImode);
9539 if (CONST_INT_P (operands[2]))
9540 ele = GEN_INT (- INTVAL (operands[2]));
9541 else if (GET_MODE (operands[2]) != DImode)
9543 rtx move = gen_reg_rtx (DImode);
9544 ele = gen_reg_rtx (DImode);
9545 convert_move (move, operands[2], false);
9546 emit_insn (gen_negdi2 (ele, move));
9550 ele = gen_reg_rtx (DImode);
9551 emit_insn (gen_negdi2 (ele, operands[2]));
9554 RTVEC_ELT (vs, 0) = ele;
9555 RTVEC_ELT (vs, 1) = ele;
9556 emit_insn (gen_vec_initv2di (reg, par));
9557 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
;; Emits vfrcz<ssemodesuffix> for each mode in FMAMODE.  Operand 1 may
;; be register or memory; operand 0 is a register.
9562 (define_insn "xop_frcz<mode>2"
9563 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
9565 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
9568 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
9569 [(set_attr "type" "ssecvt1")
9570 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vfrcz form.  Its preparation code sets
;; operands[3] to the zero constant of <MODE>mode (CONST0_RTX).
9573 (define_expand "xop_vmfrcz<mode>2"
9574 [(set (match_operand:VF_128 0 "register_operand")
9577 [(match_operand:VF_128 1 "nonimmediate_operand")]
9583 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn for the VF_128 vfrcz form; emits
;; vfrcz<ssescalarmodesuffix>.  Operand 2 must be a zero constant
;; (const0_operand) and does not appear in the output template.
9586 (define_insn "*xop_vmfrcz_<mode>"
9587 [(set (match_operand:VF_128 0 "register_operand" "=x")
9590 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
9592 (match_operand:VF_128 2 "const0_operand")
9595 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
9596 [(set_attr "type" "ssecvt1")
9597 (set_attr "mode" "<MODE>")])
;; Vector integer compare.  Operand 1 is the comparison operator and
;; must satisfy ix86_comparison_int_operator; it is printed through %Y1
;; to form the vpcom<cond><ssemodesuffix> mnemonic.  Operand 2 is a
;; register, operand 3 a register or memory.
9599 (define_insn "xop_maskcmp<mode>3"
9600 [(set (match_operand:VI_128 0 "register_operand" "=x")
9601 (match_operator:VI_128 1 "ix86_comparison_int_operator"
9602 [(match_operand:VI_128 2 "register_operand" "x")
9603 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9605 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9606 [(set_attr "type" "sse4arg")
9607 (set_attr "prefix_data16" "0")
9608 (set_attr "prefix_rep" "0")
9609 (set_attr "prefix_extra" "2")
9610 (set_attr "length_immediate" "1")
9611 (set_attr "mode" "TI")])
;; Vector integer compare for operators accepted by
;; ix86_comparison_uns_operator.  The mnemonic is
;; vpcom<cond>u<ssemodesuffix>, with <cond> printed through %Y1.
9613 (define_insn "xop_maskcmp_uns<mode>3"
9614 [(set (match_operand:VI_128 0 "register_operand" "=x")
9615 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
9616 [(match_operand:VI_128 2 "register_operand" "x")
9617 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
9619 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9620 [(set_attr "type" "ssecmp")
9621 (set_attr "prefix_data16" "0")
9622 (set_attr "prefix_rep" "0")
9623 (set_attr "prefix_extra" "2")
9624 (set_attr "length_immediate" "1")
9625 (set_attr "mode" "TI")])
9627 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
9628 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
9629 ;; the exact instruction generated for the intrinsic.
;; Same output template as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP.
9630 (define_insn "xop_maskcmp_uns2<mode>3"
9631 [(set (match_operand:VI_128 0 "register_operand" "=x")
9633 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
9634 [(match_operand:VI_128 2 "register_operand" "x")
9635 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
9636 UNSPEC_XOP_UNSIGNED_CMP))]
9638 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
9639 [(set_attr "type" "ssecmp")
9640 (set_attr "prefix_data16" "0")
9641 (set_attr "prefix_extra" "2")
9642 (set_attr "length_immediate" "1")
9643 (set_attr "mode" "TI")])
9645 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
9646 ;; being added here to be complete.
;; Modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a constant selector:
;; nonzero emits vpcomtrue<ssemodesuffix>, zero emits
;; vpcomfalse<ssemodesuffix>.  Operand 3 is not printed in either template.
9647 (define_insn "xop_pcom_tf<mode>3"
9648 [(set (match_operand:VI_128 0 "register_operand" "=x")
9650 [(match_operand:VI_128 1 "register_operand" "x")
9651 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
9652 (match_operand:SI 3 "const_int_operand" "n")]
9653 UNSPEC_XOP_TRUEFALSE))]
9656 return ((INTVAL (operands[3]) != 0)
9657 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9658 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
9660 [(set_attr "type" "ssecmp")
9661 (set_attr "prefix_data16" "0")
9662 (set_attr "prefix_extra" "2")
9663 (set_attr "length_immediate" "1")
9664 (set_attr "mode" "TI")])
;; Emits vpermil2<ssemodesuffix>.  Operands 1 and 2 are VF data inputs,
;; operand 3 is a selector in the matching integer vector mode
;; (register or memory), and operand 4 is an immediate in the range 0..3.
9666 (define_insn "xop_vpermil2<mode>3"
9667 [(set (match_operand:VF 0 "register_operand" "=x")
9669 [(match_operand:VF 1 "register_operand" "x")
9670 (match_operand:VF 2 "nonimmediate_operand" "%x")
9671 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
9672 (match_operand:SI 4 "const_0_to_3_operand" "n")]
9675 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
9676 [(set_attr "type" "sse4arg")
9677 (set_attr "length_immediate" "1")
9678 (set_attr "mode" "<MODE>")])
9680 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encrypt round on V2DI.  Two alternatives, selected by the "isa"
;; attribute: the non-AVX form aesenc, where operand 1 is tied to the
;; destination (constraint "0"), and the three-operand AVX form vaesenc.
9682 (define_insn "aesenc"
9683 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9684 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9685 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9689 aesenc\t{%2, %0|%0, %2}
9690 vaesenc\t{%2, %1, %0|%0, %1, %2}"
9691 [(set_attr "isa" "noavx,avx")
9692 (set_attr "type" "sselog1")
9693 (set_attr "prefix_extra" "1")
9694 (set_attr "prefix" "orig,vex")
9695 (set_attr "mode" "TI")])
;; Modelled as UNSPEC_AESENCLAST on V2DI.  Two alternatives: the non-AVX
;; form aesenclast, where operand 1 is tied to the destination, and the
;; three-operand AVX form vaesenclast.
9697 (define_insn "aesenclast"
9698 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9699 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9700 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9701 UNSPEC_AESENCLAST))]
9704 aesenclast\t{%2, %0|%0, %2}
9705 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
9706 [(set_attr "isa" "noavx,avx")
9707 (set_attr "type" "sselog1")
9708 (set_attr "prefix_extra" "1")
9709 (set_attr "prefix" "orig,vex")
9710 (set_attr "mode" "TI")])
;; AES decrypt round on V2DI.  Two alternatives: the non-AVX form
;; aesdec, where operand 1 is tied to the destination, and the
;; three-operand AVX form vaesdec.
9712 (define_insn "aesdec"
9713 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9714 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9715 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9719 aesdec\t{%2, %0|%0, %2}
9720 vaesdec\t{%2, %1, %0|%0, %1, %2}"
9721 [(set_attr "isa" "noavx,avx")
9722 (set_attr "type" "sselog1")
9723 (set_attr "prefix_extra" "1")
9724 (set_attr "prefix" "orig,vex")
9725 (set_attr "mode" "TI")])
;; Modelled as UNSPEC_AESDECLAST on V2DI.  Two alternatives: the non-AVX
;; form aesdeclast, where operand 1 is tied to the destination, and the
;; three-operand AVX form vaesdeclast.
9727 (define_insn "aesdeclast"
9728 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9729 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9730 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
9731 UNSPEC_AESDECLAST))]
9734 aesdeclast\t{%2, %0|%0, %2}
9735 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
9736 [(set_attr "isa" "noavx,avx")
9737 (set_attr "type" "sselog1")
9738 (set_attr "prefix_extra" "1")
9739 (set_attr "prefix" "orig,vex")
9740 (set_attr "mode" "TI")])
;; Single-source AES pattern on V2DI.  The %v in the template together
;; with prefix "maybe_vex" lets one alternative serve both the legacy
;; and the VEX-encoded mnemonic.
9742 (define_insn "aesimc"
9743 [(set (match_operand:V2DI 0 "register_operand" "=x")
9744 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
9747 "%vaesimc\t{%1, %0|%0, %1}"
9748 [(set_attr "type" "sselog1")
9749 (set_attr "prefix_extra" "1")
9750 (set_attr "prefix" "maybe_vex")
9751 (set_attr "mode" "TI")])
;; Modelled as UNSPEC_AESKEYGENASSIST on V2DI.  Operand 1 is the source
;; (register or memory) and operand 2 is an immediate in the range
;; 0..255.  The %v prefix selects the legacy or VEX mnemonic.
9753 (define_insn "aeskeygenassist"
9754 [(set (match_operand:V2DI 0 "register_operand" "=x")
9755 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
9756 (match_operand:SI 2 "const_0_to_255_operand" "n")]
9757 UNSPEC_AESKEYGENASSIST))]
9759 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
9760 [(set_attr "type" "sselog1")
9761 (set_attr "prefix_extra" "1")
9762 (set_attr "length_immediate" "1")
9763 (set_attr "prefix" "maybe_vex")
9764 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI.  Operand 3 is an immediate in the
;; range 0..255.  The non-AVX alternative ties operand 1 to the
;; destination; the AVX alternative is the three-operand form.
9766 (define_insn "pclmulqdq"
9767 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
9768 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
9769 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
9770 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
9774 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
9775 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9776 [(set_attr "isa" "noavx,avx")
9777 (set_attr "type" "sselog1")
9778 (set_attr "prefix_extra" "1")
9779 (set_attr "length_immediate" "1")
9780 (set_attr "prefix" "orig,vex")
9781 (set_attr "mode" "TI")])
;; Builds the PARALLEL for the vzeroall pattern in operands[0].
;; Element 0 is an UNSPEC_VOLATILE wrapping const0_rtx.  Elements
;; 1..nregs each SET one SSE register, viewed in V8SImode, to the
;; V8SImode zero constant.  nregs is 16 when TARGET_64BIT, otherwise 8.
9783 (define_expand "avx_vzeroall"
9784 [(match_par_dup 0 [(const_int 0)])]
9787 int nregs = TARGET_64BIT ? 16 : 8;
9790 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
9792 XVECEXP (operands[0], 0, 0)
9793 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
9796 for (regno = 0; regno < nregs; regno++)
9797 XVECEXP (operands[0], 0, regno + 1)
9798 = gen_rtx_SET (VOIDmode,
9799 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
9800 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  Attributes
;; mark it as having no ModRM byte and no memory access.
9803 (define_insn "*avx_vzeroall"
9804 [(match_parallel 0 "vzeroall_operation"
9805 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
9808 [(set_attr "type" "sse")
9809 (set_attr "modrm" "0")
9810 (set_attr "memory" "none")
9811 (set_attr "prefix" "vex")
9812 (set_attr "mode" "OI")])
9814 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
9815 ;; if the upper 128bits are unused.
;; Modelled as an UNSPECV_VZEROUPPER unspec_volatile carrying one
;; const_int operand.  Attributes mark it as having no ModRM byte and no
;; memory access.
9816 (define_insn "avx_vzeroupper"
9817 [(unspec_volatile [(match_operand 0 "const_int_operand" "")]
9818 UNSPECV_VZEROUPPER)]
9821 [(set_attr "type" "sse")
9822 (set_attr "modrm" "0")
9823 (set_attr "memory" "none")
9824 (set_attr "prefix" "vex")
9825 (set_attr "mode" "OI")])
;; Duplicate a scalar into every element of an AVX256MODE24P vector.
;; The first alternative takes the scalar from memory and emits
;; vbroadcast<ssescalarmodesuffix>.  The second accepts a register
;; source and is disparaged by the "?" in its constraint.
9827 (define_insn "vec_dup<mode>"
9828 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
9829 (vec_duplicate:AVX256MODE24P
9830 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,?x")))]
9833 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
9835 [(set_attr "type" "ssemov")
9836 (set_attr "prefix_extra" "1")
9837 (set_attr "prefix" "vex")
9838 (set_attr "mode" "V8SF")])
;; Post-reload rewrite (TARGET_AVX && reload_completed) of a 256-bit
;; vec_duplicate from a register scalar.  The scalar is first duplicated
;; into operand 2, which is the half-width-mode view of operand 0's own
;; hard register (same REGNO).  The full result is then the vec_concat
;; of operand 2 with itself.
9841 [(set (match_operand:AVX256MODE24P 0 "register_operand" "")
9842 (vec_duplicate:AVX256MODE24P
9843 (match_operand:<ssescalarmode> 1 "register_operand" "")))]
9844 "TARGET_AVX && reload_completed"
9845 [(set (match_dup 2) (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
9846 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
9847 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Produce a 256-bit vector from a half-width source (operand 1).
;; Three alternatives:
;;   memory source            -> vbroadcastf128
;;   source tied to operand 0 -> vinsertf128 with immediate 1
;;   other register source    -> vperm2f128 with immediate 0 (disparaged)
9849 (define_insn "avx_vbroadcastf128_<mode>"
9850 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
9852 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
9856 vbroadcastf128\t{%1, %0|%0, %1}
9857 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
9858 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
9859 [(set_attr "type" "ssemov,sselog1,sselog1")
9860 (set_attr "prefix_extra" "1")
9861 (set_attr "length_immediate" "0,1,1")
9862 (set_attr "prefix" "vex")
9863 (set_attr "mode" "V4SF,V8SF,V8SF")])
9865 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
9866 ;; If it so happens that the input is in memory, use vbroadcast.
9867 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast of element ELT, where ELT is the value of operand 3.
;; The output code switches on which_alternative.  One path re-addresses
;; the memory operand to the selected SFmode element (byte offset
;; ELT * 4) and emits vbroadcastss.  Another emits vpermilps with the
;; immediate ELT * 0x55, i.e. ELT repeated in all four 2-bit fields.
9868 (define_insn "*avx_vperm_broadcast_v4sf"
9869 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
9871 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
9872 (match_parallel 2 "avx_vbroadcast_operand"
9873 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9876 int elt = INTVAL (operands[3]);
9877 switch (which_alternative)
9881 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
9882 return "vbroadcastss\t{%1, %0|%0, %1}";
9884 operands[2] = GEN_INT (elt * 0x55);
9885 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
9890 [(set_attr "type" "ssemov,ssemov,sselog1")
9891 (set_attr "prefix_extra" "1")
9892 (set_attr "length_immediate" "0,0,1")
9893 (set_attr "prefix" "vex")
9894 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit float broadcast of element ELT (the value of operand 3),
;; split after reload.  The visible code has two sequences.  The first
;; emits avx_vpermil<mode> to replicate the element within its 128-bit
;; lane, then avx_vperm2f128<mode>3 with mask
;; (ELT / (<ssescalarnum> / 2)) * 0x11 to copy that lane into both
;; halves of the destination.  The second re-addresses operand 1 to the
;; selected scalar element so that the replacement pattern, a
;; vec_duplicate of operand 1, applies.
9896 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
9897 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
9899 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
9900 (match_parallel 2 "avx_vbroadcast_operand"
9901 [(match_operand 3 "const_int_operand" "C,n,n")])))]
9904 "&& reload_completed"
9905 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
9907 rtx op0 = operands[0], op1 = operands[1];
9908 int elt = INTVAL (operands[3]);
9914 /* Shuffle element we care about into all elements of the 128-bit lane.
9915 The other lane gets shuffled too, but we don't care. */
9916 if (<MODE>mode == V4DFmode)
9917 mask = (elt & 1 ? 15 : 0);
9919 mask = (elt & 3) * 0x55;
9920 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
9922 /* Shuffle the lane we care about into both lanes of the dest. */
9923 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
9924 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
9928 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode,
9929 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; VF2 (double-precision) expander.  Decodes the immediate in operand 2
;; into one element index per lane and packs the indices into a
;; PARALLEL.  Bits 0 and 1 select element 0 or 1 for the first two
;; lanes.  For V4DF, bits 2 and 3 select within the upper half, hence
;; the "+ 2" bias.
9932 (define_expand "avx_vpermil<mode>"
9933 [(set (match_operand:VF2 0 "register_operand" "")
9935 (match_operand:VF2 1 "nonimmediate_operand" "")
9936 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9939 int mask = INTVAL (operands[2]);
9940 rtx perm[<ssescalarnum>];
9942 perm[0] = GEN_INT (mask & 1);
9943 perm[1] = GEN_INT ((mask >> 1) & 1);
9944 if (<MODE>mode == V4DFmode)
9946 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
9947 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
9951 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; VF1 (single-precision) expander.  Decodes the immediate in operand 2
;; as four 2-bit fields giving element indices 0..3 for the first four
;; lanes.  For V8SF the same four fields are reused for lanes 4..7 with
;; a "+ 4" bias.  The indices are packed into a PARALLEL.
9954 (define_expand "avx_vpermil<mode>"
9955 [(set (match_operand:VF1 0 "register_operand" "")
9957 (match_operand:VF1 1 "nonimmediate_operand" "")
9958 (match_operand:SI 2 "const_0_to_255_operand" "")))]
9961 int mask = INTVAL (operands[2]);
9962 rtx perm[<ssescalarnum>];
9964 perm[0] = GEN_INT (mask & 3);
9965 perm[1] = GEN_INT ((mask >> 2) & 3);
9966 perm[2] = GEN_INT ((mask >> 4) & 3);
9967 perm[3] = GEN_INT ((mask >> 6) & 3);
9968 if (<MODE>mode == V8SFmode)
9970 perm[4] = GEN_INT ((mask & 3) + 4);
9971 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
9972 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
9973 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
9977 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matches a selection by an arbitrary PARALLEL (operand 2) that
;; avx_vpermilp_parallel accepts for <MODE>mode.  That helper's return
;; value minus one is used as the immediate, which replaces operand 2
;; before the vpermil<ssemodesuffix> template is printed.
9980 (define_insn "*avx_vpermilp<mode>"
9981 [(set (match_operand:VF 0 "register_operand" "=x")
9983 (match_operand:VF 1 "nonimmediate_operand" "xm")
9984 (match_parallel 2 ""
9985 [(match_operand 3 "const_int_operand" "")])))]
9987 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
9989 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
9990 operands[2] = GEN_INT (mask);
9991 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
9993 [(set_attr "type" "sselog")
9994 (set_attr "prefix_extra" "1")
9995 (set_attr "length_immediate" "1")
9996 (set_attr "prefix" "vex")
9997 (set_attr "mode" "<MODE>")])
;; Emits vpermil<ssemodesuffix> with a vector selector.  Operand 2 is in
;; the integer vector mode matching the VF data mode and may be register
;; or memory.
9999 (define_insn "avx_vpermilvar<mode>3"
10000 [(set (match_operand:VF 0 "register_operand" "=x")
10002 [(match_operand:VF 1 "register_operand" "x")
10003 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")]
10006 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10007 [(set_attr "type" "sselog")
10008 (set_attr "prefix_extra" "1")
10009 (set_attr "prefix" "vex")
10010 (set_attr "mode" "<MODE>")])
;; Expander for vperm2f128 with immediate operand 3.  When neither bit 3
;; nor bit 7 of the immediate is set ((mask & 0x88) == 0), the operation
;; is rebuilt as a vec_select from the vec_concat of operands 1 and 2.
;; The low half of the result takes nelt/2 consecutive elements starting
;; at (mask & 3) * nelt/2; the high half starts at
;; ((mask >> 4) & 3) * nelt/2.  Otherwise the UNSPEC_VPERMIL2F128
;; pattern in the RTL template is used.
10012 (define_expand "avx_vperm2f128<mode>3"
10013 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
10014 (unspec:AVX256MODE2P
10015 [(match_operand:AVX256MODE2P 1 "register_operand" "")
10016 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
10017 (match_operand:SI 3 "const_0_to_255_operand" "")]
10018 UNSPEC_VPERMIL2F128))]
10021 int mask = INTVAL (operands[3]);
10022 if ((mask & 0x88) == 0)
10024 rtx perm[<ssescalarnum>], t1, t2;
10025 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
10027 base = (mask & 3) * nelt2;
10028 for (i = 0; i < nelt2; ++i)
10029 perm[i] = GEN_INT (base + i);
10031 base = ((mask >> 4) & 3) * nelt2;
10032 for (i = 0; i < nelt2; ++i)
10033 perm[i + nelt2] = GEN_INT (base + i);
10035 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
10036 operands[1], operands[2]);
10037 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
10038 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
10039 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
10045 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
10046 ;; means that in order to represent this properly in rtl we'd have to
10047 ;; nest *another* vec_concat with a zero operand and do the select from
10048 ;; a 4x wide vector. That doesn't seem very nice.
;; UNSPEC_VPERMIL2F128 form of vperm2f128: accepts any immediate in the
;; range 0..255 (operand 3) and prints it unchanged.
10049 (define_insn "*avx_vperm2f128<mode>_full"
10050 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10051 (unspec:AVX256MODE2P
10052 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
10053 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
10054 (match_operand:SI 3 "const_0_to_255_operand" "n")]
10055 UNSPEC_VPERMIL2F128))]
10057 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10058 [(set_attr "type" "sselog")
10059 (set_attr "prefix_extra" "1")
10060 (set_attr "length_immediate" "1")
10061 (set_attr "prefix" "vex")
10062 (set_attr "mode" "V8SF")])
;; vec_select-of-vec_concat form of vperm2f128.  Matches only when
;; avx_vperm2f128_parallel accepts the selector PARALLEL (operand 3).
;; That helper's return value minus one becomes the immediate, which
;; replaces operand 3 before the template is printed.
10064 (define_insn "*avx_vperm2f128<mode>_nozero"
10065 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
10066 (vec_select:AVX256MODE2P
10067 (vec_concat:<ssedoublevecmode>
10068 (match_operand:AVX256MODE2P 1 "register_operand" "x")
10069 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
10070 (match_parallel 3 ""
10071 [(match_operand 4 "const_int_operand" "")])))]
10073 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
10075 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
10076 operands[3] = GEN_INT (mask);
10077 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10079 [(set_attr "type" "sselog")
10080 (set_attr "prefix_extra" "1")
10081 (set_attr "length_immediate" "1")
10082 (set_attr "prefix" "vex")
10083 (set_attr "mode" "V8SF")])
;; Expander that inserts a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  It switches on operand 3, an immediate in the
;; range 0..1, to pick either gen_vec_set_lo_<mode> or
;; gen_vec_set_hi_<mode>; any other value is unreachable.
10085 (define_expand "avx_vinsertf128<mode>"
10086 [(match_operand:V_256 0 "register_operand" "")
10087 (match_operand:V_256 1 "register_operand" "")
10088 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "")
10089 (match_operand:SI 3 "const_0_to_1_operand" "")]
10092 rtx (*insn)(rtx, rtx, rtx);
10094 switch (INTVAL (operands[3]))
10097 insn = gen_vec_set_lo_<mode>;
10100 insn = gen_vec_set_hi_<mode>;
10103 gcc_unreachable ();
10106 emit_insn (insn (operands[0], operands[1], operands[2]));
;; VI8F_256 (four-element) modes: replace the low half.  The result is
;; the concat of operand 2 with elements 2..3 of operand 1, emitted as
;; vinsertf128 with immediate 0.
10110 (define_insn "vec_set_lo_<mode>"
10111 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10112 (vec_concat:VI8F_256
10113 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10114 (vec_select:<ssehalfvecmode>
10115 (match_operand:VI8F_256 1 "register_operand" "x")
10116 (parallel [(const_int 2) (const_int 3)]))))]
10118 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10119 [(set_attr "type" "sselog")
10120 (set_attr "prefix_extra" "1")
10121 (set_attr "length_immediate" "1")
10122 (set_attr "prefix" "vex")
10123 (set_attr "mode" "V8SF")])
;; VI8F_256 (four-element) modes: replace the high half.  The result is
;; the concat of elements 0..1 of operand 1 with operand 2, emitted as
;; vinsertf128 with immediate 1.
10125 (define_insn "vec_set_hi_<mode>"
10126 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
10127 (vec_concat:VI8F_256
10128 (vec_select:<ssehalfvecmode>
10129 (match_operand:VI8F_256 1 "register_operand" "x")
10130 (parallel [(const_int 0) (const_int 1)]))
10131 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10133 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10134 [(set_attr "type" "sselog")
10135 (set_attr "prefix_extra" "1")
10136 (set_attr "length_immediate" "1")
10137 (set_attr "prefix" "vex")
10138 (set_attr "mode" "V8SF")])
;; VI4F_256 (eight-element) modes: replace the low half.  The result is
;; the concat of operand 2 with elements 4..7 of operand 1, emitted as
;; vinsertf128 with immediate 0.
10140 (define_insn "vec_set_lo_<mode>"
10141 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10142 (vec_concat:VI4F_256
10143 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
10144 (vec_select:<ssehalfvecmode>
10145 (match_operand:VI4F_256 1 "register_operand" "x")
10146 (parallel [(const_int 4) (const_int 5)
10147 (const_int 6) (const_int 7)]))))]
10149 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10150 [(set_attr "type" "sselog")
10151 (set_attr "prefix_extra" "1")
10152 (set_attr "length_immediate" "1")
10153 (set_attr "prefix" "vex")
10154 (set_attr "mode" "V8SF")])
;; VI4F_256 (eight-element) modes: replace the high half.  The result is
;; the concat of elements 0..3 of operand 1 with operand 2, emitted as
;; vinsertf128 with immediate 1.
10156 (define_insn "vec_set_hi_<mode>"
10157 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
10158 (vec_concat:VI4F_256
10159 (vec_select:<ssehalfvecmode>
10160 (match_operand:VI4F_256 1 "register_operand" "x")
10161 (parallel [(const_int 0) (const_int 1)
10162 (const_int 2) (const_int 3)]))
10163 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
10165 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10166 [(set_attr "type" "sselog")
10167 (set_attr "prefix_extra" "1")
10168 (set_attr "length_immediate" "1")
10169 (set_attr "prefix" "vex")
10170 (set_attr "mode" "V8SF")])
;; V16HI: replace the low half with the V8HI operand 2, keeping
;; elements 8..15 of operand 1.  Emitted as vinsertf128 with immediate 0.
10172 (define_insn "vec_set_lo_v16hi"
10173 [(set (match_operand:V16HI 0 "register_operand" "=x")
10175 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10177 (match_operand:V16HI 1 "register_operand" "x")
10178 (parallel [(const_int 8) (const_int 9)
10179 (const_int 10) (const_int 11)
10180 (const_int 12) (const_int 13)
10181 (const_int 14) (const_int 15)]))))]
10183 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
10184 [(set_attr "type" "sselog")
10185 (set_attr "prefix_extra" "1")
10186 (set_attr "length_immediate" "1")
10187 (set_attr "prefix" "vex")
10188 (set_attr "mode" "V8SF")])
;; V16HI: replace the high half with the V8HI operand 2, keeping
;; elements 0..7 of operand 1.  Emitted as vinsertf128 with immediate 1.
10190 (define_insn "vec_set_hi_v16hi"
10191 [(set (match_operand:V16HI 0 "register_operand" "=x")
10194 (match_operand:V16HI 1 "register_operand" "x")
10195 (parallel [(const_int 0) (const_int 1)
10196 (const_int 2) (const_int 3)
10197 (const_int 4) (const_int 5)
10198 (const_int 6) (const_int 7)]))
10199 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
10201 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
10202 [(set_attr "type" "sselog")
10203 (set_attr "prefix_extra" "1")
10204 (set_attr "length_immediate" "1")
10205 (set_attr "prefix" "vex")
10206 (set_attr "mode" "V8SF")])
;; V32QI variant of the "set low half" pattern.  Operand 2 (V16QI, register
;; or memory) is listed first, followed by operand 1 with element indices
;; 16-31, i.e. the part of operand 1 that is kept.
;; NOTE(review): the vec_concat/vec_select wrapper lines and the insn
;; condition are not visible in this excerpt; presumably operand 2 becomes
;; the new low half -- confirm against the full file.
;; Emitted as vinsertf128 with immediate 0x0.
10208 (define_insn "vec_set_lo_v32qi"
10209 [(set (match_operand:V32QI 0 "register_operand" "=x")
10211 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
10213 (match_operand:V32QI 1 "register_operand" "x")
10214 (parallel [(const_int 16) (const_int 17)
10215 (const_int 18) (const_int 19)
10216 (const_int 20) (const_int 21)
10217 (const_int 22) (const_int 23)
10218 (const_int 24) (const_int 25)
10219 (const_int 26) (const_int 27)
10220 (const_int 28) (const_int 29)
10221 (const_int 30) (const_int 31)]))))]
10223 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
;; VEX-encoded, one immediate byte; mode attribute is V8SF although the
;; operands are integer vectors.
10224 [(set_attr "type" "sselog")
10225 (set_attr "prefix_extra" "1")
10226 (set_attr "length_immediate" "1")
10227 (set_attr "prefix" "vex")
10228 (set_attr "mode" "V8SF")])
;; V32QI variant of the "set high half" pattern.  Operand 1 is listed with
;; element indices 0-15 (the part that is kept), followed by operand 2
;; (V16QI, register or memory).
;; NOTE(review): the vec_concat/vec_select wrapper lines and the insn
;; condition are not visible in this excerpt; presumably operand 2 becomes
;; the new high half -- confirm against the full file.
;; Emitted as vinsertf128 with immediate 0x1.
10230 (define_insn "vec_set_hi_v32qi"
10231 [(set (match_operand:V32QI 0 "register_operand" "=x")
10234 (match_operand:V32QI 1 "register_operand" "x")
10235 (parallel [(const_int 0) (const_int 1)
10236 (const_int 2) (const_int 3)
10237 (const_int 4) (const_int 5)
10238 (const_int 6) (const_int 7)
10239 (const_int 8) (const_int 9)
10240 (const_int 10) (const_int 11)
10241 (const_int 12) (const_int 13)
10242 (const_int 14) (const_int 15)]))
10243 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
10245 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
;; VEX-encoded, one immediate byte; mode attribute is V8SF although the
;; operands are integer vectors.
10246 [(set_attr "type" "sselog")
10247 (set_attr "prefix_extra" "1")
10248 (set_attr "length_immediate" "1")
10249 (set_attr "prefix" "vex")
10250 (set_attr "mode" "V8SF")])
;; Named expander for a masked vector load over the VF float modes.
;;   operand 0: destination register
;;   operand 1: memory source
;;   operand 2: register of the matching integer vector mode
;;              (presumably the element mask -- the unspec name and the
;;              expander condition are not visible in this excerpt)
10252 (define_expand "avx_maskload<ssemodesuffix><avxsizesuffix>"
10253 [(set (match_operand:VF 0 "register_operand" "")
10255 [(match_operand:<sseintvecmode> 2 "register_operand" "")
10256 (match_operand:VF 1 "memory_operand" "")
;; Named expander for a masked vector store over the VF float modes.
;;   operand 0: memory destination
;;   operand 1: register of the matching integer vector mode
;;              (presumably the element mask -- the unspec name and the
;;              expander condition are not visible in this excerpt)
;;   operand 2: register holding the data to store
10261 (define_expand "avx_maskstore<ssemodesuffix><avxsizesuffix>"
10262 [(set (match_operand:VF 0 "memory_operand" "")
10264 [(match_operand:<sseintvecmode> 1 "register_operand" "")
10265 (match_operand:VF 2 "register_operand" "")
;; Anonymous insn that emits vmaskmov<ssemodesuffix> for both directions.
;; Alternative 0: register destination, memory operand 2 ("=x" / "m").
;; Alternative 1: memory destination, register operand 2 ("m" / "x").
;; Operand 1 is always a register of the matching integer vector mode.
;; The visible part of the condition requires that operand 0 is a register
;; exactly when operand 2 is memory, so register-to-register and
;; memory-to-memory forms are rejected.  The first part of the condition is
;; not visible in this excerpt.
10270 (define_insn "*avx_maskmov<ssemodesuffix><avxsizesuffix>"
10271 [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
10273 [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
10274 (match_operand:VF 2 "nonimmediate_operand" "m,x")
10278 && (REG_P (operands[0]) == MEM_P (operands[2]))"
10279 "vmaskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10280 [(set_attr "type" "sselog1")
10281 (set_attr "prefix_extra" "1")
10282 (set_attr "prefix" "vex")
10283 (set_attr "mode" "<MODE>")])
;; Insn-and-split relating a 256-bit AVX256MODE2P value (operand 0, register
;; or memory) to its half-width counterpart (operand 1), wrapped in an
;; unspec whose name is not visible in this excerpt.
;; The split runs only after reload ("&& reload_completed").  Its C code
;; re-expresses one side as a hard register in the other side's mode --
;; operand 0 in <ssehalfvecmode>, or operand 1 in <MODE> -- and then emits
;; an ordinary move between the two.
;; NOTE(review): the test that chooses between the two rewrites, and the
;; insn condition/template, are not visible here -- confirm against the
;; full file.
10285 (define_insn_and_split "avx_<ssemodesuffix><avxsizesuffix>_<ssemodesuffix>"
10286 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
10287 (unspec:AVX256MODE2P
10288 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
10292 "&& reload_completed"
10295 rtx op0 = operands[0];
10296 rtx op1 = operands[1];
10298 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
10300 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
10301 emit_move_insn (op0, op1);
;; Named expander for initializing a 256-bit vector (any V_256 mode).
;; Operand 0 is the destination register; operand 1 has no predicate or
;; mode restriction.  All work is delegated to ix86_expand_vector_init,
;; called with `false' as its first argument.
10305 (define_expand "vec_init<mode>"
10306 [(match_operand:V_256 0 "register_operand" "")
10307 (match_operand 1 "" "")]
10310 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a 256-bit vector (any V_256 mode) from two half-width operands.
;; Two alternatives, as reflected by the per-alternative attributes
;; ("sselog,ssemov"):
;;   - operand 2 in a register or memory ("xm"): vinsertf128 with immediate
;;     0x1, printing operand 1 with the %t modifier;
;;   - operand 2 matching constraint "C": a plain vmovaps/vmovapd/vmovdqa of
;;     operand 1 into %x0, chosen by the insn's mode attribute.
;; NOTE(review): the case labels of both switch statements are not visible
;; in this excerpt; the alternative-to-template mapping above is inferred
;; from the attribute lists -- confirm against the full file.
;; Any mode not handled by the inner switch hits gcc_unreachable.
10314 (define_insn "*vec_concat<mode>_avx"
10315 [(set (match_operand:V_256 0 "register_operand" "=x,x")
10317 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
10318 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
10321 switch (which_alternative)
10324 return "vinsertf128\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
10326 switch (get_attr_mode (insn))
10329 return "vmovaps\t{%1, %x0|%x0, %1}";
10331 return "vmovapd\t{%1, %x0|%x0, %1}";
10333 return "vmovdqa\t{%1, %x0|%x0, %1}";
10336 gcc_unreachable ();
10339 [(set_attr "type" "sselog,ssemov")
10340 (set_attr "prefix_extra" "1,*")
10341 (set_attr "length_immediate" "1,*")
10342 (set_attr "prefix" "vex")
10343 (set_attr "mode" "<sseinsnmode>")])
;; Convert half-precision values held in V8HI register operand 1 into the
;; V4SF destination.  The conversion is modelled as a V8SF unspec of which
;; the four lowest elements are kept, so the selector must list indices
;; 0, 1, 2, 3 in order: the 128-bit vcvtph2ps converts the four low
;; half-floats one-to-one, without duplicating or dropping any element.
10345 (define_insn "vcvtph2ps"
10346 [(set (match_operand:V4SF 0 "register_operand" "=x")
10348 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
10350 (parallel [(const_int 0) (const_int 1)
10351 (const_int 2) (const_int 3)])))]
10353 "vcvtph2ps\t{%1, %0|%0, %1}"
10354 [(set_attr "type" "ssecvt")
10355 (set_attr "prefix" "vex")
10356 (set_attr "mode" "V4SF")])
;; Memory-source form of the 128-bit half-to-single conversion: operand 1 is
;; a V4HI memory operand, and the V4SF result goes to register operand 0.
;; NOTE(review): the mode attribute is V8SF here although the destination
;; is V4SF (the register form "vcvtph2ps" uses V4SF) -- confirm whether
;; this is intentional.
10358 (define_insn "*vcvtph2ps_load"
10359 [(set (match_operand:V4SF 0 "register_operand" "=x")
10360 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
10361 UNSPEC_VCVTPH2PS))]
10363 "vcvtph2ps\t{%1, %0|%0, %1}"
10364 [(set_attr "type" "ssecvt")
10365 (set_attr "prefix" "vex")
10366 (set_attr "mode" "V8SF")])
;; 256-bit half-to-single conversion: the V8HI source (operand 1, register
;; or memory) is converted to a V8SF result in register operand 0.
10368 (define_insn "vcvtph2ps256"
10369 [(set (match_operand:V8SF 0 "register_operand" "=x")
10370 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
10371 UNSPEC_VCVTPH2PS))]
10373 "vcvtph2ps\t{%1, %0|%0, %1}"
10374 [(set_attr "type" "ssecvt")
10375 (set_attr "prefix" "vex")
10376 (set_attr "mode" "V8SF")])
;; Named expander for the 128-bit single-to-half conversion.
;;   operand 0: V8HI destination register
;;   operand 1: V4SF source register
;;   operand 2: SImode immediate passed into the unspec
;;   operand 3: not supplied by the caller; the preparation statement sets
;;              it to the V4HI zero vector (the "*vcvtps2ph" insn matches a
;;              V4HI const0_operand as its operand 3).
10378 (define_expand "vcvtps2ph"
10379 [(set (match_operand:V8HI 0 "register_operand" "")
10381 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "")
10382 (match_operand:SI 2 "immediate_operand" "")]
10386 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of the 128-bit single-to-half conversion.  The V4HI unspec
;; result (from V4SF operand 1 and immediate operand 2, constraint "N") is
;; paired with operand 3, which must be a V4HI zero constant, to form the
;; V8HI destination.
;; NOTE(review): the wrapper joining the two V4HI parts and the insn
;; condition are not visible in this excerpt.
10388 (define_insn "*vcvtps2ph"
10389 [(set (match_operand:V8HI 0 "register_operand" "=x")
10391 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10392 (match_operand:SI 2 "immediate_operand" "N")]
10394 (match_operand:V4HI 3 "const0_operand" "")))]
10396 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10397 [(set_attr "type" "ssecvt")
10398 (set_attr "prefix" "vex")
10399 (set_attr "mode" "V4SF")])
;; Memory-destination form of the 128-bit single-to-half conversion: the
;; V4HI result of converting V4SF register operand 1 (with immediate
;; operand 2, constraint "N") is stored directly to memory operand 0.
10401 (define_insn "*vcvtps2ph_store"
10402 [(set (match_operand:V4HI 0 "memory_operand" "=m")
10403 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
10404 (match_operand:SI 2 "immediate_operand" "N")]
10405 UNSPEC_VCVTPS2PH))]
10407 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10408 [(set_attr "type" "ssecvt")
10409 (set_attr "prefix" "vex")
10410 (set_attr "mode" "V4SF")])
;; 256-bit single-to-half conversion: V8SF register operand 1 is converted
;; (with immediate operand 2, constraint "N") to a V8HI result that may be
;; written to a register or directly to memory ("=xm").
10412 (define_insn "vcvtps2ph256"
10413 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
10414 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
10415 (match_operand:SI 2 "immediate_operand" "N")]
10416 UNSPEC_VCVTPS2PH))]
10418 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
10419 [(set_attr "type" "ssecvt")
10420 (set_attr "prefix" "vex")
10421 (set_attr "mode" "V8SF")])